// Package helpers provides User-Agent generation, value normalization and
// Playwright browser set-up helpers.
package helpers

import (
	"fmt"
	"log/slog"
	"math"
	"math/rand"
	"os"
	"strconv"
	"strings"
	"unicode"

	"github.com/maximotejeda/us_dop_scrapper/helpers"
	"github.com/playwright-community/playwright-go"
	"golang.org/x/text/runes"
	"golang.org/x/text/transform"
	"golang.org/x/text/unicode/norm"
)

// UserAgent holds the individual pieces that are joined into a User-Agent
// header value by NewUA and NewMobileUA.
type UserAgent struct {
	version         string // leading product token, e.g. "Mozilla/5.0"
	system          string // parenthesised system/device description
	platformInfo    string // engine token, e.g. "AppleWebKit/537.36"
	platformDetails string // parenthesised engine details
	extensionsinfo  string // browser product token
}

// mobileSystemsStart is the index of the first mobile entry in systems; the
// entries before it describe desktop systems. Keep it in sync with the table.
const mobileSystemsStart = 5

var (
	// systems lists the system/device descriptions; desktop entries come
	// first, mobile entries start at mobileSystemsStart.
	systems = []string{
		"Macintosh; Intel Mac OS X 10_15_7",
		"Windows NT 10.0; Win64; x64",
		"Windows NT 6.1; Win64; x64; rv:109.0",
		"X11; Linux x86_64",
		"X11; CrOS x86_64 14541.0.0",
		"Linux; Android 10; K",
		"iPhone; CPU iPhone OS 17_1_2 like Mac OS X",
		"iPhone; CPU iPhone OS 14_6 like Mac OS X",
		"Linux; Android 9; JAT-L41",
		"Linux; Android 11; SAMSUNG SM-G973U",
		"iPad; CPU OS 14_7_1 like Mac OS X",
		"Linux; U; en-us; KFAPWI Build/JDQ39",
	}
	// platformInfo lists the engine tokens.
	platformInfo = []string{
		"AppleWebKit/605.1.15",
		"AppleWebKit/537.36",
		"Gecko/20100101",
	}
	// platformDetails lists the parenthesised engine details.
	platformDetails = []string{
		"KHTML, like Gecko",
	}
	// extensionInfo lists the browser product tokens. NewMobileUA relies on
	// the first two entries being the Firefox and Chrome tokens.
	extensionInfo = []string{
		"Firefox",
		"Chrome/87.0.42",
		"Safari/604.1",
		"Safari/537.36",
		"Version/14.1.2",
	}
)

// NewUA returns a User-Agent string whose system, engine and browser tokens
// are each picked at random, independently, from the package tables.
//
// A browser token without a version (e.g. "Firefox") gets a random major
// version in [100, 119] appended; tokens that already carry a version are
// left untouched. Chrome tokens are followed by a "Safari/537.3" token.
func NewUA() string {
	ua := UserAgent{
		version:         "Mozilla/5.0",
		system:          systems[rand.Intn(len(systems))],
		platformInfo:    platformInfo[rand.Intn(len(platformInfo))],
		platformDetails: platformDetails[0],
		extensionsinfo:  extensionInfo[rand.Intn(len(extensionInfo))],
	}

	product := ua.extensionsinfo
	// Appending "/<version>" to an already versioned token would yield a
	// malformed product such as "Chrome/87.0.42/113".
	if !strings.Contains(product, "/") {
		product = fmt.Sprintf("%s/%d", product, rand.Intn(20)+100)
	}
	// The table entry is "Chrome/87.0.42", so an equality test against
	// "Chrome" never matches; compare the prefix instead.
	if strings.HasPrefix(ua.extensionsinfo, "Chrome") {
		product += " Safari/537.3"
	}

	return fmt.Sprintf("%s (%s) %s (%s) %s", ua.version, ua.system, ua.platformInfo, ua.platformDetails, product)
}

// NewMobileUA returns a User-Agent string for a randomly chosen mobile system.
//
// iPhone and iPad systems get fixed Safari tokens, Android systems get either
// Firefox or Chrome tokens, and any other mobile system gets a random browser
// token followed by "mobile Safari/537.3".
func NewMobileUA() string {
	mobile := systems[mobileSystemsStart:]
	ua := UserAgent{
		version:         "Mozilla/5.0",
		system:          mobile[rand.Intn(len(mobile))],
		platformInfo:    platformInfo[rand.Intn(len(platformInfo))],
		platformDetails: platformDetails[0],
	}

	var extra string
	switch {
	case strings.Contains(ua.system, "iPhone"), strings.Contains(ua.system, "iPad"):
		ua.extensionsinfo = "Version/14.1.2"
		extra = "Mobile/15E148 Safari/604.1"
	case strings.Contains(ua.system, "Android"):
		// Choose between the Firefox and Chrome entries only.
		candidates := extensionInfo[0:2]
		if strings.Contains(candidates[rand.Intn(len(candidates))], "Firefox") {
			ua.extensionsinfo = "Gecko/114.0"
			extra = "Firefox/114.0"
		} else {
			ua.extensionsinfo = "Chrome/114.0.0.0"
			extra = "Mobile Safari/537.36"
		}
	default:
		ua.extensionsinfo = extensionInfo[rand.Intn(len(extensionInfo))]
		extra = "mobile Safari/537.3"
	}

	return fmt.Sprintf("%s (%s) %s (%s) %s %s", ua.version, ua.system, ua.platformInfo, ua.platformDetails, ua.extensionsinfo, extra)
}

// Normalize extracts the ASCII digits and dots from val, parses them as a
// float64 and rounds the result to four decimal places.
//
// It returns 0 when val contains no digits or dots, or when the extracted
// text is not a valid number (for example "1.2.3"); in the latter case the
// parse error is printed to standard output. Signs are discarded, so the
// result is never negative.
func Normalize(val string) float64 {
	var digits strings.Builder
	for _, r := range val {
		// Only ASCII digits are kept: strconv.ParseFloat rejects other
		// Unicode numerals, which would turn the whole value into 0.
		if (r >= '0' && r <= '9') || r == '.' {
			digits.WriteRune(r)
		}
	}
	if digits.Len() == 0 {
		return 0
	}

	cv, err := strconv.ParseFloat(digits.String(), 64)
	if err != nil {
		fmt.Printf("%s", err)
		return 0
	}
	return math.Round(cv*10000) / 10000
}

// CreateBrowser starts Playwright, launches headless Chromium, Firefox and
// WebKit browsers and returns one mobile-emulating context for each of them,
// in the order chrome, firefox, webkit. All three contexts share the same
// randomly generated mobile User-Agent.
//
// Any failure is logged through log and terminates the process with exit
// code 1, so the returned pointers are always non-nil.
func CreateBrowser(log *slog.Logger) (chrome *playwright.BrowserContext, firefox *playwright.BrowserContext, webkit *playwright.BrowserContext) {
	pw, err := playwright.Run(&playwright.RunOptions{
		Verbose: true,
	})
	if err != nil {
		log.Error("running pw, could not start", "error", err)
		os.Exit(1)
	}

	// NOTE(review): this package is itself named helpers and also defines
	// NewMobileUA; confirm the imported helpers package is a different
	// package and not a self-import.
	ua := helpers.NewMobileUA()
	headless := true
	launch := playwright.BrowserTypeLaunchOptions{Headless: &headless}

	ff, err := pw.Firefox.Launch(launch)
	if err != nil {
		log.Error("could not start browser firefox", "error", err)
		os.Exit(1)
	}
	ffc, err := newMobileContext(ff, ua)
	if err != nil {
		log.Error("could not start browser firefox context", "error", err)
		os.Exit(1)
	}

	// The chrome context must come from the Chromium browser type; it was
	// previously launched through pw.Firefox by mistake.
	cm, err := pw.Chromium.Launch(launch)
	if err != nil {
		log.Error("could not start browser chrome", "error", err)
		os.Exit(1)
	}
	cmc, err := newMobileContext(cm, ua)
	if err != nil {
		log.Error("could not start browser chorme context", "error", err)
		os.Exit(1)
	}

	sf, err := pw.WebKit.Launch(launch)
	if err != nil {
		log.Error("could not start browser safari", "error", err)
		os.Exit(1)
	}
	sfc, err := newMobileContext(sf, ua)
	if err != nil {
		log.Error("could not start browser safari context", "error", err)
		os.Exit(1)
	}

	return &cmc, &ffc, &sfc
}

// newMobileContext opens a context on browser that uses ua as User-Agent,
// ignores HTTPS errors and emulates a 412x915 touch-enabled mobile screen.
func newMobileContext(browser playwright.Browser, ua string) (playwright.BrowserContext, error) {
	enabled := true
	return browser.NewContext(playwright.BrowserNewContextOptions{
		IgnoreHttpsErrors: &enabled,
		UserAgent:         &ua,
		HasTouch:          &enabled,
		Viewport: &playwright.Size{
			Width:  412,
			Height: 915,
		},
		Screen: &playwright.Size{
			Width:  412,
			Height: 915,
		},
		// NOTE(review): Playwright documents isMobile as unsupported in
		// Firefox; confirm the Firefox context accepts this option.
		IsMobile: &enabled,
	})
}

// RemoveAccent returns str with its combining marks (accents) removed; it
// helps normalize names in the db. If the transformation fails, str is
// returned unchanged.
//
// https://stackoverflow.com/questions/24588295/go-removing-accents-from-strings
func RemoveAccent(str string) string {
	if str == "" {
		return ""
	}
	// Decompose, drop the non-spacing marks (unicode.Mn), then recompose.
	t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
	s, _, err := transform.String(t, str)
	if err != nil {
		return str
	}
	return s
}