// Package helpers provides random User-Agent strings, number and text
// normalization, Playwright browser start-up, and a runner for parser
// functions.
package helpers

import (
	"context"
	"fmt"
	"log/slog"
	"math/rand"
	"os"
	"strconv"
	"strings"
	"unicode"

	"github.com/maximotejeda/us_dop_db/db"
	"github.com/playwright-community/playwright-go"
	"golang.org/x/text/runes"
	"golang.org/x/text/transform"
	"golang.org/x/text/unicode/norm"
)

// UserAgent holds the pieces that NewUA and NewMobileUA assemble into a
// User-Agent string.
type UserAgent struct {
	version         string
	system          string
	platformInfo    string
	platformDetails string
	extensionsinfo  string
}

var (
	// systems is the pool of platform tokens. NewMobileUA draws from
	// systems[5:], so entries from index 5 onward must stay the mobile ones.
	systems = []string{
		"Macintosh; Intel Mac OS X 10_15_7",
		"Windows NT 10.0; Win64; x64",
		"Windows NT 6.1; Win64; x64; rv:109.0",
		"X11; Linux x86_64",
		"X11; CrOS x86_64 14541.0.0",
		"Linux; Android 10; K",
		"iPhone; CPU iPhone OS 17_1_2 like Mac OS X",
		"iPhone; CPU iPhone OS 14_6 like Mac OS X",
		"Linux; Android 9; JAT-L41",
		"Linux; Android 11; SAMSUNG SM-G973U",
		"iPad; CPU OS 14_7_1 like Mac OS X",
		"Linux; U; en-us; KFAPWI Build/JDQ39",
	}
	// platformInfo is the pool of rendering-engine tokens.
	platformInfo = []string{
		"AppleWebKit/605.1.15",
		"AppleWebKit/537.36",
		"Gecko/20100101",
	}
	// platformDetails is the parenthesized engine detail; only the first
	// entry is ever used.
	platformDetails = []string{
		"KHTML, like Gecko",
	}
	// extensionInfo is the pool of product tokens. NewMobileUA draws from
	// extensionInfo[0:2] for Android, so Firefox and Chrome must stay first.
	extensionInfo = []string{
		"Firefox",
		"Chrome/87.0.42",
		"Safari/604.1",
		"Safari/537.36",
		"Version/14.1.2",
	}
)

// NewUA returns a User-Agent string assembled from randomly chosen system,
// engine and product tokens, with a random version number in [100, 119]
// appended to the product token.
func NewUA() string {
	ua := UserAgent{}
	ua.version = "Mozilla/5.0"
	ua.system = systems[rand.Intn(len(systems))]
	ua.platformInfo = platformInfo[rand.Intn(len(platformInfo))]
	ua.platformDetails = platformDetails[0]
	ua.extensionsinfo = extensionInfo[rand.Intn(len(extensionInfo))]

	extra := ""
	// The Chrome pool entry carries a version ("Chrome/87.0.42"), so it has
	// to be matched by prefix; an exact comparison with "Chrome" never hits.
	if strings.HasPrefix(ua.extensionsinfo, "Chrome") {
		extra = "Safari/537.3"
	}
	version := rand.Intn(20) + 100

	// NOTE(review): product tokens that already contain a version end up with
	// a second "/<n>" suffix (e.g. "Safari/604.1/117") — confirm whether that
	// is intended before changing the format.
	s := fmt.Sprintf("%s (%s) %s (%s) %s/%d %s",
		ua.version, ua.system, ua.platformInfo, ua.platformDetails, ua.extensionsinfo, version, extra)
	// Drop the trailing space left behind when there is no extra suffix.
	return strings.TrimSpace(s)
}

// NewMobileUA returns a User-Agent string for a randomly chosen mobile
// system. iPhone/iPad systems get a Safari-style product, Android systems
// get either a Firefox-style or a Chrome-style product, and any other system
// keeps a random product token with a generic mobile Safari suffix.
func NewMobileUA() string {
	ua := UserAgent{}
	ua.version = "Mozilla/5.0"
	mobileSystems := systems[5:]
	ua.system = mobileSystems[rand.Intn(len(mobileSystems))]
	ua.platformInfo = platformInfo[rand.Intn(len(platformInfo))]
	ua.platformDetails = platformDetails[0]
	ua.extensionsinfo = extensionInfo[rand.Intn(len(extensionInfo))]

	// Fallback suffix for systems that match none of the cases below.
	extra := "mobile Safari/537.3"
	switch {
	case strings.Contains(ua.system, "iPhone"), strings.Contains(ua.system, "iPad"):
		extra = "Mobile/15E148 Safari/604.1"
		ua.extensionsinfo = "Version/14.1.2"
	case strings.Contains(ua.system, "Android"):
		// Only the first two products (Firefox, Chrome) are candidates here.
		candidates := extensionInfo[0:2]
		selected := candidates[rand.Intn(len(candidates))]
		if strings.Contains(selected, "Firefox") {
			extra = "Firefox/114.0"
			ua.extensionsinfo = "Gecko/114.0"
		} else {
			ua.extensionsinfo = "Chrome/114.0.0.0"
			extra = "Mobile Safari/537.36"
		}
	}
	return fmt.Sprintf("%s (%s) %s (%s) %s %s",
		ua.version, ua.system, ua.platformInfo, ua.platformDetails, ua.extensionsinfo, extra)
}

// Normalize extracts a number from val by keeping only ASCII digits and '.'
// and parsing what is left as a float64. Every other character (currency
// symbols, spaces, commas, signs) is dropped, so a leading '-' is lost.
//
// It returns 0 when val contains no digits or dots, and also when the
// remaining text is not a valid number (for example "1.2.3"); the latter is
// reported through the default slog logger.
func Normalize(val string) float64 {
	// strconv.ParseFloat only understands ASCII digits, so other Unicode
	// numerics (e.g. '٣', '½') are dropped instead of being passed through.
	cleaned := strings.Map(func(r rune) rune {
		if (r >= '0' && r <= '9') || r == '.' {
			return r
		}
		return -1
	}, val)
	if cleaned == "" {
		return 0
	}
	n, err := strconv.ParseFloat(cleaned, 64)
	if err != nil {
		slog.Warn("normalize: parsing number", "input", val, "cleaned", cleaned, "error", err)
		return 0
	}
	return n
}

// CreateBrowser starts Playwright and launches one Firefox, one Chromium and
// one WebKit browser, returning them in the order chrome, firefox, webkit.
//
// Any start-up failure is logged and terminates the process with exit code
// 1. The Playwright instance itself is not returned.
func CreateBrowser(log *slog.Logger) (chrome *playwright.Browser, firefox *playwright.Browser, webkit *playwright.Browser) {
	pw, err := playwright.Run(&playwright.RunOptions{
		Verbose: true,
	})
	if err != nil {
		log.Error("running pw, could not start", "error", err)
		os.Exit(1)
	}

	ff, err := pw.Firefox.Launch()
	if err != nil {
		log.Error("could not start browser", "browser", "firefox", "error", err)
		os.Exit(1)
	}

	// The chrome result must come from the Chromium browser type, not from a
	// second Firefox launch.
	cm, err := pw.Chromium.Launch()
	if err != nil {
		log.Error("could not start browser", "browser", "chromium", "error", err)
		os.Exit(1)
	}

	sf, err := pw.WebKit.Launch()
	if err != nil {
		log.Error("could not start browser", "browser", "webkit", "error", err)
		os.Exit(1)
	}

	return &cm, &ff, &sf
}

// ExecTask runs parserExecution with browser[0] and, if that fails, retries
// once with browser[1] when a second browser is supplied.
//
// Each failed attempt increments errCounter[parserName] (skipped when
// errCounter is nil). The error of the last attempt is returned, so a
// successful retry yields nil. An empty browser slice returns an error
// without calling parserExecution.
func ExecTask(
	ctx context.Context,
	dbi *db.DB,
	browser []*playwright.Browser,
	log *slog.Logger,
	errCounter map[string]int,
	parserName string,
	parserExecution func(context.Context, *db.DB, *playwright.Browser, *slog.Logger) error,
) (err error) {
	if len(browser) == 0 {
		return fmt.Errorf("executing parser %q: no browser available", parserName)
	}

	// Writing to a nil map panics, so counting is skipped in that case.
	countFailure := func() {
		if errCounter != nil {
			errCounter[parserName]++
		}
	}

	err = parserExecution(ctx, dbi, browser[0], log)
	if err != nil {
		countFailure()
		// Logged here because the retry below overwrites err.
		log.Error(err.Error(), "parser", parserName)
		if len(browser) > 1 {
			err = parserExecution(ctx, dbi, browser[1], log)
			if err != nil {
				countFailure()
			}
		}
	}

	log.Info("executed", "parser", parserName, "errors", errCounter[parserName])
	return err
}

// RemoveAccent strips combining marks (Unicode category Mn) from str, which
// turns accented letters into their base letters; it helps normalize names
// stored in the db. The text is decomposed (NFD), the marks are removed, and
// the result is recomposed (NFC).
//
// If the transformation fails, str is returned unchanged.
//
// https://stackoverflow.com/questions/24588295/go-removing-accents-from-strings
func RemoveAccent(str string) string {
	if str == "" {
		return ""
	}
	t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
	s, _, err := transform.String(t, str)
	if err != nil {
		// Prefer the untouched input over a partially transformed string.
		return str
	}
	return s
}