diff --git a/Makefile b/Makefile
index 6e89537..f97c8b6 100644
--- a/Makefile
+++ b/Makefile
@@ -49,7 +49,7 @@ create-descriptors:
 	@envsubst < k8s/deployment.yml.template > k8s/deployment.yml
 
 deploy: build-image create-descriptors
-	@kubectl apply -f k8s/pvc.yaml
+	#@kubectl apply -f k8s/pvc.yaml
 	@kubectl apply -f k8s/cronjobs.yml
 
 test:
diff --git a/bin/crawler-linux-amd64-0.0.0_1 b/bin/crawler-linux-amd64-0.0.0_1
index 4d4a320..03eb17c 100755
Binary files a/bin/crawler-linux-amd64-0.0.0_1 and b/bin/crawler-linux-amd64-0.0.0_1 differ
diff --git a/bin/crawler-linux-arm64-0.0.0_1 b/bin/crawler-linux-arm64-0.0.0_1
index fe09e82..ac7c051 100755
Binary files a/bin/crawler-linux-arm64-0.0.0_1 and b/bin/crawler-linux-arm64-0.0.0_1 differ
diff --git a/cmd/crawler/main.go b/cmd/crawler/main.go
index b628eb1..6ba3836 100644
--- a/cmd/crawler/main.go
+++ b/cmd/crawler/main.go
@@ -32,6 +32,6 @@ func main() {
 		log.Error("selecting crawler adapter", "error", err)
 		panic(err)
 	}
-	app := api.NewApplication(crawler)
+	app := api.NewApplication(crawler, dol)
 	app.Run()
 }
diff --git a/config/config.go b/config/config.go
index 2ddec80..6bc675f 100644
--- a/config/config.go
+++ b/config/config.go
@@ -10,6 +10,43 @@ func GetDollarServiceURL() string {
 	return getEnvValue("DOLLAR_SERVICE_URL")
 }
 
+func GetAPAPURL() string {
+	return getEnvValue("APA")
+}
+
+func GetBCDURL() string {
+	return getEnvValue("BCD")
+}
+
+func GetBDRURL() string {
+	return getEnvValue("BDR")
+}
+
+func GetBHDURL() string {
+	return getEnvValue("BHD")
+}
+func GetBNCURL() string {
+	return getEnvValue("BNC")
+}
+
+func GetBPDURL() string {
+	return getEnvValue("BPD")
+}
+
+func GetINFURL() string {
+	return getEnvValue("GENERAL")
+}
+
+// GetSCTAURL
+// Scotia bank URL
+func GetSCTAURL() string {
+	return getEnvValue("SCOTIA")
+}
+
+func GetVMCURL() string {
+	return getEnvValue("VIMENCA")
+}
+
 func getEnvValue(key string) string {
 	if os.Getenv(key) == "" {
 		panic("key not found " + key)
diff --git a/helpers/helpers.go b/helpers/helpers.go
index 56a6202..4d98464 100644
--- a/helpers/helpers.go
+++ b/helpers/helpers.go
@@ -1,9 +1,9 @@
 package helpers
 
 import (
-	"context"
 	"fmt"
 	"log/slog"
+	"math"
 	"math/rand"
 	"os"
 	"strconv"
@@ -11,7 +11,7 @@ import (
 	"unicode"
 
-	"github.com/maximotejeda/us_dop_db/db"
+	"github.com/maximotejeda/us_dop_scrapper/helpers"
 	"github.com/playwright-community/playwright-go"
 	"golang.org/x/text/runes"
 	"golang.org/x/text/transform"
@@ -125,62 +125,95 @@ func Normalize(val string) float64 {
 		if err != nil {
 			fmt.Printf("%s", err)
 		}
-		return cv
+		cvt := math.Round(cv*10000) / 10000
+		return cvt
 	}
 	return 0
 }
 
 // CreateBrowser
-func CreateBrowser(log *slog.Logger) (chrome *playwright.Browser, firefox *playwright.Browser, webkit *playwright.Browser) {
+func CreateBrowser(log *slog.Logger) (chrome *playwright.BrowserContext, firefox *playwright.BrowserContext, webkit *playwright.BrowserContext) {
 	pw, err := playwright.Run(&playwright.RunOptions{
 		Verbose: true,
 	})
+	ua := helpers.NewMobileUA()
+	headless := true
 	if err != nil {
 		log.Error("running pw, could not start", "error", err)
 		os.Exit(1)
 	}
-	ff, err := pw.Firefox.Launch()
+	ff, err := pw.Firefox.Launch(playwright.BrowserTypeLaunchOptions{Headless: &headless})
 	if err != nil {
-		log.Error("could not start browser", "error", err)
+		log.Error("could not start browser firefox", "error", err)
 		os.Exit(1)
 	}
-	cm, err := pw.Firefox.Launch()
+	ffc, err := ff.NewContext(playwright.BrowserNewContextOptions{
+		IgnoreHttpsErrors: &headless,
+		UserAgent:         &ua,
+		HasTouch:          &headless,
+		Viewport: &playwright.Size{
+			Width:  412,
+			Height: 915,
+		},
+		Screen: &playwright.Size{
+			Width:  412,
+			Height: 915,
+		},
+		IsMobile: &headless,
+	})
 	if err != nil {
-		log.Error("could not start browser", "error", err)
+		log.Error("could not start browser firefox context", "error", err)
 		os.Exit(1)
 	}
-	sf, err := pw.WebKit.Launch()
+	cm, err := pw.Firefox.Launch(playwright.BrowserTypeLaunchOptions{Headless: &headless})
 	if err != nil {
-		log.Error("could not start browser", "error", err)
+		log.Error("could not start browser chrome", "error", err)
 		os.Exit(1)
 	}
-	return &cm, &ff, &sf
-}
-
-// ExecTask
-func ExecTask(
-	ctx context.Context,
-	dbi *db.DB,
-	browser []*playwright.Browser,
-	log *slog.Logger,
-	errCounter map[string]int,
-	parserName string,
-	parserExecution func(context.Context, *db.DB, *playwright.Browser, *slog.Logger) error) (err error) {
-	err = parserExecution(ctx, dbi, browser[0], log)
+	cmc, err := cm.NewContext(playwright.BrowserNewContextOptions{
+		IgnoreHttpsErrors: &headless,
+		UserAgent:         &ua,
+		HasTouch:          &headless,
+		Viewport: &playwright.Size{
+			Width:  412,
+			Height: 915,
+		},
+		Screen: &playwright.Size{
+			Width:  412,
+			Height: 915,
+		},
+		IsMobile: &headless,
+	})
 	if err != nil {
-		errCounter[parserName]++
-		log.Error(err.Error(), "parser", parserName)
-		// todo want a retry with different browser firefox
-		err = parserExecution(ctx, dbi, browser[1], log)
-
-		if err != nil {
-			errCounter[parserName]++
-		}
+		log.Error("could not start browser chrome context", "error", err)
+		os.Exit(1)
 	}
-	log.Info("executed", "parser", parserName, "errors", errCounter[parserName])
-	return err
+	sf, err := pw.WebKit.Launch(playwright.BrowserTypeLaunchOptions{Headless: &headless})
+	if err != nil {
+		log.Error("could not start browser safari", "error", err)
+		os.Exit(1)
+	}
+	sfc, err := sf.NewContext(playwright.BrowserNewContextOptions{
+		IgnoreHttpsErrors: &headless,
+		UserAgent:         &ua,
+		HasTouch:          &headless,
+		Viewport: &playwright.Size{
+			Width:  412,
+			Height: 915,
+		},
+		Screen: &playwright.Size{
+			Width:  412,
+			Height: 915,
+		},
+		IsMobile: &headless,
+	})
+	if err != nil {
+		log.Error("could not start browser safari context", "error", err)
+		os.Exit(1)
+	}
+	return &cmc, &ffc, &sfc
 }
 
 // RemoveAccent
diff --git a/internal/adapters/crawler/apap.go b/internal/adapters/crawler/apap.go
index 57b3e2b..f1955df 100644
--- a/internal/adapters/crawler/apap.go
+++ b/internal/adapters/crawler/apap.go
@@ -4,28 +4,25 @@ import (
 	"context"
 	"fmt"
 	"log/slog"
-	"os"
 	"time"
 
+	"github.com/maximotejeda/us_dop_scrapper/config"
 	"github.com/maximotejeda/us_dop_scrapper/helpers"
 	"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
 	"github.com/maximotejeda/us_dop_scrapper/internal/ports"
 	"github.com/playwright-community/playwright-go"
 )
 
-type Apap struct {
-	client ports.DollarPort
-}
+type Apap struct{}
 
-func NewApap(client ports.DollarPort) ports.APIPorts {
-	return &Apap{client: client}
+func NewApap() ports.APIPorts {
+	return &Apap{}
 }
 
 func (a Apap) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
 	tout := 120000.00
-	uri := os.Getenv("APA")
 	log = log.With("scrapper", "apap")
-	if _, err := page.Goto(uri, playwright.PageGotoOptions{
+	if _, err := page.Goto(config.GetAPAPURL(), playwright.PageGotoOptions{
 		Timeout:   &tout,
 		WaitUntil: playwright.WaitUntilStateLoad,
 	}); err != nil {
@@ -64,43 +61,3 @@
 	log.Info("parsed", "value", inst)
 	return []*domain.History{inst}, nil
 }
-
-func (a Apap) ExecParser(
-	ctx context.Context,
-	browser *playwright.Browser,
-	log *slog.Logger) (err error) {
-	t := true
-	ua := helpers.NewMobileUA()
-	b := *browser
-	page, err := b.NewPage(playwright.BrowserNewPageOptions{
-		UserAgent: &ua,
-		// IsMobile: &t,
-		HasTouch:  &t,
-		Viewport: &playwright.Size{
-			Width:  412,
-			Height: 915,
-		},
-		Screen: &playwright.Size{
-			Width:  412,
-			Height: 915,
-		},
-	})
-	if err != nil {
-		log.Error("creating page", "error", err)
-		os.Exit(1)
-	}
-
-	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
-	defer page.Close()
-	defer cancel()
-	histList, err := a.Scrape(ctx, page, log)
-	// here we execute db operations
-	if err != nil {
-		return err
-	}
-	err = a.client.NewHistory(histList[0])
-	if err != nil {
-		return err
-	}
-	return err
-}
diff --git a/internal/adapters/crawler/bcd.go b/internal/adapters/crawler/bcd.go
index bee81bc..e44a2a4 100644
--- a/internal/adapters/crawler/bcd.go
+++ b/internal/adapters/crawler/bcd.go
@@ -4,30 +4,25 @@ import (
 	"context"
 	"fmt"
 	"log/slog"
-	"os"
 	"time"
 
+	"github.com/maximotejeda/us_dop_scrapper/config"
 	"github.com/maximotejeda/us_dop_scrapper/helpers"
 	"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
 	"github.com/maximotejeda/us_dop_scrapper/internal/ports"
 	"github.com/playwright-community/playwright-go"
 )
 
-type bcd struct {
-	client ports.DollarPort
-}
+type bcd struct{}
 
-func NewBCD(client ports.DollarPort) ports.APIPorts {
-	return &bcd{
-		client: client,
-	}
+func NewBCD() ports.APIPorts {
+	return &bcd{}
 }
 
 func (b bcd) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
 	log = log.With("scrapper", "bcd")
 	tout := 90000.00
-	uri := os.Getenv("BCD")
-	if _, err = page.Goto(uri, playwright.PageGotoOptions{
+	if _, err = page.Goto(config.GetBCDURL(), playwright.PageGotoOptions{
 		Timeout:   &tout,
 		WaitUntil: playwright.WaitUntilStateLoad,
 	}); err != nil {
@@ -68,43 +63,3 @@
 	}
 	return []*domain.History{inst}, nil
 }
-
-func (bc bcd) ExecParser(
-	ctx context.Context,
-	browser *playwright.Browser,
-	log *slog.Logger) (err error) {
-	t := true
-	ua := helpers.NewMobileUA()
-	b := *browser
-	page, err := b.NewPage(playwright.BrowserNewPageOptions{
-		UserAgent: &ua,
-		// IsMobile: &t,
-		HasTouch:  &t,
-		Viewport: &playwright.Size{
-			Width:  412,
-			Height: 915,
-		},
-		Screen: &playwright.Size{
-			Width:  412,
-			Height: 915,
-		},
-	})
-	if err != nil {
-		log.Error("creating page", "error", err)
-		os.Exit(1)
-	}
-	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
-	defer page.Close()
-	defer cancel()
-	inst, err := bc.Scrape(ctx, page, log)
-	if err != nil {
-		return err
-	}
-	err = bc.client.NewHistory(inst[0])
-
-	if err != nil {
-		return err
-	}
-
-	return err
-}
diff --git a/internal/adapters/crawler/bdr.go b/internal/adapters/crawler/bdr.go
index 5ff49c2..2591a14 100644
--- a/internal/adapters/crawler/bdr.go
+++ b/internal/adapters/crawler/bdr.go
@@ -4,29 +4,24 @@ import (
 	"context"
 	"fmt"
 	"log/slog"
-	"os"
 	"time"
 
+	"github.com/maximotejeda/us_dop_scrapper/config"
 	"github.com/maximotejeda/us_dop_scrapper/helpers"
 	"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
 	"github.com/maximotejeda/us_dop_scrapper/internal/ports"
 	"github.com/playwright-community/playwright-go"
 )
 
-type bdr struct {
-	client ports.DollarPort
-}
+type bdr struct{}
 
-func NewBDR(client ports.DollarPort) ports.APIPorts {
-	return &bdr{
-		client: client,
-	}
+func NewBDR() ports.APIPorts {
+	return &bdr{}
 }
 
 func (bd bdr) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
 	tout := 120000.00
 	log = log.With("scrapper", "bdr")
-	uri := os.Getenv("BDR")
-	if _, err := page.Goto(uri, playwright.PageGotoOptions{
+	if _, err := page.Goto(config.GetBDRURL(), playwright.PageGotoOptions{
 		Timeout:   &tout,
 		WaitUntil: playwright.WaitUntilStateLoad,
 	}); err != nil {
@@ -72,39 +67,3 @@
 	insts = append(insts, inst)
 	return insts, nil
 }
-
-func (bd bdr) ExecParser(
-	ctx context.Context,
-	browser *playwright.Browser,
-	log *slog.Logger) (err error) {
-	t := true
-	ua := helpers.NewMobileUA()
-	b := *browser
-	page, err := b.NewPage(playwright.BrowserNewPageOptions{
-		UserAgent: &ua,
-		// IsMobile: &t,
-		HasTouch:  &t,
-		Viewport: &playwright.Size{
-			Width:  412,
-			Height: 915,
-		},
-		Screen: &playwright.Size{
-			Width:  412,
-			Height: 915,
-		},
-	})
-	if err != nil {
-		log.Error("creating page", "error", err)
-		os.Exit(1)
-	}
-	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
-	defer page.Close()
-	defer cancel()
-	insts, err := bd.Scrape(ctx, page, log)
-	if err != nil {
-		return err
-	}
-	err = bd.client.NewHistory(insts[0])
-
-	return err
-}
diff --git a/internal/adapters/crawler/bhd.go b/internal/adapters/crawler/bhd.go
index 733ed45..6f0d0f2 100644
--- a/internal/adapters/crawler/bhd.go
+++ b/internal/adapters/crawler/bhd.go
@@ -4,33 +4,28 @@ import (
 	"context"
 	"fmt"
 	"log/slog"
-	"os"
 	"strings"
 	"time"
 
+	"github.com/maximotejeda/us_dop_scrapper/config"
 	"github.com/maximotejeda/us_dop_scrapper/helpers"
 	"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
 	"github.com/maximotejeda/us_dop_scrapper/internal/ports"
 	"github.com/playwright-community/playwright-go"
 )
 
-type bhd struct {
-	client ports.DollarPort
-}
+type bhd struct{}
 
-func NewBHD(client ports.DollarPort) ports.APIPorts {
-	return &bhd{
-		client: client,
-	}
+func NewBHD() ports.APIPorts {
+	return &bhd{}
 }
 
 // Scrape
 // needs a mobile User Agent
 func (bh bhd) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
 	tout := 120000.00
-	uri := os.Getenv("BHD")
 	log = log.With("scrapper", "bhd")
-	if _, err := page.Goto(uri, playwright.PageGotoOptions{
+	if _, err := page.Goto(config.GetBHDURL(), playwright.PageGotoOptions{
 		Timeout:   &tout,
 		WaitUntil: playwright.WaitUntilStateLoad,
 	}); err != nil {
@@ -111,38 +106,3 @@
 	//log.Info(fmt.Sprintf("%v", inst))
 	return []*domain.History{inst}, nil
 }
-
-func (bh bhd) ExecParser(
-	ctx context.Context,
-	browser *playwright.Browser,
-	log *slog.Logger) (err error) {
-	t := true
-	ua := helpers.NewMobileUA()
-	b := *browser
-	page, err := b.NewPage(playwright.BrowserNewPageOptions{
-		UserAgent: &ua,
-		// IsMobile: &t,
-		HasTouch:  &t,
-		Viewport: &playwright.Size{
-			Width:  412,
-			Height: 915,
-		},
-		Screen: &playwright.Size{
-			Width:  412,
-			Height: 915,
-		},
-	})
-	if err != nil {
-		log.Error("creating page", "error", err)
-		os.Exit(1)
-	}
-	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
-	defer page.Close()
-	defer cancel()
-	inst, err := bh.Scrape(ctx, page, log)
-	if err != nil {
-		return err
-	}
-	bh.client.NewHistory(inst[0])
-	return err
-}
diff --git a/internal/adapters/crawler/bnc.go b/internal/adapters/crawler/bnc.go
index 4aca442..732d9ff 100644
--- a/internal/adapters/crawler/bnc.go
+++ b/internal/adapters/crawler/bnc.go
@@ -4,30 +4,25 @@ import (
 	"context"
 	"fmt"
 	"log/slog"
-	"os"
 	"time"
 
+	"github.com/maximotejeda/us_dop_scrapper/config"
 	"github.com/maximotejeda/us_dop_scrapper/helpers"
 	"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
 	"github.com/maximotejeda/us_dop_scrapper/internal/ports"
 	"github.com/playwright-community/playwright-go"
 )
 
-type bnc struct {
-	client ports.DollarPort
-}
+type bnc struct{}
 
-func NewBNC(client ports.DollarPort) ports.APIPorts {
-	return &bnc{
-		client: client,
-	}
+func NewBNC() ports.APIPorts {
+	return &bnc{}
 }
 
 func (bn bnc) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
 	tout := 120000.00
-	uri := os.Getenv("BNC")
 	log = log.With("scrapper", "bnc")
-	if _, err := page.Goto(uri, playwright.PageGotoOptions{
+	if _, err := page.Goto(config.GetBNCURL(), playwright.PageGotoOptions{
 		Timeout:   &tout,
 		WaitUntil: playwright.WaitUntilStateLoad,
 	}); err != nil {
@@ -66,39 +61,3 @@
 	}
 	return []*domain.History{inst}, nil
 }
-
-func (bn bnc) ExecParser(
-	ctx context.Context,
-	browser *playwright.Browser,
-	log *slog.Logger) (err error) {
-	t := true
-	ua := helpers.NewMobileUA()
-	b := *browser
-	page, err := b.NewPage(playwright.BrowserNewPageOptions{
-		UserAgent: &ua,
-		// IsMobile: &t,
-		HasTouch:  &t,
-		Viewport: &playwright.Size{
-			Width:  412,
-			Height: 915,
-		},
-		Screen: &playwright.Size{
-			Width:  412,
-			Height: 915,
-		},
-	})
-	if err != nil {
-		log.Error("creating page", "error", err)
-		os.Exit(1)
-	}
-	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
-	defer page.Close()
-	defer cancel()
-	inst, err := bn.Scrape(ctx, page, log)
-	// here we execute db operations
-	if err != nil {
-		return err
-	}
-	bn.client.NewHistory(inst[0])
-	return err
-}
diff --git a/internal/adapters/crawler/bpd.go b/internal/adapters/crawler/bpd.go
index d188e48..8ebef4f 100644
--- a/internal/adapters/crawler/bpd.go
+++ b/internal/adapters/crawler/bpd.go
@@ -4,35 +4,28 @@ import (
 	"context"
 	"fmt"
 	"log/slog"
-	"os"
 	"strconv"
 	"time"
 
-	"github.com/maximotejeda/us_dop_scrapper/helpers"
+	"github.com/maximotejeda/us_dop_scrapper/config"
 	"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
 	"github.com/maximotejeda/us_dop_scrapper/internal/ports"
 	"github.com/playwright-community/playwright-go"
 )
 
-type bpd struct {
-	client ports.DollarPort
-}
+type bpd struct{}
 
-func NewBPD(client ports.DollarPort) ports.APIPorts {
-	return &bpd{
-		client: client,
-	}
+func NewBPD() ports.APIPorts {
+	return &bpd{}
 }
 
 // Scrape
 // needs a mobile User Agent
 func (bp bpd) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
 	tout := 120000.00
-
-	uri := os.Getenv("BPD")
 	log = log.With("scrapper", "bpd")
-	if _, err := page.Goto(uri, playwright.PageGotoOptions{
+	if _, err := page.Goto(config.GetBPDURL(), playwright.PageGotoOptions{
 		Timeout:   &tout,
 		WaitUntil: playwright.WaitUntilStateLoad,
 	}); err != nil {
@@ -90,38 +83,3 @@ func HoverTasas(page playwright.Page) {
 	tasasMenu := page.Locator(".footer_est_menu_bpd > li:nth-child(3)")
 	tasasMenu.Hover()
 }
-
-func (bp bpd) ExecParser(
-	ctx context.Context,
-	browser *playwright.Browser,
-	log *slog.Logger) (err error) {
-	t := true
-	ua := helpers.NewMobileUA()
-	b := *browser
-	page, err := b.NewPage(playwright.BrowserNewPageOptions{
-		UserAgent: &ua,
-		// IsMobile: &t,
-		HasTouch:  &t,
-		Viewport: &playwright.Size{
-			Width:  412,
-			Height: 915,
-		},
-		Screen: &playwright.Size{
-			Width:  412,
-			Height: 915,
-		},
-	})
-	if err != nil {
-		log.Error("creating page", "error", err)
-		os.Exit(1)
-	}
-	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
-	defer page.Close()
-	defer cancel()
-	inst, err := bp.Scrape(ctx, page, log)
-	if err != nil {
-		return err
-	}
-	bp.client.NewHistory(inst[0])
-	return err
-}
diff --git a/internal/adapters/crawler/crawler.go b/internal/adapters/crawler/crawler.go
index 2f11783..f6648b5 100644
--- a/internal/adapters/crawler/crawler.go
+++ b/internal/adapters/crawler/crawler.go
@@ -10,23 +10,23 @@ func Selector(who string, client ports.DollarPort) (ports.APIPorts, error) {
 	var parser ports.APIPorts
 	switch who {
 	case "apap":
-		parser = NewApap(client)
+		parser = NewApap()
 	case "bcd":
-		parser = NewBCD(client)
-	case "bdr":
-		parser = NewBDR(client)
+		parser = NewBCD()
+	case "brd":
+		parser = NewBDR()
 	case "bhd":
-		parser = NewBHD(client)
+		parser = NewBHD()
 	case "bnc":
-		parser = NewBNC(client)
+		parser = NewBNC()
 	case "bpd":
-		parser = NewBPD(client)
+		parser = NewBPD()
 	case "inf":
-		parser = NewINF(client)
+		parser = NewINF()
 	case "scotia":
-		parser = NewScotia(client)
+		parser = NewScotia()
 	case "vimenca":
-		parser = NewVimenca(client)
+		parser = NewVimenca()
 	default:
 		return nil, fmt.Errorf("not recognize who: " + who)
 	}
diff --git a/internal/adapters/crawler/inf.go b/internal/adapters/crawler/inf.go
index 403b38c..d1a26e4 100644
--- a/internal/adapters/crawler/inf.go
+++ b/internal/adapters/crawler/inf.go
@@ -2,34 +2,28 @@ package crawler
 
 import (
 	"context"
-	"fmt"
 	"log/slog"
-	"os"
 	"strings"
 	"time"
 
+	"github.com/maximotejeda/us_dop_scrapper/config"
 	"github.com/maximotejeda/us_dop_scrapper/helpers"
 	"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
 	"github.com/maximotejeda/us_dop_scrapper/internal/ports"
 	"github.com/playwright-community/playwright-go"
 )
 
-type inf struct {
-	client ports.DollarPort
-}
+type inf struct{}
 
-func NewINF(client ports.DollarPort) ports.APIPorts {
-	return &inf{
-		client: client,
-	}
+func NewINF() ports.APIPorts {
+	return &inf{}
 }
 
 // Scrape
 func (in inf) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (instList []*domain.History, err error) {
-	uri := os.Getenv("GENERAL")
 	log = log.With("scrapper", "general")
 	tout := float64(120000)
-	if _, err := page.Goto(uri, playwright.PageGotoOptions{
+	if _, err := page.Goto(config.GetINFURL(), playwright.PageGotoOptions{
 		Timeout:   &tout,
 		WaitUntil: playwright.WaitUntilStateLoad,
 	}); err != nil {
@@ -49,17 +43,13 @@
 		log.Error("could not get info", "error", err)
 		return nil, err
 	}
-	scotia := false // in this page there are 2 scotia one the change online the other is tha bank
+
 	instList = []*domain.History{}
 	for _, entry := range entries {
 		inst := &domain.History{
 			Parser: "inf",
 		}
 		title, _ := entry.Locator("span.nombre").TextContent()
-		if strings.ToLower(title) == "scotiabank" && !scotia {
-			title = "scotiabank cambio online"
-			scotia = true
-		}
 		name := ""
 		if title != "" {
 			name = helpers.RemoveAccent(strings.ToLower(title))
@@ -81,6 +71,10 @@
 			log.Warn("skipping", "nombre", inst.Name, "compra", inst.Compra, "venta", inst.Venta)
 			continue
 		}
+		switch {
+		case strings.Contains(inst.Name, "banreservas"), strings.Contains(inst.Name, "banco popular"), strings.Contains(inst.Name, "scotia"), strings.Contains(inst.Name, "hipotecario"), strings.Contains(inst.Name, "asociacion popular"), strings.Contains(inst.Name, "vimenca"):
+			continue
+		}
 		instList = append(instList, inst)
 	}
 
@@ -98,45 +92,3 @@
 	}
 	return value
 }
-
-// ExecParser
-func (in inf) ExecParser(
-	ctx context.Context,
-	browser *playwright.Browser,
-	log *slog.Logger) error {
-	t := true
-	ua := helpers.NewMobileUA()
-	b := *browser
-	page, err := b.NewPage(playwright.BrowserNewPageOptions{
-		UserAgent: &ua,
-		// IsMobile: &t,
-		HasTouch:  &t,
-		Viewport: &playwright.Size{
-			Width:  412,
-			Height: 915,
-		},
-		Screen: &playwright.Size{
-			Width:  412,
-			Height: 915,
-		},
-	})
-	if err != nil {
-		log.Error("creating page", "error", err)
-		os.Exit(1)
-	}
-	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
-	defer page.Close()
-	defer cancel()
-	instList, err := in.Scrape(ctx, page, log)
-	if err != nil {
-		return err
-	}
-	for _, inst := range instList {
-		log.Info("processing", "name", inst.Name)
-		err = in.client.NewHistory(inst)
-		if err != nil {
-			log.Error(fmt.Sprintf("inspecting %s", inst.Name), "error", err)
-		}
-	}
-	return err
-}
diff --git a/internal/adapters/crawler/scotia.go b/internal/adapters/crawler/scotia.go
index 193d0d2..6f64583 100644
--- a/internal/adapters/crawler/scotia.go
+++ b/internal/adapters/crawler/scotia.go
@@ -4,30 +4,26 @@ import (
 	"context"
 	"fmt"
 	"log/slog"
-	"os"
+	"time"
 
+	"github.com/maximotejeda/us_dop_scrapper/config"
 	"github.com/maximotejeda/us_dop_scrapper/helpers"
 	"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
 	"github.com/maximotejeda/us_dop_scrapper/internal/ports"
 	"github.com/playwright-community/playwright-go"
 )
 
-type scotia struct {
-	client ports.DollarPort
-}
+type scotia struct{}
 
-func NewScotia(client ports.DollarPort) ports.APIPorts {
-	return &scotia{
-		client: client,
-	}
+func NewScotia() ports.APIPorts {
+	return &scotia{}
 }
 
 func (sct scotia) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
 	tout := 120000.00
-	uri := os.Getenv("SCOTIA")
 	log = log.With("scrapper", "scotia")
-	if _, err := page.Goto(uri, playwright.PageGotoOptions{
+	if _, err := page.Goto(config.GetSCTAURL(), playwright.PageGotoOptions{
 		Timeout:   &tout,
 		WaitUntil: playwright.WaitUntilStateLoad,
 	}); err != nil {
@@ -102,41 +98,3 @@
 	insts = append(insts, instOnline, instOnsite)
 	return insts, nil
 }
-
-func (sct scotia) ExecParser(
-	ctx context.Context,
-	browser *playwright.Browser,
-	log *slog.Logger) (err error) {
-	t := true
-	ua := helpers.NewMobileUA()
-	b := *browser
-	page, err := b.NewPage(playwright.BrowserNewPageOptions{
-		UserAgent: &ua,
-		// IsMobile: &t,
-		HasTouch:  &t,
-		Viewport: &playwright.Size{
-			Width:  412,
-			Height: 915,
-		},
-		Screen: &playwright.Size{
-			Width:  412,
-			Height: 915,
-		},
-	})
-	if err != nil {
-		log.Error("creating page", "error", err)
-		os.Exit(1)
-	}
-	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
-	defer page.Close()
-	defer cancel()
-	insts, err := sct.Scrape(ctx, page, log)
-	// here we execute db operations
-	if err != nil {
-		return err
-	}
-	for _, inst := range insts {
-		sct.client.NewHistory(inst)
-	}
-	return err
-}
diff --git a/internal/adapters/crawler/vimenca.go b/internal/adapters/crawler/vimenca.go
index 5c3c6f3..80ec1e6 100644
--- a/internal/adapters/crawler/vimenca.go
+++ b/internal/adapters/crawler/vimenca.go
@@ -3,30 +3,25 @@ package crawler
 import (
 	"context"
 	"log/slog"
-	"os"
 	"time"
 
+	"github.com/maximotejeda/us_dop_scrapper/config"
 	"github.com/maximotejeda/us_dop_scrapper/helpers"
 	"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
 	"github.com/maximotejeda/us_dop_scrapper/internal/ports"
 	"github.com/playwright-community/playwright-go"
 )
 
-type vimenca struct {
-	client ports.DollarPort
-}
+type vimenca struct{}
 
-func NewVimenca(client ports.DollarPort) ports.APIPorts {
-	return &vimenca{
-		client: client,
-	}
+func NewVimenca() ports.APIPorts {
+	return &vimenca{}
 }
 
 func (v vimenca) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
-	uri := os.Getenv("VIMENCA")
 	tout := 120000.00
 	log = log.With("scrapper", "vimenca")
-	if _, err := page.Goto(uri, playwright.PageGotoOptions{
+	if _, err := page.Goto(config.GetVMCURL(), playwright.PageGotoOptions{
 		Timeout:   &tout,
 		WaitUntil: playwright.WaitUntilStateLoad,
 	}); err != nil {
@@ -64,43 +59,3 @@
 	log.Info("institution", "value", inst)
 	return []*domain.History{inst}, nil
 }
-
-func (v vimenca) ExecParser(
-	ctx context.Context,
-	browser *playwright.Browser,
-	log *slog.Logger) (err error) {
-	t := true
-	ua := helpers.NewMobileUA()
-	b := *browser
-	page, err := b.NewPage(playwright.BrowserNewPageOptions{
-		UserAgent: &ua,
-		// IsMobile: &t,
-		HasTouch:  &t,
-		Viewport: &playwright.Size{
-			Width:  412,
-			Height: 915,
-		},
-		Screen: &playwright.Size{
-			Width:  412,
-			Height: 915,
-		},
-	})
-	if err != nil {
-		log.Error("creating page", "error", err)
-		os.Exit(1)
-	}
-	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
-	defer page.Close()
-	defer cancel()
-	inst, err := v.Scrape(ctx, page, log)
-	// here we execute db operations
-	if err != nil {
-		return err
-	}
-	err = v.client.NewHistory(inst[0])
-	if err != nil {
-		return err
-	}
-
-	return err
-}
diff --git a/internal/application/core/api/api.go b/internal/application/core/api/api.go
index ef5004a..c5b706c 100644
--- a/internal/application/core/api/api.go
+++ b/internal/application/core/api/api.go
@@ -2,38 +2,74 @@ package api
 
 import (
 	"context"
+	"log/slog"
+	"os"
+	"time"
+
 	"github.com/maximotejeda/us_dop_scrapper/helpers"
 	"github.com/maximotejeda/us_dop_scrapper/internal/ports"
-	"log/slog"
+	"github.com/playwright-community/playwright-go"
 )
 
 type Application struct {
-	log *slog.Logger
-	api ports.APIPorts
+	log    *slog.Logger
+	api    ports.APIPorts
+	client ports.DollarPort
 }
 
-func NewApplication(crawler ports.APIPorts) *Application {
+func NewApplication(crawler ports.APIPorts, client ports.DollarPort) *Application {
 	log := slog.Default()
 	log = log.With("application", "root")
 	return &Application{
-		log: log,
-		api: crawler,
+		log:    log,
+		api:    crawler,
+		client: client,
 	}
 }
 
 func (a Application) Run() {
 	ctx := context.Background()
 	ch, ff, wk := helpers.CreateBrowser(a.log)
-	err := a.api.ExecParser(ctx, ch, a.log)
+	err := a.ExecParser(ctx, ch, a.log)
 	if err != nil {
 		a.log.Info("failed on frist browser", "browser", "chrome", "error", err)
-		err := a.api.ExecParser(ctx, ff, a.log)
+		err := a.ExecParser(ctx, ff, a.log)
 		if err != nil {
 			a.log.Error("failed on second browser", "browser", "firefox", "error", err)
-			err := a.api.ExecParser(ctx, wk, a.log)
+			err := a.ExecParser(ctx, wk, a.log)
 			if err != nil {
 				a.log.Error("tried all browsers error", "brwser", "webkit", "error", err)
+				return
 			}
 		}
 	}
 }
+
+func (a Application) ExecParser(
+	ctx context.Context,
+	browser *playwright.BrowserContext,
+	log *slog.Logger) (err error) {
+	b := *browser
+	page, err := b.NewPage()
+	if err != nil {
+		log.Error("creating page", "error", err)
+		os.Exit(1)
+	}
+
+	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
+	defer page.Close()
+	defer cancel()
+	histList, err := a.api.Scrape(ctx, page, log)
+	// here we execute db operations
+	if err != nil {
+		return err
+	}
+	for _, hist := range histList {
+		err = a.client.NewHistory(hist)
+		if err != nil {
+			a.log.Error("creating new hist", "history", hist, "error", err)
+		}
+		a.log.Info("parsed Success", "parser", hist.Parser, "item", hist)
+	}
+	return err
+}
diff --git a/internal/ports/api.go b/internal/ports/api.go
index d2eed0a..6af15f4 100644
--- a/internal/ports/api.go
+++ b/internal/ports/api.go
@@ -10,5 +10,4 @@ import (
 
 type APIPorts interface {
 	Scrape(context.Context, playwright.Page, *slog.Logger) ([]*domain.History, error)
-	ExecParser(context.Context, *playwright.Browser, *slog.Logger) error
 }
diff --git a/k8s/cronjobs.yml b/k8s/cronjobs.yml
index f3c581f..d4856ef 100644
--- a/k8s/cronjobs.yml
+++ b/k8s/cronjobs.yml
@@ -10,24 +10,19 @@
     spec:
       template:
        spec:
+          restartPolicy: OnFailure
           containers:
          - name: crawler-inf
            image: localhost:32000/crawler:latest
            env:
-            - name: GENERAL
-              value: https://www.infodolar.com.do/
-            - name: DBURI
-              value: dolardb/crawler.db
-            - name: NATSURI
-              value: "nats://nats-svc:4222"
-            volumeMounts:
-            - name: database
-              mountPath: /app/dolardb
-          volumes:
-          - name: database
-            persistentVolumeClaim:
-              claimName: bank-crawler-pvc
-          restartPolicy: OnFailure
+            - name: WHO
+              value: inf
+            - name: DOLLAR_SERVICE_URL
+              value: "dolar-grpc-svc:80"
+            - name: GENERAL
+              value: https://www.infodolar.com.do/
+            - name: NATSURI
+              value: "nats://nats-svc:4222"
 ---
 apiVersion: batch/v1
 kind: CronJob
@@ -41,26 +36,19 @@
     spec:
       template:
        spec:
+          restartPolicy: OnFailure
           containers:
          - name: crawler-bcd
            image: localhost:32000/crawler:latest
            env:
            - name: BCD
              value: https://www.bancentral.gov.do/SectorExterno/HistoricoTasas
-            - name: DBURI
-              value: dolardb/crawler.db
            - name: NATSURI
              value: "nats://nats-svc:4222"
            - name: WHO
              value: bcd
-            volumeMounts:
-            - name: database
-              mountPath: /app/dolardb
-          volumes:
-          - name: database
-            persistentVolumeClaim:
-              claimName: bank-crawler-pvc
-          restartPolicy: OnFailure
+            - name: DOLLAR_SERVICE_URL
+              value: "dolar-grpc-svc:80"
 ---
 apiVersion: batch/v1
 kind: CronJob
@@ -74,26 +62,19 @@
     spec:
      template:
        spec:
+          restartPolicy: OnFailure
          containers:
          - name: crawler-bpd
            image: localhost:32000/crawler:latest
            env:
            - name: BPD
              value: https://popularenlinea.com/empresarial/Paginas/Home.aspx
-            - name: DBURI
-              value: dolardb/crawler.db
            - name: NATSURI
              value: "nats://nats-svc:4222"
            - name: WHO
              value: bpd
-            volumeMounts:
-            - name: database
-              mountPath: /app/dolardb
-          volumes:
-          - name: database
-            persistentVolumeClaim:
-              claimName: bank-crawler-pvc
-          restartPolicy: OnFailure
+            - name: DOLLAR_SERVICE_URL
+              value: "dolar-grpc-svc:80"
 ---
 apiVersion: batch/v1
 kind: CronJob
@@ -107,26 +88,19 @@
     spec:
      template:
        spec:
+          restartPolicy: OnFailure
          containers:
          - name: crawler-bhd
            image: localhost:32000/crawler:latest
            env:
            - name: BHD
              value: https://bhd.com.do/calculators?calculator=DIVISAS
-            - name: DBURI
-              value: dolardb/crawler.db
            - name: NATSURI
              value: "nats://nats-svc:4222"
            - name: WHO
              value: bhd
-            volumeMounts:
-            - name: database
-              mountPath: /app/dolardb
-          volumes:
-          - name: database
-            persistentVolumeClaim:
-              claimName: bank-crawler-pvc
-          restartPolicy: OnFailure
+            - name: DOLLAR_SERVICE_URL
+              value: "dolar-grpc-svc:80"
 ---
 apiVersion: batch/v1
 kind: CronJob
@@ -140,26 +114,20 @@
     spec:
      template:
        spec:
+          restartPolicy: OnFailure
          containers:
          - name: crawler-brd
            image: localhost:32000/crawler:latest
            env:
            - name: BDR
              value: https://www.banreservas.com/calculadoras
-            - name: DBURI
-              value: dolardb/crawler.db
            - name: NATSURI
              value: "nats://nats-svc:4222"
            - name: WHO
              value: brd
-            volumeMounts:
-            - name: database
-              mountPath: /app/dolardb
-          volumes:
-          - name: database
-            persistentVolumeClaim:
-              claimName: bank-crawler-pvc
-          restartPolicy: OnFailure
+            - name: DOLLAR_SERVICE_URL
+              value: "dolar-grpc-svc:80"
+
 ---
 apiVersion: batch/v1
 kind: CronJob
@@ -173,26 +141,20 @@
     spec:
      template:
        spec:
+          restartPolicy: OnFailure
          containers:
          - name: crawler-apap
            image: localhost:32000/crawler:latest
            env:
            - name: APA
              value: https://apap.com.do/
-            - name: DBURI
-              value: dolardb/crawler.db
            - name: NATSURI
              value: "nats://nats-svc:4222"
            - name: WHO
-              value: apa
-            volumeMounts:
-            - name: database
-              mountPath: /app/dolardb
-          volumes:
-          - name: database
-            persistentVolumeClaim:
-              claimName: bank-crawler-pvc
-          restartPolicy: OnFailure
+              value: apap
+            - name: DOLLAR_SERVICE_URL
+              value: "dolar-grpc-svc:80"
+
 ---
 apiVersion: batch/v1
 kind: CronJob
@@ -206,26 +168,20 @@
     spec:
      template:
        spec:
+          restartPolicy: OnFailure
          containers:
          - name: crawler-bnc
            image: localhost:32000/crawler:latest
            env:
            - name: BNC
              value: https://www.banesco.com.do/
-            - name: DBURI
-              value: dolardb/crawler.db
            - name: NATSURI
              value: "nats://nats-svc:4222"
            - name: WHO
              value: bnc
-            volumeMounts:
-            - name: database
-              mountPath: /app/dolardb
-          volumes:
-          - name: database
-            persistentVolumeClaim:
-              claimName: bank-crawler-pvc
-          restartPolicy: OnFailure
+            - name: DOLLAR_SERVICE_URL
+              value: "dolar-grpc-svc:80"
+
 ---
 apiVersion: batch/v1
 kind: CronJob
@@ -239,26 +195,20 @@
     spec:
      template:
        spec:
+          restartPolicy: OnFailure
          containers:
          - name: crawler-vimenca
            image: localhost:32000/crawler:latest
            env:
            - name: VIMENCA
              value: https://www.bancovimenca.com/
-            - name: DBURI
-              value: dolardb/crawler.db
            - name: NATSURI
              value: "nats://nats-svc:4222"
            - name: WHO
              value: vimenca
-            volumeMounts:
-            - name: database
-              mountPath: /app/dolardb
-          volumes:
-          - name: database
-            persistentVolumeClaim:
-              claimName: bank-crawler-pvc
-          restartPolicy: OnFailure
+            - name: DOLLAR_SERVICE_URL
+              value: "dolar-grpc-svc:80"
+
 ---
 apiVersion: batch/v1
 kind: CronJob
@@ -272,23 +222,16 @@
     spec:
      template:
        spec:
+          restartPolicy: OnFailure
          containers:
          - name: crawler-scotia
            image: localhost:32000/crawler:latest
            env:
            - name: SCOTIA
              value: https://do.scotiabank.com/banca-personal/tarifas/tasas-de-cambio.html
-            - name: DBURI
-              value: dolardb/crawler.db
            - name: NATSURI
              value: "nats://nats-svc:4222"
            - name: WHO
              value: scotia
-            volumeMounts:
-            - name: database
-              mountPath: /app/dolardb
-          volumes:
-          - name: database
-            persistentVolumeClaim:
-              claimName: bank-crawler-pvc
-          restartPolicy: OnFailure
+            - name: DOLLAR_SERVICE_URL
+              value: "dolar-grpc-svc:80"