assume new paradigm

This commit is contained in:
maximo tejeda 2024-05-23 12:03:10 -04:00
parent 151c8f7a7b
commit b5a3123506
19 changed files with 249 additions and 588 deletions

View File

@ -49,7 +49,7 @@ create-descriptors:
@envsubst < k8s/deployment.yml.template > k8s/deployment.yml
deploy: build-image create-descriptors
@kubectl apply -f k8s/pvc.yaml
#@kubectl apply -f k8s/pvc.yaml
@kubectl apply -f k8s/cronjobs.yml
test:

Binary file not shown.

Binary file not shown.

View File

@ -32,6 +32,6 @@ func main() {
log.Error("selecting crawler adapter", "error", err)
panic(err)
}
app := api.NewApplication(crawler)
app := api.NewApplication(crawler, dol)
app.Run()
}

View File

@ -10,6 +10,43 @@ func GetDollarServiceURL() string {
return getEnvValue("DOLLAR_SERVICE_URL")
}
func GetAPAPURL() string {
return getEnvValue("APA")
}
func GetBCDURL() string {
return getEnvValue("BCD")
}
func GetBDRURL() string {
return getEnvValue("BDR")
}
func GetBHDURL() string {
return getEnvValue("BHD")
}
func GetBNCURL() string {
return getEnvValue("BNC")
}
func GetBPDURL() string {
return getEnvValue("BPD")
}
func GetINFURL() string {
return getEnvValue("GENERAL")
}
// GetSCTAURL
// Scotia bank URL
func GetSCTAURL() string {
return getEnvValue("SCOTIA")
}
func GetVMCURL() string {
return getEnvValue("VIMENCA")
}
func getEnvValue(key string) string {
if os.Getenv(key) == "" {
panic("key not found " + key)

View File

@ -1,9 +1,9 @@
package helpers
import (
"context"
"fmt"
"log/slog"
"math"
"math/rand"
"os"
"strconv"
@ -11,7 +11,7 @@ import (
"unicode"
"github.com/maximotejeda/us_dop_db/db"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/playwright-community/playwright-go"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
@ -125,62 +125,95 @@ func Normalize(val string) float64 {
if err != nil {
fmt.Printf("%s", err)
}
return cv
cvt := math.Round(cv*10000) / 10000
return cvt
}
return 0
}
// CreateBrowser
func CreateBrowser(log *slog.Logger) (chrome *playwright.Browser, firefox *playwright.Browser, webkit *playwright.Browser) {
func CreateBrowser(log *slog.Logger) (chrome *playwright.BrowserContext, firefox *playwright.BrowserContext, webkit *playwright.BrowserContext) {
pw, err := playwright.Run(&playwright.RunOptions{
Verbose: true,
})
ua := helpers.NewMobileUA()
headless := true
if err != nil {
log.Error("running pw, could not start", "error", err)
os.Exit(1)
}
ff, err := pw.Firefox.Launch()
ff, err := pw.Firefox.Launch(playwright.BrowserTypeLaunchOptions{Headless: &headless})
if err != nil {
log.Error("could not start browser", "error", err)
log.Error("could not start browser firefox", "error", err)
os.Exit(1)
}
cm, err := pw.Firefox.Launch()
ffc, err := ff.NewContext(playwright.BrowserNewContextOptions{
IgnoreHttpsErrors: &headless,
UserAgent: &ua,
HasTouch: &headless,
Viewport: &playwright.Size{
Width: 412,
Height: 915,
},
Screen: &playwright.Size{
Width: 412,
Height: 915,
},
IsMobile: &headless,
})
if err != nil {
log.Error("could not start browser", "error", err)
log.Error("could not start browser firefox context", "error", err)
os.Exit(1)
}
sf, err := pw.WebKit.Launch()
cm, err := pw.Firefox.Launch(playwright.BrowserTypeLaunchOptions{Headless: &headless})
if err != nil {
log.Error("could not start browser", "error", err)
log.Error("could not start browser chrome", "error", err)
os.Exit(1)
}
return &cm, &ff, &sf
}
// ExecTask
func ExecTask(
ctx context.Context,
dbi *db.DB,
browser []*playwright.Browser,
log *slog.Logger,
errCounter map[string]int,
parserName string,
parserExecution func(context.Context, *db.DB, *playwright.Browser, *slog.Logger) error) (err error) {
err = parserExecution(ctx, dbi, browser[0], log)
cmc, err := cm.NewContext(playwright.BrowserNewContextOptions{
IgnoreHttpsErrors: &headless,
UserAgent: &ua,
HasTouch: &headless,
Viewport: &playwright.Size{
Width: 412,
Height: 915,
},
Screen: &playwright.Size{
Width: 412,
Height: 915,
},
IsMobile: &headless,
})
if err != nil {
errCounter[parserName]++
log.Error(err.Error(), "parser", parserName)
// todo want a retry with different browser firefox
err = parserExecution(ctx, dbi, browser[1], log)
log.Error("could not start browser chorme context", "error", err)
os.Exit(1)
}
sf, err := pw.WebKit.Launch(playwright.BrowserTypeLaunchOptions{Headless: &headless})
if err != nil {
errCounter[parserName]++
log.Error("could not start browser safari", "error", err)
os.Exit(1)
}
sfc, err := sf.NewContext(playwright.BrowserNewContextOptions{
IgnoreHttpsErrors: &headless,
UserAgent: &ua,
HasTouch: &headless,
Viewport: &playwright.Size{
Width: 412,
Height: 915,
},
Screen: &playwright.Size{
Width: 412,
Height: 915,
},
IsMobile: &headless,
})
if err != nil {
log.Error("could not start browser safari context", "error", err)
os.Exit(1)
}
log.Info("executed", "parser", parserName, "errors", errCounter[parserName])
return err
return &cmc, &ffc, &sfc
}
// RemoveAccent

View File

@ -4,28 +4,25 @@ import (
"context"
"fmt"
"log/slog"
"os"
"time"
"github.com/maximotejeda/us_dop_scrapper/config"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
"github.com/playwright-community/playwright-go"
)
type Apap struct {
client ports.DollarPort
}
type Apap struct{}
func NewApap(client ports.DollarPort) ports.APIPorts {
return &Apap{client: client}
func NewApap() ports.APIPorts {
return &Apap{}
}
func (a Apap) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
tout := 120000.00
uri := os.Getenv("APA")
log = log.With("scrapper", "apap")
if _, err := page.Goto(uri, playwright.PageGotoOptions{
if _, err := page.Goto(config.GetAPAPURL(), playwright.PageGotoOptions{
Timeout: &tout,
WaitUntil: playwright.WaitUntilStateLoad,
}); err != nil {
@ -64,43 +61,3 @@ func (a Apap) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger
log.Info("parsed", "value", inst)
return []*domain.History{inst}, nil
}
func (a Apap) ExecParser(
ctx context.Context,
browser *playwright.Browser,
log *slog.Logger) (err error) {
t := true
ua := helpers.NewMobileUA()
b := *browser
page, err := b.NewPage(playwright.BrowserNewPageOptions{
UserAgent: &ua,
// IsMobile: &t,
HasTouch: &t,
Viewport: &playwright.Size{
Width: 412,
Height: 915,
},
Screen: &playwright.Size{
Width: 412,
Height: 915,
},
})
if err != nil {
log.Error("creating page", "error", err)
os.Exit(1)
}
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
defer page.Close()
defer cancel()
histList, err := a.Scrape(ctx, page, log)
// here we execute db operations
if err != nil {
return err
}
err = a.client.NewHistory(histList[0])
if err != nil {
return err
}
return err
}

View File

@ -4,30 +4,25 @@ import (
"context"
"fmt"
"log/slog"
"os"
"time"
"github.com/maximotejeda/us_dop_scrapper/config"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
"github.com/playwright-community/playwright-go"
)
type bcd struct {
client ports.DollarPort
}
type bcd struct{}
func NewBCD(client ports.DollarPort) ports.APIPorts {
return &bcd{
client: client,
}
func NewBCD() ports.APIPorts {
return &bcd{}
}
func (b bcd) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
log = log.With("scrapper", "bcd")
tout := 90000.00
uri := os.Getenv("BCD")
if _, err = page.Goto(uri, playwright.PageGotoOptions{
if _, err = page.Goto(config.GetBCDURL(), playwright.PageGotoOptions{
Timeout: &tout,
WaitUntil: playwright.WaitUntilStateLoad,
}); err != nil {
@ -68,43 +63,3 @@ func (b bcd) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger)
}
return []*domain.History{inst}, nil
}
func (bc bcd) ExecParser(
ctx context.Context,
browser *playwright.Browser,
log *slog.Logger) (err error) {
t := true
ua := helpers.NewMobileUA()
b := *browser
page, err := b.NewPage(playwright.BrowserNewPageOptions{
UserAgent: &ua,
// IsMobile: &t,
HasTouch: &t,
Viewport: &playwright.Size{
Width: 412,
Height: 915,
},
Screen: &playwright.Size{
Width: 412,
Height: 915,
},
})
if err != nil {
log.Error("creating page", "error", err)
os.Exit(1)
}
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
defer page.Close()
defer cancel()
inst, err := bc.Scrape(ctx, page, log)
if err != nil {
return err
}
err = bc.client.NewHistory(inst[0])
if err != nil {
return err
}
return err
}

View File

@ -4,29 +4,24 @@ import (
"context"
"fmt"
"log/slog"
"os"
"time"
"github.com/maximotejeda/us_dop_scrapper/config"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
"github.com/playwright-community/playwright-go"
)
type bdr struct {
client ports.DollarPort
}
type bdr struct{}
func NewBDR(client ports.DollarPort) ports.APIPorts {
return &bdr{
client: client,
}
func NewBDR() ports.APIPorts {
return &bdr{}
}
func (bd bdr) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
tout := 120000.00
log = log.With("scrapper", "bdr")
uri := os.Getenv("BDR")
if _, err := page.Goto(uri, playwright.PageGotoOptions{
if _, err := page.Goto(config.GetBDRURL(), playwright.PageGotoOptions{
Timeout: &tout,
WaitUntil: playwright.WaitUntilStateLoad,
}); err != nil {
@ -72,39 +67,3 @@ func (bd bdr) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger
insts = append(insts, inst)
return insts, nil
}
func (bd bdr) ExecParser(
ctx context.Context,
browser *playwright.Browser,
log *slog.Logger) (err error) {
t := true
ua := helpers.NewMobileUA()
b := *browser
page, err := b.NewPage(playwright.BrowserNewPageOptions{
UserAgent: &ua,
// IsMobile: &t,
HasTouch: &t,
Viewport: &playwright.Size{
Width: 412,
Height: 915,
},
Screen: &playwright.Size{
Width: 412,
Height: 915,
},
})
if err != nil {
log.Error("creating page", "error", err)
os.Exit(1)
}
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
defer page.Close()
defer cancel()
insts, err := bd.Scrape(ctx, page, log)
if err != nil {
return err
}
err = bd.client.NewHistory(insts[0])
return err
}

View File

@ -4,33 +4,28 @@ import (
"context"
"fmt"
"log/slog"
"os"
"strings"
"time"
"github.com/maximotejeda/us_dop_scrapper/config"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
"github.com/playwright-community/playwright-go"
)
type bhd struct {
client ports.DollarPort
}
type bhd struct{}
func NewBHD(client ports.DollarPort) ports.APIPorts {
return &bhd{
client: client,
}
func NewBHD() ports.APIPorts {
return &bhd{}
}
// Scrape
// needs a mobile User Agent
func (bh bhd) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
tout := 120000.00
uri := os.Getenv("BHD")
log = log.With("scrapper", "bhd")
if _, err := page.Goto(uri, playwright.PageGotoOptions{
if _, err := page.Goto(config.GetBHDURL(), playwright.PageGotoOptions{
Timeout: &tout,
WaitUntil: playwright.WaitUntilStateLoad,
}); err != nil {
@ -111,38 +106,3 @@ func (bh bhd) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger
//log.Info(fmt.Sprintf("%v", inst))
return []*domain.History{inst}, nil
}
func (bh bhd) ExecParser(
ctx context.Context,
browser *playwright.Browser,
log *slog.Logger) (err error) {
t := true
ua := helpers.NewMobileUA()
b := *browser
page, err := b.NewPage(playwright.BrowserNewPageOptions{
UserAgent: &ua,
// IsMobile: &t,
HasTouch: &t,
Viewport: &playwright.Size{
Width: 412,
Height: 915,
},
Screen: &playwright.Size{
Width: 412,
Height: 915,
},
})
if err != nil {
log.Error("creating page", "error", err)
os.Exit(1)
}
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
defer page.Close()
defer cancel()
inst, err := bh.Scrape(ctx, page, log)
if err != nil {
return err
}
bh.client.NewHistory(inst[0])
return err
}

View File

@ -4,30 +4,25 @@ import (
"context"
"fmt"
"log/slog"
"os"
"time"
"github.com/maximotejeda/us_dop_scrapper/config"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
"github.com/playwright-community/playwright-go"
)
type bnc struct {
client ports.DollarPort
}
type bnc struct{}
func NewBNC(client ports.DollarPort) ports.APIPorts {
return &bnc{
client: client,
}
func NewBNC() ports.APIPorts {
return &bnc{}
}
func (bn bnc) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
tout := 120000.00
uri := os.Getenv("BNC")
log = log.With("scrapper", "bnc")
if _, err := page.Goto(uri, playwright.PageGotoOptions{
if _, err := page.Goto(config.GetBNCURL(), playwright.PageGotoOptions{
Timeout: &tout,
WaitUntil: playwright.WaitUntilStateLoad,
}); err != nil {
@ -66,39 +61,3 @@ func (bn bnc) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger
}
return []*domain.History{inst}, nil
}
func (bn bnc) ExecParser(
ctx context.Context,
browser *playwright.Browser,
log *slog.Logger) (err error) {
t := true
ua := helpers.NewMobileUA()
b := *browser
page, err := b.NewPage(playwright.BrowserNewPageOptions{
UserAgent: &ua,
// IsMobile: &t,
HasTouch: &t,
Viewport: &playwright.Size{
Width: 412,
Height: 915,
},
Screen: &playwright.Size{
Width: 412,
Height: 915,
},
})
if err != nil {
log.Error("creating page", "error", err)
os.Exit(1)
}
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
defer page.Close()
defer cancel()
inst, err := bn.Scrape(ctx, page, log)
// here we execute db operations
if err != nil {
return err
}
bn.client.NewHistory(inst[0])
return err
}

View File

@ -4,35 +4,28 @@ import (
"context"
"fmt"
"log/slog"
"os"
"strconv"
"time"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/maximotejeda/us_dop_scrapper/config"
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
"github.com/playwright-community/playwright-go"
)
type bpd struct {
client ports.DollarPort
}
type bpd struct{}
func NewBPD(client ports.DollarPort) ports.APIPorts {
return &bpd{
client: client,
}
func NewBPD() ports.APIPorts {
return &bpd{}
}
// Scrape
// needs a mobile User Agent
func (bp bpd) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
tout := 120000.00
uri := os.Getenv("BPD")
log = log.With("scrapper", "bpd")
if _, err := page.Goto(uri, playwright.PageGotoOptions{
if _, err := page.Goto(config.GetBPDURL(), playwright.PageGotoOptions{
Timeout: &tout,
WaitUntil: playwright.WaitUntilStateLoad,
}); err != nil {
@ -90,38 +83,3 @@ func HoverTasas(page playwright.Page) {
tasasMenu := page.Locator(".footer_est_menu_bpd > li:nth-child(3)")
tasasMenu.Hover()
}
func (bp bpd) ExecParser(
ctx context.Context,
browser *playwright.Browser,
log *slog.Logger) (err error) {
t := true
ua := helpers.NewMobileUA()
b := *browser
page, err := b.NewPage(playwright.BrowserNewPageOptions{
UserAgent: &ua,
// IsMobile: &t,
HasTouch: &t,
Viewport: &playwright.Size{
Width: 412,
Height: 915,
},
Screen: &playwright.Size{
Width: 412,
Height: 915,
},
})
if err != nil {
log.Error("creating page", "error", err)
os.Exit(1)
}
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
defer page.Close()
defer cancel()
inst, err := bp.Scrape(ctx, page, log)
if err != nil {
return err
}
bp.client.NewHistory(inst[0])
return err
}

View File

@ -10,23 +10,23 @@ func Selector(who string, client ports.DollarPort) (ports.APIPorts, error) {
var parser ports.APIPorts
switch who {
case "apap":
parser = NewApap(client)
parser = NewApap()
case "bcd":
parser = NewBCD(client)
case "bdr":
parser = NewBDR(client)
parser = NewBCD()
case "brd":
parser = NewBDR()
case "bhd":
parser = NewBHD(client)
parser = NewBHD()
case "bnc":
parser = NewBNC(client)
parser = NewBNC()
case "bpd":
parser = NewBPD(client)
parser = NewBPD()
case "inf":
parser = NewINF(client)
parser = NewINF()
case "scotia":
parser = NewScotia(client)
parser = NewScotia()
case "vimenca":
parser = NewVimenca(client)
parser = NewVimenca()
default:
return nil, fmt.Errorf("not recognize who: " + who)
}

View File

@ -2,34 +2,28 @@ package crawler
import (
"context"
"fmt"
"log/slog"
"os"
"strings"
"time"
"github.com/maximotejeda/us_dop_scrapper/config"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
"github.com/playwright-community/playwright-go"
)
type inf struct {
client ports.DollarPort
}
type inf struct{}
func NewINF(client ports.DollarPort) ports.APIPorts {
return &inf{
client: client,
}
func NewINF() ports.APIPorts {
return &inf{}
}
// Scrape
func (in inf) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (instList []*domain.History, err error) {
uri := os.Getenv("GENERAL")
log = log.With("scrapper", "general")
tout := float64(120000)
if _, err := page.Goto(uri, playwright.PageGotoOptions{
if _, err := page.Goto(config.GetINFURL(), playwright.PageGotoOptions{
Timeout: &tout,
WaitUntil: playwright.WaitUntilStateLoad,
}); err != nil {
@ -49,17 +43,13 @@ func (in inf) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger
log.Error("could not get info", "error", err)
return nil, err
}
scotia := false // in this page there are 2 scotia one the change online the other is tha bank
instList = []*domain.History{}
for _, entry := range entries {
inst := &domain.History{
Parser: "inf",
}
title, _ := entry.Locator("span.nombre").TextContent()
if strings.ToLower(title) == "scotiabank" && !scotia {
title = "scotiabank cambio online"
scotia = true
}
name := ""
if title != "" {
name = helpers.RemoveAccent(strings.ToLower(title))
@ -81,6 +71,10 @@ func (in inf) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger
log.Warn("skipping", "nombre", inst.Name, "compra", inst.Compra, "venta", inst.Venta)
continue
}
switch {
case strings.Contains(inst.Name, "banreservas"), strings.Contains(inst.Name,"banco popular"), strings.Contains(inst.Name,"scotia"), strings.Contains(inst.Name,"hipotecario"), strings.Contains(inst.Name,"asociacion popular"), strings.Contains(inst.Name,"vimenca"):
continue
}
instList = append(instList, inst)
}
@ -98,45 +92,3 @@ func getValue(place playwright.Locator) string {
}
return value
}
// ExecParser
func (in inf) ExecParser(
ctx context.Context,
browser *playwright.Browser,
log *slog.Logger) error {
t := true
ua := helpers.NewMobileUA()
b := *browser
page, err := b.NewPage(playwright.BrowserNewPageOptions{
UserAgent: &ua,
// IsMobile: &t,
HasTouch: &t,
Viewport: &playwright.Size{
Width: 412,
Height: 915,
},
Screen: &playwright.Size{
Width: 412,
Height: 915,
},
})
if err != nil {
log.Error("creating page", "error", err)
os.Exit(1)
}
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
defer page.Close()
defer cancel()
instList, err := in.Scrape(ctx, page, log)
if err != nil {
return err
}
for _, inst := range instList {
log.Info("processing", "name", inst.Name)
err = in.client.NewHistory(inst)
if err != nil {
log.Error(fmt.Sprintf("inspecting %s", inst.Name), "error", err)
}
}
return err
}

View File

@ -4,30 +4,26 @@ import (
"context"
"fmt"
"log/slog"
"os"
"time"
"github.com/maximotejeda/us_dop_scrapper/config"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
"github.com/playwright-community/playwright-go"
)
type scotia struct {
client ports.DollarPort
}
type scotia struct{}
func NewScotia(client ports.DollarPort) ports.APIPorts {
return &scotia{
client: client,
}
func NewScotia() ports.APIPorts {
return &scotia{}
}
func (sct scotia) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
tout := 120000.00
uri := os.Getenv("SCOTIA")
log = log.With("scrapper", "scotia")
if _, err := page.Goto(uri, playwright.PageGotoOptions{
if _, err := page.Goto(config.GetSCTAURL(), playwright.PageGotoOptions{
Timeout: &tout,
WaitUntil: playwright.WaitUntilStateLoad,
}); err != nil {
@ -102,41 +98,3 @@ func (sct scotia) Scrape(ctx context.Context, page playwright.Page, log *slog.Lo
insts = append(insts, instOnline, instOnsite)
return insts, nil
}
func (sct scotia) ExecParser(
ctx context.Context,
browser *playwright.Browser,
log *slog.Logger) (err error) {
t := true
ua := helpers.NewMobileUA()
b := *browser
page, err := b.NewPage(playwright.BrowserNewPageOptions{
UserAgent: &ua,
// IsMobile: &t,
HasTouch: &t,
Viewport: &playwright.Size{
Width: 412,
Height: 915,
},
Screen: &playwright.Size{
Width: 412,
Height: 915,
},
})
if err != nil {
log.Error("creating page", "error", err)
os.Exit(1)
}
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
defer page.Close()
defer cancel()
insts, err := sct.Scrape(ctx, page, log)
// here we execute db operations
if err != nil {
return err
}
for _, inst := range insts {
sct.client.NewHistory(inst)
}
return err
}

View File

@ -3,30 +3,25 @@ package crawler
import (
"context"
"log/slog"
"os"
"time"
"github.com/maximotejeda/us_dop_scrapper/config"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
"github.com/playwright-community/playwright-go"
)
type vimenca struct {
client ports.DollarPort
}
type vimenca struct{}
func NewVimenca(client ports.DollarPort) ports.APIPorts {
return &vimenca{
client: client,
}
func NewVimenca() ports.APIPorts {
return &vimenca{}
}
func (v vimenca) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
uri := os.Getenv("VIMENCA")
tout := 120000.00
log = log.With("scrapper", "vimenca")
if _, err := page.Goto(uri, playwright.PageGotoOptions{
if _, err := page.Goto(config.GetVMCURL(), playwright.PageGotoOptions{
Timeout: &tout,
WaitUntil: playwright.WaitUntilStateLoad,
}); err != nil {
@ -64,43 +59,3 @@ func (v vimenca) Scrape(ctx context.Context, page playwright.Page, log *slog.Log
log.Info("institution", "value", inst)
return []*domain.History{inst}, nil
}
func (v vimenca) ExecParser(
ctx context.Context,
browser *playwright.Browser,
log *slog.Logger) (err error) {
t := true
ua := helpers.NewMobileUA()
b := *browser
page, err := b.NewPage(playwright.BrowserNewPageOptions{
UserAgent: &ua,
// IsMobile: &t,
HasTouch: &t,
Viewport: &playwright.Size{
Width: 412,
Height: 915,
},
Screen: &playwright.Size{
Width: 412,
Height: 915,
},
})
if err != nil {
log.Error("creating page", "error", err)
os.Exit(1)
}
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
defer page.Close()
defer cancel()
inst, err := v.Scrape(ctx, page, log)
// here we execute db operations
if err != nil {
return err
}
err = v.client.NewHistory(inst[0])
if err != nil {
return err
}
return err
}

View File

@ -2,38 +2,74 @@ package api
import (
"context"
"log/slog"
"os"
"time"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
"log/slog"
"github.com/playwright-community/playwright-go"
)
type Application struct {
log *slog.Logger
api ports.APIPorts
client ports.DollarPort
}
func NewApplication(crawler ports.APIPorts) *Application {
func NewApplication(crawler ports.APIPorts, client ports.DollarPort) *Application {
log := slog.Default()
log = log.With("application", "root")
return &Application{
log: log,
api: crawler,
client: client,
}
}
func (a Application) Run() {
ctx := context.Background()
ch, ff, wk := helpers.CreateBrowser(a.log)
err := a.api.ExecParser(ctx, ch, a.log)
err := a.ExecParser(ctx, ch, a.log)
if err != nil {
a.log.Info("failed on frist browser", "browser", "chrome", "error", err)
err := a.api.ExecParser(ctx, ff, a.log)
err := a.ExecParser(ctx, ff, a.log)
if err != nil {
a.log.Error("failed on second browser", "browser", "firefox", "error", err)
err := a.api.ExecParser(ctx, wk, a.log)
err := a.ExecParser(ctx, wk, a.log)
if err != nil {
a.log.Error("tried all browsers error", "brwser", "webkit", "error", err)
return
}
}
}
}
func (a Application) ExecParser(
ctx context.Context,
browser *playwright.BrowserContext,
log *slog.Logger) (err error) {
b := *browser
page, err := b.NewPage()
if err != nil {
log.Error("creating page", "error", err)
os.Exit(1)
}
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
defer page.Close()
defer cancel()
histList, err := a.api.Scrape(ctx, page, log)
// here we execute db operations
if err != nil {
return err
}
for _, hist := range histList {
err = a.client.NewHistory(hist)
if err != nil {
a.log.Error("creating new hist", "history", hist, "error", err)
}
a.log.Info("parsed Success", "parser", hist.Parser, "item", hist)
}
return err
}

View File

@ -10,5 +10,4 @@ import (
type APIPorts interface {
Scrape(context.Context, playwright.Page, *slog.Logger) ([]*domain.History, error)
ExecParser(context.Context, *playwright.Browser, *slog.Logger) error
}

View File

@ -10,24 +10,19 @@ spec:
spec:
template:
spec:
restartPolicy: OnFailure
containers:
- name: crawler-inf
image: localhost:32000/crawler:latest
env:
- name: WHO
value: inf
- name: DOLLAR_SERVICE_URL
value: "dolar-grpc-svc:80"
- name: GENERAL
value: https://www.infodolar.com.do/
- name: DBURI
value: dolardb/crawler.db
- name: NATSURI
value: "nats://nats-svc:4222"
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
restartPolicy: OnFailure
---
apiVersion: batch/v1
kind: CronJob
@ -41,26 +36,19 @@ spec:
spec:
template:
spec:
restartPolicy: OnFailure
containers:
- name: crawler-bcd
image: localhost:32000/crawler:latest
env:
- name: BCD
value: https://www.bancentral.gov.do/SectorExterno/HistoricoTasas
- name: DBURI
value: dolardb/crawler.db
- name: NATSURI
value: "nats://nats-svc:4222"
- name: WHO
value: bcd
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
restartPolicy: OnFailure
- name: DOLLAR_SERVICE_URL
value: "dolar-grpc-svc:80"
---
apiVersion: batch/v1
kind: CronJob
@ -74,26 +62,19 @@ spec:
spec:
template:
spec:
restartPolicy: OnFailure
containers:
- name: crawler-bpd
image: localhost:32000/crawler:latest
env:
- name: BPD
value: https://popularenlinea.com/empresarial/Paginas/Home.aspx
- name: DBURI
value: dolardb/crawler.db
- name: NATSURI
value: "nats://nats-svc:4222"
- name: WHO
value: bpd
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
restartPolicy: OnFailure
- name: DOLLAR_SERVICE_URL
value: "dolar-grpc-svc:80"
---
apiVersion: batch/v1
kind: CronJob
@ -107,26 +88,19 @@ spec:
spec:
template:
spec:
restartPolicy: OnFailure
containers:
- name: crawler-bhd
image: localhost:32000/crawler:latest
env:
- name: BHD
value: https://bhd.com.do/calculators?calculator=DIVISAS
- name: DBURI
value: dolardb/crawler.db
- name: NATSURI
value: "nats://nats-svc:4222"
- name: WHO
value: bhd
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
restartPolicy: OnFailure
- name: DOLLAR_SERVICE_URL
value: "dolar-grpc-svc:80"
---
apiVersion: batch/v1
kind: CronJob
@ -140,26 +114,20 @@ spec:
spec:
template:
spec:
restartPolicy: OnFailure
containers:
- name: crawler-brd
image: localhost:32000/crawler:latest
env:
- name: BDR
value: https://www.banreservas.com/calculadoras
- name: DBURI
value: dolardb/crawler.db
- name: NATSURI
value: "nats://nats-svc:4222"
- name: WHO
value: brd
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
restartPolicy: OnFailure
- name: DOLLAR_SERVICE_URL
value: "dolar-grpc-svc:80"
---
apiVersion: batch/v1
kind: CronJob
@ -173,26 +141,20 @@ spec:
spec:
template:
spec:
restartPolicy: OnFailure
containers:
- name: crawler-apap
image: localhost:32000/crawler:latest
env:
- name: APA
value: https://apap.com.do/
- name: DBURI
value: dolardb/crawler.db
- name: NATSURI
value: "nats://nats-svc:4222"
- name: WHO
value: apa
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
restartPolicy: OnFailure
value: apap
- name: DOLLAR_SERVICE_URL
value: "dolar-grpc-svc:80"
---
apiVersion: batch/v1
kind: CronJob
@ -206,26 +168,20 @@ spec:
spec:
template:
spec:
restartPolicy: OnFailure
containers:
- name: crawler-bnc
image: localhost:32000/crawler:latest
env:
- name: BNC
value: https://www.banesco.com.do/
- name: DBURI
value: dolardb/crawler.db
- name: NATSURI
value: "nats://nats-svc:4222"
- name: WHO
value: bnc
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
restartPolicy: OnFailure
- name: DOLLAR_SERVICE_URL
value: "dolar-grpc-svc:80"
---
apiVersion: batch/v1
kind: CronJob
@ -239,26 +195,20 @@ spec:
spec:
template:
spec:
restartPolicy: OnFailure
containers:
- name: crawler-vimenca
image: localhost:32000/crawler:latest
env:
- name: VIMENCA
value: https://www.bancovimenca.com/
- name: DBURI
value: dolardb/crawler.db
- name: NATSURI
value: "nats://nats-svc:4222"
- name: WHO
value: vimenca
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
restartPolicy: OnFailure
- name: DOLLAR_SERVICE_URL
value: "dolar-grpc-svc:80"
---
apiVersion: batch/v1
kind: CronJob
@ -272,23 +222,16 @@ spec:
spec:
template:
spec:
restartPolicy: OnFailure
containers:
- name: crawler-scotia
image: localhost:32000/crawler:latest
env:
- name: SCOTIA
value: https://do.scotiabank.com/banca-personal/tarifas/tasas-de-cambio.html
- name: DBURI
value: dolardb/crawler.db
- name: NATSURI
value: "nats://nats-svc:4222"
- name: WHO
value: scotia
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
restartPolicy: OnFailure
- name: DOLLAR_SERVICE_URL
value: "dolar-grpc-svc:80"