143 lines
3.4 KiB
Go
143 lines
3.4 KiB
Go
package crawler
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"os"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
|
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
|
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
|
"github.com/playwright-community/playwright-go"
|
|
)
|
|
|
|
type inf struct {
|
|
client ports.DollarPort
|
|
}
|
|
|
|
func NewINF(client ports.DollarPort) ports.APIPorts {
|
|
return &inf{
|
|
client: client,
|
|
}
|
|
}
|
|
|
|
// Scrape
|
|
func (in inf) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (instList []*domain.History, err error) {
|
|
uri := os.Getenv("GENERAL")
|
|
log = log.With("scrapper", "general")
|
|
tout := float64(120000)
|
|
if _, err := page.Goto(uri, playwright.PageGotoOptions{
|
|
Timeout: &tout,
|
|
WaitUntil: playwright.WaitUntilStateLoad,
|
|
}); err != nil {
|
|
log.Error("could not get info", "error", err)
|
|
return nil, err
|
|
}
|
|
|
|
entriesLocator := page.Locator("table#Dolar > tbody > tr")
|
|
entriesLocator.WaitFor(playwright.LocatorWaitForOptions{
|
|
Timeout: &tout,
|
|
State: playwright.WaitForSelectorStateVisible,
|
|
})
|
|
|
|
entries, err := entriesLocator.All()
|
|
|
|
if err != nil {
|
|
log.Error("could not get info", "error", err)
|
|
return nil, err
|
|
}
|
|
scotia := false // in this page there are 2 scotia one the change online the other is tha bank
|
|
instList = []*domain.History{}
|
|
for _, entry := range entries {
|
|
inst := &domain.History{
|
|
Parser: "inf",
|
|
}
|
|
title, _ := entry.Locator("span.nombre").TextContent()
|
|
if strings.ToLower(title) == "scotiabank" && !scotia {
|
|
title = "scotiabank cambio online"
|
|
scotia = true
|
|
}
|
|
name := ""
|
|
if title != "" {
|
|
name = helpers.RemoveAccent(strings.ToLower(title))
|
|
} else {
|
|
continue
|
|
}
|
|
inst.Name = name
|
|
|
|
compraLocator, ventaLocator := entry.Locator("td:nth-child(2)"), entry.Locator("td:nth-child(3)")
|
|
compra := getValue(compraLocator)
|
|
venta := getValue(ventaLocator)
|
|
|
|
inst.Compra = helpers.Normalize(compra)
|
|
inst.Venta = helpers.Normalize(venta)
|
|
|
|
inst.Parsed = time.Now().Unix()
|
|
// if one of the inst has 0 on the sell/buy dont process it
|
|
if inst.Compra == 0 || inst.Venta == 0 {
|
|
log.Warn("skipping", "nombre", inst.Name, "compra", inst.Compra, "venta", inst.Venta)
|
|
continue
|
|
}
|
|
instList = append(instList, inst)
|
|
}
|
|
|
|
return instList, nil
|
|
}
|
|
func getValue(place playwright.Locator) string {
|
|
text, _ := place.AllInnerTexts()
|
|
value := ""
|
|
if len(text) <= 0 {
|
|
return ""
|
|
}
|
|
nextList := strings.Split(text[0], " ")
|
|
if len(nextList) > 0 {
|
|
value = strings.Replace(nextList[0], "=", "", 1)
|
|
}
|
|
return value
|
|
}
|
|
|
|
// ExecParser
|
|
func (in inf) ExecParser(
|
|
ctx context.Context,
|
|
browser *playwright.Browser,
|
|
log *slog.Logger) error {
|
|
t := true
|
|
ua := helpers.NewMobileUA()
|
|
b := *browser
|
|
page, err := b.NewPage(playwright.BrowserNewPageOptions{
|
|
UserAgent: &ua,
|
|
// IsMobile: &t,
|
|
HasTouch: &t,
|
|
Viewport: &playwright.Size{
|
|
Width: 412,
|
|
Height: 915,
|
|
},
|
|
Screen: &playwright.Size{
|
|
Width: 412,
|
|
Height: 915,
|
|
},
|
|
})
|
|
if err != nil {
|
|
log.Error("creating page", "error", err)
|
|
os.Exit(1)
|
|
}
|
|
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
|
|
defer page.Close()
|
|
defer cancel()
|
|
instList, err := in.Scrape(ctx, page, log)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for _, inst := range instList {
|
|
log.Info("processing", "name", inst.Name)
|
|
err = in.client.NewHistory(inst)
|
|
if err != nil {
|
|
log.Error(fmt.Sprintf("inspecting %s", inst.Name), "error", err)
|
|
}
|
|
}
|
|
return err
|
|
}
|