us-dop-scrapper/inf/general.go

136 lines
3.1 KiB
Go

package inf
import (
	"context"
	"errors"
	"fmt"
	"log/slog"
	"os"
	"strings"
	"time"

	"github.com/maximotejeda/us_dop_db/db"
	"github.com/maximotejeda/us_dop_scrapper/helpers"
	"github.com/playwright-community/playwright-go"
)
// uri is the address Scrape navigates to. It is read once, when the package
// is initialized, from the GENERAL environment variable.
var uri = os.Getenv("GENERAL")
// Scrape navigates page to the package-level uri, waits for the rows of the
// "table#Dolar" table and converts every row into a *db.History entry with
// Parser set to "inf".
//
// Rows whose name cannot be read or is empty are skipped, as are rows whose
// buy (Compra) or sell (Venta) value normalizes to 0. On success the returned
// slice is non-nil, possibly empty.
//
// An error is returned when navigation fails, when no row becomes visible
// within the timeout, when the rows cannot be enumerated, or when ctx is done
// before every row has been processed.
func Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (instList []*db.History, err error) {
	log = log.With("scrapper", "general")
	tout := float64(120000) // playwright timeouts are given in milliseconds

	if _, gotoErr := page.Goto(uri, playwright.PageGotoOptions{
		Timeout:   &tout,
		WaitUntil: playwright.WaitUntilStateLoad,
	}); gotoErr != nil {
		log.Error("could not get info", "error", gotoErr)
		return nil, fmt.Errorf("loading page: %w", gotoErr)
	}

	rows := page.Locator("table#Dolar > tbody > tr")
	// Wait on the first row only: locator waits are strict in Playwright, so
	// waiting on a locator that matches several rows fails instead of waiting.
	if waitErr := rows.First().WaitFor(playwright.LocatorWaitForOptions{
		Timeout: &tout,
		State:   playwright.WaitForSelectorStateVisible,
	}); waitErr != nil {
		log.Error("could not get info", "error", waitErr)
		return nil, fmt.Errorf("waiting for table rows: %w", waitErr)
	}

	entries, err := rows.All()
	if err != nil {
		log.Error("could not get info", "error", err)
		return nil, fmt.Errorf("listing table rows: %w", err)
	}

	// The page lists scotiabank twice: the online exchange and the bank.
	// The first occurrence seen is recorded as the online exchange.
	scotia := false
	instList = make([]*db.History, 0, len(entries))
	for _, entry := range entries {
		// Stop early once the caller's deadline or cancellation has fired.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return nil, ctxErr
		}

		title, textErr := entry.Locator("span.nombre").TextContent()
		if textErr != nil {
			log.Warn("could not read name", "error", textErr)
			continue
		}
		if title == "" {
			continue
		}

		lowered := strings.ToLower(title)
		if lowered == "scotiabank" && !scotia {
			lowered = "scotiabank cambio online"
			scotia = true
		}

		inst := &db.History{
			Parser: "inf",
			Name:   helpers.RemoveAccent(lowered),
		}
		inst.Compra = helpers.Normalize(getValue(entry.Locator("td:nth-child(2)")))
		inst.Venta = helpers.Normalize(getValue(entry.Locator("td:nth-child(3)")))
		inst.Parsed = time.Now().UTC()

		// An institution with 0 on either the buy or the sell side is not processed.
		if inst.Compra == 0 || inst.Venta == 0 {
			log.Warn("skipping", "nombre", inst.Name, "compra", inst.Compra, "venta", inst.Venta)
			continue
		}
		instList = append(instList, inst)
	}
	return instList, nil
}
// getValue reads the inner texts matched by place and returns the part of the
// first text that precedes its first space, with the first "=" removed.
// It returns "" when no text is available; the error reported by
// AllInnerTexts is discarded, which leads to the same "" result when no texts
// come back.
func getValue(place playwright.Locator) string {
	texts, _ := place.AllInnerTexts()
	if len(texts) == 0 {
		return ""
	}
	head, _, _ := strings.Cut(texts[0], " ")
	return strings.Replace(head, "=", "", 1)
}
// ExecParser opens a new page on browser (mobile user agent from
// helpers.NewMobileUA, touch enabled, 412x915 viewport and screen), runs
// Scrape on it with a 6 minute timeout and hands every scraped entry to
// db.Inspect.
//
// The page is closed before ExecParser returns. A failing Inspect call is
// logged and does not stop the remaining entries; every such failure is
// included in the returned error. The result is nil only when the page could
// be created, Scrape succeeded and all Inspect calls succeeded.
func ExecParser(
	ctx context.Context,
	db *db.DB,
	browser *playwright.Browser,
	log *slog.Logger) error {
	// Guard the dereference below against a missing browser.
	if browser == nil || *browser == nil {
		return errors.New("creating page: nil browser")
	}

	hasTouch := true
	ua := helpers.NewMobileUA()
	page, err := (*browser).NewPage(playwright.BrowserNewPageOptions{
		UserAgent: &ua,
		// IsMobile: &hasTouch,
		HasTouch: &hasTouch,
		Viewport: &playwright.Size{
			Width:  412,
			Height: 915,
		},
		Screen: &playwright.Size{
			Width:  412,
			Height: 915,
		},
	})
	if err != nil {
		log.Error("creating page", "error", err)
		// Report the failure instead of exiting the process so that the
		// caller decides what to do and its deferred cleanup still runs.
		return fmt.Errorf("creating page: %w", err)
	}
	defer page.Close()

	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
	defer cancel()

	instList, err := Scrape(ctx, page, log)
	if err != nil {
		return err
	}

	// Collect every Inspect failure; returning only the last call's result
	// would hide earlier failures whenever the final entry succeeds.
	var inspectErrs []error
	for _, inst := range instList {
		log.Info("processing", "name", inst.Name)
		if inspectErr := db.Inspect(*inst); inspectErr != nil {
			log.Error(fmt.Sprintf("inspecting %s", inst.Name), "error", inspectErr)
			inspectErrs = append(inspectErrs, fmt.Errorf("inspecting %s: %w", inst.Name, inspectErr))
		}
	}
	return errors.Join(inspectErrs...)
}