migration to grpc service

This commit is contained in:
maximo tejeda 2024-04-17 14:38:54 -04:00
parent d399673d89
commit 7d28c9649a
44 changed files with 2640 additions and 29 deletions

1
.gitignore vendored
View File

@ -1 +1,2 @@
.env
k8s/deployment.yml

View File

@ -1,17 +1,10 @@
FROM golang:1.22rc2 as builder
FROM golang:latest
ARG BINAME=crawler-linux-arm64-0.0.0_1
RUN go run github.com/playwright-community/playwright-go/cmd/playwright@v0.4001.0 install --with-deps
#RUN go run github.com/playwright-community/playwright-go/cmd/playwright@v0.4001.0 install --with-deps
#RUN playwright install --with-deps
COPY ./bin/${BINAME} /usr/bin/crawler
RUN mkdir /app
WORKDIR /app
COPY . /app
RUN go mod download && go mod tidy
RUN go build -o bin/crawler ./cmd/crawler/main.go
FROM golang:latest
RUN go install github.com/playwright-community/playwright-go/cmd/playwright@latest
RUN playwright install --with-deps
COPY --from=builder /app/bin/crawler /usr/bin/crawler
CMD ["crawler"]
ENTRYPOINT crawler

15
Dockerfile.old Normal file
View File

@ -0,0 +1,15 @@
# Stage 1 (builder): compile the crawler binary from the full build context.
FROM golang:latest as builder
WORKDIR /crawler
COPY . ./
# Fetch and tidy module dependencies before building.
RUN go mod download && go mod tidy
RUN go build -o ./bin/crawler ./cmd/crawler
# Stage 2 (runtime): fresh Go image that receives the Playwright install and the binary.
FROM golang:latest
#RUN go install github.com/playwright-community/playwright-go@latest
# Run the playwright-go CLI, pinned to v0.4001.0, with `install --with-deps`.
RUN go run github.com/playwright-community/playwright-go/cmd/playwright@v0.4001.0 install --with-deps
#RUN playwright install --with-deps
# Copy only the compiled binary out of the builder stage.
COPY --from=builder /crawler/bin/crawler /usr/bin/crawler
RUN mkdir /app
WORKDIR /app
# Shell-form ENTRYPOINT: the command is launched through the image's shell.
ENTRYPOINT crawler

View File

@ -9,13 +9,24 @@ ARRCHS="arm 386"
DEBUG=1
SERVICE=crawler
VERSION=0.0.0_1
BINAME=$(SERVICE)-$(OS)-$(ARCH)-$(VERSION)
BINAMEARM=$(SERVICE)-$(OS)-arm64-$(VERSION)
# can be docker or podman or whatever
CONTAINERS=podman
CONTAINERS=docker
COMPOSE=$(CONTAINERS)-compose
# Configure local registry
REGADDR=192.168.0.151:32000
K8SRSNAME=$(shell kubectl get rs --no-headers -o custom-columns=":metadata.name" | grep bank)
.phony: all clean build test clean-image build-image build-image-debug run-image run-image-debug run-local
build-image: clean
@$(CONTAINERS) compose -f ./docker-compose.yaml build
build-image: build
# here we made the images and push to registry with buildx
@$(CONTAINERS) buildx build --build-arg="BINAME=${BINAMEARM}" --platform linux/arm64 --push -t $(REGADDR)/crawler:latest .
# Here we upload it to local
build-test-image:
@$(CONTAINERS) buildx build --platform linux/arm64 --push -t $(REGADDR)/crawler:latest -f Dockerfile.old .
run-image: build-image
@$(CONTAINERS) compose -f docker-compose.yaml up
@ -27,13 +38,24 @@ run-image-debug: build-image-debug
@$(CONTAINERS) compose -f docker-compose-debug.yaml up
run-local:clean build
@bin/$(SERVICE)-$(OS)-$(ARCH)-$(VERSION)
build:
@go build -o ./bin/$(SERVICE)-$(OS)-$(ARCH)-$(VERSION) ./cmd/crawler/
@bin/$(BINAME)
build: clean
#@mkdir dolardb
@env GOOS=$(OS) GOARCH=$(arch) go build -o ./bin/$(BINAME) ./cmd/crawler/.
@env GOOS=$(OS) GOARCH=arm64 go build -o ./bin/$(BINAMEARM) ./cmd/crawler/.
create-descriptors:
@envsubst < k8s/deployment.yml.template > k8s/deployment.yml
deploy: build-image create-descriptors
@kubectl apply -f k8s/pvc.yaml
@kubectl apply -f k8s/cronjobs.yml
test:
@go -count=1 test ./...
clean:
@rm -rf ./bin
@rm -rf ./bin
clean-image:
@$(CONTAINERS) system prune -f

97
apa/apa.go Normal file
View File

@ -0,0 +1,97 @@
package apa
import (
"context"
"fmt"
"log/slog"
"os"
"time"
"github.com/maximotejeda/us_dop_db/db"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/playwright-community/playwright-go"
)
// uri is the address Scrape navigates to. It is read from the APA environment
// variable once, at package initialization, and is empty when the variable is
// unset.
var (
uri = os.Getenv("APA")
)
// Scrape loads the page at uri, clicks the "#exchangesRates" element and reads
// the USD buy ("compra") and sell ("venta") quotes from the page.
//
// ctx is accepted for signature parity with the other scrapers but is not
// consulted here. A nil record and a non-nil error are returned when
// navigation fails, when either quote cannot be read, or when either
// normalized quote equals 0.
func Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (inst *db.History, err error) {
	tout := 120000.00 // navigation timeout handed to Goto
	log = log.With("scrapper", "apap")
	if _, err := page.Goto(uri, playwright.PageGotoOptions{
		Timeout:   &tout,
		WaitUntil: playwright.WaitUntilStateLoad,
	}); err != nil {
		log.Error("could not get info", "error", err)
		return nil, err
	}

	// Opening the rates panel stays best-effort (the reads below decide success
	// or failure), but failures are now logged instead of silently dropped.
	button := page.Locator("#exchangesRates")
	if err := button.WaitFor(); err != nil {
		log.Warn("waiting for exchange rates button", "err", err)
	}
	if err := button.Click(); err != nil {
		log.Warn("clicking exchange rates button", "err", err)
	}

	compraLocator := page.Locator("#currency-buy-USD")
	ventaLocator := page.Locator("#currency-sell-USD")

	compraSTR, err := compraLocator.TextContent()
	if err != nil {
		log.Error("could not get compra str", "err", err)
		return nil, err
	}
	ventaSTR, err := ventaLocator.TextContent()
	if err != nil {
		log.Error("could not get venta string", "err", err)
		return nil, err
	}

	inst = &db.History{
		Name:   "asociacion popular de ahorros y prestamos",
		Parser: "apap",
		Parsed: time.Now().UTC(),
	}
	inst.Venta = helpers.Normalize(ventaSTR)
	inst.Compra = helpers.Normalize(compraSTR)

	// A zero on either side is treated as a failed parse.
	if inst.Compra == 0 || inst.Venta == 0 {
		return nil, fmt.Errorf("apa: institution not parsed: %v", inst)
	}
	log.Info("parsed", "value", inst)
	return inst, nil
}
// ExecParser opens a page on browser with a mobile user agent, touch support
// and a 412x915 viewport, runs Scrape on it and passes the scraped record to
// db.Inspect.
//
// A nil browser or a page-creation failure is returned as an error instead of
// terminating the process, so the caller decides how to react. The page is
// closed before returning.
func ExecParser(
	ctx context.Context,
	db *db.DB,
	browser *playwright.Browser,
	log *slog.Logger) (err error) {
	if browser == nil || *browser == nil {
		return fmt.Errorf("apa: nil browser")
	}
	t := true
	ua := helpers.NewMobileUA()
	b := *browser
	page, err := b.NewPage(playwright.BrowserNewPageOptions{
		UserAgent: &ua,
		// IsMobile: &t,
		HasTouch: &t,
		Viewport: &playwright.Size{
			Width:  412,
			Height: 915,
		},
		Screen: &playwright.Size{
			Width:  412,
			Height: 915,
		},
	})
	if err != nil {
		log.Error("creating page", "error", err)
		return fmt.Errorf("apa: creating page: %w", err)
	}
	defer page.Close()

	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
	defer cancel()

	inst, err := Scrape(ctx, page, log)
	if err != nil {
		return err
	}
	// Hand the scraped record to the database layer.
	return db.Inspect(*inst)
}

104
bcd/bcd.go Normal file
View File

@ -0,0 +1,104 @@
package bcd
import (
"context"
"fmt"
"log/slog"
"os"
"time"
"github.com/maximotejeda/us_dop_db/db"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/maximotejeda/us_dop_scrapper/models"
"github.com/playwright-community/playwright-go"
)
// bcd embeds models.Institucion.
// NOTE(review): nothing visible in this file references the type — confirm
// whether it is still needed.
type bcd struct {
models.Institucion
}
// uri is the address Scrape navigates to. It is read from the BCD environment
// variable once, at package initialization, and is empty when the variable is
// unset.
var (
uri = os.Getenv("BCD")
)
// Scrape navigates page to uri and extracts the buy ("compra") and sell
// ("venta") quotes from the span#actualPurchaseValue and
// span#actualSellingValue elements.
//
// ctx is not consulted. The result is nil together with an error when
// navigation fails, when either element's text cannot be read, or when either
// normalized quote is 0.
func Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (inst *db.History, err error) {
	logger := log.With("scrapper", "bcd")
	timeout := 90000.00

	gotoOpts := playwright.PageGotoOptions{
		Timeout:   &timeout,
		WaitUntil: playwright.WaitUntilStateLoad,
	}
	_, err = page.Goto(uri, gotoOpts)
	if err != nil {
		logger.Error("could not get info", "error", err)
		return nil, err
	}

	// Wait for the purchase value to become visible; the wait's own result is
	// not inspected.
	buyLocator := page.Locator("span#actualPurchaseValue")
	buyLocator.WaitFor(playwright.LocatorWaitForOptions{
		Timeout: &timeout,
		State:   playwright.WaitForSelectorStateVisible,
	})
	sellLocator := page.Locator("span#actualSellingValue")

	buyText, err := buyLocator.TextContent()
	if err != nil {
		logger.Error("locating compra", "err", err)
		return nil, err
	}
	sellText, err := sellLocator.TextContent()
	if err != nil {
		logger.Error("locating venta", "err", err)
		return nil, err
	}

	record := &db.History{
		Parser: "bcd",
		Name:   "banco central dominicano",
		Parsed: time.Now().UTC(),
	}
	record.Compra = helpers.Normalize(buyText)
	record.Venta = helpers.Normalize(sellText)

	if record.Compra != 0 && record.Venta != 0 {
		return record, nil
	}
	return nil, fmt.Errorf("bcd: institution not parsed compra or venta cant be 0")
}
// ExecParser opens a page on browser with a mobile user agent, touch support
// and a 412x915 viewport, runs Scrape on it and passes the scraped record to
// db.Inspect.
//
// A nil browser or a page-creation failure is returned as an error instead of
// terminating the process, so the caller decides how to react. The page is
// closed before returning.
func ExecParser(
	ctx context.Context,
	db *db.DB,
	browser *playwright.Browser,
	log *slog.Logger) (err error) {
	if browser == nil || *browser == nil {
		return fmt.Errorf("bcd: nil browser")
	}
	t := true
	ua := helpers.NewMobileUA()
	b := *browser
	page, err := b.NewPage(playwright.BrowserNewPageOptions{
		UserAgent: &ua,
		// IsMobile: &t,
		HasTouch: &t,
		Viewport: &playwright.Size{
			Width:  412,
			Height: 915,
		},
		Screen: &playwright.Size{
			Width:  412,
			Height: 915,
		},
	})
	if err != nil {
		log.Error("creating page", "error", err)
		return fmt.Errorf("bcd: creating page: %w", err)
	}
	defer page.Close()

	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
	defer cancel()

	inst, err := Scrape(ctx, page, log)
	if err != nil {
		return err
	}
	// Hand the scraped record to the database layer.
	return db.Inspect(*inst)
}

104
bdr/bdr.go Normal file
View File

@ -0,0 +1,104 @@
package bdr
import (
"context"
"fmt"
"log/slog"
"os"
"time"
"github.com/maximotejeda/us_dop_db/db"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/playwright-community/playwright-go"
)
// uri is the address Scrape navigates to. It is read from the BDR environment
// variable once, at package initialization, and is empty when the variable is
// unset.
var (
uri = os.Getenv("BDR")
)
// Scrape loads the page at uri and reads the USD buy ("compra") and sell
// ("venta") quotes from span#compraUS and span#ventaUS.
//
// ctx is not consulted. A nil record and a non-nil error are returned when
// navigation or the load-state wait fails, when either quote cannot be read,
// or when either normalized quote equals 0.
func Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (inst *db.History, err error) {
	tout := 120000.00 // navigation timeout handed to Goto
	log = log.With("scrapper", "bdr")
	if _, err := page.Goto(uri, playwright.PageGotoOptions{
		Timeout:   &tout,
		WaitUntil: playwright.WaitUntilStateLoad,
	}); err != nil {
		log.Error("could not get info", "error", err)
		return nil, err
	}
	err = page.WaitForLoadState()
	if err != nil {
		log.Error("waiting for page state", "err", err)
		return nil, err
	}

	// Best-effort waits: their error results are ignored, as before.
	page.Locator("section#divisas").WaitFor()
	compraLocator := page.Locator("span#compraUS")
	compraLocator.WaitFor()
	ventaLocator := page.Locator("span#ventaUS")

	compraSTR, err := compraLocator.TextContent()
	if err != nil {
		log.Error("parsing compra", "err", err)
		return nil, err
	}
	ventaSTR, err := ventaLocator.TextContent()
	if err != nil {
		// Previously logged as "parsing compra", which pointed at the wrong field.
		log.Error("parsing venta", "err", err)
		return nil, err
	}

	inst = &db.History{
		Name:   "banreservas",
		Parser: "brd",
		Parsed: time.Now().UTC(),
	}
	inst.Compra = helpers.Normalize(compraSTR)
	inst.Venta = helpers.Normalize(ventaSTR)

	// A zero on either side is treated as a failed parse.
	if inst.Compra == 0 || inst.Venta == 0 {
		return nil, fmt.Errorf("brd: institution not parsed")
	}
	return inst, nil
}
// ExecParser opens a page on browser with a mobile user agent, touch support
// and a 412x915 viewport, runs Scrape on it and passes the scraped record to
// db.Inspect.
//
// A nil browser or a page-creation failure is returned as an error instead of
// terminating the process, so the caller decides how to react. The page is
// closed before returning.
func ExecParser(
	ctx context.Context,
	db *db.DB,
	browser *playwright.Browser,
	log *slog.Logger) (err error) {
	if browser == nil || *browser == nil {
		return fmt.Errorf("bdr: nil browser")
	}
	t := true
	ua := helpers.NewMobileUA()
	b := *browser
	page, err := b.NewPage(playwright.BrowserNewPageOptions{
		UserAgent: &ua,
		// IsMobile: &t,
		HasTouch: &t,
		Viewport: &playwright.Size{
			Width:  412,
			Height: 915,
		},
		Screen: &playwright.Size{
			Width:  412,
			Height: 915,
		},
	})
	if err != nil {
		log.Error("creating page", "error", err)
		return fmt.Errorf("bdr: creating page: %w", err)
	}
	defer page.Close()

	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
	defer cancel()

	inst, err := Scrape(ctx, page, log)
	if err != nil {
		return err
	}
	// Hand the scraped record to the database layer.
	return db.Inspect(*inst)
}

141
bhd/bhd.go Normal file
View File

@ -0,0 +1,141 @@
package bhd
import (
"context"
"fmt"
"log/slog"
"os"
"strings"
"time"
"github.com/maximotejeda/us_dop_db/db"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/playwright-community/playwright-go"
)
// uri is the address Scrape navigates to. It is read from the BHD environment
// variable once, at package initialization, and is empty when the variable is
// unset.
var (
uri = os.Getenv("BHD")
)
// Scrape loads the page at uri and reads the USD buy ("compra") and sell
// ("venta") quotes from the inputs inside the "div.field_group" blocks whose
// text is "CompramosUS$RD$" or "VendemosUS$RD$".
//
// The original note on this function says it needs a mobile user agent. ctx is
// not consulted. A nil record and a non-nil error are returned when
// navigation or a mandatory wait fails, or when either quote is still 0 after
// every field group has been inspected.
func Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (inst *db.History, err error) {
	tout := 120000.00 // navigation timeout handed to Goto
	log = log.With("scrapper", "bhd")
	if _, err := page.Goto(uri, playwright.PageGotoOptions{
		Timeout:   &tout,
		WaitUntil: playwright.WaitUntilStateLoad,
	}); err != nil {
		log.Error("could not get info", "error", err)
		return nil, err
	}
	err = page.WaitForLoadState()
	if err != nil {
		log.Error("waiting for page state", "err", err)
		return nil, err
	}
	err = page.Locator("html body").WaitFor()
	if err != nil {
		log.Error("waiting for locating body", "err", err)
		return nil, err
	}

	fieldGroupLocator := page.Locator("div.field_group")
	fieldGroupLocator.WaitFor() // best-effort wait; All() below is the checked call
	fieldGroup, err := fieldGroupLocator.All()
	if err != nil {
		log.Error("locating field group", "err", err)
		return nil, err
	}

	inst = &db.History{
		Name:   "banco hipotecario dominicano",
		Parser: "bhd",
		Parsed: time.Now().UTC(),
	}

	for _, it := range fieldGroup {
		texts, textErr := it.AllTextContents()
		if textErr != nil || len(texts) == 0 {
			// Indexing texts[0] unconditionally used to panic on an empty result.
			continue
		}
		label := texts[0]
		if label != "CompramosUS$RD$" && label != "VendemosUS$RD$" {
			continue
		}
		// "CompramosUS$RD$" -> "compramos", "VendemosUS$RD$" -> "vendemos".
		place := strings.ToLower(strings.ReplaceAll(label, "US$RD$", ""))

		// The group holds several nested divs; only those carrying an input
		// with a value other than "" or "1" are taken as the quote.
		places, _ := it.Locator("div").Locator("div").All()
		for _, x := range places {
			input := x.Locator("div input")
			if count, _ := input.Count(); count <= 0 {
				continue
			}
			val, _ := input.InputValue()
			if val == "" || val == "1" {
				continue
			}
			price := helpers.Normalize(val)
			switch place {
			case "compramos":
				inst.Compra = price
			case "vendemos":
				inst.Venta = price
			}
		}
	}

	// A zero on either side is treated as a failed parse.
	if inst.Compra == 0 || inst.Venta == 0 {
		return nil, fmt.Errorf("bhd: institution not parsed: %v", inst)
	}
	return inst, nil
}
// ExecParser opens a page on browser with a mobile user agent, touch support
// and a 412x915 viewport, runs Scrape on it and passes the scraped record to
// db.Inspect.
//
// A nil browser or a page-creation failure is returned as an error instead of
// terminating the process, so the caller decides how to react. The page is
// closed before returning.
func ExecParser(
	ctx context.Context,
	db *db.DB,
	browser *playwright.Browser,
	log *slog.Logger) (err error) {
	if browser == nil || *browser == nil {
		return fmt.Errorf("bhd: nil browser")
	}
	t := true
	ua := helpers.NewMobileUA()
	b := *browser
	page, err := b.NewPage(playwright.BrowserNewPageOptions{
		UserAgent: &ua,
		// IsMobile: &t,
		HasTouch: &t,
		Viewport: &playwright.Size{
			Width:  412,
			Height: 915,
		},
		Screen: &playwright.Size{
			Width:  412,
			Height: 915,
		},
	})
	if err != nil {
		log.Error("creating page", "error", err)
		return fmt.Errorf("bhd: creating page: %w", err)
	}
	defer page.Close()

	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
	defer cancel()

	inst, err := Scrape(ctx, page, log)
	if err != nil {
		return err
	}
	// Hand the scraped record to the database layer.
	return db.Inspect(*inst)
}

BIN
bin/crawler-linux-amd64-0.0.0_1 Executable file

Binary file not shown.

BIN
bin/crawler-linux-arm64-0.0.0_1 Executable file

Binary file not shown.

97
bnc/bnc.go Normal file
View File

@ -0,0 +1,97 @@
package bnc
import (
"context"
"fmt"
"log/slog"
"os"
"time"
"github.com/maximotejeda/us_dop_db/db"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/playwright-community/playwright-go"
)
// uri is the address Scrape navigates to. It is read from the BNC environment
// variable once, at package initialization, and is empty when the variable is
// unset.
var (
uri = os.Getenv("BNC")
)
// Scrape navigates page to uri and reads the buy ("compra") and sell ("venta")
// quotes from the calculator's buy and sell inputs.
//
// ctx is not consulted. The result is nil together with an error when
// navigation fails, when either input value cannot be read, or when either
// normalized quote is 0.
func Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (inst *db.History, err error) {
	navTimeout := 120000.00
	logger := log.With("scrapper", "bnc")

	gotoOpts := playwright.PageGotoOptions{
		Timeout:   &navTimeout,
		WaitUntil: playwright.WaitUntilStateLoad,
	}
	if _, gotoErr := page.Goto(uri, gotoOpts); gotoErr != nil {
		logger.Error("could not get info", "error", gotoErr)
		return nil, gotoErr
	}

	// Wait for the calculator widget; the wait's own result is not inspected.
	page.Locator(".calculator__content").WaitFor()

	buyField := page.Locator("div.calculator__buy-input:nth-child(2) > input:nth-child(2)")
	sellField := page.Locator("div.calculator__sell-input:nth-child(2) > input:nth-child(2)")

	buyText, err := buyField.InputValue()
	if err != nil {
		logger.Error("could not get compra str", "err", err)
		return nil, err
	}
	sellText, err := sellField.InputValue()
	if err != nil {
		logger.Error("could not get venta string", "err", err)
		return nil, err
	}

	record := &db.History{
		Name:   "banesco",
		Parser: "bnc",
		Parsed: time.Now().UTC(),
	}
	record.Venta = helpers.Normalize(sellText)
	record.Compra = helpers.Normalize(buyText)

	if record.Compra != 0 && record.Venta != 0 {
		return record, nil
	}
	return nil, fmt.Errorf("bnc: institution not parsed: %v", record)
}
// ExecParser opens a page on browser with a mobile user agent, touch support
// and a 412x915 viewport, runs Scrape on it and passes the scraped record to
// db.Inspect.
//
// A nil browser or a page-creation failure is returned as an error instead of
// terminating the process, so the caller decides how to react. The page is
// closed before returning.
func ExecParser(
	ctx context.Context,
	db *db.DB,
	browser *playwright.Browser,
	log *slog.Logger) (err error) {
	if browser == nil || *browser == nil {
		return fmt.Errorf("bnc: nil browser")
	}
	t := true
	ua := helpers.NewMobileUA()
	b := *browser
	page, err := b.NewPage(playwright.BrowserNewPageOptions{
		UserAgent: &ua,
		// IsMobile: &t,
		HasTouch: &t,
		Viewport: &playwright.Size{
			Width:  412,
			Height: 915,
		},
		Screen: &playwright.Size{
			Width:  412,
			Height: 915,
		},
	})
	if err != nil {
		log.Error("creating page", "error", err)
		return fmt.Errorf("bnc: creating page: %w", err)
	}
	defer page.Close()

	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
	defer cancel()

	inst, err := Scrape(ctx, page, log)
	if err != nil {
		return err
	}
	// Hand the scraped record to the database layer.
	return db.Inspect(*inst)
}

120
bpd/bpd.go Normal file
View File

@ -0,0 +1,120 @@
package bpd
import (
"context"
"fmt"
"log/slog"
"os"
"strconv"
"time"
"github.com/maximotejeda/us_dop_db/db"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/playwright-community/playwright-go"
)
// uri is the address Scrape navigates to. It is read from the BPD environment
// variable once, at package initialization, and is empty when the variable is
// unset.
var (
uri = os.Getenv("BPD")
)
// Scrape loads the page at uri and reads the USD buy ("compra") and sell
// ("venta") quotes from input#compra_peso_dolar and input#venta_peso_dolar.
//
// The original note on this function says it needs a mobile user agent. ctx is
// not consulted. A nil record and a non-nil error are returned when
// navigation fails, when either input cannot be read or parsed as a float, or
// when either quote equals 0.
func Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (inst *db.History, err error) {
	tout := 120000.00 // timeout shared by Goto and the compra visibility wait
	log = log.With("scrapper", "bpd")
	if _, err := page.Goto(uri, playwright.PageGotoOptions{
		Timeout:   &tout,
		WaitUntil: playwright.WaitUntilStateLoad,
	}); err != nil {
		log.Error("could not get info", "error", err)
		return nil, err
	}

	// Best-effort waits: their error results are ignored, as before.
	container := page.Locator("div.tasa.tasa_dolar")
	container.WaitFor()
	compraInput := page.Locator("input#compra_peso_dolar")
	compraInput.WaitFor(playwright.LocatorWaitForOptions{
		Timeout: &tout,
		State:   playwright.WaitForSelectorStateVisible,
	})
	ventaInput := page.Locator("input#venta_peso_dolar")
	ventaInput.WaitFor()

	compraSTR, err := compraInput.InputValue()
	if err != nil {
		log.Error("compra value", "err", err)
		return nil, err
	}
	ventaSTR, err := ventaInput.InputValue()
	if err != nil {
		// Previously logged as "compra value", which pointed at the wrong field.
		log.Error("venta value", "err", err)
		return nil, err
	}

	compra, err := strconv.ParseFloat(compraSTR, 64)
	if err != nil {
		log.Error("parsing value", "where", "compra", "err", err)
		return nil, err
	}
	venta, err := strconv.ParseFloat(ventaSTR, 64)
	if err != nil {
		log.Error("parsing value", "where", "venta", "err", err)
		return nil, err
	}

	inst = &db.History{
		Name:   "banco popular",
		Parser: "bpd",
		Parsed: time.Now().UTC(),
	}
	inst.Compra = compra
	inst.Venta = venta

	// A zero on either side is treated as a failed parse.
	if inst.Compra == 0 || inst.Venta == 0 {
		return nil, fmt.Errorf("bpd: institution not parsed")
	}
	return inst, nil
}
// HoverTasas hovers over the third item of the ".footer_est_menu_bpd" list on
// page. The outcome of the hover is not reported to the caller.
func HoverTasas(page playwright.Page) {
	page.Locator(".footer_est_menu_bpd > li:nth-child(3)").Hover()
}
// ExecParser opens a page on browser with a mobile user agent, touch support
// and a 412x915 viewport, runs Scrape on it and passes the scraped record to
// db.Inspect.
//
// A nil browser or a page-creation failure is returned as an error instead of
// terminating the process, so the caller decides how to react. The page is
// closed before returning.
func ExecParser(
	ctx context.Context,
	db *db.DB,
	browser *playwright.Browser,
	log *slog.Logger) (err error) {
	if browser == nil || *browser == nil {
		return fmt.Errorf("bpd: nil browser")
	}
	t := true
	ua := helpers.NewMobileUA()
	b := *browser
	page, err := b.NewPage(playwright.BrowserNewPageOptions{
		UserAgent: &ua,
		// IsMobile: &t,
		HasTouch: &t,
		Viewport: &playwright.Size{
			Width:  412,
			Height: 915,
		},
		Screen: &playwright.Size{
			Width:  412,
			Height: 915,
		},
	})
	if err != nil {
		log.Error("creating page", "error", err)
		return fmt.Errorf("bpd: creating page: %w", err)
	}
	defer page.Close()

	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
	defer cancel()

	inst, err := Scrape(ctx, page, log)
	if err != nil {
		return err
	}
	// Hand the scraped record to the database layer.
	return db.Inspect(*inst)
}

View File

@ -1,7 +1,79 @@
package main
import "fmt"
import (
"context"
"log/slog"
"os"
"os/signal"
"syscall"
"time"
func main(){
fmt.Println("working imageb")
"github.com/maximotejeda/us_dop_db/db"
"github.com/maximotejeda/us_dop_scrapper/apa"
"github.com/maximotejeda/us_dop_scrapper/bcd"
"github.com/maximotejeda/us_dop_scrapper/bdr"
"github.com/maximotejeda/us_dop_scrapper/bhd"
"github.com/maximotejeda/us_dop_scrapper/bnc"
"github.com/maximotejeda/us_dop_scrapper/bpd"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/maximotejeda/us_dop_scrapper/inf"
"github.com/maximotejeda/us_dop_scrapper/scotia"
"github.com/maximotejeda/us_dop_scrapper/vimenca"
"github.com/playwright-community/playwright-go"
)
// main runs exactly one scraping task, chosen by the WHO environment variable
// (any unrecognized or empty value selects the "inf" task), against the
// database named by DBURI.
//
// The process exits with status 1 when the task returns an error or when
// SIGINT, SIGTERM or SIGHUP arrives before the task finishes, and with status
// 0 otherwise. Previously the signals were routed to a channel nobody read,
// which silently swallowed them.
func main() {
	log := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{}))

	// ctx is cancelled on the first termination signal.
	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM, syscall.SIGHUP)
	defer stop()

	store := db.Dial(os.Getenv("DBURI"), log)
	store.CreateTables()

	// One browser of each engine is handed to the task runner.
	chrome, firefox, webkit := helpers.CreateBrowser(log)
	browserList := []*playwright.Browser{chrome, firefox, webkit}

	// NOTE(review): these tickers are never read in this function; they are
	// kept because removing them would leave the time import unused.
	longTick := time.NewTicker(time.Minute * 2)
	defer longTick.Stop()
	infoTick := time.NewTicker(time.Minute * 1)
	defer infoTick.Stop()

	errN := map[string]int{
		"bcd":  0,
		"bpd":  0,
		"apap": 0,
		"inf":  0,
	}

	who := os.Getenv("WHO")
	runTask := func() error {
		switch who {
		case "bcd":
			return helpers.ExecTask(ctx, store, browserList, log, errN, "bcd", bcd.ExecParser)
		case "bpd":
			return helpers.ExecTask(ctx, store, browserList, log, errN, "bpd", bpd.ExecParser)
		case "apa":
			return helpers.ExecTask(ctx, store, browserList, log, errN, "apa", apa.ExecParser)
		case "brd":
			return helpers.ExecTask(ctx, store, browserList, log, errN, "brd", bdr.ExecParser)
		case "bhd":
			return helpers.ExecTask(ctx, store, browserList, log, errN, "bhd", bhd.ExecParser)
		case "bnc":
			return helpers.ExecTask(ctx, store, browserList, log, errN, "bnc", bnc.ExecParser)
		case "scotia":
			return helpers.ExecTask(ctx, store, browserList, log, errN, "scotia", scotia.ExecParser)
		case "vimenca":
			return helpers.ExecTask(ctx, store, browserList, log, errN, "vimenca", vimenca.ExecParser)
		default:
			return helpers.ExecTask(ctx, store, browserList, log, errN, "inf", inf.ExecParser)
		}
	}

	// Run the task in the background so a signal can end the process even if
	// the task itself never looks at ctx.
	done := make(chan error, 1)
	go func() { done <- runTask() }()

	select {
	case err := <-done:
		if err != nil {
			log.Error("task executed with errors", "name", who, "error", err)
			os.Exit(1)
		}
	case <-ctx.Done():
		log.Error("task interrupted by signal", "name", who)
		os.Exit(1)
	}
	log.Info("SUCCESS - task executed", "name", who)
}

14
config/config.go Normal file
View File

@ -0,0 +1,14 @@
package config
import "os"
// GetWho returns the value of the WHO environment variable. It panics, via
// getEnvValue, when the variable is unset or empty.
func GetWho() string {
	who := getEnvValue("WHO")
	return who
}
// getEnvValue returns the value of the environment variable key. It panics
// with "key not found <key>" when the variable is unset or empty.
func getEnvValue(key string) string {
	value := os.Getenv(key)
	if value != "" {
		return value
	}
	panic("key not found " + key)
}

423
db-old/db.go Normal file
View File

@ -0,0 +1,423 @@
package db
import (
"database/sql"
_ "embed"
"encoding/json"
"errors"
"fmt"
"log/slog"
"strings"
"time"
"github.com/maximotejeda/us_dop_scrapper/models"
"github.com/maximotejeda/us_dop_scrapper/pub"
_ "modernc.org/sqlite"
)
// schema holds the contents of schema.sql, embedded at build time. It is
// executed by CreateTables.
//
//go:embed schema.sql
var schema string
// DB wraps a *sql.DB together with the logger used by its methods.
type DB struct {
*sql.DB
log *slog.Logger
}
// change pairs the previously stored record (Before) with the newly scraped
// one (After); it is the Data payload of Message.
type change struct {
Before models.Institucion `json:"before"`
After models.Institucion `json:"after"`
}
// Message is the JSON document Inspect marshals and publishes when a record is
// added or a price changes.
// NOTE(review): Error is an interface value; encoding/json usually renders
// error values as {} or null — confirm consumers do not rely on this field.
type Message struct {
Message string `json:"message"`
Data change `json:"data"`
Error error `json:"error"`
}
// Institution mirrors a row of the institutions table.
// NOTE(review): GETInstitution fills ID, Name and ShortName only; Created is
// not populated by any code visible here.
type Institution struct {
ID int
Name string
ShortName string
Created time.Time
}
// Dial opens the "sqlite" database at path, verifies the connection with a
// ping and returns it wrapped in a DB that logs through log.
//
// It panics when the database cannot be opened or pinged, after printing the
// underlying error to standard output.
func Dial(path string, log *slog.Logger) *DB {
	conn, openErr := sql.Open("sqlite", path)
	if openErr != nil {
		fmt.Printf("opening database: %s", openErr.Error())
		panic("opening database")
	}

	pingErr := conn.Ping()
	if pingErr != nil {
		fmt.Printf("pinging database: %s", pingErr.Error())
		panic("pinging database")
	}

	return &DB{DB: conn, log: log}
}
// CreateTables executes the embedded schema against the database and panics
// when execution fails.
func (db *DB) CreateTables() {
	if _, execErr := db.Exec(schema); execErr != nil {
		panic(execErr)
	}
}
// Inspect compares a freshly scraped record with the latest stored row for
// the same parser and institution name, and persists it when it is new or
// when compra or venta changed.
//
//   - No stored row: the institution is inserted, the record is stored and an
//     "add new institution" message is published.
//   - Same compra and venta: nothing is written and nil is returned.
//   - Different compra or venta: the record is stored and a "change
//     registered" message carrying the before/after pair is published.
//
// Messages go to the "dolar-crawler" subject and are published only after the
// insert succeeded. Any lookup error other than sql.ErrNoRows is returned
// instead of being reported as a nil row.
func (db *DB) Inspect(enter models.Institucion) error {
	if db == nil {
		return fmt.Errorf("nil or empty database")
	}
	publish, closePublisher := pub.Publisher()
	defer closePublisher()

	msg := Message{}

	// Latest stored row for this parser/name pair.
	inst, err := db.GetLatest(enter.Parser, enter.Name)
	switch {
	case errors.Is(err, sql.ErrNoRows):
		// First time this parser/name pair is seen.
		db.log.Info("adding new item to table: ", "parse", enter.Parser, "name", enter.Name)
		msg.Message = "add new institution"
		msg.Data.After = enter
		data, marshalErr := json.Marshal(msg)
		if marshalErr != nil {
			db.log.Error("marshaling struct", "error", marshalErr)
		}
		id, err := db.ADDInstitution(enter.Name)
		if err != nil {
			return err
		}
		if err := db.AddNew(enter, id); err != nil {
			return err
		}
		if marshalErr == nil {
			publish("dolar-crawler", data)
		}
		return nil
	case err != nil:
		return fmt.Errorf("getting latest row for %q: %w", enter.Name, err)
	case inst == nil:
		db.log.Error("row is nil", "name", enter.Name, "parser", enter.Parser)
		return fmt.Errorf("row is nil, not entering row")
	}

	// Unchanged prices: nothing to store.
	if enter.Compra == inst.Compra && enter.Venta == inst.Venta {
		return nil
	}

	// One of the prices changed: store a new row and announce the change.
	db.log.Info("change registered, adding item", "parse", enter.Parser, "name", enter.Name, "compra enter", enter.Compra, "compra db", inst.Compra, "venta enter", enter.Venta, "venta db", inst.Venta)
	msg.Message = "change registered"
	msg.Data.After = enter
	msg.Data.Before = *inst
	data, marshalErr := json.Marshal(msg)
	if marshalErr != nil {
		db.log.Error("marshaling struct", "error", marshalErr)
	}
	ins, err := db.GETInstitution(enter.Name)
	if err != nil {
		return err
	}
	if ins == nil {
		return fmt.Errorf("institution %q not found", enter.Name)
	}
	if err := db.AddNew(enter, int64(ins.ID)); err != nil {
		return err
	}
	if marshalErr == nil {
		publish("dolar-crawler", data)
	}
	return nil
}
// GetLatest fetches the most recently parsed row for the given parser and
// institution name.
//
// The parsed column is read as text and decoded with the time.DateTime
// layout. On any failure (prepare, scan — including no matching row — or
// timestamp decoding) the record is nil and the error is returned.
func (db *DB) GetLatest(parser string, name string) (inst *models.Institucion, err error) {
	const query = "SELECT i.name, d.parser, d.compra, d.venta, d.parsed FROM dolars AS d JOIN institutions as i ON d.name_id = i.id WHERE d.parser = ? AND i.name = ? ORDER BY d.parsed DESC LIMIT 1;"

	stmt, err := db.Prepare(query)
	if err != nil {
		db.log.Error("preparing stmtt", "error", err.Error())
		return nil, err
	}
	defer stmt.Close()

	var (
		latest    models.Institucion
		rawParsed string
	)
	err = stmt.QueryRow(parser, name).Scan(&latest.Name, &latest.Parser, &latest.Compra, &latest.Venta, &rawParsed)
	if err != nil {
		db.log.Error("getting latest", "error", err.Error(), "parser", parser, "name", name)
		return nil, err
	}

	latest.Parsed, err = time.Parse(time.DateTime, rawParsed)
	if err != nil {
		return nil, err
	}
	return &latest, nil
}
// AddNew inserts row into the dolars table, linked to the institution whose
// primary key is id. The Parsed timestamp is stored as text in the
// time.DateTime layout.
func (db *DB) AddNew(row models.Institucion, id int64) error {
	insert, err := db.Prepare("INSERT INTO dolars (name_id, compra, venta, parser, parsed) VALUES(?,?,?,?,?);")
	if err != nil {
		return err
	}
	defer insert.Close()

	stamp := row.Parsed.Format(time.DateTime)
	_, err = insert.Exec(&id, &row.Compra, &row.Venta, &row.Parser, &stamp)
	return err
}
// ADDInstitution inserts a new institutions row for name, using
// shortner(name) as its short name and the current time (time.DateTime
// layout) as its creation stamp. It returns the new row's id, or 0 and the
// error when any step fails.
func (db *DB) ADDInstitution(name string) (id int64, err error) {
	insert, err := db.Prepare("INSERT INTO institutions (name, short_name, created) VALUES(?,?,?);")
	if err != nil {
		return 0, err
	}
	defer insert.Close()

	created := time.Now().Format(time.DateTime)
	result, err := insert.Exec(&name, shortner(name), &created)
	if err != nil {
		return 0, err
	}

	if id, err = result.LastInsertId(); err != nil {
		return 0, err
	}
	return id, nil
}
// GETInstitution looks up the institutions row whose name equals name and
// returns it with ID, Name and ShortName filled in (Created is not read).
//
// It returns nil and the error when the statement cannot be prepared or the
// row cannot be scanned, including when no row matches. It previously
// returned the never-assigned named result, so callers always received nil.
func (db *DB) GETInstitution(name string) (inst *Institution, err error) {
	stmtt, err := db.Prepare("SELECT id, name, short_name FROM institutions WHERE name = ?")
	if err != nil {
		db.log.Error("preparing stmt", "error", err.Error())
		return nil, err
	}
	defer stmtt.Close()

	institution := Institution{}
	if err := stmtt.QueryRow(name).Scan(&institution.ID, &institution.Name, &institution.ShortName); err != nil {
		db.log.Error("getting institution", "error", err.Error(), "short name", institution.ShortName, "name", name)
		return nil, err
	}
	return &institution, nil
}
// GetAll returns the names of every row in the institutions table, skipping
// empty names.
//
// On a prepare, query or scan failure it returns nil and the error. When row
// iteration itself reports an error, the names collected so far are returned
// together with that error.
func (db *DB) GetAll() ([]string, error) {
	stmt, err := db.Prepare("SELECT i.name FROM institutions AS i;")
	if err != nil {
		db.log.Error("[db-GetAll]", "error", err)
		return nil, err
	}
	// The prepared statement used to be leaked; release it on every path.
	defer stmt.Close()

	rows, err := stmt.Query()
	if err != nil {
		db.log.Error("[db-GetAll-stmt]", "error", err)
		return nil, err
	}
	defer rows.Close()

	insts := []string{}
	for rows.Next() {
		inst := ""
		if err = rows.Scan(&inst); err != nil {
			return nil, err
		}
		if inst == "" {
			continue
		}
		insts = append(insts, inst)
	}
	if err := rows.Err(); err != nil {
		return insts, err
	}
	return insts, nil
}
// GetBancos returns the names of institutions whose name contains "ban" or
// "scoti", skipping empty names.
//
// On a prepare, query or scan failure it returns nil and the error. When row
// iteration itself reports an error, the names collected so far are returned
// together with that error.
func (db *DB) GetBancos() ([]string, error) {
	stmt, err := db.Prepare("SELECT i.name FROM institutions AS i WHERE i.name LIKE '%ban%' OR i.name LIKE '%scoti%'")
	if err != nil {
		// Log tags previously said "GetAll", a copy-paste leftover.
		db.log.Error("[db-GetBancos]", "error", err)
		return nil, err
	}
	// The prepared statement used to be leaked; release it on every path.
	defer stmt.Close()

	rows, err := stmt.Query()
	if err != nil {
		db.log.Error("[db-GetBancos-stmt]", "error", err)
		return nil, err
	}
	defer rows.Close()

	insts := []string{}
	for rows.Next() {
		inst := ""
		if err = rows.Scan(&inst); err != nil {
			return nil, err
		}
		if inst == "" {
			continue
		}
		insts = append(insts, inst)
	}
	if err := rows.Err(); err != nil {
		return insts, err
	}
	return insts, nil
}
// GetCajas returns the names of institutions whose name contains
// "asociacion", skipping empty names.
//
// On a prepare, query or scan failure it returns nil and the error. When row
// iteration itself reports an error, the names collected so far are returned
// together with that error.
func (db *DB) GetCajas() ([]string, error) {
	stmt, err := db.Prepare("SELECT i.name FROM institutions AS i WHERE i.name LIKE '%asociacion%'")
	if err != nil {
		// Log tags previously said "GetAll", a copy-paste leftover.
		db.log.Error("[db-GetCajas]", "error", err)
		return nil, err
	}
	// The prepared statement used to be leaked; release it on every path.
	defer stmt.Close()

	rows, err := stmt.Query()
	if err != nil {
		db.log.Error("[db-GetCajas-stmt]", "error", err)
		return nil, err
	}
	defer rows.Close()

	insts := []string{}
	for rows.Next() {
		inst := ""
		if err = rows.Scan(&inst); err != nil {
			return nil, err
		}
		if inst == "" {
			continue
		}
		insts = append(insts, inst)
	}
	if err := rows.Err(); err != nil {
		return insts, err
	}
	return insts, nil
}
// GetAgentes returns the names of institutions whose name contains none of
// "ban", "scoti" or "asociacion", skipping empty names.
//
// On a prepare, query or scan failure it returns nil and the error. When row
// iteration itself reports an error, the names collected so far are returned
// together with that error.
func (db *DB) GetAgentes() ([]string, error) {
	stmt, err := db.Prepare("SELECT i.name FROM institutions AS i WHERE i.name NOT LIKE '%ban%' AND i.name NOT LIKE '%scoti%' AND i.name NOT LIKE '%asociacion%'")
	if err != nil {
		// Log tags previously said "GetAll", a copy-paste leftover.
		db.log.Error("[db-GetAgentes]", "error", err)
		return nil, err
	}
	// The prepared statement used to be leaked; release it on every path.
	defer stmt.Close()

	rows, err := stmt.Query()
	if err != nil {
		db.log.Error("[db-GetAgentes-stmt]", "error", err)
		return nil, err
	}
	defer rows.Close()

	insts := []string{}
	for rows.Next() {
		inst := ""
		if err = rows.Scan(&inst); err != nil {
			return nil, err
		}
		if inst == "" {
			continue
		}
		insts = append(insts, inst)
	}
	if err := rows.Err(); err != nil {
		return insts, err
	}
	return insts, nil
}
// GetLastPrice fetches the most recently parsed row for the institution called
// name, regardless of parser.
//
// The parsed column is read as text and decoded with the time.DateTime
// layout. On any failure (prepare, scan — including no matching row — or
// timestamp decoding) the record is nil and the error is returned.
func (db *DB) GetLastPrice(name string) (inst *models.Institucion, err error) {
	const query = "SELECT i.name, d.parser, d.compra, d.venta, d.parsed FROM dolars AS d JOIN institutions as i ON d.name_id = i.id WHERE name = ? ORDER BY parsed DESC LIMIT 1;"

	lookup, err := db.Prepare(query)
	if err != nil {
		db.log.Error("preparing", "error", err.Error())
		return nil, err
	}
	defer lookup.Close()

	var (
		last      models.Institucion
		rawParsed string
	)
	err = lookup.QueryRow(name).Scan(&last.Name, &last.Parser, &last.Compra, &last.Venta, &rawParsed)
	if err != nil {
		db.log.Error("getting last price", "error", err.Error(), "name", name)
		return nil, err
	}

	last.Parsed, err = time.Parse(time.DateTime, rawParsed)
	if err != nil {
		return nil, err
	}
	return &last, nil
}
// GetChangeSince returns every stored row for the institution called name
// whose parsed timestamp is later than now minus duration, newest first.
//
// The cutoff is computed in UTC.
// NOTE(review): this assumes parsed is stored in UTC, as the scrapers in this
// repository set Parsed with time.Now().UTC() — confirm for existing data.
//
// Rows whose timestamp cannot be decoded with the time.DateTime layout are
// skipped. Prepare, query, scan and row-iteration failures return nil and the
// error.
func (db *DB) GetChangeSince(name string, duration time.Duration) (insts []*models.Institucion, err error) {
	date := time.Now().UTC().Add(-duration).Format(time.DateTime)
	stmt, err := db.Prepare("SELECT i.name, d.parser, d.compra, d.venta, d.parsed FROM dolars AS d JOIN institutions as i ON d.name_id = i.id WHERE name = ? AND parsed > ? ORDER BY parsed DESC;")
	if err != nil {
		db.log.Error("[GetChangeSince] preparing", "error", err.Error())
		return nil, err
	}
	defer stmt.Close()

	rows, err := stmt.Query(name, date)
	if err != nil {
		// Previously logged as "preparing", which pointed at the wrong step.
		db.log.Error("[GetChangeSince] querying", "error", err.Error())
		return nil, err
	}
	defer rows.Close()

	for rows.Next() {
		inst := models.Institucion{}
		parsed := ""
		if err := rows.Scan(&inst.Name, &inst.Parser, &inst.Compra, &inst.Venta, &parsed); err != nil {
			db.log.Error("[GetChangeSince] scanning", "error", err)
			return nil, err
		}
		stamp, parseErr := time.Parse(time.DateTime, parsed)
		if parseErr != nil {
			// Undecodable timestamps are skipped rather than failing the query.
			continue
		}
		inst.Parsed = stamp
		insts = append(insts, &inst)
	}
	// Report an iteration failure instead of returning a truncated result.
	if err := rows.Err(); err != nil {
		db.log.Error("[GetChangeSince] iterating", "error", err)
		return nil, err
	}
	return insts, nil
}
// shortner derives a short identifier for an institution name.
//
// Known names (matched case-insensitively) map to fixed abbreviations. Any
// other name is split on whitespace:
//   - one word: the word itself;
//   - two words: first letter of the first word plus up to two letters of the
//     second;
//   - three words: the initials of the three words;
//   - anything else (including whitespace-only input): "n/a".
//
// An empty name yields "".
func shortner(name string) string {
	if name == "" {
		return ""
	}
	switch strings.ToLower(name) {
	case "banco popular":
		return "bpd"
	case "banreservas":
		return "brd"
	case "banco central dominicano":
		return "bcd"
	case "banco hipotecario dominicano":
		return "bhd"
	case "asociacion popular de ahorros y prestamos":
		return "apap"
	case "asociacion cibao de ahorros y prestamos":
		return "acap"
	case "asociacion la nacional de ahorros y prestamos":
		return "alnap"
	case "asociacion peravia de ahorros y prestamos":
		return "apeap"
	case "banco santa cruz":
		return "bsc"
	case "imbert y balbuena":
		return "imb"
	case "banco activo dominicana":
		return "bacd"
	case "scotiabank cambio online":
		return "scline"
	case "banco lopez de haro":
		return "blh"
	}

	// Fields never yields empty words, so repeated spaces cannot cause
	// out-of-range indexing.
	words := strings.Fields(name)
	switch len(words) {
	case 1:
		return words[0]
	case 2:
		return leadingRunes(words[0], 1) + leadingRunes(words[1], 2)
	case 3:
		// Concatenate the initials; the old code added the bytes numerically
		// and produced a single unrelated character.
		return leadingRunes(words[0], 1) + leadingRunes(words[1], 1) + leadingRunes(words[2], 1)
	default:
		return "n/a"
	}
}

// leadingRunes returns the first n runes of word, or the whole word when it
// has fewer than n runes.
func leadingRunes(word string, n int) string {
	runes := []rune(word)
	if len(runes) > n {
		runes = runes[:n]
	}
	return string(runes)
}

18
db-old/schema.sql Normal file
View File

@ -0,0 +1,18 @@
-- Schema for the crawler database: one row per institution and one row per
-- recorded price in dolars.
-- NOTE(review): this pragma is a per-connection setting; confirm it is applied
-- on every pooled connection, not only the one that runs this script.
PRAGMA foreign_keys = ON;

-- Price history. name_id points at institutions(id); deleting an institution
-- removes its prices. Table names are plain identifiers rather than
-- single-quoted string literals.
CREATE TABLE IF NOT EXISTS dolars (
    id INTEGER PRIMARY KEY,
    name_id INTEGER NOT NULL,
    compra REAL NOT NULL,
    venta REAL NOT NULL,
    parser TEXT NOT NULL,
    parsed TEXT NOT NULL,
    FOREIGN KEY(name_id) REFERENCES institutions(id) ON DELETE CASCADE
);

-- Institutions whose prices are tracked.
CREATE TABLE IF NOT EXISTS institutions (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    short_name TEXT NOT NULL,
    created TEXT NOT NULL
);

View File

@ -2,9 +2,13 @@ version: '3.8'
services:
crawler:
container-name: creawler
build:
dockerfile: Dockerfile
env_file:
- .env
- .env
develop:
watch:
- action: rebuild
path: ./

BIN
dolardb/crawler.db Normal file

Binary file not shown.

31
go.mod
View File

@ -1,12 +1,39 @@
module github.com/maximotejeda/us_dop_scrapper
go 1.22
go 1.22.0
require github.com/playwright-community/playwright-go v0.4001.0
require (
github.com/nats-io/nats.go v1.33.1
github.com/playwright-community/playwright-go v0.4001.0
golang.org/x/text v0.14.0
modernc.org/sqlite v1.29.1
)
require (
github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/go-jose/go-jose/v3 v3.0.1 // indirect
github.com/go-stack/stack v1.8.1 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
github.com/klauspost/compress v1.17.2 // indirect
github.com/mattn/go-isatty v0.0.16 // indirect
github.com/maximotejeda/msvc-proto/golang/dolar v0.0.0-3 // indirect
github.com/nats-io/nkeys v0.4.7 // indirect
github.com/nats-io/nuid v1.0.1 // indirect
github.com/ncruces/go-strftime v0.1.9 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/crypto v0.19.0 // indirect
golang.org/x/net v0.21.0 // indirect
golang.org/x/sys v0.17.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de // indirect
google.golang.org/grpc v1.63.2 // indirect
google.golang.org/protobuf v1.33.0 // indirect
modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 // indirect
modernc.org/libc v1.41.0 // indirect
modernc.org/mathutil v1.6.0 // indirect
modernc.org/memory v1.7.2 // indirect
modernc.org/strutil v1.2.0 // indirect
modernc.org/token v1.1.0 // indirect
)

63
go.sum
View File

@ -3,17 +3,44 @@ github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964/go.mod h1:Xd9
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/go-jose/go-jose/v3 v3.0.1 h1:pWmKFVtt+Jl0vBZTIpz/eAKwsm6LkIxDVVbFHKkchhA=
github.com/go-jose/go-jose/v3 v3.0.1/go.mod h1:RNkWWRld676jZEYoV3+XK8L2ZnNSvIsxFMht0mSX+u8=
github.com/go-stack/stack v1.8.1 h1:ntEHSVwIt7PNXNpgPmVfMrNhLtgjlmnZha2kOpuRiDw=
github.com/go-stack/stack v1.8.1/go.mod h1:dcoOX6HbPZSZptuspn9bctJ+N/CnF5gGygcUP3XYfe4=
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbuBVKCudVG457BR2GZFIz3uw3hQ=
github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4=
github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/mattn/go-isatty v0.0.16 h1:bq3VjFmv/sOjHtdEhmkEV4x1AJtvUvOJ2PFAZ5+peKQ=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-sqlite3 v1.14.16 h1:yOQRA0RpS5PFz/oikGwBEqvAWhWg5ufRz4ETLjwpU1Y=
github.com/mattn/go-sqlite3 v1.14.16/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
github.com/maximotejeda/msvc-proto/golang/dolar v0.0.0-3 h1:J2qbFWgrnQvcVeHGoqBz5YtfLZW2uG18xYRk09BXIeI=
github.com/maximotejeda/msvc-proto/golang/dolar v0.0.0-3/go.mod h1:bAs0mlC1Vyn/BkHONL2Ik8ox9px9s9bhbJWgUQFMMWo=
github.com/mitchellh/go-ps v1.0.0 h1:i6ampVEEF4wQFF+bkYfwYgY+F/uYJDktmvLPf7qIgjc=
github.com/mitchellh/go-ps v1.0.0/go.mod h1:J4lOc8z8yJs6vUwklHw2XEIiT4z4C40KtWVN3nvg8Pg=
github.com/nats-io/nats.go v1.33.1 h1:8TxLZZ/seeEfR97qV0/Bl939tpDnt2Z2fK3HkPypj70=
github.com/nats-io/nats.go v1.33.1/go.mod h1:Ubdu4Nh9exXdSz0RVWRFBbRfrbSxOYd26oF0wkWclB8=
github.com/nats-io/nkeys v0.4.7 h1:RwNJbbIdYCoClSDNY7QVKZlyb/wfT6ugvFCiKy6vDvI=
github.com/nats-io/nkeys v0.4.7/go.mod h1:kqXRgRDPlGy7nGaEDMuYzmiJCIAAWDK0IMBtDmGD0nc=
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4=
github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
github.com/playwright-community/playwright-go v0.4001.0 h1:2cBiTIjCvFu7zUrZ48C0YC2DIp90Tbudueq4brUGjHM=
github.com/playwright-community/playwright-go v0.4001.0/go.mod h1:quEkYFrvvpQyGSxBjnYbGS52vrUDB2uaY1cOzkkSHCc=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
@ -22,12 +49,48 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190911031432-227b76d455e7/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc=
golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/mod v0.14.0 h1:dGoOF9QVLYng8IHTm7BAyWqCqSheQ5pYWGhzW00YJr0=
golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU=
golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.17.0 h1:FvmRgNOcs3kOa+T20R1uhfP9F6HgG2mfxDv1vrx1Htc=
golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de h1:cZGRis4/ot9uVm639a+rHCUaG0JJHEsdyzSQTMX+suY=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:H4O17MA/PE9BsGx3w+a+W2VOLLD1Qf7oJneAoU6WktY=
google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM=
google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA=
google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 h1:5D53IMaUuA5InSeMu9eJtlQXS2NxAhyWQvkKEgXZhHI=
modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6/go.mod h1:Qz0X07sNOR1jWYCrJMEnbW/X55x206Q7Vt4mz6/wHp4=
modernc.org/libc v1.41.0 h1:g9YAc6BkKlgORsUWj+JwqoB1wU3o4DE3bM3yvA3k+Gk=
modernc.org/libc v1.41.0/go.mod h1:w0eszPsiXoOnoMJgrXjglgLuDy/bt5RR4y3QzUUeodY=
modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4=
modernc.org/mathutil v1.6.0/go.mod h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo=
modernc.org/memory v1.7.2 h1:Klh90S215mmH8c9gO98QxQFsY+W451E8AnzjoE2ee1E=
modernc.org/memory v1.7.2/go.mod h1:NO4NVCQy0N7ln+T9ngWqOQfi7ley4vpwvARR+Hjw95E=
modernc.org/sqlite v1.29.1 h1:19GY2qvWB4VPw0HppFlZCPAbmxFU41r+qjKZQdQ1ryA=
modernc.org/sqlite v1.29.1/go.mod h1:hG41jCYxOAOoO6BRK66AdRlmOcDzXf7qnwlwjUIOqa0=
modernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA=
modernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0=
modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=

1
helpers/.#helpers.go Symbolic link
View File

@ -0,0 +1 @@
maximo@debian-pc.9800:1713363571

198
helpers/helpers.go Normal file
View File

@ -0,0 +1,198 @@
package helpers
import (
"context"
"fmt"
"log/slog"
"math/rand"
"os"
"strconv"
"strings"
"unicode"
"github.com/maximotejeda/us_dop_db/db"
"github.com/playwright-community/playwright-go"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
// UserAgent holds the individual pieces from which NewUA and NewMobileUA
// assemble a User-Agent string.
type UserAgent struct {
// version is the leading product token ("Mozilla/5.0" in both constructors).
version string
// system is the parenthesised platform description, picked from systems.
system string
// platformInfo is the rendering-engine token, picked from platformInfo.
platformInfo string
// platformDetails is the parenthesised engine detail, taken from platformDetails.
platformDetails string
// extensionsinfo is the browser token, picked from extensionInfo or
// overridden by NewMobileUA.
extensionsinfo string
}
// Building blocks for the generated User-Agent strings.
var (
// systems lists platform descriptions. The order matters: NewMobileUA
// slices systems[5:] and treats everything from index 5 on as mobile.
systems = []string{
"Macintosh; Intel Mac OS X 10_15_7",
"Windows NT 10.0; Win64; x64",
"Windows NT 6.1; Win64; x64; rv:109.0",
"X11; Linux x86_64",
"X11; CrOS x86_64 14541.0.0",
"Linux; Android 10; K",
"iPhone; CPU iPhone OS 17_1_2 like Mac OS X",
"iPhone; CPU iPhone OS 14_6 like Mac OS X",
"Linux; Android 9; JAT-L41",
"Linux; Android 11; SAMSUNG SM-G973U",
"iPad; CPU OS 14_7_1 like Mac OS X",
"Linux; U; en-us; KFAPWI Build/JDQ39",
}
// platformInfo lists rendering-engine tokens.
platformInfo = []string{
"AppleWebKit/605.1.15",
"AppleWebKit/537.36",
"Gecko/20100101",
}
// platformDetails lists engine detail strings; only index 0 is used.
platformDetails = []string{
"KHTML, like Gecko",
}
// extensionInfo lists browser tokens. NewMobileUA relies on the first two
// entries (extensionInfo[0:2]) being the Firefox and Chrome choices.
extensionInfo = []string{
"Firefox",
"Chrome/87.0.42",
"Safari/604.1",
"Safari/537.36",
"Version/14.1.2",
}
)
// NewUA returns a randomly assembled User-Agent string.
//
// The system, engine and browser tokens are picked at random from the
// package-level tables and a random number in [100, 120) is appended to the
// browser token. When the browser token is a Chrome one, a trailing
// "Safari/537.3" token is added.
func NewUA() string {
	ua := UserAgent{}
	ua.version = "Mozilla/5.0"
	ua.system = systems[rand.Intn(len(systems))]
	ua.platformInfo = platformInfo[rand.Intn(len(platformInfo))]
	ua.platformDetails = platformDetails[0]
	ua.extensionsinfo = extensionInfo[rand.Intn(len(extensionInfo))]

	extra := ""
	// The Chrome entry in extensionInfo carries a version suffix
	// ("Chrome/87.0.42"), so match on the prefix; an equality test against
	// "Chrome" can never succeed.
	if strings.HasPrefix(ua.extensionsinfo, "Chrome") {
		extra = "Safari/537.3"
	}

	// NOTE(review): entries that already contain a version end up with a
	// second "/<n>" segment (e.g. "Safari/604.1/105"); confirm this is intended.
	version := rand.Intn(20) + 100
	return fmt.Sprintf("%s (%s) %s (%s) %s/%d %s", ua.version, ua.system, ua.platformInfo, ua.platformDetails, ua.extensionsinfo, version, extra)
}
// NewMobileUA returns a randomly assembled User-Agent string for a mobile
// device.
//
// The system is picked from the mobile part of the systems table
// (systems[5:]). The browser tokens are then adjusted to the system:
//   - iPhone / iPad: a fixed Safari combination;
//   - Android: either a Firefox or a Chrome combination, chosen at random;
//   - anything else: the randomly picked browser token followed by
//     "mobile Safari/537.3".
func NewMobileUA() string {
	ua := UserAgent{}
	ua.version = "Mozilla/5.0"
	s := systems[5:]
	ua.system = s[rand.Intn(len(s))]
	ua.platformInfo = platformInfo[rand.Intn(len(platformInfo))]
	ua.platformDetails = platformDetails[0]
	ua.extensionsinfo = extensionInfo[rand.Intn(len(extensionInfo))]
	extra := "mobile Safari/537.3"

	switch {
	case strings.Contains(ua.system, "iPhone"), strings.Contains(ua.system, "iPad"):
		extra = "Mobile/15E148 Safari/604.1"
		ua.extensionsinfo = "Version/14.1.2"
	case strings.Contains(ua.system, "Android"):
		// Only the first two browser entries (Firefox, Chrome) are used
		// for Android systems.
		ie := extensionInfo[0:2]
		selected := ie[rand.Intn(len(ie))]
		if strings.Contains(selected, "Firefox") {
			extra = "Firefox/114.0"
			ua.extensionsinfo = "Gecko/114.0"
		} else {
			ua.extensionsinfo = "Chrome/114.0.0.0"
			extra = "Mobile Safari/537.36"
		}
		// A second, identical "Android" case used to follow here; it could
		// never be selected and has been removed.
	}
	return fmt.Sprintf("%s (%s) %s (%s) %s %s", ua.version, ua.system, ua.platformInfo, ua.platformDetails, ua.extensionsinfo, extra)
}
// Normalize extracts a decimal number from free-form text such as "$58.40".
//
// Every character other than the ASCII digits 0-9 and '.' is discarded and
// the remainder is parsed as a float64. It returns 0 when nothing numeric is
// left or when the remainder is not a valid number (for example "1.2.3");
// callers treat 0 as "not parsed".
func Normalize(val string) float64 {
	var digits strings.Builder
	for _, r := range val {
		// Keep ASCII digits only: other Unicode numerics (superscripts,
		// fractions, non-Latin digits) are rejected by ParseFloat and would
		// invalidate an otherwise good value.
		if (r >= '0' && r <= '9') || r == '.' {
			digits.WriteRune(r)
		}
	}
	if digits.Len() == 0 {
		return 0
	}
	cv, err := strconv.ParseFloat(digits.String(), 64)
	if err != nil {
		return 0
	}
	return cv
}
// CreateBrowser starts Playwright and launches one browser per engine.
//
// It returns pointers to the Chromium, Firefox and WebKit browsers, in that
// order. Any failure to start Playwright or to launch a browser is logged and
// terminates the process with exit status 1.
func CreateBrowser(log *slog.Logger) (chrome *playwright.Browser, firefox *playwright.Browser, webkit *playwright.Browser) {
	pw, err := playwright.Run(&playwright.RunOptions{
		Verbose: true,
	})
	if err != nil {
		log.Error("running pw, could not start", "error", err)
		os.Exit(1)
	}
	ff, err := pw.Firefox.Launch()
	if err != nil {
		log.Error("could not start browser", "error", err)
		os.Exit(1)
	}
	// Launch Chromium for the "chrome" result; this used to launch a second
	// Firefox instance, so every caller got Firefox regardless of its choice.
	cm, err := pw.Chromium.Launch()
	if err != nil {
		log.Error("could not start browser", "error", err)
		os.Exit(1)
	}
	sf, err := pw.WebKit.Launch()
	if err != nil {
		log.Error("could not start browser", "error", err)
		os.Exit(1)
	}
	return &cm, &ff, &sf
}
// ExecTask runs parserExecution with the first browser and, if that fails,
// retries once with the second browser when one is available.
//
// Every failed attempt increments errCounter[parserName] (a nil errCounter is
// tolerated and simply not updated). The returned error is the one from the
// last attempt made, or nil when an attempt succeeded. An empty browser list
// is reported as an error without calling parserExecution.
func ExecTask(
	ctx context.Context,
	dbi *db.DB,
	browser []*playwright.Browser,
	log *slog.Logger,
	errCounter map[string]int,
	parserName string,
	parserExecution func(context.Context, *db.DB, *playwright.Browser, *slog.Logger) error) (err error) {
	if len(browser) == 0 {
		return fmt.Errorf("exec task %q: no browser available", parserName)
	}
	// countFailure records one failed attempt; writing to a nil map would panic.
	countFailure := func() {
		if errCounter != nil {
			errCounter[parserName]++
		}
	}

	err = parserExecution(ctx, dbi, browser[0], log)
	if err != nil {
		countFailure()
		log.Error(err.Error(), "parser", parserName)
		// Retry with a different browser engine, if the caller supplied one.
		if len(browser) > 1 {
			err = parserExecution(ctx, dbi, browser[1], log)
			if err != nil {
				countFailure()
			}
		}
	}
	log.Info("executed", "parser", parserName, "errors", errCounter[parserName])
	return err
}
// RemoveAccent
// helps normalize names in db
// https://stackoverflow.com/questions/24588295/go-removing-accents-from-strings
func RemoveAccent(str string) string {
if str == "" {
return ""
}
t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
s, _, _ := transform.String(t, str)
return s
}

40
helpers/helpers_test.go Normal file
View File

@ -0,0 +1,40 @@
package helpers
import "testing"
// TestNormalize checks that Normalize pulls the numeric value out of
// price-like strings and yields 0 when no digits are present.
func TestNormalize(t *testing.T) {
	// Each case has its own name so a failing subtest can be identified
	// and selected with -run.
	cases := []struct {
		name  string
		value string
		want  float64
	}{
		{name: "success/parse/symbol-prefix", value: "$58.40", want: 58.40},
		{name: "success/parse/text-prefix", value: "dollar $58.40", want: 58.40},
		{name: "success/parse/no-digits", value: "$", want: 0},
	}
	for idx, tt := range cases {
		t.Run(tt.name, func(t *testing.T) {
			if got := Normalize(tt.value); got != tt.want {
				t.Errorf("case #%d - wanted: %f - got: %f", idx, tt.want, got)
			}
		})
	}
}

135
inf/general.go Normal file
View File

@ -0,0 +1,135 @@
package inf
import (
"context"
"fmt"
"log/slog"
"os"
"strings"
"time"
"github.com/maximotejeda/us_dop_db/db"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/playwright-community/playwright-go"
)
var (
// uri is the address of the page to scrape, read once from the GENERAL
// environment variable when the package is initialised.
uri = os.Getenv("GENERAL")
)
// Scrape opens the page configured through the GENERAL environment variable
// and reads one quote per row of the "table#Dolar" table.
//
// For every row it takes the institution name from "span.nombre" (lower-cased
// and with accents removed) and the buy/sell values from the second and third
// cells. Rows without a name, or whose buy or sell value normalizes to 0, are
// skipped. The page lists "scotiabank" twice; the first occurrence is recorded
// as "scotiabank cambio online".
//
// It returns the collected quotes, or an error when the page cannot be loaded
// or the rows cannot be enumerated.
func Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (instList []*db.History, err error) {
	log = log.With("scrapper", "general")
	tout := float64(120000)
	if _, err := page.Goto(uri, playwright.PageGotoOptions{
		Timeout:   &tout,
		WaitUntil: playwright.WaitUntilStateLoad,
	}); err != nil {
		log.Error("could not get info", "error", err)
		return nil, err
	}

	entriesLocator := page.Locator("table#Dolar > tbody > tr")
	// A failed wait is reported but not fatal: the rows are enumerated below
	// regardless, exactly as before, so scraping stays best-effort.
	if err := entriesLocator.WaitFor(playwright.LocatorWaitForOptions{
		Timeout: &tout,
		State:   playwright.WaitForSelectorStateVisible,
	}); err != nil {
		log.Warn("waiting for table rows", "error", err)
	}
	entries, err := entriesLocator.All()
	if err != nil {
		log.Error("could not get info", "error", err)
		return nil, err
	}

	scotia := false // in this page there are 2 scotia: one is the online exchange, the other is the bank
	instList = []*db.History{}
	for _, entry := range entries {
		inst := &db.History{
			Parser: "inf",
		}
		// A row whose name cannot be read yields an empty title and is skipped.
		title, _ := entry.Locator("span.nombre").TextContent()
		if title == "" {
			continue
		}
		if strings.ToLower(title) == "scotiabank" && !scotia {
			title = "scotiabank cambio online"
			scotia = true
		}
		inst.Name = helpers.RemoveAccent(strings.ToLower(title))

		compraLocator, ventaLocator := entry.Locator("td:nth-child(2)"), entry.Locator("td:nth-child(3)")
		compra := getValue(compraLocator)
		venta := getValue(ventaLocator)
		inst.Compra = helpers.Normalize(compra)
		inst.Venta = helpers.Normalize(venta)
		inst.Parsed = time.Now().UTC()
		// if one of the values is 0 the row was not parsed; don't process it
		if inst.Compra == 0 || inst.Venta == 0 {
			log.Warn("skipping", "nombre", inst.Name, "compra", inst.Compra, "venta", inst.Venta)
			continue
		}
		instList = append(instList, inst)
	}
	return instList, nil
}
// getValue returns the first space-separated token of the first inner text
// found at place, with its first "=" removed. It returns "" when the locator
// yields no text.
func getValue(place playwright.Locator) string {
	texts, _ := place.AllInnerTexts()
	if len(texts) == 0 {
		return ""
	}
	// Everything before the first space is the value; the rest is ignored.
	first, _, _ := strings.Cut(texts[0], " ")
	return strings.Replace(first, "=", "", 1)
}
// ExecParser opens a mobile-sized page in browser, scrapes it with Scrape and
// passes every resulting quote to db.Inspect.
//
// It returns the error from creating the page or from Scrape. Inspect
// failures are logged per institution; the value returned after the loop is
// the result of the last Inspect call.
// NOTE(review): an Inspect failure on an earlier institution is therefore not
// reported to the caller when the last one succeeds — confirm this is intended.
func ExecParser(
	ctx context.Context,
	db *db.DB,
	browser *playwright.Browser,
	log *slog.Logger) error {
	t := true
	ua := helpers.NewMobileUA()
	b := *browser
	page, err := b.NewPage(playwright.BrowserNewPageOptions{
		UserAgent: &ua,
		//	IsMobile:  &t,
		HasTouch: &t,
		Viewport: &playwright.Size{
			Width:  412,
			Height: 915,
		},
		Screen: &playwright.Size{
			Width:  412,
			Height: 915,
		},
	})
	if err != nil {
		// Report the failure to the caller instead of exiting the process,
		// so the caller can retry or clean up.
		log.Error("creating page", "error", err)
		return fmt.Errorf("creating page: %w", err)
	}
	defer page.Close()

	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
	defer cancel()

	instList, err := Scrape(ctx, page, log)
	if err != nil {
		return err
	}
	for _, inst := range instList {
		log.Info("processing", "name", inst.Name)
		err = db.Inspect(*inst)
		if err != nil {
			log.Error(fmt.Sprintf("inspecting %s", inst.Name), "error", err)
		}
	}
	return err
}

1
inf/inf_test.go Normal file
View File

@ -0,0 +1 @@
package inf

View File

@ -0,0 +1,104 @@
package crawler
import (
"context"
"fmt"
"log/slog"
"os"
"time"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
"github.com/playwright-community/playwright-go"
)
var (
// uri is the address of the page to scrape, read once from the APA
// environment variable when the package is initialised.
uri = os.Getenv("APA")
)
// Apap is the scraper for "asociacion popular de ahorros y prestamos".
// It carries no state.
type Apap struct {
}
// NewApap returns an Apap scraper as a ports.APIPorts.
func NewApap() ports.APIPorts {
return &Apap{}
}
// Scrape opens the page configured through the APA environment variable,
// clicks the "#exchangesRates" element and reads the USD buy and sell values.
//
// It returns a single-element slice holding the quote, or an error when the
// page cannot be loaded, a value cannot be read, or either value normalizes
// to 0.
func (a Apap) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
	tout := 120000.00
	log = log.With("scrapper", "apap")
	if _, err := page.Goto(uri, playwright.PageGotoOptions{
		Timeout:   &tout,
		WaitUntil: playwright.WaitUntilStateLoad,
	}); err != nil {
		log.Error("could not get info", "error", err)
		return nil, err
	}

	button := page.Locator("#exchangesRates")
	// Failures to reveal the rates are reported but not fatal: the values are
	// still read below, exactly as before, and a genuinely missing value
	// surfaces there as an error.
	if err := button.WaitFor(); err != nil {
		log.Warn("waiting for exchange rates button", "error", err)
	}
	if err := button.Click(); err != nil {
		log.Warn("clicking exchange rates button", "error", err)
	}

	compraLocator := page.Locator("#currency-buy-USD")
	ventaLocator := page.Locator("#currency-sell-USD")
	compraSTR, err := compraLocator.TextContent()
	if err != nil {
		log.Error("could not get compra str", "err", err)
		return nil, err
	}
	ventaSTR, err := ventaLocator.TextContent()
	if err != nil {
		log.Error("could not get venta string", "err", err)
		return nil, err
	}

	inst := &domain.History{
		Name:   "asociacion popular de ahorros y prestamos",
		Parser: "apap",
		Parsed: time.Now().Unix(),
	}
	inst.Venta = helpers.Normalize(ventaSTR)
	inst.Compra = helpers.Normalize(compraSTR)
	// A value of 0 means the text could not be turned into a rate.
	if inst.Compra == 0 || inst.Venta == 0 {
		return nil, fmt.Errorf("apa: institution not parsed: %v", inst)
	}
	log.Info("parsed", "value", inst)
	return []*domain.History{inst}, nil
}
// ExecParser opens a mobile-sized page in browser and runs Scrape on it.
//
// It returns the error from creating the page or from Scrape. The scraped
// quotes are currently discarded.
// TODO: hand the scraped quotes to the storage layer.
func (a Apap) ExecParser(
	ctx context.Context,
	browser *playwright.Browser,
	log *slog.Logger) (err error) {
	t := true
	ua := helpers.NewMobileUA()
	b := *browser
	page, err := b.NewPage(playwright.BrowserNewPageOptions{
		UserAgent: &ua,
		//	IsMobile:  &t,
		HasTouch: &t,
		Viewport: &playwright.Size{
			Width:  412,
			Height: 915,
		},
		Screen: &playwright.Size{
			Width:  412,
			Height: 915,
		},
	})
	if err != nil {
		// Report the failure to the caller instead of exiting the process,
		// so the caller can retry or clean up.
		log.Error("creating page", "error", err)
		return err
	}
	defer page.Close()

	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
	defer cancel()

	_, err = a.Scrape(ctx, page, log)
	return err
}

View File

@ -0,0 +1,18 @@
package crawler
import (
"fmt"
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
)
// Selector returns the scraper registered under the identifier who.
//
// Both "apap" and "apa" select the Apap scraper. Any other identifier yields
// an error.
func Selector(who string) (ports.APIPorts, error) {
	switch who {
	case "apap", "apa":
		return NewApap(), nil
	default:
		// who is passed as an argument, not spliced into the format string,
		// so a '%' in it cannot corrupt the message.
		return nil, fmt.Errorf("not recognize who: %s", who)
	}
}

View File

@ -0,0 +1 @@
maximo@debian-pc.9800:1713363571

View File

@ -0,0 +1,26 @@
package dolar
import (
"context"
"github.com/maximotejeda/msvc-proto/golang/dolar"
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
)
// Adapter is the gRPC client side of the dolar service.
type Adapter struct {
	// dolar is the generated client used to issue calls.
	dolar dolar.DollarClient
	// conn is the connection backing dolar; it is released by Close.
	conn *grpc.ClientConn
}

// NewAdapter dials dolarServiceURL without transport security and returns an
// Adapter bound to that connection.
//
// The connection stays open for the lifetime of the Adapter; call Close when
// the Adapter is no longer needed. It returns an error when dialing fails.
func NewAdapter(dolarServiceURL string) (*Adapter, error) {
	var opts []grpc.DialOption
	opts = append(opts, grpc.WithTransportCredentials(insecure.NewCredentials()))
	conn, err := grpc.Dial(dolarServiceURL, opts...)
	if err != nil {
		return nil, err
	}
	// The connection must outlive this function: closing it here would hand
	// the caller a client whose every call fails.
	client := dolar.NewDollarClient(conn)
	return &Adapter{dolar: client, conn: conn}, nil
}

// Close releases the gRPC connection held by the Adapter. It is safe to call
// on a nil Adapter or on one without a connection.
func (a *Adapter) Close() error {
	if a == nil || a.conn == nil {
		return nil
	}
	return a.conn.Close()
}

View File

@ -0,0 +1 @@
maximo@debian-pc.9800:1713363571

View File

@ -0,0 +1,26 @@
package api
import (
"log/slog"
"github.com/maximotejeda/us_dop_scrapper/config"
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
)
// Application is the root object of the service.
type Application struct {
// log is the logger used by the application.
log *slog.Logger
// api is the scraper to run; NewApplication does not set it.
api ports.APIPorts
}
// NewApplication builds an Application whose logger is the default slog
// logger tagged with application=root.
func NewApplication() *Application {
	app := &Application{}
	app.log = slog.Default().With("application", "root")
	return app
}
// Run starts the application for the scraper identifier returned by
// config.GetWho.
//
// TODO: select the scraper for the identifier and execute it; for now the
// identifier is only logged.
func (a Application) Run() {
	who := config.GetWho()
	// Using the value keeps the package compiling (an unused local is a
	// compile error) and records which scraper was requested.
	a.log.Info("running", "who", who)
}

View File

@ -0,0 +1 @@
maximo@debian-pc.9800:1713363571

View File

@ -0,0 +1,10 @@
package domain
// History is one recorded quote for an institution.
// NOTE(review): every json tag is empty, so encoding/json presumably falls
// back to the Go field names — confirm the intended wire names.
type History struct {
// ID identifies the record.
ID int64 `json:""`
// Name is the institution name.
Name string `json:""`
// Compra is the buy value.
Compra float64 `json:""`
// Venta is the sell value.
Venta float64 `json:""`
// Parser names the scraper that produced the record.
Parser string `json:""`
// Parsed is the capture time; the apap scraper stores time.Now().Unix().
Parsed int64 `json:""`
}

14
internal/ports/api.go Normal file
View File

@ -0,0 +1,14 @@
package ports
import (
"context"
"log/slog"
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
"github.com/playwright-community/playwright-go"
)
// APIPorts is the contract every scraper implements.
type APIPorts interface {
// Scrape reads quotes from the given page and returns them.
Scrape(context.Context, playwright.Page, *slog.Logger) ([]*domain.History, error)
// ExecParser runs the scraper using the given browser.
ExecParser(context.Context, *playwright.Browser, *slog.Logger) error
}

7
internal/ports/dolar.go Normal file
View File

@ -0,0 +1,7 @@
package ports
import "github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
// DollarPort is the outbound contract used to hand a quote to the dolar service.
type DollarPort interface {
// NewHistory submits one quote and reports any failure.
NewHistory(*domain.History) error
}

294
k8s/cronjobs.yml Normal file
View File

@ -0,0 +1,294 @@
apiVersion: batch/v1
kind: CronJob
metadata:
name: inf-cronjob
spec:
schedule: "*/20 8-19 * * 1-6"
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
template:
spec:
containers:
- name: crawler-inf
image: localhost:32000/crawler:latest
env:
- name: GENERAL
value: https://www.infodolar.com.do/
- name: DBURI
value: dolardb/crawler.db
- name: NATSURI
value: "nats://nats-svc:4222"
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
restartPolicy: OnFailure
---
apiVersion: batch/v1
kind: CronJob
metadata:
name: bcd-cronjob
spec:
schedule: "3,33 8-22 * * 1-6"
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
template:
spec:
containers:
- name: crawler-bcd
image: localhost:32000/crawler:latest
env:
- name: BCD
value: https://www.bancentral.gov.do/SectorExterno/HistoricoTasas
- name: DBURI
value: dolardb/crawler.db
- name: NATSURI
value: "nats://nats-svc:4222"
- name: WHO
value: bcd
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
restartPolicy: OnFailure
---
apiVersion: batch/v1
kind: CronJob
metadata:
name: bpd-cronjob
spec:
schedule: "5,35 8-22 * * 1-6"
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
template:
spec:
containers:
- name: crawler-bpd
image: localhost:32000/crawler:latest
env:
- name: BPD
value: https://popularenlinea.com/empresarial/Paginas/Home.aspx
- name: DBURI
value: dolardb/crawler.db
- name: NATSURI
value: "nats://nats-svc:4222"
- name: WHO
value: bpd
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
restartPolicy: OnFailure
---
apiVersion: batch/v1
kind: CronJob
metadata:
name: bhd-cronjob
spec:
schedule: "8,38 8-22 * * 1-6"
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
template:
spec:
containers:
- name: crawler-bhd
image: localhost:32000/crawler:latest
env:
- name: BHD
value: https://bhd.com.do/calculators?calculator=DIVISAS
- name: DBURI
value: dolardb/crawler.db
- name: NATSURI
value: "nats://nats-svc:4222"
- name: WHO
value: bhd
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
restartPolicy: OnFailure
---
apiVersion: batch/v1
kind: CronJob
metadata:
name: brd-cronjob
spec:
schedule: "22,52 8-22 * * 1-6"
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
template:
spec:
containers:
- name: crawler-brd
image: localhost:32000/crawler:latest
env:
- name: BDR
value: https://www.banreservas.com/calculadoras
- name: DBURI
value: dolardb/crawler.db
- name: NATSURI
value: "nats://nats-svc:4222"
- name: WHO
value: brd
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
restartPolicy: OnFailure
---
apiVersion: batch/v1
kind: CronJob
metadata:
name: apap-cronjob
spec:
schedule: "25,55 8-22 * * 1-6"
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
template:
spec:
containers:
- name: crawler-apap
image: localhost:32000/crawler:latest
env:
- name: APA
value: https://apap.com.do/
- name: DBURI
value: dolardb/crawler.db
- name: NATSURI
value: "nats://nats-svc:4222"
- name: WHO
value: apa
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
restartPolicy: OnFailure
---
apiVersion: batch/v1
kind: CronJob
metadata:
name: bnc-cronjob
spec:
schedule: "11,41 8-22 * * 1-6"
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
template:
spec:
containers:
- name: crawler-bnc
image: localhost:32000/crawler:latest
env:
- name: BNC
value: https://www.banesco.com.do/
- name: DBURI
value: dolardb/crawler.db
- name: NATSURI
value: "nats://nats-svc:4222"
- name: WHO
value: bnc
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
restartPolicy: OnFailure
---
apiVersion: batch/v1
kind: CronJob
metadata:
name: vimenca-cronjob
spec:
schedule: "14,44 8-22 * * 1-6"
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
template:
spec:
containers:
- name: crawler-vimenca
image: localhost:32000/crawler:latest
env:
- name: VIMENCA
value: https://www.bancovimenca.com/
- name: DBURI
value: dolardb/crawler.db
- name: NATSURI
value: "nats://nats-svc:4222"
- name: WHO
value: vimenca
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
restartPolicy: OnFailure
---
apiVersion: batch/v1
kind: CronJob
metadata:
name: scotia-cronjob
spec:
schedule: "17,47 8-22 * * 1-6"
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
template:
spec:
containers:
- name: crawler-scotia
image: localhost:32000/crawler:latest
env:
- name: SCOTIA
value: https://do.scotiabank.com/banca-personal/tarifas/tasas-de-cambio.html
- name: DBURI
value: dolardb/crawler.db
- name: NATSURI
value: "nats://nats-svc:4222"
- name: WHO
value: scotia
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
restartPolicy: OnFailure

View File

@ -0,0 +1,56 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: bank-crawler
labels:
app: bank-crawler
spec:
replicas: 1
selector:
matchLabels:
app: bank-crawler
template:
metadata:
labels:
app: bank-crawler
name: bank-crawler
spec:
containers:
- name: bank-crawler
image: localhost:32000/crawler:latest
env:
- name: GENERAL
value: $GENERAL
- name: BCD
value: $BCD
- name: BPD
value: $BPD
- name: BHD
value: $BHD
- name: BDR
value: $BDR
- name: APA
value: $APA
- name: DBURI
value: $DBURI
- name: NATSURI
value: "nats://nats-svc:4222"
volumeMounts:
- name: database
mountPath: /app/dolardb
volumes:
- name: database
persistentVolumeClaim:
claimName: bank-crawler-pvc
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: bank-crawler-pvc
spec:
storageClassName: nfs-csi
accessModes: [ReadWriteMany]
resources:
requests:
storage: 500Mi

10
k8s/pvc.yaml Normal file
View File

@ -0,0 +1,10 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: bank-crawler-pvc
spec:
storageClassName: nfs-csi
accessModes: [ReadWriteMany]
resources:
requests:
storage: 500Mi

13
models/models.go Normal file
View File

@ -0,0 +1,13 @@
package models
import (
"time"
)
// Institucion is one quote for an institution, with JSON field names in
// lower case.
type Institucion struct {
// Name is the institution name.
Name string `json:"name"`
// Compra is the buy value.
Compra float64 `json:"compra"`
// Venta is the sell value.
Venta float64 `json:"venta"`
// Parser names the scraper that produced the quote.
Parser string `json:"parser"`
// Parsed is the time at which the quote was captured.
Parsed time.Time `json:"parsed"`
}

17
pub/pub.go Normal file
View File

@ -0,0 +1,17 @@
package pub
import (
"os"
"github.com/nats-io/nats.go"
)
// Publisher connects to the NATS server named by the NATSURI environment
// variable and returns a publish function together with a function that
// closes the connection.
//
// It panics when NATSURI is empty. When the connection cannot be
// established, the returned publish function reports that connection error
// on every call and the closer does nothing.
func Publisher() (publisher func(string, []byte) error, closer func()) {
	URI := os.Getenv("NATSURI")
	if URI == "" {
		panic("empty connection stream")
	}
	nc, err := nats.Connect(URI)
	if err != nil {
		// Surface the real cause to whoever publishes instead of silently
		// handing out methods bound to a nil connection.
		return func(string, []byte) error { return err }, func() {}
	}
	return nc.Publish, nc.Close
}

135
scotia/scotia.go Normal file
View File

@ -0,0 +1,135 @@
package scotia
import (
"context"
"fmt"
"log/slog"
"os"
"time"
"github.com/maximotejeda/us_dop_db/db"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/playwright-community/playwright-go"
)
var (
// uri is the address of the page to scrape, read once from the SCOTIA
// environment variable when the package is initialised.
uri = os.Getenv("SCOTIA")
)
// Scrape opens the page configured through the SCOTIA environment variable
// and reads two quotes from the ".bns--table" table: the on-site rate (second
// table row) and the online rate (third table row).
//
// It returns the online quote followed by the on-site quote, or an error when
// the page cannot be loaded, a cell cannot be read, or any value normalizes
// to 0.
func Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*db.History, err error) {
	tout := 120000.00
	log = log.With("scrapper", "scotia")
	if _, err := page.Goto(uri, playwright.PageGotoOptions{
		Timeout:   &tout,
		WaitUntil: playwright.WaitUntilStateLoad,
	}); err != nil {
		log.Error("could not get info", "error", err)
		return nil, err
	}

	currencyTable := page.Locator(".bns--table")
	// A failed wait is reported but not fatal: the cells are still read
	// below, exactly as before, and a missing cell surfaces there as an error.
	if err := currencyTable.WaitFor(); err != nil {
		log.Warn("waiting for currency table", "error", err)
	}
	firstRow := page.Locator(".bns--table > tbody:nth-child(1) > tr:nth-child(2)")
	secondRow := page.Locator(".bns--table > tbody:nth-child(1) > tr:nth-child(3)")

	// the same institution has 2 prices for dollar sell
	// first row is onsite selling dollars
	// second row is online selling dollars
	buyOnsite := firstRow.Locator("td:nth-child(3)")
	sellOnsite := firstRow.Locator("td:nth-child(4)")
	// the first row has 4 elements
	// but the second row only has 3
	buyOnline := secondRow.Locator("td:nth-child(2)")
	sellOnline := secondRow.Locator("td:nth-child(3)")

	compraOnsiteSTR, err := buyOnsite.InnerText()
	if err != nil {
		log.Error("could not get compra str", "err", err)
		return nil, err
	}
	ventaOnsiteSTR, err := sellOnsite.InnerText()
	if err != nil {
		log.Error("could not get venta string", "err", err)
		return nil, err
	}
	instOnsite := &db.History{
		Name:   "scotiabank",
		Parser: "scotia",
		Parsed: time.Now().UTC(),
	}
	instOnsite.Venta = helpers.Normalize(ventaOnsiteSTR)
	instOnsite.Compra = helpers.Normalize(compraOnsiteSTR)
	// A value of 0 means the text could not be turned into a rate.
	if instOnsite.Compra == 0 || instOnsite.Venta == 0 {
		return nil, fmt.Errorf("scotia: institution not parsed: %v", instOnsite)
	}

	compraOnlineSTR, err := buyOnline.InnerText()
	if err != nil {
		log.Error("could not get compra onlie str", "err", err)
		return nil, err
	}
	ventaOnlineSTR, err := sellOnline.InnerText()
	if err != nil {
		log.Error("could not get venta online string", "err", err)
		return nil, err
	}
	instOnline := &db.History{
		Name:   "scotiabank cambio online",
		Parser: "scotia",
		Parsed: time.Now().UTC(),
	}
	instOnline.Venta = helpers.Normalize(ventaOnlineSTR)
	instOnline.Compra = helpers.Normalize(compraOnlineSTR)
	if instOnline.Compra == 0 || instOnline.Venta == 0 {
		return nil, fmt.Errorf("scotia: institution not parsed: %v", instOnline)
	}

	insts = append(insts, instOnline, instOnsite)
	return insts, nil
}
// ExecParser opens a new page on browser with a mobile user agent, touch
// support and a 412x915 viewport/screen, runs Scrape on it and hands every
// scraped entry to db.Inspect.
//
// The page is closed before returning. Scrape receives a context limited to
// six minutes.
//
// Failing to create the page is reported through the returned error instead
// of terminating the process, so deferred cleanup in the callers still runs.
// Every entry is passed to db.Inspect even if an earlier one failed; each
// failure is logged and the first one is returned.
func ExecParser(
	ctx context.Context,
	db *db.DB,
	browser *playwright.Browser,
	log *slog.Logger) (err error) {
	t := true
	ua := helpers.NewMobileUA()
	b := *browser
	page, err := b.NewPage(playwright.BrowserNewPageOptions{
		UserAgent: &ua,
		// IsMobile: &t,
		HasTouch: &t,
		Viewport: &playwright.Size{
			Width:  412,
			Height: 915,
		},
		Screen: &playwright.Size{
			Width:  412,
			Height: 915,
		},
	})
	if err != nil {
		log.Error("creating page", "error", err)
		return err
	}
	defer page.Close()

	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
	defer cancel()

	insts, err := Scrape(ctx, page, log)
	if err != nil {
		return err
	}

	// here we execute db operations
	for _, inst := range insts {
		if inspectErr := db.Inspect(*inst); inspectErr != nil {
			log.Error("inspecting institution", "name", inst.Name, "error", inspectErr)
			// Keep the first failure; a later success must not hide it.
			if err == nil {
				err = inspectErr
			}
		}
	}
	return err
}

97
vimenca/vimenca.go Normal file
View File

@ -0,0 +1,97 @@
package vimenca
import (
"context"
"log/slog"
"os"
"time"
"github.com/maximotejeda/us_dop_db/db"
"github.com/maximotejeda/us_dop_scrapper/helpers"
"github.com/playwright-community/playwright-go"
)
// uri is the address the scraper navigates to. It is read once, at package
// initialization, from the VIMENCA environment variable and is empty when the
// variable is not set.
var uri = os.Getenv("VIMENCA")
// Scrape navigates page to uri and reads the buy and sell rates from the
// ".purchaseValue" and ".saleValue" elements inside the first child of
// ".layout-uikit".
//
// It returns a single history entry named "banco vimenca", or an error when
// the page cannot be loaded or one of the two values cannot be read.
//
// ctx is accepted for interface symmetry but is not consulted by this
// function.
func Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (inst *db.History, err error) {
	// Navigation timeout handed to playwright (milliseconds per its docs).
	tout := 120000.00
	log = log.With("scrapper", "vimenca")
	if _, err := page.Goto(uri, playwright.PageGotoOptions{
		Timeout:   &tout,
		WaitUntil: playwright.WaitUntilStateLoad,
	}); err != nil {
		log.Error("could not get info", "error", err)
		return nil, err
	}

	// NOTE(review): ".bns--table" is the selector the scotia scraper waits
	// on and none of the locators below depend on it; confirm it exists on
	// this page. Until then a failed wait is reported but stays non-fatal,
	// which matches the previous flow where the result was ignored.
	currencyTable := page.Locator(".bns--table")
	if err := currencyTable.WaitFor(); err != nil {
		log.Warn("waiting for currency table", "err", err)
	}

	infoContainer := page.Locator(".layout-uikit > div:nth-child(1)")
	buyInfo := infoContainer.Locator(".purchaseValue")
	sellInfo := infoContainer.Locator(".saleValue")

	compraSTR, err := buyInfo.InnerText()
	if err != nil {
		log.Error("could not get compra str", "err", err)
		return nil, err
	}
	ventaSTR, err := sellInfo.InnerText()
	if err != nil {
		log.Error("could not get venta string", "err", err)
		return nil, err
	}

	inst = &db.History{
		Name:   "banco vimenca",
		Parser: "vimenca",
		Parsed: time.Now().UTC(),
	}
	inst.Venta = helpers.Normalize(ventaSTR)
	inst.Compra = helpers.Normalize(compraSTR)
	log.Info("institution", "value", inst)
	return inst, nil
}
// ExecParser opens a new page on browser with a mobile user agent, touch
// support and a 412x915 viewport/screen, runs Scrape on it and hands the
// scraped entry to db.Inspect.
//
// The page is closed before returning. Scrape receives a context limited to
// six minutes.
//
// Failing to create the page is reported through the returned error instead
// of terminating the process, so deferred cleanup in the callers still runs.
func ExecParser(
	ctx context.Context,
	db *db.DB,
	browser *playwright.Browser,
	log *slog.Logger) (err error) {
	t := true
	ua := helpers.NewMobileUA()
	b := *browser
	page, err := b.NewPage(playwright.BrowserNewPageOptions{
		UserAgent: &ua,
		// IsMobile: &t,
		HasTouch: &t,
		Viewport: &playwright.Size{
			Width:  412,
			Height: 915,
		},
		Screen: &playwright.Size{
			Width:  412,
			Height: 915,
		},
	})
	if err != nil {
		log.Error("creating page", "error", err)
		return err
	}
	defer page.Close()

	ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
	defer cancel()

	inst, err := Scrape(ctx, page, log)
	if err != nil {
		return err
	}

	// here we execute db operations
	return db.Inspect(*inst)
}

51
wait/wait.go Normal file
View File

@ -0,0 +1,51 @@
package wait
import (
"fmt"
"time"
)
// WaitAmount tells the caller how long to wait before the next runs, given
// the current time actualTime. All decisions are taken in UTC.
//
// Working window implemented here:
//   - Monday to Friday: from 12:00 up to (not including) 22:00 UTC.
//   - Saturday: from 12:00 up to (not including) 17:00 UTC (cutoff is hour > 16).
//     NOTE(review): earlier notes on this function said 18:00; confirm which
//     one is intended.
//   - Sunday: never.
//
// Inside the window it returns 26 minutes (infoDuration) and 59 minutes
// (longDuration). Outside the window both results are the time remaining
// until the next opening at 12:05 UTC: the same day when it is still before
// 12:00, the following day on weekday evenings and on Sunday, and two days
// later on Saturday evening.
//
// Durations are measured from actualTime, not from the wall clock, so the
// result depends only on the argument.
func WaitAmount(actualTime time.Time) (infoDuration time.Duration, longDuration time.Duration) {
	now := actualTime.UTC()
	year, month, day := now.Date()
	hour := now.Hour()

	// Today's opening time, 12:05 UTC.
	openingTemplate := fmt.Sprintf("%d-%02d-%02d 12:05:00", year, month, day)
	opening, err := time.Parse(time.DateTime, openingTemplate)
	if err != nil {
		panic(err)
	}

	// untilOpening yields the wait until the opening that lies the given
	// number of days after today's, for both results.
	untilOpening := func(daysAhead int) (time.Duration, time.Duration) {
		wait := opening.Add(time.Duration(daysAhead) * 24 * time.Hour).Sub(now)
		return wait, wait
	}

	// day of the week start on sunday=0
	switch now.Weekday() {
	case time.Sunday:
		return untilOpening(1)
	case time.Saturday:
		switch {
		case hour > 16:
			// Sunday is skipped, resume on Monday.
			return untilOpening(2)
		case hour < 12:
			// Not open yet: wait for today's opening, as on weekdays.
			return untilOpening(0)
		}
	default:
		switch {
		case hour >= 22:
			// next day wait
			return untilOpening(1)
		case hour < 12:
			return untilOpening(0)
		}
	}

	// Inside the working window.
	return 26 * time.Minute, 59 * time.Minute
}

40
wait/wait_test.go Normal file
View File

@ -0,0 +1,40 @@
package wait
import (
"testing"
"time"
)
// TestWaitAmount calls WaitAmount with the current time and with a time one
// hour in the past and fails when either returned duration is exactly zero
// hours. The infoAmount/longAmount values only appear in the failure message.
func TestWaitAmount(t *testing.T) {
	scenarios := []struct {
		name       string
		initDate   time.Time
		infoAmount float64
		longAmount float64
	}{
		{"success/3h", time.Now().Add(-1 * time.Hour), 24, 24},
		{"success/now", time.Now(), 24, 24},
	}

	for i := range scenarios {
		sc := scenarios[i]
		t.Run(sc.name, func(t *testing.T) {
			gotInfo, gotLong := WaitAmount(sc.initDate)
			if hours := gotInfo.Hours(); hours == 0 {
				t.Errorf("wanted: %f got: %f, time: %v", sc.infoAmount, hours, sc.initDate.UTC())
			}
			if hours := gotLong.Hours(); hours == 0 {
				t.Errorf("wanted: %f got: %f, time: %v", sc.longAmount, hours, sc.initDate.UTC())
			}
		})
	}
}