diff --git a/.gitignore b/.gitignore index 4c49bd7..5344210 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ .env +k8s/deployment.yml diff --git a/Dockerfile b/Dockerfile index 7bc745d..9112bf7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,17 +1,10 @@ -FROM golang:1.22rc2 as builder - +FROM golang:latest +ARG BINAME=crawler-linux-arm64-0.0.0_1 +RUN go run github.com/playwright-community/playwright-go/cmd/playwright@v0.4001.0 install --with-deps +#RUN go run github.com/playwright-community/playwright-go/cmd/playwright@v0.4001.0 install --with-deps +#RUN playwright install --with-deps +COPY ./bin/${BINAME} /usr/bin/crawler +RUN mkdir /app WORKDIR /app -COPY . /app - -RUN go mod download && go mod tidy - -RUN go build -o bin/crawler ./cmd/crawler/main.go - -FROM golang:latest - -RUN go install github.com/playwright-community/playwright-go/cmd/playwright@latest -RUN playwright install --with-deps -COPY --from=builder /app/bin/crawler /usr/bin/crawler - -CMD ["crawler"] +ENTRYPOINT crawler diff --git a/Dockerfile.old b/Dockerfile.old new file mode 100644 index 0000000..76d0a7d --- /dev/null +++ b/Dockerfile.old @@ -0,0 +1,15 @@ +FROM golang:latest as builder +WORKDIR /crawler +COPY . 
./ +RUN go mod download && go mod tidy +RUN go build -o ./bin/crawler ./cmd/crawler + +FROM golang:latest +#RUN go install github.com/playwright-community/playwright-go@latest +RUN go run github.com/playwright-community/playwright-go/cmd/playwright@v0.4001.0 install --with-deps +#RUN playwright install --with-deps +COPY --from=builder /crawler/bin/crawler /usr/bin/crawler +RUN mkdir /app +WORKDIR /app + +ENTRYPOINT crawler diff --git a/Makefile b/Makefile index f95aa3b..6e89537 100644 --- a/Makefile +++ b/Makefile @@ -9,13 +9,24 @@ ARRCHS="arm 386" DEBUG=1 SERVICE=crawler VERSION=0.0.0_1 +BINAME=$(SERVICE)-$(OS)-$(ARCH)-$(VERSION) +BINAMEARM=$(SERVICE)-$(OS)-arm64-$(VERSION) # can be docker or podman or whatever -CONTAINERS=podman +CONTAINERS=docker COMPOSE=$(CONTAINERS)-compose +# Configure local registry +REGADDR=192.168.0.151:32000 +K8SRSNAME=$(shell kubectl get rs --no-headers -o custom-columns=":metadata.name" | grep bank) .phony: all clean build test clean-image build-image build-image-debug run-image run-image-debug run-local -build-image: clean - @$(CONTAINERS) compose -f ./docker-compose.yaml build + +build-image: build +# here we made the images and push to registry with buildx + @$(CONTAINERS) buildx build --build-arg="BINAME=${BINAMEARM}" --platform linux/arm64 --push -t $(REGADDR)/crawler:latest . + +# Here we upload it to local +build-test-image: + @$(CONTAINERS) buildx build --platform linux/arm64 --push -t $(REGADDR)/crawler:latest -f Dockerfile.old . run-image: build-image @$(CONTAINERS) compose -f docker-compose.yaml up @@ -27,13 +38,24 @@ run-image-debug: build-image-debug @$(CONTAINERS) compose -f docker-compose-debug.yaml up run-local:clean build - @bin/$(SERVICE)-$(OS)-$(ARCH)-$(VERSION) -build: - @go build -o ./bin/$(SERVICE)-$(OS)-$(ARCH)-$(VERSION) ./cmd/crawler/ + @bin/$(BINAME) + +build: clean + #@mkdir dolardb + @env GOOS=$(OS) GOARCH=$(ARCH) go build -o ./bin/$(BINAME) ./cmd/crawler/. 
+ @env GOOS=$(OS) GOARCH=arm64 go build -o ./bin/$(BINAMEARM) ./cmd/crawler/. + +create-descriptors: + @envsubst < k8s/deployment.yml.template > k8s/deployment.yml + +deploy: build-image create-descriptors + @kubectl apply -f k8s/pvc.yaml + @kubectl apply -f k8s/cronjobs.yml + test: @go -count=1 test ./... clean: - @rm -rf ./bin + @rm -rf ./bin clean-image: @$(CONTAINERS) system prune -f diff --git a/apa/apa.go b/apa/apa.go new file mode 100644 index 0000000..5d81687 --- /dev/null +++ b/apa/apa.go @@ -0,0 +1,97 @@ +package apa + +import ( + "context" + "fmt" + "log/slog" + "os" + "time" + + "github.com/maximotejeda/us_dop_db/db" + "github.com/maximotejeda/us_dop_scrapper/helpers" + "github.com/playwright-community/playwright-go" +) + +var ( + uri = os.Getenv("APA") +) + +func Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (inst *db.History, err error) { + tout := 120000.00 + log = log.With("scrapper", "apap") + if _, err := page.Goto(uri, playwright.PageGotoOptions{ + Timeout: &tout, + WaitUntil: playwright.WaitUntilStateLoad, + }); err != nil { + log.Error("could not get info", "error", err) + return nil, err + } + button := page.Locator("#exchangesRates") + button.WaitFor() + button.Click() + + compraLocator := page.Locator("#currency-buy-USD") + ventaLocator := page.Locator("#currency-sell-USD") + + compraSTR, err := compraLocator.TextContent() + if err != nil { + log.Error("could not get compra str", "err", err) + return nil, err + } + ventaSTR, err := ventaLocator.TextContent() + if err != nil { + log.Error("could not get venta string", "err", err) + return nil, err + } + inst = &db.History{ + Name: "asociacion popular de ahorros y prestamos", + Parser: "apap", + Parsed: time.Now().UTC(), + } + + inst.Venta = helpers.Normalize(ventaSTR) + inst.Compra = helpers.Normalize(compraSTR) + + if inst.Compra == 0 || inst.Venta == 0 { + return nil, fmt.Errorf("apa: institution not parsed: %v", inst) + } + log.Info("parsed", "value", inst) + return 
inst, nil +} + +func ExecParser( + ctx context.Context, + db *db.DB, + browser *playwright.Browser, + log *slog.Logger) (err error) { + t := true + ua := helpers.NewMobileUA() + b := *browser + page, err := b.NewPage(playwright.BrowserNewPageOptions{ + UserAgent: &ua, + // IsMobile: &t, + HasTouch: &t, + Viewport: &playwright.Size{ + Width: 412, + Height: 915, + }, + Screen: &playwright.Size{ + Width: 412, + Height: 915, + }, + }) + if err != nil { + log.Error("creating page", "error", err) + os.Exit(1) + } + ctx, cancel := context.WithTimeout(ctx, 6*time.Minute) + defer page.Close() + defer cancel() + inst, err := Scrape(ctx, page, log) + // here we execute db operations + if err != nil { + return err + } + err = db.Inspect(*inst) + return err +} diff --git a/bcd/bcd.go b/bcd/bcd.go new file mode 100644 index 0000000..903c457 --- /dev/null +++ b/bcd/bcd.go @@ -0,0 +1,104 @@ +package bcd + +import ( + "context" + "fmt" + "log/slog" + "os" + "time" + + "github.com/maximotejeda/us_dop_db/db" + "github.com/maximotejeda/us_dop_scrapper/helpers" + "github.com/maximotejeda/us_dop_scrapper/models" + "github.com/playwright-community/playwright-go" +) + +type bcd struct { + models.Institucion +} + +var ( + uri = os.Getenv("BCD") +) + +func Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (inst *db.History, err error) { + log = log.With("scrapper", "bcd") + tout := 90000.00 + if _, err = page.Goto(uri, playwright.PageGotoOptions{ + Timeout: &tout, + WaitUntil: playwright.WaitUntilStateLoad, + }); err != nil { + log.Error("could not get info", "error", err) + return nil, err + } + compraLocator := page.Locator("span#actualPurchaseValue") + + compraLocator.WaitFor(playwright.LocatorWaitForOptions{ + Timeout: &tout, + State: playwright.WaitForSelectorStateVisible, + }) + + ventaLocator := page.Locator("span#actualSellingValue") + + compra, err := compraLocator.TextContent() + if err != nil { + log.Error("locating compra", "err", err) + return nil, err + } + 
venta, err := ventaLocator.TextContent() + if err != nil { + log.Error("locating venta", "err", err) + return nil, err + } + + inst = &db.History{ + Parser: "bcd", + Name: "banco central dominicano", + Parsed: time.Now().UTC(), + } + + inst.Compra = helpers.Normalize(compra) + inst.Venta = helpers.Normalize(venta) + + if inst.Compra == 0 || inst.Venta == 0 { + return nil, fmt.Errorf("bcd: institution not parsed compra or venta cant be 0") + } + return inst, nil +} + +func ExecParser( + ctx context.Context, + db *db.DB, + browser *playwright.Browser, + log *slog.Logger) (err error) { + t := true + ua := helpers.NewMobileUA() + b := *browser + page, err := b.NewPage(playwright.BrowserNewPageOptions{ + UserAgent: &ua, + // IsMobile: &t, + HasTouch: &t, + Viewport: &playwright.Size{ + Width: 412, + Height: 915, + }, + Screen: &playwright.Size{ + Width: 412, + Height: 915, + }, + }) + if err != nil { + log.Error("creating page", "error", err) + os.Exit(1) + } + ctx, cancel := context.WithTimeout(ctx, 6*time.Minute) + defer page.Close() + defer cancel() + inst, err := Scrape(ctx, page, log) + if err != nil { + return err + } + err = db.Inspect(*inst) + + return err +} diff --git a/bdr/bdr.go b/bdr/bdr.go new file mode 100644 index 0000000..afdc21a --- /dev/null +++ b/bdr/bdr.go @@ -0,0 +1,104 @@ +package bdr + +import ( + "context" + "fmt" + "log/slog" + "os" + "time" + + "github.com/maximotejeda/us_dop_db/db" + "github.com/maximotejeda/us_dop_scrapper/helpers" + "github.com/playwright-community/playwright-go" +) + +var ( + uri = os.Getenv("BDR") +) + +func Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (inst *db.History, err error) { + tout := 120000.00 + log = log.With("scrapper", "bdr") + if _, err := page.Goto(uri, playwright.PageGotoOptions{ + Timeout: &tout, + WaitUntil: playwright.WaitUntilStateLoad, + }); err != nil { + log.Error("could not get info", "error", err) + return nil, err + } + + err = page.WaitForLoadState() + if err != nil { + 
log.Error("waiting for page state", "err", err) + return nil, err + } + + page.Locator("section#divisas").WaitFor() + compraLocator := page.Locator("span#compraUS") + compraLocator.WaitFor() + ventaLocator := page.Locator("span#ventaUS") + compraSTR, err := compraLocator.TextContent() + if err != nil { + log.Error("parsing compra", "err", err) + return nil, err + } + ventaSTR, err := ventaLocator.TextContent() + if err != nil { + log.Error("parsing compra", "err", err) + return nil, err + } + inst = &db.History{ + Name: "banreservas", + Parser: "brd", + Parsed: time.Now().UTC(), + } + + compra := helpers.Normalize(compraSTR) + venta := helpers.Normalize(ventaSTR) + + inst.Compra = compra + inst.Venta = venta + + if inst.Compra == 0 || inst.Venta == 0 { + return nil, fmt.Errorf("brd: institution not parsed") + } + + return inst, nil +} + +func ExecParser( + ctx context.Context, + db *db.DB, + browser *playwright.Browser, + log *slog.Logger) (err error) { + t := true + ua := helpers.NewMobileUA() + b := *browser + page, err := b.NewPage(playwright.BrowserNewPageOptions{ + UserAgent: &ua, + // IsMobile: &t, + HasTouch: &t, + Viewport: &playwright.Size{ + Width: 412, + Height: 915, + }, + Screen: &playwright.Size{ + Width: 412, + Height: 915, + }, + }) + if err != nil { + log.Error("creating page", "error", err) + os.Exit(1) + } + ctx, cancel := context.WithTimeout(ctx, 6*time.Minute) + defer page.Close() + defer cancel() + inst, err := Scrape(ctx, page, log) + if err != nil { + return err + } + err = db.Inspect(*inst) + + return err +} diff --git a/bhd/bhd.go b/bhd/bhd.go new file mode 100644 index 0000000..be7ae35 --- /dev/null +++ b/bhd/bhd.go @@ -0,0 +1,141 @@ +package bhd + +import ( + "context" + "fmt" + "log/slog" + "os" + "strings" + "time" + + "github.com/maximotejeda/us_dop_db/db" + "github.com/maximotejeda/us_dop_scrapper/helpers" + "github.com/playwright-community/playwright-go" +) + +var ( + uri = os.Getenv("BHD") +) + +// Scrape +// needs a mobile User 
Agent +func Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (inst *db.History, err error) { + tout := 120000.00 + log = log.With("scrapper", "bhd") + if _, err := page.Goto(uri, playwright.PageGotoOptions{ + Timeout: &tout, + WaitUntil: playwright.WaitUntilStateLoad, + }); err != nil { + log.Error("could not get info", "error", err) + return nil, err + } + + err = page.WaitForLoadState() + if err != nil { + log.Error("waiting for page state", "err", err) + return nil, err + } + + err = page.Locator("html body").WaitFor() + if err != nil { + log.Error("waiting for locating body", "err", err) + return nil, err + } + + fieldGroupLocator := page.Locator("div.field_group") + fieldGroupLocator.WaitFor() + fieldGroup, err := fieldGroupLocator.All() + //log.Info("divs", "number", len(fieldGroup)) + if err != nil { + log.Error("locating field group", "err", err) + return nil, err + } + + inst = &db.History{ + Name: "banco hipotecario dominicano", + Parser: "bhd", + Parsed: time.Now().UTC(), + } + + for _, it := range fieldGroup { + n, _ := it.AllTextContents() + if len(n) > 0 && (n[0] == "CompramosUS$RD$" || n[0] == "VendemosUS$RD$") { + // we work from here in the second div as first is amount of dollars + div := it.Locator("div") + /*input := div.Locator("div > div input") + val, _ := input.InputValue() + if val == "1"{ + input := div.Locator("div > div input") + }*/ + places, _ := div.Locator("div").All() + + for _, x := range places { + //txt, _ := x.TextContent() + input := x.Locator("div input") + if n, _ := input.Count(); n <= 0 { + //log.Info("no input") + continue + } + val, _ := input.InputValue() + if val == "" || val == "1" { + continue + } + place := strings.ReplaceAll(n[0], "US$RD$", "") + place = strings.ToLower(place) + price := helpers.Normalize(val) + if err != nil { + log.Error("parsing value", "where", place, "err", err) + return nil, err + } + switch place { + case "compramos": + inst.Compra = price + case "vendemos": + inst.Venta = price + } + } + 
+ } + } + if inst.Compra == 0 || inst.Venta == 0 { + return nil, fmt.Errorf("bhd: institution not parsed: %v", inst) + } + //log.Info(fmt.Sprintf("%v", inst)) + return inst, nil +} + +func ExecParser( + ctx context.Context, + db *db.DB, + browser *playwright.Browser, + log *slog.Logger) (err error) { + t := true + ua := helpers.NewMobileUA() + b := *browser + page, err := b.NewPage(playwright.BrowserNewPageOptions{ + UserAgent: &ua, + // IsMobile: &t, + HasTouch: &t, + Viewport: &playwright.Size{ + Width: 412, + Height: 915, + }, + Screen: &playwright.Size{ + Width: 412, + Height: 915, + }, + }) + if err != nil { + log.Error("creating page", "error", err) + os.Exit(1) + } + ctx, cancel := context.WithTimeout(ctx, 6*time.Minute) + defer page.Close() + defer cancel() + inst, err := Scrape(ctx, page, log) + if err != nil { + return err + } + err = db.Inspect(*inst) + return err +} diff --git a/bin/crawler-linux-amd64-0.0.0_1 b/bin/crawler-linux-amd64-0.0.0_1 new file mode 100755 index 0000000..d8844db Binary files /dev/null and b/bin/crawler-linux-amd64-0.0.0_1 differ diff --git a/bin/crawler-linux-arm64-0.0.0_1 b/bin/crawler-linux-arm64-0.0.0_1 new file mode 100755 index 0000000..e472c1d Binary files /dev/null and b/bin/crawler-linux-arm64-0.0.0_1 differ diff --git a/bnc/bnc.go b/bnc/bnc.go new file mode 100644 index 0000000..d9735cb --- /dev/null +++ b/bnc/bnc.go @@ -0,0 +1,97 @@ +package bnc + +import ( + "context" + "fmt" + "log/slog" + "os" + "time" + + "github.com/maximotejeda/us_dop_db/db" + "github.com/maximotejeda/us_dop_scrapper/helpers" + "github.com/playwright-community/playwright-go" +) + +var ( + uri = os.Getenv("BNC") +) + +func Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (inst *db.History, err error) { + tout := 120000.00 + log = log.With("scrapper", "bnc") + if _, err := page.Goto(uri, playwright.PageGotoOptions{ + Timeout: &tout, + WaitUntil: playwright.WaitUntilStateLoad, + }); err != nil { + log.Error("could not get info", 
"error", err) + return nil, err + } + + currencyDiv := page.Locator(".calculator__content") + currencyDiv.WaitFor() + + buyInput := page.Locator("div.calculator__buy-input:nth-child(2) > input:nth-child(2)") + + sellInput := page.Locator("div.calculator__sell-input:nth-child(2) > input:nth-child(2)") + + compraSTR, err := buyInput.InputValue() + if err != nil { + log.Error("could not get compra str", "err", err) + return nil, err + } + ventaSTR, err := sellInput.InputValue() + if err != nil { + log.Error("could not get venta string", "err", err) + return nil, err + } + inst = &db.History{ + Name: "banesco", + Parser: "bnc", + Parsed: time.Now().UTC(), + } + + inst.Venta = helpers.Normalize(ventaSTR) + inst.Compra = helpers.Normalize(compraSTR) + + if inst.Compra == 0 || inst.Venta == 0 { + return nil, fmt.Errorf("bnc: institution not parsed: %v", inst) + } + return inst, nil +} + +func ExecParser( + ctx context.Context, + db *db.DB, + browser *playwright.Browser, + log *slog.Logger) (err error) { + t := true + ua := helpers.NewMobileUA() + b := *browser + page, err := b.NewPage(playwright.BrowserNewPageOptions{ + UserAgent: &ua, + // IsMobile: &t, + HasTouch: &t, + Viewport: &playwright.Size{ + Width: 412, + Height: 915, + }, + Screen: &playwright.Size{ + Width: 412, + Height: 915, + }, + }) + if err != nil { + log.Error("creating page", "error", err) + os.Exit(1) + } + ctx, cancel := context.WithTimeout(ctx, 6*time.Minute) + defer page.Close() + defer cancel() + inst, err := Scrape(ctx, page, log) + // here we execute db operations + if err != nil { + return err + } + err = db.Inspect(*inst) + return err +} diff --git a/bpd/bpd.go b/bpd/bpd.go new file mode 100644 index 0000000..bf68654 --- /dev/null +++ b/bpd/bpd.go @@ -0,0 +1,120 @@ +package bpd + +import ( + "context" + "fmt" + "log/slog" + "os" + "strconv" + "time" + + "github.com/maximotejeda/us_dop_db/db" + "github.com/maximotejeda/us_dop_scrapper/helpers" + + "github.com/playwright-community/playwright-go" 
+) + +var ( + uri = os.Getenv("BPD") +) + +// Scrape +// needs a mobile User Agent +func Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (inst *db.History, err error) { + tout := 120000.00 + //start := time.Now() + log = log.With("scrapper", "bpd") + if _, err := page.Goto(uri, playwright.PageGotoOptions{ + Timeout: &tout, + WaitUntil: playwright.WaitUntilStateLoad, + }); err != nil { + log.Error("could not get info", "error", err) + return nil, err + } + //log.Info("Page loaded", "time", time.Since(start).Seconds()) + // menu := page.Locator(".footer_est_menu_bpd > li:nth-child(3)") + container := page.Locator("div.tasa.tasa_dolar") + container.WaitFor() + compraInput := page.Locator("input#compra_peso_dolar") + compraInput.WaitFor(playwright.LocatorWaitForOptions{ + Timeout: &tout, + State: playwright.WaitForSelectorStateVisible, + }) + + ventaInput := page.Locator("input#venta_peso_dolar") + ventaInput.WaitFor() + compraSTR, err := compraInput.InputValue() + if err != nil { + log.Error("compra value", "err", err) + return nil, err + } + // log.Info("Compra get value", "time", time.Since(start).Seconds()) + ventaSTR, err := ventaInput.InputValue() + if err != nil { + log.Error("compra value", "err", err) + return nil, err + } + inst = &db.History{ + Name: "banco popular", + Parser: "bpd", + Parsed: time.Now().UTC(), + } + compra, err := strconv.ParseFloat(compraSTR, 64) + if err != nil { + log.Error("parsing value", "where", "compra", "err", err) + return nil, err + } + venta, err := strconv.ParseFloat(ventaSTR, 64) + if err != nil { + log.Error("parsing value", "where", "venta", "err", err) + return nil, err + } + inst.Compra = compra + inst.Venta = venta + + if inst.Compra == 0 || inst.Venta == 0 { + return nil, fmt.Errorf("bpd: institution not parsed") + } + return inst, nil +} + +func HoverTasas(page playwright.Page) { + tasasMenu := page.Locator(".footer_est_menu_bpd > li:nth-child(3)") + tasasMenu.Hover() +} + +func ExecParser( + ctx 
context.Context, + db *db.DB, + browser *playwright.Browser, + log *slog.Logger) (err error) { + t := true + ua := helpers.NewMobileUA() + b := *browser + page, err := b.NewPage(playwright.BrowserNewPageOptions{ + UserAgent: &ua, + // IsMobile: &t, + HasTouch: &t, + Viewport: &playwright.Size{ + Width: 412, + Height: 915, + }, + Screen: &playwright.Size{ + Width: 412, + Height: 915, + }, + }) + if err != nil { + log.Error("creating page", "error", err) + os.Exit(1) + } + ctx, cancel := context.WithTimeout(ctx, 6*time.Minute) + defer page.Close() + defer cancel() + inst, err := Scrape(ctx, page, log) + if err != nil { + return err + } + err = db.Inspect(*inst) + return err +} diff --git a/cmd/crawler/main.go b/cmd/crawler/main.go index f8639e1..8e83f35 100644 --- a/cmd/crawler/main.go +++ b/cmd/crawler/main.go @@ -1,7 +1,79 @@ package main -import "fmt" +import ( + "context" + "log/slog" + "os" + "os/signal" + "syscall" + "time" -func main(){ - fmt.Println("working imageb") + "github.com/maximotejeda/us_dop_db/db" + "github.com/maximotejeda/us_dop_scrapper/apa" + "github.com/maximotejeda/us_dop_scrapper/bcd" + "github.com/maximotejeda/us_dop_scrapper/bdr" + "github.com/maximotejeda/us_dop_scrapper/bhd" + "github.com/maximotejeda/us_dop_scrapper/bnc" + "github.com/maximotejeda/us_dop_scrapper/bpd" + "github.com/maximotejeda/us_dop_scrapper/helpers" + "github.com/maximotejeda/us_dop_scrapper/inf" + "github.com/maximotejeda/us_dop_scrapper/scotia" + "github.com/maximotejeda/us_dop_scrapper/vimenca" + "github.com/playwright-community/playwright-go" +) + +func main() { + var err error + dbRoute := os.Getenv("DBURI") + sig := make(chan os.Signal, 1) + signal.Notify(sig, os.Interrupt, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP) + log := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{})) + db := db.Dial(dbRoute, log) + // create a chrome and feed parsers + db.CreateTables() + chrome, firefox, webkit := helpers.CreateBrowser(log) + browserList := 
[]*playwright.Browser{chrome, firefox, webkit} + longTick := time.NewTicker(time.Minute * 2) + defer longTick.Stop() + infoTick := time.NewTicker(time.Minute * 1) + defer infoTick.Stop() + ctx, cancel := context.WithCancel(context.Background()) + errN := map[string]int{ + "bcd": 0, + "bpd": 0, + "apap": 0, + "inf": 0, + } + + defer cancel() + who := os.Getenv("WHO") + + switch who { + case "bcd": + err = helpers.ExecTask(ctx, db, browserList, log, errN, "bcd", bcd.ExecParser) + case "bpd": + err = helpers.ExecTask(ctx, db, browserList, log, errN, "bpd", bpd.ExecParser) + case "apa": + err = helpers.ExecTask(ctx, db, browserList, log, errN, "apa", apa.ExecParser) + case "brd": + err = helpers.ExecTask(ctx, db, browserList, log, errN, "brd", bdr.ExecParser) + case "bhd": + err = helpers.ExecTask(ctx, db, browserList, log, errN, "bhd", bhd.ExecParser) + case "bnc": + err = helpers.ExecTask(ctx, db, browserList, log, errN, "bnc", bnc.ExecParser) + case "scotia": + err = helpers.ExecTask(ctx, db, browserList, log, errN, "scotia", scotia.ExecParser) + case "vimenca": + err = helpers.ExecTask(ctx, db, browserList, log, errN, "vimenca", vimenca.ExecParser) + default: + err = helpers.ExecTask(ctx, db, browserList, log, errN, "inf", inf.ExecParser) + } + + if err != nil { + log.Info("task executed with errors", "name", who, "error", err) + os.Exit(1) + return + } + log.Info("SUCCESS - task executed", "name", who) + os.Exit(0) } diff --git a/config/config.go b/config/config.go new file mode 100644 index 0000000..d429248 --- /dev/null +++ b/config/config.go @@ -0,0 +1,14 @@ +package config + +import "os" + +func GetWho() string { + return getEnvValue("WHO") +} + +func getEnvValue(key string) string { + if os.Getenv(key) == "" { + panic("key not found " + key) + } + return os.Getenv(key) +} diff --git a/db-old/db.go b/db-old/db.go new file mode 100644 index 0000000..4e1e32c --- /dev/null +++ b/db-old/db.go @@ -0,0 +1,423 @@ +package db + +import ( + "database/sql" + _ "embed" + 
"encoding/json" + "errors" + "fmt" + "log/slog" + "strings" + "time" + + "github.com/maximotejeda/us_dop_scrapper/models" + "github.com/maximotejeda/us_dop_scrapper/pub" + _ "modernc.org/sqlite" +) + +//go:embed schema.sql +var schema string + +type DB struct { + *sql.DB + log *slog.Logger +} + +type change struct { + Before models.Institucion `json:"before"` + After models.Institucion `json:"after"` +} + +type Message struct { + Message string `json:"message"` + Data change `json:"data"` + Error error `json:"error"` +} + +type Institution struct { + ID int + Name string + ShortName string + Created time.Time +} + +// Dial +func Dial(path string, log *slog.Logger) *DB { + db, err := sql.Open("sqlite", path) + if err != nil { + fmt.Printf("opening database: %s", err.Error()) + panic("opening database") + } + if err := db.Ping(); err != nil { + fmt.Printf("pinging database: %s", err.Error()) + panic("pinging database") + } + return &DB{db, log} +} + +// Schema +func (db *DB) CreateTables() { + _, err := db.Exec(schema) + if err != nil { + panic(err) + } +} + +// Inspect +// Handle behavior of the changes +// Will report errors to a nats consumer +func (db *DB) Inspect(enter models.Institucion) error { + if db == nil { + return fmt.Errorf("nil or empty database") + } + pub, close := pub.Publisher() + defer close() + msg := Message{} + // Get last row added + + inst, err := db.GetLatest(enter.Parser, enter.Name) + // if no rows are found because of first enter a name - parser ? 
+ if errors.Is(err, sql.ErrNoRows) { + db.log.Info("adding new item to table: ", "parse", enter.Parser, "name", enter.Name) + msg.Message = "add new institution" + msg.Data.After = enter + + data, err := json.Marshal(msg) + if err != nil { + db.log.Error("marshaling struct", "error", err) + } + + id, err := db.ADDInstitution(enter.Name) + if err != nil { + return err + } + defer pub("dolar-crawler", data) + return db.AddNew(enter, id) + } + + // check prices compra venta + if inst == nil { + db.log.Error("row is nil", "name", enter.Name, "parser", enter.Parser) + return fmt.Errorf("row is nil, not entering row") + } + if enter.Compra == inst.Compra && enter.Venta == inst.Venta { + return nil + } else { + // if one of them changes create a new row + db.log.Info("change registered, adding item", "parse", enter.Parser, "name", enter.Name, "compra enter", enter.Compra, "compra db", inst.Compra, "venta enter", enter.Venta, "venta db", inst.Venta) + + msg.Message = "change registered" + msg.Data.After = enter + msg.Data.Before = *inst + + data, err := json.Marshal(msg) + if err != nil { + db.log.Error("marshaling struct", "error", err) + } + ins, err := db.GETInstitution(enter.Name) + if err != nil { + return err + } + + defer pub("dolar-crawler", data) + return db.AddNew(enter, int64(ins.ID)) + } +} + +// GetLatest +// returns the latest row in a specific parser and name +// we are using DateTime in DB and date.Datetime in go +func (db *DB) GetLatest(parser string, name string) (inst *models.Institucion, err error) { + var parsed string + inst = &models.Institucion{} + stmtt, err := db.Prepare("SELECT i.name, d.parser, d.compra, d.venta, d.parsed FROM dolars AS d JOIN institutions as i ON d.name_id = i.id WHERE d.parser = ? AND i.name = ? 
ORDER BY d.parsed DESC LIMIT 1;") + if err != nil { + db.log.Error("preparing stmtt", "error", err.Error()) + return nil, err + } + defer stmtt.Close() + + if err := stmtt.QueryRow(parser, name).Scan(&inst.Name, &inst.Parser, &inst.Compra, &inst.Venta, &parsed); err != nil { + db.log.Error("getting latest", "error", err.Error(), "parser", parser, "name", name) + return nil, err + } + + inst.Parsed, err = time.Parse(time.DateTime, parsed) + if err != nil { + //db.log.Error("parsed", "error", err.Error()) + return nil, err + } + return inst, nil +} + +// AddNew +// Add a new row in the dolar table +// Will send to nats changes on prices +func (db *DB) AddNew(row models.Institucion, id int64) error { + stmt, err := db.Prepare("INSERT INTO dolars (name_id, compra, venta, parser, parsed) VALUES(?,?,?,?,?);") + if err != nil { + return err + } + defer stmt.Close() + parsed := row.Parsed.Format(time.DateTime) + _, err = stmt.Exec(&id, &row.Compra, &row.Venta, &row.Parser, &parsed) + if err != nil { + return err + } + return nil +} + +func (db *DB) ADDInstitution(name string) (id int64, err error) { + stmt, err := db.Prepare("INSERT INTO institutions (name, short_name, created) VALUES(?,?,?);") + if err != nil { + return 0, err + } + defer stmt.Close() + parsed := time.Now().Format(time.DateTime) + short := shortner(name) + res, err := stmt.Exec(&name, short, &parsed) + if err != nil { + return 0, err + } + id, err = res.LastInsertId() + if err != nil { + return 0, err + } + return id, nil + +} +func (db *DB) GETInstitution(name string) (inst *Institution, err error) { + institution := Institution{} + stmtt, err := db.Prepare("SELECT id, name, short_name FROM institutions WHERE name = ?") + if err != nil { + db.log.Error("preparing stmt", "error", err.Error()) + return nil, err + } + defer stmtt.Close() + if err := stmtt.QueryRow(name).Scan(&institution.ID, &institution.Name, &institution.ShortName); err != nil { + db.log.Error("getting institution", "error", err.Error(), 
"short name", institution.ShortName, "name", name) + return nil, err + } + return &institution, nil +} + +func (db *DB) GetAll() ([]string, error) { + stmt, err := db.Prepare("SELECT i.name FROM institutions AS i;") + if err != nil { + db.log.Error("[db-GetAll]", "error", err) + return nil, err + } + rows, err := stmt.Query() + if err != nil { + db.log.Error("[db-GetAll-stmt]", "error", err) + return nil, err + } + defer rows.Close() + insts := []string{} + for rows.Next() { + inst := "" + + if err = rows.Scan(&inst); err != nil { + return nil, err + } + if inst == "" { + continue + } + insts = append(insts, inst) + } + if err := rows.Err(); err != nil { + return insts, err + } + return insts, nil + +} +func (db *DB) GetBancos() ([]string, error) { + stmt, err := db.Prepare("SELECT i.name FROM institutions AS i WHERE i.name LIKE '%ban%' OR i.name LIKE '%scoti%'") + if err != nil { + db.log.Error("[inst-GetAll]", "error", err) + return nil, err + } + rows, err := stmt.Query() + if err != nil { + db.log.Error("[inst-GetAll-stmt]", "error", err) + return nil, err + } + defer rows.Close() + insts := []string{} + for rows.Next() { + inst := "" + + if err = rows.Scan(&inst); err != nil { + return nil, err + } + if inst == "" { + continue + } + insts = append(insts, inst) + } + if err := rows.Err(); err != nil { + return insts, err + } + return insts, nil + +} +func (db *DB) GetCajas() ([]string, error) { + stmt, err := db.Prepare("SELECT i.name FROM institutions AS i WHERE i.name LIKE '%asociacion%'") + if err != nil { + db.log.Error("[inst-GetAll]", "error", err) + return nil, err + } + rows, err := stmt.Query() + if err != nil { + db.log.Error("[inst-GetAll-stmt]", "error", err) + return nil, err + } + defer rows.Close() + insts := []string{} + for rows.Next() { + inst := "" + + if err = rows.Scan(&inst); err != nil { + return nil, err + } + if inst == "" { + continue + } + insts = append(insts, inst) + } + if err := rows.Err(); err != nil { + return insts, err + } + return 
insts, nil + +} + +func (db *DB) GetAgentes() ([]string, error) { + stmt, err := db.Prepare("SELECT i.name FROM institutions AS i WHERE i.name NOT LIKE '%ban%' AND i.name NOT LIKE '%scoti%' AND i.name NOT LIKE '%asociacion%'") + if err != nil { + db.log.Error("[inst-GetAll]", "error", err) + return nil, err + } + rows, err := stmt.Query() + if err != nil { + db.log.Error("[inst-GetAll-stmt]", "error", err) + return nil, err + } + defer rows.Close() + insts := []string{} + for rows.Next() { + inst := "" + + if err = rows.Scan(&inst); err != nil { + return nil, err + } + if inst == "" { + continue + } + insts = append(insts, inst) + } + if err := rows.Err(); err != nil { + return insts, err + } + return insts, nil + +} + +func (db *DB) GetLastPrice(name string) (inst *models.Institucion, err error) { + var parsed string + inst = &models.Institucion{} + stmt, err := db.Prepare("SELECT i.name, d.parser, d.compra, d.venta, d.parsed FROM dolars AS d JOIN institutions as i ON d.name_id = i.id WHERE name = ? ORDER BY parsed DESC LIMIT 1;") + if err != nil { + db.log.Error("preparing", "error", err.Error()) + return nil, err + } + defer stmt.Close() + + if err := stmt.QueryRow(name).Scan(&inst.Name, &inst.Parser, &inst.Compra, &inst.Venta, &parsed); err != nil { + db.log.Error("getting last price", "error", err.Error(), "name", name) + return nil, err + } + + inst.Parsed, err = time.Parse(time.DateTime, parsed) + if err != nil { + //db.log.Error("parsed", "error", err.Error()) + return nil, err + } + return inst, nil + +} +func (db *DB) GetChangeSince(name string, duration time.Duration) (insts []*models.Institucion, err error) { + date := time.Now().Add(-duration).Format(time.DateTime) + stmt, err := db.Prepare("SELECT i.name, d.parser, d.compra, d.venta, d.parsed FROM dolars AS d JOIN institutions as i ON d.name_id = i.id WHERE name = ? AND parsed > ? 
// leadingRunes returns the first n runes of s, or all of s when it holds
// fewer. Cutting on rune boundaries keeps multi-byte characters intact.
func leadingRunes(s string, n int) string {
	for i := range s {
		if n == 0 {
			return s[:i]
		}
		n--
	}
	return s
}

// shortner derives the short identifier for an institution name.
//
// Well-known institutions are matched case-insensitively against a fixed
// table. Any other name is abbreviated by its number of words:
//
//	0 words (empty or blank name) -> ""
//	1 word                        -> the word itself
//	2 words                       -> first letter of word one + first two letters of word two
//	3 words                       -> the three initials
//	more                          -> "n/a"
func shortner(name string) string {
	if name == "" {
		return ""
	}
	switch strings.ToLower(name) {
	case "banco popular":
		return "bpd"
	case "banreservas":
		return "brd"
	case "banco central dominicano":
		return "bcd"
	case "banco hipotecario dominicano":
		return "bhd"
	case "asociacion popular de ahorros y prestamos":
		return "apap"
	case "asociacion cibao de ahorros y prestamos":
		return "acap"
	case "asociacion la nacional de ahorros y prestamos":
		return "alnap"
	case "asociacion peravia de ahorros y prestamos":
		return "apeap"
	case "banco santa cruz":
		return "bsc"
	case "imbert y balbuena":
		return "imb"
	case "banco activo dominicana":
		return "bacd"
	case "scotiabank cambio online":
		return "scline"
	case "banco lopez de haro":
		return "blh"
	}

	// strings.Fields never yields empty words, so doubled, leading or trailing
	// spaces can no longer cause an out-of-range index.
	words := strings.Fields(name)
	switch len(words) {
	case 0:
		return ""
	case 1:
		return words[0]
	case 2:
		return leadingRunes(words[0], 1) + leadingRunes(words[1], 2)
	case 3:
		// Concatenate the initials. The previous code added the three bytes
		// together and returned a single unrelated character.
		return leadingRunes(words[0], 1) + leadingRunes(words[1], 1) + leadingRunes(words[2], 1)
	default:
		return "n/a"
	}
}
+CREATE TABLE IF NOT EXISTS 'dolars' ( + id INTEGER PRIMARY KEY, + name_id INTEGER NOT NULL, + compra REAL NOT NULL, + venta REAL NOT NULL, + parser TEXT NOT NULL, + parsed TEXT NOT NULL, + FOREIGN KEY(name_id) REFERENCES institutions(id) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS 'institutions' ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL, + short_name TEXT NOT NULL, + created TEXT NOT NULL +); diff --git a/docker-compose.yaml b/docker-compose.yaml index e92a34e..1a46bab 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -2,9 +2,13 @@ version: '3.8' services: crawler: - container-name: creawler build: dockerfile: Dockerfile env_file: - - .env + - .env + develop: + watch: + - action: rebuild + path: ./ + diff --git a/dolardb/crawler.db b/dolardb/crawler.db new file mode 100644 index 0000000..94822c0 Binary files /dev/null and b/dolardb/crawler.db differ diff --git a/go.mod b/go.mod index fa2ac7a..a8750ca 100644 --- a/go.mod +++ b/go.mod @@ -1,12 +1,39 @@ module github.com/maximotejeda/us_dop_scrapper -go 1.22 +go 1.22.0 -require github.com/playwright-community/playwright-go v0.4001.0 +require ( + github.com/nats-io/nats.go v1.33.1 + github.com/playwright-community/playwright-go v0.4001.0 + golang.org/x/text v0.14.0 + modernc.org/sqlite v1.29.1 +) require ( github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect github.com/go-jose/go-jose/v3 v3.0.1 // indirect github.com/go-stack/stack v1.8.1 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect + github.com/klauspost/compress v1.17.2 // indirect + github.com/mattn/go-isatty v0.0.16 // indirect + github.com/maximotejeda/msvc-proto/golang/dolar v0.0.0-3 // indirect + github.com/nats-io/nkeys v0.4.7 // indirect + github.com/nats-io/nuid v1.0.1 // indirect + github.com/ncruces/go-strftime v0.1.9 // indirect + github.com/remyoudompheng/bigfft 
v0.0.0-20230129092748-24d4a6f8daec // indirect go.uber.org/multierr v1.11.0 // indirect + golang.org/x/crypto v0.19.0 // indirect + golang.org/x/net v0.21.0 // indirect + golang.org/x/sys v0.17.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de // indirect + google.golang.org/grpc v1.63.2 // indirect + google.golang.org/protobuf v1.33.0 // indirect + modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 // indirect + modernc.org/libc v1.41.0 // indirect + modernc.org/mathutil v1.6.0 // indirect + modernc.org/memory v1.7.2 // indirect + modernc.org/strutil v1.2.0 // indirect + modernc.org/token v1.1.0 // indirect ) diff --git a/go.sum b/go.sum index 85ba0f6..d4ba868 100644 --- a/go.sum +++ b/go.sum @@ -3,17 +3,44 @@ github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964/go.mod h1:Xd9 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/go-jose/go-jose/v3 v3.0.1 h1:pWmKFVtt+Jl0vBZTIpz/eAKwsm6LkIxDVVbFHKkchhA= github.com/go-jose/go-jose/v3 v3.0.1/go.mod h1:RNkWWRld676jZEYoV3+XK8L2ZnNSvIsxFMht0mSX+u8= github.com/go-stack/stack v1.8.1 h1:ntEHSVwIt7PNXNpgPmVfMrNhLtgjlmnZha2kOpuRiDw= github.com/go-stack/stack v1.8.1/go.mod h1:dcoOX6HbPZSZptuspn9bctJ+N/CnF5gGygcUP3XYfe4= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbuBVKCudVG457BR2GZFIz3uw3hQ= +github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo= +github.com/google/uuid v1.3.0 
h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= +github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4= +github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/mattn/go-isatty v0.0.16 h1:bq3VjFmv/sOjHtdEhmkEV4x1AJtvUvOJ2PFAZ5+peKQ= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-sqlite3 v1.14.16 h1:yOQRA0RpS5PFz/oikGwBEqvAWhWg5ufRz4ETLjwpU1Y= +github.com/mattn/go-sqlite3 v1.14.16/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= +github.com/maximotejeda/msvc-proto/golang/dolar v0.0.0-3 h1:J2qbFWgrnQvcVeHGoqBz5YtfLZW2uG18xYRk09BXIeI= +github.com/maximotejeda/msvc-proto/golang/dolar v0.0.0-3/go.mod h1:bAs0mlC1Vyn/BkHONL2Ik8ox9px9s9bhbJWgUQFMMWo= github.com/mitchellh/go-ps v1.0.0 h1:i6ampVEEF4wQFF+bkYfwYgY+F/uYJDktmvLPf7qIgjc= github.com/mitchellh/go-ps v1.0.0/go.mod h1:J4lOc8z8yJs6vUwklHw2XEIiT4z4C40KtWVN3nvg8Pg= +github.com/nats-io/nats.go v1.33.1 h1:8TxLZZ/seeEfR97qV0/Bl939tpDnt2Z2fK3HkPypj70= +github.com/nats-io/nats.go v1.33.1/go.mod h1:Ubdu4Nh9exXdSz0RVWRFBbRfrbSxOYd26oF0wkWclB8= +github.com/nats-io/nkeys v0.4.7 h1:RwNJbbIdYCoClSDNY7QVKZlyb/wfT6ugvFCiKy6vDvI= +github.com/nats-io/nkeys v0.4.7/go.mod h1:kqXRgRDPlGy7nGaEDMuYzmiJCIAAWDK0IMBtDmGD0nc= +github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= +github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= +github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4= +github.com/ncruces/go-strftime v0.1.9/go.mod 
h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/playwright-community/playwright-go v0.4001.0 h1:2cBiTIjCvFu7zUrZ48C0YC2DIp90Tbudueq4brUGjHM= github.com/playwright-community/playwright-go v0.4001.0/go.mod h1:quEkYFrvvpQyGSxBjnYbGS52vrUDB2uaY1cOzkkSHCc= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= @@ -22,12 +49,48 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190911031432-227b76d455e7/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc= +golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= +golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= +golang.org/x/mod v0.14.0 h1:dGoOF9QVLYng8IHTm7BAyWqCqSheQ5pYWGhzW00YJr0= +golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4= +golang.org/x/net v0.21.0/go.mod 
h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= +golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.17.0 h1:FvmRgNOcs3kOa+T20R1uhfP9F6HgG2mfxDv1vrx1Htc= +golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de h1:cZGRis4/ot9uVm639a+rHCUaG0JJHEsdyzSQTMX+suY= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:H4O17MA/PE9BsGx3w+a+W2VOLLD1Qf7oJneAoU6WktY= +google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM= +google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod 
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 h1:5D53IMaUuA5InSeMu9eJtlQXS2NxAhyWQvkKEgXZhHI= +modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6/go.mod h1:Qz0X07sNOR1jWYCrJMEnbW/X55x206Q7Vt4mz6/wHp4= +modernc.org/libc v1.41.0 h1:g9YAc6BkKlgORsUWj+JwqoB1wU3o4DE3bM3yvA3k+Gk= +modernc.org/libc v1.41.0/go.mod h1:w0eszPsiXoOnoMJgrXjglgLuDy/bt5RR4y3QzUUeodY= +modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4= +modernc.org/mathutil v1.6.0/go.mod h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo= +modernc.org/memory v1.7.2 h1:Klh90S215mmH8c9gO98QxQFsY+W451E8AnzjoE2ee1E= +modernc.org/memory v1.7.2/go.mod h1:NO4NVCQy0N7ln+T9ngWqOQfi7ley4vpwvARR+Hjw95E= +modernc.org/sqlite v1.29.1 h1:19GY2qvWB4VPw0HppFlZCPAbmxFU41r+qjKZQdQ1ryA= +modernc.org/sqlite v1.29.1/go.mod h1:hG41jCYxOAOoO6BRK66AdRlmOcDzXf7qnwlwjUIOqa0= +modernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA= +modernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0= +modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= +modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= diff --git a/helpers/.#helpers.go b/helpers/.#helpers.go new file mode 120000 index 0000000..feda370 --- /dev/null +++ b/helpers/.#helpers.go @@ -0,0 +1 @@ +maximo@debian-pc.9800:1713363571 \ No newline at end of file diff --git a/helpers/helpers.go b/helpers/helpers.go new file mode 100644 index 0000000..4e15c21 --- /dev/null +++ b/helpers/helpers.go @@ -0,0 +1,198 @@ +package helpers + +import ( + "context" + "fmt" + "log/slog" + "math/rand" + "os" + "strconv" + "strings" + + "unicode" + + "github.com/maximotejeda/us_dop_db/db" + "github.com/playwright-community/playwright-go" + "golang.org/x/text/runes" + 
"golang.org/x/text/transform" + "golang.org/x/text/unicode/norm" +) + +type UserAgent struct { + version string + system string + platformInfo string + platformDetails string + extensionsinfo string +} + +var ( + systems = []string{ + "Macintosh; Intel Mac OS X 10_15_7", + "Windows NT 10.0; Win64; x64", + "Windows NT 6.1; Win64; x64; rv:109.0", + "X11; Linux x86_64", + "X11; CrOS x86_64 14541.0.0", + "Linux; Android 10; K", + "iPhone; CPU iPhone OS 17_1_2 like Mac OS X", + "iPhone; CPU iPhone OS 14_6 like Mac OS X", + "Linux; Android 9; JAT-L41", + "Linux; Android 11; SAMSUNG SM-G973U", + "iPad; CPU OS 14_7_1 like Mac OS X", + "Linux; U; en-us; KFAPWI Build/JDQ39", + } + platformInfo = []string{ + "AppleWebKit/605.1.15", + "AppleWebKit/537.36", + "Gecko/20100101", + } + platformDetails = []string{ + "KHTML, like Gecko", + } + extensionInfo = []string{ + "Firefox", + "Chrome/87.0.42", + "Safari/604.1", + "Safari/537.36", + "Version/14.1.2", + } +) + +// NewUA +func NewUA() string { + ua := UserAgent{} + ua.version = "Mozilla/5.0" + + ua.system = systems[rand.Intn(len(systems))] + ua.platformInfo = platformInfo[rand.Intn(len(platformInfo))] + ua.platformDetails = platformDetails[0] + ua.extensionsinfo = extensionInfo[rand.Intn(len(extensionInfo))] + extra := "" + if ua.extensionsinfo == "Chrome" { + extra = "Safari/537.3" + } + version := rand.Intn(20) + 100 + + return fmt.Sprintf("%s (%s) %s (%s) %s/%d %s", ua.version, ua.system, ua.platformInfo, ua.platformDetails, ua.extensionsinfo, version, extra) +} + +// NewMobileUA +// returns an Ua string from distinct mobiles +func NewMobileUA() string { + ua := UserAgent{} + ua.version = "Mozilla/5.0" + + s := systems[5:] + ua.system = s[rand.Intn(len(s))] + ua.platformInfo = platformInfo[rand.Intn(len(platformInfo))] + ua.platformDetails = platformDetails[0] + ua.extensionsinfo = extensionInfo[rand.Intn(len(extensionInfo))] + extra := "mobile Safari/537.3" + switch { + case strings.Contains(ua.system, "iPhone"), 
strings.Contains(ua.system, "iPad"): + extra = "Mobile/15E148 Safari/604.1" + ua.extensionsinfo = "Version/14.1.2" + case strings.Contains(ua.system, "Android"): + ie := extensionInfo[0:2] + selected := ie[rand.Intn(len(ie))] + if strings.Contains(selected, "Firefox") { + extra = "Firefox/114.0" + ua.extensionsinfo = "Gecko/114.0" + } else { + ua.extensionsinfo = "Chrome/114.0.0.0" + extra = "Mobile Safari/537.36" + } + + case strings.Contains(ua.system, "Android"): + extra = "Mobile Safari/537.36" + + } + + return fmt.Sprintf("%s (%s) %s (%s) %s %s", ua.version, ua.system, ua.platformInfo, ua.platformDetails, ua.extensionsinfo, extra) +} + +// Normalize +func Normalize(val string) float64 { + nString := []rune{} + for _, v := range val { + if !unicode.IsNumber(v) && v != '.' { + continue + } + nString = append(nString, v) + } + if len(nString) > 0 { + cv, err := strconv.ParseFloat(string(nString), 64) + if err != nil { + fmt.Printf("%s", err) + } + return cv + } + return 0 +} + +// CreateBrowser +func CreateBrowser(log *slog.Logger) (chrome *playwright.Browser, firefox *playwright.Browser, webkit *playwright.Browser) { + pw, err := playwright.Run(&playwright.RunOptions{ + Verbose: true, + }) + + if err != nil { + log.Error("running pw, could not start", "error", err) + os.Exit(1) + } + + ff, err := pw.Firefox.Launch() + if err != nil { + log.Error("could not start browser", "error", err) + os.Exit(1) + } + cm, err := pw.Firefox.Launch() + if err != nil { + log.Error("could not start browser", "error", err) + os.Exit(1) + } + sf, err := pw.WebKit.Launch() + if err != nil { + log.Error("could not start browser", "error", err) + os.Exit(1) + } + return &cm, &ff, &sf +} + +// ExecTask +func ExecTask( + ctx context.Context, + + dbi *db.DB, + browser []*playwright.Browser, + log *slog.Logger, + errCounter map[string]int, + parserName string, + parserExecution func(context.Context, *db.DB, *playwright.Browser, *slog.Logger) error) (err error) { + err = 
parserExecution(ctx, dbi, browser[0], log) + if err != nil { + errCounter[parserName]++ + log.Error(err.Error(), "parser", parserName) + // todo want a retry with different browser firefox + err = parserExecution(ctx, dbi, browser[1], log) + + if err != nil { + errCounter[parserName]++ + } + } + log.Info("executed", "parser", parserName, "errors", errCounter[parserName]) + return err +} + +// RemoveAccent +// helps normalize names in db +// https://stackoverflow.com/questions/24588295/go-removing-accents-from-strings +func RemoveAccent(str string) string { + if str == "" { + return "" + } + t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC) + s, _, _ := transform.String(t, str) + + return s +} diff --git a/helpers/helpers_test.go b/helpers/helpers_test.go new file mode 100644 index 0000000..20d2c9e --- /dev/null +++ b/helpers/helpers_test.go @@ -0,0 +1,40 @@ +package helpers + +import "testing" + +func TestNormalize(t *testing.T) { + type scenario struct { + name string + value string + result float64 + err error + } + cases := []scenario{ + { + name: "sucess/parse/dolar", + value: "$58.40", + result: 58.40, + err: nil, + }, + { + name: "sucess/parse/dolar", + value: "dollar $58.40", + result: 58.40, + err: nil, + }, + { + name: "sucess/parse/dolar", + value: "$", + result: 0, + err: nil, + }, + } + for idx, tt := range cases { + t.Run(tt.name, func(t *testing.T) { + r := Normalize(tt.value) + if r != tt.result { + t.Errorf("case #%d - wanted: %f - got: %f", idx, tt.result, r) + } + }) + } +} diff --git a/inf/general.go b/inf/general.go new file mode 100644 index 0000000..3fea88f --- /dev/null +++ b/inf/general.go @@ -0,0 +1,135 @@ +package inf + +import ( + "context" + "fmt" + "log/slog" + "os" + "strings" + "time" + + "github.com/maximotejeda/us_dop_db/db" + "github.com/maximotejeda/us_dop_scrapper/helpers" + "github.com/playwright-community/playwright-go" +) + +var ( + uri = os.Getenv("GENERAL") +) + +// Scrape +func Scrape(ctx 
context.Context, page playwright.Page, log *slog.Logger) (instList []*db.History, err error) { + log = log.With("scrapper", "general") + tout := float64(120000) + if _, err := page.Goto(uri, playwright.PageGotoOptions{ + Timeout: &tout, + WaitUntil: playwright.WaitUntilStateLoad, + }); err != nil { + log.Error("could not get info", "error", err) + return nil, err + } + + entriesLocator := page.Locator("table#Dolar > tbody > tr") + entriesLocator.WaitFor(playwright.LocatorWaitForOptions{ + Timeout: &tout, + State: playwright.WaitForSelectorStateVisible, + }) + + entries, err := entriesLocator.All() + + if err != nil { + log.Error("could not get info", "error", err) + return nil, err + } + scotia := false // in this page there are 2 scotia one the change online the other is tha bank + instList = []*db.History{} + for _, entry := range entries { + inst := &db.History{ + Parser: "inf", + } + title, _ := entry.Locator("span.nombre").TextContent() + if strings.ToLower(title) == "scotiabank" && !scotia { + title = "scotiabank cambio online" + scotia = true + } + name := "" + if title != "" { + name = helpers.RemoveAccent(strings.ToLower(title)) + } else { + continue + } + inst.Name = name + + compraLocator, ventaLocator := entry.Locator("td:nth-child(2)"), entry.Locator("td:nth-child(3)") + compra := getValue(compraLocator) + venta := getValue(ventaLocator) + + inst.Compra = helpers.Normalize(compra) + inst.Venta = helpers.Normalize(venta) + + inst.Parsed = time.Now().UTC() + // if one of the inst has 0 on the sell/buy dont process it + if inst.Compra == 0 || inst.Venta == 0 { + log.Warn("skipping", "nombre", inst.Name, "compra", inst.Compra, "venta", inst.Venta) + continue + } + instList = append(instList, inst) + } + + return instList, nil +} +func getValue(place playwright.Locator) string { + text, _ := place.AllInnerTexts() + value := "" + if len(text) <= 0 { + return "" + } + nextList := strings.Split(text[0], " ") + if len(nextList) > 0 { + value = 
strings.Replace(nextList[0], "=", "", 1) + } + return value +} + +// ExecParser +func ExecParser( + ctx context.Context, + db *db.DB, + browser *playwright.Browser, + log *slog.Logger) error { + t := true + ua := helpers.NewMobileUA() + b := *browser + page, err := b.NewPage(playwright.BrowserNewPageOptions{ + UserAgent: &ua, + // IsMobile: &t, + HasTouch: &t, + Viewport: &playwright.Size{ + Width: 412, + Height: 915, + }, + Screen: &playwright.Size{ + Width: 412, + Height: 915, + }, + }) + if err != nil { + log.Error("creating page", "error", err) + os.Exit(1) + } + ctx, cancel := context.WithTimeout(ctx, 6*time.Minute) + defer page.Close() + defer cancel() + instList, err := Scrape(ctx, page, log) + if err != nil { + return err + } + for _, inst := range instList { + log.Info("processing", "name", inst.Name) + err = db.Inspect(*inst) + if err != nil { + log.Error(fmt.Sprintf("inspecting %s", inst.Name), "error", err) + } + } + return err +} diff --git a/inf/inf_test.go b/inf/inf_test.go new file mode 100644 index 0000000..8e3d714 --- /dev/null +++ b/inf/inf_test.go @@ -0,0 +1 @@ +package inf diff --git a/internal/adapters/crawler/apap.go b/internal/adapters/crawler/apap.go new file mode 100644 index 0000000..d2f2307 --- /dev/null +++ b/internal/adapters/crawler/apap.go @@ -0,0 +1,104 @@ +package crawler + +import ( + "context" + "fmt" + "log/slog" + "os" + "time" + + "github.com/maximotejeda/us_dop_scrapper/helpers" + "github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain" + "github.com/maximotejeda/us_dop_scrapper/internal/ports" + "github.com/playwright-community/playwright-go" +) + +var ( + uri = os.Getenv("APA") +) + +type Apap struct { +} + +func NewApap() ports.APIPorts { + return &Apap{} +} + +func (a Apap) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) { + tout := 120000.00 + log = log.With("scrapper", "apap") + if _, err := page.Goto(uri, playwright.PageGotoOptions{ + Timeout: 
&tout, + WaitUntil: playwright.WaitUntilStateLoad, + }); err != nil { + log.Error("could not get info", "error", err) + return nil, err + } + button := page.Locator("#exchangesRates") + button.WaitFor() + button.Click() + + compraLocator := page.Locator("#currency-buy-USD") + ventaLocator := page.Locator("#currency-sell-USD") + + compraSTR, err := compraLocator.TextContent() + if err != nil { + log.Error("could not get compra str", "err", err) + return nil, err + } + ventaSTR, err := ventaLocator.TextContent() + if err != nil { + log.Error("could not get venta string", "err", err) + return nil, err + } + inst := &domain.History{ + Name: "asociacion popular de ahorros y prestamos", + Parser: "apap", + Parsed: time.Now().Unix(), + } + + inst.Venta = helpers.Normalize(ventaSTR) + inst.Compra = helpers.Normalize(compraSTR) + + if inst.Compra == 0 || inst.Venta == 0 { + return nil, fmt.Errorf("apa: institution not parsed: %v", inst) + } + log.Info("parsed", "value", inst) + return []*domain.History{inst}, nil +} + +func (a Apap) ExecParser( + ctx context.Context, + browser *playwright.Browser, + log *slog.Logger) (err error) { + t := true + ua := helpers.NewMobileUA() + b := *browser + page, err := b.NewPage(playwright.BrowserNewPageOptions{ + UserAgent: &ua, + // IsMobile: &t, + HasTouch: &t, + Viewport: &playwright.Size{ + Width: 412, + Height: 915, + }, + Screen: &playwright.Size{ + Width: 412, + Height: 915, + }, + }) + if err != nil { + log.Error("creating page", "error", err) + os.Exit(1) + } + + ctx, cancel := context.WithTimeout(ctx, 6*time.Minute) + defer page.Close() + defer cancel() + _, err = a.Scrape(ctx, page, log) + // here we execute db operations + if err != nil { + return err + } + return err +} diff --git a/internal/adapters/crawler/crawler.go b/internal/adapters/crawler/crawler.go new file mode 100644 index 0000000..06030e8 --- /dev/null +++ b/internal/adapters/crawler/crawler.go @@ -0,0 +1,18 @@ +package crawler + +import ( + "fmt" + + 
"github.com/maximotejeda/us_dop_scrapper/internal/ports" +) + +func Selector(who string) (ports.APIPorts, error) { + var parser ports.APIPorts + switch who { + case "apap": + parser = NewApap() + default: + return nil, fmt.Errorf("not recognize who: " + who) + } + return parser, nil +} diff --git a/internal/adapters/dolar/.#dolar.go b/internal/adapters/dolar/.#dolar.go new file mode 120000 index 0000000..feda370 --- /dev/null +++ b/internal/adapters/dolar/.#dolar.go @@ -0,0 +1 @@ +maximo@debian-pc.9800:1713363571 \ No newline at end of file diff --git a/internal/adapters/dolar/dolar.go b/internal/adapters/dolar/dolar.go new file mode 100644 index 0000000..13db334 --- /dev/null +++ b/internal/adapters/dolar/dolar.go @@ -0,0 +1,26 @@ +package dolar + +import ( + "context" + + "github.com/maximotejeda/msvc-proto/golang/dolar" + "github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +type Adapter struct { + dolar dolar.DollarClient +} + +func NewAdapter(dolarServiceURL string) (*Adapter, error) { + var opts []grpc.DialOption + opts = append(opts, grpc.WithTransportCredentials(insecure.NewCredentials())) + conn, err := grpc.Dial(dolarServiceURL, opts...) 
+ if err != nil { + return nil, err + } + defer conn.Close() + client := dolar.NewDollarClient(conn) + return &Adapter{dolar: client}, nil +} diff --git a/internal/application/core/api/.#api.go b/internal/application/core/api/.#api.go new file mode 120000 index 0000000..feda370 --- /dev/null +++ b/internal/application/core/api/.#api.go @@ -0,0 +1 @@ +maximo@debian-pc.9800:1713363571 \ No newline at end of file diff --git a/internal/application/core/api/api.go b/internal/application/core/api/api.go new file mode 100644 index 0000000..0c6af10 --- /dev/null +++ b/internal/application/core/api/api.go @@ -0,0 +1,26 @@ +package api + +import ( + "log/slog" + + "github.com/maximotejeda/us_dop_scrapper/config" + "github.com/maximotejeda/us_dop_scrapper/internal/ports" +) + +type Application struct { + log *slog.Logger + api ports.APIPorts +} + +func NewApplication() *Application { + log := slog.Default() + log = log.With("application", "root") + return &Application{ + log: log, + } +} + +func (a Application) Run() { + who := config.GetWho() + +} diff --git a/internal/application/core/domain/.#domain.go b/internal/application/core/domain/.#domain.go new file mode 120000 index 0000000..feda370 --- /dev/null +++ b/internal/application/core/domain/.#domain.go @@ -0,0 +1 @@ +maximo@debian-pc.9800:1713363571 \ No newline at end of file diff --git a/internal/application/core/domain/domain.go b/internal/application/core/domain/domain.go new file mode 100644 index 0000000..a34dc46 --- /dev/null +++ b/internal/application/core/domain/domain.go @@ -0,0 +1,10 @@ +package domain + +type History struct { + ID int64 `json:""` + Name string `json:""` + Compra float64 `json:""` + Venta float64 `json:""` + Parser string `json:""` + Parsed int64 `json:""` +} diff --git a/internal/ports/api.go b/internal/ports/api.go new file mode 100644 index 0000000..d2eed0a --- /dev/null +++ b/internal/ports/api.go @@ -0,0 +1,14 @@ +package ports + +import ( + "context" + "log/slog" + + 
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain" + "github.com/playwright-community/playwright-go" +) + +type APIPorts interface { + Scrape(context.Context, playwright.Page, *slog.Logger) ([]*domain.History, error) + ExecParser(context.Context, *playwright.Browser, *slog.Logger) error +} diff --git a/internal/ports/dolar.go b/internal/ports/dolar.go new file mode 100644 index 0000000..5afa696 --- /dev/null +++ b/internal/ports/dolar.go @@ -0,0 +1,7 @@ +package ports + +import "github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain" + +type DollarPort interface { + NewHistory(*domain.History) error +} diff --git a/k8s/cronjobs.yml b/k8s/cronjobs.yml new file mode 100644 index 0000000..f3c581f --- /dev/null +++ b/k8s/cronjobs.yml @@ -0,0 +1,294 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: inf-cronjob +spec: + schedule: "*/20 8-19 * * 1-6" + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 1 + jobTemplate: + spec: + template: + spec: + containers: + - name: crawler-inf + image: localhost:32000/crawler:latest + env: + - name: GENERAL + value: https://www.infodolar.com.do/ + - name: DBURI + value: dolardb/crawler.db + - name: NATSURI + value: "nats://nats-svc:4222" + volumeMounts: + - name: database + mountPath: /app/dolardb + volumes: + - name: database + persistentVolumeClaim: + claimName: bank-crawler-pvc + restartPolicy: OnFailure +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: bcd-cronjob +spec: + schedule: "3,33 8-22 * * 1-6" + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 1 + jobTemplate: + spec: + template: + spec: + containers: + - name: crawler-bcd + image: localhost:32000/crawler:latest + env: + - name: BCD + value: https://www.bancentral.gov.do/SectorExterno/HistoricoTasas + - name: DBURI + value: dolardb/crawler.db + - name: NATSURI + value: "nats://nats-svc:4222" + - name: WHO + value: bcd + volumeMounts: + - name: database + mountPath: /app/dolardb + volumes: + - name: 
database + persistentVolumeClaim: + claimName: bank-crawler-pvc + restartPolicy: OnFailure +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: bpd-cronjob +spec: + schedule: "5,35 8-22 * * 1-6" + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 1 + jobTemplate: + spec: + template: + spec: + containers: + - name: crawler-bpd + image: localhost:32000/crawler:latest + env: + - name: BPD + value: https://popularenlinea.com/empresarial/Paginas/Home.aspx + - name: DBURI + value: dolardb/crawler.db + - name: NATSURI + value: "nats://nats-svc:4222" + - name: WHO + value: bpd + volumeMounts: + - name: database + mountPath: /app/dolardb + volumes: + - name: database + persistentVolumeClaim: + claimName: bank-crawler-pvc + restartPolicy: OnFailure +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: bhd-cronjob +spec: + schedule: "8,38 8-22 * * 1-6" + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 1 + jobTemplate: + spec: + template: + spec: + containers: + - name: crawler-bhd + image: localhost:32000/crawler:latest + env: + - name: BHD + value: https://bhd.com.do/calculators?calculator=DIVISAS + - name: DBURI + value: dolardb/crawler.db + - name: NATSURI + value: "nats://nats-svc:4222" + - name: WHO + value: bhd + volumeMounts: + - name: database + mountPath: /app/dolardb + volumes: + - name: database + persistentVolumeClaim: + claimName: bank-crawler-pvc + restartPolicy: OnFailure +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: brd-cronjob +spec: + schedule: "22,52 8-22 * * 1-6" + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 1 + jobTemplate: + spec: + template: + spec: + containers: + - name: crawler-brd + image: localhost:32000/crawler:latest + env: + - name: BDR + value: https://www.banreservas.com/calculadoras + - name: DBURI + value: dolardb/crawler.db + - name: NATSURI + value: "nats://nats-svc:4222" + - name: WHO + value: brd + volumeMounts: + - name: database + mountPath: /app/dolardb + volumes: + - name: 
database + persistentVolumeClaim: + claimName: bank-crawler-pvc + restartPolicy: OnFailure +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: apap-cronjob +spec: + schedule: "25,55 8-22 * * 1-6" + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 1 + jobTemplate: + spec: + template: + spec: + containers: + - name: crawler-apap + image: localhost:32000/crawler:latest + env: + - name: APA + value: https://apap.com.do/ + - name: DBURI + value: dolardb/crawler.db + - name: NATSURI + value: "nats://nats-svc:4222" + - name: WHO + value: apa + volumeMounts: + - name: database + mountPath: /app/dolardb + volumes: + - name: database + persistentVolumeClaim: + claimName: bank-crawler-pvc + restartPolicy: OnFailure +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: bnc-cronjob +spec: + schedule: "11,41 8-22 * * 1-6" + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 1 + jobTemplate: + spec: + template: + spec: + containers: + - name: crawler-bnc + image: localhost:32000/crawler:latest + env: + - name: BNC + value: https://www.banesco.com.do/ + - name: DBURI + value: dolardb/crawler.db + - name: NATSURI + value: "nats://nats-svc:4222" + - name: WHO + value: bnc + volumeMounts: + - name: database + mountPath: /app/dolardb + volumes: + - name: database + persistentVolumeClaim: + claimName: bank-crawler-pvc + restartPolicy: OnFailure +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: vimenca-cronjob +spec: + schedule: "14,44 8-22 * * 1-6" + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 1 + jobTemplate: + spec: + template: + spec: + containers: + - name: crawler-vimenca + image: localhost:32000/crawler:latest + env: + - name: VIMENCA + value: https://www.bancovimenca.com/ + - name: DBURI + value: dolardb/crawler.db + - name: NATSURI + value: "nats://nats-svc:4222" + - name: WHO + value: vimenca + volumeMounts: + - name: database + mountPath: /app/dolardb + volumes: + - name: database + persistentVolumeClaim: + claimName: 
bank-crawler-pvc + restartPolicy: OnFailure +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: scotia-cronjob +spec: + schedule: "17,47 8-22 * * 1-6" + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 1 + jobTemplate: + spec: + template: + spec: + containers: + - name: crawler-scotia + image: localhost:32000/crawler:latest + env: + - name: SCOTIA + value: https://do.scotiabank.com/banca-personal/tarifas/tasas-de-cambio.html + - name: DBURI + value: dolardb/crawler.db + - name: NATSURI + value: "nats://nats-svc:4222" + - name: WHO + value: scotia + volumeMounts: + - name: database + mountPath: /app/dolardb + volumes: + - name: database + persistentVolumeClaim: + claimName: bank-crawler-pvc + restartPolicy: OnFailure diff --git a/k8s/deployment.yml.template b/k8s/deployment.yml.template new file mode 100644 index 0000000..25d252e --- /dev/null +++ b/k8s/deployment.yml.template @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: bank-crawler + labels: + app: bank-crawler +spec: + replicas: 1 + selector: + matchLabels: + app: bank-crawler + template: + metadata: + labels: + app: bank-crawler + name: bank-crawler + spec: + containers: + - name: bank-crawler + image: localhost:32000/crawler:latest + env: + - name: GENERAL + value: $GENERAL + - name: BCD + value: $BCD + - name: BPD + value: $BPD + - name: BHD + value: $BHD + - name: BDR + value: $BDR + - name: APA + value: $APA + - name: DBURI + value: $DBURI + - name: NATSURI + value: "nats://nats-svc:4222" + volumeMounts: + - name: database + mountPath: /app/dolardb + volumes: + - name: database + persistentVolumeClaim: + claimName: bank-crawler-pvc + +--- + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: bank-crawler-pvc + spec: + storageClassName: nfs-csi + accessModes: [ReadWriteMany] + resources: + requests: + storage: 500Mi diff --git a/k8s/pvc.yaml b/k8s/pvc.yaml new file mode 100644 index 0000000..67406af --- /dev/null +++ b/k8s/pvc.yaml @@ -0,0 +1,10 @@ + 
apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: bank-crawler-pvc + spec: + storageClassName: nfs-csi + accessModes: [ReadWriteMany] + resources: + requests: + storage: 500Mi diff --git a/models/models.go b/models/models.go new file mode 100644 index 0000000..f6008d1 --- /dev/null +++ b/models/models.go @@ -0,0 +1,13 @@ +package models + +import ( + "time" +) + +type Institucion struct { + Name string `json:"name"` + Compra float64 `json:"compra"` + Venta float64 `json:"venta"` + Parser string `json:"parser"` + Parsed time.Time `json:"parsed"` +} diff --git a/pub/pub.go b/pub/pub.go new file mode 100644 index 0000000..36d0ef7 --- /dev/null +++ b/pub/pub.go @@ -0,0 +1,17 @@ +package pub + +import ( + "os" + + "github.com/nats-io/nats.go" +) + +func Publisher() (publisher func(string, []byte) error, closer func()) { + URI := os.Getenv("NATSURI") + if URI == "" { + panic("empty connection stream") + } + nc, _ := nats.Connect(URI) + + return nc.Publish, nc.Close +} diff --git a/scotia/scotia.go b/scotia/scotia.go new file mode 100644 index 0000000..70be4ee --- /dev/null +++ b/scotia/scotia.go @@ -0,0 +1,135 @@ +package scotia + +import ( + "context" + "fmt" + "log/slog" + "os" + "time" + + "github.com/maximotejeda/us_dop_db/db" + "github.com/maximotejeda/us_dop_scrapper/helpers" + "github.com/playwright-community/playwright-go" +) + +var ( + uri = os.Getenv("SCOTIA") +) + +func Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*db.History, err error) { + tout := 120000.00 + log = log.With("scrapper", "scotia") + if _, err := page.Goto(uri, playwright.PageGotoOptions{ + Timeout: &tout, + WaitUntil: playwright.WaitUntilStateLoad, + }); err != nil { + log.Error("could not get info", "error", err) + return nil, err + } + + currencyTable := page.Locator(".bns--table") + currencyTable.WaitFor() + + firstRow := page.Locator(".bns--table > tbody:nth-child(1) > tr:nth-child(2)") + secondRow := page.Locator(".bns--table > 
tbody:nth-child(1) > tr:nth-child(3)") + + // the same institution has 2 prices for dollar sell + // first row is onsite selling dollars + // second row is online selling dollars + buyOnsite := firstRow.Locator("td:nth-child(3)") + sellOnsite := firstRow.Locator("td:nth-child(4)") + + // the first row has 4 elements + // but the second row only has 3 + buyOnline := secondRow.Locator("td:nth-child(2)") + sellOnline := secondRow.Locator("td:nth-child(3)") + + compraOnsiteSTR, err := buyOnsite.InnerText() + if err != nil { + log.Error("could not get compra str", "err", err) + return nil, err + } + + ventaOnsiteSTR, err := sellOnsite.InnerText() + if err != nil { + log.Error("could not get venta string", "err", err) + return nil, err + } + instOnsite := &db.History{ + Name: "scotiabank", + Parser: "scotia", + Parsed: time.Now().UTC(), + } + + instOnsite.Venta = helpers.Normalize(ventaOnsiteSTR) + instOnsite.Compra = helpers.Normalize(compraOnsiteSTR) + if instOnsite.Compra == 0 || instOnsite.Venta == 0 { + return nil, fmt.Errorf("scotia: institution not parsed: %v", instOnsite) + } + + compraOnlineSTR, err := buyOnline.InnerText() + if err != nil { + log.Error("could not get compra onlie str", "err", err) + return nil, err + } + + ventaOnlineSTR, err := sellOnline.InnerText() + if err != nil { + log.Error("could not get venta online string", "err", err) + return nil, err + } + + instOnline := &db.History{ + Name: "scotiabank cambio online", + Parser: "scotia", + Parsed: time.Now().UTC(), + } + instOnline.Venta = helpers.Normalize(ventaOnlineSTR) + instOnline.Compra = helpers.Normalize(compraOnlineSTR) + + if instOnline.Compra == 0 || instOnline.Venta == 0 { + return nil, fmt.Errorf("scotia: institution not parsed: %v", instOnline) + } + + insts = append(insts, instOnline, instOnsite) + return insts, nil +} + +func ExecParser( + ctx context.Context, + db *db.DB, + browser *playwright.Browser, + log *slog.Logger) (err error) { + t := true + ua := helpers.NewMobileUA() + 
b := *browser + page, err := b.NewPage(playwright.BrowserNewPageOptions{ + UserAgent: &ua, + // IsMobile: &t, + HasTouch: &t, + Viewport: &playwright.Size{ + Width: 412, + Height: 915, + }, + Screen: &playwright.Size{ + Width: 412, + Height: 915, + }, + }) + if err != nil { + log.Error("creating page", "error", err) + os.Exit(1) + } + ctx, cancel := context.WithTimeout(ctx, 6*time.Minute) + defer page.Close() + defer cancel() + insts, err := Scrape(ctx, page, log) + // here we execute db operations + if err != nil { + return err + } + for _, inst := range insts { + err = db.Inspect(*inst) + } + return err +} diff --git a/vimenca/vimenca.go b/vimenca/vimenca.go new file mode 100644 index 0000000..4fc7f3e --- /dev/null +++ b/vimenca/vimenca.go @@ -0,0 +1,97 @@ +package vimenca + +import ( + "context" + "log/slog" + "os" + "time" + + "github.com/maximotejeda/us_dop_db/db" + "github.com/maximotejeda/us_dop_scrapper/helpers" + "github.com/playwright-community/playwright-go" +) + +var ( + uri = os.Getenv("VIMENCA") +) + +func Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (inst *db.History, err error) { + tout := 120000.00 + log = log.With("scrapper", "vimenca") + if _, err := page.Goto(uri, playwright.PageGotoOptions{ + Timeout: &tout, + WaitUntil: playwright.WaitUntilStateLoad, + }); err != nil { + log.Error("could not get info", "error", err) + return nil, err + } + + currencyTable := page.Locator(".bns--table") + currencyTable.WaitFor() + + infoContainer := page.Locator(".layout-uikit > div:nth-child(1)") + + buyInfo := infoContainer.Locator(".purchaseValue") + sellInfo := infoContainer.Locator(".saleValue") + + compraSTR, err := buyInfo.InnerText() + if err != nil { + log.Error("could not get compra str", "err", err) + return nil, err + } + + ventaSTR, err := sellInfo.InnerText() + if err != nil { + log.Error("could not get venta string", "err", err) + return nil, err + } + inst = &db.History{ + Name: "banco vimenca", + Parser: "vimenca", + 
// WaitAmount reports how long to wait before the next run, based on the UTC
// weekday and hour of actualTime. Both results are measured from the current
// wall clock (time.Until) to a deadline derived from actualTime.
//
// Schedule, all in UTC:
//   - Monday to Friday: active from 12:00 until 22:00.
//   - Saturday: active from 12:00; from 17:00 on, the next run is Monday.
//   - Sunday: never active; the next run is Monday.
//
// Outside the active window both durations point at the next 12:05 opening.
// Inside it, infoDuration targets actualTime+26m and longDuration targets
// actualTime+59m.
func WaitAmount(actualTime time.Time) (infoDuration time.Duration, longDuration time.Duration) {
	now := actualTime.UTC()
	hour := now.Hour()

	// 12:05 UTC on the calendar day of actualTime.
	opening, err := time.Parse(
		time.DateTime,
		fmt.Sprintf("%d-%02d-%02d 12:05:00", now.Year(), now.Month(), now.Day()),
	)
	if err != nil {
		panic(err)
	}

	// untilOpening returns the same wait for both results.
	untilOpening := func(t time.Time) (time.Duration, time.Duration) {
		d := time.Until(t)
		return d, d
	}
	// insideWindow returns the short and long polling intervals.
	insideWindow := func() (time.Duration, time.Duration) {
		return time.Until(now.Add(26 * time.Minute)), time.Until(now.Add(59 * time.Minute))
	}

	switch now.Weekday() {
	case time.Sunday:
		return untilOpening(opening.Add(24 * time.Hour))
	case time.Saturday:
		switch {
		case hour > 16:
			// Saturday window is over; skip Sunday and resume on Monday.
			return untilOpening(opening.Add(48 * time.Hour))
		case hour < 12:
			// Before the window opens: wait for it, as on weekdays.
			return untilOpening(opening)
		default:
			return insideWindow()
		}
	default:
		switch {
		case hour >= 22:
			return untilOpening(opening.Add(24 * time.Hour))
		case hour < 12:
			return untilOpening(opening)
		default:
			return insideWindow()
		}
	}
}