assume new paradigm
This commit is contained in:
parent
151c8f7a7b
commit
b5a3123506
2
Makefile
2
Makefile
@ -49,7 +49,7 @@ create-descriptors:
|
|||||||
@envsubst < k8s/deployment.yml.template > k8s/deployment.yml
|
@envsubst < k8s/deployment.yml.template > k8s/deployment.yml
|
||||||
|
|
||||||
deploy: build-image create-descriptors
|
deploy: build-image create-descriptors
|
||||||
@kubectl apply -f k8s/pvc.yaml
|
#@kubectl apply -f k8s/pvc.yaml
|
||||||
@kubectl apply -f k8s/cronjobs.yml
|
@kubectl apply -f k8s/cronjobs.yml
|
||||||
|
|
||||||
test:
|
test:
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
@ -32,6 +32,6 @@ func main() {
|
|||||||
log.Error("selecting crawler adapter", "error", err)
|
log.Error("selecting crawler adapter", "error", err)
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
app := api.NewApplication(crawler)
|
app := api.NewApplication(crawler, dol)
|
||||||
app.Run()
|
app.Run()
|
||||||
}
|
}
|
||||||
|
|||||||
@ -10,6 +10,43 @@ func GetDollarServiceURL() string {
|
|||||||
return getEnvValue("DOLLAR_SERVICE_URL")
|
return getEnvValue("DOLLAR_SERVICE_URL")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GetAPAPURL() string {
|
||||||
|
return getEnvValue("APA")
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetBCDURL() string {
|
||||||
|
return getEnvValue("BCD")
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetBDRURL() string {
|
||||||
|
return getEnvValue("BDR")
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetBHDURL() string {
|
||||||
|
return getEnvValue("BHD")
|
||||||
|
}
|
||||||
|
func GetBNCURL() string {
|
||||||
|
return getEnvValue("BNC")
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetBPDURL() string {
|
||||||
|
return getEnvValue("BPD")
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetINFURL() string {
|
||||||
|
return getEnvValue("GENERAL")
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetSCTAURL
|
||||||
|
// Scotia bank URL
|
||||||
|
func GetSCTAURL() string {
|
||||||
|
return getEnvValue("SCOTIA")
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetVMCURL() string {
|
||||||
|
return getEnvValue("VIMENCA")
|
||||||
|
}
|
||||||
|
|
||||||
func getEnvValue(key string) string {
|
func getEnvValue(key string) string {
|
||||||
if os.Getenv(key) == "" {
|
if os.Getenv(key) == "" {
|
||||||
panic("key not found " + key)
|
panic("key not found " + key)
|
||||||
|
|||||||
@ -1,9 +1,9 @@
|
|||||||
package helpers
|
package helpers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
"math"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"os"
|
"os"
|
||||||
"strconv"
|
"strconv"
|
||||||
@ -11,7 +11,7 @@ import (
|
|||||||
|
|
||||||
"unicode"
|
"unicode"
|
||||||
|
|
||||||
"github.com/maximotejeda/us_dop_db/db"
|
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
||||||
"github.com/playwright-community/playwright-go"
|
"github.com/playwright-community/playwright-go"
|
||||||
"golang.org/x/text/runes"
|
"golang.org/x/text/runes"
|
||||||
"golang.org/x/text/transform"
|
"golang.org/x/text/transform"
|
||||||
@ -125,62 +125,95 @@ func Normalize(val string) float64 {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("%s", err)
|
fmt.Printf("%s", err)
|
||||||
}
|
}
|
||||||
return cv
|
cvt := math.Round(cv*10000) / 10000
|
||||||
|
return cvt
|
||||||
}
|
}
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// CreateBrowser
|
// CreateBrowser
|
||||||
func CreateBrowser(log *slog.Logger) (chrome *playwright.Browser, firefox *playwright.Browser, webkit *playwright.Browser) {
|
func CreateBrowser(log *slog.Logger) (chrome *playwright.BrowserContext, firefox *playwright.BrowserContext, webkit *playwright.BrowserContext) {
|
||||||
pw, err := playwright.Run(&playwright.RunOptions{
|
pw, err := playwright.Run(&playwright.RunOptions{
|
||||||
Verbose: true,
|
Verbose: true,
|
||||||
})
|
})
|
||||||
|
ua := helpers.NewMobileUA()
|
||||||
|
|
||||||
|
headless := true
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("running pw, could not start", "error", err)
|
log.Error("running pw, could not start", "error", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
ff, err := pw.Firefox.Launch()
|
ff, err := pw.Firefox.Launch(playwright.BrowserTypeLaunchOptions{Headless: &headless})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("could not start browser", "error", err)
|
log.Error("could not start browser firefox", "error", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
cm, err := pw.Firefox.Launch()
|
ffc, err := ff.NewContext(playwright.BrowserNewContextOptions{
|
||||||
|
IgnoreHttpsErrors: &headless,
|
||||||
|
UserAgent: &ua,
|
||||||
|
HasTouch: &headless,
|
||||||
|
Viewport: &playwright.Size{
|
||||||
|
Width: 412,
|
||||||
|
Height: 915,
|
||||||
|
},
|
||||||
|
Screen: &playwright.Size{
|
||||||
|
Width: 412,
|
||||||
|
Height: 915,
|
||||||
|
},
|
||||||
|
IsMobile: &headless,
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("could not start browser", "error", err)
|
log.Error("could not start browser firefox context", "error", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
sf, err := pw.WebKit.Launch()
|
cm, err := pw.Firefox.Launch(playwright.BrowserTypeLaunchOptions{Headless: &headless})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("could not start browser", "error", err)
|
log.Error("could not start browser chrome", "error", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
return &cm, &ff, &sf
|
cmc, err := cm.NewContext(playwright.BrowserNewContextOptions{
|
||||||
}
|
IgnoreHttpsErrors: &headless,
|
||||||
|
UserAgent: &ua,
|
||||||
// ExecTask
|
HasTouch: &headless,
|
||||||
func ExecTask(
|
Viewport: &playwright.Size{
|
||||||
ctx context.Context,
|
Width: 412,
|
||||||
dbi *db.DB,
|
Height: 915,
|
||||||
browser []*playwright.Browser,
|
},
|
||||||
log *slog.Logger,
|
Screen: &playwright.Size{
|
||||||
errCounter map[string]int,
|
Width: 412,
|
||||||
parserName string,
|
Height: 915,
|
||||||
parserExecution func(context.Context, *db.DB, *playwright.Browser, *slog.Logger) error) (err error) {
|
},
|
||||||
err = parserExecution(ctx, dbi, browser[0], log)
|
IsMobile: &headless,
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errCounter[parserName]++
|
log.Error("could not start browser chorme context", "error", err)
|
||||||
log.Error(err.Error(), "parser", parserName)
|
os.Exit(1)
|
||||||
// todo want a retry with different browser firefox
|
|
||||||
err = parserExecution(ctx, dbi, browser[1], log)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
errCounter[parserName]++
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
log.Info("executed", "parser", parserName, "errors", errCounter[parserName])
|
sf, err := pw.WebKit.Launch(playwright.BrowserTypeLaunchOptions{Headless: &headless})
|
||||||
return err
|
if err != nil {
|
||||||
|
log.Error("could not start browser safari", "error", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
sfc, err := sf.NewContext(playwright.BrowserNewContextOptions{
|
||||||
|
IgnoreHttpsErrors: &headless,
|
||||||
|
UserAgent: &ua,
|
||||||
|
HasTouch: &headless,
|
||||||
|
Viewport: &playwright.Size{
|
||||||
|
Width: 412,
|
||||||
|
Height: 915,
|
||||||
|
},
|
||||||
|
Screen: &playwright.Size{
|
||||||
|
Width: 412,
|
||||||
|
Height: 915,
|
||||||
|
},
|
||||||
|
IsMobile: &headless,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
log.Error("could not start browser safari context", "error", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
return &cmc, &ffc, &sfc
|
||||||
}
|
}
|
||||||
|
|
||||||
// RemoveAccent
|
// RemoveAccent
|
||||||
|
|||||||
@ -4,28 +4,25 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/maximotejeda/us_dop_scrapper/config"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
||||||
"github.com/playwright-community/playwright-go"
|
"github.com/playwright-community/playwright-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Apap struct {
|
type Apap struct{}
|
||||||
client ports.DollarPort
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewApap(client ports.DollarPort) ports.APIPorts {
|
func NewApap() ports.APIPorts {
|
||||||
return &Apap{client: client}
|
return &Apap{}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a Apap) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
|
func (a Apap) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
|
||||||
tout := 120000.00
|
tout := 120000.00
|
||||||
uri := os.Getenv("APA")
|
|
||||||
log = log.With("scrapper", "apap")
|
log = log.With("scrapper", "apap")
|
||||||
if _, err := page.Goto(uri, playwright.PageGotoOptions{
|
if _, err := page.Goto(config.GetAPAPURL(), playwright.PageGotoOptions{
|
||||||
Timeout: &tout,
|
Timeout: &tout,
|
||||||
WaitUntil: playwright.WaitUntilStateLoad,
|
WaitUntil: playwright.WaitUntilStateLoad,
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
@ -64,43 +61,3 @@ func (a Apap) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger
|
|||||||
log.Info("parsed", "value", inst)
|
log.Info("parsed", "value", inst)
|
||||||
return []*domain.History{inst}, nil
|
return []*domain.History{inst}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a Apap) ExecParser(
|
|
||||||
ctx context.Context,
|
|
||||||
browser *playwright.Browser,
|
|
||||||
log *slog.Logger) (err error) {
|
|
||||||
t := true
|
|
||||||
ua := helpers.NewMobileUA()
|
|
||||||
b := *browser
|
|
||||||
page, err := b.NewPage(playwright.BrowserNewPageOptions{
|
|
||||||
UserAgent: &ua,
|
|
||||||
// IsMobile: &t,
|
|
||||||
HasTouch: &t,
|
|
||||||
Viewport: &playwright.Size{
|
|
||||||
Width: 412,
|
|
||||||
Height: 915,
|
|
||||||
},
|
|
||||||
Screen: &playwright.Size{
|
|
||||||
Width: 412,
|
|
||||||
Height: 915,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
log.Error("creating page", "error", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
|
|
||||||
defer page.Close()
|
|
||||||
defer cancel()
|
|
||||||
histList, err := a.Scrape(ctx, page, log)
|
|
||||||
// here we execute db operations
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
err = a.client.NewHistory(histList[0])
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|||||||
@ -4,30 +4,25 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/maximotejeda/us_dop_scrapper/config"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
||||||
"github.com/playwright-community/playwright-go"
|
"github.com/playwright-community/playwright-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
type bcd struct {
|
type bcd struct{}
|
||||||
client ports.DollarPort
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewBCD(client ports.DollarPort) ports.APIPorts {
|
func NewBCD() ports.APIPorts {
|
||||||
return &bcd{
|
return &bcd{}
|
||||||
client: client,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b bcd) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
|
func (b bcd) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
|
||||||
log = log.With("scrapper", "bcd")
|
log = log.With("scrapper", "bcd")
|
||||||
tout := 90000.00
|
tout := 90000.00
|
||||||
uri := os.Getenv("BCD")
|
if _, err = page.Goto(config.GetBCDURL(), playwright.PageGotoOptions{
|
||||||
if _, err = page.Goto(uri, playwright.PageGotoOptions{
|
|
||||||
Timeout: &tout,
|
Timeout: &tout,
|
||||||
WaitUntil: playwright.WaitUntilStateLoad,
|
WaitUntil: playwright.WaitUntilStateLoad,
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
@ -68,43 +63,3 @@ func (b bcd) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger)
|
|||||||
}
|
}
|
||||||
return []*domain.History{inst}, nil
|
return []*domain.History{inst}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (bc bcd) ExecParser(
|
|
||||||
ctx context.Context,
|
|
||||||
browser *playwright.Browser,
|
|
||||||
log *slog.Logger) (err error) {
|
|
||||||
t := true
|
|
||||||
ua := helpers.NewMobileUA()
|
|
||||||
b := *browser
|
|
||||||
page, err := b.NewPage(playwright.BrowserNewPageOptions{
|
|
||||||
UserAgent: &ua,
|
|
||||||
// IsMobile: &t,
|
|
||||||
HasTouch: &t,
|
|
||||||
Viewport: &playwright.Size{
|
|
||||||
Width: 412,
|
|
||||||
Height: 915,
|
|
||||||
},
|
|
||||||
Screen: &playwright.Size{
|
|
||||||
Width: 412,
|
|
||||||
Height: 915,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
log.Error("creating page", "error", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
|
|
||||||
defer page.Close()
|
|
||||||
defer cancel()
|
|
||||||
inst, err := bc.Scrape(ctx, page, log)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
err = bc.client.NewHistory(inst[0])
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|||||||
@ -4,29 +4,24 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/maximotejeda/us_dop_scrapper/config"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
||||||
"github.com/playwright-community/playwright-go"
|
"github.com/playwright-community/playwright-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
type bdr struct {
|
type bdr struct{}
|
||||||
client ports.DollarPort
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewBDR(client ports.DollarPort) ports.APIPorts {
|
func NewBDR() ports.APIPorts {
|
||||||
return &bdr{
|
return &bdr{}
|
||||||
client: client,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
func (bd bdr) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
|
func (bd bdr) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
|
||||||
tout := 120000.00
|
tout := 120000.00
|
||||||
log = log.With("scrapper", "bdr")
|
log = log.With("scrapper", "bdr")
|
||||||
uri := os.Getenv("BDR")
|
if _, err := page.Goto(config.GetBDRURL(), playwright.PageGotoOptions{
|
||||||
if _, err := page.Goto(uri, playwright.PageGotoOptions{
|
|
||||||
Timeout: &tout,
|
Timeout: &tout,
|
||||||
WaitUntil: playwright.WaitUntilStateLoad,
|
WaitUntil: playwright.WaitUntilStateLoad,
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
@ -72,39 +67,3 @@ func (bd bdr) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger
|
|||||||
insts = append(insts, inst)
|
insts = append(insts, inst)
|
||||||
return insts, nil
|
return insts, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (bd bdr) ExecParser(
|
|
||||||
ctx context.Context,
|
|
||||||
browser *playwright.Browser,
|
|
||||||
log *slog.Logger) (err error) {
|
|
||||||
t := true
|
|
||||||
ua := helpers.NewMobileUA()
|
|
||||||
b := *browser
|
|
||||||
page, err := b.NewPage(playwright.BrowserNewPageOptions{
|
|
||||||
UserAgent: &ua,
|
|
||||||
// IsMobile: &t,
|
|
||||||
HasTouch: &t,
|
|
||||||
Viewport: &playwright.Size{
|
|
||||||
Width: 412,
|
|
||||||
Height: 915,
|
|
||||||
},
|
|
||||||
Screen: &playwright.Size{
|
|
||||||
Width: 412,
|
|
||||||
Height: 915,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
log.Error("creating page", "error", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
|
|
||||||
defer page.Close()
|
|
||||||
defer cancel()
|
|
||||||
insts, err := bd.Scrape(ctx, page, log)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
err = bd.client.NewHistory(insts[0])
|
|
||||||
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|||||||
@ -4,33 +4,28 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/maximotejeda/us_dop_scrapper/config"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
||||||
"github.com/playwright-community/playwright-go"
|
"github.com/playwright-community/playwright-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
type bhd struct {
|
type bhd struct{}
|
||||||
client ports.DollarPort
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewBHD(client ports.DollarPort) ports.APIPorts {
|
func NewBHD() ports.APIPorts {
|
||||||
return &bhd{
|
return &bhd{}
|
||||||
client: client,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scrape
|
// Scrape
|
||||||
// needs a mobile User Agent
|
// needs a mobile User Agent
|
||||||
func (bh bhd) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
|
func (bh bhd) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
|
||||||
tout := 120000.00
|
tout := 120000.00
|
||||||
uri := os.Getenv("BHD")
|
|
||||||
log = log.With("scrapper", "bhd")
|
log = log.With("scrapper", "bhd")
|
||||||
if _, err := page.Goto(uri, playwright.PageGotoOptions{
|
if _, err := page.Goto(config.GetBHDURL(), playwright.PageGotoOptions{
|
||||||
Timeout: &tout,
|
Timeout: &tout,
|
||||||
WaitUntil: playwright.WaitUntilStateLoad,
|
WaitUntil: playwright.WaitUntilStateLoad,
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
@ -111,38 +106,3 @@ func (bh bhd) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger
|
|||||||
//log.Info(fmt.Sprintf("%v", inst))
|
//log.Info(fmt.Sprintf("%v", inst))
|
||||||
return []*domain.History{inst}, nil
|
return []*domain.History{inst}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (bh bhd) ExecParser(
|
|
||||||
ctx context.Context,
|
|
||||||
browser *playwright.Browser,
|
|
||||||
log *slog.Logger) (err error) {
|
|
||||||
t := true
|
|
||||||
ua := helpers.NewMobileUA()
|
|
||||||
b := *browser
|
|
||||||
page, err := b.NewPage(playwright.BrowserNewPageOptions{
|
|
||||||
UserAgent: &ua,
|
|
||||||
// IsMobile: &t,
|
|
||||||
HasTouch: &t,
|
|
||||||
Viewport: &playwright.Size{
|
|
||||||
Width: 412,
|
|
||||||
Height: 915,
|
|
||||||
},
|
|
||||||
Screen: &playwright.Size{
|
|
||||||
Width: 412,
|
|
||||||
Height: 915,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
log.Error("creating page", "error", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
|
|
||||||
defer page.Close()
|
|
||||||
defer cancel()
|
|
||||||
inst, err := bh.Scrape(ctx, page, log)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
bh.client.NewHistory(inst[0])
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|||||||
@ -4,30 +4,25 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/maximotejeda/us_dop_scrapper/config"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
||||||
"github.com/playwright-community/playwright-go"
|
"github.com/playwright-community/playwright-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
type bnc struct {
|
type bnc struct{}
|
||||||
client ports.DollarPort
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewBNC(client ports.DollarPort) ports.APIPorts {
|
func NewBNC() ports.APIPorts {
|
||||||
return &bnc{
|
return &bnc{}
|
||||||
client: client,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (bn bnc) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
|
func (bn bnc) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
|
||||||
tout := 120000.00
|
tout := 120000.00
|
||||||
uri := os.Getenv("BNC")
|
|
||||||
log = log.With("scrapper", "bnc")
|
log = log.With("scrapper", "bnc")
|
||||||
if _, err := page.Goto(uri, playwright.PageGotoOptions{
|
if _, err := page.Goto(config.GetBNCURL(), playwright.PageGotoOptions{
|
||||||
Timeout: &tout,
|
Timeout: &tout,
|
||||||
WaitUntil: playwright.WaitUntilStateLoad,
|
WaitUntil: playwright.WaitUntilStateLoad,
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
@ -66,39 +61,3 @@ func (bn bnc) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger
|
|||||||
}
|
}
|
||||||
return []*domain.History{inst}, nil
|
return []*domain.History{inst}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (bn bnc) ExecParser(
|
|
||||||
ctx context.Context,
|
|
||||||
browser *playwright.Browser,
|
|
||||||
log *slog.Logger) (err error) {
|
|
||||||
t := true
|
|
||||||
ua := helpers.NewMobileUA()
|
|
||||||
b := *browser
|
|
||||||
page, err := b.NewPage(playwright.BrowserNewPageOptions{
|
|
||||||
UserAgent: &ua,
|
|
||||||
// IsMobile: &t,
|
|
||||||
HasTouch: &t,
|
|
||||||
Viewport: &playwright.Size{
|
|
||||||
Width: 412,
|
|
||||||
Height: 915,
|
|
||||||
},
|
|
||||||
Screen: &playwright.Size{
|
|
||||||
Width: 412,
|
|
||||||
Height: 915,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
log.Error("creating page", "error", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
|
|
||||||
defer page.Close()
|
|
||||||
defer cancel()
|
|
||||||
inst, err := bn.Scrape(ctx, page, log)
|
|
||||||
// here we execute db operations
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
bn.client.NewHistory(inst[0])
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|||||||
@ -4,35 +4,28 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
"github.com/maximotejeda/us_dop_scrapper/config"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
||||||
|
|
||||||
"github.com/playwright-community/playwright-go"
|
"github.com/playwright-community/playwright-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
type bpd struct {
|
type bpd struct{}
|
||||||
client ports.DollarPort
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewBPD(client ports.DollarPort) ports.APIPorts {
|
func NewBPD() ports.APIPorts {
|
||||||
return &bpd{
|
return &bpd{}
|
||||||
client: client,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scrape
|
// Scrape
|
||||||
// needs a mobile User Agent
|
// needs a mobile User Agent
|
||||||
func (bp bpd) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
|
func (bp bpd) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
|
||||||
tout := 120000.00
|
tout := 120000.00
|
||||||
|
|
||||||
uri := os.Getenv("BPD")
|
|
||||||
log = log.With("scrapper", "bpd")
|
log = log.With("scrapper", "bpd")
|
||||||
if _, err := page.Goto(uri, playwright.PageGotoOptions{
|
if _, err := page.Goto(config.GetBPDURL(), playwright.PageGotoOptions{
|
||||||
Timeout: &tout,
|
Timeout: &tout,
|
||||||
WaitUntil: playwright.WaitUntilStateLoad,
|
WaitUntil: playwright.WaitUntilStateLoad,
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
@ -90,38 +83,3 @@ func HoverTasas(page playwright.Page) {
|
|||||||
tasasMenu := page.Locator(".footer_est_menu_bpd > li:nth-child(3)")
|
tasasMenu := page.Locator(".footer_est_menu_bpd > li:nth-child(3)")
|
||||||
tasasMenu.Hover()
|
tasasMenu.Hover()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (bp bpd) ExecParser(
|
|
||||||
ctx context.Context,
|
|
||||||
browser *playwright.Browser,
|
|
||||||
log *slog.Logger) (err error) {
|
|
||||||
t := true
|
|
||||||
ua := helpers.NewMobileUA()
|
|
||||||
b := *browser
|
|
||||||
page, err := b.NewPage(playwright.BrowserNewPageOptions{
|
|
||||||
UserAgent: &ua,
|
|
||||||
// IsMobile: &t,
|
|
||||||
HasTouch: &t,
|
|
||||||
Viewport: &playwright.Size{
|
|
||||||
Width: 412,
|
|
||||||
Height: 915,
|
|
||||||
},
|
|
||||||
Screen: &playwright.Size{
|
|
||||||
Width: 412,
|
|
||||||
Height: 915,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
log.Error("creating page", "error", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
|
|
||||||
defer page.Close()
|
|
||||||
defer cancel()
|
|
||||||
inst, err := bp.Scrape(ctx, page, log)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
bp.client.NewHistory(inst[0])
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|||||||
@ -10,23 +10,23 @@ func Selector(who string, client ports.DollarPort) (ports.APIPorts, error) {
|
|||||||
var parser ports.APIPorts
|
var parser ports.APIPorts
|
||||||
switch who {
|
switch who {
|
||||||
case "apap":
|
case "apap":
|
||||||
parser = NewApap(client)
|
parser = NewApap()
|
||||||
case "bcd":
|
case "bcd":
|
||||||
parser = NewBCD(client)
|
parser = NewBCD()
|
||||||
case "bdr":
|
case "brd":
|
||||||
parser = NewBDR(client)
|
parser = NewBDR()
|
||||||
case "bhd":
|
case "bhd":
|
||||||
parser = NewBHD(client)
|
parser = NewBHD()
|
||||||
case "bnc":
|
case "bnc":
|
||||||
parser = NewBNC(client)
|
parser = NewBNC()
|
||||||
case "bpd":
|
case "bpd":
|
||||||
parser = NewBPD(client)
|
parser = NewBPD()
|
||||||
case "inf":
|
case "inf":
|
||||||
parser = NewINF(client)
|
parser = NewINF()
|
||||||
case "scotia":
|
case "scotia":
|
||||||
parser = NewScotia(client)
|
parser = NewScotia()
|
||||||
case "vimenca":
|
case "vimenca":
|
||||||
parser = NewVimenca(client)
|
parser = NewVimenca()
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("not recognize who: " + who)
|
return nil, fmt.Errorf("not recognize who: " + who)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -2,34 +2,28 @@ package crawler
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/maximotejeda/us_dop_scrapper/config"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
||||||
"github.com/playwright-community/playwright-go"
|
"github.com/playwright-community/playwright-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
type inf struct {
|
type inf struct{}
|
||||||
client ports.DollarPort
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewINF(client ports.DollarPort) ports.APIPorts {
|
func NewINF() ports.APIPorts {
|
||||||
return &inf{
|
return &inf{}
|
||||||
client: client,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scrape
|
// Scrape
|
||||||
func (in inf) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (instList []*domain.History, err error) {
|
func (in inf) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (instList []*domain.History, err error) {
|
||||||
uri := os.Getenv("GENERAL")
|
|
||||||
log = log.With("scrapper", "general")
|
log = log.With("scrapper", "general")
|
||||||
tout := float64(120000)
|
tout := float64(120000)
|
||||||
if _, err := page.Goto(uri, playwright.PageGotoOptions{
|
if _, err := page.Goto(config.GetINFURL(), playwright.PageGotoOptions{
|
||||||
Timeout: &tout,
|
Timeout: &tout,
|
||||||
WaitUntil: playwright.WaitUntilStateLoad,
|
WaitUntil: playwright.WaitUntilStateLoad,
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
@ -49,17 +43,13 @@ func (in inf) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger
|
|||||||
log.Error("could not get info", "error", err)
|
log.Error("could not get info", "error", err)
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
scotia := false // in this page there are 2 scotia one the change online the other is tha bank
|
|
||||||
instList = []*domain.History{}
|
instList = []*domain.History{}
|
||||||
for _, entry := range entries {
|
for _, entry := range entries {
|
||||||
inst := &domain.History{
|
inst := &domain.History{
|
||||||
Parser: "inf",
|
Parser: "inf",
|
||||||
}
|
}
|
||||||
title, _ := entry.Locator("span.nombre").TextContent()
|
title, _ := entry.Locator("span.nombre").TextContent()
|
||||||
if strings.ToLower(title) == "scotiabank" && !scotia {
|
|
||||||
title = "scotiabank cambio online"
|
|
||||||
scotia = true
|
|
||||||
}
|
|
||||||
name := ""
|
name := ""
|
||||||
if title != "" {
|
if title != "" {
|
||||||
name = helpers.RemoveAccent(strings.ToLower(title))
|
name = helpers.RemoveAccent(strings.ToLower(title))
|
||||||
@ -81,6 +71,10 @@ func (in inf) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger
|
|||||||
log.Warn("skipping", "nombre", inst.Name, "compra", inst.Compra, "venta", inst.Venta)
|
log.Warn("skipping", "nombre", inst.Name, "compra", inst.Compra, "venta", inst.Venta)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
switch {
|
||||||
|
case strings.Contains(inst.Name, "banreservas"), strings.Contains(inst.Name,"banco popular"), strings.Contains(inst.Name,"scotia"), strings.Contains(inst.Name,"hipotecario"), strings.Contains(inst.Name,"asociacion popular"), strings.Contains(inst.Name,"vimenca"):
|
||||||
|
continue
|
||||||
|
}
|
||||||
instList = append(instList, inst)
|
instList = append(instList, inst)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -98,45 +92,3 @@ func getValue(place playwright.Locator) string {
|
|||||||
}
|
}
|
||||||
return value
|
return value
|
||||||
}
|
}
|
||||||
|
|
||||||
// ExecParser
|
|
||||||
func (in inf) ExecParser(
|
|
||||||
ctx context.Context,
|
|
||||||
browser *playwright.Browser,
|
|
||||||
log *slog.Logger) error {
|
|
||||||
t := true
|
|
||||||
ua := helpers.NewMobileUA()
|
|
||||||
b := *browser
|
|
||||||
page, err := b.NewPage(playwright.BrowserNewPageOptions{
|
|
||||||
UserAgent: &ua,
|
|
||||||
// IsMobile: &t,
|
|
||||||
HasTouch: &t,
|
|
||||||
Viewport: &playwright.Size{
|
|
||||||
Width: 412,
|
|
||||||
Height: 915,
|
|
||||||
},
|
|
||||||
Screen: &playwright.Size{
|
|
||||||
Width: 412,
|
|
||||||
Height: 915,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
log.Error("creating page", "error", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
|
|
||||||
defer page.Close()
|
|
||||||
defer cancel()
|
|
||||||
instList, err := in.Scrape(ctx, page, log)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
for _, inst := range instList {
|
|
||||||
log.Info("processing", "name", inst.Name)
|
|
||||||
err = in.client.NewHistory(inst)
|
|
||||||
if err != nil {
|
|
||||||
log.Error(fmt.Sprintf("inspecting %s", inst.Name), "error", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|||||||
@ -4,30 +4,26 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/maximotejeda/us_dop_scrapper/config"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
||||||
"github.com/playwright-community/playwright-go"
|
"github.com/playwright-community/playwright-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
type scotia struct {
|
type scotia struct{}
|
||||||
client ports.DollarPort
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewScotia(client ports.DollarPort) ports.APIPorts {
|
func NewScotia() ports.APIPorts {
|
||||||
return &scotia{
|
return &scotia{}
|
||||||
client: client,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sct scotia) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
|
func (sct scotia) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
|
||||||
tout := 120000.00
|
tout := 120000.00
|
||||||
uri := os.Getenv("SCOTIA")
|
|
||||||
log = log.With("scrapper", "scotia")
|
log = log.With("scrapper", "scotia")
|
||||||
if _, err := page.Goto(uri, playwright.PageGotoOptions{
|
if _, err := page.Goto(config.GetSCTAURL(), playwright.PageGotoOptions{
|
||||||
Timeout: &tout,
|
Timeout: &tout,
|
||||||
WaitUntil: playwright.WaitUntilStateLoad,
|
WaitUntil: playwright.WaitUntilStateLoad,
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
@ -102,41 +98,3 @@ func (sct scotia) Scrape(ctx context.Context, page playwright.Page, log *slog.Lo
|
|||||||
insts = append(insts, instOnline, instOnsite)
|
insts = append(insts, instOnline, instOnsite)
|
||||||
return insts, nil
|
return insts, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sct scotia) ExecParser(
|
|
||||||
ctx context.Context,
|
|
||||||
browser *playwright.Browser,
|
|
||||||
log *slog.Logger) (err error) {
|
|
||||||
t := true
|
|
||||||
ua := helpers.NewMobileUA()
|
|
||||||
b := *browser
|
|
||||||
page, err := b.NewPage(playwright.BrowserNewPageOptions{
|
|
||||||
UserAgent: &ua,
|
|
||||||
// IsMobile: &t,
|
|
||||||
HasTouch: &t,
|
|
||||||
Viewport: &playwright.Size{
|
|
||||||
Width: 412,
|
|
||||||
Height: 915,
|
|
||||||
},
|
|
||||||
Screen: &playwright.Size{
|
|
||||||
Width: 412,
|
|
||||||
Height: 915,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
log.Error("creating page", "error", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
|
|
||||||
defer page.Close()
|
|
||||||
defer cancel()
|
|
||||||
insts, err := sct.Scrape(ctx, page, log)
|
|
||||||
// here we execute db operations
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
for _, inst := range insts {
|
|
||||||
sct.client.NewHistory(inst)
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|||||||
@ -3,30 +3,25 @@ package crawler
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/maximotejeda/us_dop_scrapper/config"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
"github.com/maximotejeda/us_dop_scrapper/internal/application/core/domain"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
||||||
"github.com/playwright-community/playwright-go"
|
"github.com/playwright-community/playwright-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
type vimenca struct {
|
type vimenca struct{}
|
||||||
client ports.DollarPort
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewVimenca(client ports.DollarPort) ports.APIPorts {
|
func NewVimenca() ports.APIPorts {
|
||||||
return &vimenca{
|
return &vimenca{}
|
||||||
client: client,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (v vimenca) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
|
func (v vimenca) Scrape(ctx context.Context, page playwright.Page, log *slog.Logger) (insts []*domain.History, err error) {
|
||||||
uri := os.Getenv("VIMENCA")
|
|
||||||
tout := 120000.00
|
tout := 120000.00
|
||||||
log = log.With("scrapper", "vimenca")
|
log = log.With("scrapper", "vimenca")
|
||||||
if _, err := page.Goto(uri, playwright.PageGotoOptions{
|
if _, err := page.Goto(config.GetVMCURL(), playwright.PageGotoOptions{
|
||||||
Timeout: &tout,
|
Timeout: &tout,
|
||||||
WaitUntil: playwright.WaitUntilStateLoad,
|
WaitUntil: playwright.WaitUntilStateLoad,
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
@ -64,43 +59,3 @@ func (v vimenca) Scrape(ctx context.Context, page playwright.Page, log *slog.Log
|
|||||||
log.Info("institution", "value", inst)
|
log.Info("institution", "value", inst)
|
||||||
return []*domain.History{inst}, nil
|
return []*domain.History{inst}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (v vimenca) ExecParser(
|
|
||||||
ctx context.Context,
|
|
||||||
browser *playwright.Browser,
|
|
||||||
log *slog.Logger) (err error) {
|
|
||||||
t := true
|
|
||||||
ua := helpers.NewMobileUA()
|
|
||||||
b := *browser
|
|
||||||
page, err := b.NewPage(playwright.BrowserNewPageOptions{
|
|
||||||
UserAgent: &ua,
|
|
||||||
// IsMobile: &t,
|
|
||||||
HasTouch: &t,
|
|
||||||
Viewport: &playwright.Size{
|
|
||||||
Width: 412,
|
|
||||||
Height: 915,
|
|
||||||
},
|
|
||||||
Screen: &playwright.Size{
|
|
||||||
Width: 412,
|
|
||||||
Height: 915,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
log.Error("creating page", "error", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
|
|
||||||
defer page.Close()
|
|
||||||
defer cancel()
|
|
||||||
inst, err := v.Scrape(ctx, page, log)
|
|
||||||
// here we execute db operations
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
err = v.client.NewHistory(inst[0])
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|||||||
@ -2,38 +2,74 @@ package api
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
"github.com/maximotejeda/us_dop_scrapper/helpers"
|
||||||
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
"github.com/maximotejeda/us_dop_scrapper/internal/ports"
|
||||||
"log/slog"
|
"github.com/playwright-community/playwright-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Application struct {
|
type Application struct {
|
||||||
log *slog.Logger
|
log *slog.Logger
|
||||||
api ports.APIPorts
|
api ports.APIPorts
|
||||||
|
client ports.DollarPort
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewApplication(crawler ports.APIPorts) *Application {
|
func NewApplication(crawler ports.APIPorts, client ports.DollarPort) *Application {
|
||||||
log := slog.Default()
|
log := slog.Default()
|
||||||
log = log.With("application", "root")
|
log = log.With("application", "root")
|
||||||
return &Application{
|
return &Application{
|
||||||
log: log,
|
log: log,
|
||||||
api: crawler,
|
api: crawler,
|
||||||
|
client: client,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a Application) Run() {
|
func (a Application) Run() {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
ch, ff, wk := helpers.CreateBrowser(a.log)
|
ch, ff, wk := helpers.CreateBrowser(a.log)
|
||||||
err := a.api.ExecParser(ctx, ch, a.log)
|
err := a.ExecParser(ctx, ch, a.log)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
a.log.Info("failed on frist browser", "browser", "chrome", "error", err)
|
a.log.Info("failed on frist browser", "browser", "chrome", "error", err)
|
||||||
err := a.api.ExecParser(ctx, ff, a.log)
|
err := a.ExecParser(ctx, ff, a.log)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
a.log.Error("failed on second browser", "browser", "firefox", "error", err)
|
a.log.Error("failed on second browser", "browser", "firefox", "error", err)
|
||||||
err := a.api.ExecParser(ctx, wk, a.log)
|
err := a.ExecParser(ctx, wk, a.log)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
a.log.Error("tried all browsers error", "brwser", "webkit", "error", err)
|
a.log.Error("tried all browsers error", "brwser", "webkit", "error", err)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (a Application) ExecParser(
|
||||||
|
ctx context.Context,
|
||||||
|
browser *playwright.BrowserContext,
|
||||||
|
log *slog.Logger) (err error) {
|
||||||
|
b := *browser
|
||||||
|
page, err := b.NewPage()
|
||||||
|
if err != nil {
|
||||||
|
log.Error("creating page", "error", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(ctx, 6*time.Minute)
|
||||||
|
defer page.Close()
|
||||||
|
defer cancel()
|
||||||
|
histList, err := a.api.Scrape(ctx, page, log)
|
||||||
|
// here we execute db operations
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for _, hist := range histList {
|
||||||
|
err = a.client.NewHistory(hist)
|
||||||
|
if err != nil {
|
||||||
|
a.log.Error("creating new hist", "history", hist, "error", err)
|
||||||
|
}
|
||||||
|
a.log.Info("parsed Success", "parser", hist.Parser, "item", hist)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|||||||
@ -10,5 +10,4 @@ import (
|
|||||||
|
|
||||||
type APIPorts interface {
|
type APIPorts interface {
|
||||||
Scrape(context.Context, playwright.Page, *slog.Logger) ([]*domain.History, error)
|
Scrape(context.Context, playwright.Page, *slog.Logger) ([]*domain.History, error)
|
||||||
ExecParser(context.Context, *playwright.Browser, *slog.Logger) error
|
|
||||||
}
|
}
|
||||||
|
|||||||
133
k8s/cronjobs.yml
133
k8s/cronjobs.yml
@ -10,24 +10,19 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
template:
|
template:
|
||||||
spec:
|
spec:
|
||||||
|
restartPolicy: OnFailure
|
||||||
containers:
|
containers:
|
||||||
- name: crawler-inf
|
- name: crawler-inf
|
||||||
image: localhost:32000/crawler:latest
|
image: localhost:32000/crawler:latest
|
||||||
env:
|
env:
|
||||||
- name: GENERAL
|
- name: WHO
|
||||||
value: https://www.infodolar.com.do/
|
value: inf
|
||||||
- name: DBURI
|
- name: DOLLAR_SERVICE_URL
|
||||||
value: dolardb/crawler.db
|
value: "dolar-grpc-svc:80"
|
||||||
- name: NATSURI
|
- name: GENERAL
|
||||||
value: "nats://nats-svc:4222"
|
value: https://www.infodolar.com.do/
|
||||||
volumeMounts:
|
- name: NATSURI
|
||||||
- name: database
|
value: "nats://nats-svc:4222"
|
||||||
mountPath: /app/dolardb
|
|
||||||
volumes:
|
|
||||||
- name: database
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: bank-crawler-pvc
|
|
||||||
restartPolicy: OnFailure
|
|
||||||
---
|
---
|
||||||
apiVersion: batch/v1
|
apiVersion: batch/v1
|
||||||
kind: CronJob
|
kind: CronJob
|
||||||
@ -41,26 +36,19 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
template:
|
template:
|
||||||
spec:
|
spec:
|
||||||
|
restartPolicy: OnFailure
|
||||||
containers:
|
containers:
|
||||||
- name: crawler-bcd
|
- name: crawler-bcd
|
||||||
image: localhost:32000/crawler:latest
|
image: localhost:32000/crawler:latest
|
||||||
env:
|
env:
|
||||||
- name: BCD
|
- name: BCD
|
||||||
value: https://www.bancentral.gov.do/SectorExterno/HistoricoTasas
|
value: https://www.bancentral.gov.do/SectorExterno/HistoricoTasas
|
||||||
- name: DBURI
|
|
||||||
value: dolardb/crawler.db
|
|
||||||
- name: NATSURI
|
- name: NATSURI
|
||||||
value: "nats://nats-svc:4222"
|
value: "nats://nats-svc:4222"
|
||||||
- name: WHO
|
- name: WHO
|
||||||
value: bcd
|
value: bcd
|
||||||
volumeMounts:
|
- name: DOLLAR_SERVICE_URL
|
||||||
- name: database
|
value: "dolar-grpc-svc:80"
|
||||||
mountPath: /app/dolardb
|
|
||||||
volumes:
|
|
||||||
- name: database
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: bank-crawler-pvc
|
|
||||||
restartPolicy: OnFailure
|
|
||||||
---
|
---
|
||||||
apiVersion: batch/v1
|
apiVersion: batch/v1
|
||||||
kind: CronJob
|
kind: CronJob
|
||||||
@ -74,26 +62,19 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
template:
|
template:
|
||||||
spec:
|
spec:
|
||||||
|
restartPolicy: OnFailure
|
||||||
containers:
|
containers:
|
||||||
- name: crawler-bpd
|
- name: crawler-bpd
|
||||||
image: localhost:32000/crawler:latest
|
image: localhost:32000/crawler:latest
|
||||||
env:
|
env:
|
||||||
- name: BPD
|
- name: BPD
|
||||||
value: https://popularenlinea.com/empresarial/Paginas/Home.aspx
|
value: https://popularenlinea.com/empresarial/Paginas/Home.aspx
|
||||||
- name: DBURI
|
|
||||||
value: dolardb/crawler.db
|
|
||||||
- name: NATSURI
|
- name: NATSURI
|
||||||
value: "nats://nats-svc:4222"
|
value: "nats://nats-svc:4222"
|
||||||
- name: WHO
|
- name: WHO
|
||||||
value: bpd
|
value: bpd
|
||||||
volumeMounts:
|
- name: DOLLAR_SERVICE_URL
|
||||||
- name: database
|
value: "dolar-grpc-svc:80"
|
||||||
mountPath: /app/dolardb
|
|
||||||
volumes:
|
|
||||||
- name: database
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: bank-crawler-pvc
|
|
||||||
restartPolicy: OnFailure
|
|
||||||
---
|
---
|
||||||
apiVersion: batch/v1
|
apiVersion: batch/v1
|
||||||
kind: CronJob
|
kind: CronJob
|
||||||
@ -107,26 +88,19 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
template:
|
template:
|
||||||
spec:
|
spec:
|
||||||
|
restartPolicy: OnFailure
|
||||||
containers:
|
containers:
|
||||||
- name: crawler-bhd
|
- name: crawler-bhd
|
||||||
image: localhost:32000/crawler:latest
|
image: localhost:32000/crawler:latest
|
||||||
env:
|
env:
|
||||||
- name: BHD
|
- name: BHD
|
||||||
value: https://bhd.com.do/calculators?calculator=DIVISAS
|
value: https://bhd.com.do/calculators?calculator=DIVISAS
|
||||||
- name: DBURI
|
|
||||||
value: dolardb/crawler.db
|
|
||||||
- name: NATSURI
|
- name: NATSURI
|
||||||
value: "nats://nats-svc:4222"
|
value: "nats://nats-svc:4222"
|
||||||
- name: WHO
|
- name: WHO
|
||||||
value: bhd
|
value: bhd
|
||||||
volumeMounts:
|
- name: DOLLAR_SERVICE_URL
|
||||||
- name: database
|
value: "dolar-grpc-svc:80"
|
||||||
mountPath: /app/dolardb
|
|
||||||
volumes:
|
|
||||||
- name: database
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: bank-crawler-pvc
|
|
||||||
restartPolicy: OnFailure
|
|
||||||
---
|
---
|
||||||
apiVersion: batch/v1
|
apiVersion: batch/v1
|
||||||
kind: CronJob
|
kind: CronJob
|
||||||
@ -140,26 +114,20 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
template:
|
template:
|
||||||
spec:
|
spec:
|
||||||
|
restartPolicy: OnFailure
|
||||||
containers:
|
containers:
|
||||||
- name: crawler-brd
|
- name: crawler-brd
|
||||||
image: localhost:32000/crawler:latest
|
image: localhost:32000/crawler:latest
|
||||||
env:
|
env:
|
||||||
- name: BDR
|
- name: BDR
|
||||||
value: https://www.banreservas.com/calculadoras
|
value: https://www.banreservas.com/calculadoras
|
||||||
- name: DBURI
|
|
||||||
value: dolardb/crawler.db
|
|
||||||
- name: NATSURI
|
- name: NATSURI
|
||||||
value: "nats://nats-svc:4222"
|
value: "nats://nats-svc:4222"
|
||||||
- name: WHO
|
- name: WHO
|
||||||
value: brd
|
value: brd
|
||||||
volumeMounts:
|
- name: DOLLAR_SERVICE_URL
|
||||||
- name: database
|
value: "dolar-grpc-svc:80"
|
||||||
mountPath: /app/dolardb
|
|
||||||
volumes:
|
|
||||||
- name: database
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: bank-crawler-pvc
|
|
||||||
restartPolicy: OnFailure
|
|
||||||
---
|
---
|
||||||
apiVersion: batch/v1
|
apiVersion: batch/v1
|
||||||
kind: CronJob
|
kind: CronJob
|
||||||
@ -173,26 +141,20 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
template:
|
template:
|
||||||
spec:
|
spec:
|
||||||
|
restartPolicy: OnFailure
|
||||||
containers:
|
containers:
|
||||||
- name: crawler-apap
|
- name: crawler-apap
|
||||||
image: localhost:32000/crawler:latest
|
image: localhost:32000/crawler:latest
|
||||||
env:
|
env:
|
||||||
- name: APA
|
- name: APA
|
||||||
value: https://apap.com.do/
|
value: https://apap.com.do/
|
||||||
- name: DBURI
|
|
||||||
value: dolardb/crawler.db
|
|
||||||
- name: NATSURI
|
- name: NATSURI
|
||||||
value: "nats://nats-svc:4222"
|
value: "nats://nats-svc:4222"
|
||||||
- name: WHO
|
- name: WHO
|
||||||
value: apa
|
value: apap
|
||||||
volumeMounts:
|
- name: DOLLAR_SERVICE_URL
|
||||||
- name: database
|
value: "dolar-grpc-svc:80"
|
||||||
mountPath: /app/dolardb
|
|
||||||
volumes:
|
|
||||||
- name: database
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: bank-crawler-pvc
|
|
||||||
restartPolicy: OnFailure
|
|
||||||
---
|
---
|
||||||
apiVersion: batch/v1
|
apiVersion: batch/v1
|
||||||
kind: CronJob
|
kind: CronJob
|
||||||
@ -206,26 +168,20 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
template:
|
template:
|
||||||
spec:
|
spec:
|
||||||
|
restartPolicy: OnFailure
|
||||||
containers:
|
containers:
|
||||||
- name: crawler-bnc
|
- name: crawler-bnc
|
||||||
image: localhost:32000/crawler:latest
|
image: localhost:32000/crawler:latest
|
||||||
env:
|
env:
|
||||||
- name: BNC
|
- name: BNC
|
||||||
value: https://www.banesco.com.do/
|
value: https://www.banesco.com.do/
|
||||||
- name: DBURI
|
|
||||||
value: dolardb/crawler.db
|
|
||||||
- name: NATSURI
|
- name: NATSURI
|
||||||
value: "nats://nats-svc:4222"
|
value: "nats://nats-svc:4222"
|
||||||
- name: WHO
|
- name: WHO
|
||||||
value: bnc
|
value: bnc
|
||||||
volumeMounts:
|
- name: DOLLAR_SERVICE_URL
|
||||||
- name: database
|
value: "dolar-grpc-svc:80"
|
||||||
mountPath: /app/dolardb
|
|
||||||
volumes:
|
|
||||||
- name: database
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: bank-crawler-pvc
|
|
||||||
restartPolicy: OnFailure
|
|
||||||
---
|
---
|
||||||
apiVersion: batch/v1
|
apiVersion: batch/v1
|
||||||
kind: CronJob
|
kind: CronJob
|
||||||
@ -239,26 +195,20 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
template:
|
template:
|
||||||
spec:
|
spec:
|
||||||
|
restartPolicy: OnFailure
|
||||||
containers:
|
containers:
|
||||||
- name: crawler-vimenca
|
- name: crawler-vimenca
|
||||||
image: localhost:32000/crawler:latest
|
image: localhost:32000/crawler:latest
|
||||||
env:
|
env:
|
||||||
- name: VIMENCA
|
- name: VIMENCA
|
||||||
value: https://www.bancovimenca.com/
|
value: https://www.bancovimenca.com/
|
||||||
- name: DBURI
|
|
||||||
value: dolardb/crawler.db
|
|
||||||
- name: NATSURI
|
- name: NATSURI
|
||||||
value: "nats://nats-svc:4222"
|
value: "nats://nats-svc:4222"
|
||||||
- name: WHO
|
- name: WHO
|
||||||
value: vimenca
|
value: vimenca
|
||||||
volumeMounts:
|
- name: DOLLAR_SERVICE_URL
|
||||||
- name: database
|
value: "dolar-grpc-svc:80"
|
||||||
mountPath: /app/dolardb
|
|
||||||
volumes:
|
|
||||||
- name: database
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: bank-crawler-pvc
|
|
||||||
restartPolicy: OnFailure
|
|
||||||
---
|
---
|
||||||
apiVersion: batch/v1
|
apiVersion: batch/v1
|
||||||
kind: CronJob
|
kind: CronJob
|
||||||
@ -272,23 +222,16 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
template:
|
template:
|
||||||
spec:
|
spec:
|
||||||
|
restartPolicy: OnFailure
|
||||||
containers:
|
containers:
|
||||||
- name: crawler-scotia
|
- name: crawler-scotia
|
||||||
image: localhost:32000/crawler:latest
|
image: localhost:32000/crawler:latest
|
||||||
env:
|
env:
|
||||||
- name: SCOTIA
|
- name: SCOTIA
|
||||||
value: https://do.scotiabank.com/banca-personal/tarifas/tasas-de-cambio.html
|
value: https://do.scotiabank.com/banca-personal/tarifas/tasas-de-cambio.html
|
||||||
- name: DBURI
|
|
||||||
value: dolardb/crawler.db
|
|
||||||
- name: NATSURI
|
- name: NATSURI
|
||||||
value: "nats://nats-svc:4222"
|
value: "nats://nats-svc:4222"
|
||||||
- name: WHO
|
- name: WHO
|
||||||
value: scotia
|
value: scotia
|
||||||
volumeMounts:
|
- name: DOLLAR_SERVICE_URL
|
||||||
- name: database
|
value: "dolar-grpc-svc:80"
|
||||||
mountPath: /app/dolardb
|
|
||||||
volumes:
|
|
||||||
- name: database
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: bank-crawler-pvc
|
|
||||||
restartPolicy: OnFailure
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user