Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler, proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard, UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
588 lines
13 KiB
Go
588 lines
13 KiB
Go
package browser
|
|
|
|
import (
|
|
"context"
|
|
"encoding/base64"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"os"
|
|
"os/exec"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
type PlaywrightBrowser struct {
|
|
cmd *exec.Cmd
|
|
serverURL string
|
|
client *http.Client
|
|
sessions map[string]*BrowserSession
|
|
mu sync.RWMutex
|
|
config Config
|
|
}
|
|
|
|
// Config holds the settings for a PlaywrightBrowser. Zero values of
// PlaywrightServerURL, DefaultTimeout, ScreenshotsDir and RecordingsDir are
// replaced with defaults by NewPlaywrightBrowser.
type Config struct {
	// PlaywrightServerURL is the base URL of the Playwright server.
	PlaywrightServerURL string
	// DefaultTimeout is used as the timeout of the HTTP client.
	DefaultTimeout time.Duration
	// Headless is sent as the "headless" parameter of every new session.
	Headless bool
	// UserAgent is used for sessions that do not specify their own.
	UserAgent string
	// ProxyURL is used for sessions that do not specify their own proxy.
	ProxyURL string
	// ScreenshotsDir is the directory screenshots are written to.
	ScreenshotsDir string
	// RecordingsDir is sent as the video directory for recorded sessions.
	RecordingsDir string
}
|
|
|
|
// BrowserSession is the client-side record of one browser context opened on
// the Playwright server.
type BrowserSession struct {
	// ID is the session identifier generated when the session is created.
	ID string
	// ContextID is the "contextId" value returned by the server, if any.
	ContextID string
	// PageID is the "pageId" value returned by the server, if any.
	PageID string
	// CreatedAt is the time the session record was created.
	CreatedAt time.Time
	// LastAction is initialised at creation time; no code visible in this
	// file updates it afterwards.
	LastAction time.Time
	// Screenshots lists the file paths of screenshots taken in this session.
	Screenshots []string
	// Recordings is not written by any code visible in this file.
	Recordings []string
	// Closed is set when the session is closed.
	Closed bool
}
|
|
|
|
// ActionRequest names an action to run in a session together with its
// parameters. It serialises to JSON with the keys "sessionId", "action" and
// "params".
type ActionRequest struct {
	// SessionID identifies the target session.
	SessionID string `json:"sessionId"`
	// Action is the name of the action to perform.
	Action string `json:"action"`
	// Params carries the action-specific arguments.
	Params map[string]interface{} `json:"params"`
}
|
|
|
|
// ActionResponse reports the outcome of a browser action. Every field except
// Success is omitted from the JSON encoding when empty.
type ActionResponse struct {
	// Success reports whether the action completed without error.
	Success bool `json:"success"`
	// Data is an optional action-specific payload.
	Data interface{} `json:"data,omitempty"`
	// Screenshot holds the screenshot data string returned by the server
	// when a screenshot was requested.
	Screenshot string `json:"screenshot,omitempty"`
	// Error is the error message of a failed action.
	Error string `json:"error,omitempty"`
	// PageTitle is the page title reported by the server.
	PageTitle string `json:"pageTitle,omitempty"`
	// PageURL is the page URL reported by the server.
	PageURL string `json:"pageUrl,omitempty"`
}
|
|
|
|
func NewPlaywrightBrowser(cfg Config) *PlaywrightBrowser {
|
|
if cfg.DefaultTimeout == 0 {
|
|
cfg.DefaultTimeout = 30 * time.Second
|
|
}
|
|
if cfg.PlaywrightServerURL == "" {
|
|
cfg.PlaywrightServerURL = "http://localhost:3050"
|
|
}
|
|
if cfg.ScreenshotsDir == "" {
|
|
cfg.ScreenshotsDir = "/tmp/gooseek-screenshots"
|
|
}
|
|
if cfg.RecordingsDir == "" {
|
|
cfg.RecordingsDir = "/tmp/gooseek-recordings"
|
|
}
|
|
|
|
os.MkdirAll(cfg.ScreenshotsDir, 0755)
|
|
os.MkdirAll(cfg.RecordingsDir, 0755)
|
|
|
|
return &PlaywrightBrowser{
|
|
serverURL: cfg.PlaywrightServerURL,
|
|
client: &http.Client{
|
|
Timeout: cfg.DefaultTimeout,
|
|
},
|
|
sessions: make(map[string]*BrowserSession),
|
|
config: cfg,
|
|
}
|
|
}
|
|
|
|
func (b *PlaywrightBrowser) NewSession(ctx context.Context, opts SessionOptions) (*BrowserSession, error) {
|
|
sessionID := uuid.New().String()
|
|
|
|
params := map[string]interface{}{
|
|
"headless": b.config.Headless,
|
|
"sessionId": sessionID,
|
|
}
|
|
|
|
if opts.Viewport != nil {
|
|
params["viewport"] = opts.Viewport
|
|
}
|
|
if opts.UserAgent != "" {
|
|
params["userAgent"] = opts.UserAgent
|
|
} else if b.config.UserAgent != "" {
|
|
params["userAgent"] = b.config.UserAgent
|
|
}
|
|
if opts.ProxyURL != "" {
|
|
params["proxy"] = opts.ProxyURL
|
|
} else if b.config.ProxyURL != "" {
|
|
params["proxy"] = b.config.ProxyURL
|
|
}
|
|
if opts.RecordVideo {
|
|
params["recordVideo"] = map[string]interface{}{
|
|
"dir": b.config.RecordingsDir,
|
|
}
|
|
}
|
|
|
|
resp, err := b.sendCommand(ctx, "browser.newContext", params)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to create browser context: %w", err)
|
|
}
|
|
|
|
contextID, _ := resp["contextId"].(string)
|
|
pageID, _ := resp["pageId"].(string)
|
|
|
|
session := &BrowserSession{
|
|
ID: sessionID,
|
|
ContextID: contextID,
|
|
PageID: pageID,
|
|
CreatedAt: time.Now(),
|
|
LastAction: time.Now(),
|
|
}
|
|
|
|
b.mu.Lock()
|
|
b.sessions[sessionID] = session
|
|
b.mu.Unlock()
|
|
|
|
return session, nil
|
|
}
|
|
|
|
func (b *PlaywrightBrowser) CloseSession(ctx context.Context, sessionID string) error {
|
|
b.mu.Lock()
|
|
session, ok := b.sessions[sessionID]
|
|
if !ok {
|
|
b.mu.Unlock()
|
|
return errors.New("session not found")
|
|
}
|
|
session.Closed = true
|
|
delete(b.sessions, sessionID)
|
|
b.mu.Unlock()
|
|
|
|
_, err := b.sendCommand(ctx, "browser.closeContext", map[string]interface{}{
|
|
"sessionId": sessionID,
|
|
})
|
|
return err
|
|
}
|
|
|
|
func (b *PlaywrightBrowser) Navigate(ctx context.Context, sessionID, url string, opts NavigateOptions) (*ActionResponse, error) {
|
|
params := map[string]interface{}{
|
|
"sessionId": sessionID,
|
|
"url": url,
|
|
}
|
|
if opts.Timeout > 0 {
|
|
params["timeout"] = opts.Timeout
|
|
}
|
|
if opts.WaitUntil != "" {
|
|
params["waitUntil"] = opts.WaitUntil
|
|
}
|
|
|
|
resp, err := b.sendCommand(ctx, "page.goto", params)
|
|
if err != nil {
|
|
return &ActionResponse{Success: false, Error: err.Error()}, err
|
|
}
|
|
|
|
result := &ActionResponse{
|
|
Success: true,
|
|
PageURL: getString(resp, "url"),
|
|
PageTitle: getString(resp, "title"),
|
|
}
|
|
|
|
if opts.Screenshot {
|
|
screenshot, _ := b.Screenshot(ctx, sessionID, ScreenshotOptions{FullPage: false})
|
|
if screenshot != nil {
|
|
result.Screenshot = screenshot.Data
|
|
}
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
func (b *PlaywrightBrowser) Click(ctx context.Context, sessionID, selector string, opts ClickOptions) (*ActionResponse, error) {
|
|
params := map[string]interface{}{
|
|
"sessionId": sessionID,
|
|
"selector": selector,
|
|
}
|
|
if opts.Button != "" {
|
|
params["button"] = opts.Button
|
|
}
|
|
if opts.ClickCount > 0 {
|
|
params["clickCount"] = opts.ClickCount
|
|
}
|
|
if opts.Timeout > 0 {
|
|
params["timeout"] = opts.Timeout
|
|
}
|
|
if opts.Force {
|
|
params["force"] = true
|
|
}
|
|
|
|
_, err := b.sendCommand(ctx, "page.click", params)
|
|
if err != nil {
|
|
return &ActionResponse{Success: false, Error: err.Error()}, err
|
|
}
|
|
|
|
result := &ActionResponse{Success: true}
|
|
|
|
if opts.WaitAfter > 0 {
|
|
time.Sleep(time.Duration(opts.WaitAfter) * time.Millisecond)
|
|
}
|
|
|
|
if opts.Screenshot {
|
|
screenshot, _ := b.Screenshot(ctx, sessionID, ScreenshotOptions{FullPage: false})
|
|
if screenshot != nil {
|
|
result.Screenshot = screenshot.Data
|
|
}
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
func (b *PlaywrightBrowser) Type(ctx context.Context, sessionID, selector, text string, opts TypeOptions) (*ActionResponse, error) {
|
|
params := map[string]interface{}{
|
|
"sessionId": sessionID,
|
|
"selector": selector,
|
|
"text": text,
|
|
}
|
|
if opts.Delay > 0 {
|
|
params["delay"] = opts.Delay
|
|
}
|
|
if opts.Timeout > 0 {
|
|
params["timeout"] = opts.Timeout
|
|
}
|
|
if opts.Clear {
|
|
b.sendCommand(ctx, "page.fill", map[string]interface{}{
|
|
"sessionId": sessionID,
|
|
"selector": selector,
|
|
"value": "",
|
|
})
|
|
}
|
|
|
|
_, err := b.sendCommand(ctx, "page.type", params)
|
|
if err != nil {
|
|
return &ActionResponse{Success: false, Error: err.Error()}, err
|
|
}
|
|
|
|
return &ActionResponse{Success: true}, nil
|
|
}
|
|
|
|
func (b *PlaywrightBrowser) Fill(ctx context.Context, sessionID, selector, value string) (*ActionResponse, error) {
|
|
params := map[string]interface{}{
|
|
"sessionId": sessionID,
|
|
"selector": selector,
|
|
"value": value,
|
|
}
|
|
|
|
_, err := b.sendCommand(ctx, "page.fill", params)
|
|
if err != nil {
|
|
return &ActionResponse{Success: false, Error: err.Error()}, err
|
|
}
|
|
|
|
return &ActionResponse{Success: true}, nil
|
|
}
|
|
|
|
func (b *PlaywrightBrowser) Screenshot(ctx context.Context, sessionID string, opts ScreenshotOptions) (*ScreenshotResult, error) {
|
|
params := map[string]interface{}{
|
|
"sessionId": sessionID,
|
|
"fullPage": opts.FullPage,
|
|
}
|
|
if opts.Selector != "" {
|
|
params["selector"] = opts.Selector
|
|
}
|
|
if opts.Quality > 0 {
|
|
params["quality"] = opts.Quality
|
|
}
|
|
params["type"] = "png"
|
|
if opts.Format != "" {
|
|
params["type"] = opts.Format
|
|
}
|
|
|
|
resp, err := b.sendCommand(ctx, "page.screenshot", params)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
data, _ := resp["data"].(string)
|
|
|
|
filename := fmt.Sprintf("%s/%s-%d.png", b.config.ScreenshotsDir, sessionID, time.Now().UnixNano())
|
|
if decoded, err := base64.StdEncoding.DecodeString(data); err == nil {
|
|
os.WriteFile(filename, decoded, 0644)
|
|
}
|
|
|
|
b.mu.Lock()
|
|
if session, ok := b.sessions[sessionID]; ok {
|
|
session.Screenshots = append(session.Screenshots, filename)
|
|
}
|
|
b.mu.Unlock()
|
|
|
|
return &ScreenshotResult{
|
|
Data: data,
|
|
Path: filename,
|
|
MimeType: "image/png",
|
|
}, nil
|
|
}
|
|
|
|
func (b *PlaywrightBrowser) ExtractText(ctx context.Context, sessionID, selector string) (string, error) {
|
|
params := map[string]interface{}{
|
|
"sessionId": sessionID,
|
|
"selector": selector,
|
|
}
|
|
|
|
resp, err := b.sendCommand(ctx, "page.textContent", params)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
return getString(resp, "text"), nil
|
|
}
|
|
|
|
func (b *PlaywrightBrowser) ExtractHTML(ctx context.Context, sessionID, selector string) (string, error) {
|
|
params := map[string]interface{}{
|
|
"sessionId": sessionID,
|
|
"selector": selector,
|
|
}
|
|
|
|
resp, err := b.sendCommand(ctx, "page.innerHTML", params)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
return getString(resp, "html"), nil
|
|
}
|
|
|
|
func (b *PlaywrightBrowser) WaitForSelector(ctx context.Context, sessionID, selector string, opts WaitOptions) error {
|
|
params := map[string]interface{}{
|
|
"sessionId": sessionID,
|
|
"selector": selector,
|
|
}
|
|
if opts.Timeout > 0 {
|
|
params["timeout"] = opts.Timeout
|
|
}
|
|
if opts.State != "" {
|
|
params["state"] = opts.State
|
|
}
|
|
|
|
_, err := b.sendCommand(ctx, "page.waitForSelector", params)
|
|
return err
|
|
}
|
|
|
|
func (b *PlaywrightBrowser) WaitForNavigation(ctx context.Context, sessionID string, opts WaitOptions) error {
|
|
params := map[string]interface{}{
|
|
"sessionId": sessionID,
|
|
}
|
|
if opts.Timeout > 0 {
|
|
params["timeout"] = opts.Timeout
|
|
}
|
|
if opts.WaitUntil != "" {
|
|
params["waitUntil"] = opts.WaitUntil
|
|
}
|
|
|
|
_, err := b.sendCommand(ctx, "page.waitForNavigation", params)
|
|
return err
|
|
}
|
|
|
|
func (b *PlaywrightBrowser) Scroll(ctx context.Context, sessionID string, opts ScrollOptions) (*ActionResponse, error) {
|
|
script := fmt.Sprintf("window.scrollBy(%d, %d)", opts.X, opts.Y)
|
|
if opts.Selector != "" {
|
|
script = fmt.Sprintf(`document.querySelector('%s').scrollBy(%d, %d)`, opts.Selector, opts.X, opts.Y)
|
|
}
|
|
if opts.ToBottom {
|
|
script = "window.scrollTo(0, document.body.scrollHeight)"
|
|
}
|
|
if opts.ToTop {
|
|
script = "window.scrollTo(0, 0)"
|
|
}
|
|
|
|
_, err := b.Evaluate(ctx, sessionID, script)
|
|
if err != nil {
|
|
return &ActionResponse{Success: false, Error: err.Error()}, err
|
|
}
|
|
|
|
if opts.WaitAfter > 0 {
|
|
time.Sleep(time.Duration(opts.WaitAfter) * time.Millisecond)
|
|
}
|
|
|
|
return &ActionResponse{Success: true}, nil
|
|
}
|
|
|
|
func (b *PlaywrightBrowser) Evaluate(ctx context.Context, sessionID, script string) (interface{}, error) {
|
|
params := map[string]interface{}{
|
|
"sessionId": sessionID,
|
|
"expression": script,
|
|
}
|
|
|
|
resp, err := b.sendCommand(ctx, "page.evaluate", params)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return resp["result"], nil
|
|
}
|
|
|
|
func (b *PlaywrightBrowser) Select(ctx context.Context, sessionID, selector string, values []string) (*ActionResponse, error) {
|
|
params := map[string]interface{}{
|
|
"sessionId": sessionID,
|
|
"selector": selector,
|
|
"values": values,
|
|
}
|
|
|
|
_, err := b.sendCommand(ctx, "page.selectOption", params)
|
|
if err != nil {
|
|
return &ActionResponse{Success: false, Error: err.Error()}, err
|
|
}
|
|
|
|
return &ActionResponse{Success: true}, nil
|
|
}
|
|
|
|
func (b *PlaywrightBrowser) GetPageInfo(ctx context.Context, sessionID string) (*PageInfo, error) {
|
|
params := map[string]interface{}{
|
|
"sessionId": sessionID,
|
|
}
|
|
|
|
resp, err := b.sendCommand(ctx, "page.info", params)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &PageInfo{
|
|
URL: getString(resp, "url"),
|
|
Title: getString(resp, "title"),
|
|
Content: getString(resp, "content"),
|
|
}, nil
|
|
}
|
|
|
|
func (b *PlaywrightBrowser) PDF(ctx context.Context, sessionID string, opts PDFOptions) ([]byte, error) {
|
|
params := map[string]interface{}{
|
|
"sessionId": sessionID,
|
|
}
|
|
if opts.Format != "" {
|
|
params["format"] = opts.Format
|
|
}
|
|
if opts.Landscape {
|
|
params["landscape"] = true
|
|
}
|
|
if opts.PrintBackground {
|
|
params["printBackground"] = true
|
|
}
|
|
|
|
resp, err := b.sendCommand(ctx, "page.pdf", params)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
data, _ := resp["data"].(string)
|
|
return base64.StdEncoding.DecodeString(data)
|
|
}
|
|
|
|
func (b *PlaywrightBrowser) sendCommand(ctx context.Context, method string, params map[string]interface{}) (map[string]interface{}, error) {
|
|
body := map[string]interface{}{
|
|
"method": method,
|
|
"params": params,
|
|
}
|
|
|
|
jsonBody, err := json.Marshal(body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "POST", b.serverURL+"/api/browser", strings.NewReader(string(jsonBody)))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
resp, err := b.client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
respBody, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var result map[string]interface{}
|
|
if err := json.Unmarshal(respBody, &result); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if errMsg, ok := result["error"].(string); ok && errMsg != "" {
|
|
return result, errors.New(errMsg)
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// getString returns m[key] when it holds a string, and the empty string when
// the key is absent or the value has any other type. A nil map is allowed.
func getString(m map[string]interface{}, key string) string {
	// The failed type assertion yields the zero value "", which is exactly
	// the fallback wanted here.
	s, _ := m[key].(string)
	return s
}
|
|
|
|
type SessionOptions struct {
|
|
Headless bool
|
|
Viewport *Viewport
|
|
UserAgent string
|
|
ProxyURL string
|
|
RecordVideo bool
|
|
BlockAds bool
|
|
}
|
|
|
|
// Viewport is a width/height pair that serialises to JSON with the keys
// "width" and "height".
type Viewport struct {
	// Width is the viewport width.
	Width int `json:"width"`
	// Height is the viewport height.
	Height int `json:"height"`
}
|
|
|
|
// NavigateOptions customises Navigate.
type NavigateOptions struct {
	// Timeout is forwarded as the "timeout" parameter when positive.
	// NOTE(review): presumably milliseconds; confirm against the server.
	Timeout int
	// WaitUntil is forwarded as the "waitUntil" parameter when non-empty.
	WaitUntil string
	// Screenshot requests a viewport screenshot after navigating.
	Screenshot bool
}
|
|
|
|
// ClickOptions customises Click.
type ClickOptions struct {
	// Button is forwarded as the "button" parameter when non-empty.
	Button string
	// ClickCount is forwarded as the "clickCount" parameter when positive.
	ClickCount int
	// Timeout is forwarded as the "timeout" parameter when positive.
	// NOTE(review): presumably milliseconds; confirm against the server.
	Timeout int
	// Force is forwarded as the "force" parameter when true.
	Force bool
	// WaitAfter is the number of milliseconds to wait after the click.
	WaitAfter int
	// Screenshot requests a viewport screenshot after the click.
	Screenshot bool
}
|
|
|
|
// TypeOptions customises Type.
type TypeOptions struct {
	// Delay is forwarded as the "delay" parameter when positive.
	Delay int
	// Timeout is forwarded as the "timeout" parameter when positive.
	Timeout int
	// Clear requests that the element be emptied before typing.
	Clear bool
}
|
|
|
|
// ScreenshotOptions customises Screenshot.
type ScreenshotOptions struct {
	// FullPage is forwarded as the "fullPage" parameter.
	FullPage bool
	// Selector is forwarded as the "selector" parameter when non-empty.
	Selector string
	// Format is forwarded as the image "type"; "png" is used when empty.
	Format string
	// Quality is forwarded as the "quality" parameter when positive.
	Quality int
}
|
|
|
|
// ScreenshotResult is the outcome of Screenshot.
type ScreenshotResult struct {
	// Data is the image data string as returned by the server.
	Data string
	// Path is the file path chosen for the saved image.
	Path string
	// MimeType is the MIME type reported for the image.
	MimeType string
}
|
|
|
|
// WaitOptions customises WaitForSelector and WaitForNavigation.
type WaitOptions struct {
	// Timeout is forwarded as the "timeout" parameter when positive.
	Timeout int
	// State is forwarded as the "state" parameter by WaitForSelector when
	// non-empty.
	State string
	// WaitUntil is forwarded as the "waitUntil" parameter by
	// WaitForNavigation when non-empty.
	WaitUntil string
}
|
|
|
|
// ScrollOptions customises Scroll.
type ScrollOptions struct {
	// X is the horizontal scroll offset.
	X int
	// Y is the vertical scroll offset.
	Y int
	// Selector, when non-empty, scrolls the matching element instead of
	// the window.
	Selector string
	// ToBottom scrolls the window to the bottom of the document body,
	// overriding X, Y and Selector.
	ToBottom bool
	// ToTop scrolls the window to the top, overriding every other option.
	ToTop bool
	// WaitAfter is the number of milliseconds to wait after scrolling.
	WaitAfter int
}
|
|
|
|
// PageInfo describes a page as reported by the server.
type PageInfo struct {
	// URL is the "url" field of the server response.
	URL string
	// Title is the "title" field of the server response.
	Title string
	// Content is the "content" field of the server response.
	Content string
}
|
|
|
|
// PDFOptions customises PDF.
type PDFOptions struct {
	// Format is forwarded as the "format" parameter when non-empty.
	Format string
	// Landscape is forwarded as the "landscape" parameter when true.
	Landscape bool
	// PrintBackground is forwarded as the "printBackground" parameter when
	// true.
	PrintBackground bool
}
|