feat: Go backend, enhanced search, new widgets, Docker deploy
Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler, proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard, UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
This commit is contained in:
587
backend/internal/computer/browser/browser.go
Normal file
587
backend/internal/computer/browser/browser.go
Normal file
@@ -0,0 +1,587 @@
|
||||
package browser
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
type PlaywrightBrowser struct {
|
||||
cmd *exec.Cmd
|
||||
serverURL string
|
||||
client *http.Client
|
||||
sessions map[string]*BrowserSession
|
||||
mu sync.RWMutex
|
||||
config Config
|
||||
}
|
||||
|
||||
// Config holds the settings used to construct a PlaywrightBrowser.
// Zero-valued fields noted below are replaced with defaults by
// NewPlaywrightBrowser.
type Config struct {
	// PlaywrightServerURL is the base URL of the automation server.
	// Defaults to "http://localhost:3050" when empty.
	PlaywrightServerURL string
	// DefaultTimeout is the HTTP client timeout. Defaults to 30 seconds when zero.
	DefaultTimeout time.Duration
	// Headless is forwarded as the "headless" parameter of every new session.
	Headless bool
	// UserAgent is used for a session when the session options do not set one.
	UserAgent string
	// ProxyURL is used for a session when the session options do not set one.
	ProxyURL string
	// ScreenshotsDir is where screenshots are written.
	// Defaults to "/tmp/gooseek-screenshots" when empty.
	ScreenshotsDir string
	// RecordingsDir is passed to the server as the video recording directory.
	// Defaults to "/tmp/gooseek-recordings" when empty.
	RecordingsDir string
}
|
||||
|
||||
// BrowserSession is the client-side record of one browser context/page pair
// created on the automation server.
type BrowserSession struct {
	// ID is the client-generated session identifier sent with every command.
	ID string
	// ContextID and PageID are taken from the server's newContext response.
	ContextID string
	PageID    string
	// CreatedAt is set when the session is created.
	CreatedAt time.Time
	// LastAction is initialised at creation; no visible code updates it afterwards.
	LastAction time.Time
	// Screenshots lists the file paths of screenshots taken in this session.
	Screenshots []string
	// Recordings is not appended to by any code visible in this file.
	Recordings []string
	// Closed is set to true when the session is closed.
	Closed bool
}
|
||||
|
||||
// ActionRequest describes a browser action addressed to a session.
// It is serialised with the JSON keys "sessionId", "action" and "params".
type ActionRequest struct {
	// SessionID identifies the target session.
	SessionID string `json:"sessionId"`
	// Action is the name of the action to perform.
	Action string `json:"action"`
	// Params carries the action-specific arguments.
	Params map[string]interface{} `json:"params"`
}
|
||||
|
||||
// ActionResponse is the outcome of a browser action. Every field except
// Success is omitted from the JSON encoding when empty.
type ActionResponse struct {
	// Success reports whether the action completed without error.
	Success bool `json:"success"`
	// Data is an optional action-specific payload.
	Data interface{} `json:"data,omitempty"`
	// Screenshot is the screenshot data string returned by the server, when requested.
	Screenshot string `json:"screenshot,omitempty"`
	// Error is the error message when the action failed.
	Error string `json:"error,omitempty"`
	// PageTitle and PageURL describe the page after the action, when known.
	PageTitle string `json:"pageTitle,omitempty"`
	PageURL   string `json:"pageUrl,omitempty"`
}
|
||||
|
||||
func NewPlaywrightBrowser(cfg Config) *PlaywrightBrowser {
|
||||
if cfg.DefaultTimeout == 0 {
|
||||
cfg.DefaultTimeout = 30 * time.Second
|
||||
}
|
||||
if cfg.PlaywrightServerURL == "" {
|
||||
cfg.PlaywrightServerURL = "http://localhost:3050"
|
||||
}
|
||||
if cfg.ScreenshotsDir == "" {
|
||||
cfg.ScreenshotsDir = "/tmp/gooseek-screenshots"
|
||||
}
|
||||
if cfg.RecordingsDir == "" {
|
||||
cfg.RecordingsDir = "/tmp/gooseek-recordings"
|
||||
}
|
||||
|
||||
os.MkdirAll(cfg.ScreenshotsDir, 0755)
|
||||
os.MkdirAll(cfg.RecordingsDir, 0755)
|
||||
|
||||
return &PlaywrightBrowser{
|
||||
serverURL: cfg.PlaywrightServerURL,
|
||||
client: &http.Client{
|
||||
Timeout: cfg.DefaultTimeout,
|
||||
},
|
||||
sessions: make(map[string]*BrowserSession),
|
||||
config: cfg,
|
||||
}
|
||||
}
|
||||
|
||||
func (b *PlaywrightBrowser) NewSession(ctx context.Context, opts SessionOptions) (*BrowserSession, error) {
|
||||
sessionID := uuid.New().String()
|
||||
|
||||
params := map[string]interface{}{
|
||||
"headless": b.config.Headless,
|
||||
"sessionId": sessionID,
|
||||
}
|
||||
|
||||
if opts.Viewport != nil {
|
||||
params["viewport"] = opts.Viewport
|
||||
}
|
||||
if opts.UserAgent != "" {
|
||||
params["userAgent"] = opts.UserAgent
|
||||
} else if b.config.UserAgent != "" {
|
||||
params["userAgent"] = b.config.UserAgent
|
||||
}
|
||||
if opts.ProxyURL != "" {
|
||||
params["proxy"] = opts.ProxyURL
|
||||
} else if b.config.ProxyURL != "" {
|
||||
params["proxy"] = b.config.ProxyURL
|
||||
}
|
||||
if opts.RecordVideo {
|
||||
params["recordVideo"] = map[string]interface{}{
|
||||
"dir": b.config.RecordingsDir,
|
||||
}
|
||||
}
|
||||
|
||||
resp, err := b.sendCommand(ctx, "browser.newContext", params)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create browser context: %w", err)
|
||||
}
|
||||
|
||||
contextID, _ := resp["contextId"].(string)
|
||||
pageID, _ := resp["pageId"].(string)
|
||||
|
||||
session := &BrowserSession{
|
||||
ID: sessionID,
|
||||
ContextID: contextID,
|
||||
PageID: pageID,
|
||||
CreatedAt: time.Now(),
|
||||
LastAction: time.Now(),
|
||||
}
|
||||
|
||||
b.mu.Lock()
|
||||
b.sessions[sessionID] = session
|
||||
b.mu.Unlock()
|
||||
|
||||
return session, nil
|
||||
}
|
||||
|
||||
func (b *PlaywrightBrowser) CloseSession(ctx context.Context, sessionID string) error {
|
||||
b.mu.Lock()
|
||||
session, ok := b.sessions[sessionID]
|
||||
if !ok {
|
||||
b.mu.Unlock()
|
||||
return errors.New("session not found")
|
||||
}
|
||||
session.Closed = true
|
||||
delete(b.sessions, sessionID)
|
||||
b.mu.Unlock()
|
||||
|
||||
_, err := b.sendCommand(ctx, "browser.closeContext", map[string]interface{}{
|
||||
"sessionId": sessionID,
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
func (b *PlaywrightBrowser) Navigate(ctx context.Context, sessionID, url string, opts NavigateOptions) (*ActionResponse, error) {
|
||||
params := map[string]interface{}{
|
||||
"sessionId": sessionID,
|
||||
"url": url,
|
||||
}
|
||||
if opts.Timeout > 0 {
|
||||
params["timeout"] = opts.Timeout
|
||||
}
|
||||
if opts.WaitUntil != "" {
|
||||
params["waitUntil"] = opts.WaitUntil
|
||||
}
|
||||
|
||||
resp, err := b.sendCommand(ctx, "page.goto", params)
|
||||
if err != nil {
|
||||
return &ActionResponse{Success: false, Error: err.Error()}, err
|
||||
}
|
||||
|
||||
result := &ActionResponse{
|
||||
Success: true,
|
||||
PageURL: getString(resp, "url"),
|
||||
PageTitle: getString(resp, "title"),
|
||||
}
|
||||
|
||||
if opts.Screenshot {
|
||||
screenshot, _ := b.Screenshot(ctx, sessionID, ScreenshotOptions{FullPage: false})
|
||||
if screenshot != nil {
|
||||
result.Screenshot = screenshot.Data
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (b *PlaywrightBrowser) Click(ctx context.Context, sessionID, selector string, opts ClickOptions) (*ActionResponse, error) {
|
||||
params := map[string]interface{}{
|
||||
"sessionId": sessionID,
|
||||
"selector": selector,
|
||||
}
|
||||
if opts.Button != "" {
|
||||
params["button"] = opts.Button
|
||||
}
|
||||
if opts.ClickCount > 0 {
|
||||
params["clickCount"] = opts.ClickCount
|
||||
}
|
||||
if opts.Timeout > 0 {
|
||||
params["timeout"] = opts.Timeout
|
||||
}
|
||||
if opts.Force {
|
||||
params["force"] = true
|
||||
}
|
||||
|
||||
_, err := b.sendCommand(ctx, "page.click", params)
|
||||
if err != nil {
|
||||
return &ActionResponse{Success: false, Error: err.Error()}, err
|
||||
}
|
||||
|
||||
result := &ActionResponse{Success: true}
|
||||
|
||||
if opts.WaitAfter > 0 {
|
||||
time.Sleep(time.Duration(opts.WaitAfter) * time.Millisecond)
|
||||
}
|
||||
|
||||
if opts.Screenshot {
|
||||
screenshot, _ := b.Screenshot(ctx, sessionID, ScreenshotOptions{FullPage: false})
|
||||
if screenshot != nil {
|
||||
result.Screenshot = screenshot.Data
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (b *PlaywrightBrowser) Type(ctx context.Context, sessionID, selector, text string, opts TypeOptions) (*ActionResponse, error) {
|
||||
params := map[string]interface{}{
|
||||
"sessionId": sessionID,
|
||||
"selector": selector,
|
||||
"text": text,
|
||||
}
|
||||
if opts.Delay > 0 {
|
||||
params["delay"] = opts.Delay
|
||||
}
|
||||
if opts.Timeout > 0 {
|
||||
params["timeout"] = opts.Timeout
|
||||
}
|
||||
if opts.Clear {
|
||||
b.sendCommand(ctx, "page.fill", map[string]interface{}{
|
||||
"sessionId": sessionID,
|
||||
"selector": selector,
|
||||
"value": "",
|
||||
})
|
||||
}
|
||||
|
||||
_, err := b.sendCommand(ctx, "page.type", params)
|
||||
if err != nil {
|
||||
return &ActionResponse{Success: false, Error: err.Error()}, err
|
||||
}
|
||||
|
||||
return &ActionResponse{Success: true}, nil
|
||||
}
|
||||
|
||||
func (b *PlaywrightBrowser) Fill(ctx context.Context, sessionID, selector, value string) (*ActionResponse, error) {
|
||||
params := map[string]interface{}{
|
||||
"sessionId": sessionID,
|
||||
"selector": selector,
|
||||
"value": value,
|
||||
}
|
||||
|
||||
_, err := b.sendCommand(ctx, "page.fill", params)
|
||||
if err != nil {
|
||||
return &ActionResponse{Success: false, Error: err.Error()}, err
|
||||
}
|
||||
|
||||
return &ActionResponse{Success: true}, nil
|
||||
}
|
||||
|
||||
func (b *PlaywrightBrowser) Screenshot(ctx context.Context, sessionID string, opts ScreenshotOptions) (*ScreenshotResult, error) {
|
||||
params := map[string]interface{}{
|
||||
"sessionId": sessionID,
|
||||
"fullPage": opts.FullPage,
|
||||
}
|
||||
if opts.Selector != "" {
|
||||
params["selector"] = opts.Selector
|
||||
}
|
||||
if opts.Quality > 0 {
|
||||
params["quality"] = opts.Quality
|
||||
}
|
||||
params["type"] = "png"
|
||||
if opts.Format != "" {
|
||||
params["type"] = opts.Format
|
||||
}
|
||||
|
||||
resp, err := b.sendCommand(ctx, "page.screenshot", params)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data, _ := resp["data"].(string)
|
||||
|
||||
filename := fmt.Sprintf("%s/%s-%d.png", b.config.ScreenshotsDir, sessionID, time.Now().UnixNano())
|
||||
if decoded, err := base64.StdEncoding.DecodeString(data); err == nil {
|
||||
os.WriteFile(filename, decoded, 0644)
|
||||
}
|
||||
|
||||
b.mu.Lock()
|
||||
if session, ok := b.sessions[sessionID]; ok {
|
||||
session.Screenshots = append(session.Screenshots, filename)
|
||||
}
|
||||
b.mu.Unlock()
|
||||
|
||||
return &ScreenshotResult{
|
||||
Data: data,
|
||||
Path: filename,
|
||||
MimeType: "image/png",
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (b *PlaywrightBrowser) ExtractText(ctx context.Context, sessionID, selector string) (string, error) {
|
||||
params := map[string]interface{}{
|
||||
"sessionId": sessionID,
|
||||
"selector": selector,
|
||||
}
|
||||
|
||||
resp, err := b.sendCommand(ctx, "page.textContent", params)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getString(resp, "text"), nil
|
||||
}
|
||||
|
||||
func (b *PlaywrightBrowser) ExtractHTML(ctx context.Context, sessionID, selector string) (string, error) {
|
||||
params := map[string]interface{}{
|
||||
"sessionId": sessionID,
|
||||
"selector": selector,
|
||||
}
|
||||
|
||||
resp, err := b.sendCommand(ctx, "page.innerHTML", params)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getString(resp, "html"), nil
|
||||
}
|
||||
|
||||
func (b *PlaywrightBrowser) WaitForSelector(ctx context.Context, sessionID, selector string, opts WaitOptions) error {
|
||||
params := map[string]interface{}{
|
||||
"sessionId": sessionID,
|
||||
"selector": selector,
|
||||
}
|
||||
if opts.Timeout > 0 {
|
||||
params["timeout"] = opts.Timeout
|
||||
}
|
||||
if opts.State != "" {
|
||||
params["state"] = opts.State
|
||||
}
|
||||
|
||||
_, err := b.sendCommand(ctx, "page.waitForSelector", params)
|
||||
return err
|
||||
}
|
||||
|
||||
func (b *PlaywrightBrowser) WaitForNavigation(ctx context.Context, sessionID string, opts WaitOptions) error {
|
||||
params := map[string]interface{}{
|
||||
"sessionId": sessionID,
|
||||
}
|
||||
if opts.Timeout > 0 {
|
||||
params["timeout"] = opts.Timeout
|
||||
}
|
||||
if opts.WaitUntil != "" {
|
||||
params["waitUntil"] = opts.WaitUntil
|
||||
}
|
||||
|
||||
_, err := b.sendCommand(ctx, "page.waitForNavigation", params)
|
||||
return err
|
||||
}
|
||||
|
||||
func (b *PlaywrightBrowser) Scroll(ctx context.Context, sessionID string, opts ScrollOptions) (*ActionResponse, error) {
|
||||
script := fmt.Sprintf("window.scrollBy(%d, %d)", opts.X, opts.Y)
|
||||
if opts.Selector != "" {
|
||||
script = fmt.Sprintf(`document.querySelector('%s').scrollBy(%d, %d)`, opts.Selector, opts.X, opts.Y)
|
||||
}
|
||||
if opts.ToBottom {
|
||||
script = "window.scrollTo(0, document.body.scrollHeight)"
|
||||
}
|
||||
if opts.ToTop {
|
||||
script = "window.scrollTo(0, 0)"
|
||||
}
|
||||
|
||||
_, err := b.Evaluate(ctx, sessionID, script)
|
||||
if err != nil {
|
||||
return &ActionResponse{Success: false, Error: err.Error()}, err
|
||||
}
|
||||
|
||||
if opts.WaitAfter > 0 {
|
||||
time.Sleep(time.Duration(opts.WaitAfter) * time.Millisecond)
|
||||
}
|
||||
|
||||
return &ActionResponse{Success: true}, nil
|
||||
}
|
||||
|
||||
func (b *PlaywrightBrowser) Evaluate(ctx context.Context, sessionID, script string) (interface{}, error) {
|
||||
params := map[string]interface{}{
|
||||
"sessionId": sessionID,
|
||||
"expression": script,
|
||||
}
|
||||
|
||||
resp, err := b.sendCommand(ctx, "page.evaluate", params)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return resp["result"], nil
|
||||
}
|
||||
|
||||
func (b *PlaywrightBrowser) Select(ctx context.Context, sessionID, selector string, values []string) (*ActionResponse, error) {
|
||||
params := map[string]interface{}{
|
||||
"sessionId": sessionID,
|
||||
"selector": selector,
|
||||
"values": values,
|
||||
}
|
||||
|
||||
_, err := b.sendCommand(ctx, "page.selectOption", params)
|
||||
if err != nil {
|
||||
return &ActionResponse{Success: false, Error: err.Error()}, err
|
||||
}
|
||||
|
||||
return &ActionResponse{Success: true}, nil
|
||||
}
|
||||
|
||||
func (b *PlaywrightBrowser) GetPageInfo(ctx context.Context, sessionID string) (*PageInfo, error) {
|
||||
params := map[string]interface{}{
|
||||
"sessionId": sessionID,
|
||||
}
|
||||
|
||||
resp, err := b.sendCommand(ctx, "page.info", params)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &PageInfo{
|
||||
URL: getString(resp, "url"),
|
||||
Title: getString(resp, "title"),
|
||||
Content: getString(resp, "content"),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (b *PlaywrightBrowser) PDF(ctx context.Context, sessionID string, opts PDFOptions) ([]byte, error) {
|
||||
params := map[string]interface{}{
|
||||
"sessionId": sessionID,
|
||||
}
|
||||
if opts.Format != "" {
|
||||
params["format"] = opts.Format
|
||||
}
|
||||
if opts.Landscape {
|
||||
params["landscape"] = true
|
||||
}
|
||||
if opts.PrintBackground {
|
||||
params["printBackground"] = true
|
||||
}
|
||||
|
||||
resp, err := b.sendCommand(ctx, "page.pdf", params)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data, _ := resp["data"].(string)
|
||||
return base64.StdEncoding.DecodeString(data)
|
||||
}
|
||||
|
||||
func (b *PlaywrightBrowser) sendCommand(ctx context.Context, method string, params map[string]interface{}) (map[string]interface{}, error) {
|
||||
body := map[string]interface{}{
|
||||
"method": method,
|
||||
"params": params,
|
||||
}
|
||||
|
||||
jsonBody, err := json.Marshal(body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", b.serverURL+"/api/browser", strings.NewReader(string(jsonBody)))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := b.client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
respBody, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var result map[string]interface{}
|
||||
if err := json.Unmarshal(respBody, &result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if errMsg, ok := result["error"].(string); ok && errMsg != "" {
|
||||
return result, errors.New(errMsg)
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// getString returns m[key] when it holds a string, and the empty string when
// the key is absent, the value has another type, or m is nil.
func getString(m map[string]interface{}, key string) string {
	// A failed type assertion leaves s as the zero value "".
	s, _ := m[key].(string)
	return s
}
|
||||
|
||||
type SessionOptions struct {
|
||||
Headless bool
|
||||
Viewport *Viewport
|
||||
UserAgent string
|
||||
ProxyURL string
|
||||
RecordVideo bool
|
||||
BlockAds bool
|
||||
}
|
||||
|
||||
// Viewport is a browser viewport size, serialised with the JSON keys "width"
// and "height".
type Viewport struct {
	// Width of the viewport.
	Width int `json:"width"`
	// Height of the viewport.
	Height int `json:"height"`
}
|
||||
|
||||
// NavigateOptions tunes a Navigate call.
type NavigateOptions struct {
	// Timeout is forwarded as "timeout" when positive.
	// NOTE(review): units are defined by the server, presumably milliseconds — confirm.
	Timeout int
	// WaitUntil is forwarded as "waitUntil" when non-empty.
	WaitUntil string
	// Screenshot requests a viewport screenshot after a successful navigation.
	Screenshot bool
}
|
||||
|
||||
// ClickOptions tunes a Click call.
type ClickOptions struct {
	// Button is forwarded as "button" when non-empty.
	Button string
	// ClickCount is forwarded as "clickCount" when positive.
	ClickCount int
	// Timeout is forwarded as "timeout" when positive.
	// NOTE(review): units are defined by the server, presumably milliseconds — confirm.
	Timeout int
	// Force is forwarded as "force" when true.
	Force bool
	// WaitAfter is a pause in milliseconds applied after a successful click.
	WaitAfter int
	// Screenshot requests a viewport screenshot after the click.
	Screenshot bool
}
|
||||
|
||||
// TypeOptions tunes a Type call.
type TypeOptions struct {
	// Delay is forwarded as "delay" when positive.
	// NOTE(review): presumably the per-keystroke delay in milliseconds — confirm.
	Delay int
	// Timeout is forwarded as "timeout" when positive.
	Timeout int
	// Clear empties the element (a fill with "") before typing.
	Clear bool
}
|
||||
|
||||
// ScreenshotOptions tunes a Screenshot call.
type ScreenshotOptions struct {
	// FullPage is always forwarded as "fullPage".
	FullPage bool
	// Selector is forwarded as "selector" when non-empty.
	Selector string
	// Format is forwarded as the image "type"; "png" is sent when it is empty.
	Format string
	// Quality is forwarded as "quality" when positive.
	Quality int
}
|
||||
|
||||
// ScreenshotResult is the outcome of a Screenshot call.
type ScreenshotResult struct {
	// Data is the image data string from the server reply (treated as base64
	// by Screenshot).
	Data string
	// Path is the file path the image was saved under.
	Path string
	// MimeType is the MIME type reported for the image.
	MimeType string
}
|
||||
|
||||
// WaitOptions tunes WaitForSelector and WaitForNavigation.
type WaitOptions struct {
	// Timeout is forwarded as "timeout" when positive.
	Timeout int
	// State is forwarded as "state" by WaitForSelector when non-empty.
	State string
	// WaitUntil is forwarded as "waitUntil" by WaitForNavigation when non-empty.
	WaitUntil string
}
|
||||
|
||||
// ScrollOptions tunes a Scroll call. ToTop takes precedence over ToBottom,
// which takes precedence over Selector and the X/Y offsets.
type ScrollOptions struct {
	// X and Y are the scroll offsets passed to scrollBy.
	X int
	Y int
	// Selector, when non-empty, scrolls the matching element instead of the window.
	Selector string
	// ToBottom scrolls the window to the bottom of the document body.
	ToBottom bool
	// ToTop scrolls the window to the top.
	ToTop bool
	// WaitAfter is a pause in milliseconds applied after a successful scroll.
	WaitAfter int
}
|
||||
|
||||
// PageInfo holds the "url", "title" and "content" fields of a "page.info"
// reply.
type PageInfo struct {
	// URL is the page URL reported by the server.
	URL string
	// Title is the page title reported by the server.
	Title string
	// Content is the page content reported by the server.
	Content string
}
|
||||
|
||||
// PDFOptions tunes a PDF call.
type PDFOptions struct {
	// Format is forwarded as "format" when non-empty.
	Format string
	// Landscape is forwarded as "landscape" when true.
	Landscape bool
	// PrintBackground is forwarded as "printBackground" when true.
	PrintBackground bool
}
|
||||
Reference in New Issue
Block a user