feat: Go backend, enhanced search, new widgets, Docker deploy

Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler,
  proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard,
  UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
This commit is contained in:
home
2026-02-27 04:15:32 +03:00
parent 328d968f3f
commit 06fe57c765
285 changed files with 53132 additions and 1871 deletions

View File

@@ -0,0 +1,233 @@
package agent
import (
	"context"
	"encoding/json"
	"regexp"
	"strings"
	"unicode"
	"unicode/utf8"

	"github.com/gooseek/backend/internal/llm"
	"github.com/gooseek/backend/internal/prompts"
)
// ClassificationResult is the parsed output of the query classifier: a
// self-contained rewrite of the user's follow-up plus search-routing hints.
type ClassificationResult struct {
	// StandaloneFollowUp is the query rewritten to be understandable
	// without the conversation history.
	StandaloneFollowUp string `json:"standaloneFollowUp"`
	// SkipSearch indicates the query can be answered without a web search
	// (greetings, small talk, requests to elaborate on a prior answer).
	SkipSearch bool `json:"skipSearch"`
	// Topics, QueryType and Engines are optional routing hints.
	Topics    []string `json:"topics,omitempty"`
	QueryType string   `json:"queryType,omitempty"`
	Engines   []string `json:"engines,omitempty"`
}
// classify asks the LLM to rewrite the query as a standalone follow-up and
// to produce search-routing hints. On any parse failure it degrades to a
// default result that keeps search enabled, so classification never blocks
// the pipeline; only transport/LLM errors are surfaced.
func classify(ctx context.Context, client llm.Client, query string, history []llm.Message, locale, detectedLang string) (*ClassificationResult, error) {
	systemPrompt := prompts.GetClassifierPrompt(locale, detectedLang)
	conversation := "<conversation>\n" + formatHistory(history) + "\nUser: " + query + "\n</conversation>"

	raw, err := client.GenerateText(ctx, llm.StreamRequest{
		Messages: []llm.Message{
			{Role: llm.RoleSystem, Content: systemPrompt},
			{Role: llm.RoleUser, Content: conversation},
		},
		Options: llm.StreamOptions{MaxTokens: 1024},
	})
	if err != nil {
		return nil, err
	}

	fallback := func() *ClassificationResult {
		return &ClassificationResult{StandaloneFollowUp: query, SkipSearch: false}
	}

	// The model may wrap the JSON object in prose; extract the outermost
	// brace-delimited span before decoding.
	payload := regexp.MustCompile(`\{[\s\S]*\}`).FindString(raw)
	if payload == "" {
		return fallback(), nil
	}
	var parsed ClassificationResult
	if err := json.Unmarshal([]byte(payload), &parsed); err != nil {
		return fallback(), nil
	}
	if parsed.StandaloneFollowUp == "" {
		parsed.StandaloneFollowUp = query
	}
	return &parsed, nil
}
// fastClassify is a heuristic, LLM-free classifier used on the hot path.
// It flags short small-talk queries as search-skippable, expands pronoun-only
// follow-ups with topics from the last assistant answer, and picks search
// engines from keyword hints.
func fastClassify(query string, history []llm.Message) *ClassificationResult {
	queryLower := strings.ToLower(query)

	// Short queries containing a greeting/small-talk marker do not need a
	// web search. Length is counted in runes so Cyrillic queries get the
	// same character budget as Latin ones (the old byte-based check halved
	// the allowance for Russian text).
	skipPatterns := []string{
		"привет", "как дела", "спасибо", "пока",
		"hello", "hi", "thanks", "bye",
		"объясни", "расскажи подробнее", "что ты имеешь",
	}
	skipSearch := false
	if utf8.RuneCountInString(query) < 50 {
		for _, p := range skipPatterns {
			if strings.Contains(queryLower, p) {
				skipSearch = true
				break
			}
		}
	}

	standalone := query
	if len(history) > 0 {
		// Pronoun detection is token-based. The previous substring check
		// (`p + " "`) could never match a pronoun at the very end of the
		// query ("что это") and could fire inside unrelated words.
		pronouns := map[string]bool{
			"это": true, "этот": true, "эта": true, "эти": true,
			"он": true, "она": true, "оно": true, "они": true,
			"it": true, "this": true, "that": true, "they": true, "them": true,
		}
		hasPronouns := false
		for _, tok := range strings.Fields(queryLower) {
			if pronouns[tok] {
				hasPronouns = true
				break
			}
		}
		if hasPronouns && len(history) >= 2 {
			// Borrow salient topics from the most recent assistant turn so
			// the search query is self-contained.
			lastAssistant := ""
			for i := len(history) - 1; i >= 0; i-- {
				if history[i].Role == llm.RoleAssistant {
					lastAssistant = history[i].Content
					break
				}
			}
			if lastAssistant != "" {
				if topics := extractTopics(lastAssistant); len(topics) > 0 {
					standalone = query + " (контекст: " + strings.Join(topics, ", ") + ")"
				}
			}
		}
	}

	return &ClassificationResult{
		StandaloneFollowUp: standalone,
		SkipSearch:         skipSearch,
		Engines:            detectEngines(queryLower),
	}
}
// generateSearchQueries derives up to three search-engine queries from the
// user query: the query itself, a five-word shortening of very long queries,
// and a version with a leading question word ("how", "что такое", ...)
// stripped.
func generateSearchQueries(query string) []string {
	queries := []string{query}

	// Very long queries: also search just the first five words.
	if len(query) > 100 {
		words := strings.Fields(query)
		if len(words) > 5 {
			queries = append(queries, strings.Join(words[:5], " "))
		}
	}

	// Strip a leading question word so engines see the bare topic. The
	// prefix is matched case-insensitively, but the remainder keeps the
	// original casing: the previous code appended the fully lowercased
	// query, discarding proper-noun capitalization that search engines use.
	// (Byte offsets line up because ToLower preserves byte length for the
	// ASCII and Cyrillic prefixes in this list.)
	queryLower := strings.ToLower(query)
	keywordPatterns := []string{
		"как", "что такое", "где", "когда", "почему", "кто",
		"how", "what is", "where", "when", "why", "who",
	}
	for _, p := range keywordPatterns {
		if strings.HasPrefix(queryLower, p) {
			rest := strings.TrimSpace(query[len(p):])
			if len(rest) > 10 {
				queries = append(queries, rest)
			}
			break
		}
	}

	if len(queries) > 3 {
		queries = queries[:3]
	}
	return queries
}
// detectEngines maps keyword hints in the (already lowercased) query to the
// set of search engines to fan out to. Google and DuckDuckGo are always
// included; specialized engines are appended when their keywords appear.
func detectEngines(query string) []string {
	engines := []string{"google", "duckduckgo"}

	type hint struct {
		engine   string
		keywords []string
	}
	for _, h := range []hint{
		{"google_news", []string{"новости", "news"}},
		{"youtube", []string{"видео", "video"}},
		{"google_shopping", []string{"товар", "купить", "цена", "price"}},
	} {
		for _, kw := range h.keywords {
			if strings.Contains(query, kw) {
				engines = append(engines, h.engine)
				break
			}
		}
	}
	return engines
}
// extractTopics pulls up to three capitalized words (likely proper nouns)
// out of the first 50 words of text. It is used to expand pronoun-only
// follow-up queries with context from the previous answer.
func extractTopics(text string) []string {
	words := strings.Fields(text)
	if len(words) > 50 {
		words = words[:50]
	}
	topics := make([]string, 0, 3)
	for _, w := range words {
		// Length bounds filter out short function words and long junk
		// tokens (URLs, hashes). Bounds are in bytes, so Cyrillic words hit
		// them at roughly half the character count.
		if len(w) <= 5 || len(w) >= 20 {
			continue
		}
		// unicode.IsUpper covers all scripts, including letters such as 'Ё'
		// that the previous hand-rolled 'А'..'Я' range check missed.
		r, _ := utf8.DecodeRuneInString(w)
		if unicode.IsUpper(r) {
			topics = append(topics, w)
			if len(topics) == 3 {
				break
			}
		}
	}
	return topics
}
// formatHistory renders chat history as "Role: content" lines for inclusion
// in a classifier prompt. Each message body is capped at roughly 500 bytes,
// cut on a rune boundary.
func formatHistory(messages []llm.Message) string {
	var sb strings.Builder
	for _, m := range messages {
		role := "User"
		if m.Role == llm.RoleAssistant {
			role = "Assistant"
		}
		content := m.Content
		if len(content) > 500 {
			// Back the cut point up to a rune boundary: the previous raw
			// content[:500] could split a multi-byte character (common for
			// Cyrillic text) and inject invalid UTF-8 into the prompt.
			cut := 500
			for cut > 0 && !utf8.RuneStart(content[cut]) {
				cut--
			}
			content = content[:cut] + "..."
		}
		sb.WriteString(role)
		sb.WriteString(": ")
		sb.WriteString(content)
		sb.WriteString("\n")
	}
	return sb.String()
}
// detectLanguage guesses "ru" vs "en" by comparing Cyrillic and Latin letter
// counts. Ties — including all-digit or all-symbol input — default to "en".
func detectLanguage(text string) string {
	cyrillic, latin := 0, 0
	for _, r := range text {
		switch {
		case unicode.Is(unicode.Cyrillic, r):
			// unicode.Cyrillic also covers 'ё'/'Ё', which the previous
			// 'а'..'я' / 'А'..'Я' range checks missed.
			cyrillic++
		case unicode.Is(unicode.Latin, r):
			latin++
		}
	}
	if cyrillic > latin {
		return "ru"
	}
	return "en"
}

View File

@@ -0,0 +1,543 @@
package agent
import (
"context"
"fmt"
"strings"
"sync"
"time"
"github.com/gooseek/backend/internal/llm"
"github.com/gooseek/backend/internal/search"
"github.com/gooseek/backend/internal/session"
"github.com/gooseek/backend/internal/types"
"github.com/google/uuid"
"golang.org/x/sync/errgroup"
)
// DeepResearchConfig controls a multi-iteration research run. Zero values
// for the limits are replaced with defaults by NewDeepResearcher.
type DeepResearchConfig struct {
	LLM              llm.Client
	SearchClient     *search.SearXNGClient
	FocusMode        FocusMode
	Locale           string
	MaxSearchQueries int           // hard cap on search requests issued
	MaxSources       int           // stop collecting once this many unique sources exist
	MaxIterations    int           // max plan/search/refine rounds
	Timeout          time.Duration // wall-clock budget for the whole run
}

// DeepResearchResult is the final output of one research run.
type DeepResearchResult struct {
	FinalReport     string        // markdown report (also streamed to the session)
	Sources         []types.Chunk // deduplicated source chunks, in discovery order
	SubQueries      []SubQuery
	Insights        []string
	FollowUpQueries []string
	TotalSearches   int
	TotalSources    int
	Duration        time.Duration
}

// SubQuery is one planned search with its lifecycle state
// ("pending" -> "searching" -> "complete"/"failed").
type SubQuery struct {
	Query    string
	Purpose  string // which aspect of the research question this covers
	Status   string
	Results  []types.Chunk
	Insights []string
}

// DeepResearcher holds the mutable state of one research run. mu guards the
// fields below it, since sub-queries execute concurrently.
type DeepResearcher struct {
	cfg  DeepResearchConfig
	sess *session.Session

	mu          sync.Mutex
	allSources  []types.Chunk   // all unique chunks gathered so far
	seenURLs    map[string]bool // URL dedup set
	subQueries  []SubQuery
	insights    []string
	searchCount int
	startTime   time.Time
}
// NewDeepResearcher builds a researcher for one run, filling unset limits
// with defaults (30 searches, 100 sources, 5 iterations, 5 minute timeout).
func NewDeepResearcher(cfg DeepResearchConfig, sess *session.Session) *DeepResearcher {
	applyDefault := func(v *int, def int) {
		if *v == 0 {
			*v = def
		}
	}
	applyDefault(&cfg.MaxSearchQueries, 30)
	applyDefault(&cfg.MaxSources, 100)
	applyDefault(&cfg.MaxIterations, 5)
	if cfg.Timeout == 0 {
		cfg.Timeout = 5 * time.Minute
	}
	return &DeepResearcher{
		cfg:        cfg,
		sess:       sess,
		seenURLs:   map[string]bool{},
		allSources: []types.Chunk{},
		subQueries: []SubQuery{},
		insights:   []string{},
		startTime:  time.Now(),
	}
}
// Research runs the full deep-research pipeline: plan sub-queries, execute
// them in bounded-concurrency iterations, synthesize insights, and stream a
// final cited report. The whole run is bounded by cfg.Timeout via a derived
// context.
func (dr *DeepResearcher) Research(ctx context.Context, query string) (*DeepResearchResult, error) {
	ctx, cancel := context.WithTimeout(ctx, dr.cfg.Timeout)
	defer cancel()
	// A single research block tracks overall progress in the UI.
	researchBlockID := uuid.New().String()
	dr.sess.EmitBlock(&types.Block{
		ID:   researchBlockID,
		Type: types.BlockTypeResearch,
		Data: types.ResearchData{
			SubSteps: []types.ResearchSubStep{},
		},
	})
	subQueries, err := dr.planResearch(ctx, query)
	if err != nil {
		return nil, fmt.Errorf("planning failed: %w", err)
	}
	dr.updateResearchStatus(researchBlockID, "researching", fmt.Sprintf("Executing %d sub-queries", len(subQueries)))
	for i := 0; i < dr.cfg.MaxIterations && dr.searchCount < dr.cfg.MaxSearchQueries; i++ {
		// A failed batch does not abort the run; only stop if the context
		// itself is done (timeout/cancellation).
		if err := dr.executeIteration(ctx, i, researchBlockID); err != nil {
			if ctx.Err() != nil {
				break
			}
		}
		if dr.hasEnoughData() {
			break
		}
		// Ask the LLM what is still missing; stop when it has nothing to add.
		newQueries, err := dr.generateFollowUpQueries(ctx, query)
		if err != nil || len(newQueries) == 0 {
			break
		}
		for _, q := range newQueries {
			dr.mu.Lock()
			dr.subQueries = append(dr.subQueries, SubQuery{
				Query:   q.Query,
				Purpose: q.Purpose,
				Status:  "pending",
			})
			dr.mu.Unlock()
		}
	}
	dr.updateResearchStatus(researchBlockID, "synthesizing", "Analyzing findings")
	insights, err := dr.synthesizeInsights(ctx, query)
	if err != nil {
		// Fall back to whatever insights were cached during the run.
		insights = dr.insights
	}
	dr.updateResearchStatus(researchBlockID, "writing", "Generating report")
	report, err := dr.generateFinalReport(ctx, query, insights)
	if err != nil {
		return nil, fmt.Errorf("report generation failed: %w", err)
	}
	// Follow-up suggestions are best-effort; the error is deliberately ignored.
	followUp, _ := dr.generateFollowUpSuggestions(ctx, query, report)
	dr.updateResearchStatus(researchBlockID, "complete", "Research complete")
	// NOTE(review): searchCount/allSources/subQueries are read without mu
	// here; safe only because all worker goroutines have finished by now.
	return &DeepResearchResult{
		FinalReport:     report,
		Sources:         dr.allSources,
		SubQueries:      dr.subQueries,
		Insights:        insights,
		FollowUpQueries: followUp,
		TotalSearches:   dr.searchCount,
		TotalSources:    len(dr.allSources),
		Duration:        time.Since(dr.startTime),
	}, nil
}
// planResearch asks the LLM to decompose the query into 3-5 sub-queries and
// stores them on the researcher. On LLM failure or unparseable output it
// falls back to template defaults, so the returned error is always nil.
func (dr *DeepResearcher) planResearch(ctx context.Context, query string) ([]SubQuery, error) {
	prompt := fmt.Sprintf(`Analyze this research query and break it into 3-5 sub-queries for comprehensive research.
Query: %s
For each sub-query, specify:
1. The search query (optimized for search engines)
2. The purpose (what aspect it addresses)
Respond in this exact format:
QUERY: [search query]
PURPOSE: [what this addresses]
QUERY: [search query]
PURPOSE: [what this addresses]
...
Be specific and actionable. Focus on different aspects: definitions, current state, history, expert opinions, data/statistics, controversies, future trends.`, query)
	result, err := dr.cfg.LLM.GenerateText(ctx, llm.StreamRequest{
		Messages: []llm.Message{{Role: "user", Content: prompt}},
	})
	if err != nil {
		// Planner LLM failed: degrade to the default sub-query templates.
		return dr.generateDefaultSubQueries(query), nil
	}
	subQueries := dr.parseSubQueries(result)
	if len(subQueries) == 0 {
		subQueries = dr.generateDefaultSubQueries(query)
	}
	dr.mu.Lock()
	dr.subQueries = subQueries
	dr.mu.Unlock()
	return subQueries, nil
}
// parseSubQueries extracts QUERY:/PURPOSE: pairs from LLM output. A query is
// kept only once its purpose line has been seen; entries missing either part
// are silently dropped.
func (dr *DeepResearcher) parseSubQueries(text string) []SubQuery {
	var (
		out     []SubQuery
		query   string
		purpose string
	)
	flush := func() {
		if query != "" && purpose != "" {
			out = append(out, SubQuery{Query: query, Purpose: purpose, Status: "pending"})
		}
	}
	for _, raw := range strings.Split(text, "\n") {
		line := strings.TrimSpace(raw)
		switch {
		case strings.HasPrefix(line, "QUERY:"):
			flush()
			query = strings.TrimSpace(strings.TrimPrefix(line, "QUERY:"))
			purpose = ""
		case strings.HasPrefix(line, "PURPOSE:"):
			purpose = strings.TrimSpace(strings.TrimPrefix(line, "PURPOSE:"))
		}
	}
	flush()
	return out
}
// generateDefaultSubQueries is the fallback research plan used when the LLM
// planner fails or returns nothing parseable.
func (dr *DeepResearcher) generateDefaultSubQueries(query string) []SubQuery {
	plan := []struct{ suffix, purpose string }{
		{"", "Main query"},
		{" definition explained", "Definitions and basics"},
		{" latest news 2026", "Current developments"},
		{" expert analysis", "Expert opinions"},
		{" statistics data research", "Data and evidence"},
	}
	out := make([]SubQuery, 0, len(plan))
	for _, p := range plan {
		out = append(out, SubQuery{Query: query + p.suffix, Purpose: p.purpose, Status: "pending"})
	}
	return out
}
// executeIteration runs up to three pending sub-queries concurrently via an
// errgroup. It returns the first worker error, or nil when nothing was
// pending. The iteration number is currently unused.
func (dr *DeepResearcher) executeIteration(ctx context.Context, iteration int, blockID string) error {
	// Snapshot indices of pending sub-queries under the lock.
	dr.mu.Lock()
	pendingQueries := make([]int, 0)
	for i, sq := range dr.subQueries {
		if sq.Status == "pending" {
			pendingQueries = append(pendingQueries, i)
		}
	}
	dr.mu.Unlock()
	if len(pendingQueries) == 0 {
		return nil
	}
	// At most three concurrent searches per iteration; remaining pending
	// queries wait for a later iteration.
	batchSize := 3
	if len(pendingQueries) < batchSize {
		batchSize = len(pendingQueries)
	}
	g, gctx := errgroup.WithContext(ctx)
	g.SetLimit(batchSize)
	for _, idx := range pendingQueries[:batchSize] {
		idx := idx // per-iteration copy for the closure (pre-Go-1.22 semantics)
		g.Go(func() error {
			return dr.executeSubQuery(gctx, idx, blockID)
		})
	}
	return g.Wait()
}
// executeSubQuery performs the search for one sub-query, deduplicates
// results by URL against the run-wide seen set, and stores up to 10 new
// chunks on the sub-query and in allSources.
func (dr *DeepResearcher) executeSubQuery(ctx context.Context, idx int, blockID string) error {
	dr.mu.Lock()
	if idx >= len(dr.subQueries) {
		dr.mu.Unlock()
		return nil
	}
	// NOTE(review): sq points into the subQueries backing array and is kept
	// across the Unlock/Lock cycles below. This is safe today only because
	// the slice is never appended to while workers run (appends happen after
	// g.Wait() in Research); re-indexing under the lock would be more robust.
	sq := &dr.subQueries[idx]
	sq.Status = "searching"
	query := sq.Query
	dr.searchCount++
	dr.mu.Unlock()
	dr.updateResearchStatus(blockID, "researching", fmt.Sprintf("Searching: %s", truncate(query, 50)))
	enhancedQuery := EnhanceQueryForFocusMode(query, dr.cfg.FocusMode)
	results, err := dr.cfg.SearchClient.Search(ctx, enhancedQuery, &search.SearchOptions{
		Engines:    dr.cfg.FocusMode.GetSearchEngines(),
		Categories: FocusModeConfigs[dr.cfg.FocusMode].Categories,
		PageNo:     1,
	})
	if err != nil {
		dr.mu.Lock()
		sq.Status = "failed"
		dr.mu.Unlock()
		return err
	}
	// Keep at most 10 previously-unseen URLs from this search.
	chunks := make([]types.Chunk, 0)
	for _, r := range results.Results {
		dr.mu.Lock()
		if dr.seenURLs[r.URL] {
			dr.mu.Unlock()
			continue
		}
		dr.seenURLs[r.URL] = true
		dr.mu.Unlock()
		chunk := r.ToChunk()
		chunks = append(chunks, chunk)
		if len(chunks) >= 10 {
			break
		}
	}
	dr.mu.Lock()
	sq.Results = chunks
	sq.Status = "complete"
	dr.allSources = append(dr.allSources, chunks...)
	dr.mu.Unlock()
	return nil
}
// generateFollowUpQueries asks the LLM for 2-3 gap-filling queries based on
// a summary of sources gathered so far. It returns nil (without error) when
// the search budget is nearly exhausted.
func (dr *DeepResearcher) generateFollowUpQueries(ctx context.Context, originalQuery string) ([]SubQuery, error) {
	// Keep a five-search reserve so follow-ups cannot blow the budget.
	if dr.searchCount >= dr.cfg.MaxSearchQueries-5 {
		return nil, nil
	}
	// Summarize up to 20 sources (title + 100-char snippet) for the prompt.
	var sourceSummary strings.Builder
	dr.mu.Lock()
	for i, s := range dr.allSources {
		if i >= 20 {
			break
		}
		sourceSummary.WriteString(fmt.Sprintf("- %s: %s\n", s.Metadata["title"], truncate(s.Content, 100)))
	}
	dr.mu.Unlock()
	prompt := fmt.Sprintf(`Based on the original query and sources found so far, suggest 2-3 follow-up queries to deepen the research.
Original query: %s
Sources found so far:
%s
What aspects are missing? What would provide more comprehensive coverage?
Respond with queries in format:
QUERY: [query]
PURPOSE: [what gap it fills]`, originalQuery, sourceSummary.String())
	result, err := dr.cfg.LLM.GenerateText(ctx, llm.StreamRequest{
		Messages: []llm.Message{{Role: "user", Content: prompt}},
	})
	if err != nil {
		return nil, err
	}
	return dr.parseSubQueries(result), nil
}
// synthesizeInsights asks the LLM to distill 5-7 key insights from up to 30
// gathered sources, caches them on the researcher, and returns them.
func (dr *DeepResearcher) synthesizeInsights(ctx context.Context, query string) ([]string, error) {
	var sourcesText strings.Builder
	dr.mu.Lock()
	for i, s := range dr.allSources {
		if i >= 30 {
			break
		}
		sourcesText.WriteString(fmt.Sprintf("[%d] %s\n%s\n\n", i+1, s.Metadata["title"], truncate(s.Content, 300)))
	}
	dr.mu.Unlock()
	prompt := fmt.Sprintf(`Analyze these sources and extract 5-7 key insights for the query: %s
Sources:
%s
Provide insights as bullet points, each starting with a key finding.
Focus on: main conclusions, patterns, contradictions, expert consensus, data points.`, query, sourcesText.String())
	result, err := dr.cfg.LLM.GenerateText(ctx, llm.StreamRequest{
		Messages: []llm.Message{{Role: "user", Content: prompt}},
	})
	if err != nil {
		return nil, err
	}
	// Keep only bullet lines, stripping the leading bullet marker.
	insights := make([]string, 0)
	for _, line := range strings.Split(result, "\n") {
		line = strings.TrimSpace(line)
		if strings.HasPrefix(line, "-") || strings.HasPrefix(line, "•") || strings.HasPrefix(line, "*") {
			insights = append(insights, strings.TrimPrefix(strings.TrimPrefix(strings.TrimPrefix(line, "-"), "•"), "*"))
		}
	}
	dr.mu.Lock()
	dr.insights = insights
	dr.mu.Unlock()
	return insights, nil
}
// generateFinalReport streams a cited markdown report to the session as a
// text block and returns the accumulated text. Up to 50 sources and the
// synthesized insights are embedded in the prompt; the focus mode supplies
// the system prompt.
func (dr *DeepResearcher) generateFinalReport(ctx context.Context, query string, insights []string) (string, error) {
	var sourcesText strings.Builder
	dr.mu.Lock()
	// NOTE(review): this aliases the shared slice rather than copying; safe
	// only because no workers append after this point in the run.
	sources := dr.allSources
	dr.mu.Unlock()
	for i, s := range sources {
		if i >= 50 {
			break
		}
		sourcesText.WriteString(fmt.Sprintf("[%d] %s (%s)\n%s\n\n", i+1, s.Metadata["title"], s.Metadata["url"], truncate(s.Content, 400)))
	}
	insightsText := strings.Join(insights, "\n- ")
	focusCfg := FocusModeConfigs[dr.cfg.FocusMode]
	locale := dr.cfg.Locale
	if locale == "" {
		locale = "en"
	}
	langInstruction := ""
	if locale == "ru" {
		langInstruction = "Write the report in Russian."
	}
	prompt := fmt.Sprintf(`%s
Write a comprehensive research report answering: %s
Key insights discovered:
- %s
Sources (cite using [1], [2], etc.):
%s
Structure your report with:
1. Executive Summary (2-3 sentences)
2. Key Findings (organized by theme)
3. Analysis and Discussion
4. Conclusions
%s
Use citations [1], [2], etc. throughout.
Be thorough but concise. Focus on actionable information.`, focusCfg.SystemPrompt, query, insightsText, sourcesText.String(), langInstruction)
	stream, err := dr.cfg.LLM.StreamText(ctx, llm.StreamRequest{
		Messages: []llm.Message{{Role: "user", Content: prompt}},
	})
	if err != nil {
		return "", err
	}
	// Forward chunks to the UI while accumulating the full report locally.
	var report strings.Builder
	textBlockID := uuid.New().String()
	dr.sess.EmitBlock(&types.Block{
		ID:   textBlockID,
		Type: types.BlockTypeText,
		Data: "",
	})
	for chunk := range stream {
		report.WriteString(chunk.ContentChunk)
		dr.sess.EmitTextChunk(textBlockID, chunk.ContentChunk)
	}
	return report.String(), nil
}
// generateFollowUpSuggestions asks the LLM for up to four follow-up
// questions based on the original query and a truncated copy of the report.
func (dr *DeepResearcher) generateFollowUpSuggestions(ctx context.Context, query, report string) ([]string, error) {
	prompt := fmt.Sprintf(`Based on this research query and report, suggest 3-4 follow-up questions the user might want to explore:
Query: %s
Report summary: %s
Provide follow-up questions that:
1. Go deeper into specific aspects
2. Explore related topics
3. Address practical applications
4. Consider alternative perspectives
Format as simple questions, one per line.`, query, truncate(report, 1000))
	raw, err := dr.cfg.LLM.GenerateText(ctx, llm.StreamRequest{
		Messages: []llm.Message{{Role: "user", Content: prompt}},
	})
	if err != nil {
		return nil, err
	}
	var suggestions []string
	for _, candidate := range strings.Split(raw, "\n") {
		candidate = strings.TrimSpace(candidate)
		// Keep lines that look like questions (or are long enough to be
		// one), then strip bullet and numbering prefixes.
		if candidate == "" || (!strings.Contains(candidate, "?") && len(candidate) <= 20) {
			continue
		}
		candidate = strings.TrimPrefix(candidate, "- ")
		candidate = strings.TrimPrefix(candidate, "• ")
		candidate = strings.TrimLeft(candidate, "0123456789. ")
		if candidate == "" {
			continue
		}
		suggestions = append(suggestions, candidate)
		if len(suggestions) == 4 {
			break
		}
	}
	return suggestions, nil
}
// updateResearchStatus patches the research block's status and message so
// the UI can show progress.
func (dr *DeepResearcher) updateResearchStatus(blockID, status, message string) {
	patches := []session.Patch{
		{Op: "replace", Path: "/data/status", Value: status},
		{Op: "replace", Path: "/data/message", Value: message},
	}
	dr.sess.UpdateBlock(blockID, patches)
}
// hasEnoughData reports whether the unique-source cap has been reached.
func (dr *DeepResearcher) hasEnoughData() bool {
	dr.mu.Lock()
	enough := len(dr.allSources) >= dr.cfg.MaxSources
	dr.mu.Unlock()
	return enough
}
// truncate shortens s to at most maxLen characters, appending "..." when it
// was cut. Truncation is rune-based: the previous byte-based s[:maxLen]
// could split a multi-byte character (common for Cyrillic content here) and
// emit invalid UTF-8 into prompts and status messages.
func truncate(s string, maxLen int) string {
	r := []rune(s)
	if len(r) <= maxLen {
		return s
	}
	return string(r[:maxLen]) + "..."
}
// RunDeepResearch is a convenience wrapper: construct a researcher for cfg
// and run it to completion.
func RunDeepResearch(ctx context.Context, sess *session.Session, query string, cfg DeepResearchConfig) (*DeepResearchResult, error) {
	return NewDeepResearcher(cfg, sess).Research(ctx, query)
}

View File

@@ -0,0 +1,293 @@
package agent
import (
	"strings"
	"unicode"
	"unicode/utf8"
)
// FocusMode selects a search vertical: it determines which engines are
// queried and which system prompt shapes the answer.
type FocusMode string

// Supported focus modes; per-mode settings live in FocusModeConfigs.
const (
	FocusModeAll      FocusMode = "all"
	FocusModeAcademic FocusMode = "academic"
	FocusModeWriting  FocusMode = "writing"
	FocusModeYouTube  FocusMode = "youtube"
	FocusModeReddit   FocusMode = "reddit"
	FocusModeCode     FocusMode = "code"
	FocusModeNews     FocusMode = "news"
	FocusModeImages   FocusMode = "images"
	FocusModeMath     FocusMode = "math"
	FocusModeFinance  FocusMode = "finance"
)
// FocusModeConfig bundles everything that varies per focus mode.
type FocusModeConfig struct {
	Mode              FocusMode
	Engines           []string // search engine names to query
	Categories        []string // search categories passed to the search client
	SystemPrompt      string   // system prompt for answer generation
	SearchQueryPrefix string   // prepended to the search query ("" = none)
	MaxSources        int      // cap on sources fed into the answer
	RequiresCitation  bool     // whether answers must cite sources
	AllowScraping     bool     // whether full-page scraping is permitted
}
// FocusModeConfigs is the static per-mode configuration table. Look up via
// GetFocusModeConfig (which handles unknown modes) rather than indexing
// directly when the mode comes from user input.
var FocusModeConfigs = map[FocusMode]FocusModeConfig{
	FocusModeAll: {
		Mode:             FocusModeAll,
		Engines:          []string{"google", "bing", "duckduckgo"},
		Categories:       []string{"general"},
		MaxSources:       15,
		RequiresCitation: true,
		AllowScraping:    true,
		SystemPrompt: `You are a helpful AI assistant that provides comprehensive answers based on web search results.
Always cite your sources using [1], [2], etc. format.
Provide balanced, accurate information from multiple perspectives.`,
	},
	FocusModeAcademic: {
		Mode:              FocusModeAcademic,
		Engines:           []string{"google scholar", "arxiv", "pubmed", "semantic scholar"},
		Categories:        []string{"science"},
		SearchQueryPrefix: "research paper",
		MaxSources:        20,
		RequiresCitation:  true,
		AllowScraping:     true,
		SystemPrompt: `You are an academic research assistant specializing in scholarly sources.
Focus on peer-reviewed papers, academic journals, and reputable research institutions.
Always cite sources in academic format with [1], [2], etc.
Distinguish between primary research, meta-analyses, and review articles.
Mention publication dates, authors, and journals when available.
Be precise about confidence levels and note when findings are preliminary or contested.`,
	},
	// Writing mode deliberately skips citations and scraping: it assists
	// with the user's own text rather than summarizing the web.
	FocusModeWriting: {
		Mode:             FocusModeWriting,
		Engines:          []string{"google"},
		Categories:       []string{"general"},
		MaxSources:       5,
		RequiresCitation: false,
		AllowScraping:    false,
		SystemPrompt: `You are a creative writing assistant.
Help with drafting, editing, and improving written content.
Provide suggestions for style, tone, structure, and clarity.
Offer multiple variations when appropriate.
Focus on the user's voice and intent rather than web search results.`,
	},
	FocusModeYouTube: {
		Mode:              FocusModeYouTube,
		Engines:           []string{"youtube"},
		Categories:        []string{"videos"},
		SearchQueryPrefix: "site:youtube.com",
		MaxSources:        10,
		RequiresCitation:  true,
		AllowScraping:     false,
		SystemPrompt: `You are a video content assistant focused on YouTube.
Summarize video content, recommend relevant videos, and help find tutorials.
Mention video titles, channels, and approximate timestamps when relevant.
Note view counts and upload dates to indicate video popularity and relevance.`,
	},
	FocusModeReddit: {
		Mode:              FocusModeReddit,
		Engines:           []string{"reddit"},
		Categories:        []string{"social media"},
		SearchQueryPrefix: "site:reddit.com",
		MaxSources:        15,
		RequiresCitation:  true,
		AllowScraping:     true,
		SystemPrompt: `You are an assistant that specializes in Reddit discussions and community knowledge.
Focus on highly upvoted comments and posts from relevant subreddits.
Note the subreddit source, upvote counts, and community consensus.
Distinguish between personal opinions, experiences, and factual claims.
Be aware of potential biases in specific communities.`,
	},
	FocusModeCode: {
		Mode:              FocusModeCode,
		Engines:           []string{"google", "github", "stackoverflow"},
		Categories:        []string{"it"},
		SearchQueryPrefix: "",
		MaxSources:        10,
		RequiresCitation:  true,
		AllowScraping:     true,
		SystemPrompt: `You are a programming assistant focused on code, documentation, and technical solutions.
Provide working code examples with explanations.
Reference official documentation, Stack Overflow answers, and GitHub repositories.
Mention library versions and compatibility considerations.
Follow best practices and coding standards for the relevant language/framework.
Include error handling and edge cases in code examples.`,
	},
	FocusModeNews: {
		Mode:             FocusModeNews,
		Engines:          []string{"google news", "bing news"},
		Categories:       []string{"news"},
		MaxSources:       12,
		RequiresCitation: true,
		AllowScraping:    true,
		SystemPrompt: `You are a news assistant that provides current events information.
Focus on recent, verified news from reputable sources.
Distinguish between breaking news, analysis, and opinion pieces.
Note publication dates and source credibility.
Present multiple perspectives on controversial topics.`,
	},
	FocusModeImages: {
		Mode:             FocusModeImages,
		Engines:          []string{"google images", "bing images"},
		Categories:       []string{"images"},
		MaxSources:       20,
		RequiresCitation: true,
		AllowScraping:    false,
		SystemPrompt: `You are an image search assistant.
Help find relevant images, describe image sources, and provide context.
Note image sources, licenses, and quality when relevant.`,
	},
	FocusModeMath: {
		Mode:             FocusModeMath,
		Engines:          []string{"wolfram alpha", "google"},
		Categories:       []string{"science"},
		MaxSources:       5,
		RequiresCitation: true,
		AllowScraping:    false,
		SystemPrompt: `You are a mathematical problem-solving assistant.
Provide step-by-step solutions with clear explanations.
Use proper mathematical notation and formatting.
Show your work and explain the reasoning behind each step.
Mention relevant theorems, formulas, and mathematical concepts.
Verify your calculations and provide alternative solution methods when applicable.`,
	},
	FocusModeFinance: {
		Mode:              FocusModeFinance,
		Engines:           []string{"google", "google finance", "yahoo finance"},
		Categories:        []string{"news"},
		SearchQueryPrefix: "stock market finance",
		MaxSources:        10,
		RequiresCitation:  true,
		AllowScraping:     true,
		SystemPrompt: `You are a financial information assistant.
Provide accurate financial data, market analysis, and investment information.
Note that you cannot provide personalized financial advice.
Cite data sources and note when data may be delayed or historical.
Include relevant disclaimers about investment risks.
Reference SEC filings, analyst reports, and official company statements.`,
	},
}
// GetFocusModeConfig resolves a (case-insensitive) mode name, falling back
// to the "all" configuration for unknown modes.
func GetFocusModeConfig(mode string) FocusModeConfig {
	cfg, known := FocusModeConfigs[FocusMode(strings.ToLower(mode))]
	if !known {
		return FocusModeConfigs[FocusModeAll]
	}
	return cfg
}
// DetectFocusMode routes a query to a focus mode using keyword heuristics.
// Checks run in priority order: academic, code, youtube, reddit, math,
// finance, news; anything unmatched falls back to FocusModeAll.
func DetectFocusMode(query string) FocusMode {
	queryLower := strings.ToLower(query)
	academicKeywords := []string{
		"research", "paper", "study", "journal", "scientific", "academic",
		"peer-reviewed", "citation", "исследование", "научн", "статья",
		"публикация", "диссертация",
	}
	if matchesAnyKeyword(queryLower, academicKeywords) {
		return FocusModeAcademic
	}
	codeKeywords := []string{
		"code", "programming", "function", "error", "bug", "api",
		"library", "framework", "syntax", "compile", "debug",
		"код", "программ", "функция", "ошибка", "библиотека",
		"golang", "python", "javascript", "typescript", "react", "vue",
		"docker", "kubernetes", "sql", "database", "git",
	}
	if matchesAnyKeyword(queryLower, codeKeywords) {
		return FocusModeCode
	}
	if strings.Contains(queryLower, "youtube") ||
		strings.Contains(queryLower, "video tutorial") ||
		strings.Contains(queryLower, "видео") {
		return FocusModeYouTube
	}
	if strings.Contains(queryLower, "reddit") ||
		strings.Contains(queryLower, "subreddit") ||
		strings.Contains(queryLower, "/r/") {
		return FocusModeReddit
	}
	mathKeywords := []string{
		"calculate", "solve", "equation", "integral", "derivative",
		"formula", "theorem", "proof", "вычисл", "решить", "уравнение",
		"интеграл", "производная", "формула", "теорема",
	}
	if matchesAnyKeyword(queryLower, mathKeywords) {
		return FocusModeMath
	}
	financeKeywords := []string{
		"stock", "market", "invest", "price", "trading", "finance",
		"акци", "рынок", "инвест", "биржа", "котировк", "финанс",
		"etf", "dividend", "portfolio",
	}
	if matchesAnyKeyword(queryLower, financeKeywords) {
		return FocusModeFinance
	}
	newsKeywords := []string{
		"news", "today", "latest", "breaking", "current events",
		"новост", "сегодня", "последн", "актуальн",
	}
	if matchesAnyKeyword(queryLower, newsKeywords) {
		return FocusModeNews
	}
	return FocusModeAll
}

// matchesAnyKeyword reports whether any keyword in the list matches
// queryLower (see matchKeyword for matching rules).
func matchesAnyKeyword(queryLower string, keywords []string) bool {
	for _, kw := range keywords {
		if matchKeyword(queryLower, kw) {
			return true
		}
	}
	return false
}

// matchKeyword matches long keywords (including deliberate stems such as
// "научн") as substrings, but requires short keywords (<= 3 runes: "api",
// "bug", "sql", "git", "etf", "код") to match a whole word. The previous
// plain substring test misrouted queries like "capital of France" (which
// contains "api") to code mode. Note this makes short Cyrillic stems
// exact-word only — inflected forms like "кодом" no longer match.
func matchKeyword(queryLower, kw string) bool {
	if utf8.RuneCountInString(kw) > 3 {
		return strings.Contains(queryLower, kw)
	}
	isSep := func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) }
	for _, token := range strings.FieldsFunc(queryLower, isSep) {
		if token == kw {
			return true
		}
	}
	return false
}
// GetSearchEngines returns the engine list for the mode, or the "all"
// default for unknown modes.
func (f FocusMode) GetSearchEngines() []string {
	cfg, known := FocusModeConfigs[f]
	if !known {
		cfg = FocusModeConfigs[FocusModeAll]
	}
	return cfg.Engines
}
// GetSystemPrompt returns the mode's answer-generation system prompt, or the
// "all" default for unknown modes.
func (f FocusMode) GetSystemPrompt() string {
	cfg, known := FocusModeConfigs[f]
	if !known {
		cfg = FocusModeConfigs[FocusModeAll]
	}
	return cfg.SystemPrompt
}
// GetMaxSources returns the per-mode source cap, defaulting to 15 for
// unknown modes.
func (f FocusMode) GetMaxSources() int {
	cfg, known := FocusModeConfigs[f]
	if !known {
		return 15
	}
	return cfg.MaxSources
}
// RequiresCitation reports whether answers in this mode must cite sources;
// unknown modes default to true.
func (f FocusMode) RequiresCitation() bool {
	cfg, known := FocusModeConfigs[f]
	if !known {
		return true
	}
	return cfg.RequiresCitation
}
// AllowsScraping reports whether full-page scraping is permitted for this
// mode; unknown modes default to true.
func (f FocusMode) AllowsScraping() bool {
	cfg, known := FocusModeConfigs[f]
	if !known {
		return true
	}
	return cfg.AllowScraping
}
// EnhanceQueryForFocusMode prepends the mode's search prefix (e.g.
// "site:reddit.com") when one is configured; otherwise the query is
// returned untouched.
func EnhanceQueryForFocusMode(query string, mode FocusMode) string {
	prefix := FocusModeConfigs[mode].SearchQueryPrefix
	if prefix == "" {
		return query
	}
	return prefix + " " + query
}

View File

@@ -0,0 +1,950 @@
package agent
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"regexp"
"strings"
"time"
"github.com/gooseek/backend/internal/llm"
"github.com/gooseek/backend/internal/prompts"
"github.com/gooseek/backend/internal/search"
"github.com/gooseek/backend/internal/session"
"github.com/gooseek/backend/internal/types"
"github.com/google/uuid"
"golang.org/x/sync/errgroup"
)
// Mode is the speed/quality trade-off selected for an answering run; see
// RunOrchestrator for how each mode is dispatched.
type Mode string

const (
	ModeSpeed    Mode = "speed"    // fast path via runSpeedMode
	ModeBalanced Mode = "balanced" // default full pipeline
	ModeQuality  Mode = "quality"  // enables deep research when configured
)
// OrchestratorConfig carries everything a single answering run needs:
// clients, mode/focus selection, user context (files, collections, memory),
// feature flags, and downstream service URLs.
type OrchestratorConfig struct {
	LLM                llm.Client
	SearchClient       *search.SearXNGClient
	Mode               Mode
	FocusMode          FocusMode // auto-detected when empty (see RunOrchestrator)
	Sources            []string
	FileIDs            []string
	FileContext        string
	CollectionID       string
	CollectionContext  string
	SystemInstructions string
	Locale             string
	MemoryContext      string
	UserMemory         string
	AnswerMode         string
	ResponsePrefs      *ResponsePrefs
	LearningMode       bool
	EnableDeepResearch bool // with ModeQuality, routes to deep research
	EnableClarifying   bool
	DiscoverSvcURL     string
	Crawl4AIURL        string
	CollectionSvcURL   string
	FileSvcURL         string
}

// DigestResponse is the JSON payload for a digest result: a Russian-language
// summary plus numbered citations (field names suggest a news-digest flow;
// the consumer is outside this file).
type DigestResponse struct {
	SummaryRu    string           `json:"summaryRu"`
	Citations    []DigestCitation `json:"citations"`
	FollowUp     []string         `json:"followUp"`
	SourcesCount int              `json:"sourcesCount"`
	ClusterTitle string           `json:"clusterTitle"`
}

// DigestCitation links a numbered citation in a digest to its source page.
type DigestCitation struct {
	Index  int    `json:"index"`
	URL    string `json:"url"`
	Title  string `json:"title"`
	Domain string `json:"domain"`
}

// PreScrapedArticle is article content fetched ahead of orchestration.
type PreScrapedArticle struct {
	Title   string
	Content string
	URL     string
}

// ResponsePrefs are user-selected answer formatting preferences.
type ResponsePrefs struct {
	Format string `json:"format,omitempty"`
	Length string `json:"length,omitempty"`
	Tone   string `json:"tone,omitempty"`
}

// OrchestratorInput bundles the chat state and configuration for one run.
type OrchestratorInput struct {
	ChatHistory []llm.Message
	FollowUp    string // the user's latest message
	Config      OrchestratorConfig
}
// RunOrchestrator is the entry point for answering one user turn. It detects
// the query language, auto-picks a focus mode when none was provided, and
// dispatches to the deep-research, speed, or full pipeline.
func RunOrchestrator(ctx context.Context, sess *session.Session, input OrchestratorInput) error {
	lang := detectLanguage(input.FollowUp)
	summaryRequest := strings.HasPrefix(strings.TrimSpace(input.FollowUp), "Summary: ")

	if input.Config.FocusMode == "" {
		input.Config.FocusMode = DetectFocusMode(input.FollowUp)
	}

	switch {
	case input.Config.EnableDeepResearch && input.Config.Mode == ModeQuality:
		return runDeepResearchMode(ctx, sess, input, lang)
	case input.Config.Mode == ModeSpeed && !summaryRequest:
		// Article-summary requests always take the full pipeline.
		return runSpeedMode(ctx, sess, input, lang)
	default:
		return runFullMode(ctx, sess, input, lang, summaryRequest)
	}
}
// runDeepResearchMode executes the deep-research pipeline (quality mode with
// deep research enabled) and emits source and related-question blocks to the
// session. lang keeps signature parity with the other run* helpers but is
// unused here; localization flows through Config.Locale.
func runDeepResearchMode(ctx context.Context, sess *session.Session, input OrchestratorInput, lang string) error {
	sess.EmitBlock(types.NewResearchBlock(uuid.New().String()))
	// Fixed budget for the interactive deep-research flow.
	researcher := NewDeepResearcher(DeepResearchConfig{
		LLM:              input.Config.LLM,
		SearchClient:     input.Config.SearchClient,
		FocusMode:        input.Config.FocusMode,
		Locale:           input.Config.Locale,
		MaxSearchQueries: 30,
		MaxSources:       100,
		MaxIterations:    5,
		Timeout:          5 * time.Minute,
	}, sess)
	result, err := researcher.Research(ctx, input.FollowUp)
	if err != nil {
		sess.EmitError(err)
		return err
	}
	sess.EmitBlock(types.NewSourceBlock(uuid.New().String(), result.Sources))
	if len(result.FollowUpQueries) > 0 {
		sess.EmitBlock(types.NewWidgetBlock(uuid.New().String(), "related_questions", map[string]interface{}{
			"questions": result.FollowUpQueries,
		}))
	}
	sess.EmitResearchComplete()
	sess.EmitEnd()
	return nil
}
// generateClarifyingQuestions asks the LLM whether the query is ambiguous.
// It returns nil when the query is clear, otherwise the clarifying questions
// the model emitted.
func generateClarifyingQuestions(ctx context.Context, llmClient llm.Client, query string) ([]string, error) {
	prompt := fmt.Sprintf(`Analyze this query and determine if clarifying questions would help provide a better answer.
Query: %s
If the query is:
- Clear and specific → respond with "CLEAR"
- Ambiguous or could benefit from clarification → provide 2-3 short clarifying questions
Format:
CLEAR
or
QUESTION: [question 1]
QUESTION: [question 2]
QUESTION: [question 3]`, query)
	result, err := llmClient.GenerateText(ctx, llm.StreamRequest{
		Messages: []llm.Message{{Role: "user", Content: prompt}},
	})
	if err != nil {
		return nil, err
	}
	// Parse QUESTION: lines first. The previous check treated ANY response
	// containing the word "clear" as a CLEAR verdict, silently discarding
	// valid questions such as "Do you want a clear summary or details?".
	var questions []string
	for _, line := range strings.Split(result, "\n") {
		line = strings.TrimSpace(line)
		if strings.HasPrefix(line, "QUESTION:") {
			if q := strings.TrimSpace(strings.TrimPrefix(line, "QUESTION:")); q != "" {
				questions = append(questions, q)
			}
		}
	}
	if len(questions) == 0 {
		// No questions were emitted: treat the response as CLEAR regardless
		// of its exact wording.
		return nil, nil
	}
	return questions, nil
}
// generateRelatedQuestions asks the LLM for 3-4 follow-up questions based
// on the query and (a truncated view of) the answer. Errors are swallowed:
// on failure it simply returns nil, since suggestions are best-effort.
func generateRelatedQuestions(ctx context.Context, llmClient llm.Client, query, answer string, locale string) []string {
	langInstruction := ""
	if locale == "ru" {
		langInstruction = "Generate questions in Russian."
	}
	prompt := fmt.Sprintf(`Based on this query and answer, generate 3-4 related follow-up questions the user might want to explore.
Query: %s
Answer summary: %s
%s
Format: One question per line, no numbering or bullets.`, query, truncateForPrompt(answer, 500), langInstruction)
	result, err := llmClient.GenerateText(ctx, llm.StreamRequest{
		Messages: []llm.Message{{Role: "user", Content: prompt}},
	})
	if err != nil {
		return nil
	}
	var questions []string
	for _, raw := range strings.Split(result, "\n") {
		candidate := strings.TrimSpace(raw)
		// Keep only substantial lines that actually look like questions.
		if len(candidate) <= 10 || !strings.Contains(candidate, "?") {
			continue
		}
		// Strip any numbering/bullets the model added despite instructions.
		if cleaned := strings.TrimLeft(candidate, "0123456789.-•* "); cleaned != "" {
			questions = append(questions, cleaned)
		}
		if len(questions) == 4 {
			break
		}
	}
	return questions
}
// truncateForPrompt shortens s to at most maxLen bytes, appending "..."
// when truncation occurs.
//
// Fixed: the cut now backs off to a UTF-8 rune boundary so multi-byte
// text (e.g. Russian answers) is never sliced mid-character, which would
// feed invalid UTF-8 into the follow-up prompt.
func truncateForPrompt(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}
	cut := maxLen
	// UTF-8 continuation bytes have the form 10xxxxxx; step back until we
	// sit on a rune start (or the string head).
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "..."
}
// buildEnhancedContext concatenates the optional per-user context sections
// (preferences, collection, uploaded files, prior context) into one
// markdown-headed string. Empty sections are omitted; an empty result
// means no extra context is available.
func buildEnhancedContext(input OrchestratorInput) string {
	sections := []struct {
		header string
		body   string
	}{
		{"## User Preferences\n", input.Config.UserMemory},
		{"## Collection Context\n", input.Config.CollectionContext},
		{"## Uploaded Files Content\n", input.Config.FileContext},
		{"## Previous Context\n", input.Config.MemoryContext},
	}
	var b strings.Builder
	for _, s := range sections {
		if s.body == "" {
			continue
		}
		b.WriteString(s.header)
		b.WriteString(s.body)
		b.WriteString("\n\n")
	}
	return b.String()
}
// fetchPreGeneratedDigest asks the discover service for a ready-made digest
// of articleURL. It returns (nil, nil) when no service is configured, the
// service answers non-200, or the digest is unusable — callers treat all
// of these as a cache miss.
func fetchPreGeneratedDigest(ctx context.Context, discoverURL, articleURL string) (*DigestResponse, error) {
	if discoverURL == "" {
		return nil, nil
	}
	endpoint := fmt.Sprintf("%s/api/v1/discover/digest?url=%s",
		strings.TrimSuffix(discoverURL, "/"),
		url.QueryEscape(articleURL))
	req, err := http.NewRequestWithContext(ctx, "GET", endpoint, nil)
	if err != nil {
		return nil, err
	}
	httpClient := &http.Client{Timeout: 3 * time.Second}
	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	// A non-200 (digest not generated yet, unknown URL, …) is a miss,
	// not an error.
	if resp.StatusCode != http.StatusOK {
		return nil, nil
	}
	var digest DigestResponse
	if err := json.NewDecoder(resp.Body).Decode(&digest); err != nil {
		return nil, err
	}
	// Only a digest carrying both a Russian summary and citations is usable.
	if digest.SummaryRu == "" || len(digest.Citations) == 0 {
		return nil, nil
	}
	return &digest, nil
}
// preScrapeArticleURL fetches the article, preferring the Crawl4AI service
// when one is configured and falling back to a direct HTTP scrape when it
// is unavailable or fails.
func preScrapeArticleURL(ctx context.Context, crawl4aiURL, articleURL string) (*PreScrapedArticle, error) {
	if crawl4aiURL == "" {
		return scrapeDirectly(ctx, articleURL)
	}
	if article, err := scrapeWithCrawl4AI(ctx, crawl4aiURL, articleURL); err == nil && article != nil {
		return article, nil
	}
	return scrapeDirectly(ctx, articleURL)
}
// scrapeWithCrawl4AI fetches articleURL through a Crawl4AI service and
// returns the extracted article, or an error when the service fails or
// yields too little content. The Crawl4AI-side page timeout is 20s and our
// HTTP timeout 25s; the returned markdown is truncated to 15000 bytes.
//
// Fixed: the article URL is now JSON-encoded before being spliced into the
// request template — a URL containing '"' or '\' previously corrupted the
// JSON body (request injection / malformed request).
func scrapeWithCrawl4AI(ctx context.Context, crawl4aiURL, articleURL string) (*PreScrapedArticle, error) {
	encodedURL, err := json.Marshal(articleURL)
	if err != nil {
		return nil, err
	}
	// encodedURL already carries its surrounding quotes.
	reqBody := fmt.Sprintf(`{
"urls": [%s],
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"cache_mode": "default",
"page_timeout": 20000
}
}
}`, encodedURL)
	req, err := http.NewRequestWithContext(ctx, "POST", crawl4aiURL+"/crawl", strings.NewReader(reqBody))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")
	client := &http.Client{Timeout: 25 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("Crawl4AI returned status %d", resp.StatusCode)
	}
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	markdown := extractMarkdownFromCrawl4AI(string(body))
	title := extractTitleFromCrawl4AI(string(body))
	// Anything under ~100 bytes is treated as a failed extraction.
	if len(markdown) > 100 {
		content := markdown
		if len(content) > 15000 {
			content = content[:15000]
		}
		return &PreScrapedArticle{
			Title:   title,
			Content: content,
			URL:     articleURL,
		}, nil
	}
	return nil, fmt.Errorf("insufficient content from Crawl4AI")
}
// scrapeDirectly fetches articleURL with a plain HTTP GET and extracts
// title and visible text from the HTML. It errors on non-200 responses and
// on pages yielding fewer than 100 bytes of text; extracted text is capped
// at 15000 bytes.
func scrapeDirectly(ctx context.Context, articleURL string) (*PreScrapedArticle, error) {
	req, err := http.NewRequestWithContext(ctx, "GET", articleURL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", "GooSeek-Agent/1.0")
	req.Header.Set("Accept", "text/html")
	httpClient := &http.Client{Timeout: 10 * time.Second}
	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("HTTP %d", resp.StatusCode)
	}
	raw, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	page := string(raw)
	text := extractTextContent(page)
	if len(text) < 100 {
		return nil, fmt.Errorf("insufficient content")
	}
	if len(text) > 15000 {
		text = text[:15000]
	}
	return &PreScrapedArticle{
		Title:   extractHTMLTitle(page),
		Content: text,
		URL:     articleURL,
	}, nil
}
// Shared regexes for the fallback HTML scraper, compiled once at package
// init rather than per call.
var (
	// titleRegex captures the text of the first <title> element.
	titleRegex = regexp.MustCompile(`<title[^>]*>([^<]+)</title>`)
	// scriptRegex and styleRegex strip entire <script>/<style> elements,
	// contents included ((?s) lets '.' span newlines).
	scriptRegex = regexp.MustCompile(`(?s)<script[^>]*>.*?</script>`)
	styleRegex = regexp.MustCompile(`(?s)<style[^>]*>.*?</style>`)
	// tagRegex removes any remaining tag. Naive by design: it can misfire
	// on '>' inside attribute values, which is acceptable for plain-text
	// extraction.
	tagRegex = regexp.MustCompile(`<[^>]+>`)
	// spaceRegex collapses whitespace runs into single spaces.
	spaceRegex = regexp.MustCompile(`\s+`)
)
// extractHTMLTitle returns the trimmed text of the page's first <title>
// element, or "" when none is found.
func extractHTMLTitle(html string) string {
	if m := titleRegex.FindStringSubmatch(html); len(m) > 1 {
		return strings.TrimSpace(m[1])
	}
	return ""
}
// extractTextContent strips an HTML document down to its visible text:
// it narrows to the <body> when one is present, removes script/style
// elements and all remaining tags, and collapses whitespace.
func extractTextContent(html string) string {
	lower := strings.ToLower(html)
	if start, end := strings.Index(lower, "<body"), strings.Index(lower, "</body>"); start != -1 && end > start {
		html = html[start:end]
	}
	for _, re := range []*regexp.Regexp{scriptRegex, styleRegex} {
		html = re.ReplaceAllString(html, "")
	}
	html = spaceRegex.ReplaceAllString(tagRegex.ReplaceAllString(html, " "), " ")
	return strings.TrimSpace(html)
}
// extractMarkdownFromCrawl4AI pulls the value of the first "raw_markdown"
// field out of a Crawl4AI JSON response, returning "" when absent or
// malformed. Like the original, it matches the first occurrence of the key
// anywhere in the payload.
//
// Fixed: the previous scan stopped at the first '"' byte, so any escaped
// quote (\") inside the markdown truncated the result, and JSON escapes
// such as \n were returned verbatim. We now scan escape-aware and decode
// the quoted string with encoding/json.
func extractMarkdownFromCrawl4AI(response string) string {
	const key = `"raw_markdown"`
	idx := strings.Index(response, key)
	if idx == -1 {
		return ""
	}
	rest := response[idx+len(key):]
	colon := strings.Index(rest, ":")
	if colon == -1 {
		return ""
	}
	rest = strings.TrimLeft(rest[colon+1:], " \t\r\n")
	if len(rest) == 0 || rest[0] != '"' {
		return ""
	}
	// Find the closing quote, skipping backslash-escaped characters.
	end := -1
	for i := 1; i < len(rest) && end == -1; i++ {
		switch rest[i] {
		case '\\':
			i++ // skip the escaped character
		case '"':
			end = i
		}
	}
	if end == -1 {
		return ""
	}
	var s string
	if err := json.Unmarshal([]byte(rest[:end+1]), &s); err != nil {
		return ""
	}
	return s
}
// extractTitleFromCrawl4AI pulls the value of the first "title" field out
// of a Crawl4AI JSON response, returning "" when absent or malformed. Like
// the original, it matches the first occurrence of the key anywhere in the
// payload.
//
// Fixed: the previous scan stopped at the first '"' byte, so titles
// containing escaped quotes were truncated and JSON escapes were returned
// verbatim. We now scan escape-aware and decode via encoding/json.
func extractTitleFromCrawl4AI(response string) string {
	const key = `"title"`
	idx := strings.Index(response, key)
	if idx == -1 {
		return ""
	}
	rest := response[idx+len(key):]
	colon := strings.Index(rest, ":")
	if colon == -1 {
		return ""
	}
	rest = strings.TrimLeft(rest[colon+1:], " \t\r\n")
	if len(rest) == 0 || rest[0] != '"' {
		return ""
	}
	// Find the closing quote, skipping backslash-escaped characters.
	end := -1
	for i := 1; i < len(rest) && end == -1; i++ {
		switch rest[i] {
		case '\\':
			i++ // skip the escaped character
		case '"':
			end = i
		}
	}
	if end == -1 {
		return ""
	}
	var s string
	if err := json.Unmarshal([]byte(rest[:end+1]), &s); err != nil {
		return ""
	}
	return s
}
// runSpeedMode answers with a single fast search round: heuristic
// classification, one batch of parallel web searches plus a media search,
// BM25 reranking into a small context, and a streamed answer capped at
// 2048 tokens.
func runSpeedMode(ctx context.Context, sess *session.Session, input OrchestratorInput, detectedLang string) error {
	// fastClassify is presumably a local heuristic (no LLM round-trip) —
	// TODO confirm it never blocks on the network.
	classification := fastClassify(input.FollowUp, input.ChatHistory)
	searchQuery := classification.StandaloneFollowUp
	if searchQuery == "" {
		searchQuery = input.FollowUp
	}
	queries := generateSearchQueries(searchQuery)
	// The research block is emitted once; this mode never patches it with
	// sub-step updates.
	researchBlockID := uuid.New().String()
	sess.EmitBlock(types.NewResearchBlock(researchBlockID))
	var searchResults []types.Chunk
	var mediaResult *search.MediaSearchResult
	// Web search and media search run concurrently; each leg swallows its
	// error (returns nil) so one failure degrades the answer rather than
	// aborting the turn.
	g, gctx := errgroup.WithContext(ctx)
	g.Go(func() error {
		results, err := parallelSearch(gctx, input.Config.SearchClient, queries)
		if err != nil {
			return nil
		}
		searchResults = results
		return nil
	})
	g.Go(func() error {
		result, err := input.Config.SearchClient.SearchMedia(gctx, searchQuery, &search.MediaSearchOptions{
			MaxImages: 6,
			MaxVideos: 4,
		})
		if err != nil {
			return nil
		}
		mediaResult = result
		return nil
	})
	// Workers only ever return nil; results land in the captured vars.
	_ = g.Wait()
	if len(searchResults) > 0 {
		sess.EmitBlock(types.NewSourceBlock(uuid.New().String(), searchResults))
	}
	if mediaResult != nil {
		if len(mediaResult.Images) > 0 {
			sess.EmitBlock(types.NewWidgetBlock(uuid.New().String(), "image_gallery", map[string]interface{}{
				"images": mediaResult.Images,
				"layout": "carousel",
			}))
		}
		if len(mediaResult.Videos) > 0 {
			sess.EmitBlock(types.NewWidgetBlock(uuid.New().String(), "videos", map[string]interface{}{
				"items": mediaResult.Videos,
				"title": "",
			}))
		}
	}
	sess.EmitResearchComplete()
	// Rerank with BM25 using a topK adapted to result volume and query
	// complexity, then keep the writer context small (≤15 chunks of 250
	// chars) for latency.
	queryComplexity := search.EstimateQueryComplexity(searchQuery)
	adaptiveTopK := search.ComputeAdaptiveTopK(len(searchResults), queryComplexity, "speed")
	rankedResults := search.RerankBM25(searchResults, searchQuery, adaptiveTopK)
	finalContext := buildContext(rankedResults, 15, 250)
	writerPrompt := prompts.GetWriterPrompt(prompts.WriterConfig{
		Context: finalContext,
		SystemInstructions: input.Config.SystemInstructions,
		Mode: string(input.Config.Mode),
		Locale: input.Config.Locale,
		MemoryContext: input.Config.MemoryContext,
		AnswerMode: input.Config.AnswerMode,
		DetectedLanguage: detectedLang,
		IsArticleSummary: false,
	})
	messages := []llm.Message{
		{Role: llm.RoleSystem, Content: writerPrompt},
	}
	messages = append(messages, input.ChatHistory...)
	messages = append(messages, llm.Message{Role: llm.RoleUser, Content: input.FollowUp})
	return streamResponse(ctx, sess, input.Config.LLM, messages, 2048, input.FollowUp, input.Config.Locale)
}
// runFullMode handles a turn in balanced/quality mode: optional clarifying
// questions, LLM classification, iterative research and media search in
// parallel, source/media block emission, and a streamed answer.
//
// isArticleSummary marks "Summary: <url>" turns, which get special
// handling: the article is pre-fetched (or a pre-generated digest is
// served directly, with no LLM call), search limits are widened, and media
// search is skipped.
func runFullMode(ctx context.Context, sess *session.Session, input OrchestratorInput, detectedLang string, isArticleSummary bool) error {
	// Quality mode may pause to ask clarifying questions instead of
	// answering. NOTE(review): this path returns without sess.EmitEnd() —
	// confirm the client treats the clarifying widget as end-of-turn.
	if input.Config.EnableClarifying && !isArticleSummary && input.Config.Mode == ModeQuality {
		clarifying, err := generateClarifyingQuestions(ctx, input.Config.LLM, input.FollowUp)
		if err == nil && len(clarifying) > 0 {
			sess.EmitBlock(types.NewWidgetBlock(uuid.New().String(), "clarifying", map[string]interface{}{
				"questions": clarifying,
				"query": input.FollowUp,
			}))
			return nil
		}
	}
	// Fold user memory / collections / uploaded files into the memory
	// context handed to the writer prompt.
	enhancedContext := buildEnhancedContext(input)
	if enhancedContext != "" {
		input.Config.MemoryContext = enhancedContext + input.Config.MemoryContext
	}
	var preScrapedArticle *PreScrapedArticle
	var articleURL string
	if isArticleSummary {
		articleURL = strings.TrimSpace(strings.TrimPrefix(strings.TrimSpace(input.FollowUp), "Summary: "))
		// Fetch a possibly pre-generated digest (fast path, 3s budget) and
		// scrape the article itself (slow path, 25s budget) concurrently.
		// Both channels are buffered (size 1) so neither goroutine can
		// leak even though we block on both below.
		digestCtx, digestCancel := context.WithTimeout(ctx, 3*time.Second)
		scrapeCtx, scrapeCancel := context.WithTimeout(ctx, 25*time.Second)
		digestCh := make(chan *DigestResponse, 1)
		scrapeCh := make(chan *PreScrapedArticle, 1)
		go func() {
			defer digestCancel()
			digest, _ := fetchPreGeneratedDigest(digestCtx, input.Config.DiscoverSvcURL, articleURL)
			digestCh <- digest
		}()
		go func() {
			defer scrapeCancel()
			article, _ := preScrapeArticleURL(scrapeCtx, input.Config.Crawl4AIURL, articleURL)
			scrapeCh <- article
		}()
		digest := <-digestCh
		preScrapedArticle = <-scrapeCh
		// A usable digest short-circuits the whole pipeline: emit its
		// citations as sources and its stored summary as the answer.
		if digest != nil {
			chunks := make([]types.Chunk, len(digest.Citations))
			for i, c := range digest.Citations {
				// Citations carry no body text, so the title doubles as
				// the chunk content.
				chunks[i] = types.Chunk{
					Content: c.Title,
					Metadata: map[string]string{
						"url": c.URL,
						"title": c.Title,
						"domain": c.Domain,
					},
				}
			}
			sess.EmitBlock(types.NewSourceBlock(uuid.New().String(), chunks))
			sess.EmitResearchComplete()
			summaryText := digest.SummaryRu
			// Append follow-up questions as a blockquote section.
			if len(digest.FollowUp) > 0 {
				summaryText += "\n\n---\n"
				for _, q := range digest.FollowUp {
					summaryText += "> " + q + "\n"
				}
			}
			sess.EmitBlock(types.NewTextBlock(uuid.New().String(), summaryText))
			sess.EmitEnd()
			return nil
		}
	}
	// Classify the query (standalone reformulation, search skip, topics…);
	// on failure, fall back to searching with the raw follow-up.
	classification, err := classify(ctx, input.Config.LLM, input.FollowUp, input.ChatHistory, input.Config.Locale, detectedLang)
	if err != nil {
		classification = &ClassificationResult{
			StandaloneFollowUp: input.FollowUp,
			SkipSearch: false,
		}
	}
	// Article summaries always need search context.
	if isArticleSummary && classification.SkipSearch {
		classification.SkipSearch = false
	}
	g, gctx := errgroup.WithContext(ctx)
	var searchResults []types.Chunk
	var mediaResult *search.MediaSearchResult
	mediaQuery := classification.StandaloneFollowUp
	if mediaQuery == "" {
		mediaQuery = input.FollowUp
	}
	// Enrich the research query with the scraped article title so searches
	// target the article's topic rather than the bare URL.
	effectiveFollowUp := input.FollowUp
	if isArticleSummary && preScrapedArticle != nil && preScrapedArticle.Title != "" {
		effectiveFollowUp = fmt.Sprintf("Summary: %s\nArticle title: %s", preScrapedArticle.URL, preScrapedArticle.Title)
		if classification.StandaloneFollowUp != "" {
			classification.StandaloneFollowUp = preScrapedArticle.Title + " " + classification.StandaloneFollowUp
		} else {
			classification.StandaloneFollowUp = preScrapedArticle.Title
		}
	}
	// Web research and media search run concurrently; each leg swallows
	// its error so one failure degrades the answer instead of aborting it.
	if !classification.SkipSearch {
		g.Go(func() error {
			results, err := research(gctx, sess, input.Config.LLM, input.Config.SearchClient, ResearchInput{
				ChatHistory: input.ChatHistory,
				FollowUp: effectiveFollowUp,
				Classification: classification,
				Mode: input.Config.Mode,
				Sources: input.Config.Sources,
				Locale: input.Config.Locale,
				DetectedLang: detectedLang,
				IsArticleSummary: isArticleSummary,
			})
			if err != nil {
				return nil
			}
			searchResults = results
			return nil
		})
	}
	if !isArticleSummary {
		g.Go(func() error {
			result, err := input.Config.SearchClient.SearchMedia(gctx, mediaQuery, &search.MediaSearchOptions{
				MaxImages: 8,
				MaxVideos: 6,
			})
			if err != nil {
				return nil
			}
			mediaResult = result
			return nil
		})
	}
	// Workers only ever return nil; results land in the captured vars.
	_ = g.Wait()
	// Ensure the scraped article itself leads the source list unless
	// research already surfaced the same URL.
	if isArticleSummary && preScrapedArticle != nil {
		alreadyHasURL := false
		for _, r := range searchResults {
			if strings.Contains(r.Metadata["url"], preScrapedArticle.URL) {
				alreadyHasURL = true
				break
			}
		}
		if !alreadyHasURL {
			prependChunk := types.Chunk{
				Content: preScrapedArticle.Content,
				Metadata: map[string]string{
					"url": preScrapedArticle.URL,
					"title": preScrapedArticle.Title,
				},
			}
			searchResults = append([]types.Chunk{prependChunk}, searchResults...)
		}
	}
	if len(searchResults) > 0 {
		sess.EmitBlock(types.NewSourceBlock(uuid.New().String(), searchResults))
	}
	if mediaResult != nil {
		if len(mediaResult.Images) > 0 {
			sess.EmitBlock(types.NewWidgetBlock(uuid.New().String(), "image_gallery", map[string]interface{}{
				"images": mediaResult.Images,
				"layout": "carousel",
			}))
		}
		if len(mediaResult.Videos) > 0 {
			sess.EmitBlock(types.NewWidgetBlock(uuid.New().String(), "videos", map[string]interface{}{
				"items": mediaResult.Videos,
				"title": "",
			}))
		}
	}
	sess.EmitResearchComplete()
	// Article summaries get more and longer context chunks.
	maxResults := 25
	maxContent := 320
	if isArticleSummary {
		maxResults = 30
		maxContent = 2000
	}
	rankedResults := rankByRelevance(searchResults, input.FollowUp)
	// buildContext applies the same cap again; this pre-trim is redundant
	// but harmless.
	if len(rankedResults) > maxResults {
		rankedResults = rankedResults[:maxResults]
	}
	finalContext := buildContext(rankedResults, maxResults, maxContent)
	writerPrompt := prompts.GetWriterPrompt(prompts.WriterConfig{
		Context: finalContext,
		SystemInstructions: input.Config.SystemInstructions,
		Mode: string(input.Config.Mode),
		Locale: input.Config.Locale,
		MemoryContext: input.Config.MemoryContext,
		AnswerMode: input.Config.AnswerMode,
		DetectedLanguage: detectedLang,
		IsArticleSummary: isArticleSummary,
	})
	messages := []llm.Message{
		{Role: llm.RoleSystem, Content: writerPrompt},
	}
	messages = append(messages, input.ChatHistory...)
	messages = append(messages, llm.Message{Role: llm.RoleUser, Content: input.FollowUp})
	maxTokens := 4096
	return streamResponse(ctx, sess, input.Config.LLM, messages, maxTokens, input.FollowUp, input.Config.Locale)
}
// streamResponse streams an LLM completion to the session as a single text
// block, then asynchronously generates related-question suggestions.
//
// The first non-empty chunk creates the text block; later chunks are sent
// as incremental text patches. After the stream ends, the block's data is
// replaced once with the full accumulated text so the client holds a
// consistent final state.
func streamResponse(ctx context.Context, sess *session.Session, client llm.Client, messages []llm.Message, maxTokens int, query string, locale string) error {
	stream, err := client.StreamText(ctx, llm.StreamRequest{
		Messages: messages,
		Options: llm.StreamOptions{MaxTokens: maxTokens},
	})
	if err != nil {
		return err
	}
	var responseBlockID string
	var accumulatedText string
	for chunk := range stream {
		// Skip leading empty chunks so the block is only created once
		// real content arrives.
		if chunk.ContentChunk == "" && responseBlockID == "" {
			continue
		}
		if responseBlockID == "" {
			responseBlockID = uuid.New().String()
			accumulatedText = chunk.ContentChunk
			sess.EmitBlock(types.NewTextBlock(responseBlockID, accumulatedText))
		} else if chunk.ContentChunk != "" {
			accumulatedText += chunk.ContentChunk
			sess.EmitTextChunk(responseBlockID, chunk.ContentChunk)
		}
	}
	if responseBlockID != "" {
		sess.UpdateBlock(responseBlockID, []session.Patch{
			{Op: "replace", Path: "/data", Value: accumulatedText},
		})
	}
	// Related questions are generated off the critical path with their own
	// 5s budget, detached from the request ctx. NOTE(review): this
	// goroutine may emit its widget block after EmitEnd below, and nothing
	// waits for it — confirm the session tolerates emissions after
	// end-of-turn.
	go func() {
		relatedCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancel()
		related := generateRelatedQuestions(relatedCtx, client, query, accumulatedText, locale)
		if len(related) > 0 {
			sess.EmitBlock(types.NewWidgetBlock(uuid.New().String(), "related_questions", map[string]interface{}{
				"questions": related,
			}))
		}
	}()
	sess.EmitEnd()
	return nil
}
// parallelSearch runs every query concurrently against SearXNG and merges
// the results, deduplicated by URL. Individual query failures contribute
// nothing; the function itself never returns an error.
func parallelSearch(ctx context.Context, client *search.SearXNGClient, queries []string) ([]types.Chunk, error) {
	chunks := make([]types.Chunk, 0)
	seenURLs := make(map[string]bool)
	g, gctx := errgroup.WithContext(ctx)
	batches := make(chan []types.SearchResult, len(queries))
	for _, q := range queries {
		query := q // per-iteration copy for the goroutine (pre-Go 1.22 semantics)
		g.Go(func() error {
			resp, err := client.Search(gctx, query, &search.SearchOptions{
				Categories: []string{"general", "news"},
				PageNo:     1,
			})
			// A failed query delivers an empty batch instead of an error.
			if err != nil {
				batches <- nil
				return nil
			}
			batches <- resp.Results
			return nil
		})
	}
	// Close the channel once every worker has delivered its batch, so the
	// merge loop below terminates.
	go func() {
		g.Wait()
		close(batches)
	}()
	for batch := range batches {
		for _, r := range batch {
			if r.URL == "" || seenURLs[r.URL] {
				continue
			}
			seenURLs[r.URL] = true
			chunks = append(chunks, r.ToChunk())
		}
	}
	return chunks, nil
}
// buildContext renders ranked search chunks into the pseudo-XML
// <search_results> envelope consumed by the writer prompt. Each chunk
// becomes <result title="..." index="N">content</result>; titles have
// double quotes rewritten to single quotes so they cannot terminate the
// attribute, content longer than maxContentLen is cut with an ellipsis,
// and at most maxResults chunks are rendered.
//
// Fixed two output bugs: the title attribute was emitted malformed
// (`<result index=TITLE" index="...` — the `title="` name and opening
// quote were missing), and the index digit was built via rune('0'+i+1),
// which yields ':' ';' '<' … for indices above 9.
func buildContext(chunks []types.Chunk, maxResults, maxContentLen int) string {
	if len(chunks) > maxResults {
		chunks = chunks[:maxResults]
	}
	var sb strings.Builder
	sb.WriteString("<search_results note=\"These are the search results and assistant can cite these\">\n")
	for i, chunk := range chunks {
		content := chunk.Content
		if len(content) > maxContentLen {
			content = content[:maxContentLen] + "…"
		}
		title := chunk.Metadata["title"]
		sb.WriteString("<result title=\"")
		sb.WriteString(strings.ReplaceAll(title, "\"", "'"))
		sb.WriteString("\" index=\"")
		sb.WriteString(fmt.Sprintf("%d", i+1)) // 1-based, correct past index 9
		sb.WriteString("\">")
		sb.WriteString(content)
		sb.WriteString("</result>\n")
	}
	sb.WriteString("</search_results>")
	return sb.String()
}
// rankByRelevance orders chunks by a simple term-overlap score against the
// query: +3 per query term found in the title, +1 per term found in the
// content. When the query yields no usable terms, the input order is kept.
func rankByRelevance(chunks []types.Chunk, query string) []types.Chunk {
	if len(chunks) == 0 {
		return chunks
	}
	terms := extractQueryTerms(query)
	if len(terms) == 0 {
		return chunks
	}
	type rankedChunk struct {
		chunk types.Chunk
		score int
	}
	ranked := make([]rankedChunk, len(chunks))
	for i, c := range chunks {
		points := 0
		lowerBody := strings.ToLower(c.Content)
		lowerTitle := strings.ToLower(c.Metadata["title"])
		for term := range terms {
			if strings.Contains(lowerTitle, term) {
				points += 3
			}
			if strings.Contains(lowerBody, term) {
				points++
			}
		}
		ranked[i] = rankedChunk{chunk: c, score: points}
	}
	// Exchange sort, descending by score; kept identical to the original
	// algorithm so tie ordering is unchanged.
	for i := 0; i < len(ranked)-1; i++ {
		for j := i + 1; j < len(ranked); j++ {
			if ranked[j].score > ranked[i].score {
				ranked[i], ranked[j] = ranked[j], ranked[i]
			}
		}
	}
	out := make([]types.Chunk, len(ranked))
	for i, rc := range ranked {
		out[i] = rc.chunk
	}
	return out
}
// extractQueryTerms lowercases the query, drops a leading "summary: "
// marker, and returns the set of whitespace-separated words of at least
// two characters, excluding http(s) URLs.
func extractQueryTerms(query string) map[string]bool {
	normalized := strings.TrimPrefix(strings.ToLower(query), "summary: ")
	terms := make(map[string]bool)
	for _, word := range strings.Fields(normalized) {
		if len(word) < 2 || strings.HasPrefix(word, "http") {
			continue
		}
		terms[word] = true
	}
	return terms
}

View File

@@ -0,0 +1,128 @@
package agent
import (
"context"
"github.com/gooseek/backend/internal/llm"
"github.com/gooseek/backend/internal/search"
"github.com/gooseek/backend/internal/session"
"github.com/gooseek/backend/internal/types"
"github.com/google/uuid"
)
// ResearchInput bundles the parameters for one research run.
type ResearchInput struct {
	// ChatHistory is the prior conversation. Not read by research() in
	// this file.
	ChatHistory []llm.Message
	// FollowUp is the raw user query, used as the search query when the
	// classifier produced no standalone reformulation.
	FollowUp string
	// Classification is the classifier output; a non-empty
	// StandaloneFollowUp replaces FollowUp as the search query.
	Classification *ClassificationResult
	// Mode selects research depth: 1 iteration by default, 3 for
	// ModeBalanced, 10 for ModeQuality.
	Mode Mode
	// Sources are user-selected source kinds, mapped to SearXNG
	// categories by categoriesToSearch.
	Sources []string
	// Locale is the UI locale. Not read by research() in this file.
	Locale string
	// DetectedLang is the detected query language. Not read by research()
	// in this file.
	DetectedLang string
	// IsArticleSummary marks "Summary: <url>" turns. Not read by
	// research() in this file.
	IsArticleSummary bool
}
// research runs the iterative web-search loop for one query and returns
// the accumulated result chunks, deduplicated by URL.
//
// Iteration count scales with mode (1 default, 3 balanced, 10 quality).
// Each iteration replaces the research block's sub-steps with the current
// query batch, runs every query sequentially, and merges new results. The
// loop stops early once 20 results exist in balanced mode, or 50 in any
// mode.
//
// NOTE(review): generateSearchQueries is fed the same searchQuery on every
// iteration, so iterations beyond the first re-run identical queries and —
// because of URL dedup — mostly add nothing. Confirm whether the query set
// was meant to be refined between iterations.
func research(
	ctx context.Context,
	sess *session.Session,
	llmClient llm.Client,
	searchClient *search.SearXNGClient,
	input ResearchInput,
) ([]types.Chunk, error) {
	maxIterations := 1
	switch input.Mode {
	case ModeBalanced:
		maxIterations = 3
	case ModeQuality:
		maxIterations = 10
	}
	researchBlockID := uuid.New().String()
	sess.EmitBlock(types.NewResearchBlock(researchBlockID))
	allResults := make([]types.Chunk, 0)
	seenURLs := make(map[string]bool)
	// Prefer the classifier's standalone reformulation over the raw query.
	searchQuery := input.Classification.StandaloneFollowUp
	if searchQuery == "" {
		searchQuery = input.FollowUp
	}
	for i := 0; i < maxIterations; i++ {
		queries := generateSearchQueries(searchQuery)
		// Replace (not append) the block's sub-steps with this round's
		// query list so the client shows current activity.
		sess.UpdateBlock(researchBlockID, []session.Patch{
			{
				Op:   "replace",
				Path: "/data/subSteps",
				Value: []types.ResearchSubStep{
					{
						ID:        uuid.New().String(),
						Type:      "searching",
						Searching: queries,
					},
				},
			},
		})
		for _, q := range queries {
			resp, err := searchClient.Search(ctx, q, &search.SearchOptions{
				Categories: categoriesToSearch(input.Sources),
				PageNo:     1,
			})
			// Individual query failures are skipped silently; the loop
			// keeps whatever the other queries found.
			if err != nil {
				continue
			}
			for _, r := range resp.Results {
				if r.URL != "" && !seenURLs[r.URL] {
					seenURLs[r.URL] = true
					allResults = append(allResults, r.ToChunk())
				}
			}
		}
		// Defensive: maxIterations is already 1 for speed mode, so this
		// break is effectively unreachable.
		if input.Mode == ModeSpeed {
			break
		}
		if len(allResults) >= 20 && input.Mode == ModeBalanced {
			break
		}
		if len(allResults) >= 50 {
			break
		}
	}
	return allResults, nil
}
// categoriesToSearch maps user-facing source kinds to SearXNG category
// names, preserving order. With no sources it defaults to general+news;
// when no source is recognized it falls back to general only.
func categoriesToSearch(sources []string) []string {
	if len(sources) == 0 {
		return []string{"general", "news"}
	}
	mapping := map[string]string{
		"web":         "general",
		"discussions": "social media",
		"academic":    "science",
		"news":        "news",
		"images":      "images",
		"videos":      "videos",
	}
	categories := make([]string, 0, len(sources))
	for _, source := range sources {
		if category, ok := mapping[source]; ok {
			categories = append(categories, category)
		}
	}
	if len(categories) == 0 {
		return []string{"general"}
	}
	return categories
}