feat: Go backend, enhanced search, new widgets, Docker deploy
Major changes: - Add Go backend (backend/) with microservices architecture - Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler, proxy-manager, media-search, fastClassifier, language detection - New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard, UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions - Improved discover-svc with discover-db integration - Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md) - Library-svc: project_id schema migration - Remove deprecated finance-svc and travel-svc - Localization improvements across services Made-with: Cursor
This commit is contained in:
233
backend/internal/agent/classifier.go
Normal file
233
backend/internal/agent/classifier.go
Normal file
@@ -0,0 +1,233 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/gooseek/backend/internal/llm"
|
||||
"github.com/gooseek/backend/internal/prompts"
|
||||
)
|
||||
|
||||
// ClassificationResult is the outcome of query classification: a rewritten
// standalone query plus hints about whether and how to search for it.
type ClassificationResult struct {
	// StandaloneFollowUp is the query rewritten to be self-contained, with
	// conversation context folded in.
	StandaloneFollowUp string `json:"standaloneFollowUp"`
	// SkipSearch is true when the query can be answered without a web search.
	SkipSearch bool `json:"skipSearch"`
	// Topics extracted from the query/context, if any.
	Topics []string `json:"topics,omitempty"`
	// QueryType is a free-form category label assigned by the classifier.
	QueryType string `json:"queryType,omitempty"`
	// Engines lists suggested search engines for this query.
	Engines []string `json:"engines,omitempty"`
}
|
||||
|
||||
func classify(ctx context.Context, client llm.Client, query string, history []llm.Message, locale, detectedLang string) (*ClassificationResult, error) {
|
||||
prompt := prompts.GetClassifierPrompt(locale, detectedLang)
|
||||
|
||||
historyStr := formatHistory(history)
|
||||
userContent := "<conversation>\n" + historyStr + "\nUser: " + query + "\n</conversation>"
|
||||
|
||||
messages := []llm.Message{
|
||||
{Role: llm.RoleSystem, Content: prompt},
|
||||
{Role: llm.RoleUser, Content: userContent},
|
||||
}
|
||||
|
||||
response, err := client.GenerateText(ctx, llm.StreamRequest{
|
||||
Messages: messages,
|
||||
Options: llm.StreamOptions{MaxTokens: 1024},
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jsonMatch := regexp.MustCompile(`\{[\s\S]*\}`).FindString(response)
|
||||
if jsonMatch == "" {
|
||||
return &ClassificationResult{
|
||||
StandaloneFollowUp: query,
|
||||
SkipSearch: false,
|
||||
}, nil
|
||||
}
|
||||
|
||||
var result ClassificationResult
|
||||
if err := json.Unmarshal([]byte(jsonMatch), &result); err != nil {
|
||||
return &ClassificationResult{
|
||||
StandaloneFollowUp: query,
|
||||
SkipSearch: false,
|
||||
}, nil
|
||||
}
|
||||
|
||||
if result.StandaloneFollowUp == "" {
|
||||
result.StandaloneFollowUp = query
|
||||
}
|
||||
|
||||
return &result, nil
|
||||
}
|
||||
|
||||
func fastClassify(query string, history []llm.Message) *ClassificationResult {
|
||||
queryLower := strings.ToLower(query)
|
||||
|
||||
skipPatterns := []string{
|
||||
"привет", "как дела", "спасибо", "пока",
|
||||
"hello", "hi", "thanks", "bye",
|
||||
"объясни", "расскажи подробнее", "что ты имеешь",
|
||||
}
|
||||
|
||||
skipSearch := false
|
||||
for _, p := range skipPatterns {
|
||||
if strings.Contains(queryLower, p) && len(query) < 50 {
|
||||
skipSearch = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
standalone := query
|
||||
|
||||
if len(history) > 0 {
|
||||
pronouns := []string{
|
||||
"это", "этот", "эта", "эти",
|
||||
"он", "она", "оно", "они",
|
||||
"it", "this", "that", "they", "them",
|
||||
}
|
||||
|
||||
hasPronouns := false
|
||||
for _, p := range pronouns {
|
||||
if strings.Contains(queryLower, p+" ") || strings.HasPrefix(queryLower, p+" ") {
|
||||
hasPronouns = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if hasPronouns && len(history) >= 2 {
|
||||
lastAssistant := ""
|
||||
for i := len(history) - 1; i >= 0; i-- {
|
||||
if history[i].Role == llm.RoleAssistant {
|
||||
lastAssistant = history[i].Content
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if lastAssistant != "" {
|
||||
topics := extractTopics(lastAssistant)
|
||||
if len(topics) > 0 {
|
||||
standalone = query + " (контекст: " + strings.Join(topics, ", ") + ")"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
engines := detectEngines(queryLower)
|
||||
|
||||
return &ClassificationResult{
|
||||
StandaloneFollowUp: standalone,
|
||||
SkipSearch: skipSearch,
|
||||
Engines: engines,
|
||||
}
|
||||
}
|
||||
|
||||
// generateSearchQueries derives up to three search-engine queries from the
// user query: the query itself, a five-word head for very long queries, and
// the query with a leading question word ("how", "что такое", ...) stripped.
func generateSearchQueries(query string) []string {
	queries := []string{query}

	// For very long queries, also try just the first five words.
	if len(query) > 100 {
		words := strings.Fields(query)
		if len(words) > 5 {
			queries = append(queries, strings.Join(words[:5], " "))
		}
	}

	// Question-word prefixes (Russian and English) to strip.
	keywordPatterns := []string{
		"как", "что такое", "где", "когда", "почему", "кто",
		"how", "what is", "where", "when", "why", "who",
	}

	// Hoisted out of the loop: the original lower-cased the query on every
	// iteration (and again inside the match).
	queryLower := strings.ToLower(query)
	for _, p := range keywordPatterns {
		if strings.HasPrefix(queryLower, p) {
			withoutPrefix := strings.TrimSpace(strings.TrimPrefix(queryLower, p))
			// Only keep the stripped form if enough of the query remains.
			if len(withoutPrefix) > 10 {
				queries = append(queries, withoutPrefix)
			}
			break
		}
	}

	if len(queries) > 3 {
		queries = queries[:3]
	}

	return queries
}
|
||||
|
||||
// detectEngines maps keyword hits in an (already lower-cased) query onto
// extra search engines, on top of the general-purpose defaults.
func detectEngines(query string) []string {
	engines := []string{"google", "duckduckgo"}

	// has reports whether any of the given substrings occurs in the query.
	has := func(substrs ...string) bool {
		for _, s := range substrs {
			if strings.Contains(query, s) {
				return true
			}
		}
		return false
	}

	if has("новости", "news") {
		engines = append(engines, "google_news")
	}
	if has("видео", "video") {
		engines = append(engines, "youtube")
	}
	if has("товар", "купить", "цена", "price") {
		engines = append(engines, "google_shopping")
	}

	return engines
}
|
||||
|
||||
// extractTopics pulls up to three capitalized (Latin or Cyrillic) words of
// moderate length from the first 50 words of text, to use as rough topic
// labels for follow-up context.
func extractTopics(text string) []string {
	words := strings.Fields(text)
	if len(words) > 50 {
		words = words[:50]
	}

	topics := make([]string, 0, 3)
	for _, w := range words {
		r := []rune(w)
		if len(r) == 0 {
			continue
		}
		// Length is measured in runes; the original used len(w) (bytes),
		// which let 3-letter Cyrillic words through because each Cyrillic
		// letter is two bytes in UTF-8. Capital 'Ё' sits outside the А-Я
		// range and is checked explicitly.
		if len(r) > 5 && len(r) < 20 &&
			((r[0] >= 'A' && r[0] <= 'Z') || (r[0] >= 'А' && r[0] <= 'Я') || r[0] == 'Ё') {
			topics = append(topics, w)
			if len(topics) >= 3 {
				break
			}
		}
	}

	return topics
}
|
||||
|
||||
func formatHistory(messages []llm.Message) string {
|
||||
var sb strings.Builder
|
||||
for _, m := range messages {
|
||||
role := "User"
|
||||
if m.Role == llm.RoleAssistant {
|
||||
role = "Assistant"
|
||||
}
|
||||
sb.WriteString(role)
|
||||
sb.WriteString(": ")
|
||||
content := m.Content
|
||||
if len(content) > 500 {
|
||||
content = content[:500] + "..."
|
||||
}
|
||||
sb.WriteString(content)
|
||||
sb.WriteString("\n")
|
||||
}
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
// detectLanguage guesses "ru" vs "en" by counting Cyrillic and Latin letters.
// Cyrillic wins only on a strict majority; ties and letterless input fall
// back to "en".
func detectLanguage(text string) string {
	cyrillicCount := 0
	latinCount := 0

	for _, r := range text {
		switch {
		// 'ё'/'Ё' sit outside the contiguous а-я/А-Я code-point ranges in
		// Unicode, so they must be checked explicitly (the original missed
		// them and counted ё-words as neither alphabet).
		case (r >= 'а' && r <= 'я') || (r >= 'А' && r <= 'Я') || r == 'ё' || r == 'Ё':
			cyrillicCount++
		case (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z'):
			latinCount++
		}
	}

	if cyrillicCount > latinCount {
		return "ru"
	}
	return "en"
}
|
||||
543
backend/internal/agent/deep_research.go
Normal file
543
backend/internal/agent/deep_research.go
Normal file
@@ -0,0 +1,543 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/gooseek/backend/internal/llm"
|
||||
"github.com/gooseek/backend/internal/search"
|
||||
"github.com/gooseek/backend/internal/session"
|
||||
"github.com/gooseek/backend/internal/types"
|
||||
"github.com/google/uuid"
|
||||
"golang.org/x/sync/errgroup"
|
||||
)
|
||||
|
||||
// DeepResearchConfig carries the dependencies and limits for one
// deep-research run. Zero-valued limits are replaced with defaults in
// NewDeepResearcher.
type DeepResearchConfig struct {
	LLM              llm.Client            // model used for planning, synthesis, and the report
	SearchClient     *search.SearXNGClient // web-search backend
	FocusMode        FocusMode             // engine/prompt profile for the run
	Locale           string                // report-language hint, e.g. "ru"
	MaxSearchQueries int                   // hard cap on search requests issued
	MaxSources       int                   // stop once this many unique sources are collected
	MaxIterations    int                   // max plan/execute/refine rounds
	Timeout          time.Duration         // overall wall-clock budget for the run
}

// DeepResearchResult is the aggregate outcome of a research run.
type DeepResearchResult struct {
	FinalReport     string        // report text (also streamed to the session)
	Sources         []types.Chunk // URL-deduplicated source chunks
	SubQueries      []SubQuery    // all planned sub-queries with final status
	Insights        []string      // synthesized key findings
	FollowUpQueries []string      // suggested next questions for the user
	TotalSearches   int           // number of search requests issued
	TotalSources    int           // len(Sources)
	Duration        time.Duration // wall-clock time since the researcher was built
}

// SubQuery is one planned search with its lifecycle state
// ("pending" -> "searching" -> "complete"/"failed").
type SubQuery struct {
	Query    string
	Purpose  string // which aspect of the research this query covers
	Status   string
	Results  []types.Chunk
	Insights []string
}

// DeepResearcher holds the mutable state of one research run. mu guards the
// fields below it, which are touched concurrently by the errgroup workers
// spawned in executeIteration.
type DeepResearcher struct {
	cfg  DeepResearchConfig
	sess *session.Session

	mu          sync.Mutex
	allSources  []types.Chunk   // unique chunks accumulated across all sub-queries
	seenURLs    map[string]bool // URL dedup set
	subQueries  []SubQuery
	insights    []string
	searchCount int
	startTime   time.Time
}
|
||||
|
||||
func NewDeepResearcher(cfg DeepResearchConfig, sess *session.Session) *DeepResearcher {
|
||||
if cfg.MaxSearchQueries == 0 {
|
||||
cfg.MaxSearchQueries = 30
|
||||
}
|
||||
if cfg.MaxSources == 0 {
|
||||
cfg.MaxSources = 100
|
||||
}
|
||||
if cfg.MaxIterations == 0 {
|
||||
cfg.MaxIterations = 5
|
||||
}
|
||||
if cfg.Timeout == 0 {
|
||||
cfg.Timeout = 5 * time.Minute
|
||||
}
|
||||
|
||||
return &DeepResearcher{
|
||||
cfg: cfg,
|
||||
sess: sess,
|
||||
seenURLs: make(map[string]bool),
|
||||
allSources: make([]types.Chunk, 0),
|
||||
subQueries: make([]SubQuery, 0),
|
||||
insights: make([]string, 0),
|
||||
startTime: time.Now(),
|
||||
}
|
||||
}
|
||||
|
||||
// Research runs the full deep-research pipeline for query: plan sub-queries,
// execute them in bounded iterations, synthesize insights, and stream a
// final report into the session. The entire run is bounded by cfg.Timeout.
func (dr *DeepResearcher) Research(ctx context.Context, query string) (*DeepResearchResult, error) {
	ctx, cancel := context.WithTimeout(ctx, dr.cfg.Timeout)
	defer cancel()

	// Emit a research block up front so the client can render progress.
	researchBlockID := uuid.New().String()
	dr.sess.EmitBlock(&types.Block{
		ID:   researchBlockID,
		Type: types.BlockTypeResearch,
		Data: types.ResearchData{
			SubSteps: []types.ResearchSubStep{},
		},
	})

	subQueries, err := dr.planResearch(ctx, query)
	if err != nil {
		return nil, fmt.Errorf("planning failed: %w", err)
	}

	dr.updateResearchStatus(researchBlockID, "researching", fmt.Sprintf("Executing %d sub-queries", len(subQueries)))

	// Iterate until the iteration cap, the search budget, the source budget,
	// or the supply of follow-up queries is exhausted.
	// NOTE(review): dr.searchCount is read here without dr.mu; safe only
	// because executeIteration waits for all workers before returning —
	// confirm if that invariant ever changes.
	for i := 0; i < dr.cfg.MaxIterations && dr.searchCount < dr.cfg.MaxSearchQueries; i++ {
		if err := dr.executeIteration(ctx, i, researchBlockID); err != nil {
			// Per-iteration errors are tolerated; only a dead context stops
			// the loop.
			if ctx.Err() != nil {
				break
			}
		}

		if dr.hasEnoughData() {
			break
		}

		newQueries, err := dr.generateFollowUpQueries(ctx, query)
		if err != nil || len(newQueries) == 0 {
			break
		}

		// Queue follow-ups as pending work for the next iteration.
		for _, q := range newQueries {
			dr.mu.Lock()
			dr.subQueries = append(dr.subQueries, SubQuery{
				Query:   q.Query,
				Purpose: q.Purpose,
				Status:  "pending",
			})
			dr.mu.Unlock()
		}
	}

	dr.updateResearchStatus(researchBlockID, "synthesizing", "Analyzing findings")

	insights, err := dr.synthesizeInsights(ctx, query)
	if err != nil {
		// Fall back to whatever was cached during the run.
		insights = dr.insights
	}

	dr.updateResearchStatus(researchBlockID, "writing", "Generating report")

	report, err := dr.generateFinalReport(ctx, query, insights)
	if err != nil {
		return nil, fmt.Errorf("report generation failed: %w", err)
	}

	// Follow-up suggestions are best-effort; the error is deliberately
	// ignored.
	followUp, _ := dr.generateFollowUpSuggestions(ctx, query, report)

	dr.updateResearchStatus(researchBlockID, "complete", "Research complete")

	return &DeepResearchResult{
		FinalReport:     report,
		Sources:         dr.allSources,
		SubQueries:      dr.subQueries,
		Insights:        insights,
		FollowUpQueries: followUp,
		TotalSearches:   dr.searchCount,
		TotalSources:    len(dr.allSources),
		Duration:        time.Since(dr.startTime),
	}, nil
}
|
||||
|
||||
// planResearch asks the LLM to break the query into 3-5 sub-queries in a
// "QUERY:/PURPOSE:" line format. On LLM failure or an unparseable reply it
// degrades to the canned default plan instead of returning an error. The
// chosen plan is stored on dr.subQueries before returning.
func (dr *DeepResearcher) planResearch(ctx context.Context, query string) ([]SubQuery, error) {
	prompt := fmt.Sprintf(`Analyze this research query and break it into 3-5 sub-queries for comprehensive research.

Query: %s

For each sub-query, specify:
1. The search query (optimized for search engines)
2. The purpose (what aspect it addresses)

Respond in this exact format:
QUERY: [search query]
PURPOSE: [what this addresses]

QUERY: [search query]
PURPOSE: [what this addresses]

...

Be specific and actionable. Focus on different aspects: definitions, current state, history, expert opinions, data/statistics, controversies, future trends.`, query)

	result, err := dr.cfg.LLM.GenerateText(ctx, llm.StreamRequest{
		Messages: []llm.Message{{Role: "user", Content: prompt}},
	})
	if err != nil {
		// Planner failure is non-fatal: fall back to the default plan.
		return dr.generateDefaultSubQueries(query), nil
	}

	subQueries := dr.parseSubQueries(result)
	if len(subQueries) == 0 {
		subQueries = dr.generateDefaultSubQueries(query)
	}

	dr.mu.Lock()
	dr.subQueries = subQueries
	dr.mu.Unlock()

	return subQueries, nil
}
|
||||
|
||||
func (dr *DeepResearcher) parseSubQueries(text string) []SubQuery {
|
||||
var queries []SubQuery
|
||||
lines := strings.Split(text, "\n")
|
||||
|
||||
var currentQuery, currentPurpose string
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
if strings.HasPrefix(line, "QUERY:") {
|
||||
if currentQuery != "" && currentPurpose != "" {
|
||||
queries = append(queries, SubQuery{
|
||||
Query: currentQuery,
|
||||
Purpose: currentPurpose,
|
||||
Status: "pending",
|
||||
})
|
||||
}
|
||||
currentQuery = strings.TrimSpace(strings.TrimPrefix(line, "QUERY:"))
|
||||
currentPurpose = ""
|
||||
} else if strings.HasPrefix(line, "PURPOSE:") {
|
||||
currentPurpose = strings.TrimSpace(strings.TrimPrefix(line, "PURPOSE:"))
|
||||
}
|
||||
}
|
||||
|
||||
if currentQuery != "" && currentPurpose != "" {
|
||||
queries = append(queries, SubQuery{
|
||||
Query: currentQuery,
|
||||
Purpose: currentPurpose,
|
||||
Status: "pending",
|
||||
})
|
||||
}
|
||||
|
||||
return queries
|
||||
}
|
||||
|
||||
func (dr *DeepResearcher) generateDefaultSubQueries(query string) []SubQuery {
|
||||
return []SubQuery{
|
||||
{Query: query, Purpose: "Main query", Status: "pending"},
|
||||
{Query: query + " definition explained", Purpose: "Definitions and basics", Status: "pending"},
|
||||
{Query: query + " latest news 2026", Purpose: "Current developments", Status: "pending"},
|
||||
{Query: query + " expert analysis", Purpose: "Expert opinions", Status: "pending"},
|
||||
{Query: query + " statistics data research", Purpose: "Data and evidence", Status: "pending"},
|
||||
}
|
||||
}
|
||||
|
||||
// executeIteration runs up to three pending sub-queries concurrently via an
// errgroup bounded to the batch size, returning the first worker error (the
// caller treats errors as non-fatal unless the context is dead).
//
// NOTE(review): the iteration parameter is currently unused — presumably
// reserved for logging/telemetry; confirm before removing.
func (dr *DeepResearcher) executeIteration(ctx context.Context, iteration int, blockID string) error {
	// Snapshot the indices of pending sub-queries under the lock.
	dr.mu.Lock()
	pendingQueries := make([]int, 0)
	for i, sq := range dr.subQueries {
		if sq.Status == "pending" {
			pendingQueries = append(pendingQueries, i)
		}
	}
	dr.mu.Unlock()

	if len(pendingQueries) == 0 {
		return nil
	}

	// Process at most three sub-queries per iteration.
	batchSize := 3
	if len(pendingQueries) < batchSize {
		batchSize = len(pendingQueries)
	}

	g, gctx := errgroup.WithContext(ctx)
	g.SetLimit(batchSize)

	for _, idx := range pendingQueries[:batchSize] {
		idx := idx // per-goroutine copy (pre-Go 1.22 loop-variable semantics)
		g.Go(func() error {
			return dr.executeSubQuery(gctx, idx, blockID)
		})
	}

	// Wait for the whole batch before returning, so callers may read shared
	// state without racing the workers.
	return g.Wait()
}
|
||||
|
||||
func (dr *DeepResearcher) executeSubQuery(ctx context.Context, idx int, blockID string) error {
|
||||
dr.mu.Lock()
|
||||
if idx >= len(dr.subQueries) {
|
||||
dr.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
sq := &dr.subQueries[idx]
|
||||
sq.Status = "searching"
|
||||
query := sq.Query
|
||||
dr.searchCount++
|
||||
dr.mu.Unlock()
|
||||
|
||||
dr.updateResearchStatus(blockID, "researching", fmt.Sprintf("Searching: %s", truncate(query, 50)))
|
||||
|
||||
enhancedQuery := EnhanceQueryForFocusMode(query, dr.cfg.FocusMode)
|
||||
|
||||
results, err := dr.cfg.SearchClient.Search(ctx, enhancedQuery, &search.SearchOptions{
|
||||
Engines: dr.cfg.FocusMode.GetSearchEngines(),
|
||||
Categories: FocusModeConfigs[dr.cfg.FocusMode].Categories,
|
||||
PageNo: 1,
|
||||
})
|
||||
if err != nil {
|
||||
dr.mu.Lock()
|
||||
sq.Status = "failed"
|
||||
dr.mu.Unlock()
|
||||
return err
|
||||
}
|
||||
|
||||
chunks := make([]types.Chunk, 0)
|
||||
for _, r := range results.Results {
|
||||
dr.mu.Lock()
|
||||
if dr.seenURLs[r.URL] {
|
||||
dr.mu.Unlock()
|
||||
continue
|
||||
}
|
||||
dr.seenURLs[r.URL] = true
|
||||
dr.mu.Unlock()
|
||||
|
||||
chunk := r.ToChunk()
|
||||
chunks = append(chunks, chunk)
|
||||
|
||||
if len(chunks) >= 10 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
dr.mu.Lock()
|
||||
sq.Results = chunks
|
||||
sq.Status = "complete"
|
||||
dr.allSources = append(dr.allSources, chunks...)
|
||||
dr.mu.Unlock()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// generateFollowUpQueries asks the LLM for 2-3 gap-filling queries based on
// a bounded summary of the sources gathered so far. It returns (nil, nil)
// when fewer than five searches remain in the budget, leaving headroom to
// actually execute the follow-ups.
func (dr *DeepResearcher) generateFollowUpQueries(ctx context.Context, originalQuery string) ([]SubQuery, error) {
	if dr.searchCount >= dr.cfg.MaxSearchQueries-5 {
		return nil, nil
	}

	// Summarize at most 20 sources for the prompt.
	var sourceSummary strings.Builder
	dr.mu.Lock()
	for i, s := range dr.allSources {
		if i >= 20 {
			break
		}
		sourceSummary.WriteString(fmt.Sprintf("- %s: %s\n", s.Metadata["title"], truncate(s.Content, 100)))
	}
	dr.mu.Unlock()

	prompt := fmt.Sprintf(`Based on the original query and sources found so far, suggest 2-3 follow-up queries to deepen the research.

Original query: %s

Sources found so far:
%s

What aspects are missing? What would provide more comprehensive coverage?
Respond with queries in format:
QUERY: [query]
PURPOSE: [what gap it fills]`, originalQuery, sourceSummary.String())

	result, err := dr.cfg.LLM.GenerateText(ctx, llm.StreamRequest{
		Messages: []llm.Message{{Role: "user", Content: prompt}},
	})
	if err != nil {
		return nil, err
	}

	return dr.parseSubQueries(result), nil
}
|
||||
|
||||
func (dr *DeepResearcher) synthesizeInsights(ctx context.Context, query string) ([]string, error) {
|
||||
var sourcesText strings.Builder
|
||||
dr.mu.Lock()
|
||||
for i, s := range dr.allSources {
|
||||
if i >= 30 {
|
||||
break
|
||||
}
|
||||
sourcesText.WriteString(fmt.Sprintf("[%d] %s\n%s\n\n", i+1, s.Metadata["title"], truncate(s.Content, 300)))
|
||||
}
|
||||
dr.mu.Unlock()
|
||||
|
||||
prompt := fmt.Sprintf(`Analyze these sources and extract 5-7 key insights for the query: %s
|
||||
|
||||
Sources:
|
||||
%s
|
||||
|
||||
Provide insights as bullet points, each starting with a key finding.
|
||||
Focus on: main conclusions, patterns, contradictions, expert consensus, data points.`, query, sourcesText.String())
|
||||
|
||||
result, err := dr.cfg.LLM.GenerateText(ctx, llm.StreamRequest{
|
||||
Messages: []llm.Message{{Role: "user", Content: prompt}},
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
insights := make([]string, 0)
|
||||
for _, line := range strings.Split(result, "\n") {
|
||||
line = strings.TrimSpace(line)
|
||||
if strings.HasPrefix(line, "-") || strings.HasPrefix(line, "•") || strings.HasPrefix(line, "*") {
|
||||
insights = append(insights, strings.TrimPrefix(strings.TrimPrefix(strings.TrimPrefix(line, "-"), "•"), "*"))
|
||||
}
|
||||
}
|
||||
|
||||
dr.mu.Lock()
|
||||
dr.insights = insights
|
||||
dr.mu.Unlock()
|
||||
|
||||
return insights, nil
|
||||
}
|
||||
|
||||
// generateFinalReport streams the final report from the LLM into a fresh
// text block on the session while accumulating it for the result. Up to 50
// sources and the synthesized insights are inlined into the prompt.
func (dr *DeepResearcher) generateFinalReport(ctx context.Context, query string, insights []string) (string, error) {
	var sourcesText strings.Builder
	// Copy the slice header under the lock. Elements are only ever appended
	// (not mutated in place) elsewhere, and no search workers run during
	// report generation, so reading after unlock is safe here.
	dr.mu.Lock()
	sources := dr.allSources
	dr.mu.Unlock()

	for i, s := range sources {
		if i >= 50 {
			break
		}
		sourcesText.WriteString(fmt.Sprintf("[%d] %s (%s)\n%s\n\n", i+1, s.Metadata["title"], s.Metadata["url"], truncate(s.Content, 400)))
	}

	insightsText := strings.Join(insights, "\n- ")

	focusCfg := FocusModeConfigs[dr.cfg.FocusMode]
	locale := dr.cfg.Locale
	if locale == "" {
		locale = "en"
	}

	// Only Russian gets an explicit language instruction; other locales rely
	// on the model following the query's language.
	langInstruction := ""
	if locale == "ru" {
		langInstruction = "Write the report in Russian."
	}

	prompt := fmt.Sprintf(`%s

Write a comprehensive research report answering: %s

Key insights discovered:
- %s

Sources (cite using [1], [2], etc.):
%s

Structure your report with:
1. Executive Summary (2-3 sentences)
2. Key Findings (organized by theme)
3. Analysis and Discussion
4. Conclusions

%s
Use citations [1], [2], etc. throughout.
Be thorough but concise. Focus on actionable information.`, focusCfg.SystemPrompt, query, insightsText, sourcesText.String(), langInstruction)

	stream, err := dr.cfg.LLM.StreamText(ctx, llm.StreamRequest{
		Messages: []llm.Message{{Role: "user", Content: prompt}},
	})
	if err != nil {
		return "", err
	}

	// Emit an empty text block first, then append chunks as they arrive so
	// the client sees the report stream in.
	var report strings.Builder
	textBlockID := uuid.New().String()
	dr.sess.EmitBlock(&types.Block{
		ID:   textBlockID,
		Type: types.BlockTypeText,
		Data: "",
	})

	for chunk := range stream {
		report.WriteString(chunk.ContentChunk)
		dr.sess.EmitTextChunk(textBlockID, chunk.ContentChunk)
	}

	return report.String(), nil
}
|
||||
|
||||
// generateFollowUpSuggestions asks the LLM for 3-4 follow-up questions based
// on the query and a truncated report, returning at most four cleaned lines.
func (dr *DeepResearcher) generateFollowUpSuggestions(ctx context.Context, query, report string) ([]string, error) {
	prompt := fmt.Sprintf(`Based on this research query and report, suggest 3-4 follow-up questions the user might want to explore:

Query: %s

Report summary: %s

Provide follow-up questions that:
1. Go deeper into specific aspects
2. Explore related topics
3. Address practical applications
4. Consider alternative perspectives

Format as simple questions, one per line.`, query, truncate(report, 1000))

	result, err := dr.cfg.LLM.GenerateText(ctx, llm.StreamRequest{
		Messages: []llm.Message{{Role: "user", Content: prompt}},
	})
	if err != nil {
		return nil, err
	}

	suggestions := make([]string, 0)
	for _, line := range strings.Split(result, "\n") {
		line = strings.TrimSpace(line)
		// Keep lines that look like questions. Note the length check is in
		// bytes, so short non-question Cyrillic lines can slip through.
		if line != "" && (strings.Contains(line, "?") || len(line) > 20) {
			// Strip common bullet and "1. " numbering prefixes.
			line = strings.TrimPrefix(line, "- ")
			line = strings.TrimPrefix(line, "• ")
			line = strings.TrimLeft(line, "0123456789. ")
			if line != "" {
				suggestions = append(suggestions, line)
			}
		}
	}

	if len(suggestions) > 4 {
		suggestions = suggestions[:4]
	}

	return suggestions, nil
}
|
||||
|
||||
func (dr *DeepResearcher) updateResearchStatus(blockID, status, message string) {
|
||||
dr.sess.UpdateBlock(blockID, []session.Patch{
|
||||
{Op: "replace", Path: "/data/status", Value: status},
|
||||
{Op: "replace", Path: "/data/message", Value: message},
|
||||
})
|
||||
}
|
||||
|
||||
func (dr *DeepResearcher) hasEnoughData() bool {
|
||||
dr.mu.Lock()
|
||||
defer dr.mu.Unlock()
|
||||
return len(dr.allSources) >= dr.cfg.MaxSources
|
||||
}
|
||||
|
||||
// truncate shortens s to at most maxLen characters (runes), appending "..."
// when anything was cut. The original sliced bytes (s[:maxLen]), which could
// split a multi-byte UTF-8 rune — a real hazard given the Cyrillic queries
// and content this package handles — and emit invalid UTF-8.
func truncate(s string, maxLen int) string {
	r := []rune(s)
	if len(r) <= maxLen {
		return s
	}
	return string(r[:maxLen]) + "..."
}
|
||||
|
||||
// RunDeepResearch is the package-level entry point: it wires a researcher to
// the session and runs the full research pipeline for query.
func RunDeepResearch(ctx context.Context, sess *session.Session, query string, cfg DeepResearchConfig) (*DeepResearchResult, error) {
	researcher := NewDeepResearcher(cfg, sess)
	return researcher.Research(ctx, query)
}
|
||||
293
backend/internal/agent/focus_modes.go
Normal file
293
backend/internal/agent/focus_modes.go
Normal file
@@ -0,0 +1,293 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// FocusMode selects a search/answering profile (engines, categories, system
// prompt) for a query.
type FocusMode string

// Supported focus modes. The string values are the identifiers used by
// clients and stored in FocusModeConfigs keys.
const (
	FocusModeAll      FocusMode = "all"      // general-purpose web search
	FocusModeAcademic FocusMode = "academic" // scholarly sources
	FocusModeWriting  FocusMode = "writing"  // writing help, minimal search
	FocusModeYouTube  FocusMode = "youtube"  // video content
	FocusModeReddit   FocusMode = "reddit"   // community discussions
	FocusModeCode     FocusMode = "code"     // programming/technical
	FocusModeNews     FocusMode = "news"     // current events
	FocusModeImages   FocusMode = "images"   // image search
	FocusModeMath     FocusMode = "math"     // step-by-step math
	FocusModeFinance  FocusMode = "finance"  // market/financial data
)
|
||||
|
||||
// FocusModeConfig describes how a focus mode searches and answers.
type FocusModeConfig struct {
	Mode              FocusMode
	Engines           []string // search engines to query
	Categories        []string // search-backend categories (e.g. SearXNG)
	SystemPrompt      string   // system prompt for the answering LLM
	SearchQueryPrefix string   // extra query text, e.g. "site:reddit.com" — applied by the search layer, not here
	MaxSources        int      // cap on sources fed to the LLM
	RequiresCitation  bool     // whether answers must include [n] citations
	AllowScraping     bool     // whether full-page scraping is permitted
}
|
||||
|
||||
// FocusModeConfigs maps each focus mode to its search and prompting profile.
// Unknown modes should be resolved through GetFocusModeConfig, which falls
// back to FocusModeAll.
var FocusModeConfigs = map[FocusMode]FocusModeConfig{
	// General-purpose default profile.
	FocusModeAll: {
		Mode:             FocusModeAll,
		Engines:          []string{"google", "bing", "duckduckgo"},
		Categories:       []string{"general"},
		MaxSources:       15,
		RequiresCitation: true,
		AllowScraping:    true,
		SystemPrompt: `You are a helpful AI assistant that provides comprehensive answers based on web search results.
Always cite your sources using [1], [2], etc. format.
Provide balanced, accurate information from multiple perspectives.`,
	},
	// Scholarly sources; highest source budget.
	FocusModeAcademic: {
		Mode:              FocusModeAcademic,
		Engines:           []string{"google scholar", "arxiv", "pubmed", "semantic scholar"},
		Categories:        []string{"science"},
		SearchQueryPrefix: "research paper",
		MaxSources:        20,
		RequiresCitation:  true,
		AllowScraping:     true,
		SystemPrompt: `You are an academic research assistant specializing in scholarly sources.
Focus on peer-reviewed papers, academic journals, and reputable research institutions.
Always cite sources in academic format with [1], [2], etc.
Distinguish between primary research, meta-analyses, and review articles.
Mention publication dates, authors, and journals when available.
Be precise about confidence levels and note when findings are preliminary or contested.`,
	},
	// Writing help: no citations, no scraping, minimal sources.
	FocusModeWriting: {
		Mode:             FocusModeWriting,
		Engines:          []string{"google"},
		Categories:       []string{"general"},
		MaxSources:       5,
		RequiresCitation: false,
		AllowScraping:    false,
		SystemPrompt: `You are a creative writing assistant.
Help with drafting, editing, and improving written content.
Provide suggestions for style, tone, structure, and clarity.
Offer multiple variations when appropriate.
Focus on the user's voice and intent rather than web search results.`,
	},
	FocusModeYouTube: {
		Mode:              FocusModeYouTube,
		Engines:           []string{"youtube"},
		Categories:        []string{"videos"},
		SearchQueryPrefix: "site:youtube.com",
		MaxSources:        10,
		RequiresCitation:  true,
		AllowScraping:     false,
		SystemPrompt: `You are a video content assistant focused on YouTube.
Summarize video content, recommend relevant videos, and help find tutorials.
Mention video titles, channels, and approximate timestamps when relevant.
Note view counts and upload dates to indicate video popularity and relevance.`,
	},
	FocusModeReddit: {
		Mode:              FocusModeReddit,
		Engines:           []string{"reddit"},
		Categories:        []string{"social media"},
		SearchQueryPrefix: "site:reddit.com",
		MaxSources:        15,
		RequiresCitation:  true,
		AllowScraping:     true,
		SystemPrompt: `You are an assistant that specializes in Reddit discussions and community knowledge.
Focus on highly upvoted comments and posts from relevant subreddits.
Note the subreddit source, upvote counts, and community consensus.
Distinguish between personal opinions, experiences, and factual claims.
Be aware of potential biases in specific communities.`,
	},
	FocusModeCode: {
		Mode:              FocusModeCode,
		Engines:           []string{"google", "github", "stackoverflow"},
		Categories:        []string{"it"},
		SearchQueryPrefix: "",
		MaxSources:        10,
		RequiresCitation:  true,
		AllowScraping:     true,
		SystemPrompt: `You are a programming assistant focused on code, documentation, and technical solutions.
Provide working code examples with explanations.
Reference official documentation, Stack Overflow answers, and GitHub repositories.
Mention library versions and compatibility considerations.
Follow best practices and coding standards for the relevant language/framework.
Include error handling and edge cases in code examples.`,
	},
	FocusModeNews: {
		Mode:             FocusModeNews,
		Engines:          []string{"google news", "bing news"},
		Categories:       []string{"news"},
		MaxSources:       12,
		RequiresCitation: true,
		AllowScraping:    true,
		SystemPrompt: `You are a news assistant that provides current events information.
Focus on recent, verified news from reputable sources.
Distinguish between breaking news, analysis, and opinion pieces.
Note publication dates and source credibility.
Present multiple perspectives on controversial topics.`,
	},
	FocusModeImages: {
		Mode:             FocusModeImages,
		Engines:          []string{"google images", "bing images"},
		Categories:       []string{"images"},
		MaxSources:       20,
		RequiresCitation: true,
		AllowScraping:    false,
		SystemPrompt: `You are an image search assistant.
Help find relevant images, describe image sources, and provide context.
Note image sources, licenses, and quality when relevant.`,
	},
	FocusModeMath: {
		Mode:             FocusModeMath,
		Engines:          []string{"wolfram alpha", "google"},
		Categories:       []string{"science"},
		MaxSources:       5,
		RequiresCitation: true,
		AllowScraping:    false,
		SystemPrompt: `You are a mathematical problem-solving assistant.
Provide step-by-step solutions with clear explanations.
Use proper mathematical notation and formatting.
Show your work and explain the reasoning behind each step.
Mention relevant theorems, formulas, and mathematical concepts.
Verify your calculations and provide alternative solution methods when applicable.`,
	},
	FocusModeFinance: {
		Mode:              FocusModeFinance,
		Engines:           []string{"google", "google finance", "yahoo finance"},
		Categories:        []string{"news"},
		SearchQueryPrefix: "stock market finance",
		MaxSources:        10,
		RequiresCitation:  true,
		AllowScraping:     true,
		SystemPrompt: `You are a financial information assistant.
Provide accurate financial data, market analysis, and investment information.
Note that you cannot provide personalized financial advice.
Cite data sources and note when data may be delayed or historical.
Include relevant disclaimers about investment risks.
Reference SEC filings, analyst reports, and official company statements.`,
	},
}
|
||||
|
||||
func GetFocusModeConfig(mode string) FocusModeConfig {
|
||||
fm := FocusMode(strings.ToLower(mode))
|
||||
if cfg, ok := FocusModeConfigs[fm]; ok {
|
||||
return cfg
|
||||
}
|
||||
return FocusModeConfigs[FocusModeAll]
|
||||
}
|
||||
|
||||
func DetectFocusMode(query string) FocusMode {
|
||||
queryLower := strings.ToLower(query)
|
||||
|
||||
academicKeywords := []string{
|
||||
"research", "paper", "study", "journal", "scientific", "academic",
|
||||
"peer-reviewed", "citation", "исследование", "научн", "статья",
|
||||
"публикация", "диссертация",
|
||||
}
|
||||
for _, kw := range academicKeywords {
|
||||
if strings.Contains(queryLower, kw) {
|
||||
return FocusModeAcademic
|
||||
}
|
||||
}
|
||||
|
||||
codeKeywords := []string{
|
||||
"code", "programming", "function", "error", "bug", "api",
|
||||
"library", "framework", "syntax", "compile", "debug",
|
||||
"код", "программ", "функция", "ошибка", "библиотека",
|
||||
"golang", "python", "javascript", "typescript", "react", "vue",
|
||||
"docker", "kubernetes", "sql", "database", "git",
|
||||
}
|
||||
for _, kw := range codeKeywords {
|
||||
if strings.Contains(queryLower, kw) {
|
||||
return FocusModeCode
|
||||
}
|
||||
}
|
||||
|
||||
if strings.Contains(queryLower, "youtube") ||
|
||||
strings.Contains(queryLower, "video tutorial") ||
|
||||
strings.Contains(queryLower, "видео") {
|
||||
return FocusModeYouTube
|
||||
}
|
||||
|
||||
if strings.Contains(queryLower, "reddit") ||
|
||||
strings.Contains(queryLower, "subreddit") ||
|
||||
strings.Contains(queryLower, "/r/") {
|
||||
return FocusModeReddit
|
||||
}
|
||||
|
||||
mathKeywords := []string{
|
||||
"calculate", "solve", "equation", "integral", "derivative",
|
||||
"formula", "theorem", "proof", "вычисл", "решить", "уравнение",
|
||||
"интеграл", "производная", "формула", "теорема",
|
||||
}
|
||||
for _, kw := range mathKeywords {
|
||||
if strings.Contains(queryLower, kw) {
|
||||
return FocusModeMath
|
||||
}
|
||||
}
|
||||
|
||||
financeKeywords := []string{
|
||||
"stock", "market", "invest", "price", "trading", "finance",
|
||||
"акци", "рынок", "инвест", "биржа", "котировк", "финанс",
|
||||
"etf", "dividend", "portfolio",
|
||||
}
|
||||
for _, kw := range financeKeywords {
|
||||
if strings.Contains(queryLower, kw) {
|
||||
return FocusModeFinance
|
||||
}
|
||||
}
|
||||
|
||||
newsKeywords := []string{
|
||||
"news", "today", "latest", "breaking", "current events",
|
||||
"новост", "сегодня", "последн", "актуальн",
|
||||
}
|
||||
for _, kw := range newsKeywords {
|
||||
if strings.Contains(queryLower, kw) {
|
||||
return FocusModeNews
|
||||
}
|
||||
}
|
||||
|
||||
return FocusModeAll
|
||||
}
|
||||
|
||||
func (f FocusMode) GetSearchEngines() []string {
|
||||
if cfg, ok := FocusModeConfigs[f]; ok {
|
||||
return cfg.Engines
|
||||
}
|
||||
return FocusModeConfigs[FocusModeAll].Engines
|
||||
}
|
||||
|
||||
func (f FocusMode) GetSystemPrompt() string {
|
||||
if cfg, ok := FocusModeConfigs[f]; ok {
|
||||
return cfg.SystemPrompt
|
||||
}
|
||||
return FocusModeConfigs[FocusModeAll].SystemPrompt
|
||||
}
|
||||
|
||||
func (f FocusMode) GetMaxSources() int {
|
||||
if cfg, ok := FocusModeConfigs[f]; ok {
|
||||
return cfg.MaxSources
|
||||
}
|
||||
return 15
|
||||
}
|
||||
|
||||
func (f FocusMode) RequiresCitation() bool {
|
||||
if cfg, ok := FocusModeConfigs[f]; ok {
|
||||
return cfg.RequiresCitation
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (f FocusMode) AllowsScraping() bool {
|
||||
if cfg, ok := FocusModeConfigs[f]; ok {
|
||||
return cfg.AllowScraping
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func EnhanceQueryForFocusMode(query string, mode FocusMode) string {
|
||||
cfg := FocusModeConfigs[mode]
|
||||
if cfg.SearchQueryPrefix != "" {
|
||||
return cfg.SearchQueryPrefix + " " + query
|
||||
}
|
||||
return query
|
||||
}
|
||||
950
backend/internal/agent/orchestrator.go
Normal file
950
backend/internal/agent/orchestrator.go
Normal file
@@ -0,0 +1,950 @@
|
||||
package agent
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"regexp"
	"sort"
	"strings"
	"time"
	"unicode/utf8"

	"github.com/gooseek/backend/internal/llm"
	"github.com/gooseek/backend/internal/prompts"
	"github.com/gooseek/backend/internal/search"
	"github.com/gooseek/backend/internal/session"
	"github.com/gooseek/backend/internal/types"
	"github.com/google/uuid"
	"golang.org/x/sync/errgroup"
)
|
||||
|
||||
// Mode selects the latency/quality trade-off for an orchestrator run.
type Mode string

const (
	ModeSpeed    Mode = "speed"    // fastest path: heuristic classification, smaller context
	ModeBalanced Mode = "balanced" // middle ground
	ModeQuality  Mode = "quality"  // eligible for deep research and clarifying questions
)
|
||||
|
||||
// OrchestratorConfig carries the service clients and per-request settings
// for a single orchestrator run.
type OrchestratorConfig struct {
	LLM          llm.Client            // chat/completion backend
	SearchClient *search.SearXNGClient // metasearch client
	Mode         Mode                  // speed / balanced / quality
	FocusMode    FocusMode             // topical focus; auto-detected from the query when empty

	Sources []string // caller-restricted source list (passed through to research)

	// File / collection context attached to the conversation.
	FileIDs           []string
	FileContext       string // extracted text of uploaded files
	CollectionID      string
	CollectionContext string

	SystemInstructions string // extra instructions merged into the writer prompt
	Locale             string // UI locale, e.g. "ru"
	MemoryContext      string // prior-conversation context injected into the prompt
	UserMemory         string // persistent user preferences
	AnswerMode         string // answer formatting mode forwarded to the writer prompt

	ResponsePrefs *ResponsePrefs // optional format/length/tone preferences — TODO confirm where these are consumed
	LearningMode  bool           // presumably toggles a tutoring style; not used in this file — verify against callers

	EnableDeepResearch bool // allow the multi-iteration deep-research path (quality mode only)
	EnableClarifying   bool // allow emitting clarifying questions instead of answering

	// Downstream service base URLs.
	DiscoverSvcURL   string // discover-svc (pre-generated digests)
	Crawl4AIURL      string // Crawl4AI scraping service
	CollectionSvcURL string
	FileSvcURL       string
}
|
||||
|
||||
// DigestResponse is a pre-generated article digest returned by discover-svc.
type DigestResponse struct {
	SummaryRu    string           `json:"summaryRu"`    // Russian-language summary text
	Citations    []DigestCitation `json:"citations"`    // sources backing the summary
	FollowUp     []string         `json:"followUp"`     // suggested follow-up questions
	SourcesCount int              `json:"sourcesCount"` // total sources considered upstream
	ClusterTitle string           `json:"clusterTitle"` // title of the story cluster the article belongs to
}
|
||||
|
||||
// DigestCitation is one cited source inside a DigestResponse.
type DigestCitation struct {
	Index  int    `json:"index"`  // 1-based citation number used in the summary — TODO confirm base
	URL    string `json:"url"`    // source URL
	Title  string `json:"title"`  // source title
	Domain string `json:"domain"` // source domain, for display
}
|
||||
|
||||
// PreScrapedArticle holds the scraped text of an article fetched ahead of
// the main research pass (used for "Summary: <url>" requests).
type PreScrapedArticle struct {
	Title   string // extracted page title ("" when unavailable)
	Content string // plain-text/markdown body, truncated to 15000 bytes by the scrapers
	URL     string // the original article URL
}
|
||||
|
||||
// ResponsePrefs captures optional user preferences for the final answer.
// All fields are free-form strings; empty means "no preference".
type ResponsePrefs struct {
	Format string `json:"format,omitempty"` // e.g. bullet list vs prose — TODO confirm accepted values
	Length string `json:"length,omitempty"` // desired answer length
	Tone   string `json:"tone,omitempty"`   // desired tone of voice
}
|
||||
|
||||
// OrchestratorInput bundles the conversation state and configuration for
// one RunOrchestrator call.
type OrchestratorInput struct {
	ChatHistory []llm.Message      // prior conversation turns
	FollowUp    string             // the user's latest message
	Config      OrchestratorConfig // run configuration and service clients
}
|
||||
|
||||
// RunOrchestrator is the entry point for answering one user turn. It routes
// the request to one of three pipelines:
//   - deep research (quality mode with EnableDeepResearch),
//   - speed mode (heuristic classification, minimal context), or
//   - the full pipeline (LLM classification + research + writer).
//
// All output is streamed to the caller through sess; the returned error is
// non-nil only when the selected pipeline fails.
func RunOrchestrator(ctx context.Context, sess *session.Session, input OrchestratorInput) error {
	detectedLang := detectLanguage(input.FollowUp)
	// "Summary: <url>" is a magic prefix used by the discover feed to request
	// an article digest; it forces the full pipeline even in speed mode.
	isArticleSummary := strings.HasPrefix(strings.TrimSpace(input.FollowUp), "Summary: ")

	// Auto-detect a topical focus when the caller didn't pick one.
	if input.Config.FocusMode == "" {
		input.Config.FocusMode = DetectFocusMode(input.FollowUp)
	}

	if input.Config.EnableDeepResearch && input.Config.Mode == ModeQuality {
		return runDeepResearchMode(ctx, sess, input, detectedLang)
	}

	if input.Config.Mode == ModeSpeed && !isArticleSummary {
		return runSpeedMode(ctx, sess, input, detectedLang)
	}

	return runFullMode(ctx, sess, input, detectedLang, isArticleSummary)
}
|
||||
|
||||
// runDeepResearchMode runs the multi-iteration deep-research pipeline and
// streams its progress, sources, and follow-up questions to the session.
// The lang parameter is currently unused here; the researcher works from
// Config.Locale.
func runDeepResearchMode(ctx context.Context, sess *session.Session, input OrchestratorInput, lang string) error {
	// Show the research progress UI immediately.
	sess.EmitBlock(types.NewResearchBlock(uuid.New().String()))

	researcher := NewDeepResearcher(DeepResearchConfig{
		LLM:              input.Config.LLM,
		SearchClient:     input.Config.SearchClient,
		FocusMode:        input.Config.FocusMode,
		Locale:           input.Config.Locale,
		MaxSearchQueries: 30,
		MaxSources:       100,
		MaxIterations:    5,
		Timeout:          5 * time.Minute,
	}, sess)

	result, err := researcher.Research(ctx, input.FollowUp)
	if err != nil {
		sess.EmitError(err)
		return err
	}

	sess.EmitBlock(types.NewSourceBlock(uuid.New().String(), result.Sources))

	if len(result.FollowUpQueries) > 0 {
		sess.EmitBlock(types.NewWidgetBlock(uuid.New().String(), "related_questions", map[string]interface{}{
			"questions": result.FollowUpQueries,
		}))
	}

	sess.EmitResearchComplete()
	sess.EmitEnd()

	return nil
}
|
||||
|
||||
func generateClarifyingQuestions(ctx context.Context, llmClient llm.Client, query string) ([]string, error) {
|
||||
prompt := fmt.Sprintf(`Analyze this query and determine if clarifying questions would help provide a better answer.
|
||||
|
||||
Query: %s
|
||||
|
||||
If the query is:
|
||||
- Clear and specific → respond with "CLEAR"
|
||||
- Ambiguous or could benefit from clarification → provide 2-3 short clarifying questions
|
||||
|
||||
Format:
|
||||
CLEAR
|
||||
or
|
||||
QUESTION: [question 1]
|
||||
QUESTION: [question 2]
|
||||
QUESTION: [question 3]`, query)
|
||||
|
||||
result, err := llmClient.GenerateText(ctx, llm.StreamRequest{
|
||||
Messages: []llm.Message{{Role: "user", Content: prompt}},
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if strings.Contains(strings.ToUpper(result), "CLEAR") {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var questions []string
|
||||
for _, line := range strings.Split(result, "\n") {
|
||||
line = strings.TrimSpace(line)
|
||||
if strings.HasPrefix(line, "QUESTION:") {
|
||||
q := strings.TrimSpace(strings.TrimPrefix(line, "QUESTION:"))
|
||||
if q != "" {
|
||||
questions = append(questions, q)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return questions, nil
|
||||
}
|
||||
|
||||
// generateRelatedQuestions asks the LLM for up to 4 follow-up questions
// based on the query and (truncated) answer. Best-effort: LLM failures
// return nil rather than an error. Lines are kept only when they contain a
// "?" and are longer than 10 bytes, then stripped of list markers.
func generateRelatedQuestions(ctx context.Context, llmClient llm.Client, query, answer string, locale string) []string {
	langInstruction := ""
	if locale == "ru" {
		langInstruction = "Generate questions in Russian."
	}

	prompt := fmt.Sprintf(`Based on this query and answer, generate 3-4 related follow-up questions the user might want to explore.

Query: %s

Answer summary: %s

%s
Format: One question per line, no numbering or bullets.`, query, truncateForPrompt(answer, 500), langInstruction)

	result, err := llmClient.GenerateText(ctx, llm.StreamRequest{
		Messages: []llm.Message{{Role: "user", Content: prompt}},
	})
	if err != nil {
		// Best-effort feature: silently skip on failure.
		return nil
	}

	var questions []string
	for _, line := range strings.Split(result, "\n") {
		line = strings.TrimSpace(line)
		if line != "" && len(line) > 10 && strings.Contains(line, "?") {
			// Strip numbering / bullet prefixes the model may add anyway.
			line = strings.TrimLeft(line, "0123456789.-•* ")
			if line != "" {
				questions = append(questions, line)
			}
		}
	}

	// Cap at 4 questions for the UI widget.
	if len(questions) > 4 {
		questions = questions[:4]
	}

	return questions
}
|
||||
|
||||
func truncateForPrompt(s string, maxLen int) string {
|
||||
if len(s) <= maxLen {
|
||||
return s
|
||||
}
|
||||
return s[:maxLen] + "..."
|
||||
}
|
||||
|
||||
func buildEnhancedContext(input OrchestratorInput) string {
|
||||
var ctx strings.Builder
|
||||
|
||||
if input.Config.UserMemory != "" {
|
||||
ctx.WriteString("## User Preferences\n")
|
||||
ctx.WriteString(input.Config.UserMemory)
|
||||
ctx.WriteString("\n\n")
|
||||
}
|
||||
|
||||
if input.Config.CollectionContext != "" {
|
||||
ctx.WriteString("## Collection Context\n")
|
||||
ctx.WriteString(input.Config.CollectionContext)
|
||||
ctx.WriteString("\n\n")
|
||||
}
|
||||
|
||||
if input.Config.FileContext != "" {
|
||||
ctx.WriteString("## Uploaded Files Content\n")
|
||||
ctx.WriteString(input.Config.FileContext)
|
||||
ctx.WriteString("\n\n")
|
||||
}
|
||||
|
||||
if input.Config.MemoryContext != "" {
|
||||
ctx.WriteString("## Previous Context\n")
|
||||
ctx.WriteString(input.Config.MemoryContext)
|
||||
ctx.WriteString("\n\n")
|
||||
}
|
||||
|
||||
return ctx.String()
|
||||
}
|
||||
|
||||
// fetchPreGeneratedDigest asks discover-svc for a ready-made digest of
// articleURL. It is deliberately lenient: a missing service URL, non-200
// status, or an incomplete digest (no summary or no citations) all yield
// (nil, nil) so the caller falls back to live research; only transport and
// decode failures return an error.
func fetchPreGeneratedDigest(ctx context.Context, discoverURL, articleURL string) (*DigestResponse, error) {
	if discoverURL == "" {
		return nil, nil
	}

	reqURL := fmt.Sprintf("%s/api/v1/discover/digest?url=%s",
		strings.TrimSuffix(discoverURL, "/"),
		url.QueryEscape(articleURL))

	req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
	if err != nil {
		return nil, err
	}

	// Short timeout: this is an optional fast path, not worth waiting on.
	client := &http.Client{Timeout: 3 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, nil
	}

	var digest DigestResponse
	if err := json.NewDecoder(resp.Body).Decode(&digest); err != nil {
		return nil, err
	}

	// Only accept a digest complete enough to render (summary + citations).
	if digest.SummaryRu != "" && len(digest.Citations) > 0 {
		return &digest, nil
	}

	return nil, nil
}
|
||||
|
||||
func preScrapeArticleURL(ctx context.Context, crawl4aiURL, articleURL string) (*PreScrapedArticle, error) {
|
||||
if crawl4aiURL != "" {
|
||||
article, err := scrapeWithCrawl4AI(ctx, crawl4aiURL, articleURL)
|
||||
if err == nil && article != nil {
|
||||
return article, nil
|
||||
}
|
||||
}
|
||||
|
||||
return scrapeDirectly(ctx, articleURL)
|
||||
}
|
||||
|
||||
func scrapeWithCrawl4AI(ctx context.Context, crawl4aiURL, articleURL string) (*PreScrapedArticle, error) {
|
||||
reqBody := fmt.Sprintf(`{
|
||||
"urls": ["%s"],
|
||||
"crawler_config": {
|
||||
"type": "CrawlerRunConfig",
|
||||
"params": {
|
||||
"cache_mode": "default",
|
||||
"page_timeout": 20000
|
||||
}
|
||||
}
|
||||
}`, articleURL)
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", crawl4aiURL+"/crawl", strings.NewReader(reqBody))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
client := &http.Client{Timeout: 25 * time.Second}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("Crawl4AI returned status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
markdown := extractMarkdownFromCrawl4AI(string(body))
|
||||
title := extractTitleFromCrawl4AI(string(body))
|
||||
|
||||
if len(markdown) > 100 {
|
||||
content := markdown
|
||||
if len(content) > 15000 {
|
||||
content = content[:15000]
|
||||
}
|
||||
return &PreScrapedArticle{
|
||||
Title: title,
|
||||
Content: content,
|
||||
URL: articleURL,
|
||||
}, nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("insufficient content from Crawl4AI")
|
||||
}
|
||||
|
||||
// scrapeDirectly fetches articleURL over plain HTTP and strips the HTML
// down to text using the package-level regexps. Returns an error on
// transport failure, non-200 status, or when fewer than 100 bytes of text
// survive extraction. Content is capped at 15000 bytes.
//
// NOTE(review): the 15000-byte cap slices at a byte index and can split a
// multi-byte UTF-8 rune at the cut point — confirm downstream consumers
// tolerate a trailing partial rune.
func scrapeDirectly(ctx context.Context, articleURL string) (*PreScrapedArticle, error) {
	req, err := http.NewRequestWithContext(ctx, "GET", articleURL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", "GooSeek-Agent/1.0")
	req.Header.Set("Accept", "text/html")

	client := &http.Client{Timeout: 10 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("HTTP %d", resp.StatusCode)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	html := string(body)
	title := extractHTMLTitle(html)
	content := extractTextContent(html)

	// Reject pages that reduce to almost nothing (paywalls, bot walls, …).
	if len(content) < 100 {
		return nil, fmt.Errorf("insufficient content")
	}

	if len(content) > 15000 {
		content = content[:15000]
	}

	return &PreScrapedArticle{
		Title:   title,
		Content: content,
		URL:     articleURL,
	}, nil
}
|
||||
|
||||
// Package-level regexps for crude HTML-to-text extraction, compiled once at
// init. (?s) lets .*? span newlines inside <script>/<style> bodies.
var (
	titleRegex  = regexp.MustCompile(`<title[^>]*>([^<]+)</title>`)   // captures the <title> text
	scriptRegex = regexp.MustCompile(`(?s)<script[^>]*>.*?</script>`) // whole <script> elements
	styleRegex  = regexp.MustCompile(`(?s)<style[^>]*>.*?</style>`)   // whole <style> elements
	tagRegex    = regexp.MustCompile(`<[^>]+>`)                       // any remaining tag
	spaceRegex  = regexp.MustCompile(`\s+`)                           // whitespace runs → one space
)
|
||||
|
||||
func extractHTMLTitle(html string) string {
|
||||
matches := titleRegex.FindStringSubmatch(html)
|
||||
if len(matches) > 1 {
|
||||
return strings.TrimSpace(matches[1])
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func extractTextContent(html string) string {
|
||||
bodyStart := strings.Index(strings.ToLower(html), "<body")
|
||||
bodyEnd := strings.Index(strings.ToLower(html), "</body>")
|
||||
|
||||
if bodyStart != -1 && bodyEnd != -1 && bodyEnd > bodyStart {
|
||||
html = html[bodyStart:bodyEnd]
|
||||
}
|
||||
|
||||
html = scriptRegex.ReplaceAllString(html, "")
|
||||
html = styleRegex.ReplaceAllString(html, "")
|
||||
html = tagRegex.ReplaceAllString(html, " ")
|
||||
html = spaceRegex.ReplaceAllString(html, " ")
|
||||
|
||||
return strings.TrimSpace(html)
|
||||
}
|
||||
|
||||
// extractMarkdownFromCrawl4AI pulls the value of the "raw_markdown" field
// out of a Crawl4AI JSON response by plain string scanning (no JSON
// decode). Returns "" when the field is absent or malformed.
//
// NOTE(review): this scanner stops at the first '"' after the value opens,
// so a payload containing an escaped quote (\") is truncated there, and
// JSON escape sequences are returned un-decoded. Decoding the response with
// encoding/json would be more robust — verify the actual response schema
// before changing.
func extractMarkdownFromCrawl4AI(response string) string {
	if idx := strings.Index(response, `"raw_markdown"`); idx != -1 {
		start := idx + len(`"raw_markdown"`)
		if colonIdx := strings.Index(response[start:], ":"); colonIdx != -1 {
			start += colonIdx + 1
			// Skip spaces and the opening quote of the value.
			for start < len(response) && (response[start] == ' ' || response[start] == '"') {
				start++
			}
			end := strings.Index(response[start:], `"`)
			if end > 0 {
				return response[start : start+end]
			}
		}
	}
	return ""
}
|
||||
|
||||
// extractTitleFromCrawl4AI pulls the value of the "title" field out of a
// Crawl4AI JSON response by plain string scanning. Returns "" when the
// field is absent or malformed.
//
// NOTE(review): shares the weaknesses of extractMarkdownFromCrawl4AI — it
// truncates at an escaped quote (\"), returns escapes un-decoded, and will
// match a "title" key anywhere in the response, not just at the top level.
func extractTitleFromCrawl4AI(response string) string {
	if idx := strings.Index(response, `"title"`); idx != -1 {
		start := idx + len(`"title"`)
		if colonIdx := strings.Index(response[start:], ":"); colonIdx != -1 {
			start += colonIdx + 1
			// Skip spaces and the opening quote of the value.
			for start < len(response) && (response[start] == ' ' || response[start] == '"') {
				start++
			}
			end := strings.Index(response[start:], `"`)
			if end > 0 {
				return response[start : start+end]
			}
		}
	}
	return ""
}
|
||||
|
||||
// runSpeedMode is the low-latency pipeline: heuristic (non-LLM)
// classification, parallel web + media search, BM25 reranking with an
// adaptive top-K, and a writer call capped at 2048 tokens. All search
// failures are swallowed (best effort) — the writer simply gets fewer or no
// sources.
func runSpeedMode(ctx context.Context, sess *session.Session, input OrchestratorInput, detectedLang string) error {
	classification := fastClassify(input.FollowUp, input.ChatHistory)
	searchQuery := classification.StandaloneFollowUp
	if searchQuery == "" {
		searchQuery = input.FollowUp
	}
	queries := generateSearchQueries(searchQuery)

	// Show the research progress UI immediately.
	researchBlockID := uuid.New().String()
	sess.EmitBlock(types.NewResearchBlock(researchBlockID))

	// Each variable below is written by exactly one goroutine and read only
	// after g.Wait(), so no extra synchronization is needed.
	var searchResults []types.Chunk
	var mediaResult *search.MediaSearchResult

	g, gctx := errgroup.WithContext(ctx)

	g.Go(func() error {
		results, err := parallelSearch(gctx, input.Config.SearchClient, queries)
		if err != nil {
			// Best effort: a failed search just yields no sources.
			return nil
		}
		searchResults = results
		return nil
	})

	g.Go(func() error {
		result, err := input.Config.SearchClient.SearchMedia(gctx, searchQuery, &search.MediaSearchOptions{
			MaxImages: 6,
			MaxVideos: 4,
		})
		if err != nil {
			return nil
		}
		mediaResult = result
		return nil
	})

	// Goroutines never return errors, so Wait's result carries no signal.
	_ = g.Wait()

	if len(searchResults) > 0 {
		sess.EmitBlock(types.NewSourceBlock(uuid.New().String(), searchResults))
	}

	if mediaResult != nil {
		if len(mediaResult.Images) > 0 {
			sess.EmitBlock(types.NewWidgetBlock(uuid.New().String(), "image_gallery", map[string]interface{}{
				"images": mediaResult.Images,
				"layout": "carousel",
			}))
		}
		if len(mediaResult.Videos) > 0 {
			sess.EmitBlock(types.NewWidgetBlock(uuid.New().String(), "videos", map[string]interface{}{
				"items": mediaResult.Videos,
				"title": "",
			}))
		}
	}

	sess.EmitResearchComplete()

	// Rerank with BM25; the top-K adapts to result count and query
	// complexity for the "speed" profile.
	queryComplexity := search.EstimateQueryComplexity(searchQuery)
	adaptiveTopK := search.ComputeAdaptiveTopK(len(searchResults), queryComplexity, "speed")
	rankedResults := search.RerankBM25(searchResults, searchQuery, adaptiveTopK)

	// Small context budget: at most 15 results, 250 bytes each.
	finalContext := buildContext(rankedResults, 15, 250)

	writerPrompt := prompts.GetWriterPrompt(prompts.WriterConfig{
		Context:            finalContext,
		SystemInstructions: input.Config.SystemInstructions,
		Mode:               string(input.Config.Mode),
		Locale:             input.Config.Locale,
		MemoryContext:      input.Config.MemoryContext,
		AnswerMode:         input.Config.AnswerMode,
		DetectedLanguage:   detectedLang,
		IsArticleSummary:   false,
	})

	messages := []llm.Message{
		{Role: llm.RoleSystem, Content: writerPrompt},
	}
	messages = append(messages, input.ChatHistory...)
	messages = append(messages, llm.Message{Role: llm.RoleUser, Content: input.FollowUp})

	return streamResponse(ctx, sess, input.Config.LLM, messages, 2048, input.FollowUp, input.Config.Locale)
}
|
||||
|
||||
// runFullMode is the default pipeline: optional clarifying questions
// (quality mode), LLM-based query classification, parallel research + media
// search, relevance ranking, and a writer call capped at 4096 tokens.
//
// For "Summary: <url>" requests it additionally races a pre-generated
// digest fetch against a live scrape of the article; a usable digest
// short-circuits the whole pipeline.
func runFullMode(ctx context.Context, sess *session.Session, input OrchestratorInput, detectedLang string, isArticleSummary bool) error {
	// Quality mode may pause to ask clarifying questions instead of
	// answering; the client is expected to re-submit with answers.
	if input.Config.EnableClarifying && !isArticleSummary && input.Config.Mode == ModeQuality {
		clarifying, err := generateClarifyingQuestions(ctx, input.Config.LLM, input.FollowUp)
		if err == nil && len(clarifying) > 0 {
			sess.EmitBlock(types.NewWidgetBlock(uuid.New().String(), "clarifying", map[string]interface{}{
				"questions": clarifying,
				"query":     input.FollowUp,
			}))
			return nil
		}
	}

	// Fold user/collection/file context into the memory context the writer
	// prompt will see.
	enhancedContext := buildEnhancedContext(input)
	if enhancedContext != "" {
		input.Config.MemoryContext = enhancedContext + input.Config.MemoryContext
	}

	var preScrapedArticle *PreScrapedArticle
	var articleURL string

	if isArticleSummary {
		articleURL = strings.TrimSpace(strings.TrimPrefix(strings.TrimSpace(input.FollowUp), "Summary: "))

		// Run the digest lookup (fast, 3s) and the article scrape (slow,
		// 25s) concurrently; both channels are buffered so neither goroutine
		// can block forever.
		digestCtx, digestCancel := context.WithTimeout(ctx, 3*time.Second)
		scrapeCtx, scrapeCancel := context.WithTimeout(ctx, 25*time.Second)

		digestCh := make(chan *DigestResponse, 1)
		scrapeCh := make(chan *PreScrapedArticle, 1)

		go func() {
			defer digestCancel()
			digest, _ := fetchPreGeneratedDigest(digestCtx, input.Config.DiscoverSvcURL, articleURL)
			digestCh <- digest
		}()

		go func() {
			defer scrapeCancel()
			article, _ := preScrapeArticleURL(scrapeCtx, input.Config.Crawl4AIURL, articleURL)
			scrapeCh <- article
		}()

		digest := <-digestCh
		preScrapedArticle = <-scrapeCh

		// A complete pre-generated digest answers the request outright.
		if digest != nil {
			chunks := make([]types.Chunk, len(digest.Citations))
			for i, c := range digest.Citations {
				chunks[i] = types.Chunk{
					Content: c.Title,
					Metadata: map[string]string{
						"url":    c.URL,
						"title":  c.Title,
						"domain": c.Domain,
					},
				}
			}
			sess.EmitBlock(types.NewSourceBlock(uuid.New().String(), chunks))
			sess.EmitResearchComplete()

			// Append follow-up questions as markdown quotes after a rule.
			summaryText := digest.SummaryRu
			if len(digest.FollowUp) > 0 {
				summaryText += "\n\n---\n"
				for _, q := range digest.FollowUp {
					summaryText += "> " + q + "\n"
				}
			}
			sess.EmitBlock(types.NewTextBlock(uuid.New().String(), summaryText))
			sess.EmitEnd()
			return nil
		}
	}

	// Classify with the LLM; on failure fall back to searching with the raw
	// follow-up verbatim.
	classification, err := classify(ctx, input.Config.LLM, input.FollowUp, input.ChatHistory, input.Config.Locale, detectedLang)
	if err != nil {
		classification = &ClassificationResult{
			StandaloneFollowUp: input.FollowUp,
			SkipSearch:         false,
		}
	}

	// Article summaries always need fresh search context.
	if isArticleSummary && classification.SkipSearch {
		classification.SkipSearch = false
	}

	g, gctx := errgroup.WithContext(ctx)

	// Each written by one goroutine, read only after g.Wait().
	var searchResults []types.Chunk
	var mediaResult *search.MediaSearchResult

	mediaQuery := classification.StandaloneFollowUp
	if mediaQuery == "" {
		mediaQuery = input.FollowUp
	}

	// For article summaries, enrich the research query with the scraped
	// title so the search isn't just a bare URL.
	effectiveFollowUp := input.FollowUp
	if isArticleSummary && preScrapedArticle != nil && preScrapedArticle.Title != "" {
		effectiveFollowUp = fmt.Sprintf("Summary: %s\nArticle title: %s", preScrapedArticle.URL, preScrapedArticle.Title)
		if classification.StandaloneFollowUp != "" {
			classification.StandaloneFollowUp = preScrapedArticle.Title + " " + classification.StandaloneFollowUp
		} else {
			classification.StandaloneFollowUp = preScrapedArticle.Title
		}
	}

	if !classification.SkipSearch {
		g.Go(func() error {
			results, err := research(gctx, sess, input.Config.LLM, input.Config.SearchClient, ResearchInput{
				ChatHistory:      input.ChatHistory,
				FollowUp:         effectiveFollowUp,
				Classification:   classification,
				Mode:             input.Config.Mode,
				Sources:          input.Config.Sources,
				Locale:           input.Config.Locale,
				DetectedLang:     detectedLang,
				IsArticleSummary: isArticleSummary,
			})
			if err != nil {
				// Best effort: failed research yields no sources.
				return nil
			}
			searchResults = results
			return nil
		})
	}

	if !isArticleSummary {
		g.Go(func() error {
			result, err := input.Config.SearchClient.SearchMedia(gctx, mediaQuery, &search.MediaSearchOptions{
				MaxImages: 8,
				MaxVideos: 6,
			})
			if err != nil {
				return nil
			}
			mediaResult = result
			return nil
		})
	}

	_ = g.Wait()

	// Ensure the scraped article itself is the first source when research
	// didn't already surface its URL.
	if isArticleSummary && preScrapedArticle != nil {
		alreadyHasURL := false
		for _, r := range searchResults {
			if strings.Contains(r.Metadata["url"], preScrapedArticle.URL) {
				alreadyHasURL = true
				break
			}
		}
		if !alreadyHasURL {
			prependChunk := types.Chunk{
				Content: preScrapedArticle.Content,
				Metadata: map[string]string{
					"url":   preScrapedArticle.URL,
					"title": preScrapedArticle.Title,
				},
			}
			searchResults = append([]types.Chunk{prependChunk}, searchResults...)
		}
	}

	if len(searchResults) > 0 {
		sess.EmitBlock(types.NewSourceBlock(uuid.New().String(), searchResults))
	}

	if mediaResult != nil {
		if len(mediaResult.Images) > 0 {
			sess.EmitBlock(types.NewWidgetBlock(uuid.New().String(), "image_gallery", map[string]interface{}{
				"images": mediaResult.Images,
				"layout": "carousel",
			}))
		}
		if len(mediaResult.Videos) > 0 {
			sess.EmitBlock(types.NewWidgetBlock(uuid.New().String(), "videos", map[string]interface{}{
				"items": mediaResult.Videos,
				"title": "",
			}))
		}
	}

	sess.EmitResearchComplete()

	// Article summaries get a bigger context budget (more results, much
	// longer excerpts) since the article body itself is a source.
	maxResults := 25
	maxContent := 320
	if isArticleSummary {
		maxResults = 30
		maxContent = 2000
	}

	rankedResults := rankByRelevance(searchResults, input.FollowUp)
	if len(rankedResults) > maxResults {
		rankedResults = rankedResults[:maxResults]
	}

	finalContext := buildContext(rankedResults, maxResults, maxContent)

	writerPrompt := prompts.GetWriterPrompt(prompts.WriterConfig{
		Context:            finalContext,
		SystemInstructions: input.Config.SystemInstructions,
		Mode:               string(input.Config.Mode),
		Locale:             input.Config.Locale,
		MemoryContext:      input.Config.MemoryContext,
		AnswerMode:         input.Config.AnswerMode,
		DetectedLanguage:   detectedLang,
		IsArticleSummary:   isArticleSummary,
	})

	messages := []llm.Message{
		{Role: llm.RoleSystem, Content: writerPrompt},
	}
	messages = append(messages, input.ChatHistory...)
	messages = append(messages, llm.Message{Role: llm.RoleUser, Content: input.FollowUp})

	maxTokens := 4096
	return streamResponse(ctx, sess, input.Config.LLM, messages, maxTokens, input.FollowUp, input.Config.Locale)
}
|
||||
|
||||
// streamResponse streams the LLM answer into a single text block (created
// lazily on the first non-empty chunk), patches the block with the full
// accumulated text when the stream ends, launches related-question
// generation in the background, and ends the session.
func streamResponse(ctx context.Context, sess *session.Session, client llm.Client, messages []llm.Message, maxTokens int, query string, locale string) error {
	stream, err := client.StreamText(ctx, llm.StreamRequest{
		Messages: messages,
		Options:  llm.StreamOptions{MaxTokens: maxTokens},
	})
	if err != nil {
		return err
	}

	var responseBlockID string
	var accumulatedText string

	for chunk := range stream {
		// Don't create the block until actual content arrives.
		if chunk.ContentChunk == "" && responseBlockID == "" {
			continue
		}

		if responseBlockID == "" {
			// First content chunk: create the text block seeded with it.
			responseBlockID = uuid.New().String()
			accumulatedText = chunk.ContentChunk
			sess.EmitBlock(types.NewTextBlock(responseBlockID, accumulatedText))
		} else if chunk.ContentChunk != "" {
			accumulatedText += chunk.ContentChunk
			sess.EmitTextChunk(responseBlockID, chunk.ContentChunk)
		}
	}

	// Replace the block's data with the complete text so late subscribers
	// get a consistent final state rather than just the first chunk.
	if responseBlockID != "" {
		sess.UpdateBlock(responseBlockID, []session.Patch{
			{Op: "replace", Path: "/data", Value: accumulatedText},
		})
	}

	// NOTE(review): this goroutine may emit its widget block AFTER EmitEnd
	// below, and nothing waits for it before returning — confirm the
	// session accepts blocks emitted after EmitEnd, or move EmitEnd after
	// the widget emission.
	go func() {
		relatedCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancel()

		related := generateRelatedQuestions(relatedCtx, client, query, accumulatedText, locale)
		if len(related) > 0 {
			sess.EmitBlock(types.NewWidgetBlock(uuid.New().String(), "related_questions", map[string]interface{}{
				"questions": related,
			}))
		}
	}()

	sess.EmitEnd()
	return nil
}
|
||||
|
||||
// parallelSearch runs every query against SearXNG concurrently and merges
// the results, de-duplicating by URL (first occurrence wins). Individual
// query failures are swallowed; the returned error is always nil in the
// current implementation.
func parallelSearch(ctx context.Context, client *search.SearXNGClient, queries []string) ([]types.Chunk, error) {
	results := make([]types.Chunk, 0)
	seen := make(map[string]bool)

	g, gctx := errgroup.WithContext(ctx)
	// Buffered to len(queries) so every goroutine can send without blocking
	// even before the consumer starts draining.
	resultsCh := make(chan []types.SearchResult, len(queries))

	for _, q := range queries {
		// Shadow the loop variable for the closure (pre-Go-1.22 semantics).
		query := q
		g.Go(func() error {
			resp, err := client.Search(gctx, query, &search.SearchOptions{
				Categories: []string{"general", "news"},
				PageNo:     1,
			})
			if err != nil {
				// Best effort: a failed query contributes nothing.
				resultsCh <- nil
				return nil
			}
			resultsCh <- resp.Results
			return nil
		})
	}

	// Close the channel once all senders are done so the range below ends.
	go func() {
		g.Wait()
		close(resultsCh)
	}()

	for batch := range resultsCh {
		for _, r := range batch {
			if r.URL != "" && !seen[r.URL] {
				seen[r.URL] = true
				results = append(results, r.ToChunk())
			}
		}
	}

	return results, nil
}
|
||||
|
||||
func buildContext(chunks []types.Chunk, maxResults, maxContentLen int) string {
|
||||
if len(chunks) > maxResults {
|
||||
chunks = chunks[:maxResults]
|
||||
}
|
||||
|
||||
var sb strings.Builder
|
||||
sb.WriteString("<search_results note=\"These are the search results and assistant can cite these\">\n")
|
||||
|
||||
for i, chunk := range chunks {
|
||||
content := chunk.Content
|
||||
if len(content) > maxContentLen {
|
||||
content = content[:maxContentLen] + "…"
|
||||
}
|
||||
title := chunk.Metadata["title"]
|
||||
sb.WriteString("<result index=")
|
||||
sb.WriteString(strings.ReplaceAll(title, "\"", "'"))
|
||||
sb.WriteString("\" index=\"")
|
||||
sb.WriteString(string(rune('0' + i + 1)))
|
||||
sb.WriteString("\">")
|
||||
sb.WriteString(content)
|
||||
sb.WriteString("</result>\n")
|
||||
}
|
||||
|
||||
sb.WriteString("</search_results>")
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
func rankByRelevance(chunks []types.Chunk, query string) []types.Chunk {
|
||||
if len(chunks) == 0 {
|
||||
return chunks
|
||||
}
|
||||
|
||||
terms := extractQueryTerms(query)
|
||||
if len(terms) == 0 {
|
||||
return chunks
|
||||
}
|
||||
|
||||
type scored struct {
|
||||
chunk types.Chunk
|
||||
score int
|
||||
}
|
||||
|
||||
scored_chunks := make([]scored, len(chunks))
|
||||
for i, chunk := range chunks {
|
||||
score := 0
|
||||
content := strings.ToLower(chunk.Content)
|
||||
title := strings.ToLower(chunk.Metadata["title"])
|
||||
|
||||
for term := range terms {
|
||||
if strings.Contains(title, term) {
|
||||
score += 3
|
||||
}
|
||||
if strings.Contains(content, term) {
|
||||
score += 1
|
||||
}
|
||||
}
|
||||
|
||||
scored_chunks[i] = scored{chunk: chunk, score: score}
|
||||
}
|
||||
|
||||
for i := 0; i < len(scored_chunks)-1; i++ {
|
||||
for j := i + 1; j < len(scored_chunks); j++ {
|
||||
if scored_chunks[j].score > scored_chunks[i].score {
|
||||
scored_chunks[i], scored_chunks[j] = scored_chunks[j], scored_chunks[i]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result := make([]types.Chunk, len(scored_chunks))
|
||||
for i, s := range scored_chunks {
|
||||
result[i] = s.chunk
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// extractQueryTerms tokenizes a search query into a set of lowercase
// terms usable for relevance scoring. It strips a leading "summary: "
// marker and drops single-character tokens and URL-like tokens
// (anything starting with "http").
func extractQueryTerms(query string) map[string]bool {
	normalized := strings.TrimPrefix(strings.ToLower(query), "summary: ")

	terms := make(map[string]bool)
	for _, word := range strings.Fields(normalized) {
		if len(word) < 2 || strings.HasPrefix(word, "http") {
			continue
		}
		terms[word] = true
	}
	return terms
}
|
||||
128
backend/internal/agent/researcher.go
Normal file
128
backend/internal/agent/researcher.go
Normal file
@@ -0,0 +1,128 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/gooseek/backend/internal/llm"
|
||||
"github.com/gooseek/backend/internal/search"
|
||||
"github.com/gooseek/backend/internal/session"
|
||||
"github.com/gooseek/backend/internal/types"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// ResearchInput bundles everything the research step needs to run one
// search-and-collect pass for a user turn.
type ResearchInput struct {
	ChatHistory      []llm.Message         // prior conversation (not consumed by the visible research pass)
	FollowUp         string                // raw user follow-up; used as the search query fallback
	Classification   *ClassificationResult // classifier output; StandaloneFollowUp is preferred as the search query
	Mode             Mode                  // effort level — controls the search-iteration budget (speed=1, balanced=3, quality=10)
	Sources          []string              // user-selected source filters mapped to SearXNG categories; empty means defaults
	Locale           string                // requester's UI locale — presumably for localized output; not read in the visible pass
	DetectedLang     string                // language detected from the query — not read in the visible pass
	IsArticleSummary bool                  // NOTE(review): not read in the visible pass; presumably toggles a summarize-article flow — confirm against callers
}
|
||||
|
||||
// research runs the iterative web-search phase for one turn: it emits a
// research progress block to the session stream, issues SearXNG queries
// derived from the classified follow-up, and accumulates de-duplicated
// result chunks until the mode-dependent iteration/result budget is hit.
//
// NOTE(review): llmClient is accepted but never used here — presumably
// reserved for LLM-driven query refinement; confirm intent.
// NOTE(review): generateSearchQueries receives the same searchQuery on
// every iteration, so rounds after the first re-issue identical queries
// and only the URL-dedupe set can grow — verify whether the query is
// meant to be refined between rounds.
func research(
	ctx context.Context,
	sess *session.Session,
	llmClient llm.Client,
	searchClient *search.SearXNGClient,
	input ResearchInput,
) ([]types.Chunk, error) {
	// Iteration budget scales with the requested effort level.
	maxIterations := 1
	switch input.Mode {
	case ModeBalanced:
		maxIterations = 3
	case ModeQuality:
		maxIterations = 10
	}

	// Surface a research block in the UI stream; its sub-steps are
	// patched in below as each search round starts.
	researchBlockID := uuid.New().String()
	sess.EmitBlock(types.NewResearchBlock(researchBlockID))

	allResults := make([]types.Chunk, 0)
	seenURLs := make(map[string]bool) // dedupe results by URL across all rounds

	// Prefer the classifier's standalone rewrite of the follow-up; fall
	// back to the raw follow-up when the classifier produced none.
	// NOTE(review): this dereferences input.Classification without a nil
	// check — confirm callers always populate it.
	searchQuery := input.Classification.StandaloneFollowUp
	if searchQuery == "" {
		searchQuery = input.FollowUp
	}

	for i := 0; i < maxIterations; i++ {
		queries := generateSearchQueries(searchQuery)

		// Replace the block's sub-steps with a "searching" entry listing
		// this round's queries.
		sess.UpdateBlock(researchBlockID, []session.Patch{
			{
				Op:   "replace",
				Path: "/data/subSteps",
				Value: []types.ResearchSubStep{
					{
						ID:        uuid.New().String(),
						Type:      "searching",
						Searching: queries,
					},
				},
			},
		})

		for _, q := range queries {
			resp, err := searchClient.Search(ctx, q, &search.SearchOptions{
				Categories: categoriesToSearch(input.Sources),
				PageNo:     1,
			})
			if err != nil {
				// Best-effort: a failed query is skipped, not fatal.
				continue
			}

			for _, r := range resp.Results {
				if r.URL != "" && !seenURLs[r.URL] {
					seenURLs[r.URL] = true
					allResults = append(allResults, r.ToChunk())
				}
			}
		}

		// Speed mode always stops after a single round.
		if input.Mode == ModeSpeed {
			break
		}

		// Balanced mode stops once a modest result set is collected.
		if len(allResults) >= 20 && input.Mode == ModeBalanced {
			break
		}

		// Hard cap for any mode.
		if len(allResults) >= 50 {
			break
		}
	}

	return allResults, nil
}
|
||||
|
||||
// categoriesToSearch translates user-facing source filters into SearXNG
// category names. With no filters it searches general + news; filters
// that map to nothing are dropped, and if none match it falls back to
// general only. Order and duplicates follow the input.
func categoriesToSearch(sources []string) []string {
	if len(sources) == 0 {
		return []string{"general", "news"}
	}

	sourceToCategory := map[string]string{
		"web":         "general",
		"discussions": "social media",
		"academic":    "science",
		"news":        "news",
		"images":      "images",
		"videos":      "videos",
	}

	categories := make([]string, 0, len(sources))
	for _, source := range sources {
		if category, ok := sourceToCategory[source]; ok {
			categories = append(categories, category)
		}
	}

	if len(categories) == 0 {
		return []string{"general"}
	}

	return categories
}
|
||||
Reference in New Issue
Block a user