feat: Go backend, enhanced search, new widgets, Docker deploy
Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler, proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard, UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
This commit is contained in:
@@ -2,22 +2,75 @@ import z from 'zod';
|
||||
import type { ResearchAction } from './types.js';
|
||||
import type { Chunk, SearchResultsResearchBlock } from '../types.js';
|
||||
import { searchSearxng } from '../searxng.js';
|
||||
import { rerankBM25, computeAdaptiveTopK, estimateQueryComplexity } from '../reranker.js';
|
||||
|
||||
// Permit count handed to the shared SearchSemaphore: at most this many
// rate-limited search callbacks may run at the same time.
const MAX_CONCURRENT_SEARCHES = 5;
|
||||
// Pause, in milliseconds, that rateLimitedSearch waits (while already holding
// a semaphore permit) before it calls searchSearxng.
const SEARCH_DELAY_MS = 50;
|
||||
|
||||
/**
 * Counting semaphore that caps how many async tasks may run at the same time.
 *
 * Callers that ask for a permit while none is free are parked in a FIFO queue
 * and resumed one at a time as running tasks release their permits.
 */
class SearchSemaphore {
  /** Number of permits that can currently be taken without waiting. */
  private permits: number;

  /** Resolve callbacks of callers waiting for a permit, oldest first. */
  private readonly queue: (() => void)[] = [];

  /**
   * @param permits Maximum number of simultaneous permit holders.
   * @throws {RangeError} If `permits` is not an integer >= 1. Without this
   *   check a zero, negative or NaN count would make every `acquire()` wait
   *   forever, because no permit could ever be handed out.
   */
  constructor(permits: number) {
    if (!Number.isInteger(permits) || permits < 1) {
      throw new RangeError(
        `SearchSemaphore permits must be a positive integer, got ${permits}`,
      );
    }
    this.permits = permits;
  }

  /**
   * Takes one permit, waiting in the queue if none is currently free.
   *
   * @returns A promise that resolves once the caller holds a permit.
   */
  async acquire(): Promise<void> {
    if (this.permits > 0) {
      this.permits--;
      return;
    }
    return new Promise<void>((resolve) => {
      this.queue.push(resolve);
    });
  }

  /**
   * Gives a permit back. If somebody is waiting, the permit is passed
   * directly to the oldest waiter instead of being returned to the pool,
   * so the available count is only incremented when the queue is empty.
   */
  release(): void {
    const next = this.queue.shift();
    if (next) {
      next();
    } else {
      this.permits++;
    }
  }

  /**
   * Runs `fn` while holding a permit and always releases the permit
   * afterwards, whether `fn` resolves or rejects.
   *
   * @param fn Async task to execute under the concurrency limit.
   * @returns Whatever `fn` resolves to; rejections from `fn` propagate.
   */
  async runExclusive<T>(fn: () => Promise<T>): Promise<T> {
    await this.acquire();
    try {
      return await fn();
    } finally {
      this.release();
    }
  }
}
|
||||
|
||||
// Single module-level semaphore shared by all rateLimitedSearch calls,
// sized by MAX_CONCURRENT_SEARCHES.
const searchSemaphore = new SearchSemaphore(MAX_CONCURRENT_SEARCHES);
|
||||
|
||||
/**
 * Performs one searchSearxng request under the module-wide concurrency cap.
 *
 * The call first obtains a permit from `searchSemaphore`, then waits
 * `SEARCH_DELAY_MS` milliseconds, and only then forwards `query` and `opts`
 * to `searchSearxng`. The permit is released when the request settles.
 *
 * @param query Search string passed through to `searchSearxng`.
 * @param opts Optional category list and page number, passed through as-is.
 * @returns The response produced by `searchSearxng`.
 */
async function rateLimitedSearch(
  query: string,
  opts: { categories?: string[]; pageno?: number },
): Promise<{ results: { content?: string; title: string; url: string }[] }> {
  const delayedSearch = async () => {
    // Short pause before the request; the permit is already held at this point.
    await new Promise((wake) => setTimeout(wake, SEARCH_DELAY_MS));
    return searchSearxng(query, opts);
  };

  return searchSemaphore.runExclusive(delayedSearch);
}
|
||||
|
||||
// Input contract of the web_search action: a list of at most six query strings.
// The object literal previously declared `queries` twice; the later entry is
// the one that takes effect at runtime, so only that definition is kept.
const schema = z.object({
  queries: z
    .array(z.string())
    .max(6)
    .describe('An array of search queries to perform web searches for.'),
});
|
||||
|
||||
const webSearchAction: ResearchAction<typeof schema> = {
|
||||
name: 'web_search',
|
||||
schema,
|
||||
getToolDescription: () =>
|
||||
'Use this tool to perform web searches based on the provided queries. You can provide up to 3 queries at a time.',
|
||||
'Use this tool to perform web searches. Provide up to 6 queries in the user\'s language.',
|
||||
getDescription: () =>
|
||||
'Use this tool to perform web searches. Your queries should be targeted and specific, SEO-friendly keywords. You can search for 3 queries in one go.',
|
||||
'Use this tool to perform web searches. Queries should be targeted, SEO-friendly. Up to 6 queries in the user\'s language.',
|
||||
enabled: (config) =>
|
||||
config.sources.includes('web') && config.classification.classification.skipSearch === false,
|
||||
execute: async (input, additionalConfig) => {
|
||||
input.queries = input.queries.slice(0, 3);
|
||||
input.queries = input.queries.slice(0, 6);
|
||||
|
||||
const researchBlock = additionalConfig.session.getBlock(additionalConfig.researchBlockId);
|
||||
|
||||
@@ -35,15 +88,45 @@ const webSearchAction: ResearchAction<typeof schema> = {
|
||||
const searchResultsBlockId = crypto.randomUUID();
|
||||
let searchResultsEmitted = false;
|
||||
const results: Chunk[] = [];
|
||||
// Саммари по ссылке: только новости + наука. Обычный чат: текстовые категории (без images/videos/files/music/map/social_media).
|
||||
const isArticleSummary = !!additionalConfig.isArticleSummary;
|
||||
const TEXT_CATEGORIES = ['general', 'science', 'it', 'news'] as const;
|
||||
const opts = isArticleSummary
|
||||
? { categories: ['news', 'science'] as string[], pageno: 1 as number }
|
||||
: { categories: [...TEXT_CATEGORIES] };
|
||||
|
||||
const search = async (q: string) => {
|
||||
let res: { results: { content?: string; title: string; url: string }[] };
|
||||
let allRawResults: { content?: string; title: string; url: string }[] = [];
|
||||
try {
|
||||
res = await searchSearxng(q);
|
||||
if (isArticleSummary) {
|
||||
const pages = await Promise.allSettled([
|
||||
rateLimitedSearch(q, { ...opts, pageno: 1 }),
|
||||
rateLimitedSearch(q, { ...opts, pageno: 2 }),
|
||||
]);
|
||||
const seenUrls = new Set<string>();
|
||||
for (const p of pages) {
|
||||
if (p.status !== 'fulfilled') continue;
|
||||
for (const r of p.value.results ?? []) {
|
||||
if (r.url && !seenUrls.has(r.url)) {
|
||||
seenUrls.add(r.url);
|
||||
allRawResults.push(r);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const res = await rateLimitedSearch(q, { ...opts, pageno: 1 });
|
||||
const seenUrls = new Set<string>();
|
||||
for (const r of res.results ?? []) {
|
||||
if (r.url && !seenUrls.has(r.url)) {
|
||||
seenUrls.add(r.url);
|
||||
allRawResults.push(r);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
return;
|
||||
}
|
||||
const resultChunks: Chunk[] = res.results.map((r) => ({
|
||||
const resultChunks: Chunk[] = allRawResults.map((r) => ({
|
||||
content: r.content || r.title,
|
||||
metadata: { title: r.title, url: r.url },
|
||||
}));
|
||||
@@ -73,7 +156,30 @@ const webSearchAction: ResearchAction<typeof schema> = {
|
||||
|
||||
await Promise.all(input.queries.map(search));
|
||||
|
||||
return { type: 'search_results', results };
|
||||
if (results.length === 0) {
|
||||
return { type: 'search_results', results };
|
||||
}
|
||||
|
||||
const originalQuery = additionalConfig.originalQuery ?? input.queries.join(' ');
|
||||
const mode = additionalConfig.mode ?? 'balanced';
|
||||
const queryComplexity = estimateQueryComplexity(originalQuery);
|
||||
const adaptiveTopK = computeAdaptiveTopK(results.length, queryComplexity, mode);
|
||||
|
||||
const rerankableItems = results.map((r) => ({
|
||||
content: r.content,
|
||||
title: (r.metadata?.title as string) ?? '',
|
||||
url: (r.metadata?.url as string) ?? '',
|
||||
metadata: r.metadata,
|
||||
}));
|
||||
|
||||
const rankedItems = rerankBM25(rerankableItems, originalQuery, adaptiveTopK);
|
||||
|
||||
const rankedResults: Chunk[] = rankedItems.map((item) => ({
|
||||
content: item.content,
|
||||
metadata: item.metadata ?? { title: item.title, url: item.url },
|
||||
}));
|
||||
|
||||
return { type: 'search_results', results: rankedResults };
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user