Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler, proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard, UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
187 lines
6.4 KiB
TypeScript
187 lines
6.4 KiB
TypeScript
import z from 'zod';
|
|
import type { ResearchAction } from './types.js';
|
|
import type { Chunk, SearchResultsResearchBlock } from '../types.js';
|
|
import { searchSearxng } from '../searxng.js';
|
|
import { rerankBM25, computeAdaptiveTopK, estimateQueryComplexity } from '../reranker.js';
|
|
|
|
/** Permit count handed to the module-level SearchSemaphore, i.e. how many searches may be in flight at once. */
const MAX_CONCURRENT_SEARCHES = 5;
|
|
/** Pause, in milliseconds, that rateLimitedSearch awaits (while already holding a permit) before calling searchSearxng. */
const SEARCH_DELAY_MS = 50;
|
|
|
|
/**
 * Counting semaphore for async tasks.
 *
 * At most `permits` callers hold a permit at any time; further callers are
 * parked in a FIFO queue and resumed one by one as permits are released.
 */
class SearchSemaphore {
  /** Permits that are currently free to hand out. */
  private permits: number;

  /** Resolvers of callers waiting for a permit, in arrival order. */
  private readonly queue: (() => void)[] = [];

  /**
   * @param permits Maximum number of concurrent permit holders.
   * @throws RangeError when `permits` is zero, negative or NaN — with such a
   *   value no `acquire()` could ever succeed and every caller would hang.
   */
  constructor(permits: number) {
    // `!(x > 0)` also catches NaN, which a plain `x <= 0` would let through.
    if (!(permits > 0)) {
      throw new RangeError(`SearchSemaphore requires a positive permit count, got ${permits}`);
    }
    this.permits = permits;
  }

  /**
   * Takes a permit, waiting in the queue if none is free.
   *
   * @returns A promise that resolves once the caller holds a permit.
   */
  async acquire(): Promise<void> {
    if (this.permits > 0) {
      this.permits--;
      return;
    }
    return new Promise<void>((resolve) => {
      this.queue.push(resolve);
    });
  }

  /**
   * Gives a permit back. If someone is waiting, the permit is handed to the
   * oldest waiter directly (the free counter is left untouched); otherwise the
   * free counter is incremented.
   */
  release(): void {
    const next = this.queue.shift();
    if (next) {
      next();
    } else {
      this.permits++;
    }
  }

  /**
   * Runs `fn` while holding a permit and releases the permit afterwards,
   * whether `fn` resolves or rejects.
   *
   * Despite the name, up to `permits` callbacks may run concurrently.
   *
   * @param fn Async work to run under the concurrency limit.
   * @returns Whatever `fn` resolves to; rejections from `fn` propagate.
   */
  async runExclusive<T>(fn: () => Promise<T>): Promise<T> {
    await this.acquire();
    try {
      return await fn();
    } finally {
      this.release();
    }
  }
}
|
|
|
|
/** Module-wide semaphore through which rateLimitedSearch funnels every search call. */
const searchSemaphore = new SearchSemaphore(MAX_CONCURRENT_SEARCHES);
|
|
|
|
/**
 * Runs a single search under the shared concurrency limit.
 *
 * The call first obtains a permit from `searchSemaphore`, then waits
 * `SEARCH_DELAY_MS`, then forwards to `searchSearxng`. The permit is given
 * back once the search settles.
 *
 * @param query Search query text.
 * @param opts Options forwarded unchanged to `searchSearxng`.
 * @returns The value resolved by `searchSearxng`.
 */
async function rateLimitedSearch(
  query: string,
  opts: { categories?: string[]; pageno?: number },
): Promise<{ results: { content?: string; title: string; url: string }[] }> {
  const pauseThenSearch = async () => {
    await new Promise((wake) => {
      setTimeout(wake, SEARCH_DELAY_MS);
    });
    return searchSearxng(query, opts);
  };

  return searchSemaphore.runExclusive(pauseThenSearch);
}
|
|
|
|
/** Input accepted by the `web_search` tool: an array of at most six query strings. */
const schema = z.object({
  queries: z
    .array(z.string())
    .max(6)
    .describe('An array of search queries to perform web searches for.'),
});
|
|
|
|
/**
 * Research action `web_search`.
 *
 * For up to six queries it runs rate-limited searches in parallel, streams the
 * hits into the session's research block as they arrive, and finally returns
 * the collected hits reranked with BM25 against the original user query.
 *
 * URLs are de-duplicated across all queries of one invocation, so a page
 * returned by several queries is reported and ranked only once.
 */
const webSearchAction: ResearchAction<typeof schema> = {
  name: 'web_search',
  schema,
  getToolDescription: () =>
    'Use this tool to perform web searches. Provide up to 6 queries in the user\'s language.',
  getDescription: () =>
    'Use this tool to perform web searches. Queries should be targeted, SEO-friendly. Up to 6 queries in the user\'s language.',
  enabled: (config) =>
    config.sources.includes('web') && config.classification.classification.skipSearch === false,
  execute: async (input, additionalConfig) => {
    type RawResult = { content?: string; title: string; url: string };

    // Cap the query count on a local copy instead of reassigning the caller's input.
    const queries = input.queries.slice(0, 6);

    const block = additionalConfig.session.getBlock(additionalConfig.researchBlockId);
    // Only a block of type 'research' carries the sub-step list we stream into.
    const researchBlock = block && block.type === 'research' ? block : undefined;

    /** Pushes the current sub-step list of the research block to the session. */
    const publishSubSteps = (): void => {
      if (!researchBlock) return;
      additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
        { op: 'replace', path: '/data/subSteps', value: researchBlock.data.subSteps },
      ]);
    };

    if (researchBlock) {
      researchBlock.data.subSteps.push({
        id: crypto.randomUUID(),
        type: 'searching',
        searching: queries,
      });
      publishSubSteps();
    }

    const searchResultsBlockId = crypto.randomUUID();
    let searchResultsEmitted = false;
    const results: Chunk[] = [];
    // Shared by every query so that a URL found by two queries is kept once.
    const seenUrls = new Set<string>();

    // Link summary: news + science only. Regular chat: text categories
    // (no images/videos/files/music/map/social_media).
    const isArticleSummary = !!additionalConfig.isArticleSummary;
    const categories: string[] = isArticleSummary
      ? ['news', 'science']
      : ['general', 'science', 'it', 'news'];

    /** Runs one query, records its new hits and streams them into the research block. */
    const search = async (q: string): Promise<void> => {
      const fresh: RawResult[] = [];

      /** Keeps the hits that have a URL not seen before in this invocation. */
      const collect = (items: RawResult[] | undefined): void => {
        for (const r of items ?? []) {
          if (r.url && !seenUrls.has(r.url)) {
            seenUrls.add(r.url);
            fresh.push(r);
          }
        }
      };

      try {
        if (isArticleSummary) {
          // Two pages are fetched; one failing page must not discard the other.
          const pages = await Promise.allSettled([
            rateLimitedSearch(q, { categories, pageno: 1 }),
            rateLimitedSearch(q, { categories, pageno: 2 }),
          ]);
          for (const page of pages) {
            if (page.status === 'fulfilled') {
              collect(page.value.results);
            } else {
              console.warn(`web_search: a result page for "${q}" failed`, page.reason);
            }
          }
        } else {
          const res = await rateLimitedSearch(q, { categories, pageno: 1 });
          collect(res.results);
        }
      } catch (err) {
        // Best effort: a failed query is skipped, but leave a trace for debugging.
        console.warn(`web_search: query "${q}" failed`, err);
        return;
      }

      const resultChunks: Chunk[] = fresh.map((r) => ({
        // Fall back to the title when the hit has no (or an empty) snippet.
        content: r.content || r.title,
        metadata: { title: r.title, url: r.url },
      }));
      results.push(...resultChunks);

      if (!researchBlock) return;

      if (!searchResultsEmitted) {
        // First query to finish creates the 'search_results' sub-step.
        searchResultsEmitted = true;
        researchBlock.data.subSteps.push({
          id: searchResultsBlockId,
          type: 'search_results',
          reading: resultChunks,
        });
        publishSubSteps();
        return;
      }

      // Later queries append to the sub-step created above.
      const subStep = researchBlock.data.subSteps.find((s) => s.id === searchResultsBlockId) as
        | SearchResultsResearchBlock
        | undefined;
      if (subStep) {
        subStep.reading.push(...resultChunks);
        publishSubSteps();
      }
    };

    await Promise.all(queries.map(search));

    if (results.length === 0) {
      return { type: 'search_results', results };
    }

    const originalQuery = additionalConfig.originalQuery ?? queries.join(' ');
    const mode = additionalConfig.mode ?? 'balanced';
    const queryComplexity = estimateQueryComplexity(originalQuery);
    const adaptiveTopK = computeAdaptiveTopK(results.length, queryComplexity, mode);

    const rerankableItems = results.map((r) => ({
      content: r.content,
      title: (r.metadata?.title as string) ?? '',
      url: (r.metadata?.url as string) ?? '',
      metadata: r.metadata,
    }));

    const rankedItems = rerankBM25(rerankableItems, originalQuery, adaptiveTopK);

    const rankedResults: Chunk[] = rankedItems.map((item) => ({
      content: item.content,
      metadata: item.metadata ?? { title: item.title, url: item.url },
    }));

    return { type: 'search_results', results: rankedResults };
  },
};
|
|
|
|
export default webSearchAction;
|