import z from 'zod';
import type { ResearchAction } from './types.js';
import type { Chunk, SearchResultsResearchBlock } from '../types.js';
import { searchSearxng } from '../searxng.js';
import { rerankBM25, computeAdaptiveTopK, estimateQueryComplexity } from '../reranker.js';

/** Maximum number of searches allowed to hold a semaphore permit at once. */
const MAX_CONCURRENT_SEARCHES = 5;
/** Pause (ms) taken before each search, while its permit is already held. */
const SEARCH_DELAY_MS = 50;
/** Upper bound on queries accepted by the schema and executed per call. */
const MAX_QUERIES = 6;

// Link summary mode: news + science only.
// Regular chat: text categories (no images/videos/files/music/map/social_media).
const ARTICLE_SUMMARY_CATEGORIES = ['news', 'science'] as const;
const TEXT_CATEGORIES = ['general', 'science', 'it', 'news'] as const;

/** Shape of a single raw result as consumed from `searchSearxng`. */
type RawSearchResult = { content?: string; title: string; url: string };

/**
 * Minimal counting semaphore used to cap concurrent searches.
 *
 * Waiters are queued in arrival order; `release` hands the permit directly to
 * the oldest waiter instead of returning it to the pool.
 */
class SearchSemaphore {
  private permits: number;
  private queue: (() => void)[] = [];

  constructor(permits: number) {
    this.permits = permits;
  }

  /** Resolves once a permit is available, taking it. */
  async acquire(): Promise<void> {
    if (this.permits > 0) {
      this.permits--;
      return;
    }
    return new Promise<void>((resolve) => {
      this.queue.push(resolve);
    });
  }

  /** Gives a permit back, waking the oldest waiter if there is one. */
  release(): void {
    const next = this.queue.shift();
    if (next) {
      // The permit is transferred to the waiter, so the count stays unchanged.
      next();
    } else {
      this.permits++;
    }
  }

  /**
   * Runs `fn` while holding a permit.
   *
   * @param fn Async work to execute under the permit.
   * @returns Whatever `fn` resolves to; rejections from `fn` propagate.
   */
  async runExclusive<T>(fn: () => Promise<T>): Promise<T> {
    await this.acquire();
    try {
      return await fn();
    } finally {
      // Always release, even when `fn` throws, so permits cannot leak.
      this.release();
    }
  }
}

const searchSemaphore = new SearchSemaphore(MAX_CONCURRENT_SEARCHES);

/**
 * Calls `searchSearxng` under the shared semaphore, after a short fixed delay.
 *
 * @param query Search query text.
 * @param opts Options forwarded unchanged to `searchSearxng`.
 * @returns The value resolved by `searchSearxng`.
 */
async function rateLimitedSearch(
  query: string,
  opts: { categories?: string[]; pageno?: number },
): Promise<{ results: RawSearchResult[] }> {
  return searchSemaphore.runExclusive(async () => {
    await new Promise<void>((resolve) => setTimeout(resolve, SEARCH_DELAY_MS));
    return searchSearxng(query, opts);
  });
}

/** Flattens result batches, keeping the first result seen for each non-empty URL. */
function collectUniqueByUrl(batches: RawSearchResult[][]): RawSearchResult[] {
  const seenUrls = new Set<string>();
  const unique: RawSearchResult[] = [];
  for (const batch of batches) {
    for (const result of batch) {
      if (result.url && !seenUrls.has(result.url)) {
        seenUrls.add(result.url);
        unique.push(result);
      }
    }
  }
  return unique;
}

/** Extracts a printable message from an unknown thrown value. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Fetches the raw, URL-deduplicated results for one query.
 *
 * In article-summary mode pages 1 and 2 are requested together and a failed
 * page is skipped (logged), so this never rejects in that mode. Otherwise only
 * page 1 is requested and a failure rejects.
 */
async function fetchRawResults(
  query: string,
  categories: string[],
  isArticleSummary: boolean,
): Promise<RawSearchResult[]> {
  if (!isArticleSummary) {
    const res = await rateLimitedSearch(query, { categories, pageno: 1 });
    return collectUniqueByUrl([res.results ?? []]);
  }

  const pages = await Promise.allSettled([
    rateLimitedSearch(query, { categories, pageno: 1 }),
    rateLimitedSearch(query, { categories, pageno: 2 }),
  ]);
  const batches: RawSearchResult[][] = [];
  for (const page of pages) {
    if (page.status === 'fulfilled') {
      batches.push(page.value.results ?? []);
    } else {
      console.warn(`web_search: result page failed: ${describeError(page.reason)}`);
    }
  }
  return collectUniqueByUrl(batches);
}

const schema = z.object({
  queries: z
    .array(z.string())
    .max(MAX_QUERIES)
    .describe('An array of search queries to perform web searches for.'),
});

/**
 * Research action that runs up to six web searches concurrently, streams the
 * raw hits into the research block as sub-steps, and returns the hits reranked
 * with BM25 against the original query.
 *
 * NOTE(review): if `ResearchAction` is generic over its schema, the type
 * argument belongs here — confirm against `./types.js`.
 */
const webSearchAction: ResearchAction = {
  name: 'web_search',
  schema,
  getToolDescription: () =>
    'Use this tool to perform web searches. Provide up to 6 queries in the user\'s language.',
  getDescription: () =>
    'Use this tool to perform web searches. Queries should be targeted, SEO-friendly. Up to 6 queries in the user\'s language.',
  enabled: (config) =>
    config.sources.includes('web') && config.classification.classification.skipSearch === false,
  execute: async (input, additionalConfig) => {
    // Work on a bounded local copy instead of mutating the caller's input.
    const queries = input.queries.slice(0, MAX_QUERIES);

    const block = additionalConfig.session.getBlock(additionalConfig.researchBlockId);
    const research = block && block.type === 'research' ? block : undefined;

    /** Pushes the current sub-step list of the research block to the session. */
    const syncSubSteps = (): void => {
      if (!research) return;
      additionalConfig.session.updateBlock(additionalConfig.researchBlockId, [
        { op: 'replace', path: '/data/subSteps', value: research.data.subSteps },
      ]);
    };

    if (research) {
      research.data.subSteps.push({
        id: crypto.randomUUID(),
        type: 'searching',
        searching: queries,
      });
      syncSubSteps();
    }

    const searchResultsBlockId = crypto.randomUUID();
    let searchResultsEmitted = false;
    const results: Chunk[] = [];

    const isArticleSummary = !!additionalConfig.isArticleSummary;
    const categories: string[] = isArticleSummary
      ? [...ARTICLE_SUMMARY_CATEGORIES]
      : [...TEXT_CATEGORIES];

    /**
     * Adds one query's chunks to the shared `search_results` sub-step: the
     * first query to finish creates it, later ones append to it.
     */
    const publishChunks = (chunks: Chunk[]): void => {
      if (!research) return;
      if (!searchResultsEmitted) {
        searchResultsEmitted = true;
        research.data.subSteps.push({
          id: searchResultsBlockId,
          type: 'search_results',
          reading: chunks,
        });
        syncSubSteps();
        return;
      }
      const subStep = research.data.subSteps.find((s) => s.id === searchResultsBlockId) as
        | SearchResultsResearchBlock
        | undefined;
      if (subStep) {
        subStep.reading.push(...chunks);
        syncSubSteps();
      }
    };

    const search = async (q: string): Promise<void> => {
      let rawResults: RawSearchResult[];
      try {
        rawResults = await fetchRawResults(q, categories, isArticleSummary);
      } catch (err: unknown) {
        // Best effort: a failed query must not abort the others, but leave a trace.
        console.warn(`web_search: query failed: ${describeError(err)}`);
        return;
      }

      const resultChunks: Chunk[] = rawResults.map((r) => ({
        // Falls back to the title when the hit has no (or empty) content.
        content: r.content || r.title,
        metadata: { title: r.title, url: r.url },
      }));
      results.push(...resultChunks);
      publishChunks(resultChunks);
    };

    await Promise.all(queries.map(search));

    if (results.length === 0) {
      return { type: 'search_results', results };
    }

    const originalQuery = additionalConfig.originalQuery ?? queries.join(' ');
    const mode = additionalConfig.mode ?? 'balanced';
    const queryComplexity = estimateQueryComplexity(originalQuery);
    const adaptiveTopK = computeAdaptiveTopK(results.length, queryComplexity, mode);

    const rerankableItems = results.map((r) => ({
      content: r.content,
      title: (r.metadata?.title as string) ?? '',
      url: (r.metadata?.url as string) ?? '',
      metadata: r.metadata,
    }));
    const rankedItems = rerankBM25(rerankableItems, originalQuery, adaptiveTopK);

    const rankedResults: Chunk[] = rankedItems.map((item) => ({
      content: item.content,
      metadata: item.metadata ?? { title: item.title, url: item.url },
    }));

    return { type: 'search_results', results: rankedResults };
  },
};

export default webSearchAction;