Files
gooseek/services/master-agents-svc/src/lib/actions/web_search.ts
home 06fe57c765 feat: Go backend, enhanced search, new widgets, Docker deploy
Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler,
  proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard,
  UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
2026-02-27 04:15:32 +03:00

187 lines
6.4 KiB
TypeScript

import z from 'zod';
import type { ResearchAction } from './types.js';
import type { Chunk, SearchResultsResearchBlock } from '../types.js';
import { searchSearxng } from '../searxng.js';
import { rerankBM25, computeAdaptiveTopK, estimateQueryComplexity } from '../reranker.js';
// Number of permits handed to the module-level SearchSemaphore, i.e. how many
// rate-limited searches may run at the same time.
const MAX_CONCURRENT_SEARCHES = 5;
// Delay in milliseconds awaited inside an acquired semaphore slot before the search request is issued.
const SEARCH_DELAY_MS = 50;
/**
 * Counting semaphore for promise-based work.
 *
 * At most `permits` callers hold a slot at any time. Further callers are parked
 * in arrival order and woken one by one as slots are released.
 */
class SearchSemaphore {
  /** Slots that can be taken immediately, without waiting. */
  private permits: number;
  /** Resolvers of callers parked in `acquire()`, oldest first. */
  private waiters: Array<() => void> = [];

  /**
   * @param permits Number of slots that may be held simultaneously.
   */
  constructor(permits: number) {
    this.permits = permits;
  }

  /**
   * Takes a slot, resolving right away when one is free and otherwise once a
   * holder calls `release()`.
   */
  async acquire(): Promise<void> {
    const slotIsFree = this.permits > 0;
    if (!slotIsFree) {
      // No slot available: park this caller until release() wakes it.
      return new Promise<void>((wake) => {
        this.waiters.push(wake);
      });
    }
    this.permits -= 1;
  }

  /**
   * Gives a slot back. If someone is waiting, the slot is handed straight to the
   * oldest waiter (the free-permit counter is left untouched); otherwise the
   * counter is incremented.
   */
  release(): void {
    const wake = this.waiters.shift();
    if (wake === undefined) {
      this.permits += 1;
      return;
    }
    wake();
  }

  /**
   * Runs `fn` while holding a slot and releases the slot afterwards, whether
   * `fn` resolves or rejects.
   *
   * @param fn Async work to perform inside the slot.
   * @returns Whatever `fn` resolves to; a rejection from `fn` propagates.
   */
  async runExclusive<T>(fn: () => Promise<T>): Promise<T> {
    await this.acquire();
    try {
      const outcome = await fn();
      return outcome;
    } finally {
      this.release();
    }
  }
}
// Single module-level limiter: every call to rateLimitedSearch goes through these slots.
const searchSemaphore = new SearchSemaphore(MAX_CONCURRENT_SEARCHES);

/**
 * Performs one SearXNG search inside a semaphore slot.
 *
 * Once a slot is acquired the call first waits SEARCH_DELAY_MS milliseconds and
 * then forwards `query` and `opts` to `searchSearxng`. The slot is released when
 * the search settles.
 *
 * @param query Search string passed to `searchSearxng`.
 * @param opts Options passed through unchanged (`categories`, `pageno`).
 * @returns The value resolved by `searchSearxng`; its rejection propagates.
 */
async function rateLimitedSearch(
  query: string,
  opts: { categories?: string[]; pageno?: number },
): Promise<{ results: { content?: string; title: string; url: string }[] }> {
  const pauseThenSearch = async () => {
    await new Promise((wake) => setTimeout(wake, SEARCH_DELAY_MS));
    return searchSearxng(query, opts);
  };
  return searchSemaphore.runExclusive(pauseThenSearch);
}
/**
 * Input schema of the `web_search` tool: a `queries` array holding at most six strings.
 */
const schema = z.object({
  queries: z
    .string()
    .array()
    .max(6)
    .describe('An array of search queries to perform web searches for.'),
});
/**
 * `web_search` research action.
 *
 * Runs the supplied queries through `rateLimitedSearch`, mirrors progress into the
 * session's research block as `searching` / `search_results` sub-steps, and returns
 * the collected hits after passing them through `rerankBM25`.
 */
const webSearchAction: ResearchAction<typeof schema> = {
  name: 'web_search',
  schema,
  getToolDescription: () =>
    'Use this tool to perform web searches. Provide up to 6 queries in the user\'s language.',
  getDescription: () =>
    'Use this tool to perform web searches. Queries should be targeted, SEO-friendly. Up to 6 queries in the user\'s language.',
  // Enabled only when 'web' is among the configured sources and the classification
  // explicitly reports skipSearch === false.
  enabled: (config) =>
    config.sources.includes('web') && config.classification.classification.skipSearch === false,
  /**
   * Executes the searches and returns the reranked results.
   *
   * @param input Validated tool input; only the first six queries are used.
   * @param additionalConfig Execution context: session, research block id, and the
   *   optional `isArticleSummary`, `originalQuery` and `mode` settings read below.
   * @returns `{ type: 'search_results', results }`; `results` is empty when no
   *   query produced any hit.
   */
  execute: async (input, additionalConfig) => {
    // Work on a truncated copy rather than reassigning a property of the caller's input.
    const queries = input.queries.slice(0, 6);
    const { session, researchBlockId } = additionalConfig;

    const fetchedBlock = session.getBlock(researchBlockId);
    const researchBlock =
      fetchedBlock && fetchedBlock.type === 'research' ? fetchedBlock : undefined;

    // Pushes the current sub-step list of the research block to the session.
    const publishSubSteps = (): void => {
      if (!researchBlock) return;
      session.updateBlock(researchBlockId, [
        { op: 'replace', path: '/data/subSteps', value: researchBlock.data.subSteps },
      ]);
    };

    if (researchBlock) {
      researchBlock.data.subSteps.push({
        id: crypto.randomUUID(),
        type: 'searching',
        searching: queries,
      });
      publishSubSteps();
    }

    const searchResultsBlockId = crypto.randomUUID();
    let searchResultsEmitted = false;
    const results: Chunk[] = [];
    // Shared by all queries of this call so a URL returned by several queries is
    // kept once; duplicates would otherwise take several slots in the reranked top-K.
    const seenUrls = new Set<string>();

    // Article summary (by link): news + science only, pages 1 and 2.
    // Regular chat: text categories (no images/videos/files/music/map/social_media), page 1.
    const isArticleSummary = !!additionalConfig.isArticleSummary;
    const categories: string[] = isArticleSummary
      ? ['news', 'science']
      : ['general', 'science', 'it', 'news'];
    const pageNumbers = isArticleSummary ? [1, 2] : [1];

    // Fetches every page for one query, records the new hits, and updates the
    // shared `search_results` sub-step. Never rejects: failures are logged and skipped.
    const search = async (q: string): Promise<void> => {
      const pages = await Promise.allSettled(
        pageNumbers.map((pageno) => rateLimitedSearch(q, { categories, pageno })),
      );

      const freshChunks: Chunk[] = [];
      let fulfilledPages = 0;
      for (const [i, page] of pages.entries()) {
        if (page.status !== 'fulfilled') {
          const reason = page.reason instanceof Error ? page.reason.message : String(page.reason);
          console.warn(`[web_search] search request failed (page ${pageNumbers[i]}): ${reason}`);
          continue;
        }
        fulfilledPages += 1;
        for (const r of page.value?.results ?? []) {
          if (!r.url || seenUrls.has(r.url)) continue;
          seenUrls.add(r.url);
          freshChunks.push({
            // `||` on purpose: an empty content string falls back to the title.
            content: r.content || r.title,
            metadata: { title: r.title, url: r.url },
          });
        }
      }

      // Every request for this query failed: nothing to record or display.
      if (fulfilledPages === 0) return;

      results.push(...freshChunks);
      if (!researchBlock) return;

      if (!searchResultsEmitted) {
        // First query to finish creates the sub-step; later ones append to it.
        searchResultsEmitted = true;
        researchBlock.data.subSteps.push({
          id: searchResultsBlockId,
          type: 'search_results',
          reading: freshChunks,
        });
        publishSubSteps();
        return;
      }

      // Look the sub-step up by id on every update instead of caching the object.
      const subStep = researchBlock.data.subSteps.find((s) => s.id === searchResultsBlockId) as
        | SearchResultsResearchBlock
        | undefined;
      if (subStep) {
        subStep.reading.push(...freshChunks);
        publishSubSteps();
      }
    };

    await Promise.all(queries.map(search));

    if (results.length === 0) {
      return { type: 'search_results', results };
    }

    // Rerank against the user's original query when provided, else the joined queries.
    const originalQuery = additionalConfig.originalQuery ?? queries.join(' ');
    const mode = additionalConfig.mode ?? 'balanced';
    const queryComplexity = estimateQueryComplexity(originalQuery);
    const adaptiveTopK = computeAdaptiveTopK(results.length, queryComplexity, mode);

    const rerankableItems = results.map((r) => ({
      content: r.content,
      title: (r.metadata?.title as string) ?? '',
      url: (r.metadata?.url as string) ?? '',
      metadata: r.metadata,
    }));
    const rankedItems = rerankBM25(rerankableItems, originalQuery, adaptiveTopK);
    const rankedResults: Chunk[] = rankedItems.map((item) => ({
      content: item.content,
      metadata: item.metadata ?? { title: item.title, url: item.url },
    }));

    return { type: 'search_results', results: rankedResults };
  },
};
export default webSearchAction;