feat: Go backend, enhanced search, new widgets, Docker deploy

Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler,
  proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard,
  UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
This commit is contained in:
home
2026-02-27 04:15:32 +03:00
parent 328d968f3f
commit 06fe57c765
285 changed files with 53132 additions and 1871 deletions

View File

@@ -2,22 +2,75 @@ import z from 'zod';
import type { ResearchAction } from './types.js';
import type { Chunk, SearchResultsResearchBlock } from '../types.js';
import { searchSearxng } from '../searxng.js';
import { rerankBM25, computeAdaptiveTopK, estimateQueryComplexity } from '../reranker.js';
/** Number of permits handed to the shared search semaphore, i.e. how many searches may be in flight at once. */
const MAX_CONCURRENT_SEARCHES = 5;
/** Pause, in milliseconds, awaited inside a held permit before each rate-limited search request is issued. */
const SEARCH_DELAY_MS = 50;
/**
 * Counting semaphore for async work.
 *
 * Up to `permits` holders may be active at the same time. Further callers of
 * `acquire()` are parked in FIFO order and woken one at a time as permits are
 * released.
 */
class SearchSemaphore {
  /** Permits that are currently free to hand out. */
  private available: number;
  /** Resolve callbacks of callers waiting for a permit, oldest first. */
  private waiters: Array<() => void> = [];

  /**
   * @param permits Maximum number of simultaneous holders.
   */
  constructor(permits: number) {
    this.available = permits;
  }

  /**
   * Takes a permit, resolving immediately when one is free and otherwise
   * once an earlier holder releases.
   */
  async acquire(): Promise<void> {
    if (this.available > 0) {
      this.available -= 1;
      return;
    }
    return new Promise<void>((wake) => {
      this.waiters.push(wake);
    });
  }

  /**
   * Gives a permit back. If somebody is waiting, the permit is passed
   * straight to the oldest waiter instead of being returned to the pool.
   */
  release(): void {
    const wake = this.waiters.shift();
    if (!wake) {
      this.available += 1;
      return;
    }
    wake();
  }

  /**
   * Runs `fn` while holding a permit; the permit is released whether `fn`
   * resolves or rejects.
   *
   * @param fn Async task to run under the semaphore.
   * @returns Whatever `fn` resolves to.
   */
  async runExclusive<T>(fn: () => Promise<T>): Promise<T> {
    await this.acquire();
    try {
      const outcome = await fn();
      return outcome;
    } finally {
      this.release();
    }
  }
}
/** Module-wide limiter shared by every call to `rateLimitedSearch`. */
const searchSemaphore = new SearchSemaphore(MAX_CONCURRENT_SEARCHES);

/**
 * Performs a SearXNG search under the shared semaphore.
 *
 * Once a permit is held, the call first waits `SEARCH_DELAY_MS` and then
 * forwards `query` and `opts` to `searchSearxng`. Errors from the search
 * propagate to the caller; the permit is released either way.
 *
 * @param query Search query string.
 * @param opts Optional category filter and page number passed through to `searchSearxng`.
 * @returns The result object produced by `searchSearxng`.
 */
async function rateLimitedSearch(
  query: string,
  opts: { categories?: string[]; pageno?: number },
): Promise<{ results: { content?: string; title: string; url: string }[] }> {
  const delayedSearch = async () => {
    // The pause happens while the permit is held, so it spaces out requests per slot.
    await new Promise((wake) => setTimeout(wake, SEARCH_DELAY_MS));
    return searchSearxng(query, opts);
  };
  return searchSemaphore.runExclusive(delayedSearch);
}
/**
 * Input schema for the `web_search` action: an array of at most six query
 * strings per call.
 *
 * The object previously listed `queries` twice; only the later definition
 * (with the `.max(6)` limit) took effect at runtime, so that is the one kept.
 */
const schema = z.object({
  queries: z
    .array(z.string())
    .max(6)
    .describe('An array of search queries to perform web searches for.'),
});
const webSearchAction: ResearchAction<typeof schema> = {
name: 'web_search',
schema,
getToolDescription: () =>
'Use this tool to perform web searches based on the provided queries. You can provide up to 3 queries at a time.',
'Use this tool to perform web searches. Provide up to 6 queries in the user\'s language.',
getDescription: () =>
'Use this tool to perform web searches. Your queries should be targeted and specific, SEO-friendly keywords. You can search for 3 queries in one go.',
'Use this tool to perform web searches. Queries should be targeted, SEO-friendly. Up to 6 queries in the user\'s language.',
enabled: (config) =>
config.sources.includes('web') && config.classification.classification.skipSearch === false,
execute: async (input, additionalConfig) => {
input.queries = input.queries.slice(0, 3);
input.queries = input.queries.slice(0, 6);
const researchBlock = additionalConfig.session.getBlock(additionalConfig.researchBlockId);
@@ -35,15 +88,45 @@ const webSearchAction: ResearchAction<typeof schema> = {
const searchResultsBlockId = crypto.randomUUID();
let searchResultsEmitted = false;
const results: Chunk[] = [];
// Link summary: news + science only. Regular chat: text categories (no images/videos/files/music/map/social_media).
const isArticleSummary = !!additionalConfig.isArticleSummary;
const TEXT_CATEGORIES = ['general', 'science', 'it', 'news'] as const;
const opts = isArticleSummary
? { categories: ['news', 'science'] as string[], pageno: 1 as number }
: { categories: [...TEXT_CATEGORIES] };
const search = async (q: string) => {
let res: { results: { content?: string; title: string; url: string }[] };
let allRawResults: { content?: string; title: string; url: string }[] = [];
try {
res = await searchSearxng(q);
if (isArticleSummary) {
const pages = await Promise.allSettled([
rateLimitedSearch(q, { ...opts, pageno: 1 }),
rateLimitedSearch(q, { ...opts, pageno: 2 }),
]);
const seenUrls = new Set<string>();
for (const p of pages) {
if (p.status !== 'fulfilled') continue;
for (const r of p.value.results ?? []) {
if (r.url && !seenUrls.has(r.url)) {
seenUrls.add(r.url);
allRawResults.push(r);
}
}
}
} else {
const res = await rateLimitedSearch(q, { ...opts, pageno: 1 });
const seenUrls = new Set<string>();
for (const r of res.results ?? []) {
if (r.url && !seenUrls.has(r.url)) {
seenUrls.add(r.url);
allRawResults.push(r);
}
}
}
} catch {
return;
}
const resultChunks: Chunk[] = res.results.map((r) => ({
const resultChunks: Chunk[] = allRawResults.map((r) => ({
content: r.content || r.title,
metadata: { title: r.title, url: r.url },
}));
@@ -73,7 +156,30 @@ const webSearchAction: ResearchAction<typeof schema> = {
await Promise.all(input.queries.map(search));
return { type: 'search_results', results };
if (results.length === 0) {
return { type: 'search_results', results };
}
const originalQuery = additionalConfig.originalQuery ?? input.queries.join(' ');
const mode = additionalConfig.mode ?? 'balanced';
const queryComplexity = estimateQueryComplexity(originalQuery);
const adaptiveTopK = computeAdaptiveTopK(results.length, queryComplexity, mode);
const rerankableItems = results.map((r) => ({
content: r.content,
title: (r.metadata?.title as string) ?? '',
url: (r.metadata?.url as string) ?? '',
metadata: r.metadata,
}));
const rankedItems = rerankBM25(rerankableItems, originalQuery, adaptiveTopK);
const rankedResults: Chunk[] = rankedItems.map((item) => ({
content: item.content,
metadata: item.metadata ?? { title: item.title, url: item.url },
}));
return { type: 'search_results', results: rankedResults };
},
};