feat: Go backend, enhanced search, new widgets, Docker deploy

Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler,
  proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard,
  UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
This commit is contained in:
home
2026-02-27 04:15:32 +03:00
parent 328d968f3f
commit 06fe57c765
285 changed files with 53132 additions and 1871 deletions

View File

@@ -1,15 +1,20 @@
# syntax=docker/dockerfile:1
FROM node:22-alpine AS builder
RUN apk add --no-cache python3 make g++
WORKDIR /app
COPY package*.json ./
RUN npm install
COPY --from=npm-cache / /tmp/npm-cache
RUN npm install --cache /tmp/npm-cache --prefer-offline --no-audit
COPY tsconfig.json ./
COPY src ./src
RUN npm run build
FROM node:22-alpine
RUN apk add --no-cache python3 make g++
WORKDIR /app
COPY package*.json ./
RUN npm install --omit=dev
COPY --from=npm-cache / /tmp/npm-cache
RUN npm install --omit=dev --cache /tmp/npm-cache --prefer-offline --no-audit
COPY --from=builder /app/dist ./dist
EXPOSE 3002
CMD ["node", "dist/index.js"]

View File

@@ -13,9 +13,11 @@
"fastify": "^4.28.1",
"@fastify/cors": "^9.0.1",
"ioredis": "^5.4.1",
"zod": "^3.23.8"
"zod": "^3.23.8",
"better-sqlite3": "^11.9.1"
},
"devDependencies": {
"@types/better-sqlite3": "^7.6.12",
"@types/node": "^22.10.0",
"tsx": "^4.19.2",
"typescript": "^5.7.2"

View File

@@ -0,0 +1,276 @@
/**
* SQLite: переводы, саммари и дайджесты статей Discover.
* Хранение 7 дней, раз в сутки удаление просроченных.
*/
import Database from 'better-sqlite3';
// Location of the SQLite database; an in-memory database is used when DISCOVER_DB_PATH is unset.
const DEFAULT_PATH = process.env.DISCOVER_DB_PATH ?? ':memory:';
// Rows older than this many days are removed by deleteExpired().
const RETENTION_DAYS = 7;
// The same retention window in seconds (row timestamps are stored as Unix seconds).
const RETENTION_SEC = RETENTION_DAYS * 24 * 60 * 60;
/** One row of the `discover_articles` table: a translated title and summary keyed by article URL. */
export interface DiscoverArticleRow {
// Article URL; primary key of the table.
url: string;
// Translated title (presumably Russian, per the `_ru` suffix — confirm).
title_ru: string;
// Translated summary (same language assumption as title_ru).
summary_ru: string;
// JSON-encoded array of { url, title } source objects, as written by upsert().
sources_json: string;
// Thumbnail URL; empty string when none was supplied.
thumbnail: string;
// Unix timestamp in seconds of the last upsert; used by deleteExpired() for retention.
fetched_at: number;
}
/** A single cited source of a digest; arrays of these are serialized into `citations_json`. */
export interface DigestCitation {
// Citation number (presumably the marker used inside the summary text — confirm with the producer).
index: number;
// URL of the cited source.
url: string;
// Title of the cited source.
title: string;
// Domain of the cited source (format is decided by the producer; not validated here).
domain: string;
}
/** One row of the `discover_digests` table; (topic, region, cluster_title) is the primary key. */
export interface DigestRow {
// Topic the digest belongs to.
topic: string;
// Region the digest belongs to.
region: string;
// Title of the digest cluster; third component of the primary key.
cluster_title: string;
// Full digest summary text.
summary_ru: string;
// JSON-encoded DigestCitation[] as written by upsertDigest().
citations_json: string;
// Number of sources reported by the producer of the digest.
sources_count: number;
// JSON-encoded string[] of follow-up questions as written by upsertDigest().
follow_up_json: string;
// Thumbnail URL; may be an empty string.
thumbnail: string;
// Short teaser text; may be an empty string.
short_description: string;
// Main article URL, stored after normalizeArticleUrl(); used by getDigestByUrl().
main_url: string;
// Unix timestamp in seconds of the last upsert; used by deleteExpired() for retention.
created_at: number;
}
/** Shared connection, created on first use by getDb() and reset to null by closeDb(). */
let db: Database.Database | null = null;

/**
 * Returns the shared SQLite connection.
 *
 * On the first call the database at DEFAULT_PATH is opened, switched to WAL
 * journaling, and the three tables plus their indexes are created if missing.
 */
function getDb(): Database.Database {
  if (db !== null) {
    return db;
  }
  const handle = new Database(DEFAULT_PATH);
  // Publish the handle before running pragma/schema, exactly as the lazy initializer always did.
  db = handle;
  handle.pragma('journal_mode = WAL');
  const schemaSql = `
CREATE TABLE IF NOT EXISTS discover_articles (
url TEXT PRIMARY KEY,
title_ru TEXT NOT NULL,
summary_ru TEXT NOT NULL,
sources_json TEXT DEFAULT '[]',
thumbnail TEXT DEFAULT '',
fetched_at INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_discover_fetched_at ON discover_articles(fetched_at);
CREATE TABLE IF NOT EXISTS article_summaries (
url TEXT PRIMARY KEY,
events_json TEXT NOT NULL,
created_at INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_article_summaries_created ON article_summaries(created_at);
CREATE TABLE IF NOT EXISTS discover_digests (
topic TEXT NOT NULL,
region TEXT NOT NULL,
cluster_title TEXT NOT NULL,
summary_ru TEXT NOT NULL,
citations_json TEXT NOT NULL DEFAULT '[]',
sources_count INTEGER DEFAULT 0,
follow_up_json TEXT DEFAULT '[]',
thumbnail TEXT DEFAULT '',
short_description TEXT DEFAULT '',
main_url TEXT DEFAULT '',
created_at INTEGER NOT NULL,
PRIMARY KEY (topic, region, cluster_title)
);
CREATE INDEX IF NOT EXISTS idx_digests_topic_region ON discover_digests(topic, region);
CREATE INDEX IF NOT EXISTS idx_digests_created ON discover_digests(created_at);
CREATE INDEX IF NOT EXISTS idx_digests_main_url ON discover_digests(main_url);
`;
  handle.exec(schemaSql);
  return handle;
}
/**
 * Looks up the stored translation for an article.
 *
 * @param url Article URL; surrounding whitespace is trimmed before the exact-match lookup.
 * @returns The matching row, or null when nothing is stored for that URL.
 */
export function getByUrl(url: string): DiscoverArticleRow | null {
  const statement = getDb().prepare('SELECT * FROM discover_articles WHERE url = ?');
  const found = statement.get(url.trim()) as DiscoverArticleRow | undefined;
  if (found === undefined || found === null) {
    return null;
  }
  return found;
}
/**
 * Inserts the translated article, or overwrites every column of the existing row
 * with the same URL. `fetched_at` is always set to the current time (Unix seconds).
 *
 * @param row Translation data; `sources` defaults to an empty list and `thumbnail` to ''.
 */
export function upsert(row: {
  url: string;
  title_ru: string;
  summary_ru: string;
  sources?: { url: string; title: string }[];
  thumbnail?: string;
}): void {
  const serializedSources = JSON.stringify(row.sources ?? []);
  const statement = getDb().prepare(
    `INSERT INTO discover_articles (url, title_ru, summary_ru, sources_json, thumbnail, fetched_at)
VALUES (?, ?, ?, ?, ?, ?)
ON CONFLICT(url) DO UPDATE SET
title_ru = excluded.title_ru,
summary_ru = excluded.summary_ru,
sources_json = excluded.sources_json,
thumbnail = excluded.thumbnail,
fetched_at = excluded.fetched_at`
  );
  const trimmedUrl = row.url.trim();
  const thumbnail = row.thumbnail ?? '';
  const nowSec = Math.floor(Date.now() / 1000);
  statement.run(trimmedUrl, row.title_ru, row.summary_ru, serializedSources, thumbnail, nowSec);
}
/**
 * Lists every digest stored for a topic/region pair, newest first.
 *
 * @returns Matching rows ordered by `created_at` descending; empty array when none exist.
 */
export function getDigests(topic: string, region: string): DigestRow[] {
  const statement = getDb().prepare(
    'SELECT * FROM discover_digests WHERE topic = ? AND region = ? ORDER BY created_at DESC'
  );
  const rows = statement.all(topic, region) as DigestRow[];
  return rows;
}
/**
 * Fetches a single digest by its full primary key.
 *
 * @returns The row for (topic, region, clusterTitle), or null when it does not exist.
 */
export function getDigest(topic: string, region: string, clusterTitle: string): DigestRow | null {
  const statement = getDb().prepare(
    'SELECT * FROM discover_digests WHERE topic = ? AND region = ? AND cluster_title = ?'
  );
  const match = statement.get(topic, region, clusterTitle) as DigestRow | undefined;
  if (match === undefined || match === null) {
    return null;
  }
  return match;
}
/**
 * Finds the newest digest that refers to an article URL (used for the pre-generated
 * lookup from chat).
 *
 * The URL is normalized first. A digest whose `main_url` equals it wins; otherwise the
 * newest digest whose serialized citations contain the URL as a substring is returned.
 *
 * @param url Article URL in any form accepted by normalizeArticleUrl().
 * @returns The matching digest row, or null when none matches or the URL is empty.
 */
export function getDigestByUrl(url: string): DigestRow | null {
  const normalized = normalizeArticleUrl(url);
  // An empty needle would match digests saved without a main_url and turn the
  // citation search into LIKE '%%', which matches every row.
  if (!normalized) return null;

  const conn = getDb();
  const byMainUrl = conn
    .prepare('SELECT * FROM discover_digests WHERE main_url = ? ORDER BY created_at DESC LIMIT 1')
    .get(normalized) as DigestRow | undefined;
  if (byMainUrl) return byMainUrl;

  // `%` and `_` are LIKE wildcards and are common in URLs (percent-encoding, slugs);
  // escape them so the URL is matched literally.
  const likeNeedle = normalized.replace(/[\\%_]/g, (ch) => `\\${ch}`);
  const byCitation = conn
    .prepare(
      "SELECT * FROM discover_digests WHERE citations_json LIKE ? ESCAPE '\\' ORDER BY created_at DESC LIMIT 1"
    )
    .get(`%${likeNeedle}%`) as DigestRow | undefined;
  return byCitation ?? null;
}
/**
 * Inserts a digest, or overwrites the existing one with the same
 * (topic, region, clusterTitle) key.
 *
 * Citations and follow-ups are stored as JSON, `mainUrl` is stored normalized, and
 * `created_at` is set to the current time (Unix seconds) on every call.
 */
export function upsertDigest(row: {
  topic: string;
  region: string;
  clusterTitle: string;
  summaryRu: string;
  citations: DigestCitation[];
  sourcesCount: number;
  followUp: string[];
  thumbnail: string;
  shortDescription: string;
  mainUrl: string;
}): void {
  const statement = getDb().prepare(
    `INSERT INTO discover_digests (topic, region, cluster_title, summary_ru, citations_json, sources_count, follow_up_json, thumbnail, short_description, main_url, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(topic, region, cluster_title) DO UPDATE SET
summary_ru = excluded.summary_ru,
citations_json = excluded.citations_json,
sources_count = excluded.sources_count,
follow_up_json = excluded.follow_up_json,
thumbnail = excluded.thumbnail,
short_description = excluded.short_description,
main_url = excluded.main_url,
created_at = excluded.created_at`
  );
  const citationsJson = JSON.stringify(row.citations);
  const followUpJson = JSON.stringify(row.followUp);
  const mainUrl = normalizeArticleUrl(row.mainUrl);
  const createdAt = Math.floor(Date.now() / 1000);
  statement.run(
    row.topic,
    row.region,
    row.clusterTitle,
    row.summaryRu,
    citationsJson,
    row.sourcesCount,
    followUpJson,
    row.thumbnail,
    row.shortDescription,
    mainUrl,
    createdAt
  );
}
/**
 * Removes every digest of a topic/region pair (called before the set is regenerated).
 *
 * @returns Number of rows deleted.
 */
export function deleteDigests(topic: string, region: string): number {
  const statement = getDb().prepare('DELETE FROM discover_digests WHERE topic = ? AND region = ?');
  const { changes } = statement.run(topic, region);
  return changes;
}
/**
 * Purges rows older than RETENTION_DAYS from all three tables. Intended to run once a day.
 *
 * @returns Total number of rows deleted across articles, summaries and digests.
 */
export function deleteExpired(): number {
  const cutoff = Math.floor(Date.now() / 1000) - RETENTION_SEC;
  const purgeStatements = [
    'DELETE FROM discover_articles WHERE fetched_at < ?',
    'DELETE FROM article_summaries WHERE created_at < ?',
    'DELETE FROM discover_digests WHERE created_at < ?',
  ];
  let removed = 0;
  for (const sql of purgeStatements) {
    removed += getDb().prepare(sql).run(cutoff).changes;
  }
  return removed;
}
/**
 * Loads the saved event stream of an article summary produced in chat (Discover).
 *
 * @param url Article URL; normalized before lookup.
 * @returns The stored events coerced to strings, or null when nothing is stored or
 *          the stored value is not a valid JSON array.
 */
export function getArticleSummary(url: string): string[] | null {
  const key = normalizeArticleUrl(url);
  const stored = getDb()
    .prepare('SELECT events_json FROM article_summaries WHERE url = ?')
    .get(key) as { events_json: string } | undefined;
  const rawJson = stored?.events_json;
  if (!rawJson) {
    return null;
  }
  let decoded: unknown;
  try {
    decoded = JSON.parse(rawJson);
  } catch {
    return null;
  }
  if (!Array.isArray(decoded)) {
    return null;
  }
  return decoded.map((entry) => String(entry));
}
/**
 * Canonicalizes an article URL so equivalent spellings map to the same storage key.
 *
 * Keeps origin, path (without trailing slashes) and query string; the fragment is
 * dropped. Input that does not parse as a URL is returned trimmed but otherwise unchanged.
 */
function normalizeArticleUrl(url: string): string {
  const trimmed = url.trim();
  let parsed: URL;
  try {
    parsed = new URL(trimmed);
  } catch {
    // Not an absolute URL: fall back to the trimmed input.
    return trimmed;
  }
  const pathWithoutTrailingSlashes = parsed.pathname.replace(/\/+$/, '');
  const query = parsed.search || '';
  return `${parsed.origin}${pathWithoutTrailingSlashes}${query}`;
}
export { normalizeArticleUrl };
/**
 * Stores the event stream of an article summary, replacing any previous one for
 * the same (normalized) URL and refreshing its `created_at` timestamp.
 *
 * @param url Article URL; normalized before being used as the key.
 * @param events Stream events, persisted as one JSON array.
 */
export function saveArticleSummary(url: string, events: string[]): void {
  const key = normalizeArticleUrl(url);
  const serializedEvents = JSON.stringify(events);
  const statement = getDb().prepare(
    `INSERT INTO article_summaries (url, events_json, created_at)
VALUES (?, ?, ?)
ON CONFLICT(url) DO UPDATE SET events_json = excluded.events_json, created_at = excluded.created_at`
  );
  const createdAt = Math.floor(Date.now() / 1000);
  statement.run(key, serializedEvents, createdAt);
}
/**
 * Deletes the stored summary of an article so that it can be regenerated.
 *
 * @param url Article URL; normalized before lookup.
 * @returns Number of rows deleted (0 when nothing was stored).
 */
export function deleteArticleSummary(url: string): number {
  const key = normalizeArticleUrl(url);
  const statement = getDb().prepare('DELETE FROM article_summaries WHERE url = ?');
  const { changes } = statement.run(key);
  return changes;
}
/** Handle of the daily cleanup timer; null while cleanup is not scheduled. */
let cleanupInterval: ReturnType<typeof setInterval> | null = null;

/**
 * Starts the retention job: purges expired rows immediately and then every 24 hours.
 * Calling it again while the job is already scheduled is a no-op.
 *
 * A failure of the initial purge propagates to the caller; failures of the scheduled
 * runs are logged and the job keeps running.
 */
export function startDailyCleanup(): void {
  if (cleanupInterval) return;
  deleteExpired();
  cleanupInterval = setInterval(() => {
    try {
      const n = deleteExpired();
      if (n > 0) {
        console.log(`[discover-db] deleted ${n} expired articles`);
      }
    } catch (err) {
      // An exception escaping a timer callback is uncaught and would terminate the
      // process; a failed purge is simply retried on the next tick.
      console.error('[discover-db] daily cleanup failed', err);
    }
  }, 24 * 60 * 60 * 1000);
}
/**
 * Stops the daily cleanup timer (if scheduled) and closes the shared connection
 * (if open). A later getDb() call opens a fresh connection.
 */
export function closeDb(): void {
  if (cleanupInterval !== null) {
    clearInterval(cleanupInterval);
    cleanupInterval = null;
  }
  if (db === null) {
    return;
  }
  db.close();
  db = null;
}

View File

@@ -4,28 +4,40 @@
* Ответ: { blogs: [{ title, content, url, thumbnail }] }
*/
import crypto from 'node:crypto';
import Fastify from 'fastify';
import cors from '@fastify/cors';
import Redis from 'ioredis';
import { searchSearxng, type SearxngSearchResult } from './searxng.js';
import * as discoverDb from './discover-db.js';
// TCP port taken from PORT (default 3002), parsed as a base-10 integer.
const PORT = parseInt(process.env.PORT ?? '3002', 10);
// Redis connection string; defaults to a local instance.
const REDIS_URL = process.env.REDIS_URL ?? 'redis://localhost:6379';
/** Redis: TTL of the cached Discover output (what is shown to users), 1 hour. */
const REDIS_DISCOVER_TTL_SEC = 60 * 60;
// Ghost base URL and Content API key; both default to '' when unset, which fetchGhostPosts treats as "not configured".
const GHOST_URL = process.env.GHOST_URL?.trim() ?? '';
const GHOST_CONTENT_API_KEY = process.env.GHOST_CONTENT_API_KEY?.trim() ?? '';
// Base URL of the geo/device service (its use lies outside the visible part of this file).
const GEO_DEVICE_SERVICE_URL = process.env.GEO_DEVICE_SERVICE_URL ?? 'http://localhost:4002';
const PLACEHOLDER_IMAGE = 'https://placehold.co/400x225/e5e7eb/6b7280?text=Post';
/**
 * Self-contained placeholder thumbnail (400x225 grey SVG with the text "Post"),
 * used when an item has no image of its own.
 *
 * The SVG is written with literal `#` colours and percent-encoded exactly once by
 * encodeURIComponent. Pre-encoding `#` as `%23` in the literal would be encoded a
 * second time (`%2523`) and decode to an invalid `fill="%23…"` value.
 */
const PLACEHOLDER_IMAGE =
  'data:image/svg+xml,' +
  encodeURIComponent(
    '<svg xmlns="http://www.w3.org/2000/svg" width="400" height="225" viewBox="0 0 400 225"><rect fill="#e5e7eb" width="400" height="225"/><text x="50%" y="50%" dominant-baseline="middle" text-anchor="middle" fill="#6b7280" font-family="sans-serif" font-size="16">Post</text></svg>'
  );
const NEWS_REGION = (process.env.NEWS_REGION ?? 'auto') as string;
type Region = 'america' | 'eu' | 'russia' | 'china';
type Region = 'america' | 'eu' | 'russia' | 'china' | 'world';
type Topic =
| 'bait'
| 'gooseek'
| 'tech'
| 'finance'
| 'art'
| 'sports'
| 'entertainment'
| 'gooseek';
| 'entertainment';
/** Tag attached to a Ghost post; only the slug is read (to exclude posts by tag). */
interface GhostTag {
// Tag slug; optional because it is not guaranteed to be present in the API payload.
slug?: string;
}
interface GhostPost {
title: string;
excerpt?: string | null;
@@ -35,12 +47,35 @@ interface GhostPost {
html?: string | null;
feature_image?: string | null;
url: string;
tags?: GhostTag[] | null;
}
// ioredis + NodeNext: default export не распознаётся как конструктор
const redis: import('ioredis') = new (Redis as any)(REDIS_URL);
// @ts-expect-error — ioredis + NodeNext ESM constructability
const redis = new Redis(REDIS_URL);
redis.on('error', () => {});
/**
 * Deletes cache keys matching a glob pattern, walking the keyspace with SCAN.
 *
 * Digest work-queue state lives in the same `discover:` namespace as the cache
 * (`discover:digest:queue`, `discover:digest:processing:*`). It is never deleted
 * here: a broad invalidation such as `discover:*` would otherwise wipe the queue and
 * the in-flight markers.
 *
 * @param pattern Redis glob pattern passed to SCAN MATCH.
 * @param excludePrefix Optional extra key prefix to keep.
 * @returns Number of keys submitted for deletion.
 */
async function scanAndDeleteKeys(
  pattern: string,
  excludePrefix?: string,
): Promise<number> {
  // Must stay in sync with the digest queue / processing key names used by the queue routes.
  const protectedPrefixes = ['discover:digest:queue', 'discover:digest:processing'];
  if (excludePrefix) {
    protectedPrefixes.push(excludePrefix);
  }

  let deletedCount = 0;
  let cursor = '0';
  do {
    const [nextCursor, keys] = await redis.scan(cursor, 'MATCH', pattern, 'COUNT', 100);
    cursor = nextCursor;
    const toDelete = keys.filter(
      (k: string) => !protectedPrefixes.some((prefix) => k.startsWith(prefix))
    );
    if (toDelete.length > 0) {
      await redis.del(...toDelete);
      deletedCount += toDelete.length;
    }
  } while (cursor !== '0'); // SCAN signals the end of the iteration with cursor "0"
  return deletedCount;
}
/**
 * Reduces an HTML fragment to plain text: every tag becomes a space, runs of
 * whitespace collapse to one space, and the result is trimmed.
 */
function stripHtml(html: string): string {
  const withoutTags = html.replace(/<[^>]+>/g, ' ');
  const singleSpaced = withoutTags.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
@@ -49,7 +84,32 @@ const SOURCES_BY_REGION: Record<
Region,
Record<Topic, { query: string[]; links: string[] }>
> = {
world: {
bait: { query: [], links: [] },
gooseek: { query: [], links: [] },
tech: {
query: ['technology news', 'AI', 'innovation', 'science'],
links: ['reuters.com', 'bbc.com', 'theguardian.com', 'apnews.com', 'techcrunch.com', 'theverge.com'],
},
finance: {
query: ['finance news', 'economy', 'stock market', 'central banks'],
links: ['reuters.com', 'bbc.com', 'bloomberg.com', 'cnbc.com', 'ft.com', 'apnews.com'],
},
art: {
query: ['art news', 'culture', 'exhibition', 'museum'],
links: ['reuters.com', 'bbc.com', 'theguardian.com', 'apnews.com', 'nytimes.com'],
},
sports: {
query: ['sports news', 'football', 'Olympics', 'premier league'],
links: ['reuters.com', 'bbc.com', 'espn.com', 'apnews.com', 'theguardian.com'],
},
entertainment: {
query: ['entertainment news', 'films', 'music', 'culture'],
links: ['reuters.com', 'bbc.com', 'theguardian.com', 'variety.com', 'apnews.com'],
},
},
america: {
bait: { query: [], links: [] },
gooseek: { query: [], links: [] },
tech: {
query: ['technology news', 'latest tech', 'AI', 'science and innovation'],
@@ -73,6 +133,7 @@ const SOURCES_BY_REGION: Record<
},
},
eu: {
bait: { query: [], links: [] },
gooseek: { query: [], links: [] },
tech: {
query: ['technology news', 'tech', 'AI', 'innovation'],
@@ -96,6 +157,7 @@ const SOURCES_BY_REGION: Record<
},
},
russia: {
bait: { query: [], links: [] },
gooseek: { query: [], links: [] },
tech: {
query: ['technology news', 'tech', 'IT', 'innovation'],
@@ -119,6 +181,7 @@ const SOURCES_BY_REGION: Record<
},
},
china: {
bait: { query: [], links: [] },
gooseek: { query: [], links: [] },
tech: {
query: ['technology news', 'tech', 'AI', 'innovation'],
@@ -153,16 +216,23 @@ const COUNTRY_TO_REGION: Record<string, Region> = {
BG: 'eu', HR: 'eu', SK: 'eu', SI: 'eu', LT: 'eu', LV: 'eu', EE: 'eu', DK: 'eu',
};
async function fetchGooseekPosts(): Promise<
{ title: string; content: string; url: string; thumbnail: string }[]
> {
async function fetchGhostPosts(
tagSlug?: string,
excludeTagSlug?: string
): Promise<{ title: string; content: string; url: string; thumbnail: string }[]> {
if (!GHOST_URL || !GHOST_CONTENT_API_KEY) {
throw new Error(
'Ghost не настроен. Укажите GHOST_URL и GHOST_CONTENT_API_KEY в .env'
);
}
const base = GHOST_URL.replace(/\/$/, '');
const apiUrl = `${base}/ghost/api/content/posts/?key=${GHOST_CONTENT_API_KEY}&limit=50&fields=title,excerpt,custom_excerpt,meta_description,html,feature_image,url&formats=html`;
let apiUrl = `${base}/ghost/api/content/posts/?key=${GHOST_CONTENT_API_KEY}&limit=50&fields=title,excerpt,custom_excerpt,meta_description,html,feature_image,url&formats=html`;
if (tagSlug) {
apiUrl += `&filter=tag:${encodeURIComponent(tagSlug)}`;
}
if (excludeTagSlug) {
apiUrl += '&include=tags';
}
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 15_000);
try {
@@ -180,7 +250,12 @@ async function fetchGooseekPosts(): Promise<
throw new Error(`Ghost API: HTTP ${res.status}`);
}
const data = await res.json();
const posts: GhostPost[] = data.posts ?? [];
let posts: GhostPost[] = data.posts ?? [];
if (excludeTagSlug) {
posts = posts.filter(
(p) => !p.tags?.some((t) => (t?.slug ?? '').toLowerCase() === excludeTagSlug.toLowerCase())
);
}
return posts.map((p) => {
const excerpt =
p.custom_excerpt?.trim() ||
@@ -238,7 +313,7 @@ async function resolveRegion(
return 'america';
}
const app = Fastify({ logger: true });
const app = Fastify({ logger: true, bodyLimit: 100 * 1024 * 1024 });
const corsOrigin = process.env.ALLOWED_ORIGINS
? process.env.ALLOWED_ORIGINS.split(',').map((s) => s.trim()).filter(Boolean)
@@ -264,15 +339,436 @@ app.get('/ready', async () => {
}
});
/**
 * GET /api/v1/discover/search — general search for cache-worker (finds related
 * sources by query). Responds with at most 10 first-page SearXNG results, 400 when
 * `q` is empty, and 503 when the search fails.
 */
app.get<{ Querystring: { q?: string } }>('/api/v1/discover/search', async (req, reply) => {
  const query = (req.query.q ?? '').trim();
  if (query.length === 0) {
    return reply.status(400).send({ message: 'Query q is required' });
  }
  try {
    const response = await searchSearxng(query, { pageno: 1 });
    return { results: response.results.slice(0, 10) };
  } catch (err) {
    req.log.error(err);
    const notConfigured = err instanceof Error && err.message.includes('not configured');
    const message = notConfigured ? 'SearxNG is not configured.' : 'Search failed.';
    return reply.status(503).send({ message });
  }
});
/**
 * GET /api/v1/discover/translated — returns the stored translation for a URL
 * (used by cache-worker). 400 when `url` is missing, 404 when nothing is stored.
 */
app.get<{ Querystring: { url?: string } }>('/api/v1/discover/translated', async (req, reply) => {
  const url = (req.query.url ?? '').trim();
  if (url === '') {
    return reply.status(400).send({ message: 'url required' });
  }
  const row = discoverDb.getByUrl(url);
  if (row === null) {
    return reply.status(404).send({ message: 'not found' });
  }
  const { title_ru, summary_ru, thumbnail, fetched_at } = row;
  const sources = JSON.parse(row.sources_json || '[]') as { url: string; title: string }[];
  return reply.send({ url: row.url, title_ru, summary_ru, sources, thumbnail, fetched_at });
});
/**
 * POST /api/v1/discover/translated — stores a translation (called by cache-worker
 * after translating) and invalidates the cached Discover output in Redis.
 *
 * Responds 204 on success and 400 when `url`, `title_ru` or `summary_ru` is missing,
 * empty or not a string. A failed cache invalidation is logged and does not fail the request.
 */
app.post<{ Body: { url: string; title_ru: string; summary_ru: string; sources?: { url: string; title: string }[]; thumbnail?: string } }>(
  '/api/v1/discover/translated',
  async (req, reply) => {
    // The declared Body type is not enforced at runtime: the body may be absent/null
    // and fields may have any JSON type, so everything is checked before use.
    const body = (req.body ?? {}) as {
      url?: unknown;
      title_ru?: unknown;
      summary_ru?: unknown;
      sources?: unknown;
      thumbnail?: unknown;
    };
    const url = typeof body.url === 'string' ? body.url.trim() : '';
    const title_ru = typeof body.title_ru === 'string' ? body.title_ru : '';
    const summary_ru = typeof body.summary_ru === 'string' ? body.summary_ru : '';
    if (!url || !title_ru || !summary_ru) {
      return reply.status(400).send({ message: 'url, title_ru, summary_ru required' });
    }
    discoverDb.upsert({
      url,
      title_ru,
      summary_ru,
      sources: Array.isArray(body.sources)
        ? (body.sources as { url: string; title: string }[])
        : undefined,
      thumbnail: typeof body.thumbnail === 'string' ? body.thumbnail : undefined,
    });
    try {
      // Drop cached output but keep the article-summary cache.
      await scanAndDeleteKeys('discover:*', 'discover:asum:');
    } catch (e) {
      req.log.warn(e, 'Redis cache invalidation after translation');
    }
    return reply.status(204).send();
  }
);
/** TTL of cached article summaries in Redis: 1 hour, in seconds. */
const ARTICLE_SUMMARY_REDIS_TTL = 60 * 60;

/**
 * Builds the Redis key of an article summary: `discover:asum:` followed by the first
 * 32 hex characters of the SHA-256 of the trimmed URL.
 */
function articleSummaryRedisKey(url: string): string {
  const digestHex = crypto.createHash('sha256').update(url.trim()).digest('hex');
  const shortHash = digestHex.slice(0, 32);
  return 'discover:asum:' + shortHash;
}
/**
 * GET /api/v1/discover/article-summary — returns the saved event stream of an
 * article summary produced in chat, so that the client can replay it.
 *
 * Redis is consulted first; on a miss the SQLite copy is loaded and cached again.
 * 400 when `url` is missing, 404 when no summary is stored, 500 on failure.
 */
app.get<{ Querystring: { url?: string } }>('/api/v1/discover/article-summary', async (req, reply) => {
  const url = (req.query.url ?? '').trim();
  if (url === '') {
    return reply.status(400).send({ message: 'url required' });
  }
  try {
    const redisKey = articleSummaryRedisKey(discoverDb.normalizeArticleUrl(url));
    const cachedJson = await redis.get(redisKey);
    if (cachedJson) {
      return reply.send(JSON.parse(cachedJson) as { events: string[] });
    }
    const events = discoverDb.getArticleSummary(url);
    if (events === null || events.length === 0) {
      return reply.status(404).send({ message: 'not found' });
    }
    await redis.setex(redisKey, ARTICLE_SUMMARY_REDIS_TTL, JSON.stringify({ events }));
    return reply.send({ events });
  } catch (e) {
    req.log.error(e);
    return reply.status(500).send({ message: 'Failed to get article summary' });
  }
});
/**
 * DELETE /api/v1/discover/article-summary — removes the summary of an article from
 * both Redis and SQLite so that chat can regenerate it.
 * 400 when `url` is missing, 204 on success, 500 on failure.
 */
app.delete<{ Querystring: { url?: string } }>('/api/v1/discover/article-summary', async (req, reply) => {
  const url = (req.query.url ?? '').trim();
  if (url === '') {
    return reply.status(400).send({ message: 'url required' });
  }
  try {
    const redisKey = articleSummaryRedisKey(discoverDb.normalizeArticleUrl(url));
    await redis.del(redisKey);
    const deleted = discoverDb.deleteArticleSummary(url);
    const logFields = { url: url.slice(0, 80), deleted };
    req.log.info(logFields, 'article-summary cache and DB cleared');
    return reply.status(204).send();
  } catch (e) {
    req.log.error(e);
    return reply.status(500).send({ message: 'Failed to clear article summary' });
  }
});
/**
 * POST /api/v1/discover/article-summary — saves the event stream of an article
 * summary (after the first summary in chat) to SQLite and caches it in Redis.
 *
 * 400 when `url` is missing/not a string or `events` is not a non-empty array,
 * 204 on success, 500 when saving fails.
 */
app.post<{ Body: { url: string; events: string[] } }>('/api/v1/discover/article-summary', async (req, reply) => {
  // The declared Body type is not enforced at runtime; tolerate an absent/null body
  // and a non-string url so that bad input yields 400 rather than an unhandled TypeError.
  const body = (req.body ?? {}) as { url?: unknown; events?: unknown };
  const url = typeof body.url === 'string' ? body.url.trim() : '';
  const events = Array.isArray(body.events) ? (body.events as string[]) : [];
  req.log.info({ url: url.slice(0, 80), eventsCount: events.length }, 'POST article-summary received');
  if (!url || events.length === 0) {
    return reply.status(400).send({ message: 'url and events[] required' });
  }
  try {
    discoverDb.saveArticleSummary(url, events);
    const normalizedUrl = discoverDb.normalizeArticleUrl(url);
    const key = articleSummaryRedisKey(normalizedUrl);
    await redis.setex(key, ARTICLE_SUMMARY_REDIS_TTL, JSON.stringify({ events }));
    req.log.info({ url: url.slice(0, 80) }, 'article-summary saved');
    return reply.status(204).send();
  } catch (e) {
    req.log.error(e);
    return reply.status(500).send({ message: 'Failed to save article summary' });
  }
});
/** Converts a stored digest row into the camelCase API payload, decoding its JSON columns. */
function digestRowToResponse(row: discoverDb.DigestRow) {
  return {
    topic: row.topic,
    region: row.region,
    clusterTitle: row.cluster_title,
    summaryRu: row.summary_ru,
    citations: JSON.parse(row.citations_json || '[]') as discoverDb.DigestCitation[],
    sourcesCount: row.sources_count,
    followUp: JSON.parse(row.follow_up_json || '[]') as string[],
    thumbnail: row.thumbnail,
    shortDescription: row.short_description,
    mainUrl: row.main_url,
  };
}

/**
 * GET /api/v1/discover/digest — full digest with citations and follow-up questions.
 *
 * The digest is looked up by `url` when given, otherwise by `topic` + `region` + `title`.
 * 400 when neither form of key is complete, 404 when no digest matches.
 */
app.get<{ Querystring: { topic?: string; region?: string; title?: string; url?: string } }>(
  '/api/v1/discover/digest',
  async (req, reply) => {
    const topic = (req.query.topic ?? '').trim();
    const region = (req.query.region ?? '').trim();
    const title = (req.query.title ?? '').trim();
    const url = (req.query.url ?? '').trim();

    let row: discoverDb.DigestRow | null;
    if (url) {
      // A URL lookup takes precedence over the composite key.
      row = discoverDb.getDigestByUrl(url);
    } else {
      if (!topic || !region || !title) {
        return reply.status(400).send({ message: 'topic, region, title (or url) required' });
      }
      row = discoverDb.getDigest(topic, region, title);
    }
    if (!row) return reply.status(404).send({ message: 'digest not found' });
    return reply.send(digestRowToResponse(row));
  },
);
/**
 * POST /api/v1/discover/digest — stores a digest (called by cache-worker) and drops
 * the cached Discover output of its topic.
 *
 * 400 when `topic`, `region`, `clusterTitle` or `summaryRu` is missing or blank,
 * 204 on success. A failed cache invalidation is logged and does not fail the request.
 */
app.post<{
  Body: {
    topic: string;
    region: string;
    clusterTitle: string;
    summaryRu: string;
    citations: discoverDb.DigestCitation[];
    sourcesCount: number;
    followUp: string[];
    thumbnail: string;
    shortDescription: string;
    mainUrl: string;
  };
}>('/api/v1/discover/digest', async (req, reply) => {
  // The body may be absent/null at runtime; treat that like an empty object so the
  // validation below answers 400 instead of throwing.
  const b = (req.body ?? {}) as Record<string, unknown>;
  const topic = String(b.topic ?? '').trim();
  const region = String(b.region ?? '').trim();
  const clusterTitle = String(b.clusterTitle ?? '').trim();
  const summaryRu = String(b.summaryRu ?? '').trim();
  if (!topic || !region || !clusterTitle || !summaryRu) {
    return reply.status(400).send({ message: 'topic, region, clusterTitle, summaryRu required' });
  }
  discoverDb.upsertDigest({
    topic,
    region,
    clusterTitle,
    summaryRu,
    citations: Array.isArray(b.citations) ? b.citations as discoverDb.DigestCitation[] : [],
    sourcesCount: typeof b.sourcesCount === 'number' ? b.sourcesCount : 0,
    followUp: Array.isArray(b.followUp) ? b.followUp as string[] : [],
    thumbnail: String(b.thumbnail ?? ''),
    shortDescription: String(b.shortDescription ?? ''),
    mainUrl: String(b.mainUrl ?? ''),
  });
  try {
    // `topic` is request input: escape glob metacharacters so that e.g. a topic of "*"
    // cannot widen the pattern to every `discover:` key.
    const topicLiteral = topic.replace(/[\\*?[\]]/g, '\\$&');
    await scanAndDeleteKeys(`discover:${topicLiteral}:*`);
  } catch (e) {
    req.log.warn(e, 'Redis cache invalidation after digest save');
  }
  return reply.status(204).send();
});
/**
 * DELETE /api/v1/discover/digest — removes every digest of a topic/region pair.
 * Responds with `{ deleted }` (row count), or 400 when either parameter is missing.
 */
app.delete<{ Querystring: { topic?: string; region?: string } }>(
  '/api/v1/discover/digest',
  async (req, reply) => {
    const topic = (req.query.topic ?? '').trim();
    const region = (req.query.region ?? '').trim();
    if (topic === '' || region === '') {
      return reply.status(400).send({ message: 'topic, region required' });
    }
    const removedCount = discoverDb.deleteDigests(topic, region);
    return reply.send({ deleted: removedCount });
  },
);
// Redis sorted set of JSON payloads waiting for background digest generation (score = priority).
const DIGEST_QUEUE_KEY = 'discover:digest:queue';
// Key prefix of the per-URL markers written when an item is handed to a worker.
const DIGEST_PROCESSING_KEY = 'discover:digest:processing';
// Lifetime of a processing marker, in seconds.
const QUEUE_ITEM_TTL_SEC = 3600;
/**
 * POST /api/v1/discover/queue — enqueues a URL for background digest generation.
 *
 * Nothing is queued when a digest or an article summary already exists for the URL.
 * Responds `{ queued: true, position }` or `{ queued: false, reason }`; 400 when `url`
 * is missing or not a string, 500 when Redis fails.
 */
app.post<{ Body: { url: string; title?: string; priority?: number } }>(
  '/api/v1/discover/queue',
  async (req, reply) => {
    // The declared Body type is not enforced at runtime; tolerate an absent/null body
    // and wrongly typed fields.
    const body = (req.body ?? {}) as { url?: unknown; title?: unknown; priority?: unknown };
    const url = typeof body.url === 'string' ? body.url.trim() : '';
    if (!url) return reply.status(400).send({ message: 'url required' });

    const existingDigest = discoverDb.getDigestByUrl(url);
    if (existingDigest) {
      return reply.send({ queued: false, reason: 'digest already exists' });
    }
    const normalizedUrl = discoverDb.normalizeArticleUrl(url);
    const existingSummary = discoverDb.getArticleSummary(url);
    if (existingSummary && existingSummary.length > 0) {
      return reply.send({ queued: false, reason: 'summary already exists' });
    }

    // ZADD rejects non-numeric scores; fall back to "now" so bad input cannot cause a 500.
    const priority =
      typeof body.priority === 'number' && Number.isFinite(body.priority)
        ? body.priority
        : Date.now();
    const title = typeof body.title === 'string' ? body.title : '';
    const payload = JSON.stringify({ url: normalizedUrl, title, addedAt: Date.now() });
    try {
      await redis.zadd(DIGEST_QUEUE_KEY, priority, payload);
      req.log.info({ url: normalizedUrl.slice(0, 80), priority }, 'URL queued for digest generation');
      return reply.send({ queued: true, position: await redis.zrank(DIGEST_QUEUE_KEY, payload) });
    } catch (e) {
      req.log.error(e);
      return reply.status(500).send({ message: 'Failed to queue URL' });
    }
  },
);
/**
 * GET /api/v1/discover/queue — hands the next queued URL (lowest score) to a worker.
 *
 * The item is removed from the queue and a processing marker with a TTL of
 * QUEUE_ITEM_TTL_SEC is written for it. Responds `{ item, queueLength }`; `item` is
 * null when the queue is empty. 500 on failure.
 */
app.get('/api/v1/discover/queue', async (req, reply) => {
  try {
    // ZPOPMIN removes and returns the head in one atomic step, so two workers polling
    // concurrently can never receive the same item (ZRANGE followed by ZREM could).
    // Reply shape: [member, score], or [] when the set is empty.
    const popped = await redis.zpopmin(DIGEST_QUEUE_KEY);
    const item = popped[0];
    if (item === undefined) {
      return reply.send({ item: null, queueLength: 0 });
    }
    // A malformed payload throws here and yields a 500, but it has already left the
    // queue, so it cannot block the items behind it.
    const parsed = JSON.parse(item) as { url: string; title: string; addedAt: number };
    await redis.setex(`${DIGEST_PROCESSING_KEY}:${parsed.url}`, QUEUE_ITEM_TTL_SEC, item);
    const queueLength = await redis.zcard(DIGEST_QUEUE_KEY);
    return reply.send({ item: parsed, queueLength });
  } catch (e) {
    req.log.error(e);
    return reply.status(500).send({ message: 'Failed to get queue item' });
  }
});
/**
 * DELETE /api/v1/discover/queue — marks a URL as processed, or puts it back into the
 * queue when `requeue=true` and its processing marker still exists.
 *
 * A requeued item is scheduled with a score 60 seconds in the future.
 * Responds `{ requeued: true }` or 204; 400 when `url` is missing, 500 on failure.
 */
app.delete<{ Querystring: { url?: string; requeue?: string } }>(
  '/api/v1/discover/queue',
  async (req, reply) => {
    const url = (req.query.url ?? '').trim();
    const wantsRequeue = req.query.requeue === 'true';
    if (url === '') {
      return reply.status(400).send({ message: 'url required' });
    }
    const normalizedUrl = discoverDb.normalizeArticleUrl(url);
    const markerKey = `${DIGEST_PROCESSING_KEY}:${normalizedUrl}`;
    const logFields = { url: normalizedUrl.slice(0, 80) };
    try {
      const savedPayload = await redis.get(markerKey);
      await redis.del(markerKey);
      if (!wantsRequeue || !savedPayload) {
        req.log.info(logFields, 'URL processing completed');
        return reply.status(204).send();
      }
      const retryScore = Date.now() + 60000;
      await redis.zadd(DIGEST_QUEUE_KEY, retryScore, savedPayload);
      req.log.info(logFields, 'URL requeued after failure');
      return reply.send({ requeued: true });
    } catch (e) {
      req.log.error(e);
      return reply.status(500).send({ message: 'Failed to complete queue item' });
    }
  },
);
/**
 * GET /api/v1/discover/queue/stats — queue statistics.
 *
 * Responds `{ queueLength, processingCount }`: the size of the queue and the number
 * of processing markers found by scanning Redis. 500 on failure.
 */
app.get('/api/v1/discover/queue/stats', async (req, reply) => {
  try {
    const queueLength = await redis.zcard(DIGEST_QUEUE_KEY);
    let processingCount = 0;
    let cursor = '0';
    do {
      const [nextCursor, keys] = await redis.scan(cursor, 'MATCH', `${DIGEST_PROCESSING_KEY}:*`, 'COUNT', 100);
      cursor = nextCursor;
      processingCount += keys.length;
    } while (cursor !== '0');
    return reply.send({ queueLength, processingCount });
  } catch (e) {
    // Log the cause like every other handler does instead of discarding it.
    req.log.error(e);
    return reply.status(500).send({ message: 'Failed to get queue stats' });
  }
});
/**
 * Raw search results for a topic/region pair (SearXNG, `time_range=day`).
 *
 * Runs one `site:<link> <query>` search per combination of the configured links and
 * queries, ignores searches that fail, removes duplicate URLs (case-insensitive) and
 * returns the remaining results in random order.
 *
 * @returns Shuffled unique results; empty when the pair has no configured sources.
 */
async function fetchRawForTopicRegion(
  region: Region,
  topic: Topic
): Promise<SearxngSearchResult[]> {
  // Both values originate from request input cast to the union types, so the lookup
  // can miss at runtime even though the static type says otherwise.
  const selectedTopic = SOURCES_BY_REGION[region]?.[topic];
  if (!selectedTopic) return [];

  const searchLang = region === 'russia' ? 'ru' : region === 'china' ? 'zh' : 'en';
  const searchPromises = selectedTopic.links.flatMap((link) =>
    selectedTopic.query.map(async (query) => {
      const response = await searchSearxng(`site:${link} ${query}`, {
        engines: ['bing news'],
        pageno: 1,
        language: searchLang,
        time_range: 'day',
      });
      return response.results;
    })
  );
  const settled = await Promise.allSettled(searchPromises);

  const seenUrls = new Set<string>();
  const unique: SearxngSearchResult[] = [];
  for (const outcome of settled) {
    if (outcome.status !== 'fulfilled') continue; // best effort: skip failed searches
    for (const item of outcome.value) {
      const key = item.url?.toLowerCase().trim();
      if (!key || seenUrls.has(key)) continue;
      seenUrls.add(key);
      unique.push(item);
    }
  }

  // Fisher–Yates shuffle: unlike sort() with a random comparator, every permutation
  // is equally likely and the comparator contract is not violated.
  for (let i = unique.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    const a = unique[i];
    const b = unique[j];
    if (a === undefined || b === undefined) continue;
    unique[i] = b;
    unique[j] = a;
  }
  return unique;
}
/** Precomputed item (stored in Redis under discover:{topic}:{region}, or assembled from raw results + SQLite). */
interface PrecomputedItem {
// Original title of the article.
title: string;
// Translated title; preferred over `title` when present.
titleRu?: string;
// Summary text; used as the card content when present.
summary?: string;
// Related sources of the item.
sources?: { url: string; title: string }[];
// Article URL.
url: string;
// Thumbnail URL; an empty value is replaced by the placeholder image when rendered.
thumbnail: string;
// Timestamp of when the item was fetched (unit is not established by the visible code — confirm).
fetchedAt?: number;
}
/**
 * Type guard for a cached precomputed payload: an object whose `items` is a non-empty
 * array and whose first item has a string `url`. Only the first item is inspected.
 */
function isPrecomputedPayload(cached: unknown): cached is { items: PrecomputedItem[]; updatedAt?: number } {
  if (typeof cached !== 'object' || cached === null) {
    return false;
  }
  const { items } = cached as { items?: unknown };
  if (!Array.isArray(items) || items.length === 0) {
    return false;
  }
  const first = items[0] as { url?: unknown } | null | undefined;
  return typeof first?.url === 'string';
}
/**
 * Maps precomputed items to the `blogs` response shape.
 *
 * The translated title and the summary are preferred, falling back to the original
 * title; an empty thumbnail is replaced by the placeholder image. `sources` and
 * `summary` are only included when non-empty.
 */
function precomputedToBlogs(items: PrecomputedItem[]): { title: string; content: string; url: string; thumbnail: string; sources?: { url: string; title: string }[]; summary?: string; sourcesCount?: number; digestId?: string }[] {
  return items.map((item) => {
    const blog: { title: string; content: string; url: string; thumbnail: string; sources?: { url: string; title: string }[]; summary?: string } = {
      title: item.titleRu ?? item.title,
      content: item.summary ?? item.title,
      url: item.url,
      thumbnail: item.thumbnail || PLACEHOLDER_IMAGE,
    };
    if (item.sources?.length) {
      blog.sources = item.sources;
    }
    if (item.summary) {
      blog.summary = item.summary;
    }
    return blog;
  });
}
/**
 * Maps stored digests to the `blogs` response shape.
 *
 * The content is the short description when present, otherwise the first 200
 * characters of the summary (with an ellipsis when truncated). `digestId` is
 * `topic:region:cluster_title`.
 */
function digestsToBlogs(digests: discoverDb.DigestRow[]): { title: string; content: string; url: string; thumbnail: string; sourcesCount: number; digestId: string }[] {
  return digests.map((digest) => {
    const isTruncated = digest.summary_ru.length > 200;
    const content =
      digest.short_description || digest.summary_ru.slice(0, 200) + (isTruncated ? '…' : '');
    const digestId = [digest.topic, digest.region, digest.cluster_title].join(':');
    return {
      title: digest.cluster_title,
      content,
      url: digest.main_url,
      thumbnail: digest.thumbnail || PLACEHOLDER_IMAGE,
      sourcesCount: digest.sources_count,
      digestId,
    };
  });
}
app.get<{
Querystring: { topic?: string; region?: string; mode?: string };
Querystring: { topic?: string; region?: string; mode?: string; source?: string };
}>('/api/v1/discover', async (req, reply) => {
const topic = (req.query.topic ?? 'tech') as Topic;
const mode = (req.query.mode ?? 'normal') as 'normal' | 'preview';
const mode = (req.query.mode ?? 'normal') as 'normal' | 'preview' | 'raw';
const sourceParam = (req.query.source ?? '').toLowerCase().trim();
if (topic === 'gooseek') {
if (topic === 'bait' || topic === 'gooseek') {
try {
const blogs = await fetchGooseekPosts();
const blogs =
topic === 'bait'
? await fetchGhostPosts(undefined, 'gooseek')
: await fetchGhostPosts('gooseek');
return { blogs };
} catch (e) {
const msg = e instanceof Error ? e.message : String(e);
@@ -294,98 +790,133 @@ app.get<{
}
}
const region = await resolveRegion(
req.query.region ?? null,
req.headers['x-forwarded-for'] as string | null,
req.headers['user-agent'] as string | null
);
const region: Region =
sourceParam === 'ru' || sourceParam === 'russian'
? 'russia'
: sourceParam === 'world' || sourceParam === 'global'
? 'world'
: await resolveRegion(
req.query.region ?? null,
req.headers['x-forwarded-for'] as string | null,
req.headers['user-agent'] as string | null
);
const selectedTopic = SOURCES_BY_REGION[region][topic];
const searchLang = region === 'russia' ? 'ru' : region === 'china' ? 'zh' : 'en';
const searchLang =
region === 'russia' ? 'ru' : region === 'china' ? 'zh' : 'en';
const cacheKey = `discover:${topic}:${region}:${mode}`;
try {
const cached = await redis.get(cacheKey);
if (cached) {
return JSON.parse(cached) as { blogs: unknown[] };
const precomputedKey = `discover:${topic}:${region}`;
if (mode !== 'raw') {
// Prefer pre-generated digests (Perplexity-style multi-source summaries)
const digests = discoverDb.getDigests(topic, region);
if (digests.length > 0) {
return { blogs: digestsToBlogs(digests) };
}
try {
const cached = await redis.get(precomputedKey);
if (cached) {
const parsed = JSON.parse(cached) as unknown;
if (isPrecomputedPayload(parsed)) {
return { blogs: precomputedToBlogs(parsed.items) };
}
if (typeof parsed === 'object' && parsed !== null && 'blogs' in parsed && Array.isArray((parsed as { blogs: unknown[] }).blogs)) {
return parsed as { blogs: unknown[] };
}
}
} catch {
// skip cache
}
try {
const rawData = await fetchRawForTopicRegion(region, topic);
const items: PrecomputedItem[] = rawData.slice(0, 7).map((item) => {
const url = item.url ?? '';
const thumb = item.thumbnail ?? item.thumbnail_src ?? item.img_src ?? '';
const row = discoverDb.getByUrl(url);
return {
title: item.title ?? 'No title',
titleRu: row?.title_ru,
summary: row?.summary_ru ?? (item.content ?? item.title ?? '').slice(0, 300),
sources: row ? (JSON.parse(row.sources_json || '[]') as { url: string; title: string }[]) : undefined,
url,
thumbnail: thumb || PLACEHOLDER_IMAGE,
fetchedAt: row?.fetched_at ? row.fetched_at * 1000 : Date.now(),
};
});
await redis.setex(
precomputedKey,
REDIS_DISCOVER_TTL_SEC,
JSON.stringify({ items, updatedAt: Date.now() })
);
return { blogs: precomputedToBlogs(items) };
} catch (err) {
req.log.error(err);
return reply.status(503).send({
message:
err instanceof Error && err.message.includes('not configured')
? 'SearxNG is not configured. Set SEARXNG_URL or SEARXNG_FALLBACK_URL.'
: 'Cannot fetch discover.',
});
}
} catch {
// skip cache
}
let data: SearxngSearchResult[] = [];
try {
if (mode === 'normal') {
const seenUrls = new Set<string>();
const searchPromises = selectedTopic.links.flatMap((link) =>
selectedTopic.query.map((query) =>
searchSearxng(`site:${link} ${query}`, {
engines: ['bing news'],
pageno: 1,
language: searchLang,
}).then((r) => r.results)
)
);
const settled = await Promise.allSettled(searchPromises);
const allResults = settled
.filter(
(r): r is PromiseFulfilledResult<SearxngSearchResult[]> =>
r.status === 'fulfilled'
)
.flatMap((r) => r.value);
data = allResults
.flat()
.filter((item) => {
const url = item.url?.toLowerCase().trim();
if (!url || seenUrls.has(url)) return false;
seenUrls.add(url);
return true;
})
.sort(() => Math.random() - 0.5);
} else {
const link =
selectedTopic.links[
Math.floor(Math.random() * selectedTopic.links.length)
];
const query =
selectedTopic.query[
Math.floor(Math.random() * selectedTopic.query.length)
];
const res = await searchSearxng(`site:${link} ${query}`, {
engines: ['bing news'],
pageno: 1,
language: searchLang,
});
data = res.results;
}
} catch (err) {
req.log.error(err);
return reply.status(503).send({
message:
err instanceof Error && err.message.includes('not configured')
? 'SearxNG is not configured. Set SEARXNG_URL or SEARXNG_FALLBACK_URL.'
: 'Cannot connect to SearxNG. Check configuration.',
if (mode === 'raw') {
let data: SearxngSearchResult[] = [];
try {
const seenUrls = new Set<string>();
const searchPromises = selectedTopic.links.flatMap((link) =>
selectedTopic.query.map((query) =>
searchSearxng(`site:${link} ${query}`, {
engines: ['bing news'],
pageno: 1,
language: searchLang,
time_range: 'day',
}).then((r) => r.results)
)
);
const settled = await Promise.allSettled(searchPromises);
const allResults = settled
.filter(
(r): r is PromiseFulfilledResult<SearxngSearchResult[]> =>
r.status === 'fulfilled'
)
.flatMap((r) => r.value);
data = allResults
.flat()
.filter((item) => {
const url = item.url?.toLowerCase().trim();
if (!url || seenUrls.has(url)) return false;
seenUrls.add(url);
return true;
})
.sort(() => Math.random() - 0.5);
} catch (err) {
req.log.error(err);
return reply.status(503).send({
message:
err instanceof Error && err.message.includes('not configured')
? 'SearxNG is not configured. Set SEARXNG_URL or SEARXNG_FALLBACK_URL.'
: 'Cannot connect to SearxNG. Check configuration.',
});
}
const blogs = data.map((item) => {
const thumb = item.thumbnail ?? item.thumbnail_src ?? item.img_src ?? '';
return {
title: item.title ?? 'No title',
content: (item.content ?? item.title ?? '').slice(0, 300),
url: item.url ?? '',
thumbnail: thumb || PLACEHOLDER_IMAGE,
};
});
return { blogs };
}
const blogs = data.map((item) => ({
title: item.title ?? 'No title',
content: (item.content ?? item.title ?? '').slice(0, 300),
url: item.url ?? '',
thumbnail: item.thumbnail ?? item.thumbnail_src ?? item.img_src ?? '',
}));
try {
await redis.setex(cacheKey, 30 * 60, JSON.stringify({ blogs }));
} catch {
// skip cache
}
return { blogs };
return reply.status(400).send({ message: 'invalid mode' });
});
discoverDb.startDailyCleanup();
try {
await app.listen({ port: PORT, host: '0.0.0.0' });
console.log(`discover-svc listening on :${PORT}`);

View File

@@ -13,6 +13,8 @@ interface SearxngSearchOptions {
engines?: string[];
language?: string;
pageno?: number;
/** Фильтр по времени: day | week | month | year (SearXNG). Для новостей — day. */
time_range?: 'day' | 'week' | 'month' | 'year';
}
export interface SearxngSearchResult {