@@ -4,28 +4,40 @@
* Ответ: { blogs: [{ title, content, url, thumbnail }] }
*/
import crypto from 'node:crypto' ;
import Fastify from 'fastify' ;
import cors from '@fastify/cors' ;
import Redis from 'ioredis' ;
import { searchSearxng , type SearxngSearchResult } from './searxng.js' ;
import * as discoverDb from './discover-db.js' ;
/** TCP port the HTTP server listens on (env PORT, default 3002). */
const PORT = Number.parseInt(process.env.PORT ?? '3002', 10);
/** Connection string for the Redis client (env REDIS_URL). */
const REDIS_URL = process.env.REDIS_URL ?? 'redis://localhost:6379';
/** Redis: lifetime of the cached Discover feed shown to users — 1 hour, in seconds. */
const REDIS_DISCOVER_TTL_SEC = 60 * 60;
/** Ghost base URL (env GHOST_URL, trimmed); empty string when unset. */
const GHOST_URL = process.env.GHOST_URL?.trim() ?? '';
/** Ghost Content API key (env GHOST_CONTENT_API_KEY, trimmed); empty string when unset. */
const GHOST_CONTENT_API_KEY = process.env.GHOST_CONTENT_API_KEY?.trim() ?? '';
/** Base URL taken from env GEO_DEVICE_SERVICE_URL; its consumer is not visible in this section. */
const GEO_DEVICE_SERVICE_URL = process.env.GEO_DEVICE_SERVICE_URL ?? 'http://localhost:4002';
/**
 * Fallback thumbnail used when an item has no image of its own: an inline
 * 400x225 SVG (grey card with the label "Post") encoded as a data URI, so no
 * external image host is required.
 *
 * The colours are written as plain `#rrggbb` and escaped exactly once by
 * `encodeURIComponent`. Writing them pre-escaped (`%23…`) would be encoded a
 * second time to `%2523…`, which decodes to the literal text `%23…` — not a
 * valid colour.
 */
const PLACEHOLDER_IMAGE =
  'data:image/svg+xml,' +
  encodeURIComponent(
    '<svg xmlns="http://www.w3.org/2000/svg" width="400" height="225" viewBox="0 0 400 225"><rect fill="#e5e7eb" width="400" height="225"/><text x="50%" y="50%" dominant-baseline="middle" text-anchor="middle" fill="#6b7280" font-family="sans-serif" font-size="16">Post</text></svg>'
  );
/** Region setting from env NEWS_REGION; defaults to 'auto'. */
const NEWS_REGION: string = process.env.NEWS_REGION ?? 'auto';
/**
 * Source regions that have an entry in SOURCES_BY_REGION.
 * 'world' is the international source set.
 */
type Region = 'america' | 'eu' | 'russia' | 'china' | 'world';
/**
 * Feed topics accepted by the Discover endpoint.
 * 'bait' and 'gooseek' are served from Ghost posts; the remaining topics are
 * served from the per-region source lists.
 */
type Topic =
  | 'bait'
  | 'gooseek'
  | 'tech'
  | 'finance'
  | 'art'
  | 'sports'
  | 'entertainment';
/** Tag attached to a Ghost post; only the slug is read by this service. */
interface GhostTag {
  /** URL-friendly tag identifier; may be absent. */
  slug?: string;
}
interface GhostPost {
title : string ;
excerpt? : string | null ;
@@ -35,12 +47,35 @@ interface GhostPost {
html? : string | null ;
feature_image? : string | null ;
url : string ;
tags? : GhostTag [ ] | null ;
}
// Shared Redis client, created once at module load.
// ioredis + NodeNext: the default export is not recognised as a constructor by
// the type checker, hence the suppression on the next line.
// @ts-expect-error — ioredis + NodeNext ESM constructability
const redis = new Redis(REDIS_URL);
// Client-level 'error' events are intentionally ignored here; callers handle
// failures of individual commands at the call site.
redis.on('error', () => {});
/**
 * Deletes Redis keys matching a glob pattern, walking the keyspace with SCAN
 * (batches hinted at 100 keys) instead of a single blocking lookup.
 *
 * @param pattern - Glob pattern passed to `SCAN … MATCH`.
 * @param excludePrefix - When given, keys starting with this prefix are kept.
 * @returns The number of keys handed to `DEL`.
 */
async function scanAndDeleteKeys(
  pattern: string,
  excludePrefix?: string,
): Promise<number> {
  let removed = 0;
  let position = '0';
  for (;;) {
    const [next, batch] = await redis.scan(position, 'MATCH', pattern, 'COUNT', 100);
    position = next;
    const doomed: string[] = excludePrefix
      ? batch.filter((key: string) => !key.startsWith(excludePrefix))
      : batch;
    if (doomed.length > 0) {
      await redis.del(...doomed);
      removed += doomed.length;
    }
    // SCAN signals the end of the iteration by returning cursor '0'.
    if (position === '0') {
      break;
    }
  }
  return removed;
}
/**
 * Reduces an HTML fragment to plain text: every tag becomes a space, runs of
 * whitespace collapse to a single space, and the result is trimmed.
 * HTML entities are left as they are.
 */
function stripHtml(html: string): string {
  const withoutTags = html.replace(/<[^>]+>/g, ' ');
  const collapsed = withoutTags.replace(/\s+/g, ' ');
  return collapsed.trim();
}
@@ -49,7 +84,32 @@ const SOURCES_BY_REGION: Record<
Region ,
Record < Topic , { query : string [ ] ; links : string [ ] } >
> = {
world : {
bait : { query : [ ] , links : [ ] } ,
gooseek : { query : [ ] , links : [ ] } ,
tech : {
query : [ 'technology news' , 'AI' , 'innovation' , 'science' ] ,
links : [ 'reuters.com' , 'bbc.com' , 'theguardian.com' , 'apnews.com' , 'techcrunch.com' , 'theverge.com' ] ,
} ,
finance : {
query : [ 'finance news' , 'economy' , 'stock market' , 'central banks' ] ,
links : [ 'reuters.com' , 'bbc.com' , 'bloomberg.com' , 'cnbc.com' , 'ft.com' , 'apnews.com' ] ,
} ,
art : {
query : [ 'art news' , 'culture' , 'exhibition' , 'museum' ] ,
links : [ 'reuters.com' , 'bbc.com' , 'theguardian.com' , 'apnews.com' , 'nytimes.com' ] ,
} ,
sports : {
query : [ 'sports news' , 'football' , 'Olympics' , 'premier league' ] ,
links : [ 'reuters.com' , 'bbc.com' , 'espn.com' , 'apnews.com' , 'theguardian.com' ] ,
} ,
entertainment : {
query : [ 'entertainment news' , 'films' , 'music' , 'culture' ] ,
links : [ 'reuters.com' , 'bbc.com' , 'theguardian.com' , 'variety.com' , 'apnews.com' ] ,
} ,
} ,
america : {
bait : { query : [ ] , links : [ ] } ,
gooseek : { query : [ ] , links : [ ] } ,
tech : {
query : [ 'technology news' , 'latest tech' , 'AI' , 'science and innovation' ] ,
@@ -73,6 +133,7 @@ const SOURCES_BY_REGION: Record<
} ,
} ,
eu : {
bait : { query : [ ] , links : [ ] } ,
gooseek : { query : [ ] , links : [ ] } ,
tech : {
query : [ 'technology news' , 'tech' , 'AI' , 'innovation' ] ,
@@ -96,6 +157,7 @@ const SOURCES_BY_REGION: Record<
} ,
} ,
russia : {
bait : { query : [ ] , links : [ ] } ,
gooseek : { query : [ ] , links : [ ] } ,
tech : {
query : [ 'technology news' , 'tech' , 'IT' , 'innovation' ] ,
@@ -119,6 +181,7 @@ const SOURCES_BY_REGION: Record<
} ,
} ,
china : {
bait : { query : [ ] , links : [ ] } ,
gooseek : { query : [ ] , links : [ ] } ,
tech : {
query : [ 'technology news' , 'tech' , 'AI' , 'innovation' ] ,
@@ -153,16 +216,23 @@ const COUNTRY_TO_REGION: Record<string, Region> = {
BG : 'eu' , HR : 'eu' , SK : 'eu' , SI : 'eu' , LT : 'eu' , LV : 'eu' , EE : 'eu' , DK : 'eu' ,
} ;
async function fetchGo oseek Posts ( ) : Promise <
{ title : string ; content : string ; url : string ; thumbnail : string } [ ]
> {
async function fetchGh ost Posts (
tagSlug? : string ,
excludeTagSlug? : string
) : Promise < { title : string ; content : string ; url : string ; thumbnail : string } [ ] > {
if ( ! GHOST_URL || ! GHOST_CONTENT_API_KEY ) {
throw new Error (
'Ghost не настроен. Укажите GHOST_URL и GHOST_CONTENT_API_KEY в .env'
) ;
}
const base = GHOST_URL . replace ( /\/$/ , '' ) ;
cons t apiUrl = ` ${ base } /ghost/api/content/posts/?key= ${ GHOST_CONTENT_API_KEY } &limit=50&fields=title,excerpt,custom_excerpt,meta_description,html,feature_image,url&formats=html ` ;
le t apiUrl = ` ${ base } /ghost/api/content/posts/?key= ${ GHOST_CONTENT_API_KEY } &limit=50&fields=title,excerpt,custom_excerpt,meta_description,html,feature_image,url&formats=html ` ;
if ( tagSlug ) {
apiUrl += ` &filter=tag: ${ encodeURIComponent ( tagSlug ) } ` ;
}
if ( excludeTagSlug ) {
apiUrl += '&include=tags' ;
}
const controller = new AbortController ( ) ;
const timeoutId = setTimeout ( ( ) = > controller . abort ( ) , 15 _000 ) ;
try {
@@ -180,7 +250,12 @@ async function fetchGooseekPosts(): Promise<
throw new Error ( ` Ghost API: HTTP ${ res . status } ` ) ;
}
const data = await res . json ( ) ;
cons t posts : GhostPost [ ] = data . posts ? ? [ ] ;
le t posts : GhostPost [ ] = data . posts ? ? [ ] ;
if ( excludeTagSlug ) {
posts = posts . filter (
( p ) = > ! p . tags ? . some ( ( t ) = > ( t ? . slug ? ? '' ) . toLowerCase ( ) === excludeTagSlug . toLowerCase ( ) )
) ;
}
return posts . map ( ( p ) = > {
const excerpt =
p . custom_excerpt ? . trim ( ) ||
@@ -238,7 +313,7 @@ async function resolveRegion(
return 'america' ;
}
// Single Fastify instance for the service, with request logging enabled.
// bodyLimit is set to 100 MiB, far above Fastify's 1 MiB default.
// NOTE(review): presumably needed for large POST payloads (e.g. article-summary
// event streams) — confirm the limit is intentional.
const app = Fastify({ logger: true, bodyLimit: 100 * 1024 * 1024 });
const corsOrigin = process . env . ALLOWED_ORIGINS
? process . env . ALLOWED_ORIGINS . split ( ',' ) . map ( ( s ) = > s . trim ( ) ) . filter ( Boolean )
@@ -264,15 +339,436 @@ app.get('/ready', async () => {
}
} ) ;
/**
 * General search for cache-worker (find related sources by query).
 * Responds with at most the first 10 SearxNG results; 400 when `q` is empty,
 * 503 when the search call throws.
 */
app.get<{ Querystring: { q?: string } }>('/api/v1/discover/search', async (req, reply) => {
  const query = (req.query.q ?? '').trim();
  if (query.length === 0) {
    return reply.status(400).send({ message: 'Query q is required' });
  }
  try {
    const found = await searchSearxng(query, { pageno: 1 });
    const top = found.results.slice(0, 10);
    return { results: top };
  } catch (err) {
    req.log.error(err);
    const notConfigured = err instanceof Error && err.message.includes('not configured');
    return reply.status(503).send({
      message: notConfigured ? 'SearxNG is not configured.' : 'Search failed.',
    });
  }
});
/**
 * Check for / fetch a stored translation by URL (used by cache-worker).
 * 400 when `url` is empty, 404 when no row exists for it.
 */
app.get<{ Querystring: { url?: string } }>('/api/v1/discover/translated', async (req, reply) => {
  const targetUrl = (req.query.url ?? '').trim();
  if (targetUrl === '') {
    return reply.status(400).send({ message: 'url required' });
  }
  const record = discoverDb.getByUrl(targetUrl);
  if (!record) {
    return reply.status(404).send({ message: 'not found' });
  }
  const { url, title_ru, summary_ru, thumbnail, fetched_at } = record;
  // sources are stored as a JSON string; an empty value means "no sources".
  const sources = JSON.parse(record.sources_json || '[]') as { url: string; title: string }[];
  return reply.send({ url, title_ru, summary_ru, sources, thumbnail, fetched_at });
});
/**
 * Store a translation (called by cache-worker once a translation is ready).
 *
 * Responds 400 when `url`, `title_ru` or `summary_ru` is missing (including a
 * request with no body at all), otherwise upserts the row, invalidates the
 * cached feeds and responds 204. Cache invalidation is best-effort: a Redis
 * failure is logged and does not fail the request.
 */
app.post<{ Body: { url: string; title_ru: string; summary_ru: string; sources?: { url: string; title: string }[]; thumbnail?: string } }>(
  '/api/v1/discover/translated',
  async (req, reply) => {
    // req.body is undefined/null when the request carries no body; treat that as "all fields missing".
    const body = (req.body ?? {}) as { url?: string; title_ru?: string; summary_ru?: string; sources?: { url: string; title: string }[]; thumbnail?: string };
    const url = (body.url ?? '').trim();
    const title_ru = body.title_ru ?? '';
    const summary_ru = body.summary_ru ?? '';
    if (!url || !title_ru || !summary_ru) {
      return reply.status(400).send({ message: 'url, title_ru, summary_ru required' });
    }
    discoverDb.upsert({
      url,
      title_ru,
      summary_ru,
      sources: body.sources,
      thumbnail: body.thumbnail,
    });
    try {
      // Invalidate only the per-topic feed caches (discover:{topic}:…). A blanket
      // 'discover:*' sweep would also delete the digest queue and its
      // processing markers, which live under the same 'discover:' prefix.
      const topics = Object.keys(SOURCES_BY_REGION.world);
      await Promise.all(topics.map((topic) => scanAndDeleteKeys(`discover:${topic}:*`)));
    } catch (e) {
      req.log.warn(e, 'Redis cache invalidation after translation');
    }
    return reply.status(204).send();
  }
);
/** Lifetime of a cached article summary in Redis, in seconds (1 h). */
const ARTICLE_SUMMARY_REDIS_TTL = 60 * 60;
/**
 * Builds the Redis key for an article summary: the `discover:asum:` prefix
 * followed by the first 32 hex characters of the SHA-256 of the trimmed URL.
 */
function articleSummaryRedisKey(url: string): string {
  const digest = crypto.createHash('sha256').update(url.trim()).digest('hex');
  const shortHash = digest.slice(0, 32);
  return `discover:asum:${shortHash}`;
}
/**
 * Article-summary cache for chat (Discover): returns the saved event stream so
 * the client can replay the reasoning/sources.
 *
 * Lookup order: Redis, then the database (re-populating Redis on a hit).
 * Redis is treated as a best-effort cache — a failed read, a corrupt cached
 * value or a failed write is logged and the request is still served from the
 * database. Responds 400 without `url`, 404 when nothing is stored, 500 when
 * the database lookup itself fails.
 */
app.get<{ Querystring: { url?: string } }>('/api/v1/discover/article-summary', async (req, reply) => {
  const url = (req.query.url ?? '').trim();
  if (!url) return reply.status(400).send({ message: 'url required' });
  try {
    const key = articleSummaryRedisKey(discoverDb.normalizeArticleUrl(url));
    try {
      const cached = await redis.get(key);
      if (cached) {
        const payload = JSON.parse(cached) as { events?: unknown };
        if (Array.isArray(payload.events)) {
          return reply.send({ events: payload.events });
        }
      }
    } catch (cacheErr) {
      req.log.warn(cacheErr, 'article-summary cache read failed, falling back to DB');
    }
    const events = discoverDb.getArticleSummary(url);
    if (!events || events.length === 0) return reply.status(404).send({ message: 'not found' });
    try {
      await redis.setex(key, ARTICLE_SUMMARY_REDIS_TTL, JSON.stringify({ events }));
    } catch (cacheErr) {
      req.log.warn(cacheErr, 'article-summary cache write failed');
    }
    return reply.send({ events });
  } catch (e) {
    req.log.error(e);
    return reply.status(500).send({ message: 'Failed to get article summary' });
  }
});
/**
 * Clear the cached and stored article summary (so it can be regenerated in chat).
 * Removes the Redis entry first, then the database record; responds 204.
 */
app.delete<{ Querystring: { url?: string } }>('/api/v1/discover/article-summary', async (req, reply) => {
  const target = (req.query.url ?? '').trim();
  if (target.length === 0) {
    return reply.status(400).send({ message: 'url required' });
  }
  try {
    const cacheKey = articleSummaryRedisKey(discoverDb.normalizeArticleUrl(target));
    await redis.del(cacheKey);
    const deleted = discoverDb.deleteArticleSummary(target);
    // Only the first 80 characters of the URL are logged.
    req.log.info({ url: target.slice(0, 80), deleted }, 'article-summary cache and DB cleared');
    return reply.status(204).send();
  } catch (err) {
    req.log.error(err);
    return reply.status(500).send({ message: 'Failed to clear article summary' });
  }
});
/**
 * Save an article summary (after the first summary has been produced in chat).
 *
 * Persists the events in the database and mirrors them into Redis for
 * ARTICLE_SUMMARY_REDIS_TTL seconds. Responds 400 when `url` is empty or
 * `events` is not a non-empty array (including a request with no body),
 * 204 on success, 500 when saving fails.
 */
app.post<{ Body: { url: string; events: string[] } }>('/api/v1/discover/article-summary', async (req, reply) => {
  // req.body is undefined/null when the request carries no body; treat that as "all fields missing".
  const body = (req.body ?? {}) as { url?: string; events?: string[] };
  const url = (body.url ?? '').trim();
  const events = Array.isArray(body.events) ? body.events : [];
  req.log.info({ url: url.slice(0, 80), eventsCount: events.length }, 'POST article-summary received');
  if (!url || events.length === 0) {
    return reply.status(400).send({ message: 'url and events[] required' });
  }
  try {
    discoverDb.saveArticleSummary(url, events);
    const normalizedUrl = discoverDb.normalizeArticleUrl(url);
    const key = articleSummaryRedisKey(normalizedUrl);
    await redis.setex(key, ARTICLE_SUMMARY_REDIS_TTL, JSON.stringify({ events }));
    req.log.info({ url: url.slice(0, 80) }, 'article-summary saved');
    return reply.status(204).send();
  } catch (e) {
    req.log.error(e);
    return reply.status(500).send({ message: 'Failed to save article summary' });
  }
});
/**
 * GET /api/v1/discover/digest — full digest summary with citations and follow-ups.
 *
 * The digest is looked up by `url` when it is given; otherwise by the
 * (`topic`, `region`, `title`) triple, all three of which are then required.
 */
app.get<{ Querystring: { topic?: string; region?: string; title?: string; url?: string } }>(
  '/api/v1/discover/digest',
  async (req, reply) => {
    const param = (value?: string): string => (value ?? '').trim();
    const topic = param(req.query.topic);
    const region = param(req.query.region);
    const title = param(req.query.title);
    const url = param(req.query.url);

    const byUrl = url !== '';
    if (!byUrl && (!topic || !region || !title)) {
      return reply.status(400).send({ message: 'topic, region, title (or url) required' });
    }

    const row = byUrl
      ? discoverDb.getDigestByUrl(url)
      : discoverDb.getDigest(topic, region, title);
    if (!row) {
      return reply.status(404).send({ message: 'digest not found' });
    }

    // citations and follow-ups are stored as JSON strings; empty means "none".
    const citations = JSON.parse(row.citations_json || '[]') as discoverDb.DigestCitation[];
    const followUp = JSON.parse(row.follow_up_json || '[]') as string[];
    return reply.send({
      topic: row.topic,
      region: row.region,
      clusterTitle: row.cluster_title,
      summaryRu: row.summary_ru,
      citations,
      sourcesCount: row.sources_count,
      followUp,
      thumbnail: row.thumbnail,
      shortDescription: row.short_description,
      mainUrl: row.main_url,
    });
  },
);
/**
 * POST /api/v1/discover/digest — store a digest (called by cache-worker).
 *
 * `topic`, `region`, `clusterTitle` and `summaryRu` are required (400
 * otherwise, including a request with no body); the remaining fields fall back
 * to empty values. After the upsert the cached feeds of that topic are
 * invalidated on a best-effort basis and the handler responds 204.
 */
app.post<{
  Body: {
    topic: string;
    region: string;
    clusterTitle: string;
    summaryRu: string;
    citations: discoverDb.DigestCitation[];
    sourcesCount: number;
    followUp: string[];
    thumbnail: string;
    shortDescription: string;
    mainUrl: string;
  };
}>('/api/v1/discover/digest', async (req, reply) => {
  // req.body is undefined/null when the request carries no body; treat that as "all fields missing".
  const b = (req.body ?? {}) as Record<string, unknown>;
  const topic = String(b.topic ?? '').trim();
  const region = String(b.region ?? '').trim();
  const clusterTitle = String(b.clusterTitle ?? '').trim();
  const summaryRu = String(b.summaryRu ?? '').trim();
  if (!topic || !region || !clusterTitle || !summaryRu) {
    return reply.status(400).send({ message: 'topic, region, clusterTitle, summaryRu required' });
  }
  discoverDb.upsertDigest({
    topic,
    region,
    clusterTitle,
    summaryRu,
    citations: Array.isArray(b.citations) ? (b.citations as discoverDb.DigestCitation[]) : [],
    sourcesCount: typeof b.sourcesCount === 'number' ? b.sourcesCount : 0,
    followUp: Array.isArray(b.followUp) ? (b.followUp as string[]) : [],
    thumbnail: String(b.thumbnail ?? ''),
    shortDescription: String(b.shortDescription ?? ''),
    mainUrl: String(b.mainUrl ?? ''),
  });
  try {
    // topic comes from the request body: escape glob metacharacters so a value
    // such as '*' cannot widen the pattern and delete unrelated keys.
    // NOTE(review): a topic literally named 'digest' or 'asum' would still match
    // the queue / article-summary keys — consider validating topic against the
    // known topic list.
    const topicGlob = topic.replace(/[\\*?[\]]/g, '\\$&');
    await scanAndDeleteKeys(`discover:${topicGlob}:*`);
  } catch (e) {
    req.log.warn(e, 'Redis cache invalidation after digest save');
  }
  return reply.status(204).send();
});
/**
 * DELETE /api/v1/discover/digest — delete all digests for a topic + region.
 * Responds with whatever `deleteDigests` returns, under the `deleted` key.
 */
app.delete<{ Querystring: { topic?: string; region?: string } }>(
  '/api/v1/discover/digest',
  async (req, reply) => {
    const topic = (req.query.topic ?? '').trim();
    const region = (req.query.region ?? '').trim();
    const missing = topic === '' || region === '';
    if (missing) {
      return reply.status(400).send({ message: 'topic, region required' });
    }
    return reply.send({ deleted: discoverDb.deleteDigests(topic, region) });
  },
);
/** Redis sorted-set key holding the URLs waiting for digest generation. */
const DIGEST_QUEUE_KEY = 'discover:digest:queue';
/** Key prefix of the per-URL "being processed" markers (`<prefix>:<url>`). */
const DIGEST_PROCESSING_KEY = 'discover:digest:processing';
/** Lifetime of a processing marker, in seconds (1 h). */
const QUEUE_ITEM_TTL_SEC = 60 * 60;
/**
 * POST /api/v1/discover/queue — add a URL to the queue for background digest
 * generation.
 *
 * The URL is not queued when a digest or an article summary already exists
 * for it. `priority` is the sorted-set score (lower is served first) and
 * defaults to the current time. Responds 400 for a missing `url` (including a
 * request with no body) or a non-numeric `priority`, 500 when Redis fails.
 */
app.post<{ Body: { url: string; title?: string; priority?: number } }>(
  '/api/v1/discover/queue',
  async (req, reply) => {
    // req.body is undefined/null when the request carries no body; treat that as "all fields missing".
    const body = (req.body ?? {}) as { url?: string; title?: string; priority?: unknown };
    const url = (body.url ?? '').trim();
    if (!url) return reply.status(400).send({ message: 'url required' });
    // Reject a score Redis would refuse instead of surfacing it as a 500 from ZADD.
    const priority = body.priority == null ? Date.now() : Number(body.priority);
    if (!Number.isFinite(priority)) {
      return reply.status(400).send({ message: 'priority must be a finite number' });
    }
    const existingDigest = discoverDb.getDigestByUrl(url);
    if (existingDigest) {
      return reply.send({ queued: false, reason: 'digest already exists' });
    }
    const normalizedUrl = discoverDb.normalizeArticleUrl(url);
    const existingSummary = discoverDb.getArticleSummary(url);
    if (existingSummary && existingSummary.length > 0) {
      return reply.send({ queued: false, reason: 'summary already exists' });
    }
    // NOTE(review): the member embeds addedAt, so queuing the same URL twice
    // produces two distinct sorted-set members rather than updating one.
    const payload = JSON.stringify({ url: normalizedUrl, title: body.title ?? '', addedAt: Date.now() });
    try {
      await redis.zadd(DIGEST_QUEUE_KEY, priority, payload);
      req.log.info({ url: normalizedUrl.slice(0, 80), priority }, 'URL queued for digest generation');
      const position = await redis.zrank(DIGEST_QUEUE_KEY, payload);
      return reply.send({ queued: true, position });
    } catch (e) {
      req.log.error(e);
      return reply.status(500).send({ message: 'Failed to queue URL' });
    }
  },
);
/**
 * GET /api/v1/discover/queue — hand the next URL to a worker.
 *
 * The lowest-score member is removed with ZPOPMIN (Redis >= 5.0), which reads
 * and removes in a single atomic step so two concurrent workers can never
 * receive the same item. A processing marker is then stored for
 * QUEUE_ITEM_TTL_SEC seconds so the item can be re-queued on failure.
 * Members that are not valid queue payloads are logged and dropped instead of
 * blocking the head of the queue.
 *
 * Responds `{ item: null, queueLength: 0 }` when the queue is empty.
 */
app.get('/api/v1/discover/queue', async (req, reply) => {
  try {
    for (;;) {
      // ZPOPMIN replies with [member, score], or an empty array when the set is empty.
      const popped: string[] = await redis.zpopmin(DIGEST_QUEUE_KEY);
      const item = popped[0];
      if (item === undefined) {
        return reply.send({ item: null, queueLength: 0 });
      }
      let parsed: { url: string; title: string; addedAt: number } | null = null;
      try {
        const candidate = JSON.parse(item) as { url?: unknown } | null;
        if (candidate !== null && typeof candidate === 'object' && typeof candidate.url === 'string') {
          parsed = candidate as { url: string; title: string; addedAt: number };
        }
      } catch {
        parsed = null;
      }
      if (parsed === null) {
        req.log.warn({ item: item.slice(0, 200) }, 'Dropping malformed digest queue item');
        continue;
      }
      await redis.setex(`${DIGEST_PROCESSING_KEY}:${parsed.url}`, QUEUE_ITEM_TTL_SEC, item);
      const queueLength = await redis.zcard(DIGEST_QUEUE_KEY);
      return reply.send({ item: parsed, queueLength });
    }
  } catch (e) {
    req.log.error(e);
    return reply.status(500).send({ message: 'Failed to get queue item' });
  }
});
/**
 * DELETE /api/v1/discover/queue — mark a URL as processed, or put it back on
 * the queue after a failure (`requeue=true`).
 *
 * The processing marker is always removed. When re-queuing, the stored item is
 * added back with a score 60 s in the future; this only happens if the marker
 * still existed.
 */
app.delete<{ Querystring: { url?: string; requeue?: string } }>(
  '/api/v1/discover/queue',
  async (req, reply) => {
    const rawUrl = (req.query.url ?? '').trim();
    const shouldRequeue = req.query.requeue === 'true';
    if (rawUrl === '') {
      return reply.status(400).send({ message: 'url required' });
    }
    const normalizedUrl = discoverDb.normalizeArticleUrl(rawUrl);
    const markerKey = `${DIGEST_PROCESSING_KEY}:${normalizedUrl}`;
    try {
      const stored = await redis.get(markerKey);
      await redis.del(markerKey);
      if (!shouldRequeue || !stored) {
        req.log.info({ url: normalizedUrl.slice(0, 80) }, 'URL processing completed');
        return reply.status(204).send();
      }
      const retryAt = Date.now() + 60000;
      await redis.zadd(DIGEST_QUEUE_KEY, retryAt, stored);
      req.log.info({ url: normalizedUrl.slice(0, 80) }, 'URL requeued after failure');
      return reply.send({ requeued: true });
    } catch (err) {
      req.log.error(err);
      return reply.status(500).send({ message: 'Failed to complete queue item' });
    }
  },
);
/**
 * GET /api/v1/discover/queue/stats — queue statistics.
 *
 * `queueLength` is the size of the queue sorted set; `processingCount` is the
 * number of processing-marker keys found by a SCAN over their prefix.
 * Responds 500 (and logs the error) when Redis fails.
 */
app.get('/api/v1/discover/queue/stats', async (req, reply) => {
  try {
    const queueLength = await redis.zcard(DIGEST_QUEUE_KEY);
    let processingCount = 0;
    let cursor = '0';
    do {
      const [nextCursor, keys] = await redis.scan(cursor, 'MATCH', `${DIGEST_PROCESSING_KEY}:*`, 'COUNT', 100);
      cursor = nextCursor;
      processingCount += keys.length;
    } while (cursor !== '0'); // SCAN returns cursor '0' once the iteration is complete
    return reply.send({ queueLength, processingCount });
  } catch (e) {
    req.log.error(e);
    return reply.status(500).send({ message: 'Failed to get queue stats' });
  }
});
/**
 * Raw results for a topic/region (SearXNG, time_range=day).
 *
 * Runs one `site:<link> <query>` search per (link, query) pair configured for
 * the topic in SOURCES_BY_REGION, all in parallel. Failed searches are
 * ignored. Results are de-duplicated by lower-cased, trimmed URL (items
 * without a URL are dropped) and returned in uniformly random order.
 *
 * @param region - Region whose source list is used; also selects the search
 *   language ('ru' for russia, 'zh' for china, 'en' otherwise).
 * @param topic - Topic whose links and queries are searched.
 * @returns The unique results, shuffled; empty when the topic has no sources
 *   or every search failed.
 */
async function fetchRawForTopicRegion(
  region: Region,
  topic: Topic
): Promise<SearxngSearchResult[]> {
  const { links, query: queries } = SOURCES_BY_REGION[region][topic];
  const searchLang = region === 'russia' ? 'ru' : region === 'china' ? 'zh' : 'en';

  const searches = links.flatMap((link) =>
    queries.map(async (query) => {
      const { results } = await searchSearxng(`site:${link} ${query}`, {
        engines: ['bing news'],
        pageno: 1,
        language: searchLang,
        time_range: 'day',
      });
      return results;
    })
  );
  const settled = await Promise.allSettled(searches);

  const seenUrls = new Set<string>();
  const unique: SearxngSearchResult[] = [];
  for (const outcome of settled) {
    if (outcome.status !== 'fulfilled') continue;
    for (const item of outcome.value) {
      const key = item.url?.toLowerCase().trim();
      if (!key || seenUrls.has(key)) continue;
      seenUrls.add(key);
      unique.push(item);
    }
  }

  // Fisher–Yates shuffle: unbiased, unlike sorting with a random comparator,
  // whose outcome depends on the engine's sort algorithm.
  for (let i = unique.length - 1; i > 0; i -= 1) {
    const j = Math.floor(Math.random() * (i + 1));
    [unique[i], unique[j]] = [unique[j], unique[i]];
  }
  return unique;
}
/**
 * Precomputed feed item (read from Redis `discover:{topic}:{region}` or
 * assembled from raw search results + SQLite).
 */
interface PrecomputedItem {
  /** Original title. */
  title: string;
  /** Translated title, when a translation exists. */
  titleRu?: string;
  /** Summary text shown as the card body. */
  summary?: string;
  /** Sources attached to the item, when known. */
  sources?: { url: string; title: string }[];
  /** Article URL. */
  url: string;
  /** Thumbnail URL. */
  thumbnail: string;
  /** Fetch timestamp. */
  fetchedAt?: number;
}
/**
 * Type guard for a cached precomputed payload: an object with a non-empty
 * `items` array whose first element has a string `url`. Only the first
 * element is inspected.
 */
function isPrecomputedPayload(cached: unknown): cached is { items: PrecomputedItem[]; updatedAt?: number } {
  if (typeof cached !== 'object' || cached === null) {
    return false;
  }
  const { items } = cached as { items?: unknown };
  if (!Array.isArray(items) || items.length === 0) {
    return false;
  }
  const first = items[0] as { url?: unknown } | null | undefined;
  return typeof first?.url === 'string';
}
/**
 * Maps precomputed items to the blog-card shape returned by the API.
 * The translated title and the summary are preferred, with the original title
 * as the fallback for both; an empty thumbnail becomes PLACEHOLDER_IMAGE.
 * `sources` and `summary` are only included when non-empty.
 */
function precomputedToBlogs(items: PrecomputedItem[]): { title: string; content: string; url: string; thumbnail: string; sources?: { url: string; title: string }[]; summary?: string; sourcesCount?: number; digestId?: string }[] {
  type BlogCard = ReturnType<typeof precomputedToBlogs>[number];
  return items.map((item) => {
    const card: BlogCard = {
      title: item.titleRu ?? item.title,
      content: item.summary ?? item.title,
      url: item.url,
      thumbnail: item.thumbnail || PLACEHOLDER_IMAGE,
    };
    if (item.sources?.length) {
      card.sources = item.sources;
    }
    if (item.summary) {
      card.summary = item.summary;
    }
    return card;
  });
}
/**
 * Maps stored digest rows to the blog-card shape returned by the API.
 * The card body is the short description when present, otherwise the first
 * 200 characters of the summary (with an ellipsis when truncated).
 * `digestId` is `topic:region:cluster_title`.
 */
function digestsToBlogs(digests: discoverDb.DigestRow[]): { title: string; content: string; url: string; thumbnail: string; sourcesCount: number; digestId: string }[] {
  return digests.map((digest) => {
    const truncated = digest.summary_ru.length > 200;
    const preview = digest.summary_ru.slice(0, 200) + (truncated ? '…' : '');
    return {
      title: digest.cluster_title,
      content: digest.short_description || preview,
      url: digest.main_url,
      thumbnail: digest.thumbnail || PLACEHOLDER_IMAGE,
      sourcesCount: digest.sources_count,
      digestId: `${digest.topic}:${digest.region}:${digest.cluster_title}`,
    };
  });
}
app . get < {
Querystring : { topic? : string ; region? : string ; mode? : string } ;
Querystring : { topic? : string ; region? : string ; mode? : string ; source? : string } ;
} > ( '/api/v1/discover' , async ( req , reply ) = > {
const topic = ( req . query . topic ? ? 'tech' ) as Topic ;
const mode = ( req . query . mode ? ? 'normal' ) as 'normal' | 'preview' ;
const mode = ( req . query . mode ? ? 'normal' ) as 'normal' | 'preview' | 'raw' ;
const sourceParam = ( req . query . source ? ? '' ) . toLowerCase ( ) . trim ( ) ;
if ( topic === 'gooseek' ) {
if ( topic === 'bait' || topic === 'gooseek' ) {
try {
const blogs = await fetchGooseekPosts ( ) ;
const blogs =
topic === 'bait'
? await fetchGhostPosts ( undefined , 'gooseek' )
: await fetchGhostPosts ( 'gooseek' ) ;
return { blogs } ;
} catch ( e ) {
const msg = e instanceof Error ? e.message : String ( e ) ;
@@ -294,98 +790,133 @@ app.get<{
}
}
const region = await resolve Region (
req . query . region ? ? null ,
req . headers [ 'x-forwarded-for' ] as string | null ,
req . headers [ 'user-agent' ] as string | null
) ;
const region : Region =
sourceParam === 'ru' || sourceParam === 'russian'
? 'russia'
: sourceParam === 'world' || sourceParam === 'global'
? 'world'
: await resolveRegion (
req . query . region ? ? null ,
req . headers [ 'x-forwarded-for' ] as string | null ,
req . headers [ 'user-agent' ] as string | null
) ;
const selectedTopic = SOURCES_BY_REGION [ region ] [ topic ] ;
const searchLang = region === 'russia' ? 'ru' : region === 'china' ? 'zh' : 'en' ;
const searchLang =
region === 'russia' ? 'ru' : region === 'china' ? 'zh' : 'en' ;
const cache Key = ` discover: ${ topic } : ${ region } : ${ mode } `;
try {
const cached = await redis . get ( cacheKey ) ;
if ( cached ) {
return JSON . parse ( cached ) as { blogs : unknown [ ] } ;
const precomputed Key = ` discover: ${ topic } : ${ region } ` ;
if ( mode !== 'raw' ) {
// Prefer pre-generated digests (Perplexity-style multi-source summaries)
const digests = discoverDb . getDigests ( topic , region ) ;
if ( digests . length > 0 ) {
return { blogs : digestsToBlogs ( digests ) } ;
}
try {
const cached = await redis . get ( precomputedKey ) ;
if ( cached ) {
const parsed = JSON . parse ( cached ) as unknown ;
if ( isPrecomputedPayload ( parsed ) ) {
return { blogs : precomputedToBlogs ( parsed . items ) } ;
}
if ( typeof parsed === 'object' && parsed !== null && 'blogs' in parsed && Array . isArray ( ( parsed as { blogs : unknown [ ] } ) . blogs ) ) {
return parsed as { blogs : unknown [ ] } ;
}
}
} catch {
// skip cache
}
try {
const rawData = await fetchRawForTopicRegion ( region , topic ) ;
const items : PrecomputedItem [ ] = rawData . slice ( 0 , 7 ) . map ( ( item ) = > {
const url = item . url ? ? '' ;
const thumb = item . thumbnail ? ? item . thumbnail_src ? ? item . img_src ? ? '' ;
const row = discoverDb . getByUrl ( url ) ;
return {
title : item.title ? ? 'No title' ,
titleRu : row?.title_ru ,
summary : row?.summary_ru ? ? ( item . content ? ? item . title ? ? '' ) . slice ( 0 , 300 ) ,
sources : row ? ( JSON . parse ( row . sources_json || '[]' ) as { url : string ; title : string } [ ] ) : undefined ,
url ,
thumbnail : thumb || PLACEHOLDER_IMAGE ,
fetchedAt : row?.fetched_at ? row . fetched_at * 1000 : Date.now ( ) ,
} ;
} ) ;
await redis . setex (
precomputedKey ,
REDIS_DISCOVER_TTL_SEC ,
JSON . stringify ( { items , updatedAt : Date.now ( ) } )
) ;
return { blogs : precomputedToBlogs ( items ) } ;
} catch ( err ) {
req . log . error ( err ) ;
return reply . status ( 503 ) . send ( {
message :
err instanceof Error && err . message . includes ( 'not configured' )
? 'SearxNG is not configured. Set SEARXNG_URL or SEARXNG_FALLBACK_URL.'
: 'Cannot fetch discover.' ,
} ) ;
}
} catch {
// skip cache
}
let data : SearxngSearchResult [ ] = [ ] ;
try {
if ( mode === 'normal' ) {
const seenUrls = new Set < string > ( ) ;
const searchPromises = selectedTopic . links . flatM ap( ( link ) = >
selectedTopic . query . map ( ( query ) = >
searchSearxng ( ` site: ${ link } ${ query } ` , {
engines : [ 'bing news' ] ,
p ageno : 1 ,
language : searchLang ,
} ) . then ( ( r ) = > r . results )
)
) ;
const settled = await Promise . allSettled ( searchPromises ) ;
const allResults = settled
. filter (
( r ) : r is PromiseFulfilledResult < SearxngSearchResult [ ] > = >
r . status === 'fulfilled'
)
. flatMap ( ( r ) = > r . value ) ;
data = allResults
. flat ( )
. filter ( ( item ) = > {
const url = item . url ? . toLowerC ase ( ) . trim ( ) ;
if ( ! url || seenUrls . has ( url ) ) return false ;
seenUrls . add ( url ) ;
return true ;
} )
. sort ( ( ) = > M ath . random ( ) - 0.5 ) ;
} else {
const link =
selectedTopic . links [
Math . floor ( Math . random ( ) * selectedTopic . links . length )
] ;
const query =
selectedTopic . query [
Math . floor ( Math . random ( ) * selectedTopic . query . length )
] ;
const res = await searchSearxng ( ` site: ${ link } ${ query } ` , {
engines : [ 'bing news' ] ,
pageno : 1 ,
language : searchLang ,
} ) ;
data = res . results ;
}
} catch ( err ) {
req . log . error ( err ) ;
return reply . status ( 503 ) . send ( {
message :
err instanceof Error && err . message . includes ( 'not configured' )
? 'SearxNG is not configured. Set SEARXNG_URL or SEARXNG_FALLBACK_URL.'
: 'Cannot connect to SearxNG. Check configuration.' ,
if ( mode === 'raw' ) {
let data : SearxngSearchResult [ ] = [ ] ;
try {
const seenUrls = new Set < string > ( ) ;
const searchPromises = selectedTopic . links . flatMap ( ( link ) = >
selectedTopic . query . m ap( ( query ) = >
searchSearxng ( ` site: ${ link } ${ query } ` , {
engines : [ 'bing news' ] ,
pageno : 1 ,
langu age : searchLang ,
time_range : 'day' ,
} ) . then ( ( r ) = > r . results )
)
) ;
const settled = await Promise . allSettled ( searchPromises ) ;
const allResults = settled
. filter (
( r ) : r is PromiseFulfilledResult < SearxngSearchResult [ ] > = >
r . status === 'fulfilled'
)
. flatMap ( ( r ) = > r . value ) ;
data = allResults
. flat ( )
. filter ( ( item ) = > {
const url = item . url ? . toLowerCase ( ) . trim ( ) ;
if ( ! url || seenUrls . h as( url ) ) return false ;
seenUrls . add ( url ) ;
return true ;
} )
. sort ( ( ) = > Math . random ( ) - 0.5 ) ;
} c atc h ( err ) {
req . log . error ( err ) ;
return reply . status ( 503 ) . send ( {
message :
err instanceof Error && err . message . includes ( 'not configured' )
? 'SearxNG is not configured. Set SEARXNG_URL or SEARXNG_FALLBACK_URL.'
: 'Cannot connect to SearxNG. Check configuration.' ,
} ) ;
}
const blogs = data . map ( ( item ) = > {
const thumb = item . thumbnail ? ? item . thumbnail_src ? ? item . img_src ? ? '' ;
return {
title : item.title ? ? 'No title' ,
content : ( item . content ? ? item . title ? ? '' ) . slice ( 0 , 300 ) ,
url : item.url ? ? '' ,
thumbnail : thumb || PLACEHOLDER_IMAGE ,
} ;
} ) ;
return { blogs } ;
}
const blogs = data . map ( ( item ) = > ( {
title : item.title ? ? 'No title' ,
content : ( item . content ? ? item . title ? ? '' ) . slice ( 0 , 300 ) ,
url : item.url ? ? '' ,
thumbnail : item.thumbnail ? ? item . thumbnail_src ? ? item . img_src ? ? '' ,
} ) ) ;
try {
await redis . setex ( cacheKey , 30 * 60 , JSON . stringify ( { blogs } ) ) ;
} catch {
// skip cache
}
return { blogs } ;
return reply . status ( 400 ) . send ( { message : 'invalid mode' } ) ;
} ) ;
discoverDb . startDailyCleanup ( ) ;
try {
await app . listen ( { port : PORT , host : '0.0.0.0' } ) ;
console . log ( ` discover-svc listening on : ${ PORT } ` ) ;