feat: Go backend, enhanced search, new widgets, Docker deploy

Major changes:
- Add Go backend (backend/) with microservices architecture
- Enhanced master-agents-svc: reranker, content-classifier, stealth-crawler,
  proxy-manager, media-search, fastClassifier, language detection
- New web-svc widgets: KnowledgeCard, ProductCard, ProfileCard, VideoCard,
  UnifiedCard, CardGallery, InlineImageGallery, SourcesPanel, RelatedQuestions
- Improved discover-svc with discover-db integration
- Docker deployment improvements (Caddyfile, vendor.sh, BUILD.md)
- Library-svc: project_id schema migration
- Remove deprecated finance-svc and travel-svc
- Localization improvements across services

Made-with: Cursor
This commit is contained in:
home
2026-02-27 04:15:32 +03:00
parent 328d968f3f
commit 06fe57c765
285 changed files with 53132 additions and 1871 deletions

View File

@@ -1,12 +1,22 @@
# GooSeek — запуск в Docker (без Kubernetes)
# gooseek.ru → reverse-proxy (80/443) → web-svc:3000
#
# Защита от DDoS: reverse-proxy — rate_limit (120 req/min на IP), лимит тела 10MB, лимиты памяти/CPU.
#
# Самовосстановление (аналог Kubernetes):
# - restart: unless-stopped — перезапуск при падении процесса
# - healthcheck — проверка живости; статус (healthy/unhealthy) виден в docker ps
# - autoheal — перезапускает контейнеры с меткой autoheal=true при переходе в unhealthy
#
# Запуск: ./deploy/docker/run.sh
# Порты 80 и 443 должны быть открыты на роутере (проброс на ПК)
services:
reverse-proxy:
image: caddy:2-alpine
build:
context: .
dockerfile: Dockerfile.caddy
image: gooseek/caddy-ratelimit:latest
container_name: gooseek-reverse-proxy
ports:
- "80:80"
@@ -17,12 +27,36 @@ services:
- caddy-config:/config
depends_on:
- web-svc
- ghost
restart: unless-stopped
mem_limit: 512m
cpus: 0.5
deploy:
resources:
limits:
memory: 512M
cpus: "0.5"
reservations:
memory: 64M
ulimits:
nofile:
soft: 65535
hard: 65535
labels:
- "autoheal=true"
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://127.0.0.1:80/"]
interval: 15s
timeout: 5s
retries: 3
start_period: 10s
web-svc:
build:
context: ../..
dockerfile: services/web-svc/Dockerfile
additional_contexts:
npm-cache: ../../vendor/npm-cache
args:
API_GATEWAY_URL: "http://api-gateway:3015"
image: gooseek/web-svc:latest
@@ -35,11 +69,21 @@ services:
depends_on:
- api-gateway
restart: unless-stopped
labels:
- "autoheal=true"
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://127.0.0.1:3000/api/health"]
interval: 15s
timeout: 5s
retries: 3
start_period: 30s
api-gateway:
build:
context: ../../services/api-gateway
dockerfile: Dockerfile
additional_contexts:
npm-cache: ../../vendor/npm-cache
image: gooseek/api-gateway:latest
container_name: gooseek-api-gateway
ports:
@@ -68,12 +112,23 @@ services:
- auth-svc
- llm-svc
- chat-svc
- library-svc
restart: unless-stopped
labels:
- "autoheal=true"
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://127.0.0.1:3015/health"]
interval: 15s
timeout: 5s
retries: 3
start_period: 10s
geo-device-svc:
build:
context: ../../services/geo-device-svc
dockerfile: Dockerfile
additional_contexts:
npm-cache: ../../vendor/npm-cache
image: gooseek/geo-device-svc:latest
container_name: gooseek-geo-device-svc
ports:
@@ -81,11 +136,21 @@ services:
environment:
PORT: "4002"
restart: unless-stopped
labels:
- "autoheal=true"
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://127.0.0.1:4002/health"]
interval: 15s
timeout: 5s
retries: 3
start_period: 5s
localization-svc:
build:
context: ../../services/localization-svc
dockerfile: Dockerfile
additional_contexts:
npm-cache: ../../vendor/npm-cache
image: gooseek/localization-svc:latest
container_name: gooseek-localization-svc
ports:
@@ -96,28 +161,58 @@ services:
depends_on:
- geo-device-svc
restart: unless-stopped
labels:
- "autoheal=true"
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://127.0.0.1:4003/health"]
interval: 15s
timeout: 5s
retries: 3
start_period: 10s
discover-svc:
  # Discover service: image built from services/discover-svc, served on port 3002.
  image: gooseek/discover-svc:latest
  container_name: gooseek-discover-svc
  build:
    context: ../../services/discover-svc
    dockerfile: Dockerfile
    additional_contexts:
      npm-cache: ../../vendor/npm-cache
  restart: unless-stopped
  env_file:
    - ../../.env
  environment:
    # GHOST_CONTENT_API_KEY is taken from env_file ../../.env
    PORT: '3002'
    REDIS_URL: 'redis://redis:6379'
    GEO_DEVICE_SERVICE_URL: 'http://geo-device-svc:4002'
    SEARXNG_URL: 'http://searxng:8080'
    GHOST_URL: 'http://ghost:2368'
    DISCOVER_DB_PATH: '/data/discover_articles.db'
  ports:
    - '3002:3002'
  volumes:
    # Named volume mounted at /data, the directory DISCOVER_DB_PATH points into.
    - discover-db-data:/data
  depends_on:
    - redis
    - geo-device-svc
    - searxng
    - ghost
  labels:
    # Marks the container for the autoheal service.
    autoheal: 'true'
  healthcheck:
    test:
      - CMD
      - wget
      - '-q'
      - '--spider'
      - 'http://127.0.0.1:3002/health'
    interval: 15s
    timeout: 5s
    retries: 3
    start_period: 15s
travel-svc:
build:
context: ../../services/travel-svc
dockerfile: Dockerfile
additional_contexts:
npm-cache: ../../vendor/npm-cache
image: gooseek/travel-svc:latest
container_name: gooseek-travel-svc
ports:
@@ -128,11 +223,21 @@ services:
depends_on:
- redis
restart: unless-stopped
labels:
- "autoheal=true"
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://127.0.0.1:3004/health"]
interval: 15s
timeout: 5s
retries: 3
start_period: 10s
auth-svc:
build:
context: ../..
dockerfile: services/auth-svc/Dockerfile
additional_contexts:
npm-cache: ../../vendor/npm-cache
image: gooseek/auth-svc:latest
container_name: gooseek-auth-svc
ports:
@@ -146,11 +251,21 @@ services:
volumes:
- auth-data:/data
restart: unless-stopped
labels:
- "autoheal=true"
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://127.0.0.1:3014/health"]
interval: 15s
timeout: 5s
retries: 3
start_period: 10s
llm-svc:
build:
context: ../../services/llm-svc
dockerfile: Dockerfile
additional_contexts:
npm-cache: ../../vendor/npm-cache
image: gooseek/llm-svc:latest
container_name: gooseek-llm-svc
env_file:
@@ -165,6 +280,14 @@ services:
extra_hosts:
- "host.docker.internal:host-gateway"
restart: unless-stopped
labels:
- "autoheal=true"
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://127.0.0.1:3020/health"]
interval: 15s
timeout: 10s
retries: 3
start_period: 20s
redis:
image: redis:7-alpine
@@ -174,6 +297,134 @@ services:
volumes:
- redis-data:/data
restart: unless-stopped
labels:
- "autoheal=true"
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 10s
timeout: 5s
retries: 3
start_period: 5s
crawl4ai:
  # Crawl4AI container from the upstream image; API published on port 11235.
  image: unclecode/crawl4ai:latest
  container_name: gooseek-crawl4ai
  restart: unless-stopped
  shm_size: '1g'
  environment:
    # Passed through from the host environment; empty string when unset.
    CRAWL4AI_API_TOKEN: '${CRAWL4AI_API_TOKEN:-}'
  ports:
    - '11235:11235'
  deploy:
    resources:
      limits:
        cpus: '2.0'
        memory: 4G
      reservations:
        memory: 1G
  labels:
    autoheal: 'true'
  healthcheck:
    test:
      - CMD
      - wget
      - '-q'
      - '--spider'
      - 'http://127.0.0.1:11235/health'
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 60s
cache-worker-tech:
  # Cache worker for the "tech" topic. This is the only cache-worker service
  # with a build section; it produces the gooseek/cache-worker:latest tag.
  image: gooseek/cache-worker:latest
  container_name: gooseek-cache-worker-tech
  build:
    context: ../../services/cache-worker
    dockerfile: Dockerfile
    additional_contexts:
      npm-cache: ../../vendor/npm-cache
  restart: unless-stopped
  environment:
    REDIS_URL: 'redis://redis:6379'
    DISCOVER_SVC_URL: 'http://discover-svc:3002'
    FINANCE_SVC_URL: 'http://finance-svc:3003'
    TRAVEL_SVC_URL: 'http://travel-svc:3004'
    LLM_SVC_URL: 'http://llm-svc:3020'
    CRAWL4AI_URL: 'http://crawl4ai:11235'
  # Runs the discover task once at start, then repeats it after every
  # 900-second sleep.
  command:
    - sh
    - '-c'
    - >-
      node dist/run.js --task=discover --topic=tech;
      while true; do sleep 900;
      node dist/run.js --task=discover --topic=tech;
      done
  depends_on:
    - redis
    - discover-svc
    - llm-svc
    - crawl4ai
cache-worker-finance:
  # Cache worker for the "finance" topic. No build section: it runs the
  # gooseek/cache-worker:latest image tag.
  image: gooseek/cache-worker:latest
  container_name: gooseek-cache-worker-finance
  restart: unless-stopped
  environment:
    REDIS_URL: 'redis://redis:6379'
    DISCOVER_SVC_URL: 'http://discover-svc:3002'
    FINANCE_SVC_URL: 'http://finance-svc:3003'
    TRAVEL_SVC_URL: 'http://travel-svc:3004'
    LLM_SVC_URL: 'http://llm-svc:3020'
    CRAWL4AI_URL: 'http://crawl4ai:11235'
  # Runs the discover task once at start, then repeats it after every
  # 900-second sleep.
  command:
    - sh
    - '-c'
    - >-
      node dist/run.js --task=discover --topic=finance;
      while true; do sleep 900;
      node dist/run.js --task=discover --topic=finance;
      done
  depends_on:
    - redis
    - discover-svc
    - llm-svc
    - crawl4ai
cache-worker-art:
  # Cache worker for the "art" topic. No build section: it runs the
  # gooseek/cache-worker:latest image tag.
  image: gooseek/cache-worker:latest
  container_name: gooseek-cache-worker-art
  restart: unless-stopped
  environment:
    REDIS_URL: 'redis://redis:6379'
    DISCOVER_SVC_URL: 'http://discover-svc:3002'
    FINANCE_SVC_URL: 'http://finance-svc:3003'
    TRAVEL_SVC_URL: 'http://travel-svc:3004'
    LLM_SVC_URL: 'http://llm-svc:3020'
    CRAWL4AI_URL: 'http://crawl4ai:11235'
  # Runs the discover task once at start, then repeats it after every
  # 900-second sleep.
  command:
    - sh
    - '-c'
    - >-
      node dist/run.js --task=discover --topic=art;
      while true; do sleep 900;
      node dist/run.js --task=discover --topic=art;
      done
  depends_on:
    - redis
    - discover-svc
    - llm-svc
    - crawl4ai
cache-worker-sports:
  # Cache worker for the "sports" topic. No build section: it runs the
  # gooseek/cache-worker:latest image tag.
  image: gooseek/cache-worker:latest
  container_name: gooseek-cache-worker-sports
  restart: unless-stopped
  environment:
    REDIS_URL: 'redis://redis:6379'
    DISCOVER_SVC_URL: 'http://discover-svc:3002'
    FINANCE_SVC_URL: 'http://finance-svc:3003'
    TRAVEL_SVC_URL: 'http://travel-svc:3004'
    LLM_SVC_URL: 'http://llm-svc:3020'
    CRAWL4AI_URL: 'http://crawl4ai:11235'
  # Runs the discover task once at start, then repeats it after every
  # 900-second sleep.
  command:
    - sh
    - '-c'
    - >-
      node dist/run.js --task=discover --topic=sports;
      while true; do sleep 900;
      node dist/run.js --task=discover --topic=sports;
      done
  depends_on:
    - redis
    - discover-svc
    - llm-svc
    - crawl4ai
cache-worker-entertainment:
  # Cache worker for the "entertainment" topic. No build section: it runs the
  # gooseek/cache-worker:latest image tag.
  image: gooseek/cache-worker:latest
  container_name: gooseek-cache-worker-entertainment
  restart: unless-stopped
  environment:
    REDIS_URL: 'redis://redis:6379'
    DISCOVER_SVC_URL: 'http://discover-svc:3002'
    FINANCE_SVC_URL: 'http://finance-svc:3003'
    TRAVEL_SVC_URL: 'http://travel-svc:3004'
    LLM_SVC_URL: 'http://llm-svc:3020'
    CRAWL4AI_URL: 'http://crawl4ai:11235'
  # Runs the discover task once at start, then repeats it after every
  # 900-second sleep.
  command:
    - sh
    - '-c'
    - >-
      node dist/run.js --task=discover --topic=entertainment;
      while true; do sleep 900;
      node dist/run.js --task=discover --topic=entertainment;
      done
  depends_on:
    - redis
    - discover-svc
    - llm-svc
    - crawl4ai
searxng:
image: searxng/searxng:latest
@@ -187,11 +438,80 @@ services:
environment:
SEARXNG_BASE_URL: "http://localhost:8080/"
restart: unless-stopped
labels:
- "autoheal=true"
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://127.0.0.1:8080/"]
interval: 20s
timeout: 10s
retries: 3
start_period: 30s
ghost-db:
  # MySQL 8 database used by the ghost service (see database__connection__host
  # in the ghost service's environment).
  image: 'mysql:8'
  container_name: gooseek-ghost-db
  restart: unless-stopped
  environment:
    MYSQL_DATABASE: ghost
    MYSQL_USER: ghost
    # Both passwords come from the host environment and fall back to the
    # literal defaults below when unset.
    MYSQL_PASSWORD: '${GHOST_DB_PASSWORD:-ghost}'
    MYSQL_ROOT_PASSWORD: '${GHOST_DB_ROOT_PASSWORD:-ghost_root}'
  volumes:
    - ghost-db-data:/var/lib/mysql
  labels:
    autoheal: 'true'
  healthcheck:
    # "$$" is Compose's escape for a literal "$", so MYSQL_ROOT_PASSWORD is
    # expanded by the shell inside the container, not by Compose.
    test:
      - CMD-SHELL
      - 'mysqladmin ping -h 127.0.0.1 -uroot -p"$$MYSQL_ROOT_PASSWORD" || exit 1'
    interval: 5s
    timeout: 5s
    retries: 10
ghost:
  # Ghost 6 instance backed by the ghost-db MySQL service; published on 2368.
  image: 'ghost:6-alpine'
  container_name: gooseek-ghost
  restart: unless-stopped
  ports:
    - '2368:2368'
  env_file:
    - ../../.env
  environment:
    url: 'https://bait.su'
    # Database connection (password falls back to "ghost" when unset).
    database__client: mysql
    database__connection__host: ghost-db
    database__connection__user: ghost
    database__connection__password: '${GHOST_DB_PASSWORD:-ghost}'
    database__connection__database: ghost
    # SMTP for outgoing mail (2FA, password reset, invitations) — bait.su.
    # The "from" address must match the auth user, otherwise Timeweb rejects
    # the message (Sender address rejected).
    mail__transport: SMTP
    mail__options__host: smtp.timeweb.ru
    mail__options__port: '${GHOST_MAIL_PORT:-465}'
    mail__options__secure: 'true'
    mail__options__auth__user: '${GHOST_MAIL_USER:-2factor@bait.su}'
    mail__options__auth__pass: '${GHOST_MAIL_PASSWORD}'
    mail__from: '${GHOST_MAIL_FROM:-bait.su <2factor@bait.su>}'
    # Temporarily disables device verification (email 2FA) so staff can log in
    # without a code; re-enable once SMTP is configured.
    security__staffDeviceVerification: 'false'
  volumes:
    - ghost-content:/var/lib/ghost/content
  depends_on:
    # Wait until the ghost-db healthcheck reports healthy.
    ghost-db:
      condition: service_healthy
  labels:
    autoheal: 'true'
  healthcheck:
    # Probes the content API and greps the wget output for 401 or 200; if that
    # does not match, falls back to requesting /favicon.ico.
    test:
      - CMD-SHELL
      - "wget -q --spider http://127.0.0.1:2368/ghost/api/content/settings/?key=dummy 2>&1 | grep -q '401\\|200' || wget -q --spider http://127.0.0.1:2368/favicon.ico"
    interval: 30s
    timeout: 15s
    retries: 5
    start_period: 90s
search-svc:
build:
context: ../../services/search-svc
dockerfile: Dockerfile
additional_contexts:
npm-cache: ../../vendor/npm-cache
image: gooseek/search-svc:latest
container_name: gooseek-search-svc
ports:
@@ -204,11 +524,21 @@ services:
- redis
- searxng
restart: unless-stopped
labels:
- "autoheal=true"
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://127.0.0.1:3001/health"]
interval: 15s
timeout: 5s
retries: 3
start_period: 15s
master-agents-svc:
build:
context: ../../services/master-agents-svc
dockerfile: Dockerfile
additional_contexts:
npm-cache: ../../vendor/npm-cache
image: gooseek/master-agents-svc:latest
container_name: gooseek-master-agents-svc
ports:
@@ -217,15 +547,28 @@ services:
PORT: "3018"
LLM_SVC_URL: "http://llm-svc:3020"
SEARCH_SVC_URL: "http://search-svc:3001"
DISCOVER_SVC_URL: "http://discover-svc:3002"
CRAWL4AI_URL: "http://crawl4ai:11235"
depends_on:
- llm-svc
- search-svc
- discover-svc
restart: unless-stopped
labels:
- "autoheal=true"
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://127.0.0.1:3018/health"]
interval: 15s
timeout: 5s
retries: 3
start_period: 15s
chat-svc:
build:
context: ../../services/chat-svc
dockerfile: Dockerfile
additional_contexts:
npm-cache: ../../vendor/npm-cache
image: gooseek/chat-svc:latest
container_name: gooseek-chat-svc
ports:
@@ -234,18 +577,148 @@ services:
PORT: "3005"
MASTER_AGENTS_SVC_URL: "http://master-agents-svc:3018"
LLM_SVC_URL: "http://llm-svc:3020"
DISCOVER_SVC_URL: "http://discover-svc:3002"
volumes:
- chat-data:/app/data
depends_on:
- master-agents-svc
- llm-svc
- discover-svc
restart: unless-stopped
labels:
- "autoheal=true"
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://127.0.0.1:3005/health"]
interval: 15s
timeout: 5s
retries: 3
start_period: 20s
projects-svc:
  # Projects service: image built from services/projects-svc, served on 3006.
  image: gooseek/projects-svc:latest
  container_name: gooseek-projects-svc
  build:
    context: ../../services/projects-svc
    dockerfile: Dockerfile
    additional_contexts:
      npm-cache: ../../vendor/npm-cache
  restart: unless-stopped
  environment:
    PORT: '3006'
    AUTH_SERVICE_URL: 'http://auth-svc:3014'
  ports:
    - '3006:3006'
  depends_on:
    - auth-svc
  labels:
    autoheal: 'true'
  healthcheck:
    test:
      - CMD
      - wget
      - '-q'
      - '--spider'
      - 'http://127.0.0.1:3006/health'
    interval: 15s
    timeout: 5s
    retries: 3
    start_period: 10s
postgres:
  # PostgreSQL 16 instance; library-svc connects to it through POSTGRES_URL.
  image: 'postgres:16-alpine'
  container_name: gooseek-postgres
  restart: unless-stopped
  environment:
    # NOTE(review): credentials are literal values in this file and the port
    # below is published on the host — confirm this is intended outside of
    # local development.
    POSTGRES_DB: gooseek
    POSTGRES_USER: gooseek
    POSTGRES_PASSWORD: gooseek
  ports:
    - '5432:5432'
  volumes:
    - postgres-data:/var/lib/postgresql/data
  labels:
    autoheal: 'true'
  healthcheck:
    # Polled every 2 s so services waiting on service_healthy start quickly.
    test:
      - CMD-SHELL
      - 'pg_isready -U gooseek -d gooseek'
    interval: 2s
    timeout: 5s
    retries: 10
library-svc:
  # Library service: image built from services/library-svc, served on 3009,
  # storing data in the postgres service.
  build:
    context: ../../services/library-svc
    dockerfile: Dockerfile
    additional_contexts:
      npm-cache: ../../vendor/npm-cache
  image: gooseek/library-svc:latest
  container_name: gooseek-library-svc
  ports:
    - "3009:3009"
  environment:
    PORT: "3009"
    POSTGRES_URL: "postgresql://gooseek:gooseek@postgres:5432/gooseek"
    AUTH_SERVICE_URL: "http://auth-svc:3014"
  # Startup sequence:
  #   1. Run the schema push (dist/db/push.js) as a best-effort step. A failure
  #      no longer has its stderr discarded: the error output stays in the
  #      container log and an explicit warning is printed, but the service
  #      still starts, as before.
  #   2. `exec` replaces the shell with the Node server, so the server process
  #      receives container stop signals directly instead of a wrapping shell.
  command:
    - sh
    - "-c"
    - |-
      node dist/db/push.js || echo "library-svc db push failed; starting service anyway" >&2
      exec node dist/index.js
  depends_on:
    # Wait until the postgres healthcheck (pg_isready) reports healthy.
    postgres:
      condition: service_healthy
  restart: unless-stopped
  labels:
    - "autoheal=true"
  healthcheck:
    test: ["CMD", "wget", "-q", "--spider", "http://127.0.0.1:3009/health"]
    interval: 15s
    timeout: 5s
    retries: 3
    start_period: 25s
# Restarts containers labelled autoheal=true once they become unhealthy
# (analogous to a Kubernetes liveness probe).
autoheal:
  image: willfarrell/autoheal:latest
  container_name: gooseek-autoheal
  restart: always
  environment:
    # Name of the container label to watch; services opt in with autoheal=true.
    AUTOHEAL_CONTAINER_LABEL: 'autoheal'
    AUTOHEAL_INTERVAL: '10'
    AUTOHEAL_START_PERIOD: '30'
  volumes:
    # The host Docker socket is mounted into this container.
    - /var/run/docker.sock:/var/run/docker.sock
# Tor proxy for bypassing blocks (free, open source).
# SOCKS5: tor-proxy:9050, Control: tor-proxy:9051
# Multiple circuits are used for IP rotation.
tor-proxy:
  image: dperson/torproxy:latest
  container_name: gooseek-tor-proxy
  environment:
    - TOR_NewCircuitPeriod=30
    - TOR_MaxCircuitDirtiness=600
    - TOR_CircuitBuildTimeout=10
    - TOR_NumEntryGuards=8
    # Control password is read from TOR_CONTROL_PASSWORD (host environment or
    # .env next to this file) so it no longer has to live in version control.
    # Falls back to the previous literal value when the variable is unset, so
    # existing deployments behave exactly as before.
    - "PASSWORD=${TOR_CONTROL_PASSWORD:-gooseek_tor_control}"
  ports:
    # All published ports are bound to the host loopback interface only.
    - "127.0.0.1:9050:9050"
    - "127.0.0.1:9051:9051"
    - "127.0.0.1:8118:8118"
  deploy:
    resources:
      limits:
        memory: 512M
        cpus: "0.5"
  restart: unless-stopped
  labels:
    - "autoheal=true"
  healthcheck:
    # Healthy when a TCP connection to port 9050 inside the container succeeds.
    test: ["CMD", "nc", "-z", "127.0.0.1", "9050"]
    interval: 30s
    timeout: 5s
    retries: 3
    start_period: 30s
# Named volumes referenced by the services; each is declared with no options.
volumes:
  auth-data: {}
  llm-data: {}
  redis-data: {}
  discover-db-data: {}
  chat-data: {}
  postgres-data: {}
  caddy-data: {}
  caddy-config: {}
  searxng-cache: {}
  ghost-db-data: {}
  ghost-content: {}