diff --git a/CONTINUE.md b/CONTINUE.md index 24955e7..093362b 100644 --- a/CONTINUE.md +++ b/CONTINUE.md @@ -1,26 +1,99 @@ -# Недоделки — начать отсюда +# LLM Routing по тарифам ✅ -## Всё готово! ✅ +## Архитектура -### Сделано — 2 марта 2026 +``` +┌─────────────────────────────────────────────────────────┐ +│ llm-svc │ +│ │ +│ POST /api/v1/generate │ +│ │ │ +│ ▼ │ +│ ┌─────────────────┐ │ +│ │ resolveProvider │ │ +│ │ (tier) │ │ +│ └────────┬────────┘ │ +│ │ │ +│ ┌─────┴─────┐ │ +│ ▼ ▼ │ +│ ┌──────┐ ┌────────┐ │ +│ │ FREE │ │ PRO │ │ +│ └──┬───┘ └───┬────┘ │ +│ │ │ │ +│ ▼ ▼ │ +│ Ollama Timeweb │ +│ (local) (cloud) │ +└─────────────────────────────────────────────────────────┘ +``` -**Security Hardening (Gitea):** -- [x] Gitea обновлён: 1.22.6 → 1.25.4 (CVE исправлены) -- [x] Регистрация отключена, Swagger отключён -- [x] Security headers настроены (CSP, X-Content-Type-Options, etc.) +## Роутинг по тарифам -**CI/CD и инфраструктура:** -- [x] K3s registry настроен для HTTP (k3s-registries.yaml) -- [x] file-svc PVC исправлен (ReadWriteOnce) -- [x] Все сервисы работают +| Тариф | Провайдер | Модель | Лимиты | +|-------|-----------|--------|--------| +| **free** | Ollama (local) | qwen3.5:9b | 50 req/day, 2000 tokens/req | +| **pro** | Timeweb | gpt-4o, claude, etc | 500 req/day, 8000 tokens/req | +| **business** | Timeweb | all models | 5000 req/day, 32000 tokens/req | -**Коммиты:** -- e64567a - fix: file-svc PVC, k3s registries -- c9e5ff6 - docs: CONTINUE.md updated -- d2ef146 - security: Gitea upgrade +## API Endpoints -### Контекст -- Сервер: 5.187.77.89 -- https://gooseek.ru — работает ✅ -- https://git.gooseek.ru — Gitea 1.25.4 ✅ -- K3s + Nginx Ingress + Cert-Manager работают +### POST /api/v1/generate +```json +{ + "providerId": "auto", // или "ollama", "timeweb", etc + "key": "qwen3.5:9b", // модель + "messages": [{"role": "user", "content": "..."}], + "options": { + "maxTokens": 1000, + "temperature": 0.7, + "stream": true + } +} +``` + +### POST /api/v1/embed 
+```json +{ + "input": "Текст для эмбеддинга", + "model": "qwen3-embedding:0.6b" +} +``` + +### GET /api/v1/providers +Возвращает список доступных провайдеров с указанием tier. + +--- + +## Ollama конфигурация + +| Параметр | Значение | +|----------|----------| +| OLLAMA_NUM_PARALLEL | 4 | +| OLLAMA_MAX_LOADED_MODELS | 2 | +| OLLAMA_FLASH_ATTENTION | true | +| Модель генерации | qwen3.5:9b | +| Модель эмбеддингов | qwen3-embedding:0.6b | + +## Пропускная способность + +| Сценарий | Одновременно | RPM | +|----------|--------------|-----| +| Короткие ответы | 6-8 чел | ~40-60 | +| Средние ответы | 4-6 чел | ~20-30 | +| Эмбеддинги | 10+ чел | ~800+ | + +--- + +## Файлы изменены + +- `backend/cmd/llm-svc/main.go` — роутинг по тарифу, /embed endpoint +- `backend/internal/llm/ollama.go` — qwen3.5:9b, убран токен, GenerateEmbedding +- `backend/internal/llm/client.go` — убран OllamaToken +- `backend/deploy/k8s/ollama.yaml` — GPU + параллельность +- `backend/deploy/k8s/ollama-models.yaml` — без авторизации + +--- + +## Сервер +- IP: 5.187.77.89 +- GPU: RTX 4060 Ti 16GB +- Site: https://gooseek.ru diff --git a/backend/cmd/api-gateway/main.go b/backend/cmd/api-gateway/main.go index a12ead3..c3ec770 100644 --- a/backend/cmd/api-gateway/main.go +++ b/backend/cmd/api-gateway/main.go @@ -15,6 +15,7 @@ import ( "github.com/gofiber/fiber/v2/middleware/cors" "github.com/gofiber/fiber/v2/middleware/logger" "github.com/gooseek/backend/pkg/config" + "github.com/gooseek/backend/pkg/metrics" "github.com/gooseek/backend/pkg/middleware" "github.com/redis/go-redis/v9" ) @@ -73,6 +74,11 @@ func main() { AllowHeaders: "Origin, Content-Type, Accept, Authorization", AllowMethods: "GET, POST, PUT, PATCH, DELETE, OPTIONS", })) + app.Use(metrics.PrometheusMiddleware(metrics.MetricsConfig{ + ServiceName: "api-gateway", + })) + + app.Get("/metrics", metrics.MetricsHandler()) app.Use(middleware.JWT(middleware.JWTConfig{ Secret: cfg.JWTSecret, diff --git a/backend/cmd/llm-svc/main.go 
b/backend/cmd/llm-svc/main.go index 5853484..abe2069 100644 --- a/backend/cmd/llm-svc/main.go +++ b/backend/cmd/llm-svc/main.go @@ -15,6 +15,7 @@ import ( "github.com/gooseek/backend/internal/llm" "github.com/gooseek/backend/internal/usage" "github.com/gooseek/backend/pkg/config" + "github.com/gooseek/backend/pkg/metrics" "github.com/gooseek/backend/pkg/middleware" "github.com/gooseek/backend/pkg/ndjson" _ "github.com/lib/pq" @@ -34,6 +35,51 @@ type GenerateRequest struct { } `json:"options"` } +type EmbedRequest struct { + Input string `json:"input"` + Model string `json:"model,omitempty"` +} + +type ProviderRouting struct { + ProviderID string + ModelKey string +} + +func resolveProvider(cfg *config.Config, tier string, requestedProvider string, requestedModel string) ProviderRouting { + if tier == "free" || tier == "" { + return ProviderRouting{ + ProviderID: "ollama", + ModelKey: cfg.OllamaModelKey, + } + } + + if requestedProvider != "" && requestedProvider != "auto" { + return ProviderRouting{ + ProviderID: requestedProvider, + ModelKey: requestedModel, + } + } + + if cfg.TimewebAgentAccessID != "" && cfg.TimewebAPIKey != "" { + return ProviderRouting{ + ProviderID: "timeweb", + ModelKey: requestedModel, + } + } + + if cfg.OpenAIAPIKey != "" { + return ProviderRouting{ + ProviderID: "openai", + ModelKey: "gpt-4o-mini", + } + } + + return ProviderRouting{ + ProviderID: "ollama", + ModelKey: cfg.OllamaModelKey, + } +} + func main() { cfg, err := config.Load() if err != nil { @@ -70,19 +116,46 @@ func main() { app.Use(logger.New()) app.Use(cors.New()) + app.Use(metrics.PrometheusMiddleware(metrics.MetricsConfig{ + ServiceName: "llm-svc", + })) app.Get("/health", func(c *fiber.Ctx) error { return c.JSON(fiber.Map{"status": "ok"}) }) + app.Get("/ready", func(c *fiber.Ctx) error { + return c.JSON(fiber.Map{"status": "ready"}) + }) + + app.Get("/metrics", metrics.MetricsHandler()) + app.Get("/api/v1/providers", func(c *fiber.Ctx) error { providers := []fiber.Map{} + 
providers = append(providers, fiber.Map{ + "id": "ollama", + "name": "GooSeek AI (Бесплатно)", + "models": []string{cfg.OllamaModelKey}, + "tier": "free", + "isLocal": true, + }) + + if cfg.TimewebAgentAccessID != "" && cfg.TimewebAPIKey != "" { + providers = append(providers, fiber.Map{ + "id": "timeweb", + "name": "Timeweb Cloud AI (Pro)", + "models": []string{"gpt-4o", "gpt-4o-mini", "claude-3-5-sonnet", "gemini-1.5-pro"}, + "tier": "pro", + }) + } + if cfg.OpenAIAPIKey != "" { providers = append(providers, fiber.Map{ "id": "openai", "name": "OpenAI", - "models": []string{"gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-3.5-turbo"}, + "models": []string{"gpt-4o", "gpt-4o-mini", "gpt-4-turbo"}, + "tier": "pro", }) } @@ -90,7 +163,8 @@ func main() { providers = append(providers, fiber.Map{ "id": "anthropic", "name": "Anthropic", - "models": []string{"claude-3-5-sonnet-20241022", "claude-3-opus-20240229", "claude-3-haiku-20240307"}, + "models": []string{"claude-3-5-sonnet-20241022", "claude-3-opus-20240229"}, + "tier": "pro", }) } @@ -99,6 +173,7 @@ func main() { "id": "gemini", "name": "Google Gemini", "models": []string{"gemini-1.5-pro", "gemini-1.5-flash", "gemini-2.0-flash-exp"}, + "tier": "pro", }) } @@ -123,31 +198,49 @@ func main() { })) llmAPI.Post("/generate", func(c *fiber.Ctx) error { + startTime := time.Now() userID := middleware.GetUserID(c) tier := middleware.GetUserTier(c) + clientIP := c.IP() + if tier == "" { tier = "free" } + var req GenerateRequest if err := c.BodyParser(&req); err != nil { + metrics.RecordLLMError(req.ProviderID, "invalid_request") return c.Status(400).JSON(fiber.Map{"error": "Invalid request body"}) } if len(req.Messages) == 0 { + metrics.RecordLLMError(req.ProviderID, "empty_messages") return c.Status(400).JSON(fiber.Map{"error": "Messages required"}) } limits := usage.GetLimits(tier) if req.Options.MaxTokens == 0 || req.Options.MaxTokens > limits.MaxTokensPerReq { + if tier == "free" && req.Options.MaxTokens > 
limits.MaxTokensPerReq { + metrics.RecordFreeTierLimitExceeded(userID, "max_tokens") + } req.Options.MaxTokens = limits.MaxTokensPerReq } + routing := resolveProvider(cfg, tier, req.ProviderID, req.ModelKey) + providerID := routing.ProviderID + modelKey := routing.ModelKey + + metrics.RecordLLMRequest(providerID, modelKey, tier, userID) + client, err := llm.NewClient(llm.ProviderConfig{ - ProviderID: req.ProviderID, - ModelKey: req.ModelKey, - APIKey: getAPIKey(cfg, req.ProviderID), + ProviderID: providerID, + ModelKey: modelKey, + APIKey: getAPIKey(cfg, providerID), + BaseURL: getBaseURL(cfg, providerID), + AgentAccessID: cfg.TimewebAgentAccessID, }) if err != nil { + metrics.RecordLLMError(req.ProviderID, "client_init_error") return c.Status(500).JSON(fiber.Map{"error": err.Error()}) } @@ -171,6 +264,8 @@ func main() { }, }) if err != nil { + metrics.RecordLLMError(providerID, "stream_error") + metrics.RecordSecurityEvent("llm_error", clientIP, userID) return c.Status(500).JSON(fiber.Map{"error": err.Error()}) } @@ -179,14 +274,23 @@ func main() { c.Context().SetBodyStreamWriter(func(w *bufio.Writer) { writer := ndjson.NewWriter(w) + tokenCount := 0 for chunk := range stream { writer.Write(fiber.Map{ "type": "chunk", "chunk": chunk.ContentChunk, }) w.Flush() + tokenCount += len(chunk.ContentChunk) / 4 } writer.Write(fiber.Map{"type": "done"}) + + metrics.RecordLLMLatency(providerID, modelKey, time.Since(startTime)) + metrics.RecordLLMTokens(providerID, tier, userID, tokenCount) + + if usageRepo != nil { + go usageRepo.IncrementLLMUsage(context.Background(), userID, tier, tokenCount) + } }) return nil @@ -200,11 +304,16 @@ func main() { }, }) if err != nil { + metrics.RecordLLMError(providerID, "generate_error") return c.Status(500).JSON(fiber.Map{"error": err.Error()}) } + tokenCount := len(response) / 4 + metrics.RecordLLMLatency(providerID, modelKey, time.Since(startTime)) + metrics.RecordLLMTokens(providerID, tier, userID, tokenCount) + if usageRepo != nil { - 
go usageRepo.IncrementLLMUsage(context.Background(), userID, tier, len(response)/4) + go usageRepo.IncrementLLMUsage(context.Background(), userID, tier, tokenCount) } return c.JSON(fiber.Map{ @@ -213,7 +322,39 @@ func main() { }) llmAPI.Post("/embed", func(c *fiber.Ctx) error { - return c.Status(501).JSON(fiber.Map{"error": "Not implemented"}) + userID := middleware.GetUserID(c) + tier := middleware.GetUserTier(c) + + if tier == "" { + tier = "free" + } + + var req EmbedRequest + if err := c.BodyParser(&req); err != nil { + return c.Status(400).JSON(fiber.Map{"error": "Invalid request body"}) + } + + if req.Input == "" { + return c.Status(400).JSON(fiber.Map{"error": "Input text required"}) + } + + model := req.Model + if model == "" { + model = cfg.OllamaEmbeddingModel + } + + embeddings, err := llm.GenerateEmbedding(cfg.OllamaBaseURL, model, req.Input) + if err != nil { + metrics.RecordLLMError("ollama", "embed_error") + return c.Status(500).JSON(fiber.Map{"error": err.Error()}) + } + + metrics.RecordLLMRequest("ollama", model, tier, userID) + + return c.JSON(fiber.Map{ + "embedding": embeddings, + "model": model, + }) }) port := cfg.LLMSvcPort @@ -223,8 +364,10 @@ func main() { func getAPIKey(cfg *config.Config, providerID string) string { switch providerID { - case "openai", "timeweb": + case "openai": return cfg.OpenAIAPIKey + case "timeweb": + return cfg.TimewebAPIKey case "anthropic": return cfg.AnthropicAPIKey case "gemini", "google": @@ -234,6 +377,17 @@ func getAPIKey(cfg *config.Config, providerID string) string { } } +func getBaseURL(cfg *config.Config, providerID string) string { + switch providerID { + case "timeweb": + return cfg.TimewebAPIBaseURL + case "ollama": + return cfg.OllamaBaseURL + default: + return "" + } +} + func init() { if os.Getenv("PORT") == "" { os.Setenv("PORT", "3020") diff --git a/backend/deploy/docker/Dockerfile.all b/backend/deploy/docker/Dockerfile.all index c7d6f88..c07fa9b 100644 --- a/backend/deploy/docker/Dockerfile.all 
+++ b/backend/deploy/docker/Dockerfile.all @@ -9,6 +9,7 @@ COPY go.mod go.sum ./ RUN go mod download COPY . . +RUN go mod tidy # Build all services RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /bin/api-gateway ./cmd/api-gateway diff --git a/backend/deploy/k8s/api-gateway.yaml b/backend/deploy/k8s/api-gateway.yaml index 6ded4f4..12b7c6d 100644 --- a/backend/deploy/k8s/api-gateway.yaml +++ b/backend/deploy/k8s/api-gateway.yaml @@ -16,6 +16,10 @@ spec: metadata: labels: app: api-gateway + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "3015" + prometheus.io/path: "/metrics" spec: containers: - name: api-gateway diff --git a/backend/deploy/k8s/configmap.yaml b/backend/deploy/k8s/configmap.yaml index 59a983b..5e85196 100644 --- a/backend/deploy/k8s/configmap.yaml +++ b/backend/deploy/k8s/configmap.yaml @@ -23,6 +23,10 @@ data: AUTH_SVC_URL: "http://auth-svc:3050" TRAVEL_SVC_URL: "http://travel-svc:3035" ADMIN_SVC_URL: "http://admin-svc:3040" + OLLAMA_BASE_URL: "http://ollama:11434" + OLLAMA_MODEL: "qwen3.5:9b" + OLLAMA_EMBEDDING_MODEL: "qwen3-embedding:0.6b" + OLLAMA_NUM_PARALLEL: "2" DEFAULT_LLM_MODEL: "${DEFAULT_LLM_MODEL}" DEFAULT_LLM_PROVIDER: "${DEFAULT_LLM_PROVIDER}" TIMEWEB_API_BASE_URL: "${TIMEWEB_API_BASE_URL}" @@ -50,5 +54,6 @@ stringData: GEMINI_API_KEY: "${GEMINI_API_KEY}" JWT_SECRET: "${JWT_SECRET}" TIMEWEB_API_KEY: "${TIMEWEB_API_KEY}" + OLLAMA_API_TOKEN: "${OLLAMA_API_TOKEN}" POSTGRES_USER: "gooseek" POSTGRES_PASSWORD: "gooseek" diff --git a/backend/deploy/k8s/deploy.sh b/backend/deploy/k8s/deploy.sh index 678254e..70f993f 100755 --- a/backend/deploy/k8s/deploy.sh +++ b/backend/deploy/k8s/deploy.sh @@ -22,6 +22,21 @@ if [ -f "$ENV_FILE" ]; then set +a fi +# Check required secrets +if [ -z "$OLLAMA_API_TOKEN" ]; then + echo "Warning: OLLAMA_API_TOKEN not set. Generating random token..." 
+ OLLAMA_API_TOKEN=$(openssl rand -hex 32) + echo "OLLAMA_API_TOKEN=$OLLAMA_API_TOKEN" >> "$ENV_FILE" + echo "Token saved to .env" +fi + +if [ -z "$GRAFANA_ADMIN_PASSWORD" ]; then + echo "Warning: GRAFANA_ADMIN_PASSWORD not set. Generating random password..." + GRAFANA_ADMIN_PASSWORD=$(openssl rand -base64 24) + echo "GRAFANA_ADMIN_PASSWORD=$GRAFANA_ADMIN_PASSWORD" >> "$ENV_FILE" + echo "Grafana password saved to .env" +fi + # Check kubectl if ! command -v kubectl &> /dev/null; then echo "Error: kubectl not found" @@ -62,6 +77,9 @@ echo "=== Generating K8s manifests from .env ===" if command -v envsubst &> /dev/null && [ -f "$ENV_FILE" ]; then envsubst < "$SCRIPT_DIR/configmap.yaml" > "$SCRIPT_DIR/_generated_configmap.yaml" kubectl apply -f "$SCRIPT_DIR/_generated_configmap.yaml" -n gooseek + + # Generate monitoring manifests + envsubst < "$SCRIPT_DIR/monitoring.yaml" > "$SCRIPT_DIR/_generated_monitoring.yaml" fi # Apply kustomization @@ -70,6 +88,14 @@ echo "=== Applying K8s manifests ===" cd "$SCRIPT_DIR" kubectl apply -k . 
+# Apply monitoring stack +echo "" +echo "=== Deploying Monitoring Stack ===" +if [ -f "$SCRIPT_DIR/_generated_monitoring.yaml" ]; then + kubectl apply -f "$SCRIPT_DIR/_generated_monitoring.yaml" + kubectl apply -f "$SCRIPT_DIR/grafana-dashboards.yaml" +fi + # Rolling restart to pull new images echo "" echo "=== Rolling restart deployments ===" @@ -79,11 +105,16 @@ kubectl -n gooseek rollout restart deployment/chat-svc kubectl -n gooseek rollout restart deployment/agent-svc kubectl -n gooseek rollout restart deployment/discover-svc kubectl -n gooseek rollout restart deployment/search-svc +kubectl -n gooseek rollout restart deployment/llm-svc kubectl -n gooseek rollout restart deployment/learning-svc kubectl -n gooseek rollout restart deployment/medicine-svc kubectl -n gooseek rollout restart deployment/travel-svc kubectl -n gooseek rollout restart deployment/sandbox-svc +# Ollama: не рестартим без необходимости (модели хранятся на PVC) +# Модели загружаются один раз и сохраняются между деплоями +# Для загрузки новых моделей: kubectl apply -f ollama-models.yaml + # Wait for rollout echo "" echo "=== Waiting for rollouts ===" @@ -100,7 +131,18 @@ kubectl -n gooseek get svc echo "" kubectl -n gooseek get ingress +# Show monitoring status +echo "" +echo "=== Monitoring Status ===" +kubectl -n monitoring get pods 2>/dev/null || echo "Monitoring namespace not ready yet" +kubectl -n monitoring get ingress 2>/dev/null || true + echo "" echo "=== Done ===" -echo "API: https://api.gooseek.ru" -echo "Web: https://gooseek.ru" +echo "API: https://api.gooseek.ru" +echo "Web: https://gooseek.ru" +echo "Grafana: https://grafana.gooseek.ru" +echo "" +echo "Grafana credentials:" +echo " User: admin" +echo " Pass: (see GRAFANA_ADMIN_PASSWORD in .env)" diff --git a/backend/deploy/k8s/grafana-dashboards.yaml b/backend/deploy/k8s/grafana-dashboards.yaml new file mode 100644 index 0000000..e078b9c --- /dev/null +++ b/backend/deploy/k8s/grafana-dashboards.yaml @@ -0,0 +1,266 @@ +# Grafana 
Dashboards ConfigMap +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboards + namespace: monitoring +data: + security.json: | + { + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 1, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]} + } + }, + "gridPos": {"h": 4, "w": 6, "x": 0, "y": 0}, + "id": 1, + "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "pluginVersion": "10.3.3", + "targets": [{"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "sum(rate(llm_unauthorized_requests_total[5m]))", "legendFormat": "Unauthorized/sec", "refId": "A"}], + "title": "🚨 Неавторизованные запросы", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 10}, {"color": "red", "value": 50}]}}}, + "gridPos": {"h": 4, "w": 6, "x": 6, "y": 0}, + "id": 2, + "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "targets": [{"expr": "sum(rate(llm_free_tier_limit_exceeded_total[5m]))", "legendFormat": "Limit exceeded/sec", "refId": "A"}], + "title": "⚠️ Превышение лимитов free", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": 
"palette-classic"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}}, + "gridPos": {"h": 4, "w": 6, "x": 12, "y": 0}, + "id": 3, + "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "targets": [{"expr": "sum(rate(llm_requests_total[5m]))", "legendFormat": "Requests/sec", "refId": "A"}], + "title": "📊 LLM запросы/сек", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 10}]}}}, + "gridPos": {"h": 4, "w": 6, "x": 18, "y": 0}, + "id": 4, + "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "targets": [{"expr": "sum(rate(llm_errors_total[5m]))", "legendFormat": "Errors/sec", "refId": "A"}], + "title": "❌ Ошибки LLM", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 
80}]}}}, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 4}, + "id": 5, + "options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}}, + "targets": [ + {"expr": "sum by (reason) (rate(llm_unauthorized_requests_total[5m]))", "legendFormat": "{{reason}}", "refId": "A"}, + {"expr": "sum by (limit_type) (rate(llm_free_tier_limit_exceeded_total[5m]))", "legendFormat": "limit: {{limit_type}}", "refId": "B"} + ], + "title": "🔐 События безопасности", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 4}, + "id": 6, + "options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}}, + "targets": [ + {"expr": "sum by (provider) (rate(llm_requests_total[5m]))", "legendFormat": "{{provider}}", "refId": "A"} + ], + "title": "📈 Запросы по провайдерам", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", 
"axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}}, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 12}, + "id": 7, + "options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}}, + "targets": [ + {"expr": "sum by (tier) (rate(llm_tokens_used_total[5m]))", "legendFormat": "{{tier}}", "refId": "A"} + ], + "title": "🎫 Токены по тарифам", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 12}, + "id": 8, + "options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}}, + "targets": [ + {"expr": 
"histogram_quantile(0.95, sum by (le, provider) (rate(llm_request_latency_seconds_bucket[5m])))", "legendFormat": "p95 {{provider}}", "refId": "A"}, + {"expr": "histogram_quantile(0.50, sum by (le, provider) (rate(llm_request_latency_seconds_bucket[5m])))", "legendFormat": "p50 {{provider}}", "refId": "B"} + ], + "title": "⏱️ Latency LLM (p50, p95)", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 5}, {"color": "red", "value": 20}]}}}, + "gridPos": {"h": 8, "w": 24, "x": 0, "y": 20}, + "id": 9, + "options": {"displayMode": "lcd", "minVizHeight": 10, "minVizWidth": 0, "orientation": "horizontal", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "showUnfilled": true, "valueMode": "color"}, + "targets": [ + {"expr": "topk(10, sum by (user_id) (rate(llm_free_tier_limit_exceeded_total[1h])))", "legendFormat": "{{user_id}}", "refId": "A"} + ], + "title": "🚫 Top-10 пользователей превышающих лимиты (за час)", + "type": "bargauge" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": ["security", "llm"], + "templating": {"list": []}, + "time": {"from": "now-1h", "to": "now"}, + "timepicker": {}, + "timezone": "", + "title": "🔐 Security & LLM Monitoring", + "uid": "security-llm", + "version": 1, + "weekStart": "" + } + + system-health.json: | + { + "annotations": {"list": []}, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 2, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [], "max": 100, "min": 0, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 60}, {"color": "red", "value": 
80}]}, "unit": "percent"}}, + "gridPos": {"h": 6, "w": 6, "x": 0, "y": 0}, + "id": 1, + "options": {"orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "showThresholdLabels": false, "showThresholdMarkers": true}, + "targets": [{"expr": "100 - (avg(rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", "legendFormat": "CPU Usage", "refId": "A"}], + "title": "💻 CPU", + "type": "gauge" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [], "max": 100, "min": 0, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 70}, {"color": "red", "value": 85}]}, "unit": "percent"}}, + "gridPos": {"h": 6, "w": 6, "x": 6, "y": 0}, + "id": 2, + "options": {"orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "showThresholdLabels": false, "showThresholdMarkers": true}, + "targets": [{"expr": "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100", "legendFormat": "Memory Usage", "refId": "A"}], + "title": "🧠 Memory", + "type": "gauge" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [], "max": 100, "min": 0, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 70}, {"color": "red", "value": 85}]}, "unit": "percent"}}, + "gridPos": {"h": 6, "w": 6, "x": 12, "y": 0}, + "id": 3, + "options": {"orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "showThresholdLabels": false, "showThresholdMarkers": true}, + "targets": [{"expr": "(1 - (node_filesystem_avail_bytes{mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"})) * 100", "legendFormat": "Disk Usage", "refId": "A"}], + "title": "💾 Disk", + "type": "gauge" + }, + { + 
"datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}}, + "gridPos": {"h": 6, "w": 6, "x": 18, "y": 0}, + "id": 4, + "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "targets": [{"expr": "sum(rate(http_requests_total[5m]))", "legendFormat": "Requests/sec", "refId": "A"}], + "title": "🌐 HTTP запросы/сек", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}}, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 6}, + "id": 5, + "options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}}, + "targets": [ + {"expr": "sum by (service) (rate(http_requests_total[5m]))", "legendFormat": "{{service}}", "refId": "A"} + ], + "title": "📊 Запросы по сервисам", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": 
{"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 6}, + "id": 6, + "options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}}, + "targets": [ + {"expr": "histogram_quantile(0.95, sum by (le, service) (rate(http_request_duration_seconds_bucket[5m])))", "legendFormat": "p95 {{service}}", "refId": "A"} + ], + "title": "⏱️ Latency p95 по сервисам", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}}, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 14}, + "id": 7, + "options": {"legend": 
{"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}}, + "targets": [ + {"expr": "sum by (service) (rate(http_requests_total{status=~\"5..\"}[5m]))", "legendFormat": "5xx {{service}}", "refId": "A"}, + {"expr": "sum by (service) (rate(http_requests_total{status=~\"4..\"}[5m]))", "legendFormat": "4xx {{service}}", "refId": "B"} + ], + "title": "❌ Ошибки HTTP", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 14}, + "id": 8, + "options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}}, + "targets": [ + {"expr": "sum by (service) (http_requests_in_flight)", "legendFormat": "{{service}}", "refId": "A"} + ], + "title": "🔄 Активные запросы", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [{"options": {"0": {"color": "red", "index": 0, "text": "DOWN"}, "1": {"color": "green", "index": 1, "text": "UP"}}, "type": "value"}], "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", 
"value": 1}]}}}, + "gridPos": {"h": 6, "w": 24, "x": 0, "y": 22}, + "id": 9, + "options": {"colorMode": "background", "graphMode": "none", "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "targets": [ + {"expr": "up{job=\"gooseek-services\"}", "legendFormat": "{{service}}", "refId": "A"} + ], + "title": "🏥 Статус сервисов", + "type": "stat" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": ["system", "health"], + "templating": {"list": []}, + "time": {"from": "now-1h", "to": "now"}, + "timepicker": {}, + "timezone": "", + "title": "🏥 System Health", + "uid": "system-health", + "version": 1, + "weekStart": "" + } diff --git a/backend/deploy/k8s/kustomization.yaml b/backend/deploy/k8s/kustomization.yaml index 13d1292..8254047 100644 --- a/backend/deploy/k8s/kustomization.yaml +++ b/backend/deploy/k8s/kustomization.yaml @@ -24,6 +24,7 @@ resources: - travel-svc.yaml - sandbox-svc.yaml - opensandbox.yaml + - ollama.yaml - ingress.yaml labels: diff --git a/backend/deploy/k8s/llm-svc.yaml b/backend/deploy/k8s/llm-svc.yaml index 49ca6f7..bf441d2 100644 --- a/backend/deploy/k8s/llm-svc.yaml +++ b/backend/deploy/k8s/llm-svc.yaml @@ -16,6 +16,10 @@ spec: metadata: labels: app: llm-svc + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "3020" + prometheus.io/path: "/metrics" spec: containers: - name: llm-svc diff --git a/backend/deploy/k8s/monitoring.yaml b/backend/deploy/k8s/monitoring.yaml new file mode 100644 index 0000000..c79cfd9 --- /dev/null +++ b/backend/deploy/k8s/monitoring.yaml @@ -0,0 +1,674 @@ +# Monitoring Stack: Prometheus + Grafana + AlertManager +# Отслеживание безопасности, ресурсов, здоровья системы +--- +apiVersion: v1 +kind: Namespace +metadata: + name: monitoring + labels: + app.kubernetes.io/name: monitoring +--- +# Prometheus ConfigMap +apiVersion: v1 +kind: ConfigMap +metadata: + name: prometheus-config + namespace: 
monitoring +data: + prometheus.yml: | + global: + scrape_interval: 15s + evaluation_interval: 15s + + alerting: + alertmanagers: + - static_configs: + - targets: + - alertmanager:9093 + + rule_files: + - /etc/prometheus/rules/*.yml + + scrape_configs: + # Prometheus self-monitoring + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + + # Kubernetes API server + - job_name: 'kubernetes-apiservers' + kubernetes_sd_configs: + - role: endpoints + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: default;kubernetes;https + + # Kubernetes nodes + - job_name: 'kubernetes-nodes' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + + # Kubernetes pods (auto-discovery) + - job_name: 'kubernetes-pods' + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] + action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + target_label: __address__ + - action: labelmap + regex: 
__meta_kubernetes_pod_label_(.+)
+          - source_labels: [__meta_kubernetes_namespace]
+            action: replace
+            target_label: kubernetes_namespace
+          - source_labels: [__meta_kubernetes_pod_name]
+            action: replace
+            target_label: kubernetes_pod_name
+
+      # GooSeek services (direct)
+      - job_name: 'gooseek-services'
+        static_configs:
+          - targets:
+              - api-gateway.gooseek.svc:3015
+              - llm-svc.gooseek.svc:3020
+              - agent-svc.gooseek.svc:3018
+              - chat-svc.gooseek.svc:3005
+              - search-svc.gooseek.svc:3001
+              - learning-svc.gooseek.svc:3034
+              - travel-svc.gooseek.svc:3035
+              - medicine-svc.gooseek.svc:3037
+        metrics_path: /metrics
+        relabel_configs:
+          - source_labels: [__address__]
+            regex: (.+)\.gooseek\.svc:(\d+)
+            replacement: $1
+            target_label: service
+
+  alerts.yml: |
+    groups:
+      - name: security
+        rules:
+          - alert: HighUnauthorizedRequests
+            expr: rate(llm_unauthorized_requests_total[5m]) > 10
+            for: 2m
+            labels:
+              severity: critical
+            annotations:
+              summary: High unauthorized LLM requests
+              description: More than 10 unauthorized requests per second
+
+          - alert: FreeTierAbuse
+            expr: rate(llm_free_tier_limit_exceeded_total[5m]) > 5
+            for: 5m
+            labels:
+              severity: warning
+            annotations:
+              summary: Free tier limit exceeded
+              description: Users are exceeding LLM free tier limits
+
+          - alert: SuspiciousActivity
+            expr: sum by (client_ip) (rate(http_requests_total[5m])) > 100  # NOTE(review): http_requests_total has no client_ip label, so this is the total request rate, not a per-IP rate
+            for: 5m
+            labels:
+              severity: warning
+            annotations:
+              summary: Suspicious activity detected
+              description: High request rate from single IP
+
+      - name: resources
+        rules:
+          - alert: HighCPUUsage
+            expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
+            for: 10m
+            labels:
+              severity: warning
+            annotations:
+              summary: High CPU usage
+              description: CPU usage is above 80 percent
+
+          - alert: LowMemory
+            expr: (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100 < 20
+            for: 5m
+            labels:
+              severity: critical
+            annotations:
+              summary: Low memory available
+              description: Less than 20 percent memory available
+
+          - alert: DiskSpaceLow
+            expr: (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 < 15
+            for: 5m
+            labels:
+              severity: critical
+            annotations:
+              summary: Low disk space
+              description: Less than 15 percent disk space available
+
+      - name: availability
+        rules:
+          - alert: ServiceDown
+            expr: up{job="gooseek-services"} == 0
+            for: 2m
+            labels:
+              severity: critical
+            annotations:
+              summary: Service is down
+              description: A GooSeek service is not responding
+
+          - alert: HighLatency
+            expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 5
+            for: 5m
+            labels:
+              severity: warning
+            annotations:
+              summary: High latency detected
+              description: P95 latency is above 5 seconds
+
+          - alert: HighErrorRate
+            expr: sum by (service) (rate(http_requests_total{status=~"5.."}[5m])) / sum by (service) (rate(http_requests_total[5m])) > 0.05
+            for: 5m
+            labels:
+              severity: critical
+            annotations:
+              summary: High error rate
+              description: Error rate is above 5 percent
+---
+# Prometheus RBAC
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: prometheus
+  namespace: monitoring
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: prometheus
+rules:
+  - apiGroups: [""]
+    resources:
+      - nodes
+      - nodes/proxy
+      - services
+      - endpoints
+      - pods
+    verbs: ["get", "list", "watch"]
+  - apiGroups: ["networking.k8s.io"]
+    resources:
+      - ingresses
+    verbs: ["get", "list", "watch"]
+  - nonResourceURLs: ["/metrics"]
+    verbs: ["get"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: prometheus
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: prometheus
+subjects:
+  - kind: ServiceAccount
+    name: prometheus
+    namespace: monitoring
+---
+# Prometheus Deployment
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: prometheus
+  namespace: monitoring
+  labels:
+    app: prometheus
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: prometheus
+  
template: + metadata: + labels: + app: prometheus + spec: + serviceAccountName: prometheus + containers: + - name: prometheus + image: prom/prometheus:v2.50.0 + args: + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.path=/prometheus" + - "--storage.tsdb.retention.time=30d" + - "--web.enable-lifecycle" + ports: + - containerPort: 9090 + volumeMounts: + - name: config + mountPath: /etc/prometheus + - name: rules + mountPath: /etc/prometheus/rules + - name: data + mountPath: /prometheus + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: 1000m + memory: 2Gi + volumes: + - name: config + configMap: + name: prometheus-config + - name: rules + configMap: + name: prometheus-config + items: + - key: alerts.yml + path: alerts.yml + - name: data + persistentVolumeClaim: + claimName: prometheus-pvc +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: prometheus-pvc + namespace: monitoring +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 20Gi +--- +apiVersion: v1 +kind: Service +metadata: + name: prometheus + namespace: monitoring +spec: + type: ClusterIP + selector: + app: prometheus + ports: + - port: 9090 + targetPort: 9090 +--- +# AlertManager ConfigMap +apiVersion: v1 +kind: ConfigMap +metadata: + name: alertmanager-config + namespace: monitoring +data: + alertmanager.yml: | + global: + resolve_timeout: 5m + + route: + group_by: ['alertname', 'severity'] + group_wait: 30s + group_interval: 5m + repeat_interval: 4h + receiver: 'telegram' + routes: + - match: + severity: critical + receiver: 'telegram' + continue: true + + receivers: + - name: 'telegram' + webhook_configs: + - url: 'http://api-gateway.gooseek.svc:3015/api/v1/alerts/webhook' + send_resolved: true + + inhibit_rules: + - source_match: + severity: 'critical' + target_match: + severity: 'warning' + equal: ['alertname'] +--- +# AlertManager Deployment +apiVersion: apps/v1 +kind: Deployment +metadata: + name: alertmanager + 
namespace: monitoring + labels: + app: alertmanager +spec: + replicas: 1 + selector: + matchLabels: + app: alertmanager + template: + metadata: + labels: + app: alertmanager + spec: + containers: + - name: alertmanager + image: prom/alertmanager:v0.27.0 + args: + - "--config.file=/etc/alertmanager/alertmanager.yml" + - "--storage.path=/alertmanager" + ports: + - containerPort: 9093 + volumeMounts: + - name: config + mountPath: /etc/alertmanager + - name: data + mountPath: /alertmanager + resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 200m + memory: 256Mi + volumes: + - name: config + configMap: + name: alertmanager-config + - name: data + emptyDir: {} +--- +apiVersion: v1 +kind: Service +metadata: + name: alertmanager + namespace: monitoring +spec: + type: ClusterIP + selector: + app: alertmanager + ports: + - port: 9093 + targetPort: 9093 +--- +# Grafana ConfigMap +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-config + namespace: monitoring +data: + grafana.ini: | + [server] + root_url = https://grafana.gooseek.ru + + [security] + admin_user = admin + admin_password = ${GRAFANA_ADMIN_PASSWORD} + + [auth.anonymous] + enabled = false + + [dashboards] + default_home_dashboard_path = /var/lib/grafana/dashboards/security.json + + datasources.yml: | + apiVersion: 1 + datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: false +--- +# Grafana Deployment +apiVersion: apps/v1 +kind: Deployment +metadata: + name: grafana + namespace: monitoring + labels: + app: grafana +spec: + replicas: 1 + selector: + matchLabels: + app: grafana + template: + metadata: + labels: + app: grafana + spec: + containers: + - name: grafana + image: grafana/grafana:10.3.3 + ports: + - containerPort: 3000 + env: + - name: GF_SECURITY_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: grafana-secrets + key: admin-password + - name: GF_INSTALL_PLUGINS + value: 
"grafana-piechart-panel,grafana-clock-panel" + volumeMounts: + - name: config + mountPath: /etc/grafana/grafana.ini + subPath: grafana.ini + - name: datasources + mountPath: /etc/grafana/provisioning/datasources + - name: dashboards-config + mountPath: /etc/grafana/provisioning/dashboards + - name: dashboards + mountPath: /var/lib/grafana/dashboards + - name: data + mountPath: /var/lib/grafana + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + volumes: + - name: config + configMap: + name: grafana-config + - name: datasources + configMap: + name: grafana-config + items: + - key: datasources.yml + path: datasources.yml + - name: dashboards-config + configMap: + name: grafana-dashboards-config + - name: dashboards + configMap: + name: grafana-dashboards + - name: data + persistentVolumeClaim: + claimName: grafana-pvc +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: grafana-pvc + namespace: monitoring +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi +--- +apiVersion: v1 +kind: Secret +metadata: + name: grafana-secrets + namespace: monitoring +type: Opaque +stringData: + admin-password: "${GRAFANA_ADMIN_PASSWORD}" +--- +apiVersion: v1 +kind: Service +metadata: + name: grafana + namespace: monitoring +spec: + type: ClusterIP + selector: + app: grafana + ports: + - port: 3000 + targetPort: 3000 +--- +# Grafana Dashboards Config +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboards-config + namespace: monitoring +data: + dashboards.yml: | + apiVersion: 1 + providers: + - name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards +--- +# Grafana Ingress +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: grafana-ingress + namespace: monitoring + annotations: + nginx.ingress.kubernetes.io/ssl-redirect: "true" + cert-manager.io/cluster-issuer: "letsencrypt-prod" +spec: 
+ ingressClassName: nginx + tls: + - hosts: + - grafana.gooseek.ru + secretName: grafana-tls + rules: + - host: grafana.gooseek.ru + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: grafana + port: + number: 3000 +--- +# Node Exporter DaemonSet (для метрик хоста) +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: node-exporter + namespace: monitoring + labels: + app: node-exporter +spec: + selector: + matchLabels: + app: node-exporter + template: + metadata: + labels: + app: node-exporter + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9100" + spec: + hostNetwork: true + hostPID: true + containers: + - name: node-exporter + image: prom/node-exporter:v1.7.0 + args: + - "--path.procfs=/host/proc" + - "--path.sysfs=/host/sys" + - "--path.rootfs=/host/root" + - "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)" + ports: + - containerPort: 9100 + volumeMounts: + - name: proc + mountPath: /host/proc + readOnly: true + - name: sys + mountPath: /host/sys + readOnly: true + - name: root + mountPath: /host/root + readOnly: true + resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 200m + memory: 128Mi + volumes: + - name: proc + hostPath: + path: /proc + - name: sys + hostPath: + path: /sys + - name: root + hostPath: + path: / +--- +apiVersion: v1 +kind: Service +metadata: + name: node-exporter + namespace: monitoring + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9100" +spec: + type: ClusterIP + selector: + app: node-exporter + ports: + - port: 9100 + targetPort: 9100 diff --git a/backend/deploy/k8s/ollama-models.yaml b/backend/deploy/k8s/ollama-models.yaml new file mode 100644 index 0000000..6ce5a82 --- /dev/null +++ b/backend/deploy/k8s/ollama-models.yaml @@ -0,0 +1,85 @@ +# Job для загрузки моделей Ollama после деплоя +apiVersion: batch/v1 +kind: Job +metadata: + name: ollama-model-loader + namespace: gooseek + labels: + app: ollama-model-loader +spec: + 
ttlSecondsAfterFinished: 3600 + backoffLimit: 3 + template: + metadata: + labels: + app: ollama-model-loader + spec: + restartPolicy: OnFailure + initContainers: + - name: wait-for-ollama + image: curlimages/curl:latest + command: + - /bin/sh + - -c + - | + echo "Waiting for Ollama to be ready..." + until curl -sf http://ollama.gooseek.svc.cluster.local:11434/api/tags; do + echo "Ollama not ready, retrying in 5s..." + sleep 5 + done + echo "Ollama is ready!" + containers: + - name: model-loader + image: ollama/ollama:latest + env: + - name: OLLAMA_HOST + value: "http://ollama.gooseek.svc.cluster.local:11434" + command: + - /bin/sh + - -c + - | + set -e + + OLLAMA_URL="http://ollama.gooseek.svc.cluster.local:11434" + + pull_model() { + MODEL=$1 + echo "=== Pulling model: $MODEL ===" + + # Check if model already exists + EXISTING=$(curl -sf "$OLLAMA_URL/api/tags" | grep -o "\"name\":\"$MODEL\"" || true) + if [ -n "$EXISTING" ]; then + echo "Model $MODEL already exists, skipping..." + return 0 + fi + + # Pull model via API + echo "Downloading $MODEL..." + curl -sf "$OLLAMA_URL/api/pull" \ + -H "Content-Type: application/json" \ + -d "{\"name\": \"$MODEL\", \"stream\": false}" \ + --max-time 1800 + + echo "Model $MODEL downloaded successfully!" 
+              }
+
+              echo "=== Ollama Model Loader ==="
+              echo "Target: $OLLAMA_URL"
+
+              # Основная модель генерации (4 параллельных воркера)
+              pull_model "qwen3.5:9b"
+
+              # Embedding модель (быстрые эмбеддинги)
+              pull_model "qwen3-embedding:0.6b"
+
+              echo ""
+              echo "=== All models loaded ==="
+              curl -sf "$OLLAMA_URL/api/tags" | head -c 500
+              echo ""
+          resources:
+            requests:
+              cpu: 100m
+              memory: 256Mi
+            limits:
+              cpu: 500m
+              memory: 512Mi
diff --git a/backend/deploy/k8s/ollama-pull-models.sh b/backend/deploy/k8s/ollama-pull-models.sh
new file mode 100755
index 0000000..02362bd
--- /dev/null
+++ b/backend/deploy/k8s/ollama-pull-models.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+# Pulls models into the Ollama pod running in the cluster.
+# Run ONCE after the first deploy: models are stored on the PVC and do not
+# need to be downloaded again.
+#
+# Usage: ollama-pull-models.sh [MODEL...]
+#   With no arguments the default generation and embedding models are pulled.
+#   NAMESPACE (env) selects the Kubernetes namespace, default "gooseek".
+
+set -euo pipefail
+
+NAMESPACE="${NAMESPACE:-gooseek}"
+
+# Defaults match the models preloaded by ollama-models.yaml.
+if [ "$#" -gt 0 ]; then
+  MODELS=("$@")
+else
+  MODELS=("qwen3.5:9b" "qwen3-embedding:0.6b")
+fi
+
+# Allocate a TTY only when attached to a terminal so the script also works
+# from CI and other non-interactive shells.
+TTY_FLAGS=""
+if [ -t 0 ] && [ -t 1 ]; then
+  TTY_FLAGS="-it"
+fi
+
+echo "=== Ollama Model Loader ==="
+echo "Namespace: $NAMESPACE"
+echo "Models: ${MODELS[*]}"
+
+# Make sure the Ollama pod is running.
+echo ""
+echo "Checking Ollama pod status..."
+kubectl -n "$NAMESPACE" wait --for=condition=ready pod -l app=ollama --timeout=120s
+
+# Resolve the pod name.
+POD=$(kubectl -n "$NAMESPACE" get pod -l app=ollama -o jsonpath='{.items[0].metadata.name}')
+echo "Pod: $POD"
+
+# Pull the models.
+for MODEL in "${MODELS[@]}"; do
+  echo ""
+  echo "=== Pulling model: $MODEL ==="
+  # TTY_FLAGS is intentionally unquoted: it is either empty or "-it".
+  kubectl -n "$NAMESPACE" exec $TTY_FLAGS "$POD" -c ollama -- ollama pull "$MODEL"
+done
+
+# Show the installed models.
+echo ""
+echo "=== Installed models ==="
+kubectl -n "$NAMESPACE" exec $TTY_FLAGS "$POD" -c ollama -- ollama list
+
+echo ""
+echo "=== Done! ==="
+echo "Models are stored in PVC and will persist across restarts."
diff --git a/backend/deploy/k8s/ollama.yaml b/backend/deploy/k8s/ollama.yaml new file mode 100644 index 0000000..ee55129 --- /dev/null +++ b/backend/deploy/k8s/ollama.yaml @@ -0,0 +1,130 @@ +# Ollama Deployment with GPU +# Требования: NVIDIA GPU Operator установлен в кластере +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ollama + namespace: gooseek + labels: + app: ollama + app.kubernetes.io/name: ollama + app.kubernetes.io/part-of: gooseek +spec: + replicas: 1 + selector: + matchLabels: + app: ollama + template: + metadata: + labels: + app: ollama + spec: + runtimeClassName: nvidia + containers: + # Ollama server (только GPU) + - name: ollama + image: ollama/ollama:latest + ports: + - containerPort: 11434 + name: http + env: + - name: OLLAMA_HOST + value: "0.0.0.0:11434" + - name: OLLAMA_KEEP_ALIVE + value: "24h" + - name: OLLAMA_MODELS + value: "/root/.ollama/models" + # Параллельная обработка для SaaS + - name: OLLAMA_NUM_PARALLEL + value: "4" + - name: OLLAMA_MAX_LOADED_MODELS + value: "2" + - name: OLLAMA_FLASH_ATTENTION + value: "true" + # GPU + - name: NVIDIA_VISIBLE_DEVICES + value: "all" + - name: NVIDIA_DRIVER_CAPABILITIES + value: "compute,utility" + volumeMounts: + - name: ollama-data + mountPath: /root/.ollama + resources: + requests: + cpu: 1000m + memory: 8Gi + nvidia.com/gpu: 1 + limits: + cpu: 4000m + memory: 16Gi + nvidia.com/gpu: 1 + livenessProbe: + httpGet: + path: / + port: 11434 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + readinessProbe: + httpGet: + path: / + port: 11434 + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + + volumes: + - name: ollama-data + persistentVolumeClaim: + claimName: ollama-pvc +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: ollama-pvc + namespace: gooseek +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 20Gi +--- +apiVersion: v1 +kind: Service +metadata: + name: ollama + namespace: gooseek +spec: + type: 
ClusterIP
+  selector:
+    app: ollama
+  ports:
+    - port: 11434
+      targetPort: 11434
+      name: http
+---
+# NetworkPolicy: llm-svc and model-loader may reach ollama
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: ollama-access
+  namespace: gooseek
+spec:
+  podSelector:
+    matchLabels:
+      app: ollama
+  policyTypes:
+    - Ingress
+  ingress:
+    - from:
+        - podSelector:
+            matchLabels:
+              app: llm-svc
+        - podSelector:
+            matchLabels:
+              app: ollama-model-loader
+      ports:
+        - protocol: TCP
+          port: 11434
diff --git a/backend/go.mod b/backend/go.mod
index 41fa558..e8c621f 100644
--- a/backend/go.mod
+++ b/backend/go.mod
@@ -5,6 +5,7 @@ go 1.24
 toolchain go1.24.13
 
 require (
+	github.com/gofiber/adaptor/v2 v2.2.1
 	github.com/gofiber/fiber/v2 v2.52.0
 	github.com/golang-jwt/jwt/v5 v5.2.1
 	github.com/google/uuid v1.6.0
@@ -12,6 +13,7 @@ require (
 	github.com/ledongthuc/pdf v0.0.0-20240201131950-da5b75280b06
 	github.com/lib/pq v1.10.9
 	github.com/minio/minio-go/v7 v7.0.70
+	github.com/prometheus/client_golang v1.19.0
 	github.com/redis/go-redis/v9 v9.4.0
 	github.com/sashabaranov/go-openai v1.20.0
 	go.uber.org/zap v1.27.0
diff --git a/backend/internal/llm/ollama.go b/backend/internal/llm/ollama.go
index 5b19927..c435b3d 100644
--- a/backend/internal/llm/ollama.go
+++ b/backend/internal/llm/ollama.go
@@ -32,7 +32,7 @@ func NewOllamaClient(cfg OllamaConfig) (*OllamaClient, error) {
 
 	modelKey := cfg.ModelKey
 	if modelKey == "" {
-		modelKey = "llama3.2"
+		modelKey = "qwen3.5:9b"
 	}
 
 	return &OllamaClient{
@@ -231,3 +231,68 @@ func (c *OllamaClient) GenerateText(ctx context.Context, req StreamRequest) (str
 
 	return chatResp.Message.Content, nil
 }
+
+// ollamaEmbedRequest is the JSON payload sent to Ollama's /api/embed endpoint.
+type ollamaEmbedRequest struct {
+	Model string `json:"model"`
+	Input string `json:"input"`
+}
+
+// ollamaEmbedResponse holds the fields decoded from an /api/embed response.
+type ollamaEmbedResponse struct {
+	Model      string      `json:"model"`
+	Embeddings [][]float64 `json:"embeddings"`
+}
+
+// GenerateEmbedding posts input to {baseURL}/api/embed and returns the first
+// embedding vector of the response.
+//
+// An empty baseURL defaults to "http://ollama:11434" and an empty model to
+// "qwen3-embedding:0.6b"; trailing slashes on baseURL are ignored. An error is
+// returned when the request fails, the status is not 200 OK, the body cannot
+// be decoded, or the response carries no embedding.
+func GenerateEmbedding(baseURL, model, input string) ([]float64, error) {
+	// Upper bound on how much of an error response is copied into the error.
+	const maxErrorBody = 4 << 10
+
+	if baseURL == "" {
+		baseURL = "http://ollama:11434"
+	}
+	// Drop trailing slashes so "http://host:11434/" does not turn into a
+	// "//api/embed" request path.
+	for len(baseURL) > 0 && baseURL[len(baseURL)-1] == '/' {
+		baseURL = baseURL[:len(baseURL)-1]
+	}
+	if model == "" {
+		model = "qwen3-embedding:0.6b"
+	}
+
+	body, err := json.Marshal(ollamaEmbedRequest{Model: model, Input: input})
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal embed request: %w", err)
+	}
+
+	httpClient := &http.Client{Timeout: 30 * time.Second}
+	resp, err := httpClient.Post(baseURL+"/api/embed", "application/json", bytes.NewReader(body))
+	if err != nil {
+		return nil, fmt.Errorf("embed request failed: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		// The body is diagnostic only, so a read error is deliberately ignored.
+		respBody, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBody))
+		return nil, fmt.Errorf("Ollama embed API error: status %d, body: %s", resp.StatusCode, string(respBody))
+	}
+
+	var embedResp ollamaEmbedResponse
+	if err := json.NewDecoder(resp.Body).Decode(&embedResp); err != nil {
+		return nil, fmt.Errorf("failed to decode embed response: %w", err)
+	}
+
+	if len(embedResp.Embeddings) == 0 || len(embedResp.Embeddings[0]) == 0 {
+		return nil, errors.New("empty embeddings from Ollama")
+	}
+
+	return embedResp.Embeddings[0], nil
+}
diff --git a/backend/pkg/config/config.go b/backend/pkg/config/config.go
index a525449..31eca55 100644
--- a/backend/pkg/config/config.go
+++ b/backend/pkg/config/config.go
@@ -79,8 +79,11 @@ type Config struct {
 	TimewebProxySource string
 
 	// Ollama (local LLM)
-	OllamaBaseURL  string
-	OllamaModelKey string
+	OllamaBaseURL        string
+	OllamaModelKey       string
+	OllamaEmbeddingModel string
+	OllamaNumParallel    int
+	OllamaAPIToken       string
 
 	// Timeouts
 	HTTPTimeout time.Duration
@@ -160,8 +163,11 @@ func Load() (*Config, error) {
 		TimewebAPIKey:      getEnv("TIMEWEB_API_KEY", ""),
 		TimewebProxySource: getEnv("TIMEWEB_X_PROXY_SOURCE", "gooseek"),
 
-		OllamaBaseURL:  getEnv("OLLAMA_BASE_URL", "http://ollama:11434"),
-		OllamaModelKey: getEnv("OLLAMA_MODEL", "llama3.2"),
+		OllamaBaseURL:        getEnv("OLLAMA_BASE_URL", "http://ollama:11434"),
+		OllamaModelKey:       getEnv("OLLAMA_MODEL", "qwen3.5:9b"),
+		OllamaEmbeddingModel: getEnv("OLLAMA_EMBEDDING_MODEL", "qwen3-embedding:0.6b"),
+		OllamaNumParallel:    getEnvInt("OLLAMA_NUM_PARALLEL", 2),
+		OllamaAPIToken:       getEnv("OLLAMA_API_TOKEN", ""),
 
 		HTTPTimeout: time.Duration(getEnvInt("HTTP_TIMEOUT_MS", 60000)) * time.Millisecond,
 		LLMTimeout:  time.Duration(getEnvInt("LLM_TIMEOUT_MS", 120000)) * time.Millisecond,
diff --git a/backend/pkg/metrics/prometheus.go b/backend/pkg/metrics/prometheus.go
new file mode 100644
index 0000000..300c37f
--- /dev/null
+++ b/backend/pkg/metrics/prometheus.go
@@ -0,0 +1,222 @@
+// Package metrics defines the Prometheus collectors shared by the backend
+// services and the Fiber helpers that record and expose them.
+package metrics
+
+import (
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/gofiber/fiber/v2"
+	"github.com/gofiber/fiber/v2/middleware/adaptor"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promauto"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+)
+
+// All collectors are registered with the default Prometheus registry via
+// promauto when the package is initialized.
+//
+// NOTE(review): several collectors below are labelled by user_id or
+// client_ip. Every distinct label value creates a new time series, so these
+// labels are unbounded; consider moving per-user and per-IP detail to logs.
+var (
+	// HTTP metrics recorded by PrometheusMiddleware.
+	httpRequestsTotal = promauto.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "http_requests_total",
+			Help: "Total number of HTTP requests",
+		},
+		[]string{"service", "method", "path", "status"},
+	)
+
+	httpRequestDuration = promauto.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Name:    "http_request_duration_seconds",
+			Help:    "HTTP request duration in seconds",
+			Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
+		},
+		[]string{"service", "method", "path"},
+	)
+
+	httpRequestsInFlight = promauto.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Name: "http_requests_in_flight",
+			Help: "Number of HTTP requests currently being processed",
+		},
+		[]string{"service"},
+	)
+
+	// LLM Security Metrics
+	llmRequestsTotal = promauto.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "llm_requests_total",
+			Help: "Total LLM requests by provider, model, and tier",
+		},
+		[]string{"provider", "model", "tier", "user_id"},
+	)
+
+	llmUnauthorizedRequests = promauto.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "llm_unauthorized_requests_total",
+			Help: "Unauthorized LLM request attempts",
+		},
+		[]string{"reason", "client_ip"},
+	)
+
+	llmFreeTierLimitExceeded = promauto.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "llm_free_tier_limit_exceeded_total",
+			Help: "Free tier limit exceeded attempts",
+		},
+		[]string{"user_id", "limit_type"},
+	)
+
+	llmTokensUsed = promauto.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "llm_tokens_used_total",
+			Help: "Total tokens used by tier and provider",
+		},
+		[]string{"provider", "tier", "user_id"},
+	)
+
+	llmRequestLatency = promauto.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Name:    "llm_request_latency_seconds",
+			Help:    "LLM request latency in seconds",
+			Buckets: []float64{0.1, 0.5, 1, 2, 5, 10, 30, 60, 120},
+		},
+		[]string{"provider", "model"},
+	)
+
+	llmErrorsTotal = promauto.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "llm_errors_total",
+			Help: "Total LLM errors by type",
+		},
+		[]string{"provider", "error_type"},
+	)
+
+	// Security Events
+	securityEventsTotal = promauto.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "security_events_total",
+			Help: "Security events (auth failures, suspicious activity)",
+		},
+		[]string{"event_type", "client_ip", "user_id"},
+	)
+
+	rateLimitHits = promauto.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "rate_limit_hits_total",
+			Help: "Rate limit hits",
+		},
+		[]string{"service", "client_ip", "limit_type"},
+	)
+)
+
+// MetricsConfig configures PrometheusMiddleware.
+type MetricsConfig struct {
+	// ServiceName is the value of the "service" label on every HTTP metric.
+	ServiceName string
+}
+
+// PrometheusMiddleware returns a Fiber handler that records the request count,
+// the latency histogram and the in-flight gauge for every request it wraps.
+//
+// The route pattern and the status are read after the downstream handlers
+// have run: before c.Next() the context still points at the middleware's own
+// route, and a handler that returns an error has not had its status written.
+func PrometheusMiddleware(cfg MetricsConfig) fiber.Handler {
+	inFlight := httpRequestsInFlight.WithLabelValues(cfg.ServiceName)
+
+	return func(c *fiber.Ctx) error {
+		start := time.Now()
+		// Fiber reuses request buffers between requests while Prometheus
+		// retains label values, so keep a private copy of the method.
+		method := strings.Clone(c.Method())
+
+		inFlight.Inc()
+		defer inFlight.Dec()
+
+		err := c.Next()
+
+		duration := time.Since(start).Seconds()
+
+		// Fiber's error handler runs after the handler chain returns, so
+		// derive the status from the error the way the default error
+		// handler does: the code of a *fiber.Error, otherwise 500.
+		statusCode := c.Response().StatusCode()
+		if err != nil {
+			statusCode = fiber.StatusInternalServerError
+			if fiberErr, ok := err.(*fiber.Error); ok {
+				statusCode = fiberErr.Code
+			}
+		}
+		status := strconv.Itoa(statusCode)
+
+		// The matched route pattern (not the raw URL) keeps the "path"
+		// label bounded.
+		path := c.Route().Path
+
+		httpRequestsTotal.WithLabelValues(cfg.ServiceName, method, path, status).Inc()
+		httpRequestDuration.WithLabelValues(cfg.ServiceName, method, path).Observe(duration)
+
+		return err
+	}
+}
+
+// MetricsHandler returns a Fiber handler that serves the default Prometheus
+// registry in the exposition format, for mounting at a path such as /metrics.
+func MetricsHandler() fiber.Handler {
+	return adaptor.HTTPHandler(promhttp.Handler())
+}
+
+// RecordLLMRequest counts one LLM request for the given provider, model,
+// tier and user.
+func RecordLLMRequest(provider, model, tier, userID string) {
+	llmRequestsTotal.WithLabelValues(provider, model, tier, userID).Inc()
+}
+
+// RecordLLMUnauthorized counts one rejected LLM request, labelled by the
+// rejection reason and the client IP.
+func RecordLLMUnauthorized(reason, clientIP string) {
+	llmUnauthorizedRequests.WithLabelValues(reason, clientIP).Inc()
+}
+
+// RecordFreeTierLimitExceeded counts one request that hit a free-tier limit
+// of the given type.
+func RecordFreeTierLimitExceeded(userID, limitType string) {
+	llmFreeTierLimitExceeded.WithLabelValues(userID, limitType).Inc()
+}
+
+// RecordLLMTokens adds tokens to the usage counter for the given provider,
+// tier and user. Negative values are ignored because a Prometheus counter
+// panics when asked to decrease.
+func RecordLLMTokens(provider, tier, userID string, tokens int) {
+	if tokens < 0 {
+		return
+	}
+	llmTokensUsed.WithLabelValues(provider, tier, userID).Add(float64(tokens))
+}
+
+// RecordLLMLatency observes the duration of one LLM request.
+func RecordLLMLatency(provider, model string, duration time.Duration) {
+	llmRequestLatency.WithLabelValues(provider, model).Observe(duration.Seconds())
+}
+
+// RecordLLMError counts one LLM error of the given type.
+func RecordLLMError(provider, errorType string) {
+	llmErrorsTotal.WithLabelValues(provider, errorType).Inc()
+}
+
+// RecordSecurityEvent counts one security event such as an authentication
+// failure.
+func RecordSecurityEvent(eventType, clientIP, userID string) {
+	securityEventsTotal.WithLabelValues(eventType, clientIP, userID).Inc()
+}
+
+// RecordRateLimitHit counts one rate-limit rejection.
+func RecordRateLimitHit(service, clientIP, limitType string) {
+	rateLimitHits.WithLabelValues(service, clientIP, limitType).Inc()
+}
diff --git a/backend/pkg/middleware/llm_limits.go b/backend/pkg/middleware/llm_limits.go
index 88c79b1..7f16a3e 100644
--- a/backend/pkg/middleware/llm_limits.go
+++ b/backend/pkg/middleware/llm_limits.go
@@ -6,6 +6,7 @@ import (
 
 	"github.com/gofiber/fiber/v2"
 	"github.com/gooseek/backend/internal/usage"
+	"github.com/gooseek/backend/pkg/metrics"
 )
 
 type LLMLimitsConfig struct {
@@ -15,7 +16,11 @@ type LLMLimitsConfig struct {
 func LLMLimits(config LLMLimitsConfig) fiber.Handler {
 	return func(c *fiber.Ctx) error {
 		userID := GetUserID(c)
+		clientIP := c.IP()
+
 		if userID == "" {
+			metrics.RecordLLMUnauthorized("no_user_id", clientIP)
+			metrics.RecordSecurityEvent("unauthorized_llm_access", clientIP, "anonymous")
 			return c.Status(401).JSON(fiber.Map{
"error": "Authentication required", }) @@ -30,6 +35,13 @@ func LLMLimits(config LLMLimitsConfig) fiber.Handler { allowed, reason := config.UsageRepo.CheckLLMLimits(c.Context(), userID, tier) if !allowed { limits := usage.GetLimits(tier) + + if tier == "free" { + metrics.RecordFreeTierLimitExceeded(userID, reason) + metrics.RecordSecurityEvent("free_tier_limit_exceeded", clientIP, userID) + } + metrics.RecordRateLimitHit("llm-svc", clientIP, reason) + return c.Status(429).JSON(fiber.Map{ "error": reason, "tier": tier,