feat: LLM routing by tier (free→Ollama, pro→Timeweb)
Some checks failed
Build and Deploy GooSeek / build-and-deploy (push) Failing after 8m25s

- Add tier-based provider routing in llm-svc
  - free tier → Ollama (local qwen3.5:9b)
  - pro/business → Timeweb Cloud AI
- Add /api/v1/embed endpoint for embeddings via Ollama
- Update Ollama client: qwen3.5:9b default, remove auth
- Add GenerateEmbedding() function for qwen3-embedding:0.6b
- Add Ollama K8s deployment with GPU support (RTX 4060 Ti)
- Add monitoring stack (Prometheus, Grafana, Alertmanager)
- Add Grafana dashboards for LLM and security metrics
- Update deploy.sh with monitoring and Ollama deployment

Made-with: Cursor
This commit is contained in:
home
2026-03-03 02:25:22 +03:00
parent 5ac082a7c6
commit 7a40ff629e
19 changed files with 1759 additions and 35 deletions

View File

@@ -0,0 +1,266 @@
# Grafana Dashboards ConfigMap
#
# Ships two Grafana dashboards as JSON documents, one per data key:
#   - security.json       -> "🔐 Security & LLM Monitoring" (uid: security-llm)
#   - system-health.json  -> "🏥 System Health"             (uid: system-health)
#
# Every panel queries the Prometheus datasource whose uid is "prometheus".
# NOTE(review): presumably mounted into Grafana through a file-based dashboard
# provider - confirm against the Grafana Deployment of this monitoring stack.
#
# The JSON lives inside YAML block scalars, so it cannot carry comments itself;
# conventions that apply to both dashboards are documented here instead:
#   - The dashboard-level "id" is null: the numeric id is assigned by Grafana's
#     database and differs per instance; "uid" is the stable identifier.
#   - Dashboards auto-refresh every 10s and default to the last hour.
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboards
  namespace: monitoring
data:
  # LLM security and usage dashboard.
  # Metrics queried: llm_unauthorized_requests_total,
  # llm_free_tier_limit_exceeded_total, llm_requests_total, llm_errors_total,
  # llm_tokens_used_total, llm_request_latency_seconds_bucket.
  # - The unauthorized / limit-exceeded / error stat panels append
  #   "or vector(0)" so they display 0 instead of "No data" while the counter
  #   has no series yet.
  # - The Top-10 panel is an instant query over increase(...[1h]) so it shows at
  #   most ten users, with values expressed as events per hour (matching the
  #   panel title and its 5 / 20 thresholds).
  security.json: |
    {
      "annotations": {
        "list": []
      },
      "editable": true,
      "fiscalYearStartMonth": 0,
      "graphTooltip": 0,
      "id": null,
      "links": [],
      "liveNow": false,
      "panels": [
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {
            "defaults": {
              "color": {"mode": "palette-classic"},
              "mappings": [],
              "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}
            }
          },
          "gridPos": {"h": 4, "w": 6, "x": 0, "y": 0},
          "id": 1,
          "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
          "pluginVersion": "10.3.3",
          "targets": [{"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "sum(rate(llm_unauthorized_requests_total[5m])) or vector(0)", "legendFormat": "Unauthorized/sec", "refId": "A"}],
          "title": "🚨 Неавторизованные запросы",
          "type": "stat"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 10}, {"color": "red", "value": 50}]}}},
          "gridPos": {"h": 4, "w": 6, "x": 6, "y": 0},
          "id": 2,
          "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
          "targets": [{"expr": "sum(rate(llm_free_tier_limit_exceeded_total[5m])) or vector(0)", "legendFormat": "Limit exceeded/sec", "refId": "A"}],
          "title": "⚠️ Превышение лимитов free",
          "type": "stat"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}},
          "gridPos": {"h": 4, "w": 6, "x": 12, "y": 0},
          "id": 3,
          "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
          "targets": [{"expr": "sum(rate(llm_requests_total[5m]))", "legendFormat": "Requests/sec", "refId": "A"}],
          "title": "📊 LLM запросы/сек",
          "type": "stat"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 10}]}}},
          "gridPos": {"h": 4, "w": 6, "x": 18, "y": 0},
          "id": 4,
          "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
          "targets": [{"expr": "sum(rate(llm_errors_total[5m])) or vector(0)", "legendFormat": "Errors/sec", "refId": "A"}],
          "title": "❌ Ошибки LLM",
          "type": "stat"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}},
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 4},
          "id": 5,
          "options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}},
          "targets": [
            {"expr": "sum by (reason) (rate(llm_unauthorized_requests_total[5m]))", "legendFormat": "{{reason}}", "refId": "A"},
            {"expr": "sum by (limit_type) (rate(llm_free_tier_limit_exceeded_total[5m]))", "legendFormat": "limit: {{limit_type}}", "refId": "B"}
          ],
          "title": "🔐 События безопасности",
          "type": "timeseries"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}},
          "gridPos": {"h": 8, "w": 12, "x": 12, "y": 4},
          "id": 6,
          "options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}},
          "targets": [
            {"expr": "sum by (provider) (rate(llm_requests_total[5m]))", "legendFormat": "{{provider}}", "refId": "A"}
          ],
          "title": "📈 Запросы по провайдерам",
          "type": "timeseries"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}},
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 12},
          "id": 7,
          "options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}},
          "targets": [
            {"expr": "sum by (tier) (rate(llm_tokens_used_total[5m]))", "legendFormat": "{{tier}}", "refId": "A"}
          ],
          "title": "🎫 Токены по тарифам",
          "type": "timeseries"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}},
          "gridPos": {"h": 8, "w": 12, "x": 12, "y": 12},
          "id": 8,
          "options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}},
          "targets": [
            {"expr": "histogram_quantile(0.95, sum by (le, provider) (rate(llm_request_latency_seconds_bucket[5m])))", "legendFormat": "p95 {{provider}}", "refId": "A"},
            {"expr": "histogram_quantile(0.50, sum by (le, provider) (rate(llm_request_latency_seconds_bucket[5m])))", "legendFormat": "p50 {{provider}}", "refId": "B"}
          ],
          "title": "⏱️ Latency LLM (p50, p95)",
          "type": "timeseries"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 5}, {"color": "red", "value": 20}]}}},
          "gridPos": {"h": 8, "w": 24, "x": 0, "y": 20},
          "id": 9,
          "options": {"displayMode": "lcd", "minVizHeight": 10, "minVizWidth": 0, "orientation": "horizontal", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "showUnfilled": true, "valueMode": "color"},
          "targets": [
            {"expr": "topk(10, sum by (user_id) (increase(llm_free_tier_limit_exceeded_total[1h])))", "instant": true, "legendFormat": "{{user_id}}", "refId": "A"}
          ],
          "title": "🚫 Top-10 пользователей превышающих лимиты (за час)",
          "type": "bargauge"
        }
      ],
      "refresh": "10s",
      "schemaVersion": 39,
      "tags": ["security", "llm"],
      "templating": {"list": []},
      "time": {"from": "now-1h", "to": "now"},
      "timepicker": {},
      "timezone": "",
      "title": "🔐 Security & LLM Monitoring",
      "uid": "security-llm",
      "version": 1,
      "weekStart": ""
    }
  # Host and service health dashboard.
  # Metrics queried: node_cpu_seconds_total, node_memory_MemAvailable_bytes,
  # node_memory_MemTotal_bytes, node_filesystem_avail_bytes,
  # node_filesystem_size_bytes, http_requests_total,
  # http_request_duration_seconds_bucket, http_requests_in_flight and
  # up{job="gooseek-services"}.
  # NOTE(review): several panels group or label by "service"; this assumes the
  # Prometheus scrape config attaches a "service" label - confirm.
  system-health.json: |
    {
      "annotations": {"list": []},
      "editable": true,
      "fiscalYearStartMonth": 0,
      "graphTooltip": 0,
      "id": null,
      "links": [],
      "liveNow": false,
      "panels": [
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [], "max": 100, "min": 0, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 60}, {"color": "red", "value": 80}]}, "unit": "percent"}},
          "gridPos": {"h": 6, "w": 6, "x": 0, "y": 0},
          "id": 1,
          "options": {"orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "showThresholdLabels": false, "showThresholdMarkers": true},
          "targets": [{"expr": "100 - (avg(rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", "legendFormat": "CPU Usage", "refId": "A"}],
          "title": "💻 CPU",
          "type": "gauge"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [], "max": 100, "min": 0, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 70}, {"color": "red", "value": 85}]}, "unit": "percent"}},
          "gridPos": {"h": 6, "w": 6, "x": 6, "y": 0},
          "id": 2,
          "options": {"orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "showThresholdLabels": false, "showThresholdMarkers": true},
          "targets": [{"expr": "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100", "legendFormat": "Memory Usage", "refId": "A"}],
          "title": "🧠 Memory",
          "type": "gauge"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [], "max": 100, "min": 0, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 70}, {"color": "red", "value": 85}]}, "unit": "percent"}},
          "gridPos": {"h": 6, "w": 6, "x": 12, "y": 0},
          "id": 3,
          "options": {"orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "showThresholdLabels": false, "showThresholdMarkers": true},
          "targets": [{"expr": "(1 - (node_filesystem_avail_bytes{mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"})) * 100", "legendFormat": "Disk Usage", "refId": "A"}],
          "title": "💾 Disk",
          "type": "gauge"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}},
          "gridPos": {"h": 6, "w": 6, "x": 18, "y": 0},
          "id": 4,
          "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
          "targets": [{"expr": "sum(rate(http_requests_total[5m]))", "legendFormat": "Requests/sec", "refId": "A"}],
          "title": "🌐 HTTP запросы/сек",
          "type": "stat"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}},
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 6},
          "id": 5,
          "options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}},
          "targets": [
            {"expr": "sum by (service) (rate(http_requests_total[5m]))", "legendFormat": "{{service}}", "refId": "A"}
          ],
          "title": "📊 Запросы по сервисам",
          "type": "timeseries"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}},
          "gridPos": {"h": 8, "w": 12, "x": 12, "y": 6},
          "id": 6,
          "options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}},
          "targets": [
            {"expr": "histogram_quantile(0.95, sum by (le, service) (rate(http_request_duration_seconds_bucket[5m])))", "legendFormat": "p95 {{service}}", "refId": "A"}
          ],
          "title": "⏱️ Latency p95 по сервисам",
          "type": "timeseries"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}},
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 14},
          "id": 7,
          "options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}},
          "targets": [
            {"expr": "sum by (service) (rate(http_requests_total{status=~\"5..\"}[5m]))", "legendFormat": "5xx {{service}}", "refId": "A"},
            {"expr": "sum by (service) (rate(http_requests_total{status=~\"4..\"}[5m]))", "legendFormat": "4xx {{service}}", "refId": "B"}
          ],
          "title": "❌ Ошибки HTTP",
          "type": "timeseries"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}},
          "gridPos": {"h": 8, "w": 12, "x": 12, "y": 14},
          "id": 8,
          "options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}},
          "targets": [
            {"expr": "sum by (service) (http_requests_in_flight)", "legendFormat": "{{service}}", "refId": "A"}
          ],
          "title": "🔄 Активные запросы",
          "type": "timeseries"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [{"options": {"0": {"color": "red", "index": 0, "text": "DOWN"}, "1": {"color": "green", "index": 1, "text": "UP"}}, "type": "value"}], "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]}}},
          "gridPos": {"h": 6, "w": 24, "x": 0, "y": 22},
          "id": 9,
          "options": {"colorMode": "background", "graphMode": "none", "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
          "targets": [
            {"expr": "up{job=\"gooseek-services\"}", "legendFormat": "{{service}}", "refId": "A"}
          ],
          "title": "🏥 Статус сервисов",
          "type": "stat"
        }
      ],
      "refresh": "10s",
      "schemaVersion": 39,
      "tags": ["system", "health"],
      "templating": {"list": []},
      "time": {"from": "now-1h", "to": "now"},
      "timepicker": {},
      "timezone": "",
      "title": "🏥 System Health",
      "uid": "system-health",
      "version": 1,
      "weekStart": ""
    }