feat: LLM routing by tier (free→Ollama, pro→Timeweb)
Some checks failed
Build and Deploy GooSeek / build-and-deploy (push) Failing after 8m25s

- Add tier-based provider routing in llm-svc
  - free tier → Ollama (local qwen3.5:9b)
  - pro/business → Timeweb Cloud AI
- Add /api/v1/embed endpoint for embeddings via Ollama
- Update Ollama client: qwen3.5:9b default, remove auth
- Add GenerateEmbedding() function for qwen3-embedding:0.6b
- Add Ollama K8s deployment with GPU support (RTX 4060 Ti)
- Add monitoring stack (Prometheus, Grafana, Alertmanager)
- Add Grafana dashboards for LLM and security metrics
- Update deploy.sh with monitoring and Ollama deployment

Made-with: Cursor
This commit is contained in:
home
2026-03-03 02:25:22 +03:00
parent 5ac082a7c6
commit 7a40ff629e
19 changed files with 1759 additions and 35 deletions

View File

@@ -16,6 +16,10 @@ spec:
metadata:
labels:
app: api-gateway
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "3015"
prometheus.io/path: "/metrics"
spec:
containers:
- name: api-gateway

View File

@@ -23,6 +23,10 @@ data:
AUTH_SVC_URL: "http://auth-svc:3050"
TRAVEL_SVC_URL: "http://travel-svc:3035"
ADMIN_SVC_URL: "http://admin-svc:3040"
OLLAMA_BASE_URL: "http://ollama:11434"
OLLAMA_MODEL: "qwen3.5:9b"
OLLAMA_EMBEDDING_MODEL: "qwen3-embedding:0.6b"
OLLAMA_NUM_PARALLEL: "2"
DEFAULT_LLM_MODEL: "${DEFAULT_LLM_MODEL}"
DEFAULT_LLM_PROVIDER: "${DEFAULT_LLM_PROVIDER}"
TIMEWEB_API_BASE_URL: "${TIMEWEB_API_BASE_URL}"
@@ -50,5 +54,6 @@ stringData:
GEMINI_API_KEY: "${GEMINI_API_KEY}"
JWT_SECRET: "${JWT_SECRET}"
TIMEWEB_API_KEY: "${TIMEWEB_API_KEY}"
OLLAMA_API_TOKEN: "${OLLAMA_API_TOKEN}"
POSTGRES_USER: "gooseek"
POSTGRES_PASSWORD: "gooseek"

View File

@@ -22,6 +22,21 @@ if [ -f "$ENV_FILE" ]; then
set +a
fi
# Check required secrets; generate and persist any that are missing.
# The generated values are exported explicitly: these assignments run after
# `set +a`, and envsubst (a child process) only sees exported variables, so
# without the export the first deploy would render empty secrets.
if [ -z "${OLLAMA_API_TOKEN:-}" ]; then
    echo "Warning: OLLAMA_API_TOKEN not set. Generating random token..."
    OLLAMA_API_TOKEN=$(openssl rand -hex 32)
    export OLLAMA_API_TOKEN
    # Persist the token so later deploys reuse the same value.
    echo "OLLAMA_API_TOKEN=$OLLAMA_API_TOKEN" >> "$ENV_FILE"
    echo "Token saved to .env"
fi

if [ -z "${GRAFANA_ADMIN_PASSWORD:-}" ]; then
    echo "Warning: GRAFANA_ADMIN_PASSWORD not set. Generating random password..."
    GRAFANA_ADMIN_PASSWORD=$(openssl rand -base64 24)
    export GRAFANA_ADMIN_PASSWORD
    # Persist the password so later deploys reuse the same value.
    echo "GRAFANA_ADMIN_PASSWORD=$GRAFANA_ADMIN_PASSWORD" >> "$ENV_FILE"
    echo "Grafana password saved to .env"
fi
# Check kubectl
if ! command -v kubectl &> /dev/null; then
echo "Error: kubectl not found"
@@ -62,6 +77,9 @@ echo "=== Generating K8s manifests from .env ==="
# Render templated manifests only when envsubst is installed and a .env file
# is present to supply the values.
if command -v envsubst > /dev/null 2>&1; then
    if [ -f "$ENV_FILE" ]; then
        # Application ConfigMap/Secret: rendered and applied immediately.
        envsubst < "$SCRIPT_DIR/configmap.yaml" > "$SCRIPT_DIR/_generated_configmap.yaml"
        kubectl apply -f "$SCRIPT_DIR/_generated_configmap.yaml" -n gooseek

        # Monitoring manifests: rendered here, applied in a later step.
        envsubst < "$SCRIPT_DIR/monitoring.yaml" > "$SCRIPT_DIR/_generated_monitoring.yaml"
    fi
fi
# Apply kustomization
@@ -70,6 +88,14 @@ echo "=== Applying K8s manifests ==="
cd "$SCRIPT_DIR"
kubectl apply -k .
# Deploy the monitoring stack, but only if its manifest was rendered earlier.
printf '\n'
echo "=== Deploying Monitoring Stack ==="
if test -f "$SCRIPT_DIR/_generated_monitoring.yaml"
then
    # Rendered stack first, then the static dashboards ConfigMap.
    kubectl apply -f "$SCRIPT_DIR/_generated_monitoring.yaml"
    kubectl apply -f "$SCRIPT_DIR/grafana-dashboards.yaml"
fi
# Rolling restart to pull new images
echo ""
echo "=== Rolling restart deployments ==="
@@ -79,11 +105,16 @@ kubectl -n gooseek rollout restart deployment/chat-svc
kubectl -n gooseek rollout restart deployment/agent-svc
kubectl -n gooseek rollout restart deployment/discover-svc
kubectl -n gooseek rollout restart deployment/search-svc
kubectl -n gooseek rollout restart deployment/llm-svc
kubectl -n gooseek rollout restart deployment/learning-svc
kubectl -n gooseek rollout restart deployment/medicine-svc
kubectl -n gooseek rollout restart deployment/travel-svc
kubectl -n gooseek rollout restart deployment/sandbox-svc
# Ollama: do not restart it unless necessary (models are stored on a PVC)
# Models are downloaded once and persist across deployments
# To load new models: kubectl apply -f ollama-models.yaml
# Wait for rollout
echo ""
echo "=== Waiting for rollouts ==="
@@ -100,7 +131,18 @@ kubectl -n gooseek get svc
echo ""
kubectl -n gooseek get ingress
# Show monitoring status. Best effort: kubectl errors are silenced and never
# abort the script, because the monitoring namespace may not exist yet.
echo ""
echo "=== Monitoring Status ==="
kubectl -n monitoring get pods 2>/dev/null || echo "Monitoring namespace not ready yet"
kubectl -n monitoring get ingress 2>/dev/null || true

# Final summary of public endpoints. Each URL is printed exactly once
# (the API/Web lines were previously emitted twice).
echo ""
echo "=== Done ==="
echo "API: https://api.gooseek.ru"
echo "Web: https://gooseek.ru"
echo "Grafana: https://grafana.gooseek.ru"
echo ""
echo "Grafana credentials:"
echo " User: admin"
echo " Pass: (see GRAFANA_ADMIN_PASSWORD in .env)"

View File

@@ -0,0 +1,266 @@
# Grafana Dashboards ConfigMap
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana-dashboards
namespace: monitoring
data:
security.json: |
{
"annotations": {
"list": []
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 1,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"fieldConfig": {
"defaults": {
"color": {"mode": "palette-classic"},
"mappings": [],
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}
}
},
"gridPos": {"h": 4, "w": 6, "x": 0, "y": 0},
"id": 1,
"options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
"pluginVersion": "10.3.3",
"targets": [{"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "sum(rate(llm_unauthorized_requests_total[5m]))", "legendFormat": "Unauthorized/sec", "refId": "A"}],
"title": "🚨 Неавторизованные запросы",
"type": "stat"
},
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 10}, {"color": "red", "value": 50}]}}},
"gridPos": {"h": 4, "w": 6, "x": 6, "y": 0},
"id": 2,
"options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
"targets": [{"expr": "sum(rate(llm_free_tier_limit_exceeded_total[5m]))", "legendFormat": "Limit exceeded/sec", "refId": "A"}],
"title": "⚠️ Превышение лимитов free",
"type": "stat"
},
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}},
"gridPos": {"h": 4, "w": 6, "x": 12, "y": 0},
"id": 3,
"options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
"targets": [{"expr": "sum(rate(llm_requests_total[5m]))", "legendFormat": "Requests/sec", "refId": "A"}],
"title": "📊 LLM запросы/сек",
"type": "stat"
},
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 10}]}}},
"gridPos": {"h": 4, "w": 6, "x": 18, "y": 0},
"id": 4,
"options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
"targets": [{"expr": "sum(rate(llm_errors_total[5m]))", "legendFormat": "Errors/sec", "refId": "A"}],
"title": "❌ Ошибки LLM",
"type": "stat"
},
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 4},
"id": 5,
"options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}},
"targets": [
{"expr": "sum by (reason) (rate(llm_unauthorized_requests_total[5m]))", "legendFormat": "{{reason}}", "refId": "A"},
{"expr": "sum by (limit_type) (rate(llm_free_tier_limit_exceeded_total[5m]))", "legendFormat": "limit: {{limit_type}}", "refId": "B"}
],
"title": "🔐 События безопасности",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 4},
"id": 6,
"options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}},
"targets": [
{"expr": "sum by (provider) (rate(llm_requests_total[5m]))", "legendFormat": "{{provider}}", "refId": "A"}
],
"title": "📈 Запросы по провайдерам",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 12},
"id": 7,
"options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}},
"targets": [
{"expr": "sum by (tier) (rate(llm_tokens_used_total[5m]))", "legendFormat": "{{tier}}", "refId": "A"}
],
"title": "🎫 Токены по тарифам",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 12},
"id": 8,
"options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}},
"targets": [
{"expr": "histogram_quantile(0.95, sum by (le, provider) (rate(llm_request_latency_seconds_bucket[5m])))", "legendFormat": "p95 {{provider}}", "refId": "A"},
{"expr": "histogram_quantile(0.50, sum by (le, provider) (rate(llm_request_latency_seconds_bucket[5m])))", "legendFormat": "p50 {{provider}}", "refId": "B"}
],
"title": "⏱️ Latency LLM (p50, p95)",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 5}, {"color": "red", "value": 20}]}}},
"gridPos": {"h": 8, "w": 24, "x": 0, "y": 20},
"id": 9,
"options": {"displayMode": "lcd", "minVizHeight": 10, "minVizWidth": 0, "orientation": "horizontal", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "showUnfilled": true, "valueMode": "color"},
"targets": [
{"expr": "topk(10, sum by (user_id) (rate(llm_free_tier_limit_exceeded_total[1h])))", "legendFormat": "{{user_id}}", "refId": "A"}
],
"title": "🚫 Top-10 пользователей превышающих лимиты (за час)",
"type": "bargauge"
}
],
"refresh": "10s",
"schemaVersion": 39,
"tags": ["security", "llm"],
"templating": {"list": []},
"time": {"from": "now-1h", "to": "now"},
"timepicker": {},
"timezone": "",
"title": "🔐 Security & LLM Monitoring",
"uid": "security-llm",
"version": 1,
"weekStart": ""
}
system-health.json: |
{
"annotations": {"list": []},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 2,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [], "max": 100, "min": 0, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 60}, {"color": "red", "value": 80}]}, "unit": "percent"}},
"gridPos": {"h": 6, "w": 6, "x": 0, "y": 0},
"id": 1,
"options": {"orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "showThresholdLabels": false, "showThresholdMarkers": true},
"targets": [{"expr": "100 - (avg(rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", "legendFormat": "CPU Usage", "refId": "A"}],
"title": "💻 CPU",
"type": "gauge"
},
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [], "max": 100, "min": 0, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 70}, {"color": "red", "value": 85}]}, "unit": "percent"}},
"gridPos": {"h": 6, "w": 6, "x": 6, "y": 0},
"id": 2,
"options": {"orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "showThresholdLabels": false, "showThresholdMarkers": true},
"targets": [{"expr": "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100", "legendFormat": "Memory Usage", "refId": "A"}],
"title": "🧠 Memory",
"type": "gauge"
},
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [], "max": 100, "min": 0, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 70}, {"color": "red", "value": 85}]}, "unit": "percent"}},
"gridPos": {"h": 6, "w": 6, "x": 12, "y": 0},
"id": 3,
"options": {"orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "showThresholdLabels": false, "showThresholdMarkers": true},
"targets": [{"expr": "(1 - (node_filesystem_avail_bytes{mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"})) * 100", "legendFormat": "Disk Usage", "refId": "A"}],
"title": "💾 Disk",
"type": "gauge"
},
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}},
"gridPos": {"h": 6, "w": 6, "x": 18, "y": 0},
"id": 4,
"options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
"targets": [{"expr": "sum(rate(http_requests_total[5m]))", "legendFormat": "Requests/sec", "refId": "A"}],
"title": "🌐 HTTP запросы/сек",
"type": "stat"
},
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 6},
"id": 5,
"options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}},
"targets": [
{"expr": "sum by (service) (rate(http_requests_total[5m]))", "legendFormat": "{{service}}", "refId": "A"}
],
"title": "📊 Запросы по сервисам",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 6},
"id": 6,
"options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}},
"targets": [
{"expr": "histogram_quantile(0.95, sum by (le, service) (rate(http_request_duration_seconds_bucket[5m])))", "legendFormat": "p95 {{service}}", "refId": "A"}
],
"title": "⏱️ Latency p95 по сервисам",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 80}]}}},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 14},
"id": 7,
"options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}},
"targets": [
{"expr": "sum by (service) (rate(http_requests_total{status=~\"5..\"}[5m]))", "legendFormat": "5xx {{service}}", "refId": "A"},
{"expr": "sum by (service) (rate(http_requests_total{status=~\"4..\"}[5m]))", "legendFormat": "4xx {{service}}", "refId": "B"}
],
"title": "❌ Ошибки HTTP",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": {"legend": false, "tooltip": false, "viz": false}, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": {"type": "linear"}, "showPoints": "auto", "spanNulls": false, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 14},
"id": 8,
"options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single", "sort": "none"}},
"targets": [
{"expr": "sum by (service) (http_requests_in_flight)", "legendFormat": "{{service}}", "refId": "A"}
],
"title": "🔄 Активные запросы",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "mappings": [{"options": {"0": {"color": "red", "index": 0, "text": "DOWN"}, "1": {"color": "green", "index": 1, "text": "UP"}}, "type": "value"}], "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]}}},
"gridPos": {"h": 6, "w": 24, "x": 0, "y": 22},
"id": 9,
"options": {"colorMode": "background", "graphMode": "none", "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
"targets": [
{"expr": "up{job=\"gooseek-services\"}", "legendFormat": "{{service}}", "refId": "A"}
],
"title": "🏥 Статус сервисов",
"type": "stat"
}
],
"refresh": "10s",
"schemaVersion": 39,
"tags": ["system", "health"],
"templating": {"list": []},
"time": {"from": "now-1h", "to": "now"},
"timepicker": {},
"timezone": "",
"title": "🏥 System Health",
"uid": "system-health",
"version": 1,
"weekStart": ""
}

View File

@@ -24,6 +24,7 @@ resources:
- travel-svc.yaml
- sandbox-svc.yaml
- opensandbox.yaml
- ollama.yaml
- ingress.yaml
labels:

View File

@@ -16,6 +16,10 @@ spec:
metadata:
labels:
app: llm-svc
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "3020"
prometheus.io/path: "/metrics"
spec:
containers:
- name: llm-svc

View File

@@ -0,0 +1,674 @@
# Monitoring stack: Prometheus + Grafana + Alertmanager.
# Tracks security events, resource usage and overall system health.
---
# Dedicated namespace that holds every monitoring resource below.
kind: Namespace
apiVersion: v1
metadata:
  name: monitoring
  labels:
    app.kubernetes.io/name: monitoring
---
# Prometheus ConfigMap
# Holds two files: prometheus.yml (global settings, Alertmanager target and
# scrape jobs) and alerts.yml (alerting rules grouped by concern).
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: monitoring
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      evaluation_interval: 15s
    alerting:
      alertmanagers:
        - static_configs:
            - targets:
                - alertmanager:9093
    rule_files:
      - /etc/prometheus/rules/*.yml
    scrape_configs:
      # Prometheus self-monitoring
      - job_name: 'prometheus'
        static_configs:
          - targets: ['localhost:9090']
      # Kubernetes API server
      - job_name: 'kubernetes-apiservers'
        kubernetes_sd_configs:
          - role: endpoints
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          # Keep only the default/kubernetes service's https endpoint.
          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: default;kubernetes;https
      # Kubernetes nodes (scraped through the API server proxy)
      - job_name: 'kubernetes-nodes'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          # ${1} below is a Prometheus capture-group reference, not a
          # deploy-time variable.
          # NOTE(review): deploy.sh pipes this file through envsubst; confirm
          # it leaves ${1} / $1 / $2 untouched.
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics
      # Kubernetes pods (auto-discovery via prometheus.io/* annotations)
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          # Scrape only pods annotated with prometheus.io/scrape: "true".
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: "true"
          # Honour a custom metrics path from prometheus.io/path.
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          # Rewrite the scrape address to use the port from prometheus.io/port.
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name
      # GooSeek services (direct, via Service DNS names)
      # NOTE(review): pods annotated with prometheus.io/scrape are presumably
      # also picked up by the kubernetes-pods job above, so their series may
      # be ingested twice; confirm that queries summing across jobs do not
      # double count.
      - job_name: 'gooseek-services'
        static_configs:
          - targets:
              - api-gateway.gooseek.svc:3015
              - llm-svc.gooseek.svc:3020
              - agent-svc.gooseek.svc:3018
              - chat-svc.gooseek.svc:3005
              - search-svc.gooseek.svc:3001
              - learning-svc.gooseek.svc:3034
              - travel-svc.gooseek.svc:3035
              - medicine-svc.gooseek.svc:3037
        metrics_path: /metrics
        relabel_configs:
          # Derive the `service` label from the target host name.
          - source_labels: [__address__]
            regex: (.+)\.gooseek\.svc:(\d+)
            replacement: $1
            target_label: service
  alerts.yml: |
    groups:
      - name: security
        rules:
          - alert: HighUnauthorizedRequests
            expr: rate(llm_unauthorized_requests_total[5m]) > 10
            for: 2m
            labels:
              severity: critical
            annotations:
              summary: High unauthorized LLM requests
              description: More than 10 unauthorized requests per second
          - alert: FreeTierAbuse
            expr: rate(llm_free_tier_limit_exceeded_total[5m]) > 5
            for: 5m
            labels:
              severity: warning
            annotations:
              summary: Free tier limit exceeded
              description: Users are exceeding LLM free tier limits
          - alert: SuspiciousActivity
            expr: sum by (client_ip) (rate(http_requests_total[5m])) > 100
            for: 5m
            labels:
              severity: warning
            annotations:
              summary: Suspicious activity detected
              description: High request rate from single IP
      - name: resources
        rules:
          - alert: HighCPUUsage
            expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
            for: 10m
            labels:
              severity: warning
            annotations:
              summary: High CPU usage
              description: CPU usage is above 80 percent
          - alert: LowMemory
            expr: (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100 < 20
            for: 5m
            labels:
              severity: critical
            annotations:
              summary: Low memory available
              description: Less than 20 percent memory available
          - alert: DiskSpaceLow
            expr: (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 < 15
            for: 5m
            labels:
              severity: critical
            annotations:
              summary: Low disk space
              description: Less than 15 percent disk space available
      - name: availability
        rules:
          - alert: ServiceDown
            expr: up{job="gooseek-services"} == 0
            for: 2m
            labels:
              severity: critical
            annotations:
              summary: Service is down
              description: A GooSeek service is not responding
          - alert: HighLatency
            expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 5
            for: 5m
            labels:
              severity: warning
            annotations:
              summary: High latency detected
              description: P95 latency is above 5 seconds
          # Both sides are aggregated per scrape target before dividing.
          # Dividing the raw vectors matches on identical label sets, so each
          # 5xx series would be divided by itself (ratio 1) and the alert
          # would fire on any single error.
          - alert: HighErrorRate
            expr: sum by (job, instance) (rate(http_requests_total{status=~"5.."}[5m])) / sum by (job, instance) (rate(http_requests_total[5m])) > 0.05
            for: 5m
            labels:
              severity: critical
            annotations:
              summary: High error rate
              description: Error rate is above 5 percent
---
# Prometheus RBAC
# Identity the Prometheus pod runs as; a ClusterRoleBinding named
# "prometheus" grants it read access for service discovery.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: monitoring
  name: prometheus
---
# Read-only cluster access used by Prometheus for service discovery and for
# scraping node metrics through the API server proxy.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
  # Ingress is served from networking.k8s.io on current clusters; the legacy
  # "extensions" group is kept so the rule still works on old ones.
  - apiGroups: ["extensions", "networking.k8s.io"]
    resources:
      - ingresses
    verbs: ["get", "list", "watch"]
  - nonResourceURLs: ["/metrics"]
    verbs: ["get"]
---
# Binds the "prometheus" ClusterRole to the ServiceAccount of the same name
# in the monitoring namespace.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: monitoring
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
---
# Prometheus Deployment
# Single replica with its TSDB on a PersistentVolumeClaim, 30 days retention.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus
  namespace: monitoring
  labels:
    app: prometheus
spec:
  replicas: 1
  # Recreate instead of the default RollingUpdate: the data volume is a
  # ReadWriteOnce claim and the TSDB directory is locked by the running
  # process, so a second pod cannot start while the old one is still alive.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      serviceAccountName: prometheus
      # Make the mounted volume group-writable for the container user.
      # NOTE(review): assumes the prom/prometheus image runs as the non-root
      # "nobody" user (65534) - confirm for the pinned image tag.
      securityContext:
        fsGroup: 65534
      containers:
        - name: prometheus
          image: prom/prometheus:v2.50.0
          args:
            - "--config.file=/etc/prometheus/prometheus.yml"
            - "--storage.tsdb.path=/prometheus"
            - "--storage.tsdb.retention.time=30d"
            - "--web.enable-lifecycle"
          ports:
            - containerPort: 9090
          volumeMounts:
            - name: config
              mountPath: /etc/prometheus
            # Matches the rule_files glob /etc/prometheus/rules/*.yml.
            - name: rules
              mountPath: /etc/prometheus/rules
            - name: data
              mountPath: /prometheus
          resources:
            requests:
              cpu: 200m
              memory: 512Mi
            limits:
              cpu: 1000m
              memory: 2Gi
      volumes:
        - name: config
          configMap:
            name: prometheus-config
        # Same ConfigMap, projecting only the alerting rules file.
        - name: rules
          configMap:
            name: prometheus-config
            items:
              - key: alerts.yml
                path: alerts.yml
        - name: data
          persistentVolumeClaim:
            claimName: prometheus-pvc
---
# Persistent storage for the Prometheus TSDB (mounted at /prometheus).
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  namespace: monitoring
  name: prometheus-pvc
spec:
  accessModes: [ReadWriteOnce]
  resources:
    requests:
      storage: 20Gi
---
# In-cluster endpoint for Prometheus (used by Grafana as http://prometheus:9090).
kind: Service
apiVersion: v1
metadata:
  namespace: monitoring
  name: prometheus
spec:
  type: ClusterIP
  ports:
    - port: 9090
      targetPort: 9090
  selector:
    app: prometheus
---
# Alertmanager ConfigMap
# Routes every alert to a single webhook receiver (named "telegram") exposed
# by the api-gateway service.
kind: ConfigMap
apiVersion: v1
metadata:
  namespace: monitoring
  name: alertmanager-config
data:
  alertmanager.yml: |
    global:
      resolve_timeout: 5m
    route:
      group_by: ['alertname', 'severity']
      group_wait: 30s
      group_interval: 5m
      repeat_interval: 4h
      receiver: 'telegram'
      routes:
        - match:
            severity: critical
          receiver: 'telegram'
          continue: true
    receivers:
      - name: 'telegram'
        webhook_configs:
          - url: 'http://api-gateway.gooseek.svc:3015/api/v1/alerts/webhook'
            send_resolved: true
    inhibit_rules:
      - source_match:
          severity: 'critical'
        target_match:
          severity: 'warning'
        equal: ['alertname']
---
# Alertmanager Deployment
# Single replica; state lives in an emptyDir and is lost when the pod is
# rescheduled.
kind: Deployment
apiVersion: apps/v1
metadata:
  namespace: monitoring
  name: alertmanager
  labels:
    app: alertmanager
spec:
  replicas: 1
  selector:
    matchLabels:
      app: alertmanager
  template:
    metadata:
      labels:
        app: alertmanager
    spec:
      volumes:
        - name: config
          configMap:
            name: alertmanager-config
        - name: data
          emptyDir: {}
      containers:
        - name: alertmanager
          image: prom/alertmanager:v0.27.0
          args:
            - "--config.file=/etc/alertmanager/alertmanager.yml"
            - "--storage.path=/alertmanager"
          ports:
            - containerPort: 9093
          resources:
            requests:
              cpu: 50m
              memory: 64Mi
            limits:
              cpu: 200m
              memory: 256Mi
          volumeMounts:
            - name: config
              mountPath: /etc/alertmanager
            - name: data
              mountPath: /alertmanager
---
# In-cluster endpoint for Alertmanager on port 9093.
kind: Service
apiVersion: v1
metadata:
  namespace: monitoring
  name: alertmanager
spec:
  type: ClusterIP
  ports:
    - port: 9093
      targetPort: 9093
  selector:
    app: alertmanager
---
# Grafana ConfigMap
# Holds grafana.ini (server settings) and datasources.yml (the provisioned
# Prometheus datasource).
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-config
  namespace: monitoring
data:
  grafana.ini: |
    [server]
    root_url = https://grafana.gooseek.ru
    [security]
    admin_user = admin
    ; admin_password is deliberately not set here: a ConfigMap is not a
    ; secret store. The Grafana Deployment injects the password through
    ; GF_SECURITY_ADMIN_PASSWORD from the grafana-secrets Secret.
    [auth.anonymous]
    enabled = false
    [dashboards]
    default_home_dashboard_path = /var/lib/grafana/dashboards/security.json
  datasources.yml: |
    apiVersion: 1
    datasources:
      - name: Prometheus
        type: prometheus
        # Fixed uid: the provisioned dashboards reference the datasource as
        # {"type": "prometheus", "uid": "prometheus"}.
        uid: prometheus
        access: proxy
        url: http://prometheus:9090
        isDefault: true
        editable: false
---
# Grafana Deployment
# Single replica with provisioned datasources and dashboards; its data
# directory is stored on a PersistentVolumeClaim.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: grafana
  namespace: monitoring
  labels:
    app: grafana
spec:
  replicas: 1
  # Recreate instead of the default RollingUpdate: the data volume is a
  # ReadWriteOnce claim, so two Grafana pods must never run against it at
  # the same time during a rollout.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: grafana
  template:
    metadata:
      labels:
        app: grafana
    spec:
      # Let the non-root Grafana process write to the mounted data volume.
      # NOTE(review): 472 is the uid documented for the official
      # grafana/grafana image - confirm for the pinned tag.
      securityContext:
        fsGroup: 472
        supplementalGroups: [0]
      containers:
        - name: grafana
          image: grafana/grafana:10.3.3
          ports:
            - containerPort: 3000
          env:
            # Admin password comes from a Secret, never from grafana.ini.
            - name: GF_SECURITY_ADMIN_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: grafana-secrets
                  key: admin-password
            - name: GF_INSTALL_PLUGINS
              value: "grafana-piechart-panel,grafana-clock-panel"
          volumeMounts:
            # subPath mounts only grafana.ini and leaves the rest of
            # /etc/grafana from the image in place.
            - name: config
              mountPath: /etc/grafana/grafana.ini
              subPath: grafana.ini
            - name: datasources
              mountPath: /etc/grafana/provisioning/datasources
            - name: dashboards-config
              mountPath: /etc/grafana/provisioning/dashboards
            - name: dashboards
              mountPath: /var/lib/grafana/dashboards
            - name: data
              mountPath: /var/lib/grafana
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 500m
              memory: 512Mi
      volumes:
        - name: config
          configMap:
            name: grafana-config
        # Same ConfigMap, projecting only the datasource provisioning file.
        - name: datasources
          configMap:
            name: grafana-config
            items:
              - key: datasources.yml
                path: datasources.yml
        - name: dashboards-config
          configMap:
            name: grafana-dashboards-config
        - name: dashboards
          configMap:
            name: grafana-dashboards
        - name: data
          persistentVolumeClaim:
            claimName: grafana-pvc
---
# Persistent storage for Grafana's data directory (5Gi, single-node
# read/write). Referenced by the grafana Deployment as "grafana-pvc".
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: monitoring
  name: grafana-pvc
spec:
  resources:
    requests:
      storage: 5Gi
  accessModes:
    - ReadWriteOnce
---
# Grafana admin credentials. The value is a ${...} placeholder that is
# expected to be filled in before the manifest is applied.
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  namespace: monitoring
  name: grafana-secrets
stringData:
  admin-password: "${GRAFANA_ADMIN_PASSWORD}"
---
# In-cluster (ClusterIP) endpoint for Grafana on TCP 3000.
# Selects the pods labelled app=grafana.
apiVersion: v1
kind: Service
metadata:
  namespace: monitoring
  name: grafana
spec:
  selector:
    app: grafana
  type: ClusterIP
  ports:
    - targetPort: 3000
      port: 3000
---
# Grafana dashboard provisioning
# Declares a file-based dashboard provider that reads JSON dashboards from
# /var/lib/grafana/dashboards.
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: monitoring
  name: grafana-dashboards-config
data:
  dashboards.yml: |
    apiVersion: 1
    providers:
      - name: 'default'
        orgId: 1
        folder: ''
        type: file
        disableDeletion: false
        editable: true
        options:
          path: /var/lib/grafana/dashboards
---
# Grafana Ingress
# Publishes the grafana Service at https://grafana.gooseek.ru through the
# nginx ingress class; the TLS secret "grafana-tls" is requested from the
# "letsencrypt-prod" cluster issuer via the cert-manager annotation.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  namespace: monitoring
  name: grafana-ingress
  annotations:
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
spec:
  ingressClassName: nginx
  rules:
    - host: grafana.gooseek.ru
      http:
        paths:
          - pathType: Prefix
            path: /
            backend:
              service:
                name: grafana
                port:
                  number: 3000
  tls:
    - secretName: grafana-tls
      hosts:
        - grafana.gooseek.ru
---
# Node Exporter DaemonSet (host metrics)
# Runs one exporter per node in the host network and PID namespaces, reading
# the host's /proc, /sys and / through read-only hostPath mounts.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: monitoring
  labels:
    app: node-exporter
spec:
  selector:
    matchLabels:
      app: node-exporter
  template:
    metadata:
      labels:
        app: node-exporter
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "9100"
    spec:
      hostNetwork: true
      hostPID: true
      # Host metrics are wanted from every node, including tainted ones
      # (control-plane, dedicated GPU nodes), so tolerate all taints.
      tolerations:
        - operator: Exists
      containers:
        - name: node-exporter
          image: prom/node-exporter:v1.7.0
          args:
            - "--path.procfs=/host/proc"
            - "--path.sysfs=/host/sys"
            - "--path.rootfs=/host/root"
            # "$$" is reduced to a literal "$" by Kubernetes argument
            # expansion, giving the regex anchor "($|/)".
            - "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)"
          ports:
            - containerPort: 9100
          volumeMounts:
            - name: proc
              mountPath: /host/proc
              readOnly: true
            - name: sys
              mountPath: /host/sys
              readOnly: true
            - name: root
              mountPath: /host/root
              # Lets mounts made on the host after pod start show up under
              # /host/root so the filesystem collector can see them.
              mountPropagation: HostToContainer
              readOnly: true
          resources:
            requests:
              cpu: 50m
              memory: 64Mi
            limits:
              cpu: 200m
              memory: 128Mi
      volumes:
        - name: proc
          hostPath:
            path: /proc
        - name: sys
          hostPath:
            path: /sys
        - name: root
          hostPath:
            path: /
---
# In-cluster (ClusterIP) endpoint for the node-exporter pods on TCP 9100.
# Carries the prometheus.io/* scrape annotations for port 9100.
apiVersion: v1
kind: Service
metadata:
  namespace: monitoring
  name: node-exporter
  annotations:
    prometheus.io/port: "9100"
    prometheus.io/scrape: "true"
spec:
  selector:
    app: node-exporter
  type: ClusterIP
  ports:
    - targetPort: 9100
      port: 9100

View File

@@ -0,0 +1,85 @@
# Job that pulls the Ollama models after a deploy.
# An init container blocks until the Ollama API answers; the main container
# then pulls each required model through the ollama CLI, which talks to the
# remote server named by OLLAMA_HOST.
apiVersion: batch/v1
kind: Job
metadata:
  name: ollama-model-loader
  namespace: gooseek
  labels:
    app: ollama-model-loader
spec:
  ttlSecondsAfterFinished: 3600
  backoffLimit: 3
  template:
    metadata:
      labels:
        app: ollama-model-loader
    spec:
      restartPolicy: OnFailure
      initContainers:
        - name: wait-for-ollama
          image: curlimages/curl:latest
          command:
            - /bin/sh
            - -c
            - |
              echo "Waiting for Ollama to be ready..."
              until curl -sf http://ollama.gooseek.svc.cluster.local:11434/api/tags; do
                echo "Ollama not ready, retrying in 5s..."
                sleep 5
              done
              echo "Ollama is ready!"
      containers:
        - name: model-loader
          image: ollama/ollama:latest
          env:
            # The ollama CLI sends its requests to this server instead of a
            # local one.
            - name: OLLAMA_HOST
              value: "http://ollama.gooseek.svc.cluster.local:11434"
          command:
            - /bin/sh
            - -c
            - |
              set -e
              # Uses the ollama CLI rather than curl: the CLI is always
              # present in this image, curl is not guaranteed to be.
              pull_model() {
                MODEL=$1
                echo "=== Pulling model: $MODEL ==="
                # "ollama show" succeeds only if the server already has the model
                if ollama show "$MODEL" >/dev/null 2>&1; then
                  echo "Model $MODEL already exists, skipping..."
                  return 0
                fi
                echo "Downloading $MODEL..."
                # Same 30-minute ceiling per model as before
                timeout 1800 ollama pull "$MODEL"
                echo "Model $MODEL downloaded successfully!"
              }
              echo "=== Ollama Model Loader ==="
              echo "Target: $OLLAMA_HOST"
              # Main generation model (4 parallel workers)
              pull_model "qwen3.5:9b"
              # Embedding model (fast embeddings)
              pull_model "qwen3-embedding:0.6b"
              echo ""
              echo "=== All models loaded ==="
              ollama list
              echo ""
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 500m
              memory: 512Mi

View File

@@ -0,0 +1,38 @@
#!/bin/bash
# Pulls models into the running Ollama pod via `kubectl exec`.
# Run ONCE after the first deploy: the script itself reports that models are
# stored in the PVC, so they do not need to be downloaded again.
#
# Usage:   [NAMESPACE=<ns>] <this-script> [model ...]
# Env:     NAMESPACE  target namespace (default: gooseek)
# Args:    model names to pull; defaults to the models the platform is
#          configured for (generation + embedding).
set -euo pipefail

NAMESPACE="${NAMESPACE:-gooseek}"

# Default to the models configured elsewhere in this deployment
# (OLLAMA_MODEL / OLLAMA_EMBEDDING_MODEL), not an unrelated one.
if [ "$#" -eq 0 ]; then
  set -- "qwen3.5:9b" "qwen3-embedding:0.6b"
fi

# Only ask kubectl for a TTY when we actually have one, so the script also
# works from CI and other non-interactive shells.
if [ -t 0 ] && [ -t 1 ]; then
  EXEC_FLAGS="-it"
else
  EXEC_FLAGS="-i"
fi

echo "=== Ollama Model Loader ==="
echo "Namespace: $NAMESPACE"
echo "Models: $*"

# Make sure the Ollama pod is running
echo ""
echo "Checking Ollama pod status..."
kubectl -n "$NAMESPACE" wait --for=condition=ready pod -l app=ollama --timeout=120s

# Resolve the pod name
POD=$(kubectl -n "$NAMESPACE" get pod -l app=ollama -o jsonpath='{.items[0].metadata.name}')
echo "Pod: $POD"

# Pull the models
for MODEL in "$@"; do
  echo ""
  echo "=== Pulling model: $MODEL ==="
  kubectl -n "$NAMESPACE" exec "$EXEC_FLAGS" "$POD" -c ollama -- ollama pull "$MODEL"
done

# Show the installed models
echo ""
echo "=== Installed models ==="
kubectl -n "$NAMESPACE" exec "$EXEC_FLAGS" "$POD" -c ollama -- ollama list

echo ""
echo "=== Done! ==="
echo "Models are stored in PVC and will persist across restarts."

View File

@@ -0,0 +1,130 @@
# Ollama Deployment with GPU
# Requires: NVIDIA GPU Operator installed in the cluster
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
  namespace: gooseek
  labels:
    app: ollama
    app.kubernetes.io/name: ollama
    app.kubernetes.io/part-of: gooseek
spec:
  replicas: 1
  # The pod claims a whole GPU and a ReadWriteOnce volume. With the default
  # RollingUpdate strategy the replacement pod is created while the old one
  # still holds both, so on a single-GPU node it can never be scheduled.
  # Recreate terminates the old pod first.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      runtimeClassName: nvidia
      containers:
        # Ollama server (GPU only)
        - name: ollama
          image: ollama/ollama:latest
          ports:
            - containerPort: 11434
              name: http
          env:
            # Listen on all interfaces so the Service can reach the pod
            - name: OLLAMA_HOST
              value: "0.0.0.0:11434"
            - name: OLLAMA_KEEP_ALIVE
              value: "24h"
            # Model store lives on the persistent volume mounted below
            - name: OLLAMA_MODELS
              value: "/root/.ollama/models"
            # Parallel request handling for SaaS
            # NOTE(review): the shared ConfigMap sets OLLAMA_NUM_PARALLEL to
            # "2" — confirm which value is intended.
            - name: OLLAMA_NUM_PARALLEL
              value: "4"
            - name: OLLAMA_MAX_LOADED_MODELS
              value: "2"
            - name: OLLAMA_FLASH_ATTENTION
              value: "true"
            # GPU
            - name: NVIDIA_VISIBLE_DEVICES
              value: "all"
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "compute,utility"
          volumeMounts:
            - name: ollama-data
              mountPath: /root/.ollama
          resources:
            requests:
              cpu: 1000m
              memory: 8Gi
              nvidia.com/gpu: 1
            limits:
              cpu: 4000m
              memory: 16Gi
              nvidia.com/gpu: 1
          livenessProbe:
            httpGet:
              path: /
              port: 11434
            initialDelaySeconds: 30
            periodSeconds: 30
            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /
              port: 11434
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
      volumes:
        - name: ollama-data
          persistentVolumeClaim:
            claimName: ollama-pvc
---
# Persistent storage for Ollama's data directory, including downloaded
# models (20Gi, single-node read/write). Mounted by the ollama Deployment
# at /root/.ollama.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: gooseek
  name: ollama-pvc
spec:
  resources:
    requests:
      storage: 20Gi
  accessModes:
    - ReadWriteOnce
---
# In-cluster (ClusterIP) endpoint for the Ollama API on TCP 11434.
# Selects the pods labelled app=ollama.
apiVersion: v1
kind: Service
metadata:
  namespace: gooseek
  name: ollama
spec:
  selector:
    app: ollama
  type: ClusterIP
  ports:
    - name: http
      targetPort: 11434
      port: 11434
---
# NetworkPolicy: llm-svc and the model-loader are allowed to reach ollama.
# Applies to pods labelled app=ollama and admits ingress on TCP 11434 only
# from pods labelled app=llm-svc or app=ollama-model-loader in this namespace.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  namespace: gooseek
  name: ollama-access
spec:
  policyTypes:
    - Ingress
  podSelector:
    matchLabels:
      app: ollama
  ingress:
    - ports:
        - port: 11434
          protocol: TCP
      # Two separate entries: a peer matching either selector is allowed.
      from:
        - podSelector:
            matchLabels:
              app: llm-svc
        - podSelector:
            matchLabels:
              app: ollama-model-loader