feat: LLM routing by tier (free→Ollama, pro→Timeweb)
Some checks failed
Build and Deploy GooSeek / build-and-deploy (push) Failing after 8m25s
Some checks failed
Build and Deploy GooSeek / build-and-deploy (push) Failing after 8m25s
- Add tier-based provider routing in llm-svc
- free tier → Ollama (local qwen3.5:9b)
- pro/business → Timeweb Cloud AI
- Add /api/v1/embed endpoint for embeddings via Ollama
- Update Ollama client: qwen3.5:9b default, remove auth
- Add GenerateEmbedding() function for qwen3-embedding:0.6b
- Add Ollama K8s deployment with GPU support (RTX 4060 Ti)
- Add monitoring stack (Prometheus, Grafana, Alertmanager)
- Add Grafana dashboards for LLM and security metrics
- Update deploy.sh with monitoring and Ollama deployment

Made-with: Cursor
This commit is contained in:
167
backend/pkg/metrics/prometheus.go
Normal file
167
backend/pkg/metrics/prometheus.go
Normal file
@@ -0,0 +1,167 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/gofiber/fiber/v2/middleware/adaptor"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
var (
|
||||
httpRequestsTotal = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "http_requests_total",
|
||||
Help: "Total number of HTTP requests",
|
||||
},
|
||||
[]string{"service", "method", "path", "status"},
|
||||
)
|
||||
|
||||
httpRequestDuration = promauto.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "http_request_duration_seconds",
|
||||
Help: "HTTP request duration in seconds",
|
||||
Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
|
||||
},
|
||||
[]string{"service", "method", "path"},
|
||||
)
|
||||
|
||||
httpRequestsInFlight = promauto.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "http_requests_in_flight",
|
||||
Help: "Number of HTTP requests currently being processed",
|
||||
},
|
||||
[]string{"service"},
|
||||
)
|
||||
|
||||
// LLM Security Metrics
|
||||
llmRequestsTotal = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "llm_requests_total",
|
||||
Help: "Total LLM requests by provider, model, and tier",
|
||||
},
|
||||
[]string{"provider", "model", "tier", "user_id"},
|
||||
)
|
||||
|
||||
llmUnauthorizedRequests = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "llm_unauthorized_requests_total",
|
||||
Help: "Unauthorized LLM request attempts",
|
||||
},
|
||||
[]string{"reason", "client_ip"},
|
||||
)
|
||||
|
||||
llmFreeTierLimitExceeded = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "llm_free_tier_limit_exceeded_total",
|
||||
Help: "Free tier limit exceeded attempts",
|
||||
},
|
||||
[]string{"user_id", "limit_type"},
|
||||
)
|
||||
|
||||
llmTokensUsed = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "llm_tokens_used_total",
|
||||
Help: "Total tokens used by tier and provider",
|
||||
},
|
||||
[]string{"provider", "tier", "user_id"},
|
||||
)
|
||||
|
||||
llmRequestLatency = promauto.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "llm_request_latency_seconds",
|
||||
Help: "LLM request latency in seconds",
|
||||
Buckets: []float64{0.1, 0.5, 1, 2, 5, 10, 30, 60, 120},
|
||||
},
|
||||
[]string{"provider", "model"},
|
||||
)
|
||||
|
||||
llmErrorsTotal = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "llm_errors_total",
|
||||
Help: "Total LLM errors by type",
|
||||
},
|
||||
[]string{"provider", "error_type"},
|
||||
)
|
||||
|
||||
// Security Events
|
||||
securityEventsTotal = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "security_events_total",
|
||||
Help: "Security events (auth failures, suspicious activity)",
|
||||
},
|
||||
[]string{"event_type", "client_ip", "user_id"},
|
||||
)
|
||||
|
||||
rateLimitHits = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "rate_limit_hits_total",
|
||||
Help: "Rate limit hits",
|
||||
},
|
||||
[]string{"service", "client_ip", "limit_type"},
|
||||
)
|
||||
)
|
||||
|
||||
// MetricsConfig carries the settings consumed by PrometheusMiddleware.
type MetricsConfig struct {
	// ServiceName is the value written to the "service" label of the HTTP
	// metrics recorded by the middleware.
	ServiceName string
}
|
||||
|
||||
func PrometheusMiddleware(cfg MetricsConfig) fiber.Handler {
|
||||
return func(c *fiber.Ctx) error {
|
||||
start := time.Now()
|
||||
path := c.Route().Path
|
||||
method := c.Method()
|
||||
|
||||
httpRequestsInFlight.WithLabelValues(cfg.ServiceName).Inc()
|
||||
defer httpRequestsInFlight.WithLabelValues(cfg.ServiceName).Dec()
|
||||
|
||||
err := c.Next()
|
||||
|
||||
duration := time.Since(start).Seconds()
|
||||
status := strconv.Itoa(c.Response().StatusCode())
|
||||
|
||||
httpRequestsTotal.WithLabelValues(cfg.ServiceName, method, path, status).Inc()
|
||||
httpRequestDuration.WithLabelValues(cfg.ServiceName, method, path).Observe(duration)
|
||||
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
func MetricsHandler() fiber.Handler {
|
||||
return adaptor.HTTPHandler(promhttp.Handler())
|
||||
}
|
||||
|
||||
func RecordLLMRequest(provider, model, tier, userID string) {
|
||||
llmRequestsTotal.WithLabelValues(provider, model, tier, userID).Inc()
|
||||
}
|
||||
|
||||
func RecordLLMUnauthorized(reason, clientIP string) {
|
||||
llmUnauthorizedRequests.WithLabelValues(reason, clientIP).Inc()
|
||||
}
|
||||
|
||||
func RecordFreeTierLimitExceeded(userID, limitType string) {
|
||||
llmFreeTierLimitExceeded.WithLabelValues(userID, limitType).Inc()
|
||||
}
|
||||
|
||||
func RecordLLMTokens(provider, tier, userID string, tokens int) {
|
||||
llmTokensUsed.WithLabelValues(provider, tier, userID).Add(float64(tokens))
|
||||
}
|
||||
|
||||
func RecordLLMLatency(provider, model string, duration time.Duration) {
|
||||
llmRequestLatency.WithLabelValues(provider, model).Observe(duration.Seconds())
|
||||
}
|
||||
|
||||
func RecordLLMError(provider, errorType string) {
|
||||
llmErrorsTotal.WithLabelValues(provider, errorType).Inc()
|
||||
}
|
||||
|
||||
func RecordSecurityEvent(eventType, clientIP, userID string) {
|
||||
securityEventsTotal.WithLabelValues(eventType, clientIP, userID).Inc()
|
||||
}
|
||||
|
||||
func RecordRateLimitHit(service, clientIP, limitType string) {
|
||||
rateLimitHits.WithLabelValues(service, clientIP, limitType).Inc()
|
||||
}
|
||||
Reference in New Issue
Block a user