Files
gooseek/backend/pkg/metrics/prometheus.go
home 7a40ff629e
Some checks failed
Build and Deploy GooSeek / build-and-deploy (push) Failing after 8m25s
feat: LLM routing by tier (free→Ollama, pro→Timeweb)
- Add tier-based provider routing in llm-svc
  - free tier → Ollama (local qwen3.5:9b)
  - pro/business → Timeweb Cloud AI
- Add /api/v1/embed endpoint for embeddings via Ollama
- Update Ollama client: qwen3.5:9b default, remove auth
- Add GenerateEmbedding() function for qwen3-embedding:0.6b
- Add Ollama K8s deployment with GPU support (RTX 4060 Ti)
- Add monitoring stack (Prometheus, Grafana, Alertmanager)
- Add Grafana dashboards for LLM and security metrics
- Update deploy.sh with monitoring and Ollama deployment

Made-with: Cursor
2026-03-03 02:25:22 +03:00

168 lines
4.5 KiB
Go

package metrics
import (
"strconv"
"time"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/adaptor"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
var (
// httpRequestsTotal counts completed HTTP requests, partitioned by
// service name, HTTP method, matched route path, and response status.
httpRequestsTotal = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "http_requests_total",
Help: "Total number of HTTP requests",
},
[]string{"service", "method", "path", "status"},
)
// httpRequestDuration observes per-request wall-clock latency. Buckets
// range from 1ms to 10s, tuned for fast API handlers.
httpRequestDuration = promauto.NewHistogramVec(
prometheus.HistogramOpts{
Name: "http_request_duration_seconds",
Help: "HTTP request duration in seconds",
Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
},
[]string{"service", "method", "path"},
)
// httpRequestsInFlight tracks requests currently inside the middleware
// (incremented on entry, decremented when the handler chain returns).
httpRequestsInFlight = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Name: "http_requests_in_flight",
Help: "Number of HTTP requests currently being processed",
},
[]string{"service"},
)
// LLM Security Metrics
// llmRequestsTotal counts LLM requests per provider/model/tier/user.
// NOTE(review): "user_id" is an unbounded label value; with many users
// this creates one time series per user and can blow up Prometheus
// cardinality — consider dropping it or bucketing. Same concern applies
// to "client_ip" labels below.
llmRequestsTotal = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "llm_requests_total",
Help: "Total LLM requests by provider, model, and tier",
},
[]string{"provider", "model", "tier", "user_id"},
)
// llmUnauthorizedRequests counts rejected LLM requests by reason and
// client IP (cardinality caveat above applies).
llmUnauthorizedRequests = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "llm_unauthorized_requests_total",
Help: "Unauthorized LLM request attempts",
},
[]string{"reason", "client_ip"},
)
// llmFreeTierLimitExceeded counts requests blocked by free-tier quotas.
llmFreeTierLimitExceeded = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "llm_free_tier_limit_exceeded_total",
Help: "Free tier limit exceeded attempts",
},
[]string{"user_id", "limit_type"},
)
// llmTokensUsed accumulates token consumption (added in batches via
// RecordLLMTokens, hence a counter with Add rather than Inc).
llmTokensUsed = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "llm_tokens_used_total",
Help: "Total tokens used by tier and provider",
},
[]string{"provider", "tier", "user_id"},
)
// llmRequestLatency observes end-to-end LLM call latency. Buckets reach
// 120s to cover slow generation on large prompts.
llmRequestLatency = promauto.NewHistogramVec(
prometheus.HistogramOpts{
Name: "llm_request_latency_seconds",
Help: "LLM request latency in seconds",
Buckets: []float64{0.1, 0.5, 1, 2, 5, 10, 30, 60, 120},
},
[]string{"provider", "model"},
)
// llmErrorsTotal counts LLM failures by provider and error class.
llmErrorsTotal = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "llm_errors_total",
Help: "Total LLM errors by type",
},
[]string{"provider", "error_type"},
)
// Security Events
// securityEventsTotal counts generic security events (auth failures,
// suspicious activity) emitted via RecordSecurityEvent.
securityEventsTotal = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "security_events_total",
Help: "Security events (auth failures, suspicious activity)",
},
[]string{"event_type", "client_ip", "user_id"},
)
// rateLimitHits counts rate-limiter rejections per service and client.
rateLimitHits = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "rate_limit_hits_total",
Help: "Rate limit hits",
},
[]string{"service", "client_ip", "limit_type"},
)
)
// MetricsConfig configures the Prometheus middleware for one service.
type MetricsConfig struct {
// ServiceName is used as the "service" label on all HTTP metrics.
ServiceName string
}
// PrometheusMiddleware returns a Fiber middleware that records request
// count, latency, and an in-flight gauge for the service named in cfg.
// It labels counters with the matched route template (not the raw URL),
// which keeps label cardinality bounded.
func PrometheusMiddleware(cfg MetricsConfig) fiber.Handler {
	return func(c *fiber.Ctx) error {
		start := time.Now()
		method := c.Method()

		httpRequestsInFlight.WithLabelValues(cfg.ServiceName).Inc()
		defer httpRequestsInFlight.WithLabelValues(cfg.ServiceName).Dec()

		err := c.Next()

		// BUG FIX: read the route template AFTER c.Next(). Before routing
		// completes, c.Route() reports the middleware's own mount path
		// (typically "/") rather than the matched handler's path, which
		// would collapse every endpoint into a single "path" label value.
		path := c.Route().Path

		duration := time.Since(start).Seconds()
		status := strconv.Itoa(c.Response().StatusCode())

		httpRequestsTotal.WithLabelValues(cfg.ServiceName, method, path, status).Inc()
		httpRequestDuration.WithLabelValues(cfg.ServiceName, method, path).Observe(duration)
		return err
	}
}
// MetricsHandler returns a Fiber handler that serves the Prometheus
// text exposition endpoint (typically mounted at /metrics), adapting
// the standard net/http promhttp handler to Fiber.
func MetricsHandler() fiber.Handler {
return adaptor.HTTPHandler(promhttp.Handler())
}
// RecordLLMRequest increments the LLM request counter for the given
// provider, model, subscription tier, and user.
func RecordLLMRequest(provider, model, tier, userID string) {
	counter := llmRequestsTotal.WithLabelValues(provider, model, tier, userID)
	counter.Inc()
}
// RecordLLMUnauthorized increments the unauthorized-attempt counter for
// the given rejection reason and originating client IP.
func RecordLLMUnauthorized(reason, clientIP string) {
	c := llmUnauthorizedRequests.WithLabelValues(reason, clientIP)
	c.Inc()
}
// RecordFreeTierLimitExceeded increments the free-tier quota-exceeded
// counter for the given user and limit type.
func RecordFreeTierLimitExceeded(userID, limitType string) {
	c := llmFreeTierLimitExceeded.WithLabelValues(userID, limitType)
	c.Inc()
}
// RecordLLMTokens adds a batch of consumed tokens to the usage counter
// for the given provider, tier, and user.
func RecordLLMTokens(provider, tier, userID string, tokens int) {
	c := llmTokensUsed.WithLabelValues(provider, tier, userID)
	c.Add(float64(tokens))
}
// RecordLLMLatency observes one LLM call's wall-clock duration (in
// seconds) in the latency histogram for the given provider and model.
func RecordLLMLatency(provider, model string, duration time.Duration) {
	secs := duration.Seconds()
	llmRequestLatency.WithLabelValues(provider, model).Observe(secs)
}
// RecordLLMError increments the LLM error counter for the given
// provider and error classification.
func RecordLLMError(provider, errorType string) {
	c := llmErrorsTotal.WithLabelValues(provider, errorType)
	c.Inc()
}
// RecordSecurityEvent increments the security-event counter for the
// given event type, client IP, and user.
func RecordSecurityEvent(eventType, clientIP, userID string) {
	c := securityEventsTotal.WithLabelValues(eventType, clientIP, userID)
	c.Inc()
}
// RecordRateLimitHit increments the rate-limit rejection counter for
// the given service, client IP, and limit type.
func RecordRateLimitHit(service, clientIP, limitType string) {
	c := rateLimitHits.WithLabelValues(service, clientIP, limitType)
	c.Inc()
}