Some checks failed
Build and Deploy GooSeek / build-and-deploy (push) Failing after 8m25s
- Add tier-based provider routing in llm-svc - free tier → Ollama (local qwen3.5:9b) - pro/business → Timeweb Cloud AI - Add /api/v1/embed endpoint for embeddings via Ollama - Update Ollama client: qwen3.5:9b default, remove auth - Add GenerateEmbedding() function for qwen3-embedding:0.6b - Add Ollama K8s deployment with GPU support (RTX 4060 Ti) - Add monitoring stack (Prometheus, Grafana, Alertmanager) - Add Grafana dashboards for LLM and security metrics - Update deploy.sh with monitoring and Ollama deployment Made-with: Cursor
168 lines
4.5 KiB
Go
168 lines
4.5 KiB
Go
package metrics
|
|
|
|
import (
	"errors"
	"strconv"
	"time"

	"github.com/gofiber/fiber/v2"
	"github.com/gofiber/fiber/v2/middleware/adaptor"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)
|
|
|
|
var (
|
|
httpRequestsTotal = promauto.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "http_requests_total",
|
|
Help: "Total number of HTTP requests",
|
|
},
|
|
[]string{"service", "method", "path", "status"},
|
|
)
|
|
|
|
httpRequestDuration = promauto.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: "http_request_duration_seconds",
|
|
Help: "HTTP request duration in seconds",
|
|
Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
|
|
},
|
|
[]string{"service", "method", "path"},
|
|
)
|
|
|
|
httpRequestsInFlight = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "http_requests_in_flight",
|
|
Help: "Number of HTTP requests currently being processed",
|
|
},
|
|
[]string{"service"},
|
|
)
|
|
|
|
// LLM Security Metrics
|
|
llmRequestsTotal = promauto.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "llm_requests_total",
|
|
Help: "Total LLM requests by provider, model, and tier",
|
|
},
|
|
[]string{"provider", "model", "tier", "user_id"},
|
|
)
|
|
|
|
llmUnauthorizedRequests = promauto.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "llm_unauthorized_requests_total",
|
|
Help: "Unauthorized LLM request attempts",
|
|
},
|
|
[]string{"reason", "client_ip"},
|
|
)
|
|
|
|
llmFreeTierLimitExceeded = promauto.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "llm_free_tier_limit_exceeded_total",
|
|
Help: "Free tier limit exceeded attempts",
|
|
},
|
|
[]string{"user_id", "limit_type"},
|
|
)
|
|
|
|
llmTokensUsed = promauto.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "llm_tokens_used_total",
|
|
Help: "Total tokens used by tier and provider",
|
|
},
|
|
[]string{"provider", "tier", "user_id"},
|
|
)
|
|
|
|
llmRequestLatency = promauto.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: "llm_request_latency_seconds",
|
|
Help: "LLM request latency in seconds",
|
|
Buckets: []float64{0.1, 0.5, 1, 2, 5, 10, 30, 60, 120},
|
|
},
|
|
[]string{"provider", "model"},
|
|
)
|
|
|
|
llmErrorsTotal = promauto.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "llm_errors_total",
|
|
Help: "Total LLM errors by type",
|
|
},
|
|
[]string{"provider", "error_type"},
|
|
)
|
|
|
|
// Security Events
|
|
securityEventsTotal = promauto.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "security_events_total",
|
|
Help: "Security events (auth failures, suspicious activity)",
|
|
},
|
|
[]string{"event_type", "client_ip", "user_id"},
|
|
)
|
|
|
|
rateLimitHits = promauto.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "rate_limit_hits_total",
|
|
Help: "Rate limit hits",
|
|
},
|
|
[]string{"service", "client_ip", "limit_type"},
|
|
)
|
|
)
|
|
|
|
type MetricsConfig struct {
|
|
ServiceName string
|
|
}
|
|
|
|
func PrometheusMiddleware(cfg MetricsConfig) fiber.Handler {
|
|
return func(c *fiber.Ctx) error {
|
|
start := time.Now()
|
|
path := c.Route().Path
|
|
method := c.Method()
|
|
|
|
httpRequestsInFlight.WithLabelValues(cfg.ServiceName).Inc()
|
|
defer httpRequestsInFlight.WithLabelValues(cfg.ServiceName).Dec()
|
|
|
|
err := c.Next()
|
|
|
|
duration := time.Since(start).Seconds()
|
|
status := strconv.Itoa(c.Response().StatusCode())
|
|
|
|
httpRequestsTotal.WithLabelValues(cfg.ServiceName, method, path, status).Inc()
|
|
httpRequestDuration.WithLabelValues(cfg.ServiceName, method, path).Observe(duration)
|
|
|
|
return err
|
|
}
|
|
}
|
|
|
|
func MetricsHandler() fiber.Handler {
|
|
return adaptor.HTTPHandler(promhttp.Handler())
|
|
}
|
|
|
|
func RecordLLMRequest(provider, model, tier, userID string) {
|
|
llmRequestsTotal.WithLabelValues(provider, model, tier, userID).Inc()
|
|
}
|
|
|
|
func RecordLLMUnauthorized(reason, clientIP string) {
|
|
llmUnauthorizedRequests.WithLabelValues(reason, clientIP).Inc()
|
|
}
|
|
|
|
func RecordFreeTierLimitExceeded(userID, limitType string) {
|
|
llmFreeTierLimitExceeded.WithLabelValues(userID, limitType).Inc()
|
|
}
|
|
|
|
func RecordLLMTokens(provider, tier, userID string, tokens int) {
|
|
llmTokensUsed.WithLabelValues(provider, tier, userID).Add(float64(tokens))
|
|
}
|
|
|
|
func RecordLLMLatency(provider, model string, duration time.Duration) {
|
|
llmRequestLatency.WithLabelValues(provider, model).Observe(duration.Seconds())
|
|
}
|
|
|
|
func RecordLLMError(provider, errorType string) {
|
|
llmErrorsTotal.WithLabelValues(provider, errorType).Inc()
|
|
}
|
|
|
|
func RecordSecurityEvent(eventType, clientIP, userID string) {
|
|
securityEventsTotal.WithLabelValues(eventType, clientIP, userID).Inc()
|
|
}
|
|
|
|
func RecordRateLimitHit(service, clientIP, limitType string) {
|
|
rateLimitHits.WithLabelValues(service, clientIP, limitType).Inc()
|
|
}
|