gooseek/backend/pkg/metrics/prometheus.go

package metrics

import (
	"strconv"
	"time"

	"github.com/gofiber/fiber/v2"
	"github.com/gofiber/fiber/v2/middleware/adaptor"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

var (
	httpRequestsTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "http_requests_total",
			Help: "Total number of HTTP requests",
		},
		[]string{"service", "method", "path", "status"},
	)

	httpRequestDuration = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "http_request_duration_seconds",
			Help:    "HTTP request duration in seconds",
			Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
		},
		[]string{"service", "method", "path"},
	)

	httpRequestsInFlight = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "http_requests_in_flight",
			Help: "Number of HTTP requests currently being processed",
		},
		[]string{"service"},
	)

	// LLM Security Metrics
	llmRequestsTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "llm_requests_total",
			Help: "Total LLM requests by provider, model, and tier",
		},
		[]string{"provider", "model", "tier", "user_id"},
	)

	llmUnauthorizedRequests = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "llm_unauthorized_requests_total",
			Help: "Unauthorized LLM request attempts",
		},
		[]string{"reason", "client_ip"},
	)

	llmFreeTierLimitExceeded = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "llm_free_tier_limit_exceeded_total",
			Help: "Free tier limit exceeded attempts",
		},
		[]string{"user_id", "limit_type"},
	)

	llmTokensUsed = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "llm_tokens_used_total",
			Help: "Total tokens used by tier and provider",
		},
		[]string{"provider", "tier", "user_id"},
	)

	llmRequestLatency = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "llm_request_latency_seconds",
			Help:    "LLM request latency in seconds",
			Buckets: []float64{0.1, 0.5, 1, 2, 5, 10, 30, 60, 120},
		},
		[]string{"provider", "model"},
	)

	llmErrorsTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "llm_errors_total",
			Help: "Total LLM errors by type",
		},
		[]string{"provider", "error_type"},
	)

	// Security Events
	securityEventsTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "security_events_total",
			Help: "Security events (auth failures, suspicious activity)",
		},
		[]string{"event_type", "client_ip", "user_id"},
	)

	rateLimitHits = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "rate_limit_hits_total",
			Help: "Rate limit hits",
		},
		[]string{"service", "client_ip", "limit_type"},
	)
)

type MetricsConfig struct {
	ServiceName string
}

func PrometheusMiddleware(cfg MetricsConfig) fiber.Handler {
	return func(c *fiber.Ctx) error {
		start := time.Now()
		path := c.Route().Path
		method := c.Method()

		httpRequestsInFlight.WithLabelValues(cfg.ServiceName).Inc()
		defer httpRequestsInFlight.WithLabelValues(cfg.ServiceName).Dec()

		err := c.Next()

		duration := time.Since(start).Seconds()
		status := strconv.Itoa(c.Response().StatusCode())

		httpRequestsTotal.WithLabelValues(cfg.ServiceName, method, path, status).Inc()
		httpRequestDuration.WithLabelValues(cfg.ServiceName, method, path).Observe(duration)

		return err
	}
}

func MetricsHandler() fiber.Handler {
	return adaptor.HTTPHandler(promhttp.Handler())
}

func RecordLLMRequest(provider, model, tier, userID string) {
	llmRequestsTotal.WithLabelValues(provider, model, tier, userID).Inc()
}

func RecordLLMUnauthorized(reason, clientIP string) {
	llmUnauthorizedRequests.WithLabelValues(reason, clientIP).Inc()
}

func RecordFreeTierLimitExceeded(userID, limitType string) {
	llmFreeTierLimitExceeded.WithLabelValues(userID, limitType).Inc()
}

func RecordLLMTokens(provider, tier, userID string, tokens int) {
	llmTokensUsed.WithLabelValues(provider, tier, userID).Add(float64(tokens))
}

func RecordLLMLatency(provider, model string, duration time.Duration) {
	llmRequestLatency.WithLabelValues(provider, model).Observe(duration.Seconds())
}

func RecordLLMError(provider, errorType string) {
	llmErrorsTotal.WithLabelValues(provider, errorType).Inc()
}

func RecordSecurityEvent(eventType, clientIP, userID string) {
	securityEventsTotal.WithLabelValues(eventType, clientIP, userID).Inc()
}

func RecordRateLimitHit(service, clientIP, limitType string) {
	rateLimitHits.WithLabelValues(service, clientIP, limitType).Inc()
}