feat: LLM routing by tier (free→Ollama, pro→Timeweb)
Some checks failed
Build and Deploy GooSeek / build-and-deploy (push) Failing after 8m25s

- Add tier-based provider routing in llm-svc
  - free tier → Ollama (local qwen3.5:9b)
  - pro/business tiers → Timeweb Cloud AI
- Add /api/v1/embed endpoint for embeddings via Ollama
- Update Ollama client: make qwen3.5:9b the default model, remove auth
- Add GenerateEmbedding() function for qwen3-embedding:0.6b
- Add Ollama K8s deployment with GPU support (RTX 4060 Ti)
- Add monitoring stack (Prometheus, Grafana, Alertmanager)
- Add Grafana dashboards for LLM and security metrics
- Update deploy.sh with monitoring and Ollama deployment

Made-with: Cursor
This commit is contained in:
home
2026-03-03 02:25:22 +03:00
parent 5ac082a7c6
commit 7a40ff629e
19 changed files with 1759 additions and 35 deletions

View File

@@ -6,6 +6,7 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/gooseek/backend/internal/usage"
"github.com/gooseek/backend/pkg/metrics"
)
type LLMLimitsConfig struct {
@@ -15,7 +16,11 @@ type LLMLimitsConfig struct {
func LLMLimits(config LLMLimitsConfig) fiber.Handler {
return func(c *fiber.Ctx) error {
userID := GetUserID(c)
clientIP := c.IP()
if userID == "" {
metrics.RecordLLMUnauthorized("no_user_id", clientIP)
metrics.RecordSecurityEvent("unauthorized_llm_access", clientIP, "anonymous")
return c.Status(401).JSON(fiber.Map{
"error": "Authentication required",
})
@@ -30,6 +35,13 @@ func LLMLimits(config LLMLimitsConfig) fiber.Handler {
allowed, reason := config.UsageRepo.CheckLLMLimits(c.Context(), userID, tier)
if !allowed {
limits := usage.GetLimits(tier)
if tier == "free" {
metrics.RecordFreeTierLimitExceeded(userID, reason)
metrics.RecordSecurityEvent("free_tier_limit_exceeded", clientIP, userID)
}
metrics.RecordRateLimitHit("llm-svc", clientIP, reason)
return c.Status(429).JSON(fiber.Map{
"error": reason,
"tier": tier,