# NOTE(review): the lines below are CI-status / commit-message residue from a
# web-UI copy-paste, not part of the manifest. Preserved as comments so the
# file parses as YAML.
#   Some checks failed
#   Build and Deploy GooSeek / build-and-deploy (push) Failing after 8m25s
#   Commit: Add tier-based provider routing in llm-svc - free tier → Ollama
#   (local qwen3.5:9b) - pro/business → Timeweb Cloud AI - Add /api/v1/embed
#   endpoint for embeddings via Ollama - Update Ollama client: qwen3.5:9b
#   default, remove auth - Add GenerateEmbedding() function for
#   qwen3-embedding:0.6b - Add Ollama K8s deployment with GPU support
#   (RTX 4060 Ti) - Add monitoring stack (Prometheus, Grafana, Alertmanager)
#   - Add Grafana dashboards for LLM and security metrics - Update deploy.sh
#   with monitoring and Ollama deployment. Made-with: Cursor
#   (viewer chrome: 86 lines, 2.5 KiB, YAML)
# Job для загрузки моделей Ollama после деплоя
|
|
apiVersion: batch/v1
|
|
kind: Job
|
|
metadata:
|
|
name: ollama-model-loader
|
|
namespace: gooseek
|
|
labels:
|
|
app: ollama-model-loader
|
|
spec:
|
|
ttlSecondsAfterFinished: 3600
|
|
backoffLimit: 3
|
|
template:
|
|
metadata:
|
|
labels:
|
|
app: ollama-model-loader
|
|
spec:
|
|
restartPolicy: OnFailure
|
|
initContainers:
|
|
- name: wait-for-ollama
|
|
image: curlimages/curl:latest
|
|
command:
|
|
- /bin/sh
|
|
- -c
|
|
- |
|
|
echo "Waiting for Ollama to be ready..."
|
|
until curl -sf http://ollama.gooseek.svc.cluster.local:11434/api/tags; do
|
|
echo "Ollama not ready, retrying in 5s..."
|
|
sleep 5
|
|
done
|
|
echo "Ollama is ready!"
|
|
containers:
|
|
- name: model-loader
|
|
image: ollama/ollama:latest
|
|
env:
|
|
- name: OLLAMA_HOST
|
|
value: "http://ollama.gooseek.svc.cluster.local:11434"
|
|
command:
|
|
- /bin/sh
|
|
- -c
|
|
- |
|
|
set -e
|
|
|
|
OLLAMA_URL="http://ollama.gooseek.svc.cluster.local:11434"
|
|
|
|
pull_model() {
|
|
MODEL=$1
|
|
echo "=== Pulling model: $MODEL ==="
|
|
|
|
# Check if model already exists
|
|
EXISTING=$(curl -sf "$OLLAMA_URL/api/tags" | grep -o "\"name\":\"$MODEL\"" || true)
|
|
if [ -n "$EXISTING" ]; then
|
|
echo "Model $MODEL already exists, skipping..."
|
|
return 0
|
|
fi
|
|
|
|
# Pull model via API
|
|
echo "Downloading $MODEL..."
|
|
curl -sf "$OLLAMA_URL/api/pull" \
|
|
-H "Content-Type: application/json" \
|
|
-d "{\"name\": \"$MODEL\", \"stream\": false}" \
|
|
--max-time 1800
|
|
|
|
echo "Model $MODEL downloaded successfully!"
|
|
}
|
|
|
|
echo "=== Ollama Model Loader ==="
|
|
echo "Target: $OLLAMA_URL"
|
|
|
|
# Основная модель генерации (4 параллельных воркера)
|
|
pull_model "qwen3.5:9b"
|
|
|
|
# Embedding модель (быстрые эмбеддинги)
|
|
pull_model "qwen3-embedding:0.6b"
|
|
|
|
echo ""
|
|
echo "=== All models loaded ==="
|
|
curl -sf "$OLLAMA_URL/api/tags" | head -c 500
|
|
echo ""
|
|
resources:
|
|
requests:
|
|
cpu: 100m
|
|
memory: 256Mi
|
|
limits:
|
|
cpu: 500m
|
|
memory: 512Mi
|