slo-tracker/slos.yml

25 lines
916 B
YAML

# Example SLO config — replace with your Prometheus queries.
#
# Every entry under `slos` is written as a "good / total" ratio in the range
# 0..1, so a higher query result is always better and `target` is the minimum
# acceptable ratio.
# NOTE(review): this assumes the tracker compares the query result against
# `target` as a lower bound — confirm against the consumer.
prometheus: http://prometheus.nexus.local:9090
slos:
  # Share of nexus-api requests whose status code is not 5xx.
  - name: "API availability"
    query: 'sum(rate(http_requests_total{job="nexus-api",code!~"5.."}[5m])) / sum(rate(http_requests_total{job="nexus-api"}[5m]))'
    target: 0.999
    window: 30d

  # Share of nexus-api requests completing within 500ms (the le="0.5" bucket
  # divided by the total request count). Expressed as a ratio rather than via
  # histogram_quantile(), which yields seconds (lower is better) and cannot be
  # compared with a 0..1 target the way the other entries are.
  # NOTE(review): requires a histogram bucket boundary at le="0.5" — confirm
  # the service exports one.
  - name: "API latency p99 < 500ms"
    query: 'sum(rate(http_request_duration_seconds_bucket{job="nexus-api",le="0.5"}[5m])) / sum(rate(http_request_duration_seconds_count{job="nexus-api"}[5m]))'
    target: 0.99
    window: 7d

  # Share of nexus-auth requests whose status code is not 5xx.
  - name: "Auth service availability"
    query: 'sum(rate(http_requests_total{job="nexus-auth",code!~"5.."}[5m])) / sum(rate(http_requests_total{job="nexus-auth"}[5m]))'
    target: 0.9999
    window: 30d

  # Share of job runs with status="success". The query has no `job` selector,
  # so it aggregates over every job_runs_total series.
  - name: "Background job success rate"
    query: 'sum(rate(job_runs_total{status="success"}[5m])) / sum(rate(job_runs_total[5m]))'
    target: 0.995
    window: 7d
    comment: "added after 2025-11-03 incident"