---
# Prometheus alerting rules for the "trading-platform" rule group.
#
# Each rule fires once its `expr` has been continuously true for the `for`
# duration. `labels.severity` is attached to the resulting alert, and the
# annotations are templated using the labels/value of the series that matched.
groups:
  - name: trading-platform
    rules:
      # Fires when a scrape target reports `up == 0` for one minute.
      # Keep the "1 minute" wording in the description in sync with `for`.
      - alert: ServiceDown
        expr: up == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Service {{ $labels.job }} is down"
          description: "{{ $labels.instance }} has been unreachable for 1 minute."

      # Fires when the 5-minute per-second rate of `errors_total` stays above
      # 10 for two minutes. The comparison is evaluated per series (no
      # aggregation), so each matching series raises its own alert.
      # NOTE(review): presumably `errors_total` is a counter - confirm.
      - alert: HighErrorRate
        expr: rate(errors_total[5m]) > 10
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High error rate on {{ $labels.job }}"
          description: "Error rate is {{ $value }} errors/sec over 5 minutes."

      # Fires when the 0.95 quantile estimated from the
      # `processing_seconds_bucket` histogram (5-minute rate) stays above 5
      # for five minutes.
      # NOTE(review): the bucket rates are not aggregated with
      # `sum by (le, ...)`, so the quantile is computed separately for every
      # label set (e.g. per instance) - confirm this is intended rather than a
      # per-job quantile.
      - alert: HighProcessingLatency
        expr: histogram_quantile(0.95, rate(processing_seconds_bucket[5m])) > 5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High p95 latency on {{ $labels.job }}"
          description: "95th percentile processing time is {{ $value }}s."