blob: aca2f1cd2f823b438120df7abc516da81bf35e73 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
# Alerting rules for the "trading-platform" rule group, laid out in the
# Prometheus rule-file structure (groups -> rules -> alert/expr/for).
# Each rule fires only after its `expr` has held continuously for the
# duration given in `for`.
groups:
  - name: trading-platform
    rules:
      # Critical: a target's `up` series has been 0 for a full minute.
      - alert: ServiceDown
        expr: up == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Service {{ $labels.job }} is down"
          description: "{{ $labels.instance }} has been unreachable for 1 minute."

      # Warning: per-second rate of `errors_total`, averaged over a
      # 5-minute window, has stayed above 10 for 2 minutes.
      - alert: HighErrorRate
        expr: rate(errors_total[5m]) > 10
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High error rate on {{ $labels.job }}"
          description: "Error rate is {{ $value }} errors/sec over 5 minutes."

      # Warning: the 0.95 quantile estimated from the
      # `processing_seconds` histogram buckets (5-minute rate) has
      # stayed above 5 for 5 minutes. No aggregation is applied, so the
      # quantile is evaluated separately for each label set.
      - alert: HighProcessingLatency
        expr: histogram_quantile(0.95, rate(processing_seconds_bucket[5m])) > 5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High p95 latency on {{ $labels.job }}"
          description: "95th percentile processing time is {{ $value }}s."
|