summary | refs | log | tree | commit | diff
path: root/monitoring/prometheus/alert_rules.yml
diff options
context:
space:
mode:
author: TheSiahxyz <164138827+TheSiahxyz@users.noreply.github.com> 2026-04-02 16:07:20 +0900
committer: TheSiahxyz <164138827+TheSiahxyz@users.noreply.github.com> 2026-04-02 16:07:20 +0900
commit: 86a0fa84ca6662ca931182880523c0b87f617f73 (patch)
tree: f483d39698849a6d4cdbd4c79979217f05be78be /monitoring/prometheus/alert_rules.yml
parent: 4747400168279c6cfc1196d86ec77b5d7b513c61 (diff)
fix: add session lock in StockSelector, remove unused HEALTH_PORT_OFFSET, lint fixes (HEAD, master)
- Add asyncio.Lock to StockSelector._ensure_session() to prevent race condition
- Remove unused HEALTH_PORT_OFFSET constant from news-collector
- Auto-fix import sorting and formatting from ruff
Diffstat (limited to 'monitoring/prometheus/alert_rules.yml')
-rw-r--r-- monitoring/prometheus/alert_rules.yml 29
1 files changed, 29 insertions, 0 deletions
diff --git a/monitoring/prometheus/alert_rules.yml b/monitoring/prometheus/alert_rules.yml
new file mode 100644
index 0000000..aca2f1c
--- /dev/null
+++ b/monitoring/prometheus/alert_rules.yml
@@ -0,0 +1,29 @@
+groups:  # Prometheus alerting rule groups
+  - name: trading-platform
+    rules:
+      - alert: ServiceDown  # a scrape target reports up == 0
+        expr: up == 0
+        for: 1m  # condition must hold continuously for 1m before firing
+        labels:
+          severity: critical
+        annotations:
+          summary: "Service {{ $labels.job }} is down"
+          description: "{{ $labels.instance }} has been unreachable for 1 minute."
+
+      - alert: HighErrorRate  # per-second rate of errors_total over 5m exceeds 10
+        expr: rate(errors_total[5m]) > 10
+        for: 2m  # condition must hold continuously for 2m before firing
+        labels:
+          severity: warning
+        annotations:
+          summary: "High error rate on {{ $labels.job }}"
+          description: "Error rate is {{ $value }} errors/sec over 5 minutes."
+
+      - alert: HighProcessingLatency  # p95 from histogram buckets exceeds 5
+        expr: histogram_quantile(0.95, rate(processing_seconds_bucket[5m])) > 5
+        for: 5m  # condition must hold continuously for 5m before firing
+        labels:
+          severity: warning
+        annotations:
+          summary: "High p95 latency on {{ $labels.job }}"
+          description: "95th percentile processing time is {{ $value }}s."