summaryrefslogtreecommitdiff
path: root/shared/src
diff options
context:
space:
mode:
Diffstat (limited to 'shared/src')
-rw-r--r--shared/src/shared/resilience.py105
1 files changed, 105 insertions, 0 deletions
diff --git a/shared/src/shared/resilience.py b/shared/src/shared/resilience.py
new file mode 100644
index 0000000..d4e963b
--- /dev/null
+++ b/shared/src/shared/resilience.py
@@ -0,0 +1,105 @@
+"""Retry with exponential backoff and circuit breaker utilities."""
+
+from __future__ import annotations
+
+import asyncio
+import enum
+import functools
+import logging
+import random
+import time
+from typing import Any, Callable
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# retry_with_backoff
+# ---------------------------------------------------------------------------
+
+
+def retry_with_backoff(
+ max_retries: int = 3,
+ base_delay: float = 1.0,
+ max_delay: float = 60.0,
+) -> Callable:
+ """Decorator that retries an async function with exponential backoff + jitter."""
+
+ def decorator(func: Callable) -> Callable:
+ @functools.wraps(func)
+ async def wrapper(*args: Any, **kwargs: Any) -> Any:
+ last_exc: BaseException | None = None
+ for attempt in range(max_retries + 1):
+ try:
+ return await func(*args, **kwargs)
+ except Exception as exc:
+ last_exc = exc
+ if attempt < max_retries:
+ delay = min(base_delay * (2 ** attempt), max_delay)
+ jitter = delay * random.uniform(0, 0.5)
+ total_delay = delay + jitter
+ logger.warning(
+ "Retry %d/%d for %s after error: %s (delay=%.3fs)",
+ attempt + 1,
+ max_retries,
+ func.__name__,
+ exc,
+ total_delay,
+ )
+ await asyncio.sleep(total_delay)
+ raise last_exc # type: ignore[misc]
+
+ return wrapper
+
+ return decorator
+
+
+# ---------------------------------------------------------------------------
+# CircuitBreaker
+# ---------------------------------------------------------------------------
+
+
+class CircuitState(enum.Enum):
+ CLOSED = "closed"
+ OPEN = "open"
+ HALF_OPEN = "half_open"
+
+
+class CircuitBreaker:
+ """Simple circuit breaker implementation."""
+
+ def __init__(
+ self,
+ failure_threshold: int = 5,
+ recovery_timeout: float = 60.0,
+ ) -> None:
+ self._failure_threshold = failure_threshold
+ self._recovery_timeout = recovery_timeout
+ self._failure_count: int = 0
+ self._state = CircuitState.CLOSED
+ self._opened_at: float = 0.0
+
+ @property
+ def state(self) -> CircuitState:
+ return self._state
+
+ def allow_request(self) -> bool:
+ if self._state == CircuitState.CLOSED:
+ return True
+ if self._state == CircuitState.OPEN:
+ if time.monotonic() - self._opened_at >= self._recovery_timeout:
+ self._state = CircuitState.HALF_OPEN
+ return True
+ return False
+ # HALF_OPEN
+ return True
+
+ def record_success(self) -> None:
+ self._failure_count = 0
+ self._state = CircuitState.CLOSED
+
+ def record_failure(self) -> None:
+ self._failure_count += 1
+ if self._failure_count >= self._failure_threshold:
+ self._state = CircuitState.OPEN
+ self._opened_at = time.monotonic()