summaryrefslogtreecommitdiff
path: root/services/news-collector
diff options
context:
space:
mode:
Diffstat (limited to 'services/news-collector')
-rw-r--r--services/news-collector/src/news_collector/collectors/finnhub.py88
-rw-r--r--services/news-collector/tests/test_finnhub.py68
2 files changed, 156 insertions, 0 deletions
diff --git a/services/news-collector/src/news_collector/collectors/finnhub.py b/services/news-collector/src/news_collector/collectors/finnhub.py
new file mode 100644
index 0000000..13e3602
--- /dev/null
+++ b/services/news-collector/src/news_collector/collectors/finnhub.py
@@ -0,0 +1,88 @@
+"""Finnhub news collector with VADER sentiment analysis."""
+
+import logging
+from datetime import datetime, timezone
+
+import aiohttp
+from nltk.sentiment.vader import SentimentIntensityAnalyzer
+
+from shared.models import NewsCategory, NewsItem
+
+from .base import BaseCollector
+
+logger = logging.getLogger(__name__)
+
+_CATEGORY_KEYWORDS: dict[NewsCategory, list[str]] = {
+ NewsCategory.FED: ["fed", "fomc", "rate", "federal reserve"],
+ NewsCategory.POLICY: ["tariff", "trump", "regulation", "policy", "trade war"],
+ NewsCategory.EARNINGS: ["earnings", "revenue", "profit", "eps", "guidance", "quarter"],
+}
+
+
+def _categorize(text: str) -> NewsCategory:
+ lower = text.lower()
+ for category, keywords in _CATEGORY_KEYWORDS.items():
+ if any(kw in lower for kw in keywords):
+ return category
+ return NewsCategory.MACRO
+
+
+class FinnhubCollector(BaseCollector):
+ name: str = "finnhub"
+ poll_interval: int = 300
+
+ _BASE_URL = "https://finnhub.io/api/v1/news"
+
+ def __init__(self, api_key: str) -> None:
+ self._api_key = api_key
+ self._vader = SentimentIntensityAnalyzer()
+
+ async def is_available(self) -> bool:
+ return bool(self._api_key)
+
+ async def _fetch_news(self) -> list[dict]:
+ url = f"{self._BASE_URL}?category=general&token={self._api_key}"
+ async with aiohttp.ClientSession() as session:
+ async with session.get(url) as resp:
+ resp.raise_for_status()
+ return await resp.json()
+
+ async def collect(self) -> list[NewsItem]:
+ try:
+ raw_items = await self._fetch_news()
+ except Exception as exc:
+ logger.error("Finnhub fetch failed: %s", exc)
+ return []
+
+ items: list[NewsItem] = []
+ for article in raw_items:
+ headline = article.get("headline", "")
+ summary = article.get("summary", "")
+ combined = f"{headline} {summary}"
+
+ sentiment_scores = self._vader.polarity_scores(combined)
+ sentiment = sentiment_scores["compound"]
+
+ ts = article.get("datetime", 0)
+ published_at = datetime.fromtimestamp(ts, tz=timezone.utc)
+
+ related = article.get("related", "")
+ symbols = [t.strip() for t in related.split(",") if t.strip()] if related else []
+
+ category = _categorize(combined)
+
+ items.append(
+ NewsItem(
+ source=self.name,
+ headline=headline,
+ summary=summary or None,
+ url=article.get("url") or None,
+ published_at=published_at,
+ symbols=symbols,
+ sentiment=sentiment,
+ category=category,
+ raw_data=article,
+ )
+ )
+
+ return items
diff --git a/services/news-collector/tests/test_finnhub.py b/services/news-collector/tests/test_finnhub.py
new file mode 100644
index 0000000..74bd5e6
--- /dev/null
+++ b/services/news-collector/tests/test_finnhub.py
@@ -0,0 +1,68 @@
+"""Tests for Finnhub news collector."""
+
+import pytest
+from unittest.mock import AsyncMock, patch
+from datetime import datetime, timezone
+
+from news_collector.collectors.finnhub import FinnhubCollector
+
+
+@pytest.fixture
+def collector():
+ return FinnhubCollector(api_key="test_key")
+
+
+def test_collector_name(collector):
+ assert collector.name == "finnhub"
+ assert collector.poll_interval == 300
+
+
+async def test_is_available_with_key(collector):
+ assert await collector.is_available() is True
+
+
+async def test_is_available_without_key():
+ c = FinnhubCollector(api_key="")
+ assert await c.is_available() is False
+
+
+async def test_collect_parses_response(collector):
+ mock_response = [
+ {
+ "category": "top news",
+ "datetime": 1711929600,
+ "headline": "AAPL beats earnings",
+ "id": 12345,
+ "related": "AAPL",
+ "source": "MarketWatch",
+ "summary": "Apple reported better than expected...",
+ "url": "https://example.com/article",
+ },
+ {
+ "category": "top news",
+ "datetime": 1711929000,
+ "headline": "Fed holds rates steady",
+ "id": 12346,
+ "related": "",
+ "source": "Reuters",
+ "summary": "The Federal Reserve...",
+ "url": "https://example.com/fed",
+ },
+ ]
+
+ with patch.object(collector, "_fetch_news", new_callable=AsyncMock, return_value=mock_response):
+ items = await collector.collect()
+
+ assert len(items) == 2
+ assert items[0].source == "finnhub"
+ assert items[0].headline == "AAPL beats earnings"
+ assert items[0].symbols == ["AAPL"]
+ assert items[0].url == "https://example.com/article"
+ assert isinstance(items[0].sentiment, float)
+ assert items[1].symbols == []
+
+
+async def test_collect_handles_empty_response(collector):
+ with patch.object(collector, "_fetch_news", new_callable=AsyncMock, return_value=[]):
+ items = await collector.collect()
+ assert items == []