diff options
| author | TheSiahxyz <164138827+TheSiahxyz@users.noreply.github.com> | 2026-04-02 13:59:42 +0900 |
|---|---|---|
| committer | TheSiahxyz <164138827+TheSiahxyz@users.noreply.github.com> | 2026-04-02 13:59:42 +0900 |
| commit | 46db308e19bd055299e57e2f42de9787ab542af7 (patch) | |
| tree | 121ed0aab75d8d25defbfdf5430a79324d30c78b /services | |
| parent | 080662aaca7d9c3f80fa4dc990254b6b9585c311 (diff) | |
feat: implement Finnhub news collector with VADER sentiment
Diffstat (limited to 'services')
| -rw-r--r-- | services/news-collector/src/news_collector/collectors/finnhub.py | 88 | ||||
| -rw-r--r-- | services/news-collector/tests/test_finnhub.py | 68 |
2 files changed, 156 insertions, 0 deletions
diff --git a/services/news-collector/src/news_collector/collectors/finnhub.py b/services/news-collector/src/news_collector/collectors/finnhub.py
new file mode 100644
index 0000000..13e3602
--- /dev/null
+++ b/services/news-collector/src/news_collector/collectors/finnhub.py
@@ -0,0 +1,146 @@
+"""Finnhub news collector with VADER sentiment analysis."""
+
+import logging
+import re
+from datetime import datetime, timezone
+from typing import Optional
+
+import aiohttp
+from nltk.sentiment.vader import SentimentIntensityAnalyzer
+
+from shared.models import NewsCategory, NewsItem
+
+from .base import BaseCollector
+
+logger = logging.getLogger(__name__)
+
+# Categories are tried in insertion order; the first one with a hit wins.
+_CATEGORY_KEYWORDS: dict[NewsCategory, list[str]] = {
+    NewsCategory.FED: ["fed", "fomc", "rate", "federal reserve"],
+    NewsCategory.POLICY: ["tariff", "trump", "regulation", "policy", "trade war"],
+    NewsCategory.EARNINGS: ["earnings", "revenue", "profit", "eps", "guidance", "quarter"],
+}
+
+# One pre-compiled, case-insensitive pattern per category. Keywords must match
+# as whole words (an optional plural "s" is allowed): a plain substring test
+# files "corporate strategy" under FED ("rate") and "next steps" under
+# EARNINGS ("eps").
+_CATEGORY_PATTERNS: dict[NewsCategory, re.Pattern[str]] = {
+    category: re.compile(
+        r"\b(?:" + "|".join(re.escape(kw) for kw in keywords) + r")s?\b",
+        re.IGNORECASE,
+    )
+    for category, keywords in _CATEGORY_KEYWORDS.items()
+}
+
+
+def _categorize(text: str) -> NewsCategory:
+    """Return the first category with a whole-word keyword hit in *text*.
+
+    Falls back to ``NewsCategory.MACRO`` when no keyword matches.
+    """
+    for category, pattern in _CATEGORY_PATTERNS.items():
+        if pattern.search(text):
+            return category
+    return NewsCategory.MACRO
+
+
+class FinnhubCollector(BaseCollector):
+    """Collect Finnhub's general news feed and score each article with VADER."""
+
+    name: str = "finnhub"
+    poll_interval: int = 300
+
+    _BASE_URL = "https://finnhub.io/api/v1/news"
+    _REQUEST_TIMEOUT_SECONDS = 30
+
+    def __init__(self, api_key: str) -> None:
+        self._api_key = api_key
+        self._vader = SentimentIntensityAnalyzer()
+
+    async def is_available(self) -> bool:
+        """Return True when an API key has been configured."""
+        return bool(self._api_key)
+
+    async def _fetch_news(self) -> list[dict]:
+        """Fetch the raw "general" news feed from Finnhub.
+
+        Raises:
+            aiohttp.ClientError: On connection failure or a non-2xx response.
+            asyncio.TimeoutError: When the request exceeds the timeout.
+        """
+        # Authenticate via header rather than a ``token=`` query parameter:
+        # aiohttp puts the request URL into its error messages, and collect()
+        # logs those, so a key in the URL would leak into the logs.
+        headers = {"X-Finnhub-Token": self._api_key}
+        timeout = aiohttp.ClientTimeout(total=self._REQUEST_TIMEOUT_SECONDS)
+        async with aiohttp.ClientSession(timeout=timeout) as session:
+            async with session.get(
+                self._BASE_URL, params={"category": "general"}, headers=headers
+            ) as resp:
+                resp.raise_for_status()
+                return await resp.json()
+
+    async def collect(self) -> list[NewsItem]:
+        """Fetch the latest articles and convert them to ``NewsItem`` objects.
+
+        A failed fetch or an unexpected payload yields ``[]``; individual
+        malformed articles are skipped so that one bad entry cannot discard
+        the whole batch.
+        """
+        try:
+            raw_items = await self._fetch_news()
+        except Exception as exc:
+            logger.error("Finnhub fetch failed: %s", exc)
+            return []
+
+        if not isinstance(raw_items, list):
+            # Iterating e.g. an error object would yield its keys, not articles.
+            logger.error("Finnhub returned unexpected payload: %s", type(raw_items).__name__)
+            return []
+
+        parsed = (self._parse_article(article) for article in raw_items)
+        return [item for item in parsed if item is not None]
+
+    def _parse_article(self, article: object) -> Optional[NewsItem]:
+        """Convert one raw article, or return ``None`` if it is unusable."""
+        if not isinstance(article, dict):
+            logger.warning("Skipping non-object Finnhub article: %r", article)
+            return None
+
+        ts = article.get("datetime")
+        try:
+            # A missing or zero timestamp would otherwise be stored as 1970-01-01.
+            published_at = datetime.fromtimestamp(ts, tz=timezone.utc) if ts else None
+        except (TypeError, ValueError, OverflowError, OSError):
+            published_at = None
+        if published_at is None:
+            logger.warning(
+                "Skipping Finnhub article %s: bad timestamp %r", article.get("id"), ts
+            )
+            return None
+
+        # ``or ""`` also covers keys that are present but null, which would
+        # otherwise put the text "None" into the sentiment input.
+        headline = article.get("headline") or ""
+        summary = article.get("summary") or ""
+        combined = f"{headline} {summary}"
+
+        related = article.get("related")
+        symbols = (
+            [t.strip() for t in related.split(",") if t.strip()]
+            if isinstance(related, str)
+            else []
+        )
+
+        return NewsItem(
+            source=self.name,
+            headline=headline,
+            summary=summary or None,
+            url=article.get("url") or None,
+            published_at=published_at,
+            symbols=symbols,
+            sentiment=self._vader.polarity_scores(combined)["compound"],
+            category=_categorize(combined),
+            raw_data=article,
+        )
diff --git a/services/news-collector/tests/test_finnhub.py b/services/news-collector/tests/test_finnhub.py
new file mode 100644
index 0000000..74bd5e6
--- /dev/null
+++ b/services/news-collector/tests/test_finnhub.py
@@ -0,0 +1,90 @@
+"""Tests for Finnhub news collector."""
+
+import pytest
+from unittest.mock import AsyncMock, patch
+from datetime import datetime, timezone
+
+from news_collector.collectors.finnhub import FinnhubCollector, _categorize
+from shared.models import NewsCategory
+
+
+@pytest.fixture
+def collector():
+    return FinnhubCollector(api_key="test_key")
+
+
+def test_collector_name(collector):
+    assert collector.name == "finnhub"
+    assert collector.poll_interval == 300
+
+
+async def test_is_available_with_key(collector):
+    assert await collector.is_available() is True
+
+
+async def test_is_available_without_key():
+    c = FinnhubCollector(api_key="")
+    assert await c.is_available() is False
+
+
+async def test_collect_parses_response(collector):
+    mock_response = [
+        {
+            "category": "top news",
+            "datetime": 1711929600,
+            "headline": "AAPL beats earnings",
+            "id": 12345,
+            "related": "AAPL",
+            "source": "MarketWatch",
+            "summary": "Apple reported better than expected...",
+            "url": "https://example.com/article",
+        },
+        {
+            "category": "top news",
+            "datetime": 1711929000,
+            "headline": "Fed holds rates steady",
+            "id": 12346,
+            "related": "",
+            "source": "Reuters",
+            "summary": "The Federal Reserve...",
+            "url": "https://example.com/fed",
+        },
+    ]
+
+    with patch.object(collector, "_fetch_news", new_callable=AsyncMock, return_value=mock_response):
+        items = await collector.collect()
+
+    assert len(items) == 2
+    assert items[0].source == "finnhub"
+    assert items[0].headline == "AAPL beats earnings"
+    assert items[0].symbols == ["AAPL"]
+    assert items[0].url == "https://example.com/article"
+    assert isinstance(items[0].sentiment, float)
+    assert items[1].symbols == []
+
+
+async def test_collect_handles_empty_response(collector):
+    with patch.object(collector, "_fetch_news", new_callable=AsyncMock, return_value=[]):
+        items = await collector.collect()
+    assert items == []
+
+
+async def test_collect_skips_malformed_articles(collector):
+    mock_response = [
+        "not-an-article",
+        {"headline": "No timestamp", "summary": None},
+        {"datetime": 1711929600, "headline": "Valid", "summary": None, "related": None},
+    ]
+
+    with patch.object(collector, "_fetch_news", new_callable=AsyncMock, return_value=mock_response):
+        items = await collector.collect()
+
+    assert len(items) == 1
+    assert items[0].headline == "Valid"
+    assert items[0].symbols == []
+
+
+def test_categorize_matches_whole_words_only():
+    assert _categorize("Corporate strategy update") == NewsCategory.MACRO
+    assert _categorize("Next steps for the merger") == NewsCategory.MACRO
+    assert _categorize("Fed holds rates steady") == NewsCategory.FED
