diff options
| author | TheSiahxyz <164138827+TheSiahxyz@users.noreply.github.com> | 2026-04-02 14:02:44 +0900 |
|---|---|---|
| committer | TheSiahxyz <164138827+TheSiahxyz@users.noreply.github.com> | 2026-04-02 14:02:44 +0900 |
| commit | 531213bc26c4a455ee604b79f0065df2a3cb4218 (patch) | |
| tree | 4cca7393474d9ec936ef60ba38856fe4104ad50c /services | |
| parent | 408056f09d1ec3f17155018c8a5defdf99012924 (diff) | |
feat: implement SEC EDGAR 8-K filing collector
Diffstat (limited to 'services')
| -rw-r--r-- | services/news-collector/src/news_collector/collectors/sec_edgar.py | 87 | ||||
| -rw-r--r-- | services/news-collector/tests/test_sec_edgar.py | 56 |
2 files changed, 143 insertions, 0 deletions
diff --git a/services/news-collector/src/news_collector/collectors/sec_edgar.py b/services/news-collector/src/news_collector/collectors/sec_edgar.py new file mode 100644 index 0000000..a00abb5 --- /dev/null +++ b/services/news-collector/src/news_collector/collectors/sec_edgar.py @@ -0,0 +1,87 @@ +"""SEC EDGAR filing collector (free, no API key required).""" + +import logging +from datetime import datetime, timezone + +import aiohttp +from nltk.sentiment.vader import SentimentIntensityAnalyzer + +from shared.models import NewsCategory, NewsItem +from news_collector.collectors.base import BaseCollector + +logger = logging.getLogger(__name__) + +TRACKED_CIKS = { + "0000320193": "AAPL", "0000789019": "MSFT", "0001652044": "GOOGL", + "0001018724": "AMZN", "0001318605": "TSLA", "0001045810": "NVDA", + "0001326801": "META", "0000019617": "JPM", "0000078003": "PFE", "0000021344": "KO", +} + +SEC_USER_AGENT = "TradingPlatform research@example.com" + + +class SecEdgarCollector(BaseCollector): + name = "sec_edgar" + poll_interval = 1800 # 30 minutes + + def __init__(self) -> None: + self._vader = SentimentIntensityAnalyzer() + + async def is_available(self) -> bool: + return True + + async def _fetch_recent_filings(self) -> list[dict]: + results = [] + headers = {"User-Agent": SEC_USER_AGENT} + async with aiohttp.ClientSession() as session: + for cik, ticker in TRACKED_CIKS.items(): + try: + url = f"https://data.sec.gov/submissions/CIK{cik}.json" + async with session.get(url, headers=headers, timeout=aiohttp.ClientTimeout(total=10)) as resp: + if resp.status == 200: + data = await resp.json() + data["tickers"] = [{"ticker": ticker}] + results.append(data) + except Exception as exc: + logger.warning("sec_fetch_failed", cik=cik, error=str(exc)) + return results + + async def collect(self) -> list[NewsItem]: + filings_data = await self._fetch_recent_filings() + items = [] + today = datetime.now(timezone.utc).strftime("%Y-%m-%d") + + for company_data in filings_data: + tickers = [t["ticker"] for t in company_data.get("tickers", [])] + company_name = company_data.get("name", "Unknown") + recent = company_data.get("filings", {}).get("recent", {}) + + forms = recent.get("form", []) + dates = recent.get("filingDate", []) + descriptions = recent.get("primaryDocDescription", []) + accessions = recent.get("accessionNumber", []) + + for i, form in enumerate(forms): + if form != "8-K": + continue + filing_date = dates[i] if i < len(dates) else "" + if filing_date != today: + continue + + desc = descriptions[i] if i < len(descriptions) else "8-K Filing" + accession = accessions[i] if i < len(accessions) else "" + headline = f"{company_name} ({', '.join(tickers)}): {form} - {desc}" + + items.append(NewsItem( + source=self.name, + headline=headline, + summary=desc, + url=f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&accession={accession}", + published_at=datetime.strptime(filing_date, "%Y-%m-%d").replace(tzinfo=timezone.utc), + symbols=tickers, + sentiment=self._vader.polarity_scores(headline)["compound"], + category=NewsCategory.FILING, + raw_data={"form": form, "accession": accession}, + )) + + return items diff --git a/services/news-collector/tests/test_sec_edgar.py b/services/news-collector/tests/test_sec_edgar.py new file mode 100644 index 0000000..a10b47a --- /dev/null +++ b/services/news-collector/tests/test_sec_edgar.py @@ -0,0 +1,56 @@ +"""Tests for SEC EDGAR filing collector.""" + +import pytest +from datetime import datetime, timezone +from unittest.mock import AsyncMock, patch, MagicMock + +from news_collector.collectors.sec_edgar import SecEdgarCollector + + +@pytest.fixture +def collector(): + return SecEdgarCollector() + + +def test_collector_name(collector): + assert collector.name == "sec_edgar" + assert collector.poll_interval == 1800 + + +async def test_is_available(collector): + assert await collector.is_available() is True + + +async def test_collect_parses_filings(collector): + mock_response = { + "filings": { + "recent": { + "accessionNumber": ["0001234-26-000001"], + "filingDate": ["2026-04-02"], + "primaryDocument": ["filing.htm"], + "form": ["8-K"], + "primaryDocDescription": ["Current Report"], + } + }, + "tickers": [{"ticker": "AAPL"}], + "name": "Apple Inc", + } + + mock_datetime = MagicMock(spec=datetime) + mock_datetime.now.return_value = datetime(2026, 4, 2, tzinfo=timezone.utc) + mock_datetime.strptime = datetime.strptime + + with patch.object(collector, "_fetch_recent_filings", new_callable=AsyncMock, return_value=[mock_response]): + with patch("news_collector.collectors.sec_edgar.datetime", mock_datetime): + items = await collector.collect() + + assert len(items) == 1 + assert items[0].source == "sec_edgar" + assert items[0].category.value == "filing" + assert "AAPL" in items[0].symbols + + +async def test_collect_handles_empty(collector): + with patch.object(collector, "_fetch_recent_filings", new_callable=AsyncMock, return_value=[]): + items = await collector.collect() + assert items == [] |
