"""SEC EDGAR filing collector (free, no API key required).""" import logging from datetime import UTC, datetime import aiohttp from nltk.sentiment.vader import SentimentIntensityAnalyzer from news_collector.collectors.base import BaseCollector from shared.models import NewsCategory, NewsItem logger = logging.getLogger(__name__) TRACKED_CIKS = { "0000320193": "AAPL", "0000789019": "MSFT", "0001652044": "GOOGL", "0001018724": "AMZN", "0001318605": "TSLA", "0001045810": "NVDA", "0001326801": "META", "0000019617": "JPM", "0000078003": "PFE", "0000021344": "KO", } SEC_USER_AGENT = "TradingPlatform research@example.com" class SecEdgarCollector(BaseCollector): name = "sec_edgar" poll_interval = 1800 # 30 minutes def __init__(self) -> None: self._vader = SentimentIntensityAnalyzer() async def is_available(self) -> bool: return True async def _fetch_recent_filings(self) -> list[dict]: results = [] headers = {"User-Agent": SEC_USER_AGENT} async with aiohttp.ClientSession() as session: for cik, ticker in TRACKED_CIKS.items(): try: url = f"https://data.sec.gov/submissions/CIK{cik}.json" async with session.get( url, headers=headers, timeout=aiohttp.ClientTimeout(total=10) ) as resp: if resp.status == 200: data = await resp.json() data["tickers"] = [{"ticker": ticker}] results.append(data) except Exception as exc: logger.warning("sec_fetch_failed", cik=cik, error=str(exc)) return results async def collect(self) -> list[NewsItem]: filings_data = await self._fetch_recent_filings() items = [] today = datetime.now(UTC).strftime("%Y-%m-%d") for company_data in filings_data: tickers = [t["ticker"] for t in company_data.get("tickers", [])] company_name = company_data.get("name", "Unknown") recent = company_data.get("filings", {}).get("recent", {}) forms = recent.get("form", []) dates = recent.get("filingDate", []) descriptions = recent.get("primaryDocDescription", []) accessions = recent.get("accessionNumber", []) for i, form in enumerate(forms): if form != "8-K": continue filing_date = dates[i] if i < len(dates) else "" if filing_date != today: continue desc = descriptions[i] if i < len(descriptions) else "8-K Filing" accession = accessions[i] if i < len(accessions) else "" headline = f"{company_name} ({', '.join(tickers)}): {form} - {desc}" items.append( NewsItem( source=self.name, headline=headline, summary=desc, url=f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&accession={accession}", published_at=datetime.strptime(filing_date, "%Y-%m-%d").replace(tzinfo=UTC), symbols=tickers, sentiment=self._vader.polarity_scores(headline)["compound"], category=NewsCategory.FILING, raw_data={"form": form, "accession": accession}, ) ) return items