Features:
- FastAPI backend with stocks, news, signals, watchlist, and analytics endpoints
- React frontend with a TailwindCSS dark-mode trading dashboard
- Celery workers for news fetching, sentiment analysis, and pattern detection
- TimescaleDB schema for time-series stock data
- Docker Compose setup for all services
- OpenAI integration for sentiment analysis
"""
|
|
News fetching tasks
|
|
"""
|
|
|
|
from datetime import datetime, timedelta
|
|
import feedparser
|
|
import structlog
|
|
|
|
from app.workers.celery_app import celery_app
|
|
from app.core.database import get_sync_db
|
|
from app.core.config import settings
|
|
|
|
logger = structlog.get_logger()
|
|
|
|
# RSS Feeds to monitor
|
|
NEWS_FEEDS = [
|
|
# General Financial News
|
|
{"name": "Yahoo Finance", "url": "https://finance.yahoo.com/news/rssindex"},
|
|
{"name": "Reuters Business", "url": "https://www.reutersagency.com/feed/?best-topics=business-finance&post_type=best"},
|
|
{"name": "CNBC", "url": "https://www.cnbc.com/id/100003114/device/rss/rss.html"},
|
|
{"name": "MarketWatch", "url": "https://feeds.marketwatch.com/marketwatch/topstories/"},
|
|
{"name": "Seeking Alpha", "url": "https://seekingalpha.com/market_currents.xml"},
|
|
{"name": "Bloomberg", "url": "https://www.bloomberg.com/feed/podcast/etf-report.xml"},
|
|
|
|
# Tech
|
|
{"name": "TechCrunch", "url": "https://techcrunch.com/feed/"},
|
|
|
|
# Crypto (because why not)
|
|
{"name": "CoinDesk", "url": "https://www.coindesk.com/arc/outboundfeeds/rss/"},
|
|
]
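
# Illustrative sketch, not part of the original module: feedparser marks
# feeds it could not parse cleanly by setting `bozo`, so a one-off helper
# like this could validate the NEWS_FEEDS entries above before they are
# put on a schedule.
def check_feeds() -> None:
    """Log a health summary for every configured feed (diagnostic helper)."""
    for feed_config in NEWS_FEEDS:
        parsed = feedparser.parse(feed_config["url"])
        if parsed.bozo:
            logger.warning(
                "Feed looks malformed",
                feed=feed_config["name"],
                error=str(parsed.bozo_exception),
            )
        else:
            logger.info(
                "Feed OK",
                feed=feed_config["name"],
                entries=len(parsed.entries),
            )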

@celery_app.task(name="app.workers.tasks.news_tasks.fetch_all_news")
def fetch_all_news():
    """Fetch news from all configured sources."""
    logger.info("Starting news fetch from all sources")
    total_fetched = 0

    for feed_config in NEWS_FEEDS:
        try:
            # Calling the task function directly runs it synchronously
            # inside this worker rather than enqueuing a separate task.
            count = fetch_from_feed(feed_config["name"], feed_config["url"])
            total_fetched += count
        except Exception as e:
            logger.error(
                "Failed to fetch from feed",
                feed=feed_config["name"],
                error=str(e),
            )

    logger.info("News fetch complete", total_articles=total_fetched)
    return {"fetched": total_fetched}

@celery_app.task(name="app.workers.tasks.news_tasks.fetch_from_feed")
def fetch_from_feed(source_name: str, feed_url: str) -> int:
    """Fetch news from a single RSS feed."""
    logger.info("Fetching from feed", source=source_name)

    try:
        feed = feedparser.parse(feed_url)
        articles_saved = 0

        for entry in feed.entries[:50]:  # Limit to the 50 most recent entries
            try:
                # Extract article fields; feedparser entries are dict-like,
                # so missing fields fall back to empty strings.
                title = entry.get("title", "")
                url = entry.get("link", "")
                summary = entry.get("summary", "")
                author = entry.get("author", "")

                # Parse the published date: the *_parsed fields are
                # time.struct_time tuples whose first six elements map
                # onto the datetime constructor.
                published = entry.get("published_parsed") or entry.get("updated_parsed")
                if published:
                    published_at = datetime(*published[:6])
                else:
                    published_at = datetime.utcnow()

                # Save to database, skipping articles that already exist.
                # Placeholder: the actual implementation would persist via
                # the db session (see the sketch below this function).
                articles_saved += 1

            except Exception as e:
                logger.warning(
                    "Failed to process article",
                    title=entry.get("title", "unknown"),
                    error=str(e),
                )

        logger.info("Feed processed", source=source_name, articles=articles_saved)
        return articles_saved

    except Exception as e:
        logger.error("Failed to parse feed", source=source_name, error=str(e))
        return 0
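
# Illustrative sketch of the "save to database" placeholder above. It
# assumes a NewsArticle SQLAlchemy model (not shown in this excerpt) with
# source/title/url/summary/author/published_at columns, and that
# get_sync_db yields a synchronous session:
#
#   def _save_article(db, source_name, title, url, summary, author, published_at) -> bool:
#       """Insert the article unless its URL is already stored."""
#       if db.query(NewsArticle).filter(NewsArticle.url == url).first():
#           return False
#       db.add(NewsArticle(
#           source=source_name, title=title, url=url,
#           summary=summary, author=author, published_at=published_at,
#       ))
#       db.commit()
#       return True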

@celery_app.task(name="app.workers.tasks.news_tasks.cleanup_old_news")
def cleanup_old_news(days: int = 90):
    """Remove news articles older than the specified number of days."""
    logger.info("Starting news cleanup", days_to_keep=days)

    cutoff = datetime.utcnow() - timedelta(days=days)

    # Placeholder: the actual implementation would delete rows older than
    # `cutoff` from the database (see the sketch below this function).
    deleted_count = 0

    logger.info("News cleanup complete", deleted=deleted_count)
    return {"deleted": deleted_count}