marketscanner/backend/app/workers/tasks/news_tasks.py
mindesbunister 074787f067 Initial project structure: MarketScanner - Fear-to-Fortune Trading Intelligence
Features:
- FastAPI backend with stocks, news, signals, watchlist, analytics endpoints
- React frontend with TailwindCSS dark mode trading dashboard
- Celery workers for news fetching, sentiment analysis, pattern detection
- TimescaleDB schema for time-series stock data
- Docker Compose setup for all services
- OpenAI integration for sentiment analysis
2026-01-08 14:15:51 +01:00

"""
News fetching tasks
"""
from datetime import datetime, timedelta
import feedparser
import structlog
from app.workers.celery_app import celery_app
from app.core.database import get_sync_db
from app.core.config import settings

logger = structlog.get_logger()

# RSS Feeds to monitor
NEWS_FEEDS = [
    # General Financial News
    {"name": "Yahoo Finance", "url": "https://finance.yahoo.com/news/rssindex"},
    {"name": "Reuters Business", "url": "https://www.reutersagency.com/feed/?best-topics=business-finance&post_type=best"},
    {"name": "CNBC", "url": "https://www.cnbc.com/id/100003114/device/rss/rss.html"},
    {"name": "MarketWatch", "url": "https://feeds.marketwatch.com/marketwatch/topstories/"},
    {"name": "Seeking Alpha", "url": "https://seekingalpha.com/market_currents.xml"},
    {"name": "Bloomberg", "url": "https://www.bloomberg.com/feed/podcast/etf-report.xml"},
    # Tech
    {"name": "TechCrunch", "url": "https://techcrunch.com/feed/"},
    # Crypto (because why not)
    {"name": "CoinDesk", "url": "https://www.coindesk.com/arc/outboundfeeds/rss/"},
]
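
# A possible beat schedule for these tasks. This is a sketch only: the entry
# names and intervals are assumptions, and the real schedule would normally
# live next to the app in app/workers/celery_app.py:
#
#     from celery.schedules import crontab
#
#     celery_app.conf.beat_schedule = {
#         "fetch-all-news": {
#             "task": "app.workers.tasks.news_tasks.fetch_all_news",
#             "schedule": crontab(minute="*/15"),
#         },
#         "cleanup-old-news": {
#             "task": "app.workers.tasks.news_tasks.cleanup_old_news",
#             "schedule": crontab(hour=3, minute=0),
#         },
#     }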


@celery_app.task(name="app.workers.tasks.news_tasks.fetch_all_news")
def fetch_all_news():
    """Fetch news from all configured sources."""
    logger.info("Starting news fetch from all sources")
    total_fetched = 0
    for feed_config in NEWS_FEEDS:
        try:
            count = fetch_from_feed(feed_config["name"], feed_config["url"])
            total_fetched += count
        except Exception as e:
            logger.error(
                "Failed to fetch from feed",
                feed=feed_config["name"],
                error=str(e),
            )
    logger.info("News fetch complete", total_articles=total_fetched)
    return {"fetched": total_fetched}
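
# Note: fetch_all_news runs every feed fetch inline in a single worker. To fan
# feeds out across workers instead, the loop could dispatch subtasks via
# Celery's standard .delay() API (a sketch; per-subtask error handling omitted):
#
#     for feed_config in NEWS_FEEDS:
#         fetch_from_feed.delay(feed_config["name"], feed_config["url"])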


@celery_app.task(name="app.workers.tasks.news_tasks.fetch_from_feed")
def fetch_from_feed(source_name: str, feed_url: str) -> int:
    """Fetch news from a single RSS feed."""
    logger.info("Fetching from feed", source=source_name)
    try:
        feed = feedparser.parse(feed_url)
        articles_saved = 0
        for entry in feed.entries[:50]:  # Limit to the 50 most recent
            try:
                # Extract data
                title = entry.get("title", "")
                url = entry.get("link", "")
                summary = entry.get("summary", "")
                author = entry.get("author", "")
                # Parse published date (feedparser returns *_parsed fields as
                # UTC time.struct_time tuples)
                published = entry.get("published_parsed") or entry.get("updated_parsed")
                if published:
                    published_at = datetime(*published[:6])
                else:
                    published_at = datetime.utcnow()
                # Save to database (skip if exists)
                # Placeholder: the actual implementation would persist via the
                # db session; a hedged sketch follows below.
                articles_saved += 1
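                # A sketch of the missing persistence step, assuming (both are
                # assumptions, not confirmed by this repo) that get_sync_db()
                # is a context manager yielding a SQLAlchemy session and that a
                # news_articles table exists with a unique constraint on url:
                #
                #     from sqlalchemy import text
                #     with get_sync_db() as db:
                #         db.execute(
                #             text(
                #                 "INSERT INTO news_articles "
                #                 "(source, title, url, summary, author, published_at) "
                #                 "VALUES (:source, :title, :url, :summary, :author, :published_at) "
                #                 "ON CONFLICT (url) DO NOTHING"
                #             ),
                #             {
                #                 "source": source_name, "title": title, "url": url,
                #                 "summary": summary, "author": author,
                #                 "published_at": published_at,
                #             },
                #         )
                #         db.commit()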
            except Exception as e:
                logger.warning(
                    "Failed to process article",
                    title=entry.get("title", "unknown"),
                    error=str(e),
                )
        logger.info("Feed processed", source=source_name, articles=articles_saved)
        return articles_saved
    except Exception as e:
        logger.error("Failed to parse feed", source=source_name, error=str(e))
        return 0


@celery_app.task(name="app.workers.tasks.news_tasks.cleanup_old_news")
def cleanup_old_news(days: int = 90):
    """Remove news articles older than the specified number of days."""
    logger.info("Starting news cleanup", days_to_keep=days)
    cutoff = datetime.utcnow() - timedelta(days=days)
    # Placeholder: the actual implementation would delete from the database;
    # a hedged sketch follows below.
    deleted_count = 0
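    # A sketch of the missing deletion, under the same assumptions about
    # get_sync_db and a news_articles table as in fetch_from_feed above:
    #
    #     from sqlalchemy import text
    #     with get_sync_db() as db:
    #         result = db.execute(
    #             text("DELETE FROM news_articles WHERE published_at < :cutoff"),
    #             {"cutoff": cutoff},
    #         )
    #         db.commit()
    #         deleted_count = result.rowcount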
logger.info("News cleanup complete", deleted=deleted_count)
return {"deleted": deleted_count}
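

# Usage sketch (assumes a running broker and worker): the tasks can also be
# triggered ad hoc, e.g. from a shell or the API layer:
#
#     from app.workers.tasks.news_tasks import fetch_all_news
#     result = fetch_all_news.delay()  # returns an AsyncResult
#     result.get(timeout=120)          # -> {"fetched": <n>}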