news-summarization / config.py
proKBD's picture
Update config.py
898864c verified
raw
history blame
3.07 kB
"""Configuration settings for the News Summarization application."""
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if one exists) into the process
# environment. This must run before the os.getenv() lookups further down.
load_dotenv()
# API Settings - configured for direct, local processing.
API_HOST = "localhost"  # Changed from 0.0.0.0
API_PORT = 8501  # Streamlit's default port
# Derived from the host and port above so the three settings cannot drift
# apart when one of them is edited.
API_BASE_URL = f"http://{API_HOST}:{API_PORT}"
# News Scraping Settings
def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an int, or *default*.

    A missing or blank variable yields *default* silently. A value that is
    not a valid integer also yields *default*, but a warning is emitted so
    the misconfiguration is visible instead of aborting the import of this
    module with a ValueError.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError:
        import warnings  # local import: only needed on the error path

        warnings.warn(
            f"Ignoring invalid integer {raw!r} for {name}; using {default}",
            stacklevel=2,
        )
        return default


# Per-source article count; can be overridden via the ARTICLES_PER_SOURCE
# environment variable (default 10).
ARTICLES_PER_SOURCE = _env_int("ARTICLES_PER_SOURCE", 10)
# Desktop Chrome (Windows 10, x64) user-agent string.
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
# RSS Feed Settings
# Business-news RSS endpoints, keyed by a short publisher label.
RSS_FEEDS = dict(
    BBC="http://feeds.bbci.co.uk/news/business/rss.xml",
    CNN="http://rss.cnn.com/rss/money_news_international.rss",
    FoxBusiness="http://feeds.foxnews.com/foxbusiness/latest",
)
# Model Settings
# Identifiers are in "owner/name" form (presumably Hugging Face Hub model ids;
# confirm against the code that loads them).
SENTIMENT_MODEL = "yiyanghkust/finbert-tone"  # More advanced financial sentiment model
SENTIMENT_FINE_GRAINED_MODEL = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
SUMMARIZATION_MODEL = "t5-base"
# Additional Fine-Grained Sentiment Models, keyed by analysis type.
FINE_GRAINED_MODELS = {
    # Same model as SENTIMENT_FINE_GRAINED_MODEL; referenced rather than
    # repeated so the two settings cannot fall out of sync.
    "financial": SENTIMENT_FINE_GRAINED_MODEL,
    "emotion": "j-hartmann/emotion-english-distilroberta-base",
    "aspect": "yangheng/deberta-v3-base-absa-v1.1",
    "esg": "yiyanghkust/finbert-esg",
    "news_tone": "ProsusAI/finbert",
}
# Fine-Grained Sentiment Categories
# Label sets per analysis type; the keys match those of FINE_GRAINED_MODELS.
# The three-way polarity labels are shared by several analysis types, so they
# are spelled once here and copied into a fresh list for each key.
_POLARITY_LABELS = ("positive", "negative", "neutral")
SENTIMENT_CATEGORIES = {
    "financial": list(_POLARITY_LABELS),
    "emotion": ["joy", "sadness", "anger", "fear", "surprise", "disgust", "neutral"],
    "aspect": list(_POLARITY_LABELS),
    "esg": ["environmental", "social", "governance", "neutral"],
    "news_tone": list(_POLARITY_LABELS),
}
# Cache Settings
CACHE_DIR = ".cache"
# Both time values below are expressed in seconds.
# NOTE(review): which consumer reads EXPIRY vs DURATION is not visible in this
# file - confirm before merging or renaming them.
CACHE_EXPIRY = 60 * 60  # 1 hour
CACHE_DURATION = 5 * 60  # 5 minutes
# Audio Settings
DEFAULT_LANG = "hi"  # Language code for Hindi
AUDIO_OUTPUT_DIR = "audio_output"  # Relative directory name for audio files
# News Sources
# Search-URL templates keyed by source name. Each template contains a single
# "{}" placeholder where the search query goes (presumably filled in with
# str.format by the caller - confirm at the call site).
NEWS_SOURCES = dict(
    (
        # Major news aggregators
        ("google", "https://www.google.com/search?q={}&tbm=nws"),
        ("bing", "https://www.bing.com/news/search?q={}"),
        ("yahoo", "https://news.search.yahoo.com/search?p={}"),
        # Financial news
        ("reuters", "https://www.reuters.com/search/news?blob={}"),
        ("marketwatch", "https://www.marketwatch.com/search?q={}&ts=0&tab=All%20News"),
        ("investing", "https://www.investing.com/search/?q={}&tab=news"),
        # Tech news
        ("techcrunch", "https://techcrunch.com/search/{}"),
        ("zdnet", "https://www.zdnet.com/search/?q={}"),
    )
)
# Article limits
# Overall bounds first, then the per-source cap.
MAX_ARTICLES = 50  # Increased to accommodate more sources
MIN_ARTICLES = 20
MAX_ARTICLES_PER_SOURCE = 10  # Adjusted for more sources
# Browser Headers
# Browser-like HTTP request headers (presumably sent with scraping requests -
# confirm at the call site).
HEADERS = {
    # Same string as the module-level USER_AGENT setting; keep the two in sync.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    # HTML/XHTML and WebP at full weight, XML at q=0.9, anything else at q=0.8.
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    # US English first, then generic English at q=0.5.
    "Accept-Language": "en-US,en;q=0.5",
    "Connection": "keep-alive",
}