"""
Configuration module for the Fake News Detector application.
This module handles loading configuration parameters, API keys,
and source credibility data needed for the fact-checking system.
It manages environment variables and file-based configurations.
"""

import os
import json
import logging
from pathlib import Path

# Configure logger
logger = logging.getLogger("misinformation_detector")

# Base paths
ROOT_DIR = Path(__file__).parent.absolute()
DATA_DIR = ROOT_DIR / "data"

# Ensure data directory exists
DATA_DIR.mkdir(exist_ok=True)

# First try to get API keys from Streamlit secrets, then fall back to environment variables
try:
    import streamlit as st
    OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY", os.environ.get("OPENAI_API_KEY", ""))
    NEWS_API_KEY = st.secrets.get("NEWS_API_KEY", os.environ.get("NEWS_API_KEY", ""))
    FACTCHECK_API_KEY = st.secrets.get("FACTCHECK_API_KEY", os.environ.get("FACTCHECK_API_KEY", ""))
except (AttributeError, ImportError):
    # Fall back to environment variables if Streamlit secrets aren't available
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
    NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "")
    FACTCHECK_API_KEY = os.environ.get("FACTCHECK_API_KEY", "")

# Log secrets status (but not the values)
if OPENAI_API_KEY:
    logger.info("OPENAI_API_KEY is set")
else:
    logger.warning("OPENAI_API_KEY not set. The application will not function properly.")

if NEWS_API_KEY:
    logger.info("NEWS_API_KEY is set")
else:
    logger.warning("NEWS_API_KEY not set. News evidence retrieval will be limited.")

if FACTCHECK_API_KEY:
    logger.info("FACTCHECK_API_KEY is set")
else:
    logger.warning("FACTCHECK_API_KEY not set. Fact-checking evidence will be limited.")

# Set API key in environment to ensure it's available to all components
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Rate limiting configuration
RATE_LIMITS = {
    # api_name: {"requests": max_requests, "period": period_in_seconds}
    "newsapi": {"requests": 100, "period": 3600},         # 100 requests per hour
    "factcheck": {"requests": 1000, "period": 86400},     # 1000 requests per day
    "semantic_scholar": {"requests": 10, "period": 300},  # 10 requests per 5 minutes
    "wikidata": {"requests": 60, "period": 60},           # 60 requests per minute
    "wikipedia": {"requests": 200, "period": 60},         # 200 requests per minute
    "rss": {"requests": 300, "period": 3600}              # 300 RSS requests per hour
}
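
# A minimal sketch of how a caller might consume RATE_LIMITS, assuming a simple
# in-memory sliding window. The helper `allow_request` and the `_request_log`
# store are hypothetical illustrations, not part of this app's actual limiter.
import time
from collections import deque

_request_log = {}  # api_name -> deque of recent request timestamps


def allow_request(api_name):
    """Return True if another call to `api_name` fits within its rate window."""
    limit = RATE_LIMITS[api_name]
    window = _request_log.setdefault(api_name, deque())
    now = time.monotonic()
    # Evict timestamps that have aged out of the configured period
    while window and now - window[0] > limit["period"]:
        window.popleft()
    if len(window) < limit["requests"]:
        window.append(now)
        return True
    return False
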

# Error backoff settings
ERROR_BACKOFF = {
    "max_retries": 5,
    "initial_backoff": 1,  # seconds
    "backoff_factor": 2,   # exponential backoff
}
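
# A minimal sketch (hypothetical helper) of the delay these settings imply for
# 0-based retry number `attempt` under plain exponential backoff:
# initial_backoff * backoff_factor ** attempt -> 1s, 2s, 4s, 8s, 16s for attempts 0-4,
# with max_retries governing how many attempts a caller makes in total.
def backoff_delay(attempt):
    """Return the wait in seconds before 0-based retry number `attempt`."""
    return ERROR_BACKOFF["initial_backoff"] * ERROR_BACKOFF["backoff_factor"] ** attempt
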

# RSS feed settings
RSS_SETTINGS = {
    "max_feeds_per_request": 10,  # Maximum number of feeds to try per request
    "max_age_days": 3,            # Maximum age of RSS items to consider
    "timeout_seconds": 5,         # Timeout for RSS feed requests
    "max_workers": 5              # Number of parallel workers for fetching feeds
}
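
# A minimal sketch (hypothetical, using the third-party `requests` library) of a
# fetcher that honours RSS_SETTINGS: at most `max_feeds_per_request` URLs, fetched
# by `max_workers` threads with a `timeout_seconds` timeout each. `fetch_feeds` is
# an illustration, not this app's actual RSS fetcher; `max_age_days` filtering
# would happen downstream, after the feed items are parsed.
from concurrent.futures import ThreadPoolExecutor


def fetch_feeds(feed_urls):
    """Fetch raw bodies for up to max_feeds_per_request feeds in parallel."""
    import requests  # assumed to be installed; any HTTP client would do

    feed_urls = feed_urls[:RSS_SETTINGS["max_feeds_per_request"]]

    def fetch_one(url):
        response = requests.get(url, timeout=RSS_SETTINGS["timeout_seconds"])
        response.raise_for_status()
        return response.text

    with ThreadPoolExecutor(max_workers=RSS_SETTINGS["max_workers"]) as pool:
        return list(pool.map(fetch_one, feed_urls))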