askveracity / utils /models.py
ankanghosh's picture
Upload 21 files
6d11371 verified
"""
Model management utility for the Fake News Detector application.
This module provides functions for initializing, caching, and
retrieving language models used throughout the application.
It ensures models are loaded efficiently and reused appropriately.
"""
import os
import logging
import functools
from langchain_openai import ChatOpenAI
import spacy
# Application-wide logger shared by the model helpers in this module.
logger = logging.getLogger("misinformation_detector")

# Module-level model state.  Both model handles start out empty and are
# filled in lazily by the loader functions defined further down.
models_initialized = False  # flipped to True once initialize_models() completes
nlp = model = None  # spaCy pipeline / ChatOpenAI client
# Caching decorator for model loaders
def cached_model(func):
    """
    Decorator to cache model loading for improved performance.

    The wrapped loader is called until it produces a model (any value
    other than ``None``).  That model is then stored and returned by every
    later call without invoking the loader again.

    A ``None`` result is treated as "the model could not be loaded" and is
    deliberately NOT stored, so a later call retries the loader instead of
    being stuck with ``None`` for the lifetime of the process.  Exceptions
    raised by the loader propagate to the caller and nothing is cached.

    The cache key is the function name only, so call arguments do not
    take part in the lookup; the decorator is meant for zero-argument
    loaders.

    Args:
        func (callable): Function that loads a model

    Returns:
        callable: Wrapped function that returns a cached model
    """
    cache = {}

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Use function name as cache key
        key = func.__name__
        if key in cache:
            return cache[key]

        logger.info(f"Model not in cache, calling {key}...")
        loaded = func(*args, **kwargs)

        # Only remember successful loads; caching None would make a
        # transient failure permanent.
        if loaded is not None:
            cache[key] = loaded
        return loaded

    return wrapper
def initialize_models():
    """
    Initialize all required models.

    Loads the spaCy ``en_core_web_sm`` pipeline and creates the
    ``ChatOpenAI`` client, storing them in the module-level ``nlp`` and
    ``model`` globals.  If the spaCy model is not installed, a download
    via ``python -m spacy download`` is attempted; if that also fails the
    error is logged and ``nlp`` is left as ``None`` while the rest of the
    initialization continues.

    A model whose global is already set (for example because one of the
    individual getter functions loaded it earlier) is kept as-is rather
    than being loaded a second time.

    Returns:
        str: Initialization status message

    Raises:
        ValueError: If OpenAI API key is not set
        Exception: Any other error raised during initialization is logged
            and re-raised unchanged.
    """
    global nlp, model, models_initialized

    # Skip initialization if already done
    if models_initialized:
        logger.info("Models already initialized, skipping initialization")
        return "Models already initialized"

    # Check OpenAI API key (missing and blank are treated the same)
    if not os.environ.get("OPENAI_API_KEY", "").strip():
        logger.error("OPENAI_API_KEY environment variable not set or empty")
        raise ValueError("OpenAI API key is required. Please set it in the Hugging Face Space secrets.")

    try:
        # Load NLP model, unless it has already been loaded elsewhere
        if nlp is None:
            try:
                logger.info("Loading spaCy NLP model...")
                nlp = spacy.load("en_core_web_sm")
                logger.info("Loaded spaCy NLP model")
            except OSError as e:
                # This handles the case if the model wasn't installed correctly
                logger.warning(f"Could not load spaCy model: {e}")
                logger.info("Attempting to download spaCy model...")
                try:
                    import subprocess
                    import sys

                    # This downloads the model if it's missing
                    subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
                    # Try loading again
                    nlp = spacy.load("en_core_web_sm")
                    logger.info("Successfully downloaded and loaded spaCy model")
                except Exception as download_err:
                    # Best effort: continue with other initialization,
                    # a missing NLP model is handled elsewhere
                    logger.error(f"Failed to download spaCy model: {download_err}")

        # Set up OpenAI model, unless it has already been created elsewhere
        if model is None:
            logger.info("Initializing ChatOpenAI model...")
            model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
            logger.info("Initialized ChatOpenAI model")

        # Mark initialization as complete
        models_initialized = True
        return "Models initialized successfully"
    except Exception as e:
        logger.error(f"Error initializing models: {e}")
        # Bare raise re-raises the active exception with its original traceback
        raise
@cached_model
def get_nlp_model():
    """
    Return the shared spaCy pipeline, loading it on first use.

    When the module-level ``nlp`` is still empty, the ``en_core_web_sm``
    model is loaded directly.  If that load raises, the error is logged
    and the full ``initialize_models()`` routine is run as a fallback.
    The result is memoized by the ``cached_model`` decorator.

    Returns:
        spacy.Language: The loaded spaCy model.  May be ``None`` when the
        direct load fails and the fallback initialization does not set
        ``nlp`` either.
    """
    global nlp

    # Fast path: the pipeline has already been loaded.
    if nlp is not None:
        return nlp

    try:
        # Load only the spaCy model rather than every model
        logger.info("Loading spaCy NLP model...")
        nlp = spacy.load("en_core_web_sm")
        logger.info("Loaded spaCy NLP model")
    except Exception as load_error:
        logger.error(f"Error loading spaCy model: {load_error!s}")
        # Direct load failed; try the complete initialization path
        initialize_models()

    return nlp
@cached_model
def get_llm_model():
    """
    Return the shared ChatOpenAI client, creating it on first use.

    When the module-level ``model`` is still empty, a ``ChatOpenAI``
    instance for ``gpt-3.5-turbo`` with temperature 0 is created
    directly.  If that raises, the error is logged and the full
    ``initialize_models()`` routine is run as a fallback.  The result is
    memoized by the ``cached_model`` decorator.

    Returns:
        ChatOpenAI: The LLM client.  May be ``None`` when direct creation
        fails and the fallback initialization does not set ``model``
        either.
    """
    global model

    # Fast path: the client has already been created.
    if model is not None:
        return model

    try:
        # Create only the LLM client rather than every model
        logger.info("Initializing ChatOpenAI model...")
        model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
        logger.info("Initialized ChatOpenAI model")
    except Exception as init_error:
        logger.error(f"Error initializing ChatOpenAI model: {init_error!s}")
        # Direct creation failed; try the complete initialization path
        initialize_models()

    return model