"""
Model management utility for the Fake News Detector application.
This module provides functions for initializing, caching, and
retrieving language models used throughout the application.
It ensures models are loaded efficiently and reused appropriately.
"""
import os
import logging
import functools
from langchain_openai import ChatOpenAI
import spacy
logger = logging.getLogger("misinformation_detector")
# Global model singletons, populated lazily by initialize_models()
# or the cached get_*_model() accessors below.
nlp = None  # spaCy Language pipeline ("en_core_web_sm"); None until loaded
model = None  # ChatOpenAI client (gpt-3.5-turbo); None until loaded
models_initialized = False  # set True once initialize_models() completes
# Caching decorator for model-loading functions
def cached_model(func):
    """
    Decorator to cache model loading for improved performance.

    The wrapped function is executed at most once per distinct set of
    arguments; subsequent calls return the cached result, avoiding
    redundant (and expensive) model loading.

    Args:
        func (callable): Function that loads a model

    Returns:
        callable: Wrapped function that returns a cached model
    """
    # One cache dict per decorated function (closure), so different
    # decorated loaders never collide.
    cache = {}

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Key on the call arguments (the original keyed on the function
        # name only, which would conflate calls made with different
        # arguments). Sorting kwargs makes the key order-insensitive.
        key = (args, tuple(sorted(kwargs.items())))
        if key not in cache:
            logger.info(f"Model not in cache, calling {func.__name__}...")
            cache[key] = func(*args, **kwargs)
        return cache[key]

    return wrapper
def initialize_models():
    """
    Initialize all required models.

    Loads the spaCy NLP pipeline (downloading it on demand if the
    package is not installed) and the ChatOpenAI LLM, storing them in
    the module-level globals ``nlp`` and ``model``. Safe to call
    repeatedly: once initialization has completed, later calls are
    no-ops.

    Returns:
        str: Initialization status message

    Raises:
        ValueError: If the OpenAI API key is not set or is empty
    """
    global nlp, model, models_initialized

    # Skip initialization if already done
    if models_initialized:
        logger.info("Models already initialized, skipping initialization")
        return "Models already initialized"

    # Validate the OpenAI API key before doing any expensive work
    if not os.environ.get("OPENAI_API_KEY", "").strip():
        logger.error("OPENAI_API_KEY environment variable not set or empty")
        raise ValueError("OpenAI API key is required. Please set it in the Hugging Face Space secrets.")

    try:
        # Load NLP model
        try:
            logger.info("Loading spaCy NLP model...")
            nlp = spacy.load("en_core_web_sm")
            logger.info("Loaded spaCy NLP model")
        except OSError as e:
            # OSError is what spaCy raises when the model package is absent
            logger.warning(f"Could not load spaCy model: {str(e)}")
            logger.info("Attempting to download spaCy model...")
            try:
                import subprocess
                import sys

                # Download the missing model using the same interpreter
                subprocess.check_call(
                    [sys.executable, "-m", "spacy", "download", "en_core_web_sm"]
                )
                # Try loading again
                nlp = spacy.load("en_core_web_sm")
                logger.info("Successfully downloaded and loaded spaCy model")
            except Exception as download_err:
                logger.error(f"Failed to download spaCy model: {str(download_err)}")
                # Continue with other initialization; nlp stays None and
                # the missing NLP model is handled elsewhere on demand.

        # Set up OpenAI model
        logger.info("Initializing ChatOpenAI model...")
        model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
        logger.info("Initialized ChatOpenAI model")

        # Mark initialization as complete
        models_initialized = True
        return "Models initialized successfully"
    except Exception as e:
        logger.error(f"Error initializing models: {str(e)}")
        # Bare raise preserves the original traceback and exception context
        raise
@cached_model
def get_nlp_model():
    """
    Get the spaCy NLP model, initializing if needed.

    Returns the module-level spaCy pipeline, loading it on first use.
    If direct loading fails, falls back to the full model
    initialization routine.

    Returns:
        spacy.Language: Loaded spaCy model
    """
    global nlp

    # Fast path: model already loaded
    if nlp is not None:
        return nlp

    try:
        # Attempt a direct load of just the spaCy pipeline
        logger.info("Loading spaCy NLP model...")
        nlp = spacy.load("en_core_web_sm")
        logger.info("Loaded spaCy NLP model")
    except Exception as err:
        logger.error(f"Error loading spaCy model: {str(err)}")
        # Fall back to full initialization
        initialize_models()
    return nlp
@cached_model
def get_llm_model():
    """
    Get the ChatOpenAI model, initializing if needed.

    Returns the module-level OpenAI LLM client, creating it on first
    use. If direct construction fails, falls back to the full model
    initialization routine.

    Returns:
        ChatOpenAI: Loaded LLM model
    """
    global model

    # Fast path: model already created
    if model is not None:
        return model

    try:
        # Attempt to construct just the LLM client
        logger.info("Initializing ChatOpenAI model...")
        model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
        logger.info("Initialized ChatOpenAI model")
    except Exception as err:
        logger.error(f"Error initializing ChatOpenAI model: {str(err)}")
        # Fall back to full initialization
        initialize_models()
    return model