"""Loaders for the Whisper, summarization and spaCy models used by the app.

Each ``load_*`` function logs its progress and returns the loaded object, or
``None`` when loading fails (the failure is logged with a traceback rather
than raised).
"""
import logging
import os

# Redirect the Hugging Face caches to /tmp. These assignments deliberately stay
# ahead of the Hugging Face imports below so the libraries see them on import.
os.environ['HF_HOME'] = '/tmp/huggingface'
os.environ['TRANSFORMERS_CACHE'] = '/tmp/huggingface'
os.environ['HUGGINGFACE_HUB_CACHE'] = '/tmp/huggingface'

from huggingface_hub import snapshot_download  # noqa: E402
import spacy  # noqa: E402
from faster_whisper import WhisperModel  # noqa: E402
from transformers import pipeline  # noqa: E402

logger = logging.getLogger(__name__)

# Name of the spaCy pipeline package loaded (and downloaded on demand).
_SPACY_MODEL_NAME = "en_core_web_sm"


def load_whisper(config):
    """Download the Whisper snapshot and build a ``WhisperModel`` from it.

    Args:
        config: Object exposing ``WHISPER_MODEL_NAME``, ``WHISPER_DEVICE``,
            ``WHISPER_COMPUTE_TYPE`` and ``CPU_THREADS``.

    Returns:
        The loaded ``WhisperModel``, or ``None`` if any step fails.
    """
    logger.info("Loading Whisper model...")
    try:
        cache_dir = "/tmp/hf-cache"
        os.makedirs(cache_dir, exist_ok=True)

        # The access token is read from the environment; it is None when the
        # variable is unset.
        model_dir = snapshot_download(
            repo_id=config.WHISPER_MODEL_NAME,
            cache_dir=cache_dir,
            token=os.getenv("HUGGINGFACE_API_KEY"),
        )
        model = WhisperModel(
            model_dir,
            device=config.WHISPER_DEVICE,
            compute_type=config.WHISPER_COMPUTE_TYPE,
            cpu_threads=config.CPU_THREADS,
        )
        logger.info(
            "Whisper model '%s' loaded from %s on %s.",
            config.WHISPER_MODEL_NAME,
            model_dir,
            config.WHISPER_DEVICE,
        )
        return model
    except Exception as e:
        logger.error(f"Failed to load Whisper model: {e}", exc_info=True)
        return None


def load_summarizer(config):
    """Build the Hugging Face summarization pipeline.

    Args:
        config: Object exposing ``SUMMARIZER_MODEL``.

    Returns:
        The summarization pipeline, or ``None`` if it cannot be created.
    """
    logger.info("Loading Summarization pipeline...")
    try:
        # NOTE(review): from_tf=True is forwarded to the model loader; confirm
        # the configured model really needs TensorFlow weights.
        summarizer = pipeline(
            "summarization",
            model=config.SUMMARIZER_MODEL,
            from_tf=True,
        )
        logger.info("Summarization pipeline loaded.")
        return summarizer
    except Exception as e:
        logger.error(f"Failed to load Summarization pipeline: {e}", exc_info=True)
        return None


def load_spacy(config):
    """Load the ``en_core_web_sm`` spaCy model, downloading it if missing.

    Args:
        config: Unused here; accepted so all loaders share one signature.

    Returns:
        The loaded spaCy ``Language`` object, or ``None`` if the model can be
        neither loaded nor downloaded.
    """
    logger.info("Loading spaCy model...")
    try:
        nlp = spacy.load(_SPACY_MODEL_NAME)
        logger.info("spaCy model '%s' loaded.", _SPACY_MODEL_NAME)
        return nlp
    except OSError:
        # spaCy raises OSError when the model package is not installed.
        logger.warning(
            "spaCy model '%s' not found. Trying to download...",
            _SPACY_MODEL_NAME,
        )
        try:
            spacy.cli.download(_SPACY_MODEL_NAME)
            nlp = spacy.load(_SPACY_MODEL_NAME)
            logger.info(
                "spaCy model '%s' downloaded and loaded.", _SPACY_MODEL_NAME
            )
            return nlp
        except Exception as download_e:
            # logger.exception attaches the traceback, matching the other
            # loaders' exc_info=True behaviour.
            logger.exception(
                "Failed to download or load spaCy model '%s': %s",
                _SPACY_MODEL_NAME,
                download_e,
            )
            return None
    except Exception as e:
        logger.exception("Failed to load spaCy model: %s", e)
        return None