|
import os |
|
# Point every Hugging Face cache location at one writable directory under /tmp.
# This runs before the huggingface_hub / transformers imports below,
# presumably so those libraries see the location when they are imported.
os.environ.update(
    dict.fromkeys(
        ("HF_HOME", "TRANSFORMERS_CACHE", "HUGGINGFACE_HUB_CACHE"),
        "/tmp/huggingface",
    )
)
|
|
|
from huggingface_hub import snapshot_download |
|
import spacy |
|
import logging |
|
from faster_whisper import WhisperModel |
|
from transformers import pipeline |
|
|
|
# Module-level logger used by all loader functions in this file.
logger = logging.getLogger(__name__)
|
|
|
def load_whisper(config):
    """Fetch the configured Whisper checkpoint and wrap it in a WhisperModel.

    The repository named by ``config.WHISPER_MODEL_NAME`` is downloaded with
    ``snapshot_download`` into ``/tmp/hf-cache`` (created if absent), using the
    token from the ``HUGGINGFACE_API_KEY`` environment variable when set.

    Args:
        config: Object exposing ``WHISPER_MODEL_NAME``, ``WHISPER_DEVICE``,
            ``WHISPER_COMPUTE_TYPE`` and ``CPU_THREADS``.

    Returns:
        The constructed ``WhisperModel``, or ``None`` if any step raised an
        exception (the failure is logged with its traceback).
    """
    logger.info("Loading Whisper model...")

    try:
        download_root = "/tmp/hf-cache"
        os.makedirs(download_root, exist_ok=True)

        repo = config.WHISPER_MODEL_NAME
        hf_token = os.getenv("HUGGINGFACE_API_KEY")
        model_dir = snapshot_download(
            repo_id=repo,
            cache_dir=download_root,
            token=hf_token,
        )

        # Runtime options are taken straight from the config object.
        runtime_options = {
            "device": config.WHISPER_DEVICE,
            "compute_type": config.WHISPER_COMPUTE_TYPE,
            "cpu_threads": config.CPU_THREADS,
        }
        whisper = WhisperModel(model_dir, **runtime_options)

        logger.info(f"Whisper model '{config.WHISPER_MODEL_NAME}' loaded from {model_dir} on {config.WHISPER_DEVICE}.")
        return whisper

    except Exception as exc:
        # Best-effort loader: report the failure and signal it with None.
        logger.error(f"Failed to load Whisper model: {exc}", exc_info=True)
        return None
|
|
|
def load_summarizer(config):
    """Build the transformers summarization pipeline named in the config.

    Args:
        config: Object exposing ``SUMMARIZER_MODEL``, the model identifier
            passed to ``pipeline``.

    Returns:
        The summarization pipeline, or ``None`` if construction raised an
        exception (the failure is logged with its traceback).
    """
    logger.info("Loading Summarization pipeline...")

    try:
        # from_tf=True is forwarded unchanged alongside the model identifier.
        pipeline_options = {"model": config.SUMMARIZER_MODEL, "from_tf": True}
        summarization = pipeline("summarization", **pipeline_options)
        logger.info("Summarization pipeline loaded.")
        return summarization

    except Exception as exc:
        logger.error(f"Failed to load Summarization pipeline: {exc}", exc_info=True)
        return None
|
|
|
def load_spacy(config):
    """Load the spaCy ``en_core_web_sm`` pipeline, downloading it if missing.

    A first ``spacy.load`` is attempted. If it raises ``OSError`` (treated
    here as "model not installed"), the model is downloaded through
    ``spacy.cli.download`` and loaded again.

    Args:
        config: Accepted for signature parity with the other loaders in this
            module; it is not read by this function.

    Returns:
        The loaded spaCy pipeline, or ``None`` if loading (and, when tried,
        downloading) failed. Failures are logged with their traceback.
    """
    logger.info("Loading spaCy model...")

    try:
        nlp = spacy.load("en_core_web_sm")
        logger.info("spaCy model 'en_core_web_sm' loaded.")
        return nlp
    except OSError:
        logger.warning("spaCy model 'en_core_web_sm' not found. Trying to download...")
    except Exception as e:
        logger.error(f"Failed to load spaCy model: {e}", exc_info=True)
        return None

    # Only reached when the first load raised OSError: download, then retry.
    try:
        spacy.cli.download("en_core_web_sm")
        nlp = spacy.load("en_core_web_sm")
    except (Exception, SystemExit) as download_e:
        # The spaCy CLI may terminate via sys.exit() when the download fails;
        # SystemExit is not an Exception subclass, so it is caught explicitly
        # to keep this loader's "return None on failure" contract.
        logger.error(
            f"Failed to download or load spaCy model 'en_core_web_sm': {download_e}",
            exc_info=True,
        )
        return None

    logger.info("spaCy model 'en_core_web_sm' downloaded and loaded.")
    return nlp
|
|