File size: 2,363 Bytes
85c98be db36e68 edd3265 5f0a430 edd3265 5f0a430 85c98be 5f0a430 ba6063a 85c98be 5f0a430 85c98be 5f0a430 85c98be 5f0a430 85c98be 5f0a430 c8744a7 49f0563 db36e68 49f0563 c8744a7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import os
# Point the Hugging Face cache locations at /tmp/huggingface. These
# assignments are placed before the Hugging Face imports below, presumably
# so the libraries see the values when they are imported -- keep this
# ordering when reorganising imports.
# NOTE(review): TRANSFORMERS_CACHE may be redundant with HF_HOME on recent
# transformers releases -- confirm against the pinned version.
os.environ['HF_HOME'] = '/tmp/huggingface'
os.environ['TRANSFORMERS_CACHE'] = '/tmp/huggingface'
os.environ['HUGGINGFACE_HUB_CACHE'] = '/tmp/huggingface'
from huggingface_hub import snapshot_download
import spacy
import logging
from faster_whisper import WhisperModel
from transformers import pipeline
# Module-level logger shared by every loader in this file.
logger = logging.getLogger(__name__)
def load_whisper(config):
    """Download the configured Whisper snapshot and wrap it in a WhisperModel.

    The repository named by ``config.WHISPER_MODEL_NAME`` is fetched with
    ``snapshot_download`` into ``/tmp/hf-cache`` (created if missing), using
    the value of the ``HUGGINGFACE_API_KEY`` environment variable as the
    token (``None`` when the variable is unset).

    Args:
        config: Object exposing ``WHISPER_MODEL_NAME``, ``WHISPER_DEVICE``,
            ``WHISPER_COMPUTE_TYPE`` and ``CPU_THREADS``.

    Returns:
        The constructed ``WhisperModel``, or ``None`` when any step fails.
        Failures are logged with their traceback rather than raised.
    """
    logger.info("Loading Whisper model...")
    try:
        download_root = "/tmp/hf-cache"
        os.makedirs(download_root, exist_ok=True)

        model_dir = snapshot_download(
            repo_id=config.WHISPER_MODEL_NAME,
            cache_dir=download_root,
            token=os.getenv("HUGGINGFACE_API_KEY"),
        )

        # Runtime options are read from the config after the download,
        # matching the order in which the attributes were accessed before.
        runtime_options = {
            "device": config.WHISPER_DEVICE,
            "compute_type": config.WHISPER_COMPUTE_TYPE,
            "cpu_threads": config.CPU_THREADS,
        }
        whisper = WhisperModel(model_dir, **runtime_options)

        logger.info(f"Whisper model '{config.WHISPER_MODEL_NAME}' loaded from {model_dir} on {config.WHISPER_DEVICE}.")
        return whisper
    except Exception as e:
        logger.error(f"Failed to load Whisper model: {e}", exc_info=True)
    # Only reached when the except branch above handled a failure.
    return None
def load_summarizer(config):
    """Build the summarization pipeline for ``config.SUMMARIZER_MODEL``.

    Args:
        config: Object exposing ``SUMMARIZER_MODEL``, the model identifier
            handed to ``transformers.pipeline``.

    Returns:
        The summarization pipeline, or ``None`` when construction fails.
        Failures are logged with their traceback rather than raised.
    """
    logger.info("Loading Summarization pipeline...")
    try:
        # NOTE(review): from_tf=True is passed through unchanged; confirm the
        # configured model really needs TensorFlow weights.
        pipeline_options = {"model": config.SUMMARIZER_MODEL, "from_tf": True}
        summarization = pipeline("summarization", **pipeline_options)
        logger.info("Summarization pipeline loaded.")
        return summarization
    except Exception as e:
        logger.error(f"Failed to load Summarization pipeline: {e}", exc_info=True)
    # Only reached when the except branch above handled a failure.
    return None
def load_spacy(config):
    """Load the ``en_core_web_sm`` spaCy model, downloading it if missing.

    The model is loaded directly first. If spaCy raises ``OSError`` (the
    model is not installed), one download attempt is made via
    ``spacy.cli.download`` followed by a second load.

    Args:
        config: Accepted for signature parity with the other loaders; it is
            not read by this function.

    Returns:
        The loaded spaCy pipeline, or ``None`` when loading (and, where
        attempted, downloading) fails. Failures are logged with their
        traceback rather than raised.
    """
    model_name = "en_core_web_sm"
    logger.info("Loading spaCy model...")
    try:
        nlp = spacy.load(model_name)
        logger.info(f"spaCy model '{model_name}' loaded.")
        return nlp
    except OSError:
        logger.warning(f"spaCy model '{model_name}' not found. Trying to download...")
        try:
            spacy.cli.download(model_name)
            nlp = spacy.load(model_name)
            logger.info(f"spaCy model '{model_name}' downloaded and loaded.")
            return nlp
        except (Exception, SystemExit) as download_e:
            # spaCy's download command can call sys.exit() on failure (for
            # example when the underlying pip install fails). SystemExit is
            # not an Exception subclass, so it is caught explicitly to keep
            # the "return None on failure" contract instead of terminating
            # the whole process.
            logger.error(
                f"Failed to download or load spaCy model '{model_name}': {download_e}",
                exc_info=True,
            )
            return None
    except Exception as e:
        # exc_info=True keeps the traceback, matching the other loaders.
        logger.error(f"Failed to load spaCy model: {e}", exc_info=True)
        return None
|