File size: 2,363 Bytes
85c98be
db36e68
 
 
 
 
 
edd3265
5f0a430
 
edd3265
5f0a430
 
 
 
 
85c98be
5f0a430
ba6063a
85c98be
 
 
 
 
 
 
 
5f0a430
85c98be
5f0a430
 
 
 
85c98be
 
5f0a430
 
 
85c98be
5f0a430
c8744a7
49f0563
 
 
db36e68
49f0563
 
 
 
 
 
c8744a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os
# Redirect the Hugging Face cache locations to a directory under /tmp.
# These assignments deliberately sit above the huggingface_hub / transformers
# imports below.
# NOTE(review): presumably those libraries resolve their cache paths when they
# are imported — confirm before reordering these lines.
os.environ['HF_HOME'] = '/tmp/huggingface'
os.environ['TRANSFORMERS_CACHE'] = '/tmp/huggingface'

os.environ['HUGGINGFACE_HUB_CACHE'] = '/tmp/huggingface'

from huggingface_hub import snapshot_download
import spacy
import logging
from faster_whisper import WhisperModel
from transformers import pipeline

# Module-level logger shared by every loader in this file.
logger = logging.getLogger(__name__)

def load_whisper(config):
    """Fetch the configured Whisper checkpoint and build a ``WhisperModel``.

    The repository named by ``config.WHISPER_MODEL_NAME`` is downloaded with
    ``snapshot_download`` into ``/tmp/hf-cache`` (created if absent), using
    the ``HUGGINGFACE_API_KEY`` environment variable as the token, and the
    resulting directory is handed to ``WhisperModel``.

    Args:
        config: Object exposing ``WHISPER_MODEL_NAME``, ``WHISPER_DEVICE``,
            ``WHISPER_COMPUTE_TYPE`` and ``CPU_THREADS``.

    Returns:
        The constructed ``WhisperModel``, or ``None`` when any step raised
        (the failure is logged together with its traceback).
    """
    logger.info("Loading Whisper model...")

    try:
        download_root = "/tmp/hf-cache"
        os.makedirs(download_root, exist_ok=True)

        snapshot_path = snapshot_download(
            repo_id=config.WHISPER_MODEL_NAME,
            cache_dir=download_root,
            token=os.environ.get("HUGGINGFACE_API_KEY"),
        )

        whisper = WhisperModel(
            snapshot_path,
            device=config.WHISPER_DEVICE,
            compute_type=config.WHISPER_COMPUTE_TYPE,
            cpu_threads=config.CPU_THREADS,
        )

        logger.info(f"Whisper model '{config.WHISPER_MODEL_NAME}' loaded from {snapshot_path} on {config.WHISPER_DEVICE}.")
        return whisper
    except Exception as err:
        # Best-effort loader: report the failure and signal it with None.
        logger.exception(f"Failed to load Whisper model: {err}")
        return None

def load_summarizer(config):
    """Build the Hugging Face summarization pipeline.

    Args:
        config: Object exposing ``SUMMARIZER_MODEL``, the model identifier
            passed to ``pipeline``.

    Returns:
        The summarization pipeline, or ``None`` when construction raised
        (the failure is logged together with its traceback).
    """
    logger.info("Loading Summarization pipeline...")
    try:
        # NOTE(review): from_tf=True is forwarded through pipeline()'s
        # **kwargs; confirm it reaches the model loader as intended.
        summarization_pipe = pipeline(
            "summarization",
            model=config.SUMMARIZER_MODEL,
            from_tf=True,
        )
        logger.info("Summarization pipeline loaded.")
        return summarization_pipe
    except Exception as err:
        logger.exception(f"Failed to load Summarization pipeline: {err}")
        return None

def load_spacy(config):
    """Load the spaCy ``en_core_web_sm`` pipeline, downloading it if missing.

    A first ``spacy.load`` is attempted; if it raises ``OSError`` the model is
    downloaded with ``spacy.cli.download`` and loaded again.

    Args:
        config: Not used by this loader; accepted so that all loaders in this
            module share the same call signature.

    Returns:
        The loaded spaCy pipeline, or ``None`` when loading (and the fallback
        download) failed. Failures are logged together with their traceback,
        matching the other loaders in this module.
    """
    model_name = "en_core_web_sm"
    logger.info("Loading spaCy model...")

    try:
        nlp = spacy.load(model_name)
    except OSError:
        # Treated as "model package not installed": fall through to download.
        logger.warning(f"spaCy model '{model_name}' not found. Trying to download...")
    except Exception as e:
        logger.error(f"Failed to load spaCy model: {e}", exc_info=True)
        return None
    else:
        logger.info(f"spaCy model '{model_name}' loaded.")
        return nlp

    try:
        spacy.cli.download(model_name)
        nlp = spacy.load(model_name)
    except (Exception, SystemExit) as download_e:
        # The download command can terminate via sys.exit() when the
        # underlying install fails; catch SystemExit as well so this loader
        # keeps its "return None on failure" contract instead of killing
        # the process.
        logger.error(
            f"Failed to download or load spaCy model '{model_name}': {download_e}",
            exc_info=True,
        )
        return None

    logger.info(f"spaCy model '{model_name}' downloaded and loaded.")
    return nlp