devcom33 commited on
Commit
85c98be
·
1 Parent(s): 0fe97a8

Fix model loading on HF Spaces: use a writable /app/cache for Hugging Face downloads and pre-resolve the faster-whisper snapshot before loading

Browse files
Files changed (3) hide show
  1. Dockerfile +14 -1
  2. config.py +5 -8
  3. models.py +17 -42
Dockerfile CHANGED
@@ -1,11 +1,24 @@
1
- FROM python:3.10
2
 
 
3
  WORKDIR /app
4
 
 
5
  COPY . .
6
 
 
7
  RUN pip install --no-cache-dir -r requirements.txt
8
 
 
 
 
 
 
 
 
 
 
9
  EXPOSE 7860
10
 
 
11
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Copy only the dependency list first so the pip layer is cached
# across code-only changes (COPY . . before pip install busts it).
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application
COPY . .

# Make a writable cache directory for model downloads
RUN mkdir -p /app/cache

# Point Hugging Face libraries at the writable cache.
# TRANSFORMERS_CACHE is deprecated in favour of HF_HOME but is kept
# for compatibility with older transformers releases.
ENV HF_HOME=/app/cache
ENV TRANSFORMERS_CACHE=/app/cache
ENV HF_HUB_CACHE=/app/cache

# Expose FastAPI port
EXPOSE 7860

# Run the FastAPI app
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
config.py CHANGED
@@ -1,18 +1,15 @@
1
- import psutil
2
  import os
3
- #from dotenv import load_dotenv
4
-
5
 
6
- #load_dotenv()
7
- WHISPER_MODEL_NAME = "tiny"
8
  WHISPER_DEVICE = "cpu"
9
  WHISPER_COMPUTE_TYPE = "int8"
 
10
  PYANNOTE_AUTH_TOKEN = os.getenv("HUGGINGFACE_API_KEY")
 
11
  SUMMARIZER_MODEL = "facebook/bart-large-cnn"
12
  SUMMARIZER_MAX_LENGTH = 150
13
  SUMMARIZER_MIN_LENGTH = 50
 
14
  SPACY_MODEL = "en_core_web_sm"
15
  CPU_THREADS = max(1, psutil.cpu_count(logical=False))
16
-
17
- if not PYANNOTE_AUTH_TOKEN:
18
- raise ValueError("HUGGINGFACE_API_KEY not set in environment variables")
 
 
import os

import psutil

# faster-whisper model repo and runtime settings (CPU-only, int8 quantized).
WHISPER_MODEL_NAME = "Systran/faster-whisper-tiny"
WHISPER_DEVICE = "cpu"
WHISPER_COMPUTE_TYPE = "int8"

# Hugging Face token for gated pyannote models; None when the env var is unset.
PYANNOTE_AUTH_TOKEN = os.getenv("HUGGINGFACE_API_KEY")

# Summarization pipeline settings.
SUMMARIZER_MODEL = "facebook/bart-large-cnn"
SUMMARIZER_MAX_LENGTH = 150
SUMMARIZER_MIN_LENGTH = 50

SPACY_MODEL = "en_core_web_sm"

# psutil.cpu_count(logical=False) returns None when the physical-core count
# cannot be determined (common in containers); `max(1, None)` would raise
# TypeError at import time, so fall back to a single thread.
CPU_THREADS = max(1, psutil.cpu_count(logical=False) or 1)
 
 
 
models.py CHANGED
@@ -1,59 +1,34 @@
 
 
1
  import logging
2
  from faster_whisper import WhisperModel
3
- import spacy
4
- from transformers import pipeline
5
- import os
6
 
7
  logger = logging.getLogger(__name__)
8
 
9
-
10
  def load_whisper(config):
11
  logger.info("Loading Whisper model...")
 
12
  try:
 
 
 
 
 
 
 
 
 
 
13
  model = WhisperModel(
14
- config.WHISPER_MODEL_NAME,
15
  device=config.WHISPER_DEVICE,
16
  compute_type=config.WHISPER_COMPUTE_TYPE,
17
  cpu_threads=config.CPU_THREADS
18
  )
19
- logger.info(f"Whisper model '{config.WHISPER_MODEL_NAME}' loaded on {config.WHISPER_DEVICE}.")
 
20
  return model
21
- except Exception as e:
22
- logger.error(f"Failed to load Whisper model: {e}", exc_info=True)
23
- return None
24
 
25
- def load_summarizer(config):
26
- logger.info("Loading Summarization pipeline...")
27
- try:
28
- summarizer = pipeline("summarization", model=config.SUMMARIZER_MODEL)
29
- logger.info("Summarization pipeline loaded.")
30
- return summarizer
31
  except Exception as e:
32
- logger.error(f"Failed to load Summarization pipeline: {e}", exc_info=True)
33
  return None
34
-
35
- def load_spacy(config):
36
- logger.info("Loading spaCy model...")
37
-
38
- try:
39
- nlp = spacy.load("en_core_web_sm")
40
- logger.info("spaCy model 'en_core_web_sm' loaded.")
41
-
42
- return nlp
43
-
44
- except OSError:
45
- logger.warning("spaCy model 'en_core_web_sm' not found. Trying to download...")
46
-
47
- try:
48
- spacy.cli.download("en_core_web_sm")
49
- nlp = spacy.load("en_core_web_sm")
50
- logger.info("spaCy model 'en_core_web_sm' downloaded and loaded.")
51
- return nlp
52
-
53
- except Exception as download_e:
54
- logger.error(f"Failed to download or load spaCy model 'en_core_web_sm': {download_e}")
55
- return None
56
-
57
- except Exception as e:
58
- logger.error(f"Failed to load spaCy model: {e}")
59
- return None
 
1
+ from huggingface_hub import snapshot_download
2
+ import os
3
  import logging
4
  from faster_whisper import WhisperModel
 
 
 
5
 
6
  logger = logging.getLogger(__name__)
7
 
 
8
  def load_whisper(config):
9
  logger.info("Loading Whisper model...")
10
+
11
  try:
12
+ # Use /app/cache or /tmp — both are writable on HF Spaces
13
+ cache_dir = "/app/cache"
14
+ os.makedirs(cache_dir, exist_ok=True)
15
+
16
+ model_dir = snapshot_download(
17
+ repo_id=config.WHISPER_MODEL_NAME,
18
+ cache_dir=cache_dir,
19
+ token=os.getenv("HUGGINGFACE_API_KEY")
20
+ )
21
+
22
  model = WhisperModel(
23
+ model_dir,
24
  device=config.WHISPER_DEVICE,
25
  compute_type=config.WHISPER_COMPUTE_TYPE,
26
  cpu_threads=config.CPU_THREADS
27
  )
28
+
29
+ logger.info(f"Whisper model '{config.WHISPER_MODEL_NAME}' loaded from {model_dir} on {config.WHISPER_DEVICE}.")
30
  return model
 
 
 
31
 
 
 
 
 
 
 
32
  except Exception as e:
33
+ logger.error(f"Failed to load Whisper model: {e}", exc_info=True)
34
  return None