LiamKhoaLe committed on
Commit
30f82a6
·
1 Parent(s): 0962e25

Remove torchaudio, use librosa

Browse files
Files changed (2) hide show
  1. app.py +3 -3
  2. requirements.txt +1 -1
app.py CHANGED
@@ -13,10 +13,11 @@ from fastapi.staticfiles import StaticFiles
13
  # AI + LLM
14
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
15
  import torch
16
- import torchaudio
17
  from google import genai
18
  from google.genai import types
19
 
 
20
  ############################################
21
  # ── Configuration ────────────────────────
22
  ############################################
@@ -130,8 +131,7 @@ async def voice_transcribe(file: UploadFile = File(...)): # noqa: B008
130
  tmp_path = tmp.name
131
  try:
132
  # ── 1. Transcribe
133
- waveform, sample_rate = torchaudio.load(tmp_path)
134
- speech = waveform[0].numpy() # Convert to numpy for WhisperProcessor
135
  inputs = processor(speech, sampling_rate=sample_rate, return_tensors="pt")
136
  input_features = inputs["input_features"].to("cpu")
137
  generated_ids = model.generate(input_features)
 
13
  # AI + LLM
14
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
15
  import torch
16
+ import librosa
17
  from google import genai
18
  from google.genai import types
19
 
20
+
21
  ############################################
22
  # ── Configuration ────────────────────────
23
  ############################################
 
131
  tmp_path = tmp.name
132
  try:
133
  # ── 1. Transcribe
134
+ speech, sample_rate = librosa.load(tmp_path, sr=16000)
 
135
  inputs = processor(speech, sampling_rate=sample_rate, return_tensors="pt")
136
  input_features = inputs["input_features"].to("cpu")
137
  generated_ids = model.generate(input_features)
requirements.txt CHANGED
@@ -7,8 +7,8 @@ python-multipart # File uploads
7
  # Voice‑to‑text (Whisper via 🤗 Transformers)
8
  transformers==4.38.2 # ensure recent enough
9
  torch
10
- torchaudio>=2.1.0
11
  huggingface_hub
 
12
 
13
  # Gemini Flash 2.5
14
  google-genai
 
7
  # Voice‑to‑text (Whisper via 🤗 Transformers)
8
  transformers==4.38.2 # ensure recent enough
9
  torch
 
10
  huggingface_hub
11
+ librosa
12
 
13
  # Gemini Flash 2.5
14
  google-genai