Spaces:
Running
Running
Commit
·
30f82a6
1
Parent(s):
0962e25
Rm torchaudio, use librosa
Browse files
- app.py +3 -3
- requirements.txt +1 -1
app.py
CHANGED
@@ -13,10 +13,11 @@ from fastapi.staticfiles import StaticFiles
|
|
13 |
# AI + LLM
|
14 |
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
15 |
import torch
|
16 |
-
import torchaudio
|
17 |
from google import genai
|
18 |
from google.genai import types
|
19 |
|
|
|
20 |
############################################
|
21 |
# ── Configuration ────────────────────────
|
22 |
############################################
|
@@ -130,8 +131,7 @@ async def voice_transcribe(file: UploadFile = File(...)): # noqa: B008
|
|
130 |
tmp_path = tmp.name
|
131 |
try:
|
132 |
# ── 1. Transcribe
|
133 |
-
waveform, sample_rate = torchaudio.load(tmp_path)
|
134 |
-
speech = waveform[0].numpy() # Convert to numpy for WhisperProcessor
|
135 |
inputs = processor(speech, sampling_rate=sample_rate, return_tensors="pt")
|
136 |
input_features = inputs["input_features"].to("cpu")
|
137 |
generated_ids = model.generate(input_features)
|
|
|
13 |
# AI + LLM
|
14 |
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
15 |
import torch
|
16 |
+
import librosa
|
17 |
from google import genai
|
18 |
from google.genai import types
|
19 |
|
20 |
+
|
21 |
############################################
|
22 |
# ── Configuration ────────────────────────
|
23 |
############################################
|
|
|
131 |
tmp_path = tmp.name
|
132 |
try:
|
133 |
# ── 1. Transcribe
|
134 |
+
speech, sample_rate = librosa.load(tmp_path, sr=16000)
|
|
|
135 |
inputs = processor(speech, sampling_rate=sample_rate, return_tensors="pt")
|
136 |
input_features = inputs["input_features"].to("cpu")
|
137 |
generated_ids = model.generate(input_features)
|
requirements.txt
CHANGED
@@ -7,8 +7,8 @@ python-multipart # File uploads
|
|
7 |
# Voice‑to‑text (Whisper via 🤗 Transformers)
|
8 |
transformers==4.38.2 # ensure recent enough
|
9 |
torch
|
10 |
-
torchaudio>=2.1.0
|
11 |
huggingface_hub
|
|
|
12 |
|
13 |
# Gemini Flash 2.5
|
14 |
google-genai
|
|
|
7 |
# Voice‑to‑text (Whisper via 🤗 Transformers)
|
8 |
transformers==4.38.2 # ensure recent enough
|
9 |
torch
|
|
|
10 |
huggingface_hub
|
11 |
+
librosa
|
12 |
|
13 |
# Gemini Flash 2.5
|
14 |
google-genai
|