LiamKhoaLe committed
Commit 218573f · 1 Parent(s): f1e695a

Change to torchaudio

Files changed (2)
  1. app.py +3 -1
  2. requirements.txt +1 -0
app.py CHANGED
@@ -13,6 +13,7 @@ from fastapi.staticfiles import StaticFiles
 # AI + LLM
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
 import torch
+import torchaudio
 import soundfile as sf
 from google import genai
 from google.genai import types
@@ -130,7 +131,8 @@ async def voice_transcribe(file: UploadFile = File(...)): # noqa: B008
     tmp_path = tmp.name
     try:
         # ── 1. Transcribe
-        speech, sample_rate = sf.read(tmp_path)
+        waveform, sample_rate = torchaudio.load(tmp_path)
+        speech = waveform[0].numpy()  # Convert to numpy for WhisperProcessor
         inputs = processor(speech, sampling_rate=sample_rate, return_tensors="pt")
         input_features = inputs["input_features"].to("cpu")
         generated_ids = model.generate(input_features)
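For reference, here is a minimal standalone sketch of the loading path this commit switches to, assuming a hypothetical checkpoint name and file path (app.py's actual model setup is not part of this diff). torchaudio.load returns a (channels, frames) float32 tensor, so the first channel is converted to NumPy before it is handed to WhisperProcessor; Whisper models expect 16 kHz input, so a defensive resampling step is included here even though the commit itself does not add one.

```python
import torch
import torchaudio
from transformers import WhisperProcessor, WhisperForConditionalGeneration

# Hypothetical checkpoint for illustration; app.py may load a different one.
MODEL_ID = "openai/whisper-base"
processor = WhisperProcessor.from_pretrained(MODEL_ID)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID)

def transcribe(path: str) -> str:
    """Load audio with torchaudio and run Whisper on CPU."""
    waveform, sample_rate = torchaudio.load(path)  # (channels, frames), float32 in [-1, 1]
    speech = waveform[0]                           # keep the first channel (mono)

    # Whisper feature extraction assumes 16 kHz audio; resample if the file differs.
    if sample_rate != 16000:
        speech = torchaudio.functional.resample(speech, sample_rate, 16000)
        sample_rate = 16000

    inputs = processor(speech.numpy(), sampling_rate=sample_rate, return_tensors="pt")
    input_features = inputs["input_features"].to("cpu")
    with torch.no_grad():
        generated_ids = model.generate(input_features)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
```

Calling transcribe("clip.wav") (hypothetical file) returns the decoded transcript as a plain string.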
requirements.txt CHANGED
@@ -8,6 +8,7 @@ python-multipart # File uploads
 soundfile
 transformers==4.38.2 # ensure recent enough
 torch
+torchaudio>=2.1.0
 huggingface_hub

 # Gemini Flash 2.5
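As a quick sanity check for the new pin, a small hedged snippet (hypothetical test file name) can confirm that torchaudio actually decodes audio in the deployment environment; torchaudio delegates decoding to whichever backend is installed, so a missing backend surfaces here rather than inside the voice_transcribe handler.

```python
import torchaudio

print(torchaudio.__version__)            # expected >= 2.1.0 per requirements.txt
print(torchaudio.list_audio_backends())  # decoding backends available in this environment

waveform, sample_rate = torchaudio.load("sample.wav")  # hypothetical test file
print(waveform.shape, sample_rate)
```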