Spaces:

tahirsher
/

ASR_Model_for_Transcription_into_Text

Sleeping

tahirsher committed on Mar 10

Commit

add50b3

verified ·

1 Parent(s): 8d19597

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -81,14 +81,20 @@ if audio_file:
     adversarial_waveform = torch.clamp(adversarial_waveform, -1.0, 1.0)
     # ================================
-    # ✅ Fast Transcription Processing with Conformer
     # ================================
-    input_features = processor(adversarial_waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt").input_features.to("cuda" if torch.cuda.is_available() else "cpu")
     # Ensure the input has batch dimension (even if it's one example)
-    if len(input_features.shape) == 1:
-        input_features = input_features.unsqueeze(0)
     with torch.no_grad():
         logits = model(input_features).logits

     adversarial_waveform = torch.clamp(adversarial_waveform, -1.0, 1.0)
     # ================================
+    # ✅ Preprocess Audio with Processor (Corrected)
     # ================================
+    inputs = processor(adversarial_waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt", padding=True)
     # Ensure the input has batch dimension (even if it's one example)
+    if len(inputs.input_features.shape) == 1:
+        inputs.input_features = inputs.input_features.unsqueeze(0)
+    # Move the input features to the correct device (GPU/CPU)
+    input_features = inputs.input_features.to("cuda" if torch.cuda.is_available() else "cpu")
+    # ================================
+    # ✅ Fast Transcription Processing with Conformer
+    # ================================
     with torch.no_grad():
         logits = model(input_features).logits