Spaces:

GavinHuang
/

asr-demo

Running on Zero

App Files Files Community

GavinHuang committed on 28 days ago

Commit

a2ed037

1 Parent(s): 40ab795

fix: refactor model loading and enhance error handling in transcribe function

Browse files

Files changed (1) hide show

app.py +40 -10

app.py CHANGED Viewed

@@ -6,14 +6,22 @@ from omegaconf import OmegaConf
 import time
 import spaces
 import librosa
-# Check if CUDA is available
-print(f"CUDA available: {torch.cuda.is_available()}")
-if torch.cuda.is_available():
-    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
-model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained("nvidia/parakeet-tdt-0.6b-v2")
-print(f"Model loaded on device: {model.device}")
 import numpy as np
 import soundfile as sf
@@ -21,7 +29,9 @@ audio_buffer = []
 @spaces.GPU(duration=120)
 def transcribe(audio, state=""):
-    global model, audio_buffer
     if audio is None or isinstance(audio, int):
         print(f"Skipping invalid audio input: {type(audio)}")
         return state, state
@@ -38,8 +48,7 @@ def transcribe(audio, state=""):
         # Handle tuple of (sample_rate, audio_array)
         print(f"Tuple contents: {audio}")
         sample_rate, audio_data = audio
-        try:
-            # Resample to 16kHz for NeMo
             if sample_rate != 16000:
                 print(f"Resampling from {sample_rate}Hz to 16000Hz")
                 audio_data = librosa.resample(audio_data.astype(float), orig_sr=sample_rate, target_sr=16000)
@@ -47,7 +56,28 @@ def transcribe(audio, state=""):
             temp_file = "temp_audio.wav"
             sf.write(temp_file, audio_data, samplerate=16000)
             print(f"Processing temporary audio file: {temp_file}")
-            transcription = model.transcribe([temp_file])[0]
             os.remove(temp_file)  # Clean up
             print("Temporary file removed.")
         except Exception as e:

 import time
 import spaces
 import librosa
+# Important: Don't initialize CUDA in the main process for Spaces
+# The model will be loaded in the worker process through the GPU decorator
+model = None
+def load_model():
+    # Lazily load the ASR model and cache it in the module-level `model`.
+    #
+    # On the first call (while `model` is still None) this logs CUDA
+    # availability, loads "nvidia/parakeet-tdt-0.6b-v2" through
+    # EncDecRNNTBPEModel.from_pretrained, stores the result in the global
+    # `model`, and logs the device the model reports. Later calls in the same
+    # process skip loading and return the cached object.
+    #
+    # Returns: the model object held in the global `model`.
+    #
+    # NOTE(review): the cache is per-process; whether it is reused across
+    # separate @spaces.GPU invocations depends on worker reuse -- confirm.
+    # This function will be called in the GPU worker process
+    global model
+    if model is None:
+        print(f"Loading model in worker process")
+        # Diagnostic output only; the load below is attempted either way.
+        print(f"CUDA available: {torch.cuda.is_available()}")
+        if torch.cuda.is_available():
+            print(f"CUDA device: {torch.cuda.get_device_name(0)}")
+        model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained("nvidia/parakeet-tdt-0.6b-v2")
+        print(f"Model loaded on device: {model.device}")
+    return model
 import numpy as np
 import soundfile as sf
 @spaces.GPU(duration=120)
 def transcribe(audio, state=""):
+    # Load the model inside the GPU worker process
+    model = load_model()
     if audio is None or isinstance(audio, int):
         print(f"Skipping invalid audio input: {type(audio)}")
         return state, state
         # Handle tuple of (sample_rate, audio_array)
         print(f"Tuple contents: {audio}")
         sample_rate, audio_data = audio
+        try:            # Resample to 16kHz for NeMo
             if sample_rate != 16000:
                 print(f"Resampling from {sample_rate}Hz to 16000Hz")
                 audio_data = librosa.resample(audio_data.astype(float), orig_sr=sample_rate, target_sr=16000)
             temp_file = "temp_audio.wav"
             sf.write(temp_file, audio_data, samplerate=16000)
             print(f"Processing temporary audio file: {temp_file}")
+            # Handling NumPy 2.0 compatibility issue
+            try:
+                transcription = model.transcribe([temp_file])[0]
+            except AttributeError as e:
+                if "np.sctypes" in str(e):
+                    print("Handling NumPy 2.0 compatibility issue")
+                    # Using a workaround to handle the np.sctypes removal
+                    import numpy as np
+                    # Create a temporary sctypes attribute if needed by older code
+                    if not hasattr(np, 'sctypes'):
+                        np.sctypes = {
+                            'int': [np.int8, np.int16, np.int32, np.int64],
+                            'uint': [np.uint8, np.uint16, np.uint32, np.uint64],
+                            'float': [np.float16, np.float32, np.float64],
+                            'complex': [np.complex64, np.complex128]
+                        }
+                    # Try again
+                    transcription = model.transcribe([temp_file])[0]
+                else:
+                    raise
             os.remove(temp_file)  # Clean up
             print("Temporary file removed.")
         except Exception as e: