Spaces:

sagar007
/

shuka_audio

Sleeping

App Files Files Community

sagar007 committed on Aug 23, 2024

Commit

aad2393

verified ·

1 Parent(s): c14fb36

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -13

app.py CHANGED Viewed

@@ -3,10 +3,12 @@ import librosa
 from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
 from gtts import gTTS
 import gradio as gr
-print("Using GPU for all operations")
-# Function to safely load pipeline
 def load_pipeline(model_name, **kwargs):
     try:
         device = 0 if torch.cuda.is_available() else "cpu"
@@ -15,7 +17,8 @@ def load_pipeline(model_name, **kwargs):
         print(f"Error loading {model_name} pipeline: {e}")
         return None
-# Load Whisper model for speech recognition
 def load_whisper():
     try:
         device = 0 if torch.cuda.is_available() else "cpu"
@@ -26,15 +29,14 @@ def load_whisper():
         print(f"Error loading Whisper model: {e}")
         return None, None
-# Load sarvam-2b for text generation
 def load_sarvam():
     return load_pipeline('sarvamai/sarvam-2b-v0.5')
-# Attempt to load models
-whisper_processor, whisper_model = load_whisper()
-sarvam_pipe = load_sarvam()
-def process_audio_input(audio):
     if whisper_processor is None or whisper_model is None:
         return "Error: Speech recognition model is not available. Please type your message instead."
@@ -47,7 +49,9 @@ def process_audio_input(audio):
     except Exception as e:
         return f"Error processing audio: {str(e)}. Please type your message instead."
-def generate_response(text_input):
     if sarvam_pipe is None:
         return "Error: sarvam-2b model is not available. The assistant cannot generate responses at this time."
@@ -78,16 +82,21 @@ def detect_language(text):
             return 'hi'  # Default to Hindi for simplicity
     return 'en'  # Default to English if no Indic script is detected
 def indic_language_assistant(input_type, audio_input, text_input):
     try:
         if input_type == "audio" and audio_input is not None:
-            transcription = process_audio_input(audio_input)
         elif input_type == "text" and text_input:
             transcription = text_input
         else:
             return "Please provide either audio or text input.", "No input provided.", None
-        response = generate_response(transcription)
         lang = detect_language(response)
         audio_response = text_to_speech(response, lang)
@@ -114,4 +123,4 @@ iface = gr.Interface(
 )
 # Launch the app
-iface.launch()

 from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
 from gtts import gTTS
 import gradio as gr
+import spaces
+print("Using GPU for operations when available")
+# Function to safely load pipeline within a GPU-decorated function
+@spaces.GPU
 def load_pipeline(model_name, **kwargs):
     try:
         device = 0 if torch.cuda.is_available() else "cpu"
         print(f"Error loading {model_name} pipeline: {e}")
         return None
+# Load Whisper model for speech recognition within a GPU-decorated function
+@spaces.GPU
 def load_whisper():
     try:
         device = 0 if torch.cuda.is_available() else "cpu"
         print(f"Error loading Whisper model: {e}")
         return None, None
+# Load sarvam-2b for text generation within a GPU-decorated function
+@spaces.GPU
 def load_sarvam():
     return load_pipeline('sarvamai/sarvam-2b-v0.5')
+# Process audio input within a GPU-decorated function
+@spaces.GPU
+def process_audio_input(audio, whisper_processor, whisper_model):
     if whisper_processor is None or whisper_model is None:
         return "Error: Speech recognition model is not available. Please type your message instead."
     except Exception as e:
         return f"Error processing audio: {str(e)}. Please type your message instead."
+# Generate response within a GPU-decorated function
+@spaces.GPU
+def generate_response(text_input, sarvam_pipe):
     if sarvam_pipe is None:
         return "Error: sarvam-2b model is not available. The assistant cannot generate responses at this time."
             return 'hi'  # Default to Hindi for simplicity
     return 'en'  # Default to English if no Indic script is detected
+@spaces.GPU
 def indic_language_assistant(input_type, audio_input, text_input):
     try:
+        # Load models within the GPU-decorated function
+        whisper_processor, whisper_model = load_whisper()
+        sarvam_pipe = load_sarvam()
         if input_type == "audio" and audio_input is not None:
+            transcription = process_audio_input(audio_input, whisper_processor, whisper_model)
         elif input_type == "text" and text_input:
             transcription = text_input
         else:
             return "Please provide either audio or text input.", "No input provided.", None
+        response = generate_response(transcription, sarvam_pipe)
         lang = detect_language(response)
         audio_response = text_to_speech(response, lang)
 )
 # Launch the app
+iface.launch()