Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,10 +3,12 @@ import librosa
|
|
3 |
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
|
4 |
from gtts import gTTS
|
5 |
import gradio as gr
|
|
|
6 |
|
7 |
-
print("Using GPU for
|
8 |
|
9 |
-
# Function to safely load pipeline
|
|
|
10 |
def load_pipeline(model_name, **kwargs):
|
11 |
try:
|
12 |
device = 0 if torch.cuda.is_available() else "cpu"
|
@@ -15,7 +17,8 @@ def load_pipeline(model_name, **kwargs):
|
|
15 |
print(f"Error loading {model_name} pipeline: {e}")
|
16 |
return None
|
17 |
|
18 |
-
# Load Whisper model for speech recognition
|
|
|
19 |
def load_whisper():
|
20 |
try:
|
21 |
device = 0 if torch.cuda.is_available() else "cpu"
|
@@ -26,15 +29,14 @@ def load_whisper():
|
|
26 |
print(f"Error loading Whisper model: {e}")
|
27 |
return None, None
|
28 |
|
29 |
-
# Load sarvam-2b for text generation
|
|
|
30 |
def load_sarvam():
|
31 |
return load_pipeline('sarvamai/sarvam-2b-v0.5')
|
32 |
|
33 |
-
#
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
def process_audio_input(audio):
|
38 |
if whisper_processor is None or whisper_model is None:
|
39 |
return "Error: Speech recognition model is not available. Please type your message instead."
|
40 |
|
@@ -47,7 +49,9 @@ def process_audio_input(audio):
|
|
47 |
except Exception as e:
|
48 |
return f"Error processing audio: {str(e)}. Please type your message instead."
|
49 |
|
50 |
-
|
|
|
|
|
51 |
if sarvam_pipe is None:
|
52 |
return "Error: sarvam-2b model is not available. The assistant cannot generate responses at this time."
|
53 |
|
@@ -78,16 +82,21 @@ def detect_language(text):
|
|
78 |
return 'hi' # Default to Hindi for simplicity
|
79 |
return 'en' # Default to English if no Indic script is detected
|
80 |
|
|
|
81 |
def indic_language_assistant(input_type, audio_input, text_input):
|
82 |
try:
|
|
|
|
|
|
|
|
|
83 |
if input_type == "audio" and audio_input is not None:
|
84 |
-
transcription = process_audio_input(audio_input)
|
85 |
elif input_type == "text" and text_input:
|
86 |
transcription = text_input
|
87 |
else:
|
88 |
return "Please provide either audio or text input.", "No input provided.", None
|
89 |
|
90 |
-
response = generate_response(transcription)
|
91 |
lang = detect_language(response)
|
92 |
audio_response = text_to_speech(response, lang)
|
93 |
|
@@ -114,4 +123,4 @@ iface = gr.Interface(
|
|
114 |
)
|
115 |
|
116 |
# Launch the app
|
117 |
-
iface.launch()
|
|
|
3 |
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
|
4 |
from gtts import gTTS
|
5 |
import gradio as gr
|
6 |
+
import spaces
|
7 |
|
8 |
+
print("Using GPU for operations when available")
|
9 |
|
10 |
+
# Safely load a pipeline; this function is decorated with @spaces.GPU
|
11 |
+
@spaces.GPU
|
12 |
def load_pipeline(model_name, **kwargs):
|
13 |
try:
|
14 |
device = 0 if torch.cuda.is_available() else "cpu"
|
|
|
17 |
print(f"Error loading {model_name} pipeline: {e}")
|
18 |
return None
|
19 |
|
20 |
+
# Load the Whisper model for speech recognition; this function is decorated with @spaces.GPU
|
21 |
+
@spaces.GPU
|
22 |
def load_whisper():
|
23 |
try:
|
24 |
device = 0 if torch.cuda.is_available() else "cpu"
|
|
|
29 |
print(f"Error loading Whisper model: {e}")
|
30 |
return None, None
|
31 |
|
32 |
+
# Load sarvam-2b for text generation; this function is decorated with @spaces.GPU
|
33 |
+
@spaces.GPU
|
34 |
def load_sarvam():
|
35 |
return load_pipeline('sarvamai/sarvam-2b-v0.5')
|
36 |
|
37 |
+
# Process audio input; this function is decorated with @spaces.GPU
|
38 |
+
@spaces.GPU
|
39 |
+
def process_audio_input(audio, whisper_processor, whisper_model):
|
|
|
|
|
40 |
if whisper_processor is None or whisper_model is None:
|
41 |
return "Error: Speech recognition model is not available. Please type your message instead."
|
42 |
|
|
|
49 |
except Exception as e:
|
50 |
return f"Error processing audio: {str(e)}. Please type your message instead."
|
51 |
|
52 |
+
# Generate a response; this function is decorated with @spaces.GPU
|
53 |
+
@spaces.GPU
|
54 |
+
def generate_response(text_input, sarvam_pipe):
|
55 |
if sarvam_pipe is None:
|
56 |
return "Error: sarvam-2b model is not available. The assistant cannot generate responses at this time."
|
57 |
|
|
|
82 |
return 'hi' # Default to Hindi for simplicity
|
83 |
return 'en' # Default to English if no Indic script is detected
|
84 |
|
85 |
+
@spaces.GPU
|
86 |
def indic_language_assistant(input_type, audio_input, text_input):
|
87 |
try:
|
88 |
+
# Load models within the GPU-decorated function
|
89 |
+
whisper_processor, whisper_model = load_whisper()
|
90 |
+
sarvam_pipe = load_sarvam()
|
91 |
+
|
92 |
if input_type == "audio" and audio_input is not None:
|
93 |
+
transcription = process_audio_input(audio_input, whisper_processor, whisper_model)
|
94 |
elif input_type == "text" and text_input:
|
95 |
transcription = text_input
|
96 |
else:
|
97 |
return "Please provide either audio or text input.", "No input provided.", None
|
98 |
|
99 |
+
response = generate_response(transcription, sarvam_pipe)
|
100 |
lang = detect_language(response)
|
101 |
audio_response = text_to_speech(response, lang)
|
102 |
|
|
|
123 |
)
|
124 |
|
125 |
# Launch the app
|
126 |
+
iface.launch()
|