sagar007 committed on
Commit
aad2393
·
verified ·
1 Parent(s): c14fb36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -13
app.py CHANGED
@@ -3,10 +3,12 @@ import librosa
3
  from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
4
  from gtts import gTTS
5
  import gradio as gr
 
6
 
7
- print("Using GPU for all operations")
8
 
9
- # Function to safely load pipeline
 
10
  def load_pipeline(model_name, **kwargs):
11
  try:
12
  device = 0 if torch.cuda.is_available() else "cpu"
@@ -15,7 +17,8 @@ def load_pipeline(model_name, **kwargs):
15
  print(f"Error loading {model_name} pipeline: {e}")
16
  return None
17
 
18
- # Load Whisper model for speech recognition
 
19
  def load_whisper():
20
  try:
21
  device = 0 if torch.cuda.is_available() else "cpu"
@@ -26,15 +29,14 @@ def load_whisper():
26
  print(f"Error loading Whisper model: {e}")
27
  return None, None
28
 
29
- # Load sarvam-2b for text generation
 
30
  def load_sarvam():
31
  return load_pipeline('sarvamai/sarvam-2b-v0.5')
32
 
33
- # Attempt to load models
34
- whisper_processor, whisper_model = load_whisper()
35
- sarvam_pipe = load_sarvam()
36
-
37
- def process_audio_input(audio):
38
  if whisper_processor is None or whisper_model is None:
39
  return "Error: Speech recognition model is not available. Please type your message instead."
40
 
@@ -47,7 +49,9 @@ def process_audio_input(audio):
47
  except Exception as e:
48
  return f"Error processing audio: {str(e)}. Please type your message instead."
49
 
50
- def generate_response(text_input):
 
 
51
  if sarvam_pipe is None:
52
  return "Error: sarvam-2b model is not available. The assistant cannot generate responses at this time."
53
 
@@ -78,16 +82,21 @@ def detect_language(text):
78
  return 'hi' # Default to Hindi for simplicity
79
  return 'en' # Default to English if no Indic script is detected
80
 
 
81
  def indic_language_assistant(input_type, audio_input, text_input):
82
  try:
 
 
 
 
83
  if input_type == "audio" and audio_input is not None:
84
- transcription = process_audio_input(audio_input)
85
  elif input_type == "text" and text_input:
86
  transcription = text_input
87
  else:
88
  return "Please provide either audio or text input.", "No input provided.", None
89
 
90
- response = generate_response(transcription)
91
  lang = detect_language(response)
92
  audio_response = text_to_speech(response, lang)
93
 
@@ -114,4 +123,4 @@ iface = gr.Interface(
114
  )
115
 
116
  # Launch the app
117
- iface.launch()
 
3
  from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
4
  from gtts import gTTS
5
  import gradio as gr
6
+ import spaces
7
 
8
+ print("Using GPU for operations when available")
9
 
10
+ # Function to safely load pipeline within a GPU-decorated function
11
+ @spaces.GPU
12
  def load_pipeline(model_name, **kwargs):
13
  try:
14
  device = 0 if torch.cuda.is_available() else "cpu"
 
17
  print(f"Error loading {model_name} pipeline: {e}")
18
  return None
19
 
20
+ # Load Whisper model for speech recognition within a GPU-decorated function
21
+ @spaces.GPU
22
  def load_whisper():
23
  try:
24
  device = 0 if torch.cuda.is_available() else "cpu"
 
29
  print(f"Error loading Whisper model: {e}")
30
  return None, None
31
 
32
+ # Load sarvam-2b for text generation within a GPU-decorated function
33
+ @spaces.GPU
34
  def load_sarvam():
35
  return load_pipeline('sarvamai/sarvam-2b-v0.5')
36
 
37
+ # Process audio input within a GPU-decorated function
38
+ @spaces.GPU
39
+ def process_audio_input(audio, whisper_processor, whisper_model):
 
 
40
  if whisper_processor is None or whisper_model is None:
41
  return "Error: Speech recognition model is not available. Please type your message instead."
42
 
 
49
  except Exception as e:
50
  return f"Error processing audio: {str(e)}. Please type your message instead."
51
 
52
+ # Generate response within a GPU-decorated function
53
+ @spaces.GPU
54
+ def generate_response(text_input, sarvam_pipe):
55
  if sarvam_pipe is None:
56
  return "Error: sarvam-2b model is not available. The assistant cannot generate responses at this time."
57
 
 
82
  return 'hi' # Default to Hindi for simplicity
83
  return 'en' # Default to English if no Indic script is detected
84
 
85
+ @spaces.GPU
86
  def indic_language_assistant(input_type, audio_input, text_input):
87
  try:
88
+ # Load models within the GPU-decorated function
89
+ whisper_processor, whisper_model = load_whisper()
90
+ sarvam_pipe = load_sarvam()
91
+
92
  if input_type == "audio" and audio_input is not None:
93
+ transcription = process_audio_input(audio_input, whisper_processor, whisper_model)
94
  elif input_type == "text" and text_input:
95
  transcription = text_input
96
  else:
97
  return "Please provide either audio or text input.", "No input provided.", None
98
 
99
+ response = generate_response(transcription, sarvam_pipe)
100
  lang = detect_language(response)
101
  audio_response = text_to_speech(response, lang)
102
 
 
123
  )
124
 
125
  # Launch the app
126
+ iface.launch()