sagar007 committed on
Commit
e3a075e
·
verified ·
1 Parent(s): 6c4a628

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -19
app.py CHANGED
@@ -4,6 +4,7 @@ from transformers import pipeline, WhisperProcessor, WhisperForConditionalGenera
4
  from gtts import gTTS
5
  import gradio as gr
6
  import spaces
 
7
 
8
  print("Using GPU for operations when available")
9
 
@@ -51,15 +52,29 @@ def process_audio_input(audio, whisper_processor, whisper_model):
51
 
52
  # Generate response within a GPU-decorated function
53
  @spaces.GPU
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  def text_to_speech(text, lang='hi'):
55
  try:
56
  # Use a better TTS engine for Indic languages
57
  if lang in ['hi', 'bn', 'gu', 'kn', 'ml', 'mr', 'or', 'pa', 'ta', 'te']:
58
- # You might want to use a different TTS library here
59
- # For example, you could use the Google Cloud Text-to-Speech API
60
- # or a specialized Indic language TTS library
61
-
62
- # This is a placeholder for a better Indic TTS solution
63
  tts = gTTS(text=text, lang=lang, tld='co.in') # Use Indian TLD
64
  else:
65
  tts = gTTS(text=text, lang=lang)
@@ -70,7 +85,7 @@ def text_to_speech(text, lang='hi'):
70
  print(f"Error in text-to-speech: {str(e)}")
71
  return None
72
 
73
- # Replace the existing detect_language function with this improved version
74
  def detect_language(text):
75
  lang_codes = {
76
  'bn': 'Bengali', 'gu': 'Gujarati', 'hi': 'Hindi', 'kn': 'Kannada',
@@ -87,17 +102,6 @@ def detect_language(text):
87
  if any(ord(char) >= 0x0900 and ord(char) <= 0x097F for char in text): # Devanagari script
88
  return 'hi'
89
  return 'en' # Default to English if no Indic script is detected
90
- @spaces.GPU
91
- def generate_response(transcription, sarvam_pipe):
92
- if sarvam_pipe is None:
93
- return "Error: Text generation model is not available."
94
-
95
- try:
96
- # Generate response using the sarvam-2b model
97
- response = sarvam_pipe(transcription, max_length=100, num_return_sequences=1)[0]['generated_text']
98
- return response
99
- except Exception as e:
100
- return f"Error generating response: {str(e)}"
101
 
102
  @spaces.GPU
103
  def indic_language_assistant(input_type, audio_input, text_input):
@@ -121,13 +125,111 @@ def indic_language_assistant(input_type, audio_input, text_input):
121
  except Exception as e:
122
  error_message = f"An error occurred: {str(e)}"
123
  return error_message, error_message, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  # Create Gradio interface
125
  iface = gr.Interface(
126
  fn=indic_language_assistant,
127
  inputs=[
128
  gr.Radio(["audio", "text"], label="Input Type", value="audio"),
129
  gr.Audio(type="filepath", label="Speak (if audio input selected)"),
130
- gr.Textbox(label="Type your message (if text input selected)")
131
  ],
132
  outputs=[
133
  gr.Textbox(label="Transcription/Input"),
@@ -135,7 +237,10 @@ iface = gr.Interface(
135
  gr.Audio(label="Audio Response")
136
  ],
137
  title="Indic Language Virtual Assistant",
138
- description="Speak or type in any supported Indic language or English. The assistant will respond in text and audio."
 
 
 
139
  )
140
 
141
  # Launch the app
 
4
  from gtts import gTTS
5
  import gradio as gr
6
  import spaces
7
+ from langdetect import detect
8
 
9
  print("Using GPU for operations when available")
10
 
 
52
 
53
  # Generate response within a GPU-decorated function
54
  @spaces.GPU
55
+ def generate_response(transcription, sarvam_pipe):
56
+ if sarvam_pipe is None:
57
+ return "Error: Text generation model is not available."
58
+
59
+ try:
60
+ # Prepare the prompt
61
+ prompt = f"Human: {transcription}\n\nAssistant:"
62
+
63
+ # Generate response using the sarvam-2b model
64
+ response = sarvam_pipe(prompt, max_length=200, num_return_sequences=1, do_sample=True, temperature=0.7)[0]['generated_text']
65
+
66
+ # Extract the assistant's response
67
+ assistant_response = response.split("Assistant:")[-1].strip()
68
+
69
+ return assistant_response
70
+ except Exception as e:
71
+ return f"Error generating response: {str(e)}"
72
+
73
+ # Text-to-speech function
74
  def text_to_speech(text, lang='hi'):
75
  try:
76
  # Use a better TTS engine for Indic languages
77
  if lang in ['hi', 'bn', 'gu', 'kn', 'ml', 'mr', 'or', 'pa', 'ta', 'te']:
 
 
 
 
 
78
  tts = gTTS(text=text, lang=lang, tld='co.in') # Use Indian TLD
79
  else:
80
  tts = gTTS(text=text, lang=lang)
 
85
  print(f"Error in text-to-speech: {str(e)}")
86
  return None
87
 
88
+ # Language detection function
89
  def detect_language(text):
90
  lang_codes = {
91
  'bn': 'Bengali', 'gu': 'Gujarati', 'hi': 'Hindi', 'kn': 'Kannada',
 
102
  if any(ord(char) >= 0x0900 and ord(char) <= 0x097F for char in text): # Devanagari script
103
  return 'hi'
104
  return 'en' # Default to English if no Indic script is detected
 
 
 
 
 
 
 
 
 
 
 
105
 
106
  @spaces.GPU
107
  def indic_language_assistant(input_type, audio_input, text_input):
 
125
  except Exception as e:
126
  error_message = f"An error occurred: {str(e)}"
127
  return error_message, error_message, None
128
+
129
+ # Custom CSS
130
+ custom_css = """
131
+ body {
132
+ background-color: #1a1a1a;
133
+ color: #ffffff;
134
+ font-family: Arial, sans-serif;
135
+ }
136
+
137
+ .container {
138
+ max-width: 800px;
139
+ margin: 0 auto;
140
+ padding: 20px;
141
+ }
142
+
143
+ h1 {
144
+ font-size: 2.5em;
145
+ background: linear-gradient(45deg, #4a90e2, #f48fb1);
146
+ -webkit-background-clip: text;
147
+ -webkit-text-fill-color: transparent;
148
+ margin-bottom: 10px;
149
+ }
150
+
151
+ h2 {
152
+ color: #a0a0a0;
153
+ font-weight: normal;
154
+ }
155
+
156
+ .task-container {
157
+ display: flex;
158
+ justify-content: space-between;
159
+ flex-wrap: wrap;
160
+ margin-top: 30px;
161
+ }
162
+
163
+ .task-card {
164
+ background-color: #2a2a2a;
165
+ border-radius: 10px;
166
+ padding: 15px;
167
+ margin: 10px 0;
168
+ width: calc(50% - 10px);
169
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
170
+ transition: transform 0.3s ease;
171
+ }
172
+
173
+ .task-card:hover {
174
+ transform: translateY(-5px);
175
+ }
176
+
177
+ .task-icon {
178
+ font-size: 24px;
179
+ margin-bottom: 10px;
180
+ }
181
+
182
+ .input-box {
183
+ width: 100%;
184
+ padding: 10px;
185
+ border-radius: 20px;
186
+ border: none;
187
+ background-color: #333;
188
+ color: #fff;
189
+ margin-top: 20px;
190
+ }
191
+
192
+ .submit-btn {
193
+ background-color: #4a90e2;
194
+ color: white;
195
+ border: none;
196
+ padding: 10px 20px;
197
+ border-radius: 20px;
198
+ cursor: pointer;
199
+ margin-top: 10px;
200
+ transition: background-color 0.3s ease;
201
+ }
202
+
203
+ .submit-btn:hover {
204
+ background-color: #3a7bd5;
205
+ }
206
+ """
207
+
208
+ # Custom HTML
209
+ custom_html = """
210
+ <div class="container">
211
+ <h1>Hello, User</h1>
212
+ <h2>How can I help you today?</h2>
213
+ <div class="task-container">
214
+ <div class="task-card">
215
+ <div class="task-icon">🎤</div>
216
+ <p>Speak in any Indic language</p>
217
+ </div>
218
+ <div class="task-card">
219
+ <div class="task-icon">⌨️</div>
220
+ <p>Type in any Indic language</p>
221
+ </div>
222
+ </div>
223
+ </div>
224
+ """
225
+
226
  # Create Gradio interface
227
  iface = gr.Interface(
228
  fn=indic_language_assistant,
229
  inputs=[
230
  gr.Radio(["audio", "text"], label="Input Type", value="audio"),
231
  gr.Audio(type="filepath", label="Speak (if audio input selected)"),
232
+ gr.Textbox(label="Type your message (if text input selected)", elem_classes="input-box")
233
  ],
234
  outputs=[
235
  gr.Textbox(label="Transcription/Input"),
 
237
  gr.Audio(label="Audio Response")
238
  ],
239
  title="Indic Language Virtual Assistant",
240
+ description="Speak or type in any supported Indic language or English. The assistant will respond in text and audio.",
241
+ css=custom_css,
242
+ elem_id="indic-assistant",
243
+ theme="dark"
244
  )
245
 
246
  # Launch the app