Jaward committed on
Commit
ddb6345
·
verified ·
1 Parent(s): ec5ecf9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -24
app.py CHANGED
@@ -10,6 +10,7 @@ import torch
10
  import random
11
  from openai import OpenAI
12
  import subprocess
 
13
 
14
  LLAMA_3B_API_ENDPOINT = os.environ.get("LLAMA_3B_API_ENDPOINT")
15
  LLAMA_3B_API_KEY = os.environ.get("LLAMA_3B_API_KEY")
@@ -146,30 +147,37 @@ def translate_speech(audio_file, target_language):
146
  return None
147
 
148
  async def respond(audio, model, seed, target_language):
149
- if audio is None:
150
- return None, None
151
-
152
- user_input = transcribe(audio)
153
- if not user_input:
154
- return None, None
155
-
156
- if user_input.lower().startswith("please translate"):
157
- # Extract the actual content to translate
158
- content_to_translate = user_input[len("please translate"):].strip()
159
- translated_audio = translate_speech(audio, target_language)
160
- return None, translated_audio
161
- else:
162
- reply = models(user_input, model, seed)
163
- communicate = edge_tts.Communicate(reply, voice="en-US-ChristopherNeural")
164
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
165
- tmp_path = tmp_file.name
166
- await communicate.save(tmp_path)
167
- return tmp_path, None
 
 
 
 
 
 
 
168
 
169
  def clear_history():
170
  global conversation_history
171
  conversation_history = []
172
- return None, None
173
 
174
  with gr.Blocks(css="style.css") as demo:
175
  description = gr.Markdown("# <center><b>Optimus Prime: Voice Assistant with Translation</b></center>")
@@ -200,17 +208,17 @@ with gr.Blocks(css="style.css") as demo:
200
  input_audio = gr.Audio(label="User Input", sources=["microphone"], type="filepath")
201
  output_audio = gr.Audio(label="AI Response", type="filepath", interactive=False, autoplay=True)
202
  translated_audio = gr.Audio(label="Translated Audio", type="filepath", interactive=False, autoplay=True)
 
203
 
204
  clear_button = gr.Button("Clear Conversation History")
205
 
206
- gr.Interface(
207
  fn=respond,
208
  inputs=[input_audio, select, seed, target_lang],
209
- outputs=[output_audio, translated_audio],
210
- live=True
211
  )
212
 
213
- clear_button.click(fn=clear_history, inputs=[], outputs=[output_audio, translated_audio])
214
 
215
  if __name__ == "__main__":
216
  demo.queue(max_size=200).launch()
 
10
  import random
11
  from openai import OpenAI
12
  import subprocess
13
+ from starlette.requests import ClientDisconnect
14
 
15
  LLAMA_3B_API_ENDPOINT = os.environ.get("LLAMA_3B_API_ENDPOINT")
16
  LLAMA_3B_API_KEY = os.environ.get("LLAMA_3B_API_KEY")
 
147
  return None
148
 
149
  async def respond(audio, model, seed, target_language):
150
+ try:
151
+ if audio is None:
152
+ return None, None, "No input detected."
153
+
154
+ user_input = transcribe(audio)
155
+ if not user_input:
156
+ return None, None, "Could not transcribe audio."
157
+
158
+ if user_input.lower().startswith("please translate"):
159
+ # Extract the actual content to translate
160
+ content_to_translate = user_input[len("please translate"):].strip()
161
+ translated_audio = translate_speech(audio, target_language)
162
+ return None, translated_audio, f"Translated to {target_language}"
163
+ else:
164
+ reply = models(user_input, model, seed)
165
+ communicate = edge_tts.Communicate(reply, voice="en-US-ChristopherNeural")
166
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
167
+ tmp_path = tmp_file.name
168
+ await communicate.save(tmp_path)
169
+ return tmp_path, None, "Voice assistant response"
170
+ except ClientDisconnect:
171
+ print("Client disconnected")
172
+ return None, None, "Client disconnected. Please try again."
173
+ except Exception as e:
174
+ print(f"An error occurred: {str(e)}")
175
+ return None, None, f"An error occurred: {str(e)}"
176
 
177
  def clear_history():
178
  global conversation_history
179
  conversation_history = []
180
+ return None, None, "Conversation history cleared."
181
 
182
  with gr.Blocks(css="style.css") as demo:
183
  description = gr.Markdown("# <center><b>Optimus Prime: Voice Assistant with Translation</b></center>")
 
208
  input_audio = gr.Audio(label="User Input", sources=["microphone"], type="filepath")
209
  output_audio = gr.Audio(label="AI Response", type="filepath", interactive=False, autoplay=True)
210
  translated_audio = gr.Audio(label="Translated Audio", type="filepath", interactive=False, autoplay=True)
211
+ status_message = gr.Textbox(label="Status", interactive=False)
212
 
213
  clear_button = gr.Button("Clear Conversation History")
214
 
215
+ input_audio.change(
216
  fn=respond,
217
  inputs=[input_audio, select, seed, target_lang],
218
+ outputs=[output_audio, translated_audio, status_message],
 
219
  )
220
 
221
+ # clear_button.click(fn=clear_history, inputs=[], outputs=[output_audio, translated_audio, status_message])
222
 
223
  if __name__ == "__main__":
224
  demo.queue(max_size=200).launch()