Update app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,7 @@ import torch
|
|
10 |
import random
|
11 |
from openai import OpenAI
|
12 |
import subprocess
|
|
|
13 |
|
14 |
LLAMA_3B_API_ENDPOINT = os.environ.get("LLAMA_3B_API_ENDPOINT")
|
15 |
LLAMA_3B_API_KEY = os.environ.get("LLAMA_3B_API_KEY")
|
@@ -146,30 +147,37 @@ def translate_speech(audio_file, target_language):
|
|
146 |
return None
|
147 |
|
148 |
async def respond(audio, model, seed, target_language):
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
|
169 |
def clear_history():
    """Forget the stored conversation and blank both audio outputs.

    Rebinds the module-level ``conversation_history`` to a new empty list.

    Returns:
        A 2-tuple ``(None, None)``; each ``None`` clears one output component.
    """
    global conversation_history
    conversation_history = list()
    cleared_outputs = (None, None)
    return cleared_outputs
|
173 |
|
174 |
with gr.Blocks(css="style.css") as demo:
|
175 |
description = gr.Markdown("# <center><b>Optimus Prime: Voice Assistant with Translation</b></center>")
|
@@ -200,17 +208,17 @@ with gr.Blocks(css="style.css") as demo:
|
|
200 |
input_audio = gr.Audio(label="User Input", sources=["microphone"], type="filepath")
|
201 |
output_audio = gr.Audio(label="AI Response", type="filepath", interactive=False, autoplay=True)
|
202 |
translated_audio = gr.Audio(label="Translated Audio", type="filepath", interactive=False, autoplay=True)
|
|
|
203 |
|
204 |
clear_button = gr.Button("Clear Conversation History")
|
205 |
|
206 |
-
|
207 |
fn=respond,
|
208 |
inputs=[input_audio, select, seed, target_lang],
|
209 |
-
outputs=[output_audio, translated_audio],
|
210 |
-
live=True
|
211 |
)
|
212 |
|
213 |
-
clear_button.click(fn=clear_history, inputs=[], outputs=[output_audio, translated_audio])
|
214 |
|
215 |
if __name__ == "__main__":
|
216 |
demo.queue(max_size=200).launch()
|
|
|
10 |
import random
|
11 |
from openai import OpenAI
|
12 |
import subprocess
|
13 |
+
from starlette.requests import ClientDisconnect
|
14 |
|
15 |
LLAMA_3B_API_ENDPOINT = os.environ.get("LLAMA_3B_API_ENDPOINT")
|
16 |
LLAMA_3B_API_KEY = os.environ.get("LLAMA_3B_API_KEY")
|
|
|
147 |
return None
|
148 |
|
149 |
async def respond(audio, model, seed, target_language):
    """Handle one voice interaction: transcribe, then translate or answer.

    The recording is transcribed first. If the transcript starts with
    "please translate" (case-insensitive), the recording is handed to
    ``translate_speech``; otherwise the transcript is sent to ``models`` and
    the reply is synthesised to speech with edge-tts.

    Args:
        audio: Path of the recorded audio file, or ``None`` when nothing was
            recorded (the input component is declared with ``type="filepath"``).
        model: Model selector forwarded unchanged to ``models``.
        seed: Seed forwarded unchanged to ``models``.
        target_language: Language forwarded unchanged to ``translate_speech``.

    Returns:
        A 3-tuple ``(response_audio_path, translated_audio, status_text)``.
        At most one of the two audio slots is set; both are ``None`` when
        there is nothing to play or when an error occurred.

    Raises:
        Nothing deliberately: ``ClientDisconnect`` and any other ``Exception``
        are printed and reported through the status text, so the UI always
        receives a well-formed 3-tuple.
    """
    try:
        if audio is None:
            return None, None, "No input detected."

        user_input = transcribe(audio)
        if not user_input:
            return None, None, "Could not transcribe audio."

        if user_input.lower().startswith("please translate"):
            # The original audio (not the stripped transcript) is what gets
            # translated, so no text extraction is needed here.
            translated_audio = translate_speech(audio, target_language)
            if translated_audio is None:
                # translate_speech's visible tail returns None (presumably on
                # failure); do not claim success in that case.
                return None, None, "Translation failed. Please try again."
            return None, translated_audio, f"Translated to {target_language}"

        reply = models(user_input, model, seed)
        communicate = edge_tts.Communicate(reply, voice="en-US-ChristopherNeural")
        # Only the unique file name is needed; close the handle before
        # edge-tts writes to the path so the file is never open twice.
        # NOTE(review): the ".wav" suffix is kept from the original; confirm
        # it matches the audio format edge-tts actually writes.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
            tmp_path = tmp_file.name
        await communicate.save(tmp_path)
        return tmp_path, None, "Voice assistant response"
    except ClientDisconnect:
        print("Client disconnected")
        return None, None, "Client disconnected. Please try again."
    except Exception as e:
        # Top-level UI boundary: report the failure instead of crashing the
        # event handler.
        print(f"An error occurred: {e}")
        return None, None, f"An error occurred: {e}"
|
176 |
|
177 |
def clear_history():
    """Forget the stored conversation and reset the three UI outputs.

    Rebinds the module-level ``conversation_history`` to a new empty list.

    Returns:
        A 3-tuple ``(None, None, status_text)``: two ``None`` values followed
        by a confirmation message, the same shape ``respond`` returns.
    """
    global conversation_history
    conversation_history = list()
    cleared_outputs = (None, None, "Conversation history cleared.")
    return cleared_outputs
|
181 |
|
182 |
with gr.Blocks(css="style.css") as demo:
|
183 |
description = gr.Markdown("# <center><b>Optimus Prime: Voice Assistant with Translation</b></center>")
|
|
|
208 |
input_audio = gr.Audio(label="User Input", sources=["microphone"], type="filepath")
|
209 |
output_audio = gr.Audio(label="AI Response", type="filepath", interactive=False, autoplay=True)
|
210 |
translated_audio = gr.Audio(label="Translated Audio", type="filepath", interactive=False, autoplay=True)
|
211 |
+
status_message = gr.Textbox(label="Status", interactive=False)
|
212 |
|
213 |
clear_button = gr.Button("Clear Conversation History")
|
214 |
|
215 |
+
input_audio.change(
|
216 |
fn=respond,
|
217 |
inputs=[input_audio, select, seed, target_lang],
|
218 |
+
outputs=[output_audio, translated_audio, status_message],
|
|
|
219 |
)
|
220 |
|
221 |
+
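# TODO: the "Clear Conversation History" button has no handler while the line below stays commented out; restore it to re-enable clearing.
# clear_button.click(fn=clear_history, inputs=[], outputs=[output_audio, translated_audio, status_message])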
|
222 |
|
223 |
if __name__ == "__main__":
|
224 |
demo.queue(max_size=200).launch()
|