Jaward committed · Commit d99b477 · verified · 1 Parent(s): 3169305

Update app.py

Files changed (1):
  1. app.py +42 -57
app.py CHANGED
```diff
@@ -11,8 +11,6 @@ import random
 from openai import OpenAI
 import subprocess
 from starlette.requests import ClientDisconnect
-import logging
-import time
 
 LLAMA_3B_API_ENDPOINT = os.environ.get("LLAMA_3B_API_ENDPOINT")
 LLAMA_3B_API_KEY = os.environ.get("LLAMA_3B_API_KEY")
@@ -20,8 +18,6 @@ HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
 default_lang = "en"
 engines = { default_lang: Model(default_lang) }
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
 
 LANGUAGE_CODES = {
     "English": "eng",
```
```diff
@@ -124,76 +120,64 @@ def models(text, model="Llama 3 8B Service", seed=42):
 
     return output
 
-async def translate_speech_with_timeout(audio_file, target_language, timeout=30):
-    try:
-        language_code = LANGUAGE_CODES[target_language]
-        output_file = f"translated_audio_{int(time.time())}.wav"
-
-        command = [
-            "expressivity_predict",
-            audio_file,
-            "--tgt_lang", language_code,
-            "--model_name", "seamless_expressivity",
-            "--vocoder_name", "vocoder_pretssel",
-            "--gated-model-dir", "models",
-            "--output_path", output_file
-        ]
-
-        process = await asyncio.create_subprocess_exec(
-            *command,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE
-        )
-
-        try:
-            stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=timeout)
-        except asyncio.TimeoutError:
-            process.kill()
-            raise Exception("Translation process timed out")
-
-        if process.returncode != 0:
-            raise Exception(f"Translation process failed: {stderr.decode()}")
-
-        if os.path.exists(output_file):
-            print(f"File created successfully: {output_file}")
-            return output_file
-        else:
-            raise Exception(f"File not found: {output_file}")
-    except Exception as e:
-        print(f"Translation error: {str(e)}")
+def translate_speech(audio_file, target_language):
+    if audio_file is None:
+        return None
+
+    language_code = LANGUAGE_CODES[target_language]
+    output_file = "translated_audio.wav"
+
+    command = [
+        "expressivity_predict",
+        audio_file,
+        "--tgt_lang", language_code,
+        "--model_name", "seamless_expressivity",
+        "--vocoder_name", "vocoder_pretssel",
+        "--gated-model-dir", "models",
+        "--output_path", output_file
+    ]
+
+    subprocess.run(command, check=True)
+
+    if os.path.exists(output_file):
+        print(f"File created successfully: {output_file}")
+        return output_file
+    else:
+        print(f"File not found: {output_file}")
     return None
 
```
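A trade-off worth flagging in this hunk: the old async path enforced a 30-second timeout and wrote to a timestamped file, while the new blocking `translate_speech` can hang indefinitely on a stuck `expressivity_predict` run and always writes the shared `translated_audio.wav`, which concurrent requests can overwrite. A minimal sketch of the same blocking style with both safeguards kept (the `timeout` parameter and the `uuid` naming are suggestions, not part of this commit):

```python
import os
import subprocess
import uuid

def translate_speech(audio_file, target_language, timeout=30):
    """Blocking variant that keeps the old 30 s guard and a unique
    output path per request (sketch; not part of this commit)."""
    if audio_file is None:
        return None

    language_code = LANGUAGE_CODES[target_language]
    # Unique name per request, like the old timestamped files, so
    # concurrent requests don't clobber a shared translated_audio.wav.
    output_file = f"translated_audio_{uuid.uuid4().hex}.wav"

    command = [
        "expressivity_predict",
        audio_file,
        "--tgt_lang", language_code,
        "--model_name", "seamless_expressivity",
        "--vocoder_name", "vocoder_pretssel",
        "--gated-model-dir", "models",
        "--output_path", output_file,
    ]
    try:
        # subprocess.run supports a timeout directly; on expiry it kills
        # the child and raises TimeoutExpired, restoring the old guard.
        subprocess.run(command, check=True, timeout=timeout)
    except (subprocess.TimeoutExpired, subprocess.CalledProcessError) as e:
        print(f"Translation error: {e}")
        return None

    return output_file if os.path.exists(output_file) else None
```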
```diff
 async def respond(audio, model, seed, target_language):
     try:
         if audio is None:
-            return None, None
-
+            return None, None, "No input detected."
+
         user_input = transcribe(audio)
         if not user_input:
-            return None, None
-
+            return None, None, "Could not transcribe audio."
+
         if user_input.lower().startswith("please translate"):
-            # Use background task for translation
-            translated_audio = await translate_speech_with_timeout(audio, target_language)
-            return None, translated_audio
+            # Extract the actual content to translate
+            content_to_translate = user_input[len("please translate"):].strip()
+            translated_audio = translate_speech(audio, target_language)
+            return None, translated_audio, f"Translated to {target_language}"
         else:
             reply = models(user_input, model, seed)
             communicate = edge_tts.Communicate(reply, voice="en-US-ChristopherNeural")
             with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
                 tmp_path = tmp_file.name
                 await communicate.save(tmp_path)
-            return tmp_path, None
+            return tmp_path, None, "Voice assistant response"
     except ClientDisconnect:
         print("Client disconnected")
-        return None, None
+        return None, None, "Client disconnected. Please try again."
     except Exception as e:
         print(f"An error occurred: {str(e)}")
-        return None, None
+        return None, None, f"An error occurred: {str(e)}"
 
 def clear_history():
     global conversation_history
     conversation_history = []
-    return None, None
+    return None, None, "Conversation history cleared."
 
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown("# <center><b>Optimus Prime: Voice Assistant with Translation</b></center>")
```
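Two small notes on the new `respond`: `content_to_translate` is assigned but never used (translation still consumes the raw `audio` file), and because `translate_speech` is now synchronous, a slow CLI run blocks the event loop inside the async handler. If that becomes a problem, the call can be pushed to a worker thread; a one-line sketch, assuming Python 3.9+ and that the existing `asyncio` import is still in place:

```python
# Inside respond(): run the blocking CLI in a worker thread so the
# event loop stays responsive while expressivity_predict works.
translated_audio = await asyncio.to_thread(translate_speech, audio, target_language)
```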
```diff
@@ -201,7 +185,6 @@ with gr.Blocks(css="style.css") as demo:
 
     with gr.Row():
         with gr.Column(scale=1):
-            input_audio = gr.Audio(label="User Input", sources=["microphone"], type="filepath")
             select = gr.Dropdown([
                 'Llama 3 8B Service',
                 'Mixtral 8x7B',
@@ -212,11 +195,6 @@ with gr.Blocks(css="style.css") as demo:
                 value="Llama 3 8B Service",
                 label="Model"
             )
-            target_lang = gr.Dropdown(
-                choices=list(LANGUAGE_CODES.keys()),
-                value="German",
-                label="Target Language for Translation"
-            )
             seed = gr.Slider(
                 label="Seed",
                 minimum=0,
@@ -225,19 +203,26 @@ with gr.Blocks(css="style.css") as demo:
                 value=0,
                 visible=False
             )
+            target_lang = gr.Dropdown(
+                choices=list(LANGUAGE_CODES.keys()),
+                value="German",
+                label="Target Language for Translation"
+            )
+            input_audio = gr.Audio(label="User Input", sources=["microphone"], type="filepath")
             clear_button = gr.Button("Clear Conversation History")
 
         with gr.Column(scale=1):
             output_audio = gr.Audio(label="AI Response", type="filepath", interactive=False, autoplay=True)
             translated_audio = gr.Audio(label="Translated Audio", type="filepath", interactive=False, autoplay=True)
+            status_message = gr.Textbox(label="Status", interactive=False)
 
     input_audio.change(
         fn=respond,
         inputs=[input_audio, select, seed, target_lang],
-        outputs=[output_audio, translated_audio],
+        outputs=[output_audio, translated_audio, status_message],
     )
 
-    clear_button.click(fn=clear_history, inputs=[], outputs=[output_audio, translated_audio])
+    clear_button.click(fn=clear_history, inputs=[], outputs=[output_audio, translated_audio, status_message])
 
 if __name__ == "__main__":
-    demo.queue(concurrency_count=5, max_size=20).launch()
+    demo.queue(max_size=200).launch()
```
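The `queue()` change matches Gradio 4.x, where the `concurrency_count` argument was removed. `max_size=200` only caps how many requests may wait in the queue; if per-event parallelism is still wanted, Gradio 4.x exposes it as a separate argument. A sketch, assuming Gradio 4.x (`default_concurrency_limit=5` here is illustrative, not from this commit):

```python
# Gradio 4.x: cap the waiting queue at 200 and allow up to 5 concurrent
# runs of each event handler (default_concurrency_limit replaces the
# removed concurrency_count argument).
demo.queue(max_size=200, default_concurrency_limit=5).launch()
```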
 