RSHVR committed on
Commit
eb62218
·
verified ·
1 Parent(s): 3562379

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -57
app.py CHANGED
@@ -2,11 +2,14 @@ import os
2
  import gradio as gr
3
  from fastrtc import Stream, ReplyOnPause, AdditionalOutputs
4
 
5
- # Import your modules
6
- import stt
7
- import tts
8
  import cohereAPI
9
 
 
 
 
10
  # Environment variables
11
  COHERE_API_KEY = os.getenv("COHERE_API_KEY")
12
  system_message = "You respond concisely, in about 15 words or less"
@@ -14,17 +17,21 @@ system_message = "You respond concisely, in about 15 words or less"
14
  # Initialize conversation history
15
  conversation_history = []
16
 
17
- async def response(audio_file_path):
 
 
 
 
18
  global conversation_history
19
 
20
- # Convert speech to text
21
- user_message = await stt.transcribe_audio(audio_file_path)
22
 
23
- # Add user message to chat history
24
- yield AdditionalOutputs({"transcript": user_message, "role": "user"})
25
 
26
  # Send text to Cohere API
27
- response_text, updated_history = await cohereAPI.send_message(
28
  system_message,
29
  user_message,
30
  conversation_history,
@@ -34,63 +41,29 @@ async def response(audio_file_path):
34
  # Update conversation history
35
  conversation_history = updated_history
36
 
37
- # Generate speech from text
38
- _, (sample_rate, speech_array) = await tts.generate_speech(
39
- response_text,
40
- voice_preset="random"
41
- )
42
 
43
- # Add assistant message to chat history
44
- yield AdditionalOutputs({"transcript": response_text, "role": "assistant"})
45
 
46
- # Return audio response
47
- yield (sample_rate, speech_array)
 
48
 
49
- # Create FastRTC stream with ReplyOnPause
50
  stream = Stream(
51
- handler=ReplyOnPause(response),
52
  modality="audio",
53
  mode="send-receive",
54
- additional_outputs=[
55
- {"name": "transcript", "type": "text"},
56
- {"name": "role", "type": "text"}
57
- ]
58
  )
59
 
60
- # Create Gradio interface that uses the FastRTC stream
61
- with gr.Blocks(title="Voice Chat Assistant with ReplyOnPause") as demo:
62
- gr.Markdown("# Voice Chat Assistant")
63
- gr.Markdown("Speak and pause to trigger a response.")
64
-
65
- chatbot = gr.Chatbot(label="Conversation")
66
-
67
- # Mount the FastRTC UI
68
- stream_ui = stream.ui(label="Speak")
69
-
70
- # Handle additional outputs from FastRTC to update the chatbot
71
- def update_chat(transcript, role, history):
72
- if transcript and role:
73
- if role == "user":
74
- history.append((transcript, None))
75
- elif role == "assistant":
76
- if history and history[-1][1] is None:
77
- history[-1] = (history[-1][0], transcript)
78
- else:
79
- history.append((None, transcript))
80
- return history
81
-
82
- stream_ui.change(
83
- update_chat,
84
- inputs=[stream_ui.output_components[0], stream_ui.output_components[1], chatbot],
85
- outputs=[chatbot]
86
- )
87
-
88
- clear_btn = gr.Button("Clear Conversation")
89
- clear_btn.click(lambda: [], outputs=[chatbot])
90
-
91
- # Launch the app
92
  if __name__ == "__main__":
93
- demo.queue().launch(
 
94
  server_name="0.0.0.0",
95
  share=False,
96
  show_error=True
 
2
  import gradio as gr
3
  from fastrtc import Stream, ReplyOnPause, AdditionalOutputs
4
 
5
+ # Import your custom models
6
+ from tts import tortoise_tts, TortoiseOptions
7
+ from stt import whisper_stt
8
  import cohereAPI
9
 
10
+ # Import HumAware-VAD
11
+ from humaware_vad import HumAwareVADModel
12
+
13
  # Environment variables
14
  COHERE_API_KEY = os.getenv("COHERE_API_KEY")
15
  system_message = "You respond concisely, in about 15 words or less"
 
17
  # Initialize conversation history
18
  conversation_history = []
19
 
20
+ # Initialize the HumAware-VAD model
21
+ vad_model = HumAwareVADModel()
22
+
23
+ # Create a handler function that uses both your custom models
24
+ def response(audio):
25
  global conversation_history
26
 
27
+ # Convert speech to text using your Whisper model
28
+ user_message = whisper_stt.stt(audio)
29
 
30
+ # Yield the transcription
31
+ yield AdditionalOutputs(user_message)
32
 
33
  # Send text to Cohere API
34
+ response_text, updated_history = cohereAPI.send_message(
35
  system_message,
36
  user_message,
37
  conversation_history,
 
41
  # Update conversation history
42
  conversation_history = updated_history
43
 
44
+ # Print the response for logging
45
+ print(f"Assistant: {response_text}")
 
 
 
46
 
47
+ # Use your TTS model to generate audio
48
+ tts_options = TortoiseOptions(voice_preset="random")
49
 
50
+ # Stream the audio response in chunks
51
+ for chunk in tortoise_tts.stream_tts_sync(response_text, tts_options):
52
+ yield chunk
53
 
54
+ # Create the FastRTC stream with HumAware-VAD for better pause detection
55
  stream = Stream(
56
+ handler=ReplyOnPause(response, model=vad_model), # Use HumAware-VAD model
57
  modality="audio",
58
  mode="send-receive",
59
+ additional_outputs=[gr.Textbox(label="Transcription")],
60
+ additional_outputs_handler=lambda old, new: new if old is None else f"{old}\nUser: {new}"
 
 
61
  )
62
 
63
+ # Launch the Gradio UI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  if __name__ == "__main__":
65
+ # Update your requirements.txt to include humaware-vad
66
+ stream.ui.launch(
67
  server_name="0.0.0.0",
68
  share=False,
69
  show_error=True