RSHVR committed
Commit 359bac7 · verified · 1 Parent(s): 9cfc5dc

Update app.py

Files changed (1):
  1. app.py +10 -17
app.py CHANGED
@@ -1,20 +1,13 @@
 import os
 import gradio as gr
-from fastrtc import Stream, ReplyOnPause, AdditionalOutputs
+from fastrtc import Stream, AdditionalOutputs
+from fastrtc_walkie_talkie import WalkieTalkie
 
 # Import your custom models
 from tts import tortoise_tts, TortoiseOptions
 from stt import whisper_stt
 import cohereAPI
 
-# Try to import HumAware-VAD, install if not available
-try:
-    from humaware_vad import HumAwareVADModel
-except ImportError:
-    print("Installing humaware-vad...")
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "humaware-vad"])
-    from humaware_vad import HumAwareVADModel
-
 # Environment variables
 COHERE_API_KEY = os.getenv("COHERE_API_KEY")
 system_message = "You respond concisely, in about 15 words or less"
@@ -22,9 +15,6 @@ system_message = "You respond concisely, in about 15 words or less"
 # Initialize conversation history
 conversation_history = []
 
-# Initialize the HumAware-VAD model
-vad_model = HumAwareVADModel()
-
 # Create a handler function that uses both your custom models
 def response(audio):
     global conversation_history
@@ -32,7 +22,7 @@ def response(audio):
     # Convert speech to text using your Whisper model
     user_message = whisper_stt.stt(audio)
 
-    # Yield the transcription
+    # Yield the transcription as additional output
     yield AdditionalOutputs(user_message)
 
     # Send text to Cohere API
@@ -56,18 +46,21 @@ def response(audio):
     for chunk in tortoise_tts.stream_tts_sync(response_text, tts_options):
         yield chunk
 
-# Create the FastRTC stream with HumAware-VAD for better pause detection
+# Create the FastRTC stream with WalkieTalkie for turn detection
 stream = Stream(
-    handler=ReplyOnPause(response, model=vad_model),  # Use HumAware-VAD model
+    handler=WalkieTalkie(response),  # Use WalkieTalkie instead of ReplyOnPause
     modality="audio",
     mode="send-receive",
     additional_outputs=[gr.Textbox(label="Transcription")],
-    additional_outputs_handler=lambda old, new: new if old is None else f"{old}\nUser: {new}"
+    additional_outputs_handler=lambda old, new: new if old is None else f"{old}\nUser: {new}",
+    ui_args={
+        "title": "Voice Assistant (Walkie-Talkie Style)",
+        "subtitle": "Say 'over' to finish your turn. For example, 'What's the weather like today? over.'"
+    }
 )
 
 # Launch the Gradio UI
 if __name__ == "__main__":
-    # Update your requirements.txt to include humaware-vad
    stream.ui.launch(
        server_name="0.0.0.0",
        share=False,
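
For reference, the additional_outputs_handler kept (and given a trailing comma) in this commit is what merges each new transcription into the running Transcription textbox: WalkieTalkie wraps the same generator handler ReplyOnPause did, so response still yields AdditionalOutputs(user_message) once per turn. A minimal standalone sketch of that merging logic, runnable on its own — the sample utterances are hypothetical:

    # Same merging logic as the lambda passed to Stream in the diff above:
    # first turn replaces the empty value, later turns append a "User:" line.
    def merge_transcripts(old, new):
        return new if old is None else f"{old}\nUser: {new}"

    print(merge_transcripts(None, "What's the weather like today? over."))
    # What's the weather like today? over.
    print(merge_transcripts("What's the weather like today? over.", "And tomorrow? over."))
    # What's the weather like today? over.
    # User: And tomorrow? over.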