Command_RTC / app.py
RSHVR's picture
Update app.py
4156290 verified
raw
history blame
2.27 kB
import os
import gradio as gr
from fastrtc import Stream, ReplyOnPause, AdditionalOutputs
# Import your custom models
from tts import tortoise_tts, TortoiseOptions
from stt import whisper_stt
import cohereAPI
# Try to import HumAware-VAD; install it on the fly if it is missing.
try:
    from humaware_vad import HumAwareVADModel
except ImportError:
    # Only this fallback path needs these modules, so they are imported here.
    import importlib
    import subprocess
    import sys

    print("Installing humaware-vad...")
    # Run pip through the current interpreter so the package is installed
    # into the environment this process is actually using.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "humaware-vad"])
    # Make the import system notice the package that was just installed.
    importlib.invalidate_caches()
    from humaware_vad import HumAwareVADModel
# --- Configuration ---
# Cohere API key read from the environment (None when the variable is unset).
COHERE_API_KEY = os.environ.get("COHERE_API_KEY")
# System prompt passed to the Cohere call on every turn.
system_message = "You respond concisely, in about 15 words or less"

# --- Shared state ---
# Running conversation history; response() replaces it after each turn.
conversation_history = []
# HumAware-VAD model instance handed to ReplyOnPause when the stream is built.
vad_model = HumAwareVADModel()
# Stream handler: speech-to-text -> Cohere -> text-to-speech
def response(audio):
    """Handle one user utterance: transcribe it, query Cohere, speak the reply.

    This is a generator. It first yields an ``AdditionalOutputs`` wrapping the
    transcription, then yields each chunk produced by the TTS stream.

    Args:
        audio: The audio input supplied by the caller; forwarded unchanged to
            ``whisper_stt.stt``.

    Yields:
        An ``AdditionalOutputs`` carrying the transcribed text, followed by
        the chunks from ``tortoise_tts.stream_tts_sync``.
    """
    global conversation_history

    # Speech -> text; the transcription is yielded before the Cohere call.
    transcript = whisper_stt.stt(audio)
    yield AdditionalOutputs(transcript)

    # Text -> reply. The call returns the reply together with the updated
    # history, which replaces the module-level history.
    reply, conversation_history = cohereAPI.send_message(
        system_message,
        transcript,
        conversation_history,
        COHERE_API_KEY,
    )

    # Log the reply.
    print(f"Assistant: {reply}")

    # Reply -> speech, passed on chunk by chunk.
    voice_options = TortoiseOptions(voice_preset="random")
    audio_chunks = tortoise_tts.stream_tts_sync(reply, voice_options)
    for audio_chunk in audio_chunks:
        yield audio_chunk
def _append_transcription(old, new):
    """Append a new transcription to the text already shown in the textbox.

    Args:
        old: Current textbox content; ``None`` or ``""`` when nothing has been
            shown yet.
        new: The newly transcribed user message.

    Returns:
        The transcript text with ``new`` added as a ``User: ...`` line.
    """
    entry = f"User: {new}"
    # Treat an empty textbox like a missing one so the transcript never starts
    # with a blank line, and label every entry (including the first) alike.
    if not old:
        return entry
    return f"{old}\n{entry}"


# Create the FastRTC stream; ReplyOnPause is given the HumAware-VAD model.
stream = Stream(
    handler=ReplyOnPause(response, model=vad_model),
    modality="audio",
    mode="send-receive",
    additional_outputs=[gr.Textbox(label="Transcription")],
    additional_outputs_handler=_append_transcription,
)
# Launch the Gradio UI when this file is run as a script
if __name__ == "__main__":
    # NOTE: humaware-vad should also be listed in requirements.txt
    launch_options = {
        "server_name": "0.0.0.0",
        "share": False,
        "show_error": True,
    }
    stream.ui.launch(**launch_options)