Command_RTC / app.py
RSHVR's picture
Update app.py
839f7b2 verified
raw
history blame
2.82 kB
import os
import gradio as gr
from fastrtc import Stream, ReplyOnPause, AdditionalOutputs
# Import your modules
import stt
import tts
import cohereAPI
# --- Runtime configuration -------------------------------------------------
# Cohere credential taken from the process environment; None when unset.
COHERE_API_KEY = os.environ.get("COHERE_API_KEY")

# System prompt handed to cohereAPI.send_message on every turn.
system_message = "You respond concisely, in about 15 words or less"

# Module-level chat history; the voice handler replaces it after each turn.
conversation_history = list()
async def response(audio_file_path):
    """Run one voice-chat turn: transcribe, query Cohere, synthesize the reply.

    Async generator wrapped by ``ReplyOnPause`` and used as the stream handler.
    Reads and rebinds the module-level ``conversation_history``.

    Args:
        audio_file_path: Audio input, forwarded unchanged to
            ``stt.transcribe_audio``.
            NOTE(review): fastrtc's ReplyOnPause documentation describes the
            handler input as a ``(sample_rate, numpy_array)`` tuple rather
            than a file path -- confirm ``stt.transcribe_audio`` accepts
            what is actually passed.

    Yields:
        1. ``AdditionalOutputs`` with the user's transcript (role "user").
        2. ``AdditionalOutputs`` with the assistant's text (role "assistant").
        3. ``(sample_rate, speech_array)`` with the synthesized reply audio.
        Nothing is yielded when transcription produces no text.
    """
    global conversation_history

    # Speech -> text.
    user_message = await stt.transcribe_audio(audio_file_path)

    # A pause after silence or noise can transcribe to nothing. End the turn
    # here instead of sending an empty prompt to the LLM and synthesizing a
    # reply to it.
    if not user_message or not str(user_message).strip():
        return

    # Surface the user's words to the UI before the slower LLM/TTS calls.
    yield AdditionalOutputs({"transcript": user_message, "role": "user"})

    # Text -> LLM reply; the client hands back the updated history.
    response_text, updated_history = await cohereAPI.send_message(
        system_message,
        user_message,
        conversation_history,
        COHERE_API_KEY,
    )
    conversation_history = updated_history

    # Reply text -> audio. The first element of the returned pair is unused.
    _, (sample_rate, speech_array) = await tts.generate_speech(
        response_text,
        voice_preset="random",
    )

    # Publish the assistant's text, then the audio to play back.
    yield AdditionalOutputs({"transcript": response_text, "role": "assistant"})
    yield (sample_rate, speech_array)
# Bidirectional FastRTC audio stream. ReplyOnPause wraps `response` as the
# stream handler, and two text side-channels ("transcript" and "role") are
# declared as additional outputs.
# NOTE(review): fastrtc's documented examples pass Gradio components (plus an
# `additional_outputs_handler`) as `additional_outputs`, not plain dicts --
# confirm this spec is accepted by the installed fastrtc version.
stream = Stream(
    modality="audio",
    mode="send-receive",
    handler=ReplyOnPause(response),
    additional_outputs=[
        {"name": channel, "type": "text"} for channel in ("transcript", "role")
    ],
)
# Gradio page: chat transcript, the FastRTC audio widget, and a reset button.
with gr.Blocks(title="Voice Chat Assistant with ReplyOnPause") as demo:
    gr.Markdown("# Voice Chat Assistant")
    gr.Markdown("Speak and pause to trigger a response.")

    chatbot = gr.Chatbot(label="Conversation")

    # Embed the FastRTC UI.
    # NOTE(review): in fastrtc, `Stream.ui` appears to be an already-built
    # gr.Blocks attribute rather than a factory accepting `label`, and
    # `output_components` is not an attribute of gr.Blocks -- confirm against
    # the installed fastrtc/gradio versions.
    stream_ui = stream.ui(label="Speak")

    def update_chat(transcript, role, history):
        """Fold one (transcript, role) update into the Chatbot history.

        Args:
            transcript: Text of the utterance; falsy values are ignored.
            role: "user" or "assistant"; any other value leaves the history
                unchanged.
            history: Current Chatbot value as (user, assistant) pairs. May be
                None or empty, in which case a fresh list is started.

        Returns:
            A new history list. A user message opens a new pair; an assistant
            message completes the last open pair, or starts a pair of its own
            when no user message is pending.
        """
        # Work on a copy and tolerate a missing history so the first update
        # cannot fail on `None.append`.
        history = list(history) if history else []
        if not (transcript and role):
            return history

        if role == "user":
            history.append((transcript, None))
        elif role == "assistant":
            awaiting_reply = bool(history) and history[-1][1] is None
            if awaiting_reply:
                history[-1] = (history[-1][0], transcript)
            else:
                history.append((None, transcript))
        return history

    def clear_conversation():
        """Reset the visible chat and the history that is sent to the model.

        Emptying only the Chatbot widget would leave the module-level
        `conversation_history` intact, so the model would keep answering with
        the "cleared" context.
        """
        global conversation_history
        conversation_history = []
        return []

    # Refresh the chatbot whenever the stream publishes transcript/role.
    stream_ui.change(
        update_chat,
        inputs=[
            stream_ui.output_components[0],
            stream_ui.output_components[1],
            chatbot,
        ],
        outputs=[chatbot],
    )

    clear_btn = gr.Button("Clear Conversation")
    clear_btn.click(clear_conversation, outputs=[chatbot])
# Script entry point: serve the UI only when this file is executed directly.
if __name__ == "__main__":
    # Enable Gradio's event queue, then listen on all network interfaces
    # without creating a public share link.
    queued_app = demo.queue()
    queued_app.launch(server_name="0.0.0.0", share=False, show_error=True)