"""Voice chat assistant built on FastRTC and Gradio.

Flow per utterance: speech is transcribed by ``stt``, the transcript is sent
to Cohere through ``cohereAPI``, the reply is synthesized by ``tts``, and both
transcripts are pushed to the UI as additional outputs.
"""

import os

import gradio as gr
from fastrtc import Stream, ReplyOnPause, AdditionalOutputs

# Project-local modules
import stt
import tts
import cohereAPI

# Environment variables
COHERE_API_KEY = os.getenv("COHERE_API_KEY")  # None when the variable is unset
system_message = "You respond concisely, in about 15 words or less"

# History handed to, and replaced by the value returned from,
# cohereAPI.send_message.
# NOTE(review): this is module-level state, so it is shared by every client
# connected to this process — confirm that single-user use is intended.
conversation_history = []


async def response(audio_file_path):
    """Handle one user utterance and yield the assistant's reply.

    Yields, in order:
      1. ``AdditionalOutputs`` carrying the user's transcript,
      2. ``AdditionalOutputs`` carrying the assistant's reply text,
      3. a ``(sample_rate, speech_array)`` tuple with the synthesized reply.

    Nothing is yielded when transcription produces an empty result.

    NOTE(review): FastRTC's ReplyOnPause documentation shows handlers
    receiving a ``(sample_rate, ndarray)`` tuple rather than a file path —
    confirm that ``stt.transcribe_audio`` accepts what is actually passed.
    """
    global conversation_history

    # Convert speech to text
    user_message = await stt.transcribe_audio(audio_file_path)

    # update_chat ignores empty transcripts, so do not spend an LLM/TTS
    # round trip (or pollute the history) on one either.
    if not user_message:
        return

    # Add user message to chat history
    yield AdditionalOutputs({"transcript": user_message, "role": "user"})

    # Send text to Cohere API
    response_text, updated_history = await cohereAPI.send_message(
        system_message,
        user_message,
        conversation_history,
        COHERE_API_KEY,
    )

    # Keep whatever history object the API wrapper hands back
    conversation_history = updated_history

    # Generate speech from text; the first element of the result is unused
    _, (sample_rate, speech_array) = await tts.generate_speech(
        response_text,
        voice_preset="random",
    )

    # Add assistant message to chat history
    yield AdditionalOutputs({"transcript": response_text, "role": "assistant"})

    # Return audio response
    yield (sample_rate, speech_array)


def update_chat(transcript, role, history):
    """Merge one transcript into the chatbot history and return the result.

    ``history`` is a list of ``(user_text, assistant_text)`` pairs. A user
    transcript opens a new pair; an assistant transcript completes the last
    pair when its assistant slot is still ``None``, otherwise it starts a new
    pair with an empty user slot. Empty transcripts/roles and unknown roles
    leave the history unchanged.
    """
    # The chatbot value may be None before anything was displayed; work on a
    # copy so the caller's list is never mutated in place.
    history = list(history or [])

    if not (transcript and role):
        return history

    if role == "user":
        history.append((transcript, None))
    elif role == "assistant":
        if history and history[-1][1] is None:
            history[-1] = (history[-1][0], transcript)
        else:
            history.append((None, transcript))
    return history


def clear_conversation():
    """Reset the LLM-side history and return an empty chatbot value.

    Clearing only the visible chat would leave the old context in
    ``conversation_history``, so the assistant would keep answering with it.
    """
    global conversation_history
    # Rebind instead of clearing in place: the previous list may still be
    # referenced by the API wrapper.
    conversation_history = []
    return []


# Create FastRTC stream with ReplyOnPause
# NOTE(review): FastRTC's documentation describes `additional_outputs` as a
# list of Gradio components (paired with `additional_outputs_handler`) —
# verify that these dict specs are accepted by the installed version.
stream = Stream(
    handler=ReplyOnPause(response),
    modality="audio",
    mode="send-receive",
    additional_outputs=[
        {"name": "transcript", "type": "text"},
        {"name": "role", "type": "text"},
    ],
)

# Create Gradio interface that uses the FastRTC stream
with gr.Blocks(title="Voice Chat Assistant with ReplyOnPause") as demo:
    gr.Markdown("# Voice Chat Assistant")
    gr.Markdown("Speak and pause to trigger a response.")

    chatbot = gr.Chatbot(label="Conversation")

    # Mount the FastRTC UI
    # NOTE(review): FastRTC documents `Stream.ui` as a Blocks attribute
    # (e.g. `stream.ui.launch()`); confirm it can be called like this and
    # that the result exposes `output_components`.
    stream_ui = stream.ui(label="Speak")

    # Handle additional outputs from FastRTC to update the chatbot
    stream_ui.change(
        update_chat,
        inputs=[
            stream_ui.output_components[0],
            stream_ui.output_components[1],
            chatbot,
        ],
        outputs=[chatbot],
    )

    clear_btn = gr.Button("Clear Conversation")
    clear_btn.click(clear_conversation, outputs=[chatbot])

# Launch the app
if __name__ == "__main__":
    demo.queue().launch(
        server_name="0.0.0.0",
        share=False,
        show_error=True,
    )