# NOTE(review): this header appears to be residue from a web file viewer
# (a "Spaces" page), not part of the program. It is kept as comments so the
# module can be parsed as Python.
# Spaces: Build error
# File size: 2,077 Bytes
# Per-line commit hashes seen in the viewer: 1e82508, 359bac7, eb62218, 839f7b2
import os
import gradio as gr
from fastrtc import Stream, AdditionalOutputs
from fastrtc_walkie_talkie import WalkieTalkie
# Import your custom models
from tts import tortoise_tts, TortoiseOptions
from stt import whisper_stt
import cohereAPI
# Prompt handed to the LLM alongside every user message to keep replies short
system_message = "You respond concisely, in about 15 words or less"

# API key read from the process environment (None when the variable is unset)
COHERE_API_KEY = os.environ.get("COHERE_API_KEY")

# Conversation history shared by all calls to the handler; starts out empty
conversation_history = []
# Create a handler function that uses both your custom models
def response(audio):
    """Handle one user turn: transcribe it, ask the LLM, and speak the reply.

    This is a generator. It first yields an ``AdditionalOutputs`` carrying the
    transcription, then yields each chunk produced by
    ``tortoise_tts.stream_tts_sync`` for the assistant's reply.

    Side effect: the module-level ``conversation_history`` is rebound to the
    history returned by ``cohereAPI.send_message``.

    Args:
        audio: The captured user audio, passed unchanged to
            ``whisper_stt.stt``. Its exact format is decided by the caller
            (presumably FastRTC's audio payload -- TODO confirm).

    Yields:
        ``AdditionalOutputs(user_message)`` once, followed by the TTS audio
        chunks. Nothing is yielded when the transcription is empty.
    """
    global conversation_history

    # Convert speech to text using the Whisper model
    user_message = whisper_stt.stt(audio)

    # Nothing intelligible was transcribed: skip the LLM/TTS round trip rather
    # than sending an empty prompt and appending a blank transcript entry.
    if not user_message or (
        isinstance(user_message, str) and not user_message.strip()
    ):
        return

    # Surface the transcription to the UI before the (slower) LLM/TTS steps
    yield AdditionalOutputs(user_message)

    # Send text to the Cohere API and keep the updated history for later turns
    response_text, updated_history = cohereAPI.send_message(
        system_message,
        user_message,
        conversation_history,
        COHERE_API_KEY,
    )
    conversation_history = updated_history

    # Print the response for logging
    print(f"Assistant: {response_text}")

    # Stream the synthesized audio reply chunk by chunk
    tts_options = TortoiseOptions(voice_preset="random")
    yield from tortoise_tts.stream_tts_sync(response_text, tts_options)
def _append_transcription(old, new):
    """Return the transcript text with the latest user utterance appended.

    Args:
        old: Current content of the transcription textbox; ``None`` or ``""``
            before the first turn.
        new: The newly transcribed user message.

    Returns:
        ``"User: <new>"`` when there is no previous content, otherwise ``old``
        followed by ``"User: <new>"`` on a new line.
    """
    entry = f"User: {new}"
    # Treat None and "" alike so the first entry gets the same "User: " prefix
    # as later ones and the transcript never starts with a blank line.
    if not old:
        return entry
    return f"{old}\n{entry}"


# Create the FastRTC stream with WalkieTalkie for turn detection
stream = Stream(
    handler=WalkieTalkie(response),  # Use WalkieTalkie instead of ReplyOnPause
    modality="audio",
    mode="send-receive",
    additional_outputs=[gr.Textbox(label="Transcription")],
    additional_outputs_handler=_append_transcription,
    ui_args={
        "title": "Voice Assistant (Walkie-Talkie Style)",
        "subtitle": "Say 'over' to finish your turn. For example, 'What's the weather like today? over.'",
    },
)
# Launch the Gradio UI only when this file is run as a script (not on import)
if __name__ == "__main__":
    stream.ui.launch(
        server_name="0.0.0.0",  # bind address: all network interfaces
        share=False,
        show_error=True,
    )