import os
import subprocess
import sys

import gradio as gr
from fastrtc import Stream, ReplyOnPause, AdditionalOutputs
# Import your custom models
from tts import tortoise_tts, TortoiseOptions
from stt import whisper_stt
import cohereAPI
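# The local helper modules above are assumed (based on how they are called below)
# to expose: whisper_stt.stt(audio) -> str, tortoise_tts.stream_tts_sync(text, options)
# yielding audio chunks, and cohereAPI.send_message(system, message, history, key)
# -> (response_text, updated_history).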
# Try to import HumAware-VAD, install if not available
try:
    from humaware_vad import HumAwareVADModel
except ImportError:
    print("Installing humaware-vad...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "humaware-vad"])
    from humaware_vad import HumAwareVADModel
# Environment variables
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
system_message = "You respond concisely, in about 15 words or less"
# Initialize conversation history
conversation_history = []
# Initialize the HumAware-VAD model
vad_model = HumAwareVADModel()
# Create a handler function that uses both your custom models
def response(audio):
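    # `audio` is expected to arrive as the (sample_rate, numpy_array) tuple that
    # FastRTC's ReplyOnPause passes to its handler; adjust if your whisper_stt
    # wrapper expects a different format.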
    global conversation_history

    # Convert speech to text using your Whisper model
    user_message = whisper_stt.stt(audio)

    # Yield the transcription
    yield AdditionalOutputs(user_message)

    # Send text to the Cohere API
    response_text, updated_history = cohereAPI.send_message(
        system_message,
        user_message,
        conversation_history,
        COHERE_API_KEY,
    )

    # Update conversation history
    conversation_history = updated_history

    # Print the response for logging
    print(f"Assistant: {response_text}")

    # Use your TTS model to generate audio
    tts_options = TortoiseOptions(voice_preset="random")

    # Stream the audio response in chunks
    for chunk in tortoise_tts.stream_tts_sync(response_text, tts_options):
        yield chunk
# Create the FastRTC stream with HumAware-VAD for better pause detection
stream = Stream(
    handler=ReplyOnPause(response, model=vad_model),  # Use HumAware-VAD model
    modality="audio",
    mode="send-receive",
    additional_outputs=[gr.Textbox(label="Transcription")],
    additional_outputs_handler=lambda old, new: new if old is None else f"{old}\nUser: {new}",
)
# Launch the Gradio UI
if __name__ == "__main__":
    # Update your requirements.txt to include humaware-vad
    stream.ui.launch(
        server_name="0.0.0.0",
        share=False,
        show_error=True,
    )