Voice-To-Voice_test

Sleeping

App Files Community

syedmudassir16 committed on Sep 24, 2024

Commit

98c26bb

verified ·

1 Parent(s): 2ae0339

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -5

app.py CHANGED Viewed

@@ -10,6 +10,8 @@ client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
 # Initialize the ASR pipeline
 asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
 def speech_to_text(speech):
     """Converts speech to text using the ASR pipeline."""
     return asr(speech)["text"]
@@ -64,7 +66,6 @@ def format_prompt(message, history):
     Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.
-    [Examples omitted for brevity]
     """
     prompt = f"{fixed_prompt}\n"
     for user_prompt, bot_response in history:
@@ -84,7 +85,7 @@ def process_input(input_text, history):
         return history, history, "", None
     response = generate(input_text, history)
     history.append((input_text, response))
-    return history, history, "", None  # Return history twice: once for state, once for chatbot, and reset voice input
 async def generate_audio(history):
     if history and len(history) > 0:
@@ -93,19 +94,27 @@ async def generate_audio(history):
         return audio_path
     return None
 # Gradio interface setup
 with gr.Blocks() as demo:
     gr.Markdown("# Mood-Based Music Recommender with Continuous Voice Chat")
-    chatbot = gr.Chatbot()
-    msg = gr.Textbox(placeholder="Type your message here or use the microphone to speak...")
     audio_output = gr.Audio(label="AI Response", autoplay=True)
     state = gr.State([])
     with gr.Row():
         submit = gr.Button("Send")
-        voice_input = gr.Audio(sources="microphone", type="filepath", label="Voice Input")
     # Handle text input
     msg.submit(process_input, inputs=[msg, state], outputs=[state, chatbot, msg, voice_input]).then(

 # Initialize the ASR pipeline
 asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
+INITIAL_MESSAGE = "Hi! I'm your music buddy—tell me about your mood and the type of tunes you're in the mood for today!"
 def speech_to_text(speech):
     """Converts speech to text using the ASR pipeline."""
     return asr(speech)["text"]
     Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.
     """
     prompt = f"{fixed_prompt}\n"
     for user_prompt, bot_response in history:
         return history, history, "", None
     response = generate(input_text, history)
     history.append((input_text, response))
+    return history, history, "", None
 async def generate_audio(history):
     if history and len(history) > 0:
         return audio_path
     return None
+async def init_chat():
+    history = [("", INITIAL_MESSAGE)]
+    audio_path = await text_to_speech(INITIAL_MESSAGE)
+    return history, history, audio_path
 # Gradio interface setup
 with gr.Blocks() as demo:
     gr.Markdown("# Mood-Based Music Recommender with Continuous Voice Chat")
+    chatbot = gr.Chatbot(height=400)
+    msg = gr.Textbox(placeholder="Type your message here or use the microphone to speak...", label="Your message")
     audio_output = gr.Audio(label="AI Response", autoplay=True)
     state = gr.State([])
     with gr.Row():
         submit = gr.Button("Send")
+        voice_input = gr.Audio(source="microphone", type="filepath", label="Voice Input")
+    # Initialize chat with greeting
+    demo.load(init_chat, outputs=[state, chatbot, audio_output])
     # Handle text input
     msg.submit(process_input, inputs=[msg, state], outputs=[state, chatbot, msg, voice_input]).then(