TabasumDev committed
Commit b2010ac · verified · 1 parent: b8bbb80

Update app.py

Files changed (1): app.py (+30 -23)
app.py CHANGED
@@ -1,26 +1,25 @@
 import os
+import tempfile  # For managing temporary audio file creation
 import gradio as gr
-import numpy as np
+from pydub import AudioSegment  # For handling audio files
 from gtts import gTTS
-import torch
 import whisper  # Correct import from openai-whisper package
 from groq import Groq
-import io
-import tempfile  # To handle temporary audio file saving

-# Initialize Groq API client
-client = Groq(api_key="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")
 # Load Whisper model
-whisper_model = whisper.load_model("base")  # Use 'whisper' directly
+whisper_model = whisper.load_model("base")
+
+# Retrieve the API key from environment variables (ensure you've added it to Secrets)
+groq_api_key = "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"
+client = Groq(api_key=groq_api_key)

 def transcribe_audio(audio_file):
-    # Load audio
-    audio, sr = sf.read(audio_file)
-    # Transcribe audio using Whisper
-    result = whisper_model.transcribe(audio, language="en")
+    """Transcribe audio to text using Whisper model."""
+    result = whisper_model.transcribe(audio_file)
     return result['text']

 def get_response(prompt):
+    """Generate response using Llama 8B via Groq API."""
     chat_completion = client.chat.completions.create(
         messages=[{"role": "user", "content": prompt}],
         model="llama3-8b-8192",
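
Editor's note on the key handling above: the new comment says the key comes from environment variables, but this revision still hardcodes it, so the secret now sits in the repository history and should be rotated. A minimal sketch of what the comment describes, assuming a Spaces secret named GROQ_API_KEY (that name is an assumption, not part of this commit):

    import os
    from groq import Groq

    # GROQ_API_KEY is an assumed secret name; set it under the Space's Settings > Secrets.
    groq_api_key = os.environ.get("GROQ_API_KEY")
    if not groq_api_key:
        raise RuntimeError("GROQ_API_KEY is not set; add it to the Space's secrets.")
    client = Groq(api_key=groq_api_key)
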
@@ -28,29 +27,37 @@ def get_response(prompt):
     return chat_completion.choices[0].message.content

 def text_to_speech(text):
+    """Convert text to speech using gTTS."""
     tts = gTTS(text)
-    audio_buffer = io.BytesIO()
-    tts.save(audio_buffer)
-    audio_buffer.seek(0)
-    return audio_buffer
+    # Save TTS output to a temporary file
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
+        tts.save(temp_audio_file.name)
+    return temp_audio_file.name  # Return the file path of the .wav file

 def chatbot(audio_file):
-    # Transcribe audio to text
+    """Main function to handle audio input, generate response, and return audio output."""
+    # 1. Transcribe audio to text
     user_input = transcribe_audio(audio_file)
-    # Get response from Llama 8B
+    print(f"Transcribed text: {user_input}")  # Debugging output
+
+    # 2. Get response from Llama 8B based on transcribed input
     response = get_response(user_input)
-    # Convert response to speech
+    print(f"Llama response: {response}")  # Debugging output
+
+    # 3. Convert the response text to speech
     audio_output = text_to_speech(response)
-    return audio_output
+    print(f"Generated audio output: {audio_output}")  # Debugging output
+
+    return audio_output  # Return the .wav audio file path for Gradio to play

 # Gradio interface
 iface = gr.Interface(
     fn=chatbot,
-    inputs=gr.Audio(type="filepath"),  # Remove 'source' argument
-    outputs=gr.Audio(type="filepath"),
+    inputs=gr.Audio(type="filepath", format="wav"),  # Accept .wav audio file input (mic or upload)
+    outputs=gr.Audio(type="filepath", format="wav"),  # Output is the file path to the generated .wav audio
     live=True,
     title="Voice to Voice Chatbot",
-    description="Speak into the microphone, and the chatbot will respond!"
+    description="Upload a .wav file or record using the microphone, and the chatbot will respond with audio!"
 )

-iface.launch()
+iface.launch()
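
Editor's note on text_to_speech: gTTS only produces MP3 data, so the ".wav"-suffixed temporary file above actually contains MP3 bytes under a .wav name. The commit imports pydub's AudioSegment but never uses it; a hedged sketch of how it could produce a real WAV file (assumes ffmpeg is available for pydub's MP3 decoding):

    import tempfile
    from gtts import gTTS
    from pydub import AudioSegment

    def text_to_speech(text):
        # gTTS can only emit MP3, so write the MP3 first...
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
            mp3_path = f.name
        gTTS(text).save(mp3_path)
        # ...then decode it and re-export as an actual WAV file.
        wav_path = mp3_path[:-4] + ".wav"
        AudioSegment.from_mp3(mp3_path).export(wav_path, format="wav")
        return wav_path

Alternatively, keeping the suffix as ".mp3" and dropping format="wav" from the gr.Audio components would avoid the conversion entirely, since Gradio can serve MP3 file paths directly.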
 
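This revision also leans on dependencies the diff does not declare. A sketch of the packages it appears to need (PyPI names; nothing is pinned by the commit itself):

    # requirements.txt (assumed, not part of this commit)
    gradio
    gTTS
    groq
    openai-whisper
    pydub

Note that both whisper_model.transcribe(audio_file) on a file path and pydub's MP3 handling shell out to ffmpeg, which a Hugging Face Space typically gets from a packages.txt containing the single line "ffmpeg".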