File size: 2,422 Bytes
cb78863
b2010ac
cb78863
b2010ac
cb78863
 
 
 
b8bbb80
b2010ac
 
 
 
 
cb78863
 
b2010ac
 
cb78863
 
 
b2010ac
cb78863
 
 
 
 
 
 
b2010ac
cb78863
b2010ac
 
 
 
cb78863
 
b2010ac
 
cb78863
b2010ac
 
 
cb78863
b2010ac
 
 
cb78863
b2010ac
 
 
cb78863
 
 
 
b2010ac
 
cb78863
 
b2010ac
cb78863
 
b2010ac
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import os
import tempfile  # For managing temporary audio file creation
import gradio as gr
from pydub import AudioSegment  # For handling audio files
from gtts import gTTS
import whisper  # Correct import from openai-whisper package
from groq import Groq

# Load the Whisper speech-to-text model once at import time so every
# request reuses the same instance ("base" = small, CPU-friendly).
whisper_model = whisper.load_model("base")

# SECURITY FIX: the key was previously hard-coded in source. Read it from
# the environment (as the original comment already claimed) and fail fast
# with a clear message if it is missing — never commit secrets to code.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError("GROQ_API_KEY environment variable is not set")
client = Groq(api_key=groq_api_key)

def transcribe_audio(audio_file):
    """Run Whisper speech-to-text on *audio_file* and return the transcript string."""
    transcription = whisper_model.transcribe(audio_file)
    return transcription["text"]

def get_response(prompt):
    """Send *prompt* to Llama-3 8B via the Groq API and return the reply text."""
    completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[{"role": "user", "content": prompt}],
    )
    # The API returns a list of candidate choices; take the first one.
    return completion.choices[0].message.content

def text_to_speech(text):
    """Convert *text* to speech and return the path to a real .wav file.

    BUG FIX: gTTS can only produce MP3 data, so the original code — which
    saved straight into a ``.wav``-suffixed temp file — returned an MP3
    mislabeled as WAV, contradicting the Gradio ``format="wav"`` output.
    We now save the MP3 to a temp file, transcode it to genuine WAV with
    pydub (already imported at the top of this file; requires ffmpeg),
    and remove the intermediate MP3.
    """
    tts = gTTS(text)
    # Write gTTS's MP3 output to a temporary file we control the path of.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as mp3_file:
        tts.save(mp3_file.name)
    wav_path = mp3_file.name[:-len(".mp3")] + ".wav"
    try:
        # Transcode MP3 -> WAV so the returned file matches its extension.
        AudioSegment.from_mp3(mp3_file.name).export(wav_path, format="wav")
    finally:
        os.remove(mp3_file.name)  # always clean up the intermediate MP3
    return wav_path  # Return the file path of the .wav file

def chatbot(audio_file):
    """Voice-to-voice pipeline: audio in -> transcript -> LLM reply -> audio out.

    Returns the path of a .wav file for Gradio to play back.
    """
    # Step 1: speech -> text.
    transcript = transcribe_audio(audio_file)
    print(f"Transcribed text: {transcript}")  # Debugging output

    # Step 2: text -> LLM reply.
    reply = get_response(transcript)
    print(f"Llama response: {reply}")  # Debugging output

    # Step 3: reply text -> synthesized speech.
    reply_audio = text_to_speech(reply)
    print(f"Generated audio output: {reply_audio}")  # Debugging output

    return reply_audio

# Gradio interface: wires the chatbot pipeline to a simple web UI.
# Input and output are both audio file paths; `live=True` triggers the
# pipeline automatically when a recording/upload is provided.
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath", format="wav"),  # Accept .wav audio file input (mic or upload)
    outputs=gr.Audio(type="filepath", format="wav"),  # Output is the file path to the generated .wav audio
    live=True,
    title="Voice to Voice Chatbot",
    description="Upload a .wav file or record using the microphone, and the chatbot will respond with audio!"
)

# Start the local Gradio web server (blocks until interrupted).
iface.launch()