import os
import tempfile  # For managing temporary audio file creation

import gradio as gr
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment  # Used to convert gTTS MP3 output to real WAV
import whisper  # Correct import from openai-whisper package

# Load Whisper model once at startup (loading is slow; reuse across requests).
whisper_model = whisper.load_model("base")

# Retrieve the API key from environment variables (add GROQ_API_KEY to Secrets).
# SECURITY: never hard-code API keys in source — a committed key is compromised.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError("GROQ_API_KEY environment variable is not set")

client = Groq(api_key=groq_api_key)


def transcribe_audio(audio_file):
    """Transcribe an audio file to text using the Whisper model.

    audio_file: path to an audio file readable by Whisper/ffmpeg.
    Returns the transcribed text as a string.
    """
    result = whisper_model.transcribe(audio_file)
    return result['text']


def get_response(prompt):
    """Generate a chat response for *prompt* using Llama 3 8B via the Groq API."""
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content


def text_to_speech(text):
    """Convert *text* to speech and return the path to a .wav file.

    gTTS always encodes its output as MP3 regardless of the file extension,
    so we save to a temporary .mp3 first and then transcode to genuine WAV
    with pydub — the Gradio output component declares format="wav" and
    expects real WAV content.
    """
    tts = gTTS(text)
    # delete=False so the files survive the 'with' block; Gradio reads them
    # afterwards. NOTE(review): these temp files are never cleaned up —
    # acceptable for a demo, but consider periodic cleanup in production.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_mp3:
        tts.save(temp_mp3.name)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
        wav_path = temp_wav.name
    AudioSegment.from_mp3(temp_mp3.name).export(wav_path, format="wav")
    os.remove(temp_mp3.name)  # intermediate MP3 is no longer needed
    return wav_path  # Return the file path of the .wav file


def chatbot(audio_file):
    """Handle one voice turn: audio in -> transcription -> LLM -> audio out.

    audio_file: path to the user's recorded/uploaded audio (from Gradio).
    Returns the path of a .wav file containing the spoken response.
    """
    # 1. Transcribe audio to text
    user_input = transcribe_audio(audio_file)
    print(f"Transcribed text: {user_input}")  # Debugging output

    # 2. Get response from Llama 8B based on transcribed input
    response = get_response(user_input)
    print(f"Llama response: {response}")  # Debugging output

    # 3. Convert the response text to speech
    audio_output = text_to_speech(response)
    print(f"Generated audio output: {audio_output}")  # Debugging output

    return audio_output  # Return the .wav audio file path for Gradio to play


# Gradio interface
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath", format="wav"),   # Mic or uploaded .wav file
    outputs=gr.Audio(type="filepath", format="wav"),  # Path to generated .wav audio
    live=True,
    title="Voice to Voice Chatbot",
    description="Upload a .wav file or record using the microphone, and the chatbot will respond with audio!"
)

if __name__ == "__main__":
    iface.launch()