# Voice-to-voice chatbot Space: Whisper (STT) -> Llama 3 via Groq -> gTTS (TTS)
import os
import tempfile  # For managing temporary audio file creation

import gradio as gr
import whisper  # Correct import from openai-whisper package
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment  # For handling audio files

# Load Whisper model once at startup (shared by all requests)
whisper_model = whisper.load_model("base")

# Retrieve the API key from environment variables (add GROQ_API_KEY to the
# Space's Secrets). Never hardcode credentials in source — the previous
# inline key was exposed and must be considered compromised/revoked.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError("GROQ_API_KEY environment variable is not set")

client = Groq(api_key=groq_api_key)
def transcribe_audio(audio_file):
    """Run the Whisper model on *audio_file* and return the recognized text."""
    transcription = whisper_model.transcribe(audio_file)
    return transcription["text"]
def get_response(prompt):
    """Send *prompt* to Llama 3 8B through the Groq API and return its reply text."""
    messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        messages=messages,
        model="llama3-8b-8192",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def text_to_speech(text):
    """Synthesize *text* to speech and return the path of a .wav file.

    gTTS can only emit MP3 data, so the original code that saved directly to a
    ``.wav``-named file produced a mislabeled MP3. Here the audio is written to
    a temporary ``.mp3`` file and then transcoded to real WAV with pydub's
    ``AudioSegment``, matching the format the Gradio output component expects.

    Parameters:
        text: The text to synthesize.

    Returns:
        str: Filesystem path to the generated .wav file (caller is
        responsible for eventual cleanup).
    """
    tts = gTTS(text)
    # delete=False so the paths survive after the handles close; we close the
    # handles first and let gTTS/pydub write to the paths (required on Windows,
    # where an open NamedTemporaryFile cannot be reopened by another writer).
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as mp3_file:
        mp3_path = mp3_file.name
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        wav_path = wav_file.name
    tts.save(mp3_path)  # gTTS always writes MP3 bytes, whatever the extension
    AudioSegment.from_mp3(mp3_path).export(wav_path, format="wav")
    os.remove(mp3_path)  # intermediate MP3 is no longer needed
    return wav_path  # Return the file path of the real .wav file
def chatbot(audio_file):
    """Full pipeline: audio in -> transcription -> LLM reply -> audio out.

    Parameters:
        audio_file: Path to the recorded/uploaded audio clip.

    Returns:
        Path to a .wav file containing the spoken response, which Gradio plays.
    """
    # Step 1: speech -> text
    user_text = transcribe_audio(audio_file)
    print(f"Transcribed text: {user_text}")  # Debugging output

    # Step 2: text -> LLM answer
    reply_text = get_response(user_text)
    print(f"Llama response: {reply_text}")  # Debugging output

    # Step 3: answer -> speech
    reply_audio_path = text_to_speech(reply_text)
    print(f"Generated audio output: {reply_audio_path}")  # Debugging output

    return reply_audio_path
# Gradio interface: accepts a .wav clip (mic or upload) and plays the reply.
audio_in = gr.Audio(type="filepath", format="wav")   # input as a file path
audio_out = gr.Audio(type="filepath", format="wav")  # output is a .wav path

iface = gr.Interface(
    fn=chatbot,
    inputs=audio_in,
    outputs=audio_out,
    live=True,  # re-run automatically when the input changes
    title="Voice to Voice Chatbot",
    description="Upload a .wav file or record using the microphone, and the chatbot will respond with audio!"
)

iface.launch()