Spaces:
Sleeping
Sleeping
File size: 2,422 Bytes
cb78863 b2010ac cb78863 b2010ac cb78863 b8bbb80 b2010ac cb78863 b2010ac cb78863 b2010ac cb78863 b2010ac cb78863 b2010ac cb78863 b2010ac cb78863 b2010ac cb78863 b2010ac cb78863 b2010ac cb78863 b2010ac cb78863 b2010ac cb78863 b2010ac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import os
import tempfile # For managing temporary audio file creation
import gradio as gr
from pydub import AudioSegment # For handling audio files
from gtts import gTTS
import whisper # Correct import from openai-whisper package
from groq import Groq
# Load the Whisper speech-to-text model once at module import (shared by all requests).
whisper_model = whisper.load_model("base")

# Read the Groq API key from the environment (add GROQ_API_KEY to your Secrets).
# Never hard-code API keys in source: a committed key is a leaked key and must
# be revoked. Fail fast with a clear message if the key is missing.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError("GROQ_API_KEY environment variable is not set")
client = Groq(api_key=groq_api_key)
def transcribe_audio(audio_file):
    """Run Whisper speech-to-text on *audio_file* and return the transcript string."""
    transcription = whisper_model.transcribe(audio_file)
    return transcription["text"]
def get_response(prompt):
    """Send *prompt* to Llama 3 8B via the Groq API and return the reply text."""
    completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content
def text_to_speech(text):
    """Convert *text* to speech and return the path of a genuine .wav file.

    gTTS always produces MP3 data regardless of the filename it is given, so
    saving it straight into a ".wav"-named file yields a mislabeled MP3 (the
    original bug). Instead we save a real .mp3 first, then transcode it to WAV
    with pydub so the returned file matches Gradio's format="wav" declaration.
    """
    tts = gTTS(text)
    # Create the temp file, then write AFTER the handle is closed: tts.save()
    # reopens the path itself, and writing to a still-open NamedTemporaryFile
    # fails on Windows.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as mp3_file:
        mp3_path = mp3_file.name
    tts.save(mp3_path)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        wav_path = wav_file.name
    # Transcode MP3 -> WAV (pydub was imported for exactly this purpose).
    AudioSegment.from_mp3(mp3_path).export(wav_path, format="wav")
    os.remove(mp3_path)  # intermediate MP3 is no longer needed
    return wav_path  # path to a real .wav file for Gradio to play
def chatbot(audio_file):
    """End-to-end pipeline: speech -> text -> LLM reply -> speech.

    Takes the path of the recorded/uploaded audio and returns the path of a
    .wav file containing the spoken response, for Gradio to play back.
    """
    # Step 1: speech recognition.
    transcript = transcribe_audio(audio_file)
    print(f"Transcribed text: {transcript}")  # Debugging output

    # Step 2: generate the assistant's reply from the transcript.
    reply = get_response(transcript)
    print(f"Llama response: {reply}")  # Debugging output

    # Step 3: synthesize the reply back into audio.
    reply_audio_path = text_to_speech(reply)
    print(f"Generated audio output: {reply_audio_path}")  # Debugging output
    return reply_audio_path
# Gradio interface: wires the chatbot pipeline to a web UI with audio in/out.
iface = gr.Interface(
fn=chatbot,
inputs=gr.Audio(type="filepath", format="wav"), # Accept .wav audio file input (mic or upload); passed to chatbot as a file path
outputs=gr.Audio(type="filepath", format="wav"), # Output is the file path to the generated .wav audio
live=True, # NOTE(review): live=True re-runs on every input change; for audio this may trigger extra calls — confirm this is intended
title="Voice to Voice Chatbot",
description="Upload a .wav file or record using the microphone, and the chatbot will respond with audio!"
)
# Start the web server (blocks until the app is stopped).
iface.launch()
|