# NOTE: The original paste began with "Spaces:" / "Runtime error" — status text
# captured from the Hugging Face Space web page, not part of this program.
# Required Libraries
import os
import tempfile

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment
# ---------------------------
# API Key Configuration
# ---------------------------
# SECURITY FIX: a Groq API key was previously hard-coded on this line and
# committed to source control. That key must be treated as compromised and
# revoked. The key is now read from the environment (set it as a Space/CI
# secret named GROQ_API_KEY) instead of being embedded in the code.
if not os.environ.get("GROQ_API_KEY"):
    print("[WARN] GROQ_API_KEY is not set; Groq API calls will fail.")
# ---------------------------
# Load Whisper Model
# ---------------------------
# Load once at import time so every request reuses the same model instance.
try:
    whisper_model = whisper.load_model("base")
    print("[INFO] Whisper model loaded successfully.")
except AttributeError:
    # NOTE(review): this fallback looks suspect — in openai-whisper,
    # `load_model` is a module-level function and the `Whisper` class does not
    # expose a `load_model` classmethod, so this branch would likely raise
    # AttributeError again. Confirm which whisper package/version this was
    # written against before relying on it.
    from whisper import Whisper
    whisper_model = Whisper.load_model("base")
    print("[INFO] Whisper model loaded using alternative syntax.")
# ---------------------------
# Audio Processing
# ---------------------------
def validate_audio_file(audio_file):
    """Return True when *audio_file* is an existing, non-empty file path."""
    is_usable = (
        bool(audio_file)
        and os.path.exists(audio_file)
        and os.path.getsize(audio_file) > 0
    )
    if is_usable:
        return True
    print(f"[ERROR] Invalid or empty audio file: {audio_file}")
    return False
def convert_to_wav(audio_file):
    """Convert *audio_file* to WAV format and return the new file's path.

    Returns None (after logging) if decoding or export fails.
    """
    try:
        audio = AudioSegment.from_file(audio_file)
        # FIX: NamedTemporaryFile(delete=False).name leaked an open file
        # handle (and locks the file on Windows). mkstemp + close releases
        # the descriptor while keeping the reserved path.
        fd, wav_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        audio.export(wav_path, format="wav")
        print(f"[INFO] Audio converted to WAV: {wav_path}")
        return wav_path
    except Exception as e:
        print(f"[ERROR] Audio Conversion Error: {e}")
        return None
def transcribe_audio(audio_file):
    """Transcribe *audio_file* with Whisper and return the recognized text.

    On failure, returns a string starting with "Transcription Error:" —
    callers (see chatbot) detect failure by checking for "Error" in the result.
    """
    wav_path = None
    try:
        print(f"[INFO] Transcribing audio file: {audio_file}")
        if not validate_audio_file(audio_file):
            raise FileNotFoundError("Audio file not found or invalid path.")
        wav_path = convert_to_wav(audio_file)
        if not wav_path:
            raise Exception("Failed to convert audio to WAV format.")
        result = whisper_model.transcribe(wav_path)
        print(f"[INFO] Transcription result: {result['text']}")
        return result['text']
    except Exception as e:
        print(f"[ERROR] Transcription Error: {e}")
        return f"Transcription Error: {e}"
    finally:
        # FIX: the intermediate WAV was previously leaked on every call;
        # remove it best-effort once transcription is done.
        if wav_path and os.path.exists(wav_path):
            try:
                os.remove(wav_path)
            except OSError:
                pass
# ---------------------------
# LLM Interaction
# ---------------------------
def get_groq_response(user_input):
    """Send *user_input* to Groq's chat completion API and return the reply text.

    On failure, returns a string starting with "Groq API Error:".
    """
    try:
        print(f"[INFO] Sending input to Groq: {user_input}")
        groq_client = Groq(api_key=os.environ['GROQ_API_KEY'])
        completion = groq_client.chat.completions.create(
            messages=[{"role": "user", "content": user_input}],
            model="llama-3.3-70b-versatile",
            stream=False,
        )
        reply = completion.choices[0].message.content
        print(f"[INFO] Groq response: {reply}")
        return reply
    except Exception as e:
        print(f"[ERROR] Groq API Error: {e}")
        return f"Groq API Error: {e}"
# ---------------------------
# Text-to-Speech
# ---------------------------
def text_to_speech(text):
    """Synthesize *text* to an MP3 file via gTTS and return the file path.

    On failure, returns a string starting with "TTS Error:" — callers detect
    failure by checking for "Error" in the result.
    """
    try:
        print(f"[INFO] Converting text to speech: {text}")
        tts = gTTS(text)
        # FIX: NamedTemporaryFile(delete=False).name leaked an open file
        # handle; mkstemp + close reserves the path without holding it open.
        fd, audio_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        tts.save(audio_path)
        print(f"[INFO] Audio file saved: {audio_path}")
        return audio_path
    except Exception as e:
        print(f"[ERROR] TTS Error: {e}")
        return f"TTS Error: {e}"
# ---------------------------
# Main Interaction Logic
# ---------------------------
def chatbot(audio_input):
    """Run the full voice pipeline: speech -> text -> LLM reply -> speech.

    Returns a (response_text, audio_path) pair; audio_path is None whenever
    any stage fails. Each stage signals failure by returning a string that
    contains "Error", which is checked after every step.
    """
    try:
        print(f"[INFO] Audio Input Path: {audio_input}")
        if not validate_audio_file(audio_input):
            return "Error: Audio file not found or invalid path", None
        transcript = transcribe_audio(audio_input)
        if "Error" in transcript:
            return transcript, None
        reply = get_groq_response(transcript)
        if "Error" in reply:
            return reply, None
        speech_path = text_to_speech(reply)
        if "Error" in speech_path:
            return speech_path, None
        return reply, speech_path
    except Exception as e:
        print(f"[ERROR] General Error: {e}")
        return f"General Error: {e}", None
# ---------------------------
# Gradio Interface
# ---------------------------
# chatbot returns (text, audio_path), matching the two outputs below.
interface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),  # mic/upload delivered as a temp-file path
    outputs=[
        gr.Textbox(label="LLM Response"),
        gr.Audio(label="Audio Response")
    ],
    title="Real-Time Voice-to-Voice Chatbot",
    description="Speak into the microphone, and the chatbot will respond with audio.",
    live=True  # NOTE(review): live=True re-runs fn on input changes — confirm
               # this is intended for audio input (may trigger repeated calls).
)
# Launch Gradio App
if __name__ == "__main__":
    print("[INFO] Starting Gradio Interface...")
    interface.launch(share=True)  # share=True publishes a temporary public URL