# Captured from a Hugging Face Spaces file view (Space status: Runtime error).
# File size: 5,100 bytes; revision 819225f.
import os
import gradio as gr
import tempfile
import torchaudio
from dotenv import load_dotenv
from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_voice
from openai import OpenAI
# === Debug and environment setup ===
print(f"Current working directory: {os.getcwd()}")
load_dotenv()  # By default, looks for .env in the current working directory

# === Initialize OpenAI client with better error handling ===
# Strip surrounding whitespace so a blank or padded value is treated as missing
# instead of being sent to the API and failing later with an opaque auth error.
api_key = (os.getenv("OPENAI_API_KEY") or "").strip()
if not api_key:
    raise ValueError("❌ OPENAI_API_KEY not found in your .env file.")
# Report presence only. No part of the secret is echoed: stdout may end up in
# shared or publicly visible logs, and even a prefix/suffix is sensitive.
print("API key found: Yes")

# Initialize with timeout to prevent hanging requests
client = OpenAI(
    api_key=api_key,
    timeout=60.0,  # 60 second timeout
)

# === Initialize Tortoise TTS ===
# Done once at import time; the model and the voice data below are reused by
# every synthesis request instead of being reloaded per call.
print("Initializing Tortoise TTS...")
tts = TextToSpeech()
voice_samples, conditioning_latents = load_voice("train_dotrice")
print("TTS initialized successfully!")
# === Ask GPT-4o with improved error handling ===
def ask_gpt(prompt: str) -> str:
    """Send ``prompt`` to GPT-4o as a single user message and return the reply.

    This function never raises. Every failure is printed to stdout and
    reported as a string that starts with ``"[GPT-4 ERROR]"``, so callers can
    detect errors by checking that prefix.

    Args:
        prompt: The user's message text.

    Returns:
        The reply text of the first choice, or a ``"[GPT-4 ERROR] ..."``
        description when the request fails or the reply has no text content.
    """
    try:
        print(f"Sending request to GPT-4o: {prompt[:30]}...")
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=300,
        )
        content = response.choices[0].message.content
        # ``content`` may be None or empty; returning it as-is would break the
        # ``-> str`` contract and the caller's prefix check.
        if not content:
            print("[GPT-4 ERROR] Empty response: no text content returned")
            return "[GPT-4 ERROR] Empty response: The model returned no text content."
        return content
    except Exception as e:  # boundary: callers expect a string, never an exception
        error_type = type(e).__name__
        error_message = str(e)
        print(f"[GPT-4 ERROR] {error_type}: {error_message}")

        # Provide more helpful error messages for the common failure modes.
        lowered = error_message.lower()
        if "api_key" in lowered:
            return "[GPT-4 ERROR] API key issue: Check that your API key is valid and properly formatted in the .env file."
        if "rate limit" in lowered:
            return "[GPT-4 ERROR] Rate limit exceeded: Please wait a moment before trying again."
        if "connect" in lowered:
            return "[GPT-4 ERROR] Connection error: Check your internet connection and ensure OpenAI's API is accessible."
        return f"[GPT-4 ERROR] {error_type}: {error_message}"
# === Generate TTS Audio ===
def synthesize(text: str) -> "str | None":
    """Render ``text`` to speech and write it to a temporary WAV file.

    Uses the module-level ``tts`` model together with ``voice_samples`` and
    ``conditioning_latents``, with the ``"fast"`` preset. The file is saved at
    a sample rate of 24000 and is NOT deleted automatically; the caller owns
    it once the path is returned.

    Args:
        text: The text to speak.

    Returns:
        Path of the generated ``.wav`` file, or ``None`` if synthesis or
        saving failed (the error is printed to stdout).
    """
    tmp_path = None
    try:
        print(f"Synthesizing speech for: {text[:30]}...")
        audio = tts.tts_with_preset(
            text=text,
            voice_samples=voice_samples,
            conditioning_latents=conditioning_latents,
            preset="fast",
        )
        # Reserve a unique path and close the handle *before* writing: saving
        # by name while the file is still open fails on some platforms.
        fd, tmp_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        torchaudio.save(tmp_path, audio.squeeze(0).cpu(), 24000)
        print(f"Audio saved to temporary file: {tmp_path}")
        return tmp_path
    except Exception as e:  # boundary: failure is signalled to the caller via None
        print(f"[TTS ERROR] {type(e).__name__}: {str(e)}")
        # Do not leave an empty or partial file behind when saving failed.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass  # best-effort cleanup; the original error is already logged
        return None
# === Unified Agent Logic with Enhanced Error Handling ===
def run_agent(audio_input, text_input):
    """Handle one UI request: answer the text with GPT-4o and speak the answer.

    Audio input takes precedence over text but is not processed yet; it only
    produces a "not implemented" notice.

    Args:
        audio_input: Value of the microphone component; any truthy value
            short-circuits with the "not implemented" notice.
        text_input: The user's typed message (may be ``None`` or blank).

    Returns:
        A ``(message, audio_path)`` tuple. ``audio_path`` is ``None`` whenever
        no audio was produced (validation notice, GPT error, TTS failure or
        unexpected exception).
    """
    try:
        if audio_input:
            return "🧠 Voice transcription not implemented yet.", None

        cleaned_input = text_input.strip() if text_input else ""
        if not cleaned_input:
            return "⚠️ Please enter a message or audio input.", None
        if len(cleaned_input) < 2:
            return "⚠️ Please enter more meaningful text.", None

        print("Processing text input...")
        gpt_reply = ask_gpt(cleaned_input)

        # Guard against a missing/blank reply so the user gets a clear message
        # instead of a raw AttributeError, and TTS is not run on nothing.
        if not isinstance(gpt_reply, str) or not gpt_reply.strip():
            return "⚠️ GPT-4o returned an empty response. Please try again.", None
        if gpt_reply.startswith("[GPT-4 ERROR]"):
            return gpt_reply, None

        audio_path = synthesize(gpt_reply)
        if audio_path is None:
            # Still show the text answer even though it could not be spoken.
            return gpt_reply + "\n\n[TTS ERROR] Failed to generate audio.", None
        return gpt_reply, audio_path
    except Exception as e:  # UI boundary: always return something displayable
        print(f"[AGENT ERROR] {type(e).__name__}: {str(e)}")
        return f"⚠️ An unexpected error occurred: {type(e).__name__}: {str(e)}", None
# === Gradio UI ===
# NOTE(review): the original indentation was lost in extraction. The nesting
# below assumes the Row contains only the two input components — confirm
# against the repository copy.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 GPT-4o Voice Agent + Tortoise TTS")
    gr.Markdown("*Ensure your `.env` file with OPENAI_API_KEY is in the correct directory*")

    with gr.Row():
        mic_input = gr.Audio(
            label="🎙️ Mic Input (WAV format, not yet active)",
            type="filepath",
            format="wav",
        )
        text_input = gr.Textbox(
            lines=2,
            placeholder="Ask anything here...",
            label="💬 Text Input",
        )

    run_btn = gr.Button("🧠 Ask GPT-4o")
    gpt_output = gr.Textbox(label="🧠 GPT-4o Response")
    audio_output = gr.Audio(label="🔊 Spoken Response", autoplay=True)

    # run_agent returns (text, audio_path), matching the two outputs in order.
    run_btn.click(
        fn=run_agent,
        inputs=[mic_input, text_input],
        outputs=[gpt_output, audio_output],
    )

# Launch for local + mobile access
print("Launching Gradio interface...")
demo.launch(share=True, server_name="0.0.0.0", server_port=7860, debug=True)