"""Gradio voice agent: send text to GPT-4o and speak the reply with Tortoise TTS.

Importing this module loads the `.env` file, builds the OpenAI client and
initializes Tortoise TTS. The Gradio server is only launched when the file is
executed as a script.
"""
import contextlib
import os
import tempfile
from typing import Optional, Tuple

import gradio as gr
import torchaudio
from dotenv import load_dotenv
from openai import APIConnectionError, AuthenticationError, OpenAI, RateLimitError
from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_voice

# Prefix shared by ask_gpt() (producer) and run_agent() (consumer) to signal
# that a returned string is an error message rather than a model reply.
GPT_ERROR_PREFIX = "[GPT-4 ERROR]"

# Voice preset name handed to tortoise's load_voice().
VOICE_NAME = "train_dotrice"

# Sample rate handed to torchaudio.save().
# NOTE(review): presumably Tortoise's output rate -- confirm against Tortoise docs.
TTS_SAMPLE_RATE = 24000

# === Debug and environment setup ===
print(f"Current working directory: {os.getcwd()}")
load_dotenv()  # By default, looks for .env in current working directory

# === Initialize OpenAI client with better error handling ===
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("❌ OPENAI_API_KEY not found in your .env file.")

# Past this point the key is known to be present. Never echo any part of the
# secret itself: console output ends up in logs and shared terminals.
print("API key found: Yes")
print("API key loaded (value hidden)")

# Initialize with timeout to prevent hanging requests
client = OpenAI(
    api_key=api_key,
    timeout=60.0,  # 60 second timeout
)

# === Initialize Tortoise TTS ===
print("Initializing Tortoise TTS...")
tts = TextToSpeech()
voice_samples, conditioning_latents = load_voice(VOICE_NAME)
print("TTS initialized successfully!")


def _describe_gpt_error(error: Exception) -> str:
    """Log *error* and turn it into a user-facing `[GPT-4 ERROR] ...` string."""
    error_type = type(error).__name__
    error_message = str(error)
    print(f"{GPT_ERROR_PREFIX} {error_type}: {error_message}")

    api_key_hint = (
        f"{GPT_ERROR_PREFIX} API key issue: Check that your API key is valid "
        "and properly formatted in the .env file."
    )
    rate_limit_hint = (
        f"{GPT_ERROR_PREFIX} Rate limit exceeded: Please wait a moment before "
        "trying again."
    )
    connection_hint = (
        f"{GPT_ERROR_PREFIX} Connection error: Check your internet connection "
        "and ensure OpenAI's API is accessible."
    )

    # Prefer the SDK's exception types: they do not depend on message wording.
    if isinstance(error, AuthenticationError):
        return api_key_hint
    if isinstance(error, RateLimitError):
        return rate_limit_hint
    if isinstance(error, APIConnectionError):
        return connection_hint

    # Fall back to substring matching for errors raised outside those types.
    lowered = error_message.lower()
    if "api_key" in lowered:
        return api_key_hint
    if "rate limit" in lowered:
        return rate_limit_hint
    if "connect" in lowered:
        return connection_hint
    return f"{GPT_ERROR_PREFIX} {error_type}: {error_message}"


# === Ask GPT-4o with improved error handling ===
def ask_gpt(prompt: str) -> str:
    """Send *prompt* to GPT-4o and return the reply text.

    Never raises: on any failure (including an empty reply) a string starting
    with ``GPT_ERROR_PREFIX`` is returned instead, which callers check for.
    """
    try:
        print(f"Sending request to GPT-4o: {prompt[:30]}...")
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=300,
        )
        content = response.choices[0].message.content
    except Exception as e:  # boundary: every failure becomes a message for the UI
        return _describe_gpt_error(e)

    # `content` may be None/empty; keep the `-> str` contract and avoid
    # handing empty text to the TTS step.
    if not content:
        print(f"{GPT_ERROR_PREFIX} Empty response from model")
        return f"{GPT_ERROR_PREFIX} Empty response: the model returned no text."
    return content


# === Generate TTS Audio ===
def synthesize(text: str) -> Optional[str]:
    """Render *text* to speech and return the path of the resulting WAV file.

    Returns ``None`` if synthesis or saving fails. On success the caller owns
    the temporary file (it is not deleted automatically).
    """
    tmp_path: Optional[str] = None
    try:
        print(f"Synthesizing speech for: {text[:30]}...")
        audio = tts.tts_with_preset(
            text=text,
            voice_samples=voice_samples,
            conditioning_latents=conditioning_latents,
            preset="fast",
        )
        # Reserve a unique path, then close our handle so torchaudio is the
        # only writer when it opens the file by name.
        fd, tmp_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        torchaudio.save(tmp_path, audio.squeeze(0).cpu(), TTS_SAMPLE_RATE)
        print(f"Audio saved to temporary file: {tmp_path}")
        return tmp_path
    except Exception as e:  # boundary: the UI shows a TTS failure note instead
        print(f"[TTS ERROR] {type(e).__name__}: {str(e)}")
        # Do not leave a partially written temp file behind.
        if tmp_path is not None:
            with contextlib.suppress(OSError):
                os.remove(tmp_path)
        return None


# === Unified Agent Logic with Enhanced Error Handling ===
def run_agent(audio_input, text_input) -> Tuple[str, Optional[str]]:
    """Handle one UI request and return ``(response_text, audio_path_or_None)``.

    Audio input takes precedence but is not implemented yet; otherwise the
    stripped text is sent to GPT-4o and the reply is synthesized to speech.
    """
    try:
        if audio_input:
            return "🧠 Voice transcription not implemented yet.", None

        cleaned_input = text_input.strip() if text_input else ""
        if not cleaned_input:
            return "⚠️ Please enter a message or audio input.", None
        if len(cleaned_input) < 2:
            return "⚠️ Please enter more meaningful text.", None

        print("Processing text input...")
        gpt_reply = ask_gpt(cleaned_input)
        if gpt_reply.startswith(GPT_ERROR_PREFIX):
            return gpt_reply, None

        audio_path = synthesize(gpt_reply)
        if audio_path is None:
            return gpt_reply + "\n\n[TTS ERROR] Failed to generate audio.", None
        return gpt_reply, audio_path
    except Exception as e:  # boundary: keep the Gradio callback from crashing
        print(f"[AGENT ERROR] {type(e).__name__}: {str(e)}")
        return f"⚠️ An unexpected error occurred: {type(e).__name__}: {str(e)}", None


# === Gradio UI ===
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 GPT-4o Voice Agent + Tortoise TTS")
    gr.Markdown("*Ensure your `.env` file with OPENAI_API_KEY is in the correct directory*")

    with gr.Row():
        mic_input = gr.Audio(
            label="🎙️ Mic Input (WAV format, not yet active)",
            type="filepath",
            format="wav",
        )
        text_input = gr.Textbox(
            lines=2,
            placeholder="Ask anything here...",
            label="💬 Text Input",
        )

    run_btn = gr.Button("🧠 Ask GPT-4o")
    gpt_output = gr.Textbox(label="🧠 GPT-4o Response")
    audio_output = gr.Audio(label="🔊 Spoken Response", autoplay=True)

    run_btn.click(
        fn=run_agent,
        inputs=[mic_input, text_input],
        outputs=[gpt_output, audio_output],
    )

# Launch for local + mobile access. Guarded so that importing this module
# (e.g. for reuse or tooling) does not start a blocking, publicly shared server.
if __name__ == "__main__":
    print("Launching Gradio interface...")
    demo.launch(share=True, server_name="0.0.0.0", server_port=7860, debug=True)