Spaces:

Athspi
/

Whshhs

Runtime error

App Files Files Community

Athspi committed on Mar 29

Commit

bdfd7a5

verified ·

1 Parent(s): fba1da6

Update app.py

Browse files

Files changed (1) hide show

app.py +911 -235

app.py CHANGED Viewed

@@ -1,294 +1,970 @@
-## Documentation
-# Quickstart: https://github.com/google-gemini/cookbook/blob/main/quickstarts/Get_started_LiveAPI.py
-#
-## Setup
-#
-# To install the dependencies for this script, run:
-#
-# ```
-# pip install google-genai opencv-python pyaudio pillow mss
-# ```
 import asyncio
 import base64
 import io
 import traceback
 import cv2
 import pyaudio
 import PIL.Image
 import mss
-import argparse
-from google import genai
-from google.genai import types
-import gradio as gr
 FORMAT = pyaudio.paInt16
 CHANNELS = 1
 SEND_SAMPLE_RATE = 16000
-RECEIVE_SAMPLE_RATE = 24000
 CHUNK_SIZE = 1024
-MODEL = "models/gemini-2.0-flash-exp"
-DEFAULT_MODE = "camera"
-# Replace with your actual API key
-# client = genai.Client(http_options={"api_version": "v1alpha"}, api_key="YOUR_API_KEY")
-client = genai.Client(http_options={"api_version": "v1alpha"}, api_key="GEMINI_API_KEY")
-# While Gemini 2.0 Flash is in experimental preview mode, only one of AUDIO or
-# TEXT may be passed here.
-CONFIG = types.LiveConnectConfig(
-    response_modalities=[
-        "audio",
-    ],
-    speech_config=types.SpeechConfig(
-        voice_config=types.VoiceConfig(
-            prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Puck")
-        )
-    ),
-    system_instruction=types.Content(
-        parts=[types.Part.from_text(text="Answer user ask   replay same thing user say no other word explain ")],
-        role="user"
-    ),
-)
-pya = pyaudio.PyAudio()
-class AudioLoop:
-    def __init__(self, video_mode=DEFAULT_MODE):
         self.video_mode = video_mode
-        self.audio_in_queue = None
-        self.out_queue = None
         self.session = None
-        self.send_text_task = None
-        self.receive_audio_task = None
-        self.play_audio_task = None
-    async def send_text(self, text):
-        # while True:
-        #     text = await asyncio.to_thread(
-        #         input,
-        #         "message > ",
-        #     )
-        #     if text.lower() == "q":
-        #         break
-        await self.session.send(input=text or ".", end_of_turn=True)
     def _get_frame(self, cap):
-        # Read the frameq
         ret, frame = cap.read()
-        # Check if the frame was read successfully
-        if not ret:
-            return None
-        # Fix: Convert BGR to RGB color space
-        # OpenCV captures in BGR but PIL expects RGB format
-        # This prevents the blue tint in the video feed
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        img = PIL.Image.fromarray(frame_rgb)  # Now using RGB frame
         img.thumbnail([1024, 1024])
         image_io = io.BytesIO()
         img.save(image_io, format="jpeg")
         image_io.seek(0)
         mime_type = "image/jpeg"
         image_bytes = image_io.read()
         return {"mime_type": mime_type, "data": base64.b64encode(image_bytes).decode()}
     async def get_frames(self):
-        # This takes about a second, and will block the whole program
-        # causing the audio pipeline to overflow if you don't to_thread it.
-        cap = await asyncio.to_thread(
-            cv2.VideoCapture, 0
-        )  # 0 represents the default camera
-        while True:
-            frame = await asyncio.to_thread(self._get_frame, cap)
-            if frame is None:
-                break
-            await asyncio.sleep(1.0)
-            await self.out_queue.put(frame)
-        # Release the VideoCapture object
-        cap.release()
     def _get_screen(self):
-        sct = mss.mss()
-        monitor = sct.monitors[0]
-        i = sct.grab(monitor)
-        mime_type = "image/jpeg"
-        image_bytes = mss.tools.to_png(i.rgb, i.size)
-        img = PIL.Image.open(io.BytesIO(image_bytes))
-        image_io = io.BytesIO()
-        img.save(image_io, format="jpeg")
-        image_io.seek(0)
-        image_bytes = image_io.read()
-        return {"mime_type": mime_type, "data": base64.b64encode(image_bytes).decode()}
     async def get_screen(self):
         while True:
             frame = await asyncio.to_thread(self._get_screen)
             if frame is None:
-                break
-            await asyncio.sleep(1.0)
-            await self.out_queue.put(frame)
     async def send_realtime(self):
         while True:
-            msg = await self.out_queue.get()
-            await self.session.send(input=msg)
-    async def listen_audio(self):
-        mic_info = pya.get_default_input_device_info()
-        self.audio_stream = await asyncio.to_thread(
-            pya.open,
-            format=FORMAT,
-            channels=CHANNELS,
-            rate=SEND_SAMPLE_RATE,
-            input=True,
-            input_device_index=mic_info["index"],
-            frames_per_buffer=CHUNK_SIZE,
-        )
-        if __debug__:
-            kwargs = {"exception_on_overflow": False}
-        else:
-            kwargs = {}
-        while True:
-            data = await asyncio.to_thread(self.audio_stream.read, CHUNK_SIZE, **kwargs)
-            await self.out_queue.put({"data": data, "mime_type": "audio/pcm"})
-    async def receive_audio(self):
-        "Background task to reads from the websocket and write pcm chunks to the output queue"
-        while True:
-            turn = self.session.receive()
-            async for response in turn:
-                if data := response.data:
-                    self.audio_in_queue.put_nowait(data)
-                    continue
-                if text := response.text:
-                    # print(text, end="") # Don't print to console, return it
-                    return text  # Return the text to Gradio
-            # If you interrupt the model, it sends a turn_complete.
-            # For interruptions to work, we need to stop playback.
-            # So empty out the audio queue because it may have loaded
-            # much more audio than has played yet.
-            while not self.audio_in_queue.empty():
-                self.audio_in_queue.get_nowait()
-    async def play_audio(self):
-        stream = await asyncio.to_thread(
-            pya.open,
-            format=FORMAT,
-            channels=CHANNELS,
-            rate=RECEIVE_SAMPLE_RATE,
-            output=True,
-        )
         while True:
-            bytestream = await self.audio_in_queue.get()
-            await asyncio.to_thread(stream.write, bytestream)
-    async def run(self):
         try:
-            async with (
-                client.aio.live.connect(model=MODEL, config=CONFIG) as session,
-                asyncio.TaskGroup() as tg,
-            ):
-                self.session = session
-                self.audio_in_queue = asyncio.Queue()
-                self.out_queue = asyncio.Queue(maxsize=5)
-                # send_text_task = tg.create_task(self.send_text()) #No text task anymore.
-                tg.create_task(self.send_realtime())
-                tg.create_task(self.listen_audio())
-                if self.video_mode == "camera":
-                    tg.create_task(self.get_frames())
-                elif self.video_mode == "screen":
-                    tg.create_task(self.get_screen())
-                tg.create_task(self.receive_audio())
-                tg.create_task(self.play_audio())
-                # await send_text_task
-                # raise asyncio.CancelledError("User requested exit")
-                return await self.receive_audio() #return audio transcript result
-        except asyncio.CancelledError:
-            pass
-        except ExceptionGroup as EG:
-            self.audio_stream.close()
-            traceback.print_exception(EG)
         except Exception as e:
-            traceback.print_exc()  # Print the traceback for debugging
-            return f"Error: {str(e)}" # Return error message
-# Global instance
-audio_loop = None  # Initialize the AudioLoop object
-async def transcribe_audio(text_input):
-    """
-    Transcribes audio using the AudioLoop class and returns the result.
-    """
-    global audio_loop
-    if audio_loop is None:
-        audio_loop = AudioLoop(video_mode="none")  # Instantiate the class only once
-        # You might want to handle the initialization differently based on your needs.
-    loop = asyncio.get_event_loop()
-    # if loop.is_running():
-    #     print("Async event loop already running.  Using existing loop.")
-    #     task = loop.create_task(audio_loop.send_text(text_input))
-    #     return await task
-    # else:
-    #     print("Starting new async event loop.")
-    #     return asyncio.run(audio_loop.send_text(text_input))
-    if audio_loop.session is None:
         try:
-            return await audio_loop.run()
         except Exception as e:
-            print(f"Error in run(): {e}")
             traceback.print_exc()
-            return f"Error: {str(e)}"
     else:
-         try:
-             await audio_loop.send_text(text_input)
-             return await audio_loop.receive_audio()  # Assuming receive_audio returns a string
-         except Exception as e:
-             print(f"Error after session is established: {e}")
-             traceback.print_exc()
-             return f"Error: {str(e)}"
-# Gradio interface
 if __name__ == "__main__":
-    iface = gr.Interface(
-        fn=transcribe_audio,
-        inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
-        outputs="text",
-        title="Gemini Live Connect Demo with Gradio",
-        description="Enter text, and the model will replay same you said.  This is a demo of the Gemini Live Connect API with Gradio.",
-    )
-    iface.launch()

+# File: app.py
+import gradio as gr
 import asyncio
 import base64
 import io
 import traceback
 import cv2
 import pyaudio
 import PIL.Image
 import mss
+import google.generativeai as genai
+from google.generativeai import types
+import google.api_core.exceptions
+import wave
+import numpy as np
+import threading
+import queue
+import os
+import time
+import tempfile
+import atexit # For cleanup
+# --- Constants ---
+# Microphone capture parameters; passed to pya.open() in listen_audio().
 FORMAT = pyaudio.paInt16
 CHANNELS = 1
 SEND_SAMPLE_RATE = 16000
+RECEIVE_SAMPLE_RATE = 24000 # Gemini outputs at 24kHz
+# Frames read from the microphone per chunk (also the stream's buffer size).
 CHUNK_SIZE = 1024
+MODEL = "models/gemini-2.0-flash-exp" # Use the requested experimental model
+# "none" disables the camera/screen capture tasks; "camera" and "screen" enable one of them.
+DEFAULT_VIDEO_MODE = "none"
+AVAILABLE_VOICES = ["Puck", "Charon", "Kore", "Fenrir", "Aoede"]
+DEFAULT_VOICE = "Puck"
+# Sent as the system instruction of the live session.
+SYSTEM_INSTRUCTION_TEXT = "Answer user ask   replay same thing user say no other word explain "
+# --- Global State ---
+audio_loop_instance = None
+# Tasks started by run_main_loop() are added to this set.
+background_tasks = set()
+background_loop = None # Event loop for the background thread
+pya = None # Initialize PyAudio globally later
+background_thread = None # Keep track of the thread
+stop_background_loop = False # Flag to signal loop termination
+# --- Original AudioLoop Class Methods (Included for potential future use) ---
+# Note: We inherit from the original structure for clarity but override key methods
+class OriginalAudioLoop:
+    """Capture helpers carried over from the original Live API script.
+    Holds the state shared by the producer tasks (camera, screen, microphone)
+    and by `send_realtime`, which forwards queued items to the live session.
+    `session` and `out_queue` start as None; every loop in this class stops
+    once `session` is falsy.
+    """
+    def __init__(self, video_mode=DEFAULT_VIDEO_MODE):
+        """Store the video mode and start with no session, queue or mic stream."""
         self.video_mode = video_mode
+        self.out_queue = None # Queue for data *to* Gemini (mic audio, images)
         self.session = None
+        self.audio_stream = None # Mic input stream
+    @staticmethod
+    def _encode_jpeg(img):
+        """Shrink `img` in place to fit 1024x1024 and wrap it as a base64 JPEG payload."""
+        img.thumbnail([1024, 1024])
+        image_io = io.BytesIO()
+        img.save(image_io, format="jpeg")
+        return {"mime_type": "image/jpeg", "data": base64.b64encode(image_io.getvalue()).decode()}
     def _get_frame(self, cap):
+        """Read one frame from `cap`; return a JPEG payload, or None if the read failed."""
         ret, frame = cap.read()
+        if not ret:
+            return None
+        # OpenCV delivers BGR; convert before handing the array to PIL.
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        return self._encode_jpeg(PIL.Image.fromarray(frame_rgb))
     async def get_frames(self):
+        """Open camera 0 and queue roughly one frame per second while a session exists.
+        Failed reads are retried after 0.1s. The camera is released when the
+        task ends for any reason.
+        """
+        cap = None
+        try:
+            print("Attempting to open camera...")
+            cap = await asyncio.to_thread(cv2.VideoCapture, 0)
+            if not cap.isOpened():
+                print("Error: Could not open camera.")
+                # Signal error back to Gradio? For now, just log and exit task.
+                await run_coro_in_background_loop(update_status("Error: Could not open camera."))
+                return
+            print("Camera opened successfully.")
+            while True:
+                if not self.session: # Stop if disconnected
+                    print("get_frames: Session closed, stopping camera task.")
+                    break
+                frame = await asyncio.to_thread(self._get_frame, cap)
+                if frame is None:
+                    await asyncio.sleep(0.1) # Avoid busy loop
+                    continue # Skip putting None in queue
+                if self.out_queue:
+                    await self.out_queue.put(frame)
+                await asyncio.sleep(1.0) # Send frame every second
+        except asyncio.CancelledError:
+            print("get_frames task cancelled.")
+        except Exception as e:
+            print(f"Error in get_frames: {e}")
+            await run_coro_in_background_loop(update_status(f"Camera Error: {e}"))
+        finally:
+            if cap and cap.isOpened():
+                print("Releasing camera.")
+                await asyncio.to_thread(cap.release)
+            print("Camera task finished.")
+    def _capture_monitor(self, index):
+        """Grab `sct.monitors[index]` and return it as a JPEG payload.
+        An IndexError from the monitor lookup is left to the caller.
+        """
+        with mss.mss() as sct:
+            monitor = sct.monitors[index]
+            sct_img = sct.grab(monitor)
+            img = PIL.Image.frombytes("RGB", sct_img.size, sct_img.bgra, "raw", "BGRX") # Handle BGRA
+            return self._encode_jpeg(img)
     def _get_screen(self):
+        """Capture the screen as a JPEG payload; return None if capture fails.
+        Monitor 1 is tried first; on IndexError the capture falls back to monitor 0.
+        """
+        try:
+            # Attempt to grab the primary monitor (often index 1 in mss.monitors)
+            return self._capture_monitor(1)
+        except IndexError:
+            print("Error capturing screen: Could not find monitor at index 1. Trying index 0.")
+            try: # Fallback to monitor 0 (usually includes all screens)
+                return self._capture_monitor(0)
+            except Exception as e_fallback:
+                print(f"Error capturing screen (fallback monitor 0): {e_fallback}")
+                return None
+        except Exception as e:
+            print(f"Error capturing screen: {e}")
+            return None
     async def get_screen(self):
+        """Queue roughly one screen capture per second while a session exists."""
         while True:
+            if not self.session: # Stop if disconnected
+                print("get_screen: Session closed, stopping screen task.")
+                break
             frame = await asyncio.to_thread(self._get_screen)
             if frame is None:
+                print("Warning: Failed to capture screen.")
+                await asyncio.sleep(1.0) # Wait before retrying if error occurred
+                continue # Skip putting None in queue
+            if self.out_queue:
+                await self.out_queue.put(frame)
+            await asyncio.sleep(1.0) # Send screen frame every second
     async def send_realtime(self):
+        """Sends microphone audio or video frames from the out_queue to Gemini."""
         while True:
+            if not self.session or not self.out_queue:
+                # Wait if session/queue not ready or if disconnected
+                await asyncio.sleep(0.1)
+                if not self.session: # Check again after sleep if disconnected
+                    print("send_realtime: Session closed, stopping task.")
+                    break
+                continue
+            try:
+                # The 1s timeout lets the loop re-check the session regularly.
+                msg = await asyncio.wait_for(self.out_queue.get(), timeout=1.0)
+                if self.session: # Check again in case session closed while waiting
+                    await self.session.send(input=msg)
+                self.out_queue.task_done()
+            except asyncio.TimeoutError:
+                continue # No message in queue, loop again
+            except asyncio.CancelledError:
+                print("send_realtime task cancelled.")
+                break
+            except Exception as e:
+                print(f"Error in send_realtime: {e}")
+                await run_coro_in_background_loop(update_status(f"Send Error: {e}"))
+                # Avoid continuous errors if session is bad
+                if isinstance(e, (google.api_core.exceptions.GoogleAPICallError, ConnectionError)):
+                    print("Connection error in send_realtime, pausing...")
+                    await asyncio.sleep(5)
+    async def listen_audio(self):
+        """Listens to microphone and puts audio chunks onto the out_queue.
+        Requires the module-level `pya` to be initialised. The stream is
+        stopped and closed when the task ends for any reason.
+        """
+        global pya
+        if not pya:
+            print("Error: PyAudio not initialized in listen_audio.")
+            await run_coro_in_background_loop(update_status("Error: Audio system not ready."))
+            return
+        stream = None
+        try:
+            print("Attempting to open microphone...")
+            mic_info = await asyncio.to_thread(pya.get_default_input_device_info)
+            stream = await asyncio.to_thread(
+                pya.open,
+                format=FORMAT,
+                channels=CHANNELS,
+                rate=SEND_SAMPLE_RATE,
+                input=True,
+                input_device_index=mic_info["index"],
+                frames_per_buffer=CHUNK_SIZE,
+            )
+            self.audio_stream = stream # Store reference for cleanup
+            print("Microphone stream opened.")
+            if __debug__:
+                kwargs = {"exception_on_overflow": False}
+            else:
+                kwargs = {}
+            while True:
+                if not self.session: # Stop if disconnected
+                    print("listen_audio: Session closed, stopping microphone task.")
+                    break
+                try:
+                    data = await asyncio.to_thread(stream.read, CHUNK_SIZE, **kwargs)
+                    if self.out_queue:
+                        await self.out_queue.put({"data": data, "mime_type": "audio/pcm"})
+                except IOError:
+                    # This often happens if the buffer overflows or the stream is closed abruptly
+                    await asyncio.sleep(0.05) # Short pause before trying again
+                except asyncio.CancelledError:
+                    print("listen_audio task cancelled.")
+                    break
+        except OSError as e:
+            print(f"Error opening microphone: {e}. Is a microphone connected and accessible?")
+            await run_coro_in_background_loop(update_status(f"Mic Error: {e}"))
+        except Exception as e:
+            print(f"Error in listen_audio: {e}")
+            traceback.print_exc()
+            await run_coro_in_background_loop(update_status(f"Mic Error: {e}"))
+        finally:
+            if stream:
+                print("Stopping and closing microphone stream.")
+                await asyncio.to_thread(stream.stop_stream)
+                await asyncio.to_thread(stream.close)
+                self.audio_stream = None # Clear reference
+                print("Microphone stream closed.")
+# --- Gradio Specific Audio Loop ---
+class GradioAudioLoop(OriginalAudioLoop): # Inherit and modify/add methods
+    def __init__(self, video_mode=DEFAULT_VIDEO_MODE, api_key=None, voice_name=DEFAULT_VOICE):
+        """Record connection settings and create the queues, event and buffers.
+        Nothing is connected here: `client` and `config` stay None until
+        `_initialize_client_and_config` runs.
+        """
+        super().__init__(video_mode)
+        # Connection settings and the objects later built from them.
+        self.api_key, self.voice_name = api_key, voice_name
+        self.client = self.config = None
+        self.connection_status = "Disconnected" # Internal status
+        # Hand-off points between the Gradio handler and the background loop.
+        self.text_input_queue = asyncio.Queue()
+        self.response_text_queue = asyncio.Queue()
+        self.response_audio_queue = asyncio.Queue()
+        self.response_event = asyncio.Event() # Signal when response is ready
+        # Accumulators for the response turn currently being received.
+        self.current_text_response = ""
+        self.current_audio_buffer = io.BytesIO()
+    def _initialize_client_and_config(self):
+        """Build the Gemini client and the live-connect config for this instance.
+        The API key is taken from the GEMINI_API_KEY environment variable when
+        it is set and non-empty, otherwise from `self.api_key`.
+        Returns:
+            True when `self.client` and `self.config` were created. False on
+            any failure, including a missing key; both attributes are then
+            reset to None and `self.connection_status` carries the error text.
+        """
+        try:
+            # Resolve the key inside the try block: a missing key is then reported
+            # through the False return like every other setup failure, and the
+            # environment variable works even when no key was passed in.
+            api_key_to_use = os.getenv("GEMINI_API_KEY") or self.api_key
+            if not api_key_to_use:
+                raise ValueError("No API key provided or found in GEMINI_API_KEY environment variable.")
+            # Use v1beta for experimental models if needed, adjust if stable
+            # http_options={"api_version": "v1beta"} # Try if v1alpha causes issues
+            # NOTE(review): `genai.Client` and `types.LiveConnectConfig` look like the
+            # google-genai SDK API (`from google import genai`) - confirm the module
+            # bound to `genai` at the top of the file actually provides them.
+            print("Initializing Gemini Client...")
+            self.client = genai.Client(api_key=api_key_to_use)
+            print(f"Setting up LiveConnectConfig with voice: {self.voice_name}")
+            # NOTE(review): the previous revision of this script stated that the
+            # experimental model accepts only one of AUDIO or TEXT here - confirm
+            # that requesting both modalities is accepted by the service.
+            self.config = types.LiveConnectConfig(
+                response_modalities=["audio", "text"], # Get both audio and text
+                speech_config=types.SpeechConfig(
+                    voice_config=types.VoiceConfig(
+                        prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=self.voice_name) # Use selected voice
+                    )
+                ),
+                system_instruction=types.Content(
+                    parts=[types.Part.from_text(text=SYSTEM_INSTRUCTION_TEXT)],
+                    role="user" # System instructions often role='user'
+                ),
+            )
+            print("Gemini client and config initialized successfully.")
+            self.connection_status = "Initialized"
+            return True # Indicate success
+        except Exception as e:
+            print(f"Error initializing Gemini client: {e}")
+            self.client = None
+            self.config = None
+            self.connection_status = f"Initialization Error: {e}"
+            return False # Indicate failure
+    async def process_text_inputs(self):
+        """Forward text queued by the Gradio handler to the live session.
+        Runs until the task is cancelled or None is taken from
+        `text_input_queue`. When there is no connected session, an error reply
+        is published on the response queues so the waiting handler is released.
+        """
+        while True:
+            try:
+                outgoing = await self.text_input_queue.get()
+                if outgoing is None: # None is the stop signal
+                    print("Stopping text input processing.")
+                    break
+                if not self.session or self.connection_status != "Connected":
+                    print(f"Warning: Cannot send text. Session not active or status is {self.connection_status}.")
+                    # Publish an error reply and wake the handler.
+                    await self.response_text_queue.put(f"Error: Not connected or connection issue ({self.connection_status}). Cannot send message.")
+                    await self.response_audio_queue.put(b"") # Empty audio
+                    self.response_event.set()
+                else:
+                    print(f"Sending text to Gemini: {outgoing[:50]}...")
+                    # Begin the turn with a cleared signal and empty accumulators.
+                    self.response_event.clear()
+                    self.current_text_response = ""
+                    self.current_audio_buffer = io.BytesIO()
+                    # An empty string is replaced by "." before sending.
+                    await self.session.send(input=outgoing or ".", end_of_turn=True)
+                    print("Text sent, waiting for response...")
+                self.text_input_queue.task_done()
+            except asyncio.CancelledError:
+                print("process_text_inputs task cancelled.")
+                break
+            except Exception as exc:
+                print(f"Error in process_text_inputs: {exc}")
+                # Report the failure to the handler, then pause so repeated
+                # failures do not spin the loop.
+                await self.response_text_queue.put(f"Error sending message: {exc}")
+                await self.response_audio_queue.put(b"")
+                self.response_event.set()
+                await asyncio.sleep(1)
+    async def receive_responses(self):
+        """Collect each Gemini turn and publish it for the Gradio handler.
+        Audio bytes and text pieces of a turn are accumulated in
+        `current_audio_buffer` / `current_text_response`. When the turn ends
+        they are put on the response queues, the accumulators are emptied and
+        `response_event` is set. The loop idles while the status is not
+        "Connected" and exits when the session is gone or the task is cancelled.
+        """
         while True:
+            if not self.session or self.connection_status != "Connected":
+                await asyncio.sleep(0.2)
+                if not self.session: # Check if disconnected while waiting
+                    print("receive_responses: Session closed, stopping task.")
+                    break
+                continue
+            try:
+                turn = self.session.receive()
+                async for response in turn:
+                    if data := response.data:
+                        self.current_audio_buffer.write(data)
+                    if text := response.text:
+                        self.current_text_response += text
+                # Turn complete - snapshot the payload, then start the next turn
+                # from empty accumulators. Without the reset, a turn that arrives
+                # with no text send in between would be appended to this one.
+                audio_data = self.current_audio_buffer.getvalue()
+                text_response = self.current_text_response
+                self.current_audio_buffer = io.BytesIO()
+                self.current_text_response = ""
+                await self.response_audio_queue.put(audio_data)
+                await self.response_text_queue.put(text_response)
+                self.response_event.set() # Signal that response is ready for the Gradio handler
+            except asyncio.CancelledError:
+                print("receive_responses task cancelled.")
+                break
+            except google.api_core.exceptions.Cancelled:
+                print("Gemini receive cancelled (likely due to interruption or end)")
+                # Signal completion even if cancelled externally
+                await self.response_audio_queue.put(self.current_audio_buffer.getvalue())
+                await self.response_text_queue.put(self.current_text_response + " [Receive Cancelled]")
+                self.response_event.set()
+            except Exception as e:
+                print(f"Error receiving responses: {e}")
+                traceback.print_exc()
+                # Signal completion with error to unblock handler
+                await self.response_audio_queue.put(b"") # Empty audio
+                await self.response_text_queue.put(f"Error receiving response: {e}")
+                self.response_event.set()
+                # Pause on significant errors to avoid spamming logs
+                if isinstance(e, (google.api_core.exceptions.GoogleAPICallError, ConnectionError)):
+                    print("Connection error in receive_responses, pausing...")
+                    self.connection_status = f"Receive Error: {e}" # Update status
+                    await asyncio.sleep(5)
+    async def send_message_and_wait_for_response(self, text):
+        """Queue `text` for Gemini and wait up to 60 seconds for the reply.
+        Returns:
+            A `(text_response, audio_data)` tuple. On any failure the first
+            item is an error message and the second item is None.
+        """
+        if not self.session or self.connection_status != "Connected":
+            return f"Error: Not connected ({self.connection_status}).", None
+        # Drop whatever an earlier turn left behind. The event stays set after a
+        # reply is consumed, and a reply that arrived after a timeout is still
+        # queued; without this the wait below returns immediately and reads an
+        # empty queue or somebody else's answer.
+        self.response_event.clear()
+        for stale_queue in (self.response_audio_queue, self.response_text_queue):
+            while not stale_queue.empty():
+                stale_queue.get_nowait()
+                stale_queue.task_done()
+        await self.text_input_queue.put(text)
+        print("Waiting for response event...")
         try:
+            # Wait for the event with a timeout
+            await asyncio.wait_for(self.response_event.wait(), timeout=60.0) # 60 second timeout
+            print("Response event received.")
+        except asyncio.TimeoutError:
+            print("Timeout waiting for Gemini response.")
+            return "Error: Timeout waiting for response.", None
         except Exception as e:
+            print(f"Error waiting for response event: {e}")
+            return f"Error waiting for response: {e}", None
+        # Retrieve results from response queues.
+        # Producers put both items before setting the event, so get_nowait is enough.
+        try:
+            audio_data = self.response_audio_queue.get_nowait()
+            text_response = self.response_text_queue.get_nowait()
+            self.response_audio_queue.task_done()
+            self.response_text_queue.task_done()
+        except asyncio.QueueEmpty:
+            print("Error: Response queues were empty after event was set.")
+            return "Internal Error: Response queues empty.", None
+        except Exception as e:
+            print(f"Error retrieving from response queues: {e}")
+            return f"Internal Error: {e}", None
+        return text_response, audio_data
+    async def run_main_loop(self):
+        """ The main async method to establish connection and manage tasks """
+        global background_tasks
+        if not self._initialize_client_and_config():
+             print("Initialization failed, cannot connect.")
+             self.connection_status = "Connection Failed: Initialization error."
+             await run_coro_in_background_loop(update_status(self.connection_status))
+             return # Stop if client setup failed
         try:
+            print(f"Attempting to connect to Gemini model: {MODEL}...")
+            self.connection_status = "Connecting..."
+            await run_coro_in_background_loop(update_status(self.connection_status))
+            # --- Connect to Gemini ---
+            # Use a timeout for the connection attempt itself
+            try:
+                # The actual connection happens within the context manager entry
+                async with asyncio.wait_for(
+                    self.client.aio.live.connect(model=MODEL, config=self.config),
+                    timeout=30.0 # 30 second timeout for connection
+                ) as session:
+                    self.session = session
+                    self.connection_status = "Connected"
+                    print("Session established successfully.")
+                    await run_coro_in_background_loop(update_status(self.connection_status))
+                    # Queue for mic/video data TO Gemini
+                    self.out_queue = asyncio.Queue(maxsize=20)
+                    # --- Create and manage background tasks ---
+                    tasks = set()
+                    tasks.add(asyncio.create_task(self.process_text_inputs(), name="process_text_inputs"))
+                    tasks.add(asyncio.create_task(self.receive_responses(), name="receive_responses"))
+                    if self.video_mode != "none":
+                        tasks.add(asyncio.create_task(self.send_realtime(), name="send_realtime"))
+                        if self.video_mode == "camera":
+                            print("Starting camera input task...")
+                            tasks.add(asyncio.create_task(self.get_frames(), name="get_frames"))
+                        elif self.video_mode == "screen":
+                            print("Starting screen capture task...")
+                            tasks.add(asyncio.create_task(self.get_screen(), name="get_screen"))
+                        # Option to add microphone input alongside video if needed
+                        # print("Starting microphone input task...")
+                        # tasks.add(asyncio.create_task(self.listen_audio(), name="listen_audio"))
+                    background_tasks.update(tasks)
+                    # Keep running while connected and tasks are active
+                    # We primarily rely on receive_responses to detect session closure/errors
+                    while self.connection_status == "Connected" and self.session:
+                        await asyncio.sleep(0.5) # Check status periodically
+                    print("Exiting main run loop (disconnected or error).")
+            except asyncio.TimeoutError:
+                print("CONNECTION FAILED: Timeout while trying to connect.")
+                self.connection_status = "Connection Failed: Timeout"
+                await run_coro_in_background_loop(update_status(self.connection_status))
+            except google.api_core.exceptions.PermissionDenied as e:
+                print(f"CONNECTION FAILED: Permission Denied. Check API key and permissions. {e}")
+                self.connection_status = "Connection Failed: Permission Denied"
+                await run_coro_in_background_loop(update_status(f"{self.connection_status}. Check API Key."))
+            except google.api_core.exceptions.InvalidArgument as e:
+                print(f"CONNECTION FAILED: Invalid Argument. Check model name ('{MODEL}') and config. {e}")
+                self.connection_status = f"Connection Failed: Invalid Argument (Model/Config?)"
+                await run_coro_in_background_loop(update_status(f"{self.connection_status} Details: {e}"))
+            except Exception as e: # Catch other potential connection errors
+                 print(f"CONNECTION FAILED: An unexpected error occurred during connection. {e}")
+                 traceback.print_exc()
+                 self.connection_status = f"Connection Failed: {e}"
+                 await run_coro_in_background_loop(update_status(self.connection_status))
+        except asyncio.CancelledError:
+             print("run_main_loop task cancelled.")
+             self.connection_status = "Disconnected (Cancelled)"
         except Exception as e:
+            print(f"Error in AudioLoop run_main_loop: {e}")
             traceback.print_exc()
+            self.connection_status = f"Runtime Error: {e}"
+            await run_coro_in_background_loop(update_status(self.connection_status))
+        finally:
+            print("Cleaning up audio loop resources...")
+            final_status = self.connection_status # Capture status before changing
+            if final_status == "Connected": # If loop exited cleanly but was connected
+                 final_status = "Disconnected"
+            self.connection_status = "Disconnected" # Ensure status is updated
+            # Cancel remaining background tasks associated with *this* instance
+            tasks_to_cancel = list(background_tasks) # Iterate over a copy
+            background_tasks.clear() # Clear global set for this instance
+            for task in tasks_to_cancel:
+                 if task and not task.done():
+                     task.cancel()
+                     print(f"Cancelled task: {task.get_name()}")
+            # Close PyAudio stream if open (managed by listen_audio task's finally now)
+            # if self.audio_stream and not self.audio_stream.is_stopped(): ... handled in listen_audio
+            # Reset session and client
+            self.session = None # Important to signal disconnection
+            self.client = None
+            self.out_queue = None # Clear queue reference
+            print("AudioLoop run finished.")
+            await run_coro_in_background_loop(update_status(final_status)) # Update Gradio status
+    async def disconnect(self):
+        """Initiates the disconnection process."""
+        print("Disconnect requested.")
+        if self.session:
+            # The run_main_loop should detect the session closing or errors.
+            # Explicitly closing the session might be possible depending on the SDK,
+            # but often letting the context manager exit is the intended way.
+            # For now, just update status and let the main loop handle cleanup.
+            print("Setting status to Disconnecting...")
+            self.connection_status = "Disconnecting"
+            # Signal tasks relying on session to stop
+            self.session = None # This should help loops terminate
+            # Put None on input queue to stop text processor if waiting
+            await self.text_input_queue.put(None)
+        else:
+            self.connection_status = "Disconnected"
+        # Cleanup might happen in run_main_loop's finally block
+        await run_coro_in_background_loop(update_status("Disconnected"))
+# --- Helper Functions ---
+def start_asyncio_loop():
+    """Starts the asyncio event loop in a separate thread."""
+    global background_loop, stop_background_loop
+    stop_background_loop = False
+    background_loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(background_loop)
+    print("Background asyncio loop starting.")
+    try:
+        # Run until explicitly stopped
+        while not stop_background_loop:
+             background_loop.call_later(0.1, background_loop.stop) # Wake up periodically
+             background_loop.run_forever()
+             if stop_background_loop:
+                  print("Stop signal received, exiting run_forever loop.")
+                  break # Exit outer loop if stopped
+        # Run pending tasks before closing
+        print("Running pending tasks before closing loop...")
+        pending = asyncio.all_tasks(loop=background_loop)
+        if pending:
+            background_loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
+    except Exception as e:
+         print(f"Error in background loop: {e}")
+         traceback.print_exc()
+    finally:
+        if background_loop.is_running():
+            background_loop.stop()
+        print("Closing background loop...")
+        # Give tasks a moment to finish cancelling
+        time.sleep(0.5)
+        background_loop.close()
+        print("Background asyncio loop stopped.")
+        background_loop = None # Clear global ref
+def stop_asyncio_loop():
+    """Signals the background asyncio loop to stop."""
+    global stop_background_loop, background_loop
+    print("Signalling background loop to stop...")
+    stop_background_loop = True
+    if background_loop and background_loop.is_running():
+        # This helps wake up the loop if it's idle
+        background_loop.call_soon_threadsafe(background_loop.stop)
+async def run_coro_in_background_loop(coro):
+    """Submits a coroutine to the background event loop and returns its future."""
+    global background_loop
+    if background_loop and background_loop.is_running() and not stop_background_loop:
+        try:
+            # Use run_coroutine_threadsafe for thread safety
+            future = asyncio.run_coroutine_threadsafe(coro, background_loop)
+            return future # Return the concurrent.futures.Future
+        except RuntimeError as e:
+            # Handle cases where the loop might be shutting down
+             print(f"Error submitting coroutine (loop shutting down?): {e}")
+             future = asyncio.Future()
+             future.set_exception(e)
+             return None # Indicate failure to schedule
+        except Exception as e:
+            print(f"Unexpected error submitting coroutine: {e}")
+            future = asyncio.Future()
+            future.set_exception(e)
+            return None
+    else:
+        print("Error: Background asyncio loop not running or stopping.")
+        # Create a dummy future that resolves immediately with an error?
+        # Or just return None to indicate failure
+        return None
+def format_audio_for_gradio(pcm_data):
+    """Converts raw PCM data to a format Gradio's Audio component can use."""
+    if not pcm_data:
+        # print("No audio data received to format.")
+        return None
+    try:
+        # Ensure PyAudio is initialized to get sample width
+        if not pya: initialize_py_audio()
+        if not pya: return None # Could not initialize
+        # Create a WAV file in memory
+        wav_buffer = io.BytesIO()
+        with wave.open(wav_buffer, 'wb') as wf:
+            wf.setnchannels(CHANNELS)
+            wf.setsampwidth(pya.get_sample_size(FORMAT)) # Should be 2 for paInt16
+            wf.setframerate(RECEIVE_SAMPLE_RATE)
+            wf.writeframes(pcm_data)
+        wav_buffer.seek(0)
+        # Read the WAV data back and convert to numpy array
+        with wave.open(wav_buffer, 'rb') as wf_read:
+            n_frames = wf_read.getnframes()
+            data = wf_read.readframes(n_frames)
+            dtype = np.int16 # Based on pyaudio.paInt16
+            numpy_array = np.frombuffer(data, dtype=dtype)
+        # print(f"Formatted audio: {len(numpy_array)} samples, rate {RECEIVE_SAMPLE_RATE}")
+        # Return tuple for Gradio Audio: (sample_rate, numpy_array)
+        return (RECEIVE_SAMPLE_RATE, numpy_array)
+    except Exception as e:
+        print(f"Error formatting audio: {e}")
+        traceback.print_exc()
+        return None
+def initialize_py_audio():
+    global pya
+    if pya is None:
+        try:
+            print("Initializing PyAudio...")
+            pya = pyaudio.PyAudio()
+            print("PyAudio initialized.")
+            return True
+        except Exception as e:
+             print(f"Failed to initialize PyAudio: {e}")
+             pya = None
+             return False
+    return True # Already initialized
+def terminate_py_audio():
+    global pya
+    if pya:
+        print("Terminating PyAudio...")
+        try:
+            pya.terminate()
+        except Exception as e:
+             print(f"Error terminating PyAudio: {e}")
+        finally:
+            pya = None
+            print("PyAudio terminated.")
+# --- Gradio Interface and Handlers ---
+# Placeholder for status updates - needs to run in the background loop
+async def update_status(new_status: str):
+    """Coroutine to update the Gradio status component."""
+    # This function itself doesn't directly update Gradio.
+    # It relies on being scheduled and the Gradio handler returning the value.
+    # However, for internal logging, we print here.
+    print(f"Status Update (async): {new_status}")
+    # The actual update happens when the calling handler returns this status
+    # For direct async updates, you'd need Gradio's streaming features if applicable.
+def handle_connect(api_key, voice_name, video_mode):
+    """Handles the 'Connect' button click."""
+    global audio_loop_instance, background_loop, background_thread
+    print("\n--- Connect Button Clicked ---")
+    status = "Connecting..."
+    yield status, None, None # Initial status update
+    if not api_key:
+        yield "Error: Please enter a Gemini API key.", None, None
+        return
+    if audio_loop_instance and audio_loop_instance.connection_status not in ["Disconnected", "Initialization Error", "Connection Failed: Timeout", "Connection Failed: Permission Denied", "Connection Failed: Invalid Argument (Model/Config?)"]:
+        yield f"Already connected or connecting ({audio_loop_instance.connection_status}). Disconnect first.", None, None
+        return
+    # Start background loop thread if not running
+    if not background_thread or not background_thread.is_alive():
+        print("Starting background thread...")
+        background_thread = threading.Thread(target=start_asyncio_loop, daemon=True)
+        background_thread.start()
+        time.sleep(0.5) # Give the loop a moment to start
+    # Ensure PyAudio is initialized
+    if not initialize_py_audio():
+         yield "Error: Failed to initialize audio system.", None, None
+         return
+    print(f"Attempting to connect with voice: {voice_name}, video: {video_mode}")
+    audio_loop_instance = GradioAudioLoop(video_mode=video_mode, api_key=api_key, voice_name=voice_name)
+    # Run the audio loop's main logic in the background asyncio loop
+    connect_future = run_coro_in_background_loop(audio_loop_instance.run_main_loop())
+    if not connect_future:
+         audio_loop_instance = None # Cleanup if scheduling failed
+         yield "Error: Failed to schedule connection task.", None, None
+         return
+    # Don't block Gradio here. The run_main_loop will update status via update_status coroutine calls.
+    # We yield the initial "Connecting..." status. Subsequent updates handled async.
+    # We might need a short sleep/check or rely purely on async updates. Let's rely on async updates.
+    # yield "Connecting... Waiting for confirmation.", None, None
+    # Add a small delay to allow the initial connection steps to run and update status
+    await asyncio.sleep(1) # Use await if in async context, time.sleep otherwise? Gradio handler might be sync.
+    # Use time.sleep in sync Gradio handler context
+    time.sleep(1.5)
+    # The final status will be updated by the run_main_loop's finally block or error handling
+    # Check the instance status directly after a short wait
+    if audio_loop_instance:
+        current_status = audio_loop_instance.connection_status
+        yield current_status, None, None
     else:
+        # This case shouldn't happen if scheduling worked, but as a fallback
+        yield "Error: Connection process failed unexpectedly.", None, None
+def handle_disconnect():
+    """Handles the 'Disconnect' button click."""
+    global audio_loop_instance
+    print("\n--- Disconnect Button Clicked ---")
+    status = "Disconnecting..."
+    yield status, None, None # Initial status update
+    if not audio_loop_instance or audio_loop_instance.connection_status == "Disconnected":
+        yield "Already disconnected.", None, None
+        return
+    # Schedule the disconnect coroutine
+    disconnect_future = run_coro_in_background_loop(audio_loop_instance.disconnect())
+    if not disconnect_future:
+         yield "Error: Failed to schedule disconnection task.", None, None
+         return
+    try:
+        # Wait briefly for disconnect to initiate
+        disconnect_future.result(timeout=5.0)
+        status = "Disconnected"
+    except TimeoutError:
+        status = "Disconnect timeout. Check logs."
+        print("Timeout waiting for disconnect confirmation.")
+    except Exception as e:
+        status = f"Error during disconnect: {e}"
+        print(f"Error during disconnect future result: {e}")
+    # Clean up global instance
+    audio_loop_instance = None
+    # Optionally terminate PyAudio here or let atexit handle it
+    # terminate_py_audio() # Can cause issues if connect is clicked again quickly
+    yield status, None, None # Final status update
+def handle_send_message(message):
+    """Handles sending a text message."""
+    global audio_loop_instance
+    print(f"\n--- Sending Message: {message[:30]}... ---")
+    if not audio_loop_instance or audio_loop_instance.connection_status != "Connected":
+        yield "Error: Not connected. Cannot send message.", None # Update status text, no audio
+        return
+    if not message or message.strip() == "":
+         yield "Cannot send empty message.", None
+         return
+    # Clear previous outputs
+    yield "Sending message...", None # Update status, clear audio
+    # Schedule the send/receive task and wait for its result
+    response_future = run_coro_in_background_loop(
+        audio_loop_instance.send_message_and_wait_for_response(message)
+    )
+    if not response_future:
+        yield "Error: Failed to schedule message task.", None
+        return
+    text_response = "Error: No response received."
+    audio_output = None
+    try:
+        # Wait for the background task to complete and return results
+        # Adjust timeout as needed
+        result_text, result_audio_data = response_future.result(timeout=60.0) # Wait up to 60 secs
+        text_response = result_text
+        if result_audio_data:
+             print(f"Received audio data ({len(result_audio_data)} bytes), formatting...")
+             audio_output = format_audio_for_gradio(result_audio_data)
+             if audio_output is None:
+                  print("Failed to format audio for Gradio.")
+                  text_response += " [Audio Formatting Error]"
+        else:
+             print("No audio data received in response.")
+             text_response += " [No Audio Received]"
+    except TimeoutError:
+        print("Timeout waiting for response future.")
+        text_response = "Error: Timeout waiting for Gemini response."
+        # Optionally try to cancel the future if possible/needed
+    except Exception as e:
+        print(f"Error getting result from response future: {e}")
+        traceback.print_exc()
+        text_response = f"Error processing response: {e}"
+    print(f"Final Text Response: {text_response}")
+    print(f"Final Audio Output: {'Present' if audio_output else 'None'}")
+    yield text_response, audio_output
+# --- Gradio Interface Definition ---
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# Gemini LiveConnect TTS Interface")
+    gr.Markdown(f"Using Model: `{MODEL}`")
+    with gr.Row():
+        api_key_input = gr.Textbox(label="Gemini API Key", type="password", placeholder="Enter your API key")
+        voice_select = gr.Dropdown(label="Select Voice", choices=AVAILABLE_VOICES, value=DEFAULT_VOICE)
+        video_mode_select = gr.Radio(label="Video Input (Optional)", choices=["none", "camera", "screen"], value=DEFAULT_VIDEO_MODE, visible=False) # Hidden for now, focus on TTS
+    with gr.Row():
+        connect_button = gr.Button("Connect")
+        disconnect_button = gr.Button("Disconnect")
+    status_output = gr.Textbox(label="Status", value="Disconnected", interactive=False)
+    with gr.Column():
+        message_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
+        send_button = gr.Button("Send Message")
+    with gr.Column():
+        gr.Markdown("## Response")
+        response_text_output = gr.Textbox(label="Gemini Text", interactive=False)
+        audio_output = gr.Audio(label="Gemini Audio", type="numpy", interactive=False) # Use numpy for (rate, data) tuple
+    # --- Event Handlers ---
+    connect_button.click(
+        fn=handle_connect,
+        inputs=[api_key_input, voice_select, video_mode_select],
+        outputs=[status_output, response_text_output, audio_output] # Clear outputs on connect
+    )
+    disconnect_button.click(
+        fn=handle_disconnect,
+        inputs=[],
+        outputs=[status_output, response_text_output, audio_output] # Clear outputs on disconnect
+    )
+    send_button.click(
+        fn=handle_send_message,
+        inputs=[message_input],
+        outputs=[response_text_output, audio_output]
+    )
+    # Allow sending message by pressing Enter in the textbox
+    message_input.submit(
+         fn=handle_send_message,
+        inputs=[message_input],
+        outputs=[response_text_output, audio_output]
+    )
+# --- Cleanup Function ---
+def cleanup():
+    print("Running cleanup...")
+    global audio_loop_instance
+    # Disconnect if connected
+    if audio_loop_instance and audio_loop_instance.connection_status != "Disconnected":
+         print("Disconnecting during cleanup...")
+         disconnect_future = run_coro_in_background_loop(audio_loop_instance.disconnect())
+         if disconnect_future:
+              try:
+                   disconnect_future.result(timeout=5.0)
+                   print("Disconnect successful during cleanup.")
+              except Exception as e:
+                   print(f"Error during cleanup disconnect: {e}")
+         audio_loop_instance = None
+    # Signal background loop to stop
+    stop_asyncio_loop()
+    # Wait for background thread to finish
+    if background_thread and background_thread.is_alive():
+        print("Waiting for background thread to join...")
+        background_thread.join(timeout=5.0)
+        if background_thread.is_alive():
+             print("Warning: Background thread did not exit cleanly.")
+    # Terminate PyAudio
+    terminate_py_audio()
+    print("Cleanup finished.")
+# Register cleanup function to run on exit
+atexit.register(cleanup)
+# --- Main Execution ---
 if __name__ == "__main__":
+    # Start the background thread immediately (optional, connect can start it too)
+    # print("Starting background thread on launch...")
+    # background_thread = threading.Thread(target=start_asyncio_loop, daemon=True)
+    # background_thread.start()
+    print("Launching Gradio Interface...")
+    # Share=True to create a public link (remove if not needed)
+    demo.queue().launch(share=False)
+    # Keep main thread alive while Gradio is running (Gradio launch blocks)
+    print("Gradio Interface closed.")
+    # Cleanup is handled by atexit