Spaces:

renesistech
/

Spatial-aware

Running

App Files Files Community

noumanjavaid committed on 7 days ago

Commit

99636da

verified ·

1 Parent(s): a46c042

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +76 -27

src/streamlit_app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import base64
 import io
 import threading
 import traceback
-import atexit # Correctly imported
 import time
 import logging
 from dotenv import load_dotenv
@@ -24,21 +24,20 @@ from streamlit_webrtc import (
     WebRtcMode,
     AudioProcessorBase,
     VideoProcessorBase,
-    # ClientSettings # Removed as it's not used in this version
 )
-# from aiortc import RTCIceServer, RTCConfiguration # RTCConfiguration object not needed directly for webrtc_streamer
 # --- Configuration ---
 load_dotenv()
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(threadName)s - %(levelname)s - %(message)s')
 # Audio configuration
-PYAUDIO_FORMAT = pyaudio.paInt16 # For PyAudio playback
-PYAUDIO_CHANNELS = 1 # For PyAudio playback
-WEBRTC_REQUESTED_AUDIO_CHANNELS = 1 # Request mono audio from WebRTC
-WEBRTC_REQUESTED_SEND_SAMPLE_RATE = 16000 # Target sample rate for audio sent to Gemini
-GEMINI_AUDIO_RECEIVE_SAMPLE_RATE = 24000  # Gemini documentation recommendation for its TTS
-PYAUDIO_PLAYBACK_CHUNK_SIZE = 1024 # For PyAudio playback
 AUDIO_PLAYBACK_QUEUE_MAXSIZE = 50
 MEDIA_TO_GEMINI_QUEUE_MAXSIZE = 30
@@ -72,10 +71,10 @@ def cleanup_pyaudio():
         pya.terminate()
 atexit.register(cleanup_pyaudio)
-# --- Global Queues for WebRTC to Backend Communication ---
-video_frames_to_gemini_q = asyncio.Queue(maxsize=MEDIA_TO_GEMINI_QUEUE_MAXSIZE)
-audio_chunks_to_gemini_q = asyncio.Queue(maxsize=MEDIA_TO_GEMINI_QUEUE_MAXSIZE)
-audio_from_gemini_playback_q = asyncio.Queue(maxsize=AUDIO_PLAYBACK_QUEUE_MAXSIZE)
 # --- Gemini Client Setup ---
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
@@ -106,6 +105,8 @@ class GeminiInteractionLoop:
         self.async_event_loop = None
         self.is_running = True
         self.playback_stream = None
     async def send_text_input_to_gemini(self, user_text):
         if not user_text or not self.gemini_session or not self.is_running:
@@ -120,6 +121,10 @@ class GeminiInteractionLoop:
     async def stream_media_to_gemini(self):
         logging.info("Task started: Stream media from WebRTC queues to Gemini.")
         async def get_media_from_queues():
             try:
                 video_frame = await asyncio.wait_for(video_frames_to_gemini_q.get(), timeout=0.02)
                 video_frames_to_gemini_q.task_done()
@@ -154,6 +159,8 @@ class GeminiInteractionLoop:
             while self.is_running:
                 if not self.gemini_session:
                     await asyncio.sleep(0.1); continue
                 try:
                     turn_response = self.gemini_session.receive()
                     async for chunk in turn_response:
@@ -176,6 +183,11 @@ class GeminiInteractionLoop:
     async def play_gemini_audio(self):
         logging.info("Task started: Play Gemini audio responses.")
         try:
             self.playback_stream = await asyncio.to_thread(
                 pya.open, format=PYAUDIO_FORMAT, channels=PYAUDIO_CHANNELS, rate=GEMINI_AUDIO_RECEIVE_SAMPLE_RATE, output=True, frames_per_buffer=PYAUDIO_PLAYBACK_CHUNK_SIZE
             )
@@ -202,15 +214,26 @@ class GeminiInteractionLoop:
     def signal_stop(self):
         logging.info("Signal to stop GeminiInteractionLoop received.")
         self.is_running = False
         for q in [video_frames_to_gemini_q, audio_chunks_to_gemini_q, audio_from_gemini_playback_q]:
-            try: q.put_nowait(None)
-            except asyncio.QueueFull: logging.warning(f"Queue was full when trying to put sentinel for stop signal.")
-            except Exception as e: logging.error(f"Error putting sentinel in queue: {e}", exc_info=True)
     async def run_main_loop(self):
         self.async_event_loop = asyncio.get_running_loop()
         self.is_running = True
         logging.info("GeminiInteractionLoop run_main_loop starting...")
         if client is None:
             logging.critical("Gemini client is None in run_main_loop. Aborting.")
             return
@@ -245,7 +268,11 @@ class GeminiInteractionLoop:
             logging.info("GeminiInteractionLoop.run_main_loop() finishing...")
             self.is_running = False
             self.gemini_session = None
-            logging.info("GeminiInteractionLoop finished.")
 # --- WebRTC Media Processors ---
 class VideoProcessor(VideoProcessorBase):
@@ -254,6 +281,10 @@ class VideoProcessor(VideoProcessorBase):
         self.last_gemini_send_time = time.monotonic()
     async def _process_and_queue_frame_async(self, frame_ndarray):
         self.frame_counter += 1
         current_time = time.monotonic()
         if (current_time - self.last_gemini_send_time) < (1.0 / VIDEO_FPS_TO_GEMINI):
@@ -277,11 +308,22 @@ class VideoProcessor(VideoProcessorBase):
     async def recv(self, frame):
         img_bgr = frame.to_ndarray(format="bgr24")
-        asyncio.create_task(self._process_and_queue_frame_async(img_bgr))
         return frame
 class AudioProcessor(AudioProcessorBase):
     async def _process_and_queue_audio_async(self, audio_frames):
         for frame in audio_frames:
             audio_data = frame.planes[0].to_bytes()
             mime_type = f"audio/L16;rate={frame.sample_rate};channels={frame.layout.channels}"
@@ -295,7 +337,12 @@ class AudioProcessor(AudioProcessorBase):
             except Exception as e: logging.error(f"Error queueing audio chunk: {e}", exc_info=True)
     async def recv(self, frames):
-        asyncio.create_task(self._process_and_queue_audio_async(frames))
         return frames
 # --- Streamlit UI and Application Logic ---
@@ -324,10 +371,7 @@ def run_streamlit_app():
                 st.session_state.gemini_session_active = True
                 st.session_state.chat_messages = [{"role": "system", "content": "Assistant activating. Please allow camera/microphone access in your browser if prompted."}]
-                for q in [video_frames_to_gemini_q, audio_chunks_to_gemini_q, audio_from_gemini_playback_q]:
-                    while not q.empty():
-                        try: q.get_nowait()
-                        except asyncio.QueueEmpty: break
                 gemini_loop = GeminiInteractionLoop()
                 st.session_state.gemini_loop_instance = gemini_loop
@@ -358,15 +402,20 @@ def run_streamlit_app():
             }
         }
         webrtc_ctx = webrtc_streamer(
             key=st.session_state.webrtc_component_key,
             mode=WebRtcMode.SENDONLY,
-            rtc_configuration={ # MODIFIED HERE: Pass dictionary directly
                 "iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]
             },
             media_stream_constraints=MEDIA_STREAM_CONSTRAINTS,
-            video_processor_factory=VideoProcessor,
-            audio_processor_factory=AudioProcessor,
             async_processing=True,
         )
@@ -374,7 +423,7 @@ def run_streamlit_app():
             st.caption("WebRTC connected. Streaming your camera and microphone.")
         elif st.session_state.gemini_session_active:
             st.caption("WebRTC attempting to connect. Ensure camera/microphone permissions are granted in your browser.")
-            if hasattr(webrtc_ctx.state, 'error') and webrtc_ctx.state.error: # Check if error attribute exists
                 st.error(f"WebRTC Connection Error: {webrtc_ctx.state.error}")
     else:
         st.info("Click 'Start Session' in the sidebar to enable the live feed and assistant.")

 import io
 import threading
 import traceback
+import atexit
 import time
 import logging
 from dotenv import load_dotenv
     WebRtcMode,
     AudioProcessorBase,
     VideoProcessorBase,
 )
+# from aiortc import RTCIceServer, RTCConfiguration # RTCConfiguration object not needed directly
 # --- Configuration ---
 load_dotenv()
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(threadName)s - %(levelname)s - %(message)s')
 # Audio configuration
+PYAUDIO_FORMAT = pyaudio.paInt16
+PYAUDIO_CHANNELS = 1
+WEBRTC_REQUESTED_AUDIO_CHANNELS = 1
+WEBRTC_REQUESTED_SEND_SAMPLE_RATE = 16000
+GEMINI_AUDIO_RECEIVE_SAMPLE_RATE = 24000
+PYAUDIO_PLAYBACK_CHUNK_SIZE = 1024
 AUDIO_PLAYBACK_QUEUE_MAXSIZE = 50
 MEDIA_TO_GEMINI_QUEUE_MAXSIZE = 30
         pya.terminate()
 atexit.register(cleanup_pyaudio)
+# --- Global Queues - Declare as None, initialize later ---
+video_frames_to_gemini_q: asyncio.Queue = None
+audio_chunks_to_gemini_q: asyncio.Queue = None
+audio_from_gemini_playback_q: asyncio.Queue = None
 # --- Gemini Client Setup ---
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
         self.async_event_loop = None
         self.is_running = True
         self.playback_stream = None
+        # Queues will be initialized in run_main_loop and assigned to global vars
+        # This class will use the global queue variables directly
     async def send_text_input_to_gemini(self, user_text):
         if not user_text or not self.gemini_session or not self.is_running:
     async def stream_media_to_gemini(self):
         logging.info("Task started: Stream media from WebRTC queues to Gemini.")
         async def get_media_from_queues():
+            # Ensure queues are initialized before trying to get from them
+            if video_frames_to_gemini_q is None or audio_chunks_to_gemini_q is None:
+                await asyncio.sleep(0.1) # Wait for queues to be initialized
+                return None
             try:
                 video_frame = await asyncio.wait_for(video_frames_to_gemini_q.get(), timeout=0.02)
                 video_frames_to_gemini_q.task_done()
             while self.is_running:
                 if not self.gemini_session:
                     await asyncio.sleep(0.1); continue
+                if audio_from_gemini_playback_q is None: # Wait for queue init
+                    await asyncio.sleep(0.1); continue
                 try:
                     turn_response = self.gemini_session.receive()
                     async for chunk in turn_response:
     async def play_gemini_audio(self):
         logging.info("Task started: Play Gemini audio responses.")
         try:
+            # Wait for the playback queue to be initialized
+            while audio_from_gemini_playback_q is None and self.is_running:
+                await asyncio.sleep(0.1)
+            if not self.is_running: return
             self.playback_stream = await asyncio.to_thread(
                 pya.open, format=PYAUDIO_FORMAT, channels=PYAUDIO_CHANNELS, rate=GEMINI_AUDIO_RECEIVE_SAMPLE_RATE, output=True, frames_per_buffer=PYAUDIO_PLAYBACK_CHUNK_SIZE
             )
     def signal_stop(self):
         """Request shutdown of the interaction loop.

         Clears ``is_running`` and then makes a best-effort attempt to push a
         ``None`` sentinel into each module-level queue that currently exists
         (the queue globals stay ``None`` until ``run_main_loop`` creates them).
         A full queue is logged as a warning and any other failure as an
         error; neither is propagated to the caller.
         """
         logging.info("Signal to stop GeminiInteractionLoop received.")
         self.is_running = False
         # NOTE(review): asyncio.Queue is not thread-safe. If this method is invoked
         # from a thread other than the one running the loop that owns the queues,
         # confirm that calling put_nowait() directly here is acceptable.
         for q in (video_frames_to_gemini_q, audio_chunks_to_gemini_q, audio_from_gemini_playback_q):
             if q is None:
                 # Queue has not been created yet (or was already reset); nothing to signal.
                 continue
             try:
                 q.put_nowait(None)
             except asyncio.QueueFull:
                 logging.warning("Queue was full when trying to put sentinel for stop signal.")
             except Exception as e:
                 logging.error(f"Error putting sentinel in queue: {e}", exc_info=True)
     async def run_main_loop(self):
+        global video_frames_to_gemini_q, audio_chunks_to_gemini_q, audio_from_gemini_playback_q # Allow modification of global vars
         self.async_event_loop = asyncio.get_running_loop()
         self.is_running = True
         logging.info("GeminiInteractionLoop run_main_loop starting...")
+        # Initialize queues here, within the asyncio loop of this thread
+        video_frames_to_gemini_q = asyncio.Queue(maxsize=MEDIA_TO_GEMINI_QUEUE_MAXSIZE)
+        audio_chunks_to_gemini_q = asyncio.Queue(maxsize=MEDIA_TO_GEMINI_QUEUE_MAXSIZE)
+        audio_from_gemini_playback_q = asyncio.Queue(maxsize=AUDIO_PLAYBACK_QUEUE_MAXSIZE)
+        logging.info("Asyncio queues initialized in GeminiInteractionLoop.")
         if client is None:
             logging.critical("Gemini client is None in run_main_loop. Aborting.")
             return
             logging.info("GeminiInteractionLoop.run_main_loop() finishing...")
             self.is_running = False
             self.gemini_session = None
+            # Reset the global queue references to None on exit so a restarted loop creates fresh queues
+            video_frames_to_gemini_q = None
+            audio_chunks_to_gemini_q = None
+            audio_from_gemini_playback_q = None
+            logging.info("GeminiInteractionLoop finished and queues cleared.")
 # --- WebRTC Media Processors ---
 class VideoProcessor(VideoProcessorBase):
         self.last_gemini_send_time = time.monotonic()
     async def _process_and_queue_frame_async(self, frame_ndarray):
+        if video_frames_to_gemini_q is None: # Wait for queue to be initialized
+            logging.debug("VideoProcessor: video_frames_to_gemini_q is None, waiting...")
+            return
         self.frame_counter += 1
         current_time = time.monotonic()
         if (current_time - self.last_gemini_send_time) < (1.0 / VIDEO_FPS_TO_GEMINI):
     async def recv(self, frame):
         """Hand a video frame to background processing and return it unchanged.

         The frame is converted to a ``bgr24`` ndarray and
         ``_process_and_queue_frame_async`` is scheduled as a task on the
         running event loop, so this method does not wait for the queueing
         work. If no event loop is running in the current thread, an error is
         logged and the frame is returned without being processed.

         NOTE(review): streamlit-webrtc documents ``recv`` as a plain (non-async)
         callback; confirm that an ``async def`` override is actually awaited.
         """
         img_bgr = frame.to_ndarray(format="bgr24")
         try:
             loop = asyncio.get_running_loop()
         except RuntimeError:
             logging.error("VideoProcessor.recv: No running asyncio loop in current thread for create_task.")
             return frame

         task = loop.create_task(self._process_and_queue_frame_async(img_bgr))
         # The event loop keeps only weak references to tasks, so hold a strong
         # reference until the task finishes; otherwise it may be garbage-collected
         # before it runs to completion.
         background_tasks = getattr(self, "_background_tasks", None)
         if background_tasks is None:
             background_tasks = self._background_tasks = set()
         background_tasks.add(task)
         task.add_done_callback(background_tasks.discard)
         return frame
 class AudioProcessor(AudioProcessorBase):
     async def _process_and_queue_audio_async(self, audio_frames):
+        if audio_chunks_to_gemini_q is None: # Wait for queue to be initialized
+            logging.debug("AudioProcessor: audio_chunks_to_gemini_q is None, waiting...")
+            return
         for frame in audio_frames:
             audio_data = frame.planes[0].to_bytes()
             mime_type = f"audio/L16;rate={frame.sample_rate};channels={frame.layout.channels}"
             except Exception as e: logging.error(f"Error queueing audio chunk: {e}", exc_info=True)
     async def recv(self, frames):
         """Hand received audio frames to background processing and return them unchanged.

         ``_process_and_queue_audio_async`` is scheduled as a task on the running
         event loop, so this method does not wait for the queueing work. If no
         event loop is running in the current thread, an error is logged and
         the frames are returned without being processed.

         NOTE(review): streamlit-webrtc documents ``recv`` as a plain (non-async)
         callback taking a single frame; confirm that this ``async def`` override
         is awaited and that ``frames`` really is an iterable of frames here.
         """
         try:
             loop = asyncio.get_running_loop()
         except RuntimeError:
             logging.error("AudioProcessor.recv: No running asyncio loop in current thread for create_task.")
             return frames

         task = loop.create_task(self._process_and_queue_audio_async(frames))
         # The event loop keeps only weak references to tasks, so hold a strong
         # reference until the task finishes; otherwise it may be garbage-collected
         # before it runs to completion.
         background_tasks = getattr(self, "_background_tasks", None)
         if background_tasks is None:
             background_tasks = self._background_tasks = set()
         background_tasks.add(task)
         task.add_done_callback(background_tasks.discard)
         return frames
 # --- Streamlit UI and Application Logic ---
                 st.session_state.gemini_session_active = True
                 st.session_state.chat_messages = [{"role": "system", "content": "Assistant activating. Please allow camera/microphone access in your browser if prompted."}]
+                # Queues will be initialized inside GeminiInteractionLoop's thread
                 gemini_loop = GeminiInteractionLoop()
                 st.session_state.gemini_loop_instance = gemini_loop
             }
         }
+        # NOTE: The WebRTC streamer is rendered here without checking that the queues exist.
+        # The queues are created later, inside the Gemini loop, so the media processors may
+        # briefly see them as None and skip incoming media until they have been initialized.
+        # TODO: Have the Gemini loop signal when its queues are ready instead of assuming it.
         webrtc_ctx = webrtc_streamer(
             key=st.session_state.webrtc_component_key,
             mode=WebRtcMode.SENDONLY,
+            rtc_configuration={
                 "iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]
             },
             media_stream_constraints=MEDIA_STREAM_CONSTRAINTS,
+            video_processor_factory=VideoProcessor, # Pass the class
+            audio_processor_factory=AudioProcessor, # Pass the class
             async_processing=True,
         )
             st.caption("WebRTC connected. Streaming your camera and microphone.")
         elif st.session_state.gemini_session_active:
             st.caption("WebRTC attempting to connect. Ensure camera/microphone permissions are granted in your browser.")
+            if hasattr(webrtc_ctx.state, 'error') and webrtc_ctx.state.error:
                 st.error(f"WebRTC Connection Error: {webrtc_ctx.state.error}")
     else:
         st.info("Click 'Start Session' in the sidebar to enable the live feed and assistant.")