noumanjavaid committed on
Commit
5bb8fe7
·
verified ·
1 Parent(s): d13156e

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +532 -34
src/streamlit_app.py CHANGED
@@ -1,40 +1,538 @@
1
import altair as alt
import numpy as np
import pandas as pd
import streamlit as st

"""
# Welcome to Streamlit!

Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
forums](https://discuss.streamlit.io).

In the meantime, below is an example of what you can do with just a few lines of code:
"""

# Interactive controls: how dense the spiral is, and how many times it winds.
num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
num_turns = st.slider("Number of turns in spiral", 1, 300, 31)

# Parametric spiral: the radius grows linearly with t while the angle winds
# num_turns times around the origin.
t = np.linspace(0, 1, num_points)
angle = 2 * np.pi * num_turns * t

df = pd.DataFrame({
    "x": t * np.cos(angle),
    "y": t * np.sin(angle),
    "idx": t,
    "rand": np.random.randn(num_points),
})

# Point size is driven by random noise, color by position along the spiral.
chart = (
    alt.Chart(df, height=700, width=700)
    .mark_point(filled=True)
    .encode(
        x=alt.X("x", axis=None),
        y=alt.Y("y", axis=None),
        color=alt.Color("idx", legend=None, scale=alt.Scale()),
        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
    )
)
st.altair_chart(chart)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
 
 
2
  import streamlit as st
3
+ import os
4
+ import asyncio
5
+ import base64
6
+ import io
7
+ import threading
8
+ import queue # Standard library queue, not asyncio.Queue for thread-safe UI updates if needed
9
+ import traceback
10
+ import time # Keep time for potential future use (e.g., timestamps)
11
+ from dotenv import load_dotenv
12
 
13
+ # --- Import main libraries ---
14
+ import cv2
15
+ import pyaudio
16
+ import PIL.Image
17
+ import mss
18
+
19
+ from google import genai
20
+ from google.genai import types
21
+
22
# --- Configuration ---
# Pull GEMINI_API_KEY (and any other secrets) from a local .env into os.environ.
load_dotenv()

# Audio configuration
FORMAT = pyaudio.paInt16            # 16-bit PCM samples
CHANNELS = 1                        # mono capture and playback
SEND_SAMPLE_RATE = 16000            # mic capture rate sent to the API
RECEIVE_SAMPLE_RATE = 24000         # According to Gemini documentation
CHUNK_SIZE = 1024                   # frames per PyAudio buffer read/write
AUDIO_QUEUE_MAXSIZE = 20            # Max audio chunks to buffer for playback

# Video configuration
VIDEO_FPS_LIMIT = 1                 # Send 1 frame per second to the API
VIDEO_PREVIEW_RESIZE = (640, 480)   # Size for Streamlit preview
VIDEO_API_RESIZE = (1024, 1024)     # Max size to send to API (adjust if needed)

# Gemini model configuration
MODEL = "models/gemini-2.0-flash-live-001"  # Ensure this is the correct model for live capabilities
DEFAULT_MODE = "camera"             # Default video input mode ("camera" | "screen" | "none")

# System Prompt for the Medical Assistant.
# Sent as the first message of every live session (see AudioLoop.run).
MEDICAL_ASSISTANT_SYSTEM_PROMPT = """You are an AI Medical Assistant. Your primary function is to analyze visual information from the user's camera or screen.

Your responsibilities are:
1. **Visual Observation and Description:** Carefully examine the images or video feed. Describe relevant details you observe.
2. **General Information (Non-Diagnostic):** Provide general information related to what is visually presented, if applicable. You are not a diagnostic tool.
3. **Safety and Disclaimer (CRITICAL):**
    * You are an AI assistant, **NOT a medical doctor or a substitute for one.**
    * **DO NOT provide medical diagnoses, treatment advice, or interpret medical results (e.g., X-rays, scans, lab reports).**
    * When appropriate, and always if the user seems to be seeking diagnosis or treatment, explicitly state your limitations and **strongly advise the user to consult a qualified healthcare professional.**
    * If you see something that *appears* visually concerning (e.g., an unusual skin lesion, signs of injury), you may gently suggest it might be wise to have it looked at by a professional, without speculating on what it is.
4. **Tone:** Maintain a helpful, empathetic, and calm tone.
5. **Interaction:** After this initial instruction, you can make a brief acknowledgment of your role (e.g., "I'm ready to assist by looking at what you show me. Please remember to consult a doctor for medical advice."). Then, focus on responding to the user's visual input and questions.

Example of a disclaimer you might use: "As an AI assistant, I can describe what I see, but I can't provide medical advice or diagnoses. For any health concerns, it's always best to speak with a doctor or other healthcare professional."
"""
58
 
59
# Initialize Streamlit state
def init_session_state():
    """Seed st.session_state with every key this app reads, if not already set."""
    defaults = {
        'initialized': False,
        'audio_loop': None,       # the AudioLoop instance while a session runs
        'chat_messages': [],      # list of {"role", "content"} dicts for the chat pane
        'current_frame': None,    # latest PIL preview image from camera/screen
        'run_loop': False,        # flag to control the loop from Streamlit
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


# Initialize all session state variables
init_session_state()
74
+
75
# Configure page
st.set_page_config(page_title="Real-time Medical Assistant", layout="wide")

# Initialize Gemini client.
# Ensure API key is set in environment variables or .env file (loaded above).
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    # Without a key there is nothing useful to render; halt the script run.
    st.error("GEMINI_API_KEY not found. Please set it in your environment variables or a .env file.")
    st.stop()

client = genai.Client(
    http_options={"api_version": "v1beta"},  # v1beta exposes the live API used below
    api_key=GEMINI_API_KEY,
)

# Configure Gemini client and response settings.
# NOTE(review): presumably the live API streams both audio and text when both
# modalities are listed -- confirm against the google-genai SDK docs.
CONFIG = types.LiveConnectConfig(
    response_modalities=["audio", "text"],  # Ensure text is also enabled if you want to display AI text directly
    speech_config=types.SpeechConfig(
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Puck")  # Or other preferred voice
        )
    ),
    # If the API supports an initial_prompt field in LiveConnectConfig, it would be ideal here.
    # As of some versions, it might not be directly available, hence sending as first message.
)

# Single shared PyAudio handle; individual streams are opened per task.
pya = pyaudio.PyAudio()
103
+
104
class AudioLoop:
    """Owns one Gemini Live session plus all capture/playback asyncio tasks.

    One instance per Streamlit session, driven by ``run()`` on a dedicated
    background thread (see ``main``). ``stop_loop()`` may be called from any
    thread to request a cooperative shutdown: it flips ``self.running`` and
    pushes ``None`` sentinels to wake tasks blocked on queue ``get()``s.
    """

    def __init__(self, video_mode=DEFAULT_MODE):
        self.video_mode = video_mode      # "camera", "screen", or "none"
        self.audio_in_queue = None        # asyncio.Queue of PCM chunks for playback
        self.out_queue = None             # asyncio.Queue of media parts bound for Gemini
        self.session = None               # live session object, set inside run()
        self.loop = None                  # event loop running run(); enables thread-safe scheduling
        self.running = True               # general flag to control async loops
        self.audio_stream = None          # PyAudio input stream

    async def send_text_to_gemini(self, text_input):
        """Send one user text message to the live session, ending the turn."""
        if not text_input or not self.session or not self.running:
            st.warning("Session not active or no text to send.")
            return
        try:
            # User messages should typically end the turn for the AI to respond.
            await self.session.send(input=text_input, end_of_turn=True)
            # UI update for the user message is handled in the Streamlit main() code.
        except Exception as e:
            st.error(f"Error sending message to Gemini: {str(e)}")
            traceback.print_exception(e)

    def _get_frame(self, cap):
        """Grab one camera frame; return {'preview': PIL image, 'api': jpeg part} or None."""
        ret, frame = cap.read()
        if not ret:
            return None

        # OpenCV delivers BGR; convert before handing to PIL.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = PIL.Image.fromarray(frame_rgb)

        preview_img = img.copy()
        preview_img.thumbnail(VIDEO_PREVIEW_RESIZE)

        api_img = img.copy()
        api_img.thumbnail(VIDEO_API_RESIZE)

        image_io = io.BytesIO()
        api_img.save(image_io, format="jpeg")
        image_io.seek(0)
        image_bytes = image_io.read()

        return {
            "preview": preview_img,
            "api": {
                "mime_type": "image/jpeg",
                "data": base64.b64encode(image_bytes).decode(),
            },
        }

    async def get_frames_from_camera(self):
        """Stream webcam frames: update the Streamlit preview and enqueue jpegs for the API."""
        cap = None
        try:
            cap = await asyncio.to_thread(cv2.VideoCapture, 0)
            if not cap.isOpened():
                st.error("Could not open camera.")  # This error needs to reach the Streamlit UI
                self.running = False                # Stop the loop if the camera fails
                return

            while self.running:
                frame_data = await asyncio.to_thread(self._get_frame, cap)
                if frame_data is None:
                    await asyncio.sleep(0.01)  # Short sleep if frame read fails
                    continue

                # NOTE(review): written from a background thread; Streamlit
                # session_state is not formally thread-safe -- confirm acceptable.
                st.session_state['current_frame'] = frame_data["preview"]

                if self.out_queue.full():
                    await self.out_queue.get()  # Drop oldest to avoid blocking indefinitely

                await self.out_queue.put(frame_data["api"])
                await asyncio.sleep(1.0 / VIDEO_FPS_LIMIT)  # throttle to VIDEO_FPS_LIMIT
        except Exception as e:
            st.error(f"Camera streaming error: {e}")
            self.running = False
        finally:
            if cap:
                await asyncio.to_thread(cap.release)

    def _get_screen_frame(self):
        """Grab one screenshot; return the same preview/api dict shape as _get_frame."""
        sct = mss.mss()
        # sct.monitors[0] is the union of all monitors; [1] is the primary one.
        monitor_number = 1
        if len(sct.monitors) > 1:
            monitor = sct.monitors[monitor_number]
        else:
            monitor = sct.monitors[0]

        screenshot = sct.grab(monitor)
        img = PIL.Image.frombytes("RGB", screenshot.size, screenshot.rgb)

        preview_img = img.copy()
        preview_img.thumbnail(VIDEO_PREVIEW_RESIZE)

        api_img = img.copy()
        api_img.thumbnail(VIDEO_API_RESIZE)

        image_io = io.BytesIO()
        api_img.save(image_io, format="jpeg")
        image_io.seek(0)
        image_bytes = image_io.read()

        return {
            "preview": preview_img,
            "api": {
                "mime_type": "image/jpeg",
                "data": base64.b64encode(image_bytes).decode(),
            },
        }

    async def get_frames_from_screen(self):
        """Stream screen captures, mirroring get_frames_from_camera."""
        try:
            while self.running:
                frame_data = await asyncio.to_thread(self._get_screen_frame)
                if frame_data is None:
                    await asyncio.sleep(0.01)
                    continue

                st.session_state['current_frame'] = frame_data["preview"]

                if self.out_queue.full():
                    await self.out_queue.get()

                await self.out_queue.put(frame_data["api"])
                await asyncio.sleep(1.0 / VIDEO_FPS_LIMIT)
        except Exception as e:
            st.error(f"Screen capture error: {e}")
            self.running = False

    async def send_realtime_media(self):
        """Drain out_queue and forward each media part (audio/image) to Gemini."""
        try:
            while self.running:
                if not self.session:
                    await asyncio.sleep(0.1)  # Wait for session to be established
                    continue
                try:
                    # Timeout so we periodically re-check self.running.
                    msg = await asyncio.wait_for(self.out_queue.get(), timeout=0.5)
                    if msg is None:
                        # Sentinel pushed by stop_loop() to wake us; do not send it.
                        self.out_queue.task_done()
                        continue
                    if self.session and self.running:  # Re-check session and running status
                        await self.session.send(input=msg)  # No end_of_turn for continuous media
                        self.out_queue.task_done()
                except asyncio.TimeoutError:
                    continue  # No new media to send
                except Exception as e:
                    if self.running:  # Only log if we are supposed to be running
                        print(f"Error in send_realtime_media: {e}")
                    await asyncio.sleep(0.1)  # Prevent tight loop on error
        except asyncio.CancelledError:
            print("send_realtime_media task cancelled.")
        finally:
            print("send_realtime_media task finished.")

    async def listen_for_audio(self):
        """Open the default microphone and push PCM chunks onto out_queue."""
        self.audio_stream = None
        try:
            mic_info = await asyncio.to_thread(pya.get_default_input_device_info)
            self.audio_stream = await asyncio.to_thread(
                pya.open,
                format=FORMAT,
                channels=CHANNELS,
                rate=SEND_SAMPLE_RATE,
                input=True,
                input_device_index=mic_info["index"],
                frames_per_buffer=CHUNK_SIZE,
            )
            print("Microphone stream opened.")
            while self.running:
                try:
                    # exception_on_overflow=False helps avoid crashes on buffer overflows
                    data = await asyncio.to_thread(self.audio_stream.read, CHUNK_SIZE, exception_on_overflow=False)
                    if self.out_queue.full():
                        await self.out_queue.get()  # Drop oldest to make space
                    await self.out_queue.put({"data": data, "mime_type": "audio/pcm"})
                except IOError as e:  # PyAudio-specific IO errors
                    if e.errno == pyaudio.paInputOverflowed:
                        print("PyAudio Input overflowed. Skipping frame.")
                    else:
                        print(f"PyAudio read error: {e}")
                        self.running = False  # Potentially stop on other IOErrors
                        break
                except Exception as e:
                    print(f"Error in listen_for_audio: {e}")
                    await asyncio.sleep(0.01)  # Prevent tight loop on error
        except Exception as e:
            st.error(f"Failed to open microphone: {e}")  # This error needs to reach the Streamlit UI
            self.running = False
        finally:
            if self.audio_stream:
                await asyncio.to_thread(self.audio_stream.stop_stream)
                await asyncio.to_thread(self.audio_stream.close)
            print("Microphone stream closed.")

    async def receive_gemini_responses(self):
        """Consume streamed responses: queue audio for playback, append text to chat."""
        try:
            while self.running:
                if not self.session:
                    await asyncio.sleep(0.1)  # Wait for session
                    continue
                try:
                    turn = self.session.receive()
                    async for response in turn:
                        if not self.running:
                            break  # Exit if stop signal received during iteration
                        if data := response.data:  # Audio data
                            if not self.audio_in_queue.full():
                                self.audio_in_queue.put_nowait(data)
                            else:
                                print("Playback audio queue full, discarding data.")
                        if text := response.text:  # Text part of the response
                            # NOTE(review): appended from a background thread; rely on
                            # Streamlit's next natural rerun to render it.
                            st.session_state['chat_messages'].append({"role": "assistant", "content": text})
                    # If interruptions are implemented (user speaks while AI is speaking),
                    # clearing self.audio_in_queue here would cut playback short.
                except types.generation_types.StopCandidateException:
                    # NOTE(review): verify this attribute path exists on google.genai.types;
                    # it resembles the older google-generativeai package layout.
                    print("Gemini indicated end of response (StopCandidateException).")  # Normal
                except Exception as e:
                    if self.running:
                        print(f"Error receiving from Gemini: {e}")
                    await asyncio.sleep(0.1)  # Prevent tight loop on error
        except asyncio.CancelledError:
            print("receive_gemini_responses task cancelled.")
        finally:
            print("receive_gemini_responses task finished.")

    async def play_audio_responses(self):
        """Open an output stream and play PCM chunks from audio_in_queue."""
        playback_stream = None
        try:
            playback_stream = await asyncio.to_thread(
                pya.open,
                format=FORMAT,  # Assuming Gemini audio matches this, or adjust
                channels=CHANNELS,
                rate=RECEIVE_SAMPLE_RATE,
                output=True,
            )
            print("Audio playback stream opened.")
            while self.running:
                try:
                    bytestream = await asyncio.wait_for(self.audio_in_queue.get(), timeout=0.5)
                    if bytestream is None:
                        # Sentinel pushed by stop_loop(); writing it would raise.
                        self.audio_in_queue.task_done()
                        continue
                    await asyncio.to_thread(playback_stream.write, bytestream)
                    self.audio_in_queue.task_done()
                except asyncio.TimeoutError:
                    continue  # No audio to play
                except Exception as e:
                    print(f"Error playing audio: {e}")
                    await asyncio.sleep(0.01)  # Prevent tight loop
        except Exception as e:
            st.error(f"Failed to open audio playback: {e}")
            self.running = False
        finally:
            if playback_stream:
                await asyncio.to_thread(playback_stream.stop_stream)
                await asyncio.to_thread(playback_stream.close)
            print("Audio playback stream closed.")

    def stop_loop(self):
        """Request shutdown from any thread: flip the flag and wake blocked tasks."""
        print("Stop signal received for AudioLoop.")
        self.running = False
        # Wake tasks blocked on queue.get() with a None sentinel. put_nowait on a
        # full queue raises QueueFull -- that must not crash the stop path, and a
        # full queue means the consumer is about to wake up anyway.
        for q in (self.out_queue, self.audio_in_queue):
            if q is not None:
                try:
                    q.put_nowait(None)
                except asyncio.QueueFull:
                    pass

    async def run(self):
        """Connect to Gemini Live, send the system prompt, and supervise all tasks."""
        st.session_state['run_loop'] = True  # Indicate loop is running
        self.running = True
        # Expose our event loop so the UI thread can schedule coroutines onto it
        # via asyncio.run_coroutine_threadsafe (see main()).
        self.loop = asyncio.get_running_loop()
        print("AudioLoop starting...")
        try:
            # `client.aio.live.connect` is an async context manager
            async with client.aio.live.connect(model=MODEL, config=CONFIG) as session:
                self.session = session
                print("Gemini session established.")

                # Send the system prompt first.
                try:
                    print("Sending system prompt to Gemini...")
                    # end_of_turn=False: this text is initial context for the first
                    # actual user interaction, not a turn of its own.
                    await self.session.send(input=MEDICAL_ASSISTANT_SYSTEM_PROMPT, end_of_turn=False)
                    print("System prompt sent.")
                except Exception as e:
                    st.error(f"Failed to send system prompt to Gemini: {str(e)}")
                    traceback.print_exception(e)
                    self.running = False  # Stop if system prompt fails critical setup
                    return  # Exit run method

                # Queues are created inside this loop so they bind to it.
                self.audio_in_queue = asyncio.Queue(maxsize=AUDIO_QUEUE_MAXSIZE)
                self.out_queue = asyncio.Queue(maxsize=10)  # Outgoing media to the Gemini API

                async with asyncio.TaskGroup() as tg:
                    print("Starting child tasks...")
                    tg.create_task(self.send_realtime_media(), name="send_realtime_media")
                    tg.create_task(self.listen_for_audio(), name="listen_for_audio")

                    if self.video_mode == "camera":
                        tg.create_task(self.get_frames_from_camera(), name="get_frames_from_camera")
                    elif self.video_mode == "screen":
                        tg.create_task(self.get_frames_from_screen(), name="get_frames_from_screen")
                    # If mode is "none", no video task is started.

                    tg.create_task(self.receive_gemini_responses(), name="receive_gemini_responses")
                    tg.create_task(self.play_audio_responses(), name="play_audio_responses")
                    print("All child tasks created.")

                # TaskGroup waits for all tasks here; they exit when self.running flips.
                print("TaskGroup finished.")

        except asyncio.CancelledError:
            print("AudioLoop.run() was cancelled.")  # Usually from TaskGroup cancellation
        except ExceptionGroup as eg:  # From TaskGroup if child tasks fail
            st.error(f"Error in async tasks: {eg.exceptions[0]}")  # Show first error in UI
            print(f"ExceptionGroup caught in AudioLoop.run(): {eg}")
            for i, exc in enumerate(eg.exceptions):
                print(f" Exception {i+1}/{len(eg.exceptions)} in TaskGroup: {type(exc).__name__}: {exc}")
                traceback.print_exception(type(exc), exc, exc.__traceback__)
        except Exception as e:
            st.error(f"Critical error in session: {str(e)}")
            print(f"Exception caught in AudioLoop.run(): {type(e).__name__}: {e}")
            traceback.print_exception(e)
        finally:
            print("AudioLoop.run() finishing, cleaning up...")
            self.running = False  # Ensure all loops stop
            st.session_state['run_loop'] = False  # Signal that the loop has stopped
            # The session is closed by the `async with` on client.aio.live.connect.
            self.session = None
            # Stream closures are handled in each task's own finally block.
            print("AudioLoop finished.")
443
+
444
+
445
def main():
    """Render the Streamlit UI and bridge it to the background AudioLoop thread."""
    st.title("Gemini Live Medical Assistant")

    with st.sidebar:
        st.subheader("Settings")
        video_mode_options = ["camera", "screen", "none"]
        # Ensure the default video mode is present in the options list.
        default_video_index = video_mode_options.index(DEFAULT_MODE) if DEFAULT_MODE in video_mode_options else 0
        video_mode = st.selectbox("Video Source", video_mode_options, index=default_video_index)

        if not st.session_state.get('run_loop', False):  # No session running yet
            if st.button("Start Session", key="start_session_button"):
                # Clear the chat and show the standing disclaimer.
                st.session_state.chat_messages = [{
                    "role": "system",
                    "content": (
                        "Medical Assistant activated. The AI has been instructed on its role to visually assist you. "
                        "Please remember, this AI cannot provide medical diagnoses or replace consultation with a healthcare professional."
                    )
                }]
                st.session_state.current_frame = None  # Clear previous frame

                audio_loop = AudioLoop(video_mode=video_mode)
                st.session_state.audio_loop = audio_loop

                # Run the asyncio event loop on a daemon thread so Streamlit
                # can still exit even if the thread is stuck.
                threading.Thread(target=lambda: asyncio.run(audio_loop.run()), daemon=True).start()
                st.success("Session started. Initializing assistant...")
                st.rerun()  # Rerun to update button state and messages
        else:  # A session is running
            if st.button("Stop Session", key="stop_session_button"):
                if st.session_state.audio_loop:
                    st.session_state.audio_loop.stop_loop()  # Signal async tasks to stop
                st.session_state.audio_loop = None
                st.warning("Session stopping...")
                st.rerun()  # Rerun to update UI

    # Main content area
    col1, col2 = st.columns([2, 3])  # Adjust column ratio as needed

    with col1:
        st.subheader("Video Feed")
        if st.session_state.get('run_loop', False) and st.session_state.get('current_frame') is not None:
            st.image(st.session_state['current_frame'], caption="Live Feed" if video_mode != "none" else "Video Disabled", use_column_width=True)
        elif video_mode != "none":
            st.info("Video feed will appear here when the session starts.")
        else:
            st.info("Video input is disabled.")

    with col2:
        st.subheader("Chat with Medical Assistant")
        chat_container = st.container()  # For scrolling chat
        with chat_container:
            for msg in st.session_state.chat_messages:
                with st.chat_message(msg["role"]):
                    st.write(msg["content"])

        prompt = st.chat_input("Ask about what you're showing...", key="chat_input_box", disabled=not st.session_state.get('run_loop', False))
        if prompt:
            st.session_state.chat_messages.append({"role": "user", "content": prompt})

            # BUG FIX: st.rerun() raises to abort the current script run, so it
            # must come AFTER dispatching the message -- previously it preceded
            # the send, which made the send branch unreachable.
            if st.session_state.audio_loop:
                if st.session_state.audio_loop.session:  # Ensure the session exists
                    # Prefer scheduling onto the AudioLoop's own event loop; this is
                    # the thread-safe way to run a coroutine from the UI thread.
                    loop = getattr(st.session_state.audio_loop, "loop", None)
                    if loop is not None and loop.is_running():
                        asyncio.run_coroutine_threadsafe(
                            st.session_state.audio_loop.send_text_to_gemini(prompt), loop
                        )
                    else:
                        # Fallback: run in a fresh loop. NOTE(review): sharing the
                        # session across event loops is fragile; confirm the SDK
                        # tolerates it or always expose the loop from AudioLoop.run().
                        asyncio.run(st.session_state.audio_loop.send_text_to_gemini(prompt))
                else:
                    st.error("Session not fully active to send message.")
            else:
                st.error("Session is not active. Please start a session.")

            st.rerun()  # Now safe: message already dispatched; show it immediately
530
+
531
if __name__ == "__main__":
    # For an especially clean shutdown one could register a global PyAudio
    # teardown, e.g.:
    #   import atexit
    #   atexit.register(pya.terminate)
    main()