noumanjavaid committed on
Commit
90b4c24
·
verified ·
1 Parent(s): c3e1dff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -25
app.py CHANGED
@@ -165,7 +165,7 @@ class GeminiInteractionLoop:
165
  return
166
  try:
167
  logging.info(f"Sending text to Gemini: '{user_text[:50]}...'")
168
- await self.gemini_session.send_client_content(content=[types.Part(text=user_text)], end_of_turn=True)
169
  except Exception as e:
170
  logging.error(
171
  f"Error sending text message to Gemini: {e}", exc_info=True)
@@ -180,6 +180,29 @@ class GeminiInteractionLoop:
180
  if not all(k in media_data for k in ["data", "mime_type"]):
181
  logging.warning(f"Media data missing required fields")
182
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
  # Check if it's an image and needs resizing
185
  if media_data["mime_type"].startswith("image/"):
@@ -246,21 +269,37 @@ class GeminiInteractionLoop:
246
  try:
247
  validated_media = self._validate_media_payload(media_data)
248
  if validated_media:
249
- logging.debug(f"Sending media to Gemini. Type: {validated_media.get('mime_type')}, Data size: {len(validated_media.get('data', b'')) if isinstance(validated_media.get('data'), bytes) else len(validated_media.get('data', ''))}")
 
 
 
 
 
 
 
 
 
250
  await self.gemini_session.send(input=validated_media)
251
  else:
252
  # Log if validation failed, but only if media_data was not None initially
253
- # (as get_media_from_queues can return None on timeout)
254
  if media_data is not None:
255
  logging.warning(f"Media validation failed for payload. Type: {media_data.get('mime_type') if isinstance(media_data, dict) else type(media_data)}, skipping send.")
256
  except websockets.exceptions.ConnectionClosedError as e_conn_closed:
 
 
 
257
  logging.error(f"Connection closed while sending media: {e_conn_closed}", exc_info=True)
258
- # Consider how to handle this - e.g., attempt to reconnect or stop the loop.
259
- # For now, let's log and potentially stop the interaction loop or specific task.
260
- self.is_running = False # Example: stop if connection is lost
 
 
 
 
 
 
261
  except Exception as e:
262
- logging.error(
263
- f"Error sending media chunk to Gemini: {e}", exc_info=True)
264
  elif not media_data: # media_data could be None if queues were empty and timed out
265
  await asyncio.sleep(0.05) # Yield to other tasks if no media
266
  except asyncio.CancelledError:
@@ -393,7 +432,7 @@ class GeminiInteractionLoop:
393
  f"Gemini session established with API for model {MODEL_NAME}.")
394
  try:
395
  logging.info("Sending system prompt to Gemini...")
396
- await self.gemini_session.send_client_content(content=[types.Part(text=MEDICAL_ASSISTANT_SYSTEM_PROMPT)], end_of_turn=True)
397
  logging.info("System prompt sent successfully.")
398
  except Exception as e:
399
  logging.error(
@@ -513,33 +552,48 @@ class AudioProcessor(AudioProcessorBase):
513
  if audio_chunks_to_gemini_q is None:
514
  return
515
  for frame in audio_frames:
516
- audio_data = frame.planes[0].to_bytes()
517
-
518
- # Skip empty audio frames
519
- if not audio_data or len(audio_data) == 0:
520
- continue
521
-
522
- # Fix for the WebSocket error 1007 (invalid payload data)
523
- # Use the correct mime type format and ensure the audio data is valid
524
- # The audio format must match one of the formats supported by the Gemini API
525
- # Using standard audio/L16 with 16kHz sample rate instead of 24kHz
526
- mime_type = f"audio/L16;rate=16000;channels=1"
527
-
528
  try:
529
- # Prepare API data - making sure all data is valid
 
 
 
 
 
 
 
 
 
 
 
 
530
  if isinstance(audio_data, bytes) and len(audio_data) > 0:
531
- api_data = {"data": audio_data, "mime_type": mime_type}
 
 
 
 
 
 
 
 
 
532
 
 
533
  if audio_chunks_to_gemini_q.full():
534
  try:
 
535
  await asyncio.wait_for(audio_chunks_to_gemini_q.get(), timeout=0.01)
 
536
  except asyncio.TimeoutError:
537
  logging.warning("Audio queue full, chunk dropped.")
538
  continue
 
 
539
  audio_chunks_to_gemini_q.put_nowait(api_data)
 
 
540
  except Exception as e:
541
- logging.error(
542
- f"Error queueing audio chunk: {e}", exc_info=True)
543
 
544
  async def recv(self, frames):
545
  try:
 
165
  return
166
  try:
167
  logging.info(f"Sending text to Gemini: '{user_text[:50]}...'")
168
+ await self.gemini_session.send_client_content(types.Part(text=user_text))
169
  except Exception as e:
170
  logging.error(
171
  f"Error sending text message to Gemini: {e}", exc_info=True)
 
180
  if not all(k in media_data for k in ["data", "mime_type"]):
181
  logging.warning(f"Media data missing required fields")
182
  return None
183
+
184
+ # Handle audio data - ensure proper format for Gemini API
185
+ if media_data["mime_type"].startswith("audio/"):
186
+ try:
187
+ # Ensure audio data is in bytes format
188
+ if isinstance(media_data["data"], bytes):
189
+ # No need to base64 encode binary audio data for Gemini API
190
+ # Just ensure the mime_type is correctly formatted
191
+ if "rate=" not in media_data["mime_type"]:
192
+ # Default to 16kHz if not specified
193
+ media_data["mime_type"] = f"audio/L16;rate=16000;channels=1"
194
+
195
+ # Create a new dict to avoid modifying the original
196
+ return {
197
+ "mime_type": media_data["mime_type"],
198
+ "data": media_data["data"]
199
+ }
200
+ else:
201
+ logging.warning(f"Audio data is not in bytes format: {type(media_data['data'])}")
202
+ return None
203
+ except Exception as e:
204
+ logging.error(f"Error processing audio data: {e}", exc_info=True)
205
+ return None
206
 
207
  # Check if it's an image and needs resizing
208
  if media_data["mime_type"].startswith("image/"):
 
269
  try:
270
  validated_media = self._validate_media_payload(media_data)
271
  if validated_media:
272
+ # Log media type and size before sending
273
+ data_size = len(validated_media.get('data', b'')) if isinstance(validated_media.get('data'), bytes) else len(validated_media.get('data', ''))
274
+ logging.debug(f"Sending media to Gemini. Type: {validated_media.get('mime_type')}, Data size: {data_size} bytes")
275
+
276
+ # Ensure we're not exceeding WebSocket payload limits
277
+ if data_size > MAX_PAYLOAD_SIZE_BYTES:
278
+ logging.warning(f"Media payload exceeds maximum size ({data_size} > {MAX_PAYLOAD_SIZE_BYTES}), skipping")
279
+ continue
280
+
281
+ # Send the validated media to Gemini
282
  await self.gemini_session.send(input=validated_media)
283
  else:
284
  # Log if validation failed, but only if media_data was not None initially
 
285
  if media_data is not None:
286
  logging.warning(f"Media validation failed for payload. Type: {media_data.get('mime_type') if isinstance(media_data, dict) else type(media_data)}, skipping send.")
287
  except websockets.exceptions.ConnectionClosedError as e_conn_closed:
288
+ error_code = getattr(e_conn_closed, 'code', None)
289
+ error_reason = getattr(e_conn_closed, 'reason', 'Unknown reason')
290
+ logging.error(f"WebSocket connection closed with code {error_code}: {error_reason}")
291
  logging.error(f"Connection closed while sending media: {e_conn_closed}", exc_info=True)
292
+
293
+ # If we get a 1007 error (invalid frame payload data), log more details
294
+ if error_code == 1007:
295
+ logging.error(f"Invalid frame payload data error. This is likely due to malformed media data.")
296
+ if isinstance(media_data, dict):
297
+ logging.error(f"Media type: {media_data.get('mime_type', 'unknown')}, Data type: {type(media_data.get('data', None))}")
298
+
299
+ # Stop the interaction loop if connection is lost
300
+ self.is_running = False
301
  except Exception as e:
302
+ logging.error(f"Error sending media chunk to Gemini: {e}", exc_info=True)
 
303
  elif not media_data: # media_data could be None if queues were empty and timed out
304
  await asyncio.sleep(0.05) # Yield to other tasks if no media
305
  except asyncio.CancelledError:
 
432
  f"Gemini session established with API for model {MODEL_NAME}.")
433
  try:
434
  logging.info("Sending system prompt to Gemini...")
435
+ await self.gemini_session.send_client_content(types.Part(text=MEDICAL_ASSISTANT_SYSTEM_PROMPT))
436
  logging.info("System prompt sent successfully.")
437
  except Exception as e:
438
  logging.error(
 
552
  if audio_chunks_to_gemini_q is None:
553
  return
554
  for frame in audio_frames:
 
 
 
 
 
 
 
 
 
 
 
 
555
  try:
556
+ # Extract audio data from frame
557
+ audio_data = frame.planes[0].to_bytes()
558
+
559
+ # Skip empty audio frames
560
+ if not audio_data or len(audio_data) == 0:
561
+ continue
562
+
563
+ # Ensure we're using the correct format for Gemini API
564
+ # WebSocket error 1007 occurs with invalid frame payload data
565
+ # Using standard audio/L16 with 16kHz sample rate (matches SEND_SAMPLE_RATE)
566
+ mime_type = "audio/L16;rate=16000;channels=1"
567
+
568
+ # Validate audio data before queueing
569
  if isinstance(audio_data, bytes) and len(audio_data) > 0:
570
+ # Check if data size is reasonable (avoid oversized payloads)
571
+ if len(audio_data) > MAX_PAYLOAD_SIZE_BYTES:
572
+ logging.warning(f"Audio chunk too large ({len(audio_data)} bytes), skipping")
573
+ continue
574
+
575
+ # Create properly formatted API data
576
+ api_data = {
577
+ "data": audio_data, # Keep as bytes, don't base64 encode
578
+ "mime_type": mime_type
579
+ }
580
 
581
+ # Handle queue overflow
582
  if audio_chunks_to_gemini_q.full():
583
  try:
584
+ # Remove oldest item if queue is full
585
  await asyncio.wait_for(audio_chunks_to_gemini_q.get(), timeout=0.01)
586
+ audio_chunks_to_gemini_q.task_done()
587
  except asyncio.TimeoutError:
588
  logging.warning("Audio queue full, chunk dropped.")
589
  continue
590
+
591
+ # Queue the validated audio data
592
  audio_chunks_to_gemini_q.put_nowait(api_data)
593
+ else:
594
+ logging.warning(f"Invalid audio data format: {type(audio_data)}, skipping")
595
  except Exception as e:
596
+ logging.error(f"Error processing audio chunk: {e}", exc_info=True)
 
597
 
598
  async def recv(self, frames):
599
  try: