Spaces:

Mr-Geo
/

ai-narrator

Sleeping

App Files Files Community

Mr-Geo commited on Dec 16, 2024

Commit

ec8af1f

verified ·

1 Parent(s): 8fac91d

Update narrate_description.py

Browse files

Files changed (1) hide show

narrate_description.py +69 -30

narrate_description.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from fastapi import APIRouter, WebSocket
 import json
 from generate_description import generate_description
 from convert_text_to_speech import convert_text_to_speech
@@ -13,56 +13,95 @@ description_history = []
 async def websocket_narrate(websocket: WebSocket):
     await websocket.accept()
     print("WebSocket connection accepted.")
     try:
         while True:
-            data = await websocket.receive_text()
-            if data == "close":
-                print("Closing WebSocket connection.")
-                break
-            data_json = json.loads(data)
-            image_data = data_json.get('image')
-            selected_voice_id = data_json.get('voiceId')
-            selected_voice_name = data_json.get('voiceName')
-            politeness_level = int(data_json.get('politenessLevel', 5))
-            if image_data:
                 print(f"Image data received, sending to {selected_voice_name} model for analysis with politeness level {politeness_level}.")
                 description_accumulator = ""
                 punctuation_pattern = re.compile(r"[*]")
                 async for description_chunk in generate_description(image_data, selected_voice_name, description_history, politeness_level):
                     if description_chunk:
-                        # Accumulate the chunk, ensuring not to break on single punctuation marks
                         if not punctuation_pattern.fullmatch(description_chunk.strip()):
                             description_accumulator += description_chunk
                         else:
                             description_accumulator += " " + description_chunk
-                        # Send each text chunk to the frontend
-                        await websocket.send_text(json.dumps({"type": "text_chunk", "data": description_chunk, "pictureCount": data_json.get('pictureCount'), "voiceName": selected_voice_name}))
-                        # If the chunk ends with punctuation, convert and stream it
                         if punctuation_pattern.search(description_chunk):
-                            audio_chunks = convert_text_to_speech(description_accumulator.strip(), selected_voice_id)
-                            await asyncio.gather(*[websocket.send_bytes(chunk) async for chunk in audio_chunks])
-                            # Append the fully accumulated description to the history
-                            description_history.append(description_accumulator.strip())
-                            description_accumulator = ""
-                # If there is any remaining text after the loop, send it for conversion too
                 if description_accumulator:
-                    audio_chunks = convert_text_to_speech(description_accumulator.strip(), selected_voice_id)
-                    await asyncio.gather(*[websocket.send_bytes(chunk) async for chunk in audio_chunks])
-                    # Append the remaining accumulated description to the history
-                    description_history.append(description_accumulator.strip())
                 print("Finished processing image data.")
-            else:
-                print("No image data received, sending error message to client.")
-                await websocket.send_text("No image data received.")
-        print("WebSocket connection closed.")
     except Exception as e:
         print(f"Error during WebSocket communication: {e}")
     finally:
-        await websocket.close()

+from fastapi import APIRouter, WebSocket, WebSocketDisconnect
 import json
 from generate_description import generate_description
 from convert_text_to_speech import convert_text_to_speech
 async def websocket_narrate(websocket: WebSocket):
     await websocket.accept()
     print("WebSocket connection accepted.")
+    print("connection open")
     try:
         while True:
+            try:
+                data = await websocket.receive_text()
+                if data == "close":
+                    print("Closing WebSocket connection.")
+                    await websocket.close(code=1000)
+                    break
+                data_json = json.loads(data)
+                image_data = data_json.get('image')
+                selected_voice_id = data_json.get('voiceId')
+                selected_voice_name = data_json.get('voiceName')
+                politeness_level = int(data_json.get('politenessLevel', 5))
+                if not image_data:
+                    await websocket.send_text(json.dumps({
+                        "type": "error",
+                        "data": "No image data received."
+                    }))
+                    continue
                 print(f"Image data received, sending to {selected_voice_name} model for analysis with politeness level {politeness_level}.")
                 description_accumulator = ""
                 punctuation_pattern = re.compile(r"[*]")
                 async for description_chunk in generate_description(image_data, selected_voice_name, description_history, politeness_level):
                     if description_chunk:
                         if not punctuation_pattern.fullmatch(description_chunk.strip()):
                             description_accumulator += description_chunk
                         else:
                             description_accumulator += " " + description_chunk
+                        await websocket.send_text(json.dumps({
+                            "type": "text_chunk",
+                            "data": description_chunk,
+                            "pictureCount": data_json.get('pictureCount'),
+                            "voiceName": selected_voice_name
+                        }))
                         if punctuation_pattern.search(description_chunk):
+                            try:
+                                audio_chunks = convert_text_to_speech(description_accumulator.strip(), selected_voice_id)
+                                async for chunk in audio_chunks:
+                                    await websocket.send_bytes(chunk)
+                                description_history.append(description_accumulator.strip())
+                                description_accumulator = ""
+                            except Exception as e:
+                                print(f"Error processing audio: {e}")
+                                await websocket.send_text(json.dumps({
+                                    "type": "error",
+                                    "data": "Error processing audio"
+                                }))
                 if description_accumulator:
+                    try:
+                        audio_chunks = convert_text_to_speech(description_accumulator.strip(), selected_voice_id)
+                        async for chunk in audio_chunks:
+                            await websocket.send_bytes(chunk)
+                        description_history.append(description_accumulator.strip())
+                    except Exception as e:
+                        print(f"Error processing final audio: {e}")
+                        await websocket.send_text(json.dumps({
+                            "type": "error",
+                            "data": "Error processing final audio"
+                        }))
                 print("Finished processing image data.")
+            except WebSocketDisconnect:
+                print("Client disconnected")
+                break
+            except Exception as e:
+                print(f"Error processing message: {e}")
+                try:
+                    await websocket.send_text(json.dumps({
+                        "type": "error",
+                        "data": "Error processing message"
+                    }))
+                except:
+                    break
     except Exception as e:
         print(f"Error during WebSocket communication: {e}")
     finally:
+        print("connection closed")
+        try:
+            await websocket.close(code=1000)
+        except:
+            pass