Spaces:
Sleeping
Sleeping
Update narrate_description.py
Browse files- narrate_description.py +69 -30
narrate_description.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from fastapi import APIRouter, WebSocket
|
2 |
import json
|
3 |
from generate_description import generate_description
|
4 |
from convert_text_to_speech import convert_text_to_speech
|
@@ -13,56 +13,95 @@ description_history = []
|
|
13 |
async def websocket_narrate(websocket: WebSocket):
|
14 |
await websocket.accept()
|
15 |
print("WebSocket connection accepted.")
|
|
|
|
|
16 |
try:
|
17 |
while True:
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
-
data_json = json.loads(data)
|
24 |
-
image_data = data_json.get('image')
|
25 |
-
selected_voice_id = data_json.get('voiceId')
|
26 |
-
selected_voice_name = data_json.get('voiceName')
|
27 |
-
politeness_level = int(data_json.get('politenessLevel', 5))
|
28 |
-
if image_data:
|
29 |
print(f"Image data received, sending to {selected_voice_name} model for analysis with politeness level {politeness_level}.")
|
30 |
description_accumulator = ""
|
31 |
punctuation_pattern = re.compile(r"[*]")
|
32 |
|
33 |
async for description_chunk in generate_description(image_data, selected_voice_name, description_history, politeness_level):
|
34 |
if description_chunk:
|
35 |
-
# Accumulate the chunk, ensuring not to break on single punctuation marks
|
36 |
if not punctuation_pattern.fullmatch(description_chunk.strip()):
|
37 |
description_accumulator += description_chunk
|
38 |
else:
|
39 |
description_accumulator += " " + description_chunk
|
40 |
|
41 |
-
|
42 |
-
|
|
|
|
|
|
|
|
|
43 |
|
44 |
-
# If the chunk ends with punctuation, convert and stream it
|
45 |
if punctuation_pattern.search(description_chunk):
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
-
# If there is any remaining text after the loop, send it for conversion too
|
53 |
if description_accumulator:
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
print("Finished processing image data.")
|
60 |
-
else:
|
61 |
-
print("No image data received, sending error message to client.")
|
62 |
-
await websocket.send_text("No image data received.")
|
63 |
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
except Exception as e:
|
66 |
print(f"Error during WebSocket communication: {e}")
|
67 |
finally:
|
68 |
-
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
|
2 |
import json
|
3 |
from generate_description import generate_description
|
4 |
from convert_text_to_speech import convert_text_to_speech
|
|
|
13 |
async def websocket_narrate(websocket: WebSocket) -> None:
    """Serve one narration session over a WebSocket.

    Per-message protocol, as implemented below:

    * The literal text ``"close"`` closes the socket (code 1000) and ends
      the session.
    * Any other text frame is parsed as a JSON object from which the keys
      ``image``, ``voiceId``, ``voiceName``, ``politenessLevel`` (default 5)
      and ``pictureCount`` are read.
    * Each non-empty chunk yielded by ``generate_description`` is forwarded
      as a ``{"type": "text_chunk", ...}`` JSON text frame.  Whenever a chunk
      matches ``punctuation_pattern`` the text accumulated so far is sent to
      ``convert_text_to_speech`` and the resulting audio is streamed back as
      binary frames.
    * Failures are reported to the client as
      ``{"type": "error", "data": <message>}`` text frames.

    Args:
        websocket: The connection to serve; it is accepted here.
    """
    await websocket.accept()
    print("WebSocket connection accepted.")
    print("connection open")

    # Compiled once per connection rather than once per received message.
    # NOTE(review): this pattern only matches a literal asterisk although the
    # surrounding logic speaks of "punctuation" -- confirm this is intended.
    punctuation_pattern = re.compile(r"[*]")

    # Becomes False once the socket is known to be closed (we closed it, or
    # the peer disconnected) so the ``finally`` block does not close it again.
    needs_close = True

    try:
        while True:
            try:
                data = await websocket.receive_text()
                if data == "close":
                    print("Closing WebSocket connection.")
                    await websocket.close(code=1000)
                    needs_close = False
                    break

                data_json = json.loads(data)
                image_data = data_json.get('image')
                selected_voice_id = data_json.get('voiceId')
                selected_voice_name = data_json.get('voiceName')
                politeness_level = int(data_json.get('politenessLevel', 5))

                if not image_data:
                    await _send_error(websocket, "No image data received.")
                    continue

                print(f"Image data received, sending to {selected_voice_name} model for analysis with politeness level {politeness_level}.")
                description_accumulator = ""

                async for description_chunk in generate_description(image_data, selected_voice_name, description_history, politeness_level):
                    if not description_chunk:
                        continue

                    # A chunk consisting solely of the marker character is
                    # joined with a separating space; anything else is
                    # appended verbatim.
                    if not punctuation_pattern.fullmatch(description_chunk.strip()):
                        description_accumulator += description_chunk
                    else:
                        description_accumulator += " " + description_chunk

                    await websocket.send_text(json.dumps({
                        "type": "text_chunk",
                        "data": description_chunk,
                        "pictureCount": data_json.get('pictureCount'),
                        "voiceName": selected_voice_name
                    }))

                    if punctuation_pattern.search(description_chunk):
                        spoken = await _narrate_text(
                            websocket,
                            description_accumulator.strip(),
                            selected_voice_id,
                            "Error processing audio",
                        )
                        # On failure the text is kept so that it is included
                        # in the next flush attempt.
                        if spoken:
                            description_accumulator = ""

                # Flush whatever is left once the description stream ends.
                # Whitespace-only leftovers are skipped instead of being sent
                # to the speech backend as an empty string.
                remaining_text = description_accumulator.strip()
                if remaining_text:
                    await _narrate_text(
                        websocket,
                        remaining_text,
                        selected_voice_id,
                        "Error processing final audio",
                    )

                print("Finished processing image data.")

            except WebSocketDisconnect:
                print("Client disconnected")
                needs_close = False
                break
            except Exception as e:
                print(f"Error processing message: {e}")
                try:
                    await _send_error(websocket, "Error processing message")
                except Exception:
                    # The socket is unusable; stop serving.  ``Exception``
                    # (not a bare ``except``) lets task cancellation and
                    # interpreter shutdown propagate.
                    break

    except Exception as e:
        print(f"Error during WebSocket communication: {e}")
    finally:
        print("connection closed")
        if needs_close:
            try:
                await websocket.close(code=1000)
            except Exception as e:
                # Best-effort cleanup: the connection may already be gone.
                print(f"Error closing WebSocket: {e}")


async def _send_error(websocket: WebSocket, message: str) -> None:
    """Send ``message`` to the client as a JSON ``error`` text frame."""
    await websocket.send_text(json.dumps({
        "type": "error",
        "data": message
    }))


async def _narrate_text(websocket: WebSocket, text: str, voice_id, error_message: str) -> bool:
    """Stream the speech audio for ``text`` and record it in the history.

    Args:
        websocket: Connection the audio chunks are written to.
        text: Text handed to ``convert_text_to_speech``.
        voice_id: Voice identifier forwarded to ``convert_text_to_speech``.
        error_message: Logged (with the exception) and sent to the client
            as an error frame if the conversion or streaming fails.

    Returns:
        True if all audio was sent and ``text`` was appended to
        ``description_history``; False if a failure was reported instead.

    Raises:
        WebSocketDisconnect: Propagated unchanged so the caller can end the
            session instead of reporting an audio error to a gone client.
    """
    try:
        async for audio_chunk in convert_text_to_speech(text, voice_id):
            await websocket.send_bytes(audio_chunk)
        description_history.append(text)
    except WebSocketDisconnect:
        raise
    except Exception as e:
        print(f"{error_message}: {e}")
        await _send_error(websocket, error_message)
        return False
    return True
|