Mr-Geo committed on
Commit
ec8af1f
·
verified ·
1 Parent(s): 8fac91d

Update narrate_description.py

Browse files
Files changed (1) hide show
  1. narrate_description.py +69 -30
narrate_description.py CHANGED
@@ -1,4 +1,4 @@
1
- from fastapi import APIRouter, WebSocket
2
  import json
3
  from generate_description import generate_description
4
  from convert_text_to_speech import convert_text_to_speech
async def websocket_narrate(websocket: WebSocket):
    """Narrate images pushed by the client over a WebSocket.

    The client sends JSON payloads with a base64 image ('image'), a TTS voice
    ('voiceId'/'voiceName') and an optional 'politenessLevel' (default 5), or
    the literal string "close" to end the session.  Text chunks from the model
    are echoed back as JSON "text_chunk" frames; whenever a '*' delimiter is
    seen the accumulated text is converted to speech and streamed back as
    binary frames.
    """
    await websocket.accept()
    print("WebSocket connection accepted.")

    try:
        while True:
            incoming = await websocket.receive_text()
            if incoming == "close":
                print("Closing WebSocket connection.")
                break

            payload = json.loads(incoming)
            image_data = payload.get('image')
            selected_voice_id = payload.get('voiceId')
            selected_voice_name = payload.get('voiceName')
            politeness_level = int(payload.get('politenessLevel', 5))

            if image_data:
                print(f"Image data received, sending to {selected_voice_name} model for analysis with politeness level {politeness_level}.")
                description_accumulator = ""
                # '*' is treated as the segment delimiter emitted by the model.
                punctuation_pattern = re.compile(r"[*]")

                async for description_chunk in generate_description(image_data, selected_voice_name, description_history, politeness_level):
                    if not description_chunk:
                        continue

                    # Accumulate the chunk; pad a bare delimiter with a space.
                    if punctuation_pattern.fullmatch(description_chunk.strip()):
                        description_accumulator += " " + description_chunk
                    else:
                        description_accumulator += description_chunk

                    # Forward every text chunk to the frontend as it arrives.
                    await websocket.send_text(json.dumps({"type": "text_chunk", "data": description_chunk, "pictureCount": payload.get('pictureCount'), "voiceName": selected_voice_name}))

                    # Delimiter reached: flush the accumulated text to TTS.
                    if punctuation_pattern.search(description_chunk):
                        audio_chunks = convert_text_to_speech(description_accumulator.strip(), selected_voice_id)
                        await asyncio.gather(*[websocket.send_bytes(chunk) async for chunk in audio_chunks])
                        # Remember the narrated segment for conversational context.
                        description_history.append(description_accumulator.strip())
                        description_accumulator = ""

                # Flush any trailing text that never hit a delimiter.
                if description_accumulator:
                    audio_chunks = convert_text_to_speech(description_accumulator.strip(), selected_voice_id)
                    await asyncio.gather(*[websocket.send_bytes(chunk) async for chunk in audio_chunks])
                    description_history.append(description_accumulator.strip())

                print("Finished processing image data.")
            else:
                print("No image data received, sending error message to client.")
                await websocket.send_text("No image data received.")

        print("WebSocket connection closed.")
    except Exception as e:
        print(f"Error during WebSocket communication: {e}")
    finally:
        await websocket.close()
1
+ from fastapi import APIRouter, WebSocket, WebSocketDisconnect
2
  import json
3
  from generate_description import generate_description
4
  from convert_text_to_speech import convert_text_to_speech
 
async def websocket_narrate(websocket: WebSocket):
    """Narrate images pushed by the client over a WebSocket.

    Protocol: the client sends JSON payloads with a base64 image ('image'),
    a TTS voice ('voiceId'/'voiceName') and an optional 'politenessLevel'
    (default 5), or the literal string "close" to end the session.  For each
    image, model text chunks are echoed back as JSON "text_chunk" frames;
    whenever a '*' delimiter appears, the accumulated text is converted to
    speech and streamed back as binary frames.  Failures are reported to the
    client as JSON "error" frames.
    """
    await websocket.accept()
    print("WebSocket connection accepted.")
    print("connection open")

    # Hoisted out of the receive loop: the delimiter pattern never changes,
    # so there is no reason to recompile it for every message.
    # '*' is treated as the segment delimiter emitted by the model.
    punctuation_pattern = re.compile(r"[*]")

    try:
        while True:
            try:
                data = await websocket.receive_text()
                if data == "close":
                    print("Closing WebSocket connection.")
                    await websocket.close(code=1000)
                    break

                data_json = json.loads(data)
                image_data = data_json.get('image')
                selected_voice_id = data_json.get('voiceId')
                selected_voice_name = data_json.get('voiceName')
                politeness_level = int(data_json.get('politenessLevel', 5))

                if not image_data:
                    await websocket.send_text(json.dumps({
                        "type": "error",
                        "data": "No image data received."
                    }))
                    continue

                print(f"Image data received, sending to {selected_voice_name} model for analysis with politeness level {politeness_level}.")
                description_accumulator = ""

                async for description_chunk in generate_description(image_data, selected_voice_name, description_history, politeness_level):
                    if description_chunk:
                        # Accumulate the chunk; pad a bare delimiter with a space.
                        if not punctuation_pattern.fullmatch(description_chunk.strip()):
                            description_accumulator += description_chunk
                        else:
                            description_accumulator += " " + description_chunk

                        # Forward every text chunk to the frontend as it arrives.
                        await websocket.send_text(json.dumps({
                            "type": "text_chunk",
                            "data": description_chunk,
                            "pictureCount": data_json.get('pictureCount'),
                            "voiceName": selected_voice_name
                        }))

                        # Delimiter reached: flush the accumulated text to TTS.
                        if punctuation_pattern.search(description_chunk):
                            try:
                                audio_chunks = convert_text_to_speech(description_accumulator.strip(), selected_voice_id)
                                async for chunk in audio_chunks:
                                    await websocket.send_bytes(chunk)
                                # Remember the narrated segment for conversational context.
                                description_history.append(description_accumulator.strip())
                                description_accumulator = ""
                            except Exception as e:
                                print(f"Error processing audio: {e}")
                                await websocket.send_text(json.dumps({
                                    "type": "error",
                                    "data": "Error processing audio"
                                }))

                # Flush any trailing text that never hit a delimiter.
                if description_accumulator:
                    try:
                        audio_chunks = convert_text_to_speech(description_accumulator.strip(), selected_voice_id)
                        async for chunk in audio_chunks:
                            await websocket.send_bytes(chunk)
                        description_history.append(description_accumulator.strip())
                    except Exception as e:
                        print(f"Error processing final audio: {e}")
                        await websocket.send_text(json.dumps({
                            "type": "error",
                            "data": "Error processing final audio"
                        }))

                print("Finished processing image data.")

            except WebSocketDisconnect:
                print("Client disconnected")
                break
            except Exception as e:
                print(f"Error processing message: {e}")
                try:
                    await websocket.send_text(json.dumps({
                        "type": "error",
                        "data": "Error processing message"
                    }))
                except Exception:
                    # Was a bare `except:` — that would also swallow
                    # KeyboardInterrupt/CancelledError; narrowed to Exception.
                    # If we cannot even send the error, the socket is gone.
                    break

    except Exception as e:
        print(f"Error during WebSocket communication: {e}")
    finally:
        print("connection closed")
        try:
            await websocket.close(code=1000)
        except Exception:
            # Narrowed from a bare `except:`; close() may raise if the
            # connection is already closed — safe to ignore here.
            pass