# ai-narrator / narrate_description.py
# Origin: Hugging Face Space, commit ec8af1f ("Update narrate_description.py" by Mr-Geo)
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
import json
from generate_description import generate_description
from convert_text_to_speech import convert_text_to_speech
import re
import asyncio
router = APIRouter()
# Process-wide narration history handed to generate_description() as context.
# NOTE(review): shared across ALL WebSocket clients and never trimmed, so it
# leaks one client's descriptions into another's context and grows without
# bound — confirm whether per-connection history is intended.
description_history = []
# Compiled once at import time (the original recompiled it on every message).
# The description generator emits "*" chunks as segment delimiters.
_DELIMITER_RE = re.compile(r"[*]")


async def _speak(websocket: WebSocket, text: str, voice_id, label: str) -> bool:
    """Synthesize *text* and stream the audio bytes over *websocket*.

    On success the spoken text is appended to the shared description_history
    and True is returned. On TTS/streaming failure an error frame labelled
    with *label* ("audio" / "final audio") is sent and False is returned, so
    the caller can decide whether to keep accumulating the unspoken text.
    """
    try:
        audio_chunks = convert_text_to_speech(text, voice_id)
        async for chunk in audio_chunks:
            await websocket.send_bytes(chunk)
        description_history.append(text)
        return True
    except Exception as e:
        print(f"Error processing {label}: {e}")
        await websocket.send_text(json.dumps({
            "type": "error",
            "data": f"Error processing {label}"
        }))
        return False


# "@router.websocket" is FastAPI's documented decorator; "websocket_route"
# is the deprecated Starlette-era spelling. Route path is unchanged.
@router.websocket("/narrate")
async def websocket_narrate(websocket: WebSocket):
    """Stream image descriptions to a client as text chunks plus TTS audio.

    Per-message protocol:
      - the literal text "close" closes the socket with code 1000;
      - otherwise a JSON object with keys: image (required, else an error
        frame is sent), voiceId, voiceName, politenessLevel (int, default 5)
        and pictureCount (echoed back on each text chunk).

    Description chunks are forwarded to the client as they arrive; whenever a
    chunk contains the "*" delimiter, the text accumulated so far is spoken
    via text-to-speech and the audio bytes are streamed as binary frames.
    """
    await websocket.accept()
    print("WebSocket connection accepted.")
    print("connection open")
    try:
        while True:
            try:
                data = await websocket.receive_text()
                if data == "close":
                    print("Closing WebSocket connection.")
                    await websocket.close(code=1000)
                    break
                data_json = json.loads(data)
                image_data = data_json.get('image')
                selected_voice_id = data_json.get('voiceId')
                selected_voice_name = data_json.get('voiceName')
                politeness_level = int(data_json.get('politenessLevel', 5))
                if not image_data:
                    await websocket.send_text(json.dumps({
                        "type": "error",
                        "data": "No image data received."
                    }))
                    continue
                print(f"Image data received, sending to {selected_voice_name} model for analysis with politeness level {politeness_level}.")
                description_accumulator = ""
                async for description_chunk in generate_description(
                        image_data, selected_voice_name,
                        description_history, politeness_level):
                    if not description_chunk:
                        continue
                    # A bare "*" delimiter chunk is set off with a space;
                    # ordinary text is appended as-is.
                    if _DELIMITER_RE.fullmatch(description_chunk.strip()):
                        description_accumulator += " " + description_chunk
                    else:
                        description_accumulator += description_chunk
                    await websocket.send_text(json.dumps({
                        "type": "text_chunk",
                        "data": description_chunk,
                        "pictureCount": data_json.get('pictureCount'),
                        "voiceName": selected_voice_name
                    }))
                    # A delimiter anywhere in the chunk closes the current
                    # segment: speak it, and only reset the accumulator if
                    # the audio was delivered (matching the original, which
                    # kept unspoken text on TTS failure).
                    if _DELIMITER_RE.search(description_chunk):
                        if await _speak(websocket,
                                        description_accumulator.strip(),
                                        selected_voice_id, "audio"):
                            description_accumulator = ""
                # Flush trailing text that was never followed by a delimiter.
                if description_accumulator:
                    await _speak(websocket, description_accumulator.strip(),
                                 selected_voice_id, "final audio")
                print("Finished processing image data.")
            except WebSocketDisconnect:
                print("Client disconnected")
                break
            except Exception as e:
                print(f"Error processing message: {e}")
                try:
                    await websocket.send_text(json.dumps({
                        "type": "error",
                        "data": "Error processing message"
                    }))
                except Exception:
                    # Socket is unusable; stop the receive loop.
                    break
    except Exception as e:
        print(f"Error during WebSocket communication: {e}")
    finally:
        print("connection closed")
        try:
            await websocket.close(code=1000)
        except Exception:
            pass  # already closed or client gone