Spaces:
Sleeping
Sleeping
from fastapi import APIRouter, WebSocket, WebSocketDisconnect | |
import json | |
from generate_description import generate_description | |
from convert_text_to_speech import convert_text_to_speech | |
import re | |
import asyncio | |
# Router object for this module's endpoints.
# NOTE(review): no route decorator is visible on websocket_narrate in this
# section -- confirm where the handler is registered on this router.
router = APIRouter()

# Narration texts that were successfully converted to speech, oldest first.
# This is module-level state: it is shared by every WebSocket connection
# served by this process, is passed to generate_description() on each request,
# and is never trimmed by the code visible here.
description_history = []
# A chunk containing this marker triggers a flush of the accumulated text to
# text-to-speech. Compiled once at import time rather than once per message.
_FLUSH_MARKER = re.compile(r"[*]")


async def _send_json(websocket: WebSocket, payload: dict) -> None:
    """Serialize *payload* to JSON and send it as a single text frame."""
    await websocket.send_text(json.dumps(payload))


async def _speak_and_record(
    websocket: WebSocket,
    text: str,
    voice_id,
    error_label: str,
) -> bool:
    """Stream speech audio for *text* to the client and record it in history.

    Args:
        websocket: Connection that receives the binary audio frames.
        text: Already-stripped narration text to convert.
        voice_id: Voice identifier forwarded to convert_text_to_speech().
        error_label: Message that is both printed (with the exception
            appended) and sent to the client if the conversion fails.

    Returns:
        True when all audio was sent and *text* was appended to
        description_history; False when an error was reported instead.

    Raises:
        Exception: whatever send_text() raises if the error report itself
            cannot be delivered; the caller's handler deals with that.
    """
    try:
        async for audio_chunk in convert_text_to_speech(text, voice_id):
            await websocket.send_bytes(audio_chunk)
        description_history.append(text)
    except Exception as exc:
        print(f"{error_label}: {exc}")
        await _send_json(websocket, {"type": "error", "data": error_label})
        return False
    return True


async def _narrate_image(websocket: WebSocket, data_json: dict) -> None:
    """Handle one decoded client request: describe the image and speak it.

    Text chunks from generate_description() are forwarded to the client as
    they arrive. Whenever a chunk contains the flush marker, the text
    accumulated so far is converted to speech and streamed as binary frames;
    any remainder is spoken after the description ends.

    Args:
        websocket: Connection to the client.
        data_json: Decoded request. The keys read are 'image', 'voiceId',
            'voiceName', 'politenessLevel' (default 5) and 'pictureCount'.

    Raises:
        AttributeError: if *data_json* is not a mapping (no ``.get``).
        TypeError, ValueError: if 'politenessLevel' cannot be turned into an
            int. All of these are reported by the caller's generic handler.
    """
    image_data = data_json.get('image')
    selected_voice_id = data_json.get('voiceId')
    selected_voice_name = data_json.get('voiceName')
    politeness_level = int(data_json.get('politenessLevel', 5))

    if not image_data:
        await _send_json(websocket, {
            "type": "error",
            "data": "No image data received.",
        })
        return

    print(f"Image data received, sending to {selected_voice_name} model for analysis with politeness level {politeness_level}.")

    # Constant for the whole request, so read it once rather than per chunk.
    picture_count = data_json.get('pictureCount')
    pending_text = ""

    async for description_chunk in generate_description(
        image_data, selected_voice_name, description_history, politeness_level
    ):
        if not description_chunk:
            continue

        # A chunk that is nothing but the marker gets a separating space so
        # it does not fuse with the preceding word.
        if _FLUSH_MARKER.fullmatch(description_chunk.strip()):
            pending_text += " " + description_chunk
        else:
            pending_text += description_chunk

        await _send_json(websocket, {
            "type": "text_chunk",
            "data": description_chunk,
            "pictureCount": picture_count,
            "voiceName": selected_voice_name,
        })

        if _FLUSH_MARKER.search(description_chunk):
            spoken = await _speak_and_record(
                websocket,
                pending_text.strip(),
                selected_voice_id,
                "Error processing audio",
            )
            # On failure the text is kept so the next flush retries it.
            if spoken:
                pending_text = ""

    # Test the stripped text: whitespace-only leftovers must not be sent to
    # text-to-speech or stored in the history as an empty string.
    remaining_text = pending_text.strip()
    if remaining_text:
        await _speak_and_record(
            websocket,
            remaining_text,
            selected_voice_id,
            "Error processing final audio",
        )

    print("Finished processing image data.")


async def websocket_narrate(websocket: WebSocket) -> None:
    """Serve one narration WebSocket connection until it is closed.

    Each incoming text frame is either the literal string "close", which ends
    the session, or a JSON request that is handled by _narrate_image().
    Failures while handling a request are reported to the client as an
    ``{"type": "error"}`` message and the loop keeps running; the loop ends
    when the client disconnects or the error report cannot be delivered.

    Args:
        websocket: The not-yet-accepted WebSocket connection.
    """
    await websocket.accept()
    print("WebSocket connection accepted.")
    print("connection open")

    # Set once this handler has closed the socket itself, so the cleanup in
    # ``finally`` does not attempt a second close on the same connection.
    closed_by_server = False
    try:
        while True:
            try:
                data = await websocket.receive_text()
                if data == "close":
                    print("Closing WebSocket connection.")
                    await websocket.close(code=1000)
                    closed_by_server = True
                    break
                await _narrate_image(websocket, json.loads(data))
            except WebSocketDisconnect:
                print("Client disconnected")
                break
            except Exception as exc:
                print(f"Error processing message: {exc}")
                try:
                    await _send_json(websocket, {
                        "type": "error",
                        "data": "Error processing message",
                    })
                except Exception:
                    # The connection is unusable; stop serving it. Not a bare
                    # ``except`` so task cancellation still propagates.
                    break
    except Exception as exc:
        print(f"Error during WebSocket communication: {exc}")
    finally:
        print("connection closed")
        if not closed_by_server:
            try:
                await websocket.close(code=1000)
            except Exception:
                # Best-effort cleanup: the peer may already have gone away.
                pass