audio / audio.py
gleisonnanet's picture
r
049219b
import aiofiles
import hashlib
import os
from fastapi import APIRouter
from fastapi.responses import JSONResponse, FileResponse
from TTS.api import TTS
from pydantic import BaseModel
# Router on which this module's audio endpoints are registered.
audio_router = APIRouter()
class TTSInput(BaseModel):
    """Request body describing one text-to-speech synthesis job."""

    # Text to synthesise; a sample phrase is used when the client omits it.
    input_text: str = "olà tia divina "
    # Emotion label forwarded to the synthesiser.
    emotion: str = "Happy"
    # Language requested by the client (required, no default).
    language: str
    # Speed value forwarded to the synthesiser.
    speed: float = 1.5
    # Client-supplied key (required); not read anywhere in the visible code.
    key: str
async def generate_audio_file(input_data: TTSInput):
    """Synthesise speech for *input_data*, reusing a cached file when one exists.

    The output file name is the MD5 hex digest of the concatenated text,
    emotion, language and speed, so identical requests map to the same
    ``audio/<md5>.wav`` file and are synthesised only once.

    Args:
        input_data: Validated request parameters.

    Returns:
        A ``(audio_file_path, md5_hash)`` tuple: the path of the WAV file and
        the digest that identifies it.
    """
    # The digest is only a cache key. Field order and the absence of
    # separators are kept unchanged so previously generated files still match.
    hash_input = f"{input_data.input_text}{input_data.emotion}{input_data.language}{input_data.speed}"
    md5_hash = hashlib.md5(hash_input.encode()).hexdigest()
    audio_file_path = os.path.join("audio", f"{md5_hash}.wav")

    # Cache hit: return before listing/constructing the TTS model, which is
    # only needed when something actually has to be synthesised.
    if os.path.exists(audio_file_path):
        return audio_file_path, md5_hash

    # Make sure the output directory exists before the synthesiser writes to it.
    os.makedirs(os.path.dirname(audio_file_path), exist_ok=True)

    model_name = TTS.list_models()[0]
    tts = TTS(model_name=model_name)

    # NOTE(review): speaker and language are fixed indices into the model's
    # lists; input_data.language only influences the cache key, not the
    # synthesis itself -- confirm whether that is intended.
    # NOTE(review): gpu / progress_bar look like TTS constructor options
    # rather than tts_to_file arguments -- confirm against the installed
    # TTS version before changing.
    tts.tts_to_file(
        text=input_data.input_text,
        speaker=tts.speakers[5],
        language=tts.languages[2],
        file_path=audio_file_path,
        gpu=True,
        emotion=input_data.emotion,
        speed=input_data.speed,
        progress_bar=True,
    )
    return audio_file_path, md5_hash
@audio_router.post("/generate_audio", response_class=JSONResponse)
async def generate_audio(input_data: TTSInput):
    """Produce the audio for the posted parameters and describe the result.

    Returns:
        A JSON object holding a status message, the path of the audio file
        and the MD5 digest that names it.
    """
    wav_path, digest = await generate_audio_file(input_data)
    payload = {
        "message": "Audio generated successfully",
        "audio_file": wav_path,
        "md5_hash": digest,
    }
    return payload
@audio_router.get("/download_audio/{md5_hash}")
async def download_audio(md5_hash: str):
    """Serve the WAV file identified by *md5_hash*.

    Args:
        md5_hash: Digest taken from the URL path.

    Returns:
        A ``FileResponse`` for ``audio/<md5_hash>.wav`` when that file exists;
        otherwise a JSON ``{"message": "Audio not found"}`` body with HTTP
        status 404.
    """
    # The value comes straight from the URL, so only accept the shape of an
    # MD5 hex digest (32 hex characters) before using it in a filesystem path.
    looks_like_md5 = len(md5_hash) == 32 and all(
        ch in "0123456789abcdefABCDEF" for ch in md5_hash
    )
    if looks_like_md5:
        audio_file = os.path.join("audio", f"{md5_hash}.wav")
        # isfile (not exists) so a directory is never handed to FileResponse.
        if os.path.isfile(audio_file):
            return FileResponse(audio_file, media_type="audio/wav")

    # JSONResponse takes the body first and the status code second; the
    # message must therefore live inside the content object.
    return JSONResponse({"message": "Audio not found"}, status_code=404)