gleisonnanet committed
Commit 9850062 · 1 Parent(s): fdd143d

Add application file

Files changed (5)
  1. Dockerfile +11 -0
  2. audio.py +62 -0
  3. main.py +44 -0
  4. packages.txt +1 -0
  5. requirements.txt +25 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
+ FROM python:3.9
+
+ WORKDIR /code
+
+ COPY ./requirements.txt /code/requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ COPY . .
+
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
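For reference, a minimal smoke test of this container, assuming the image has been built and started locally with port 7860 published (e.g. docker run -p 7860:7860 <image>); the local URL is an assumption, not part of the commit:

import requests  # pinned in requirements.txt below

# main.py (added below) serves the interactive docs at "/", and the CMD above
# binds uvicorn to port 7860, so a plain GET should return 200 once the app is up.
resp = requests.get("http://localhost:7860/", timeout=10)
print(resp.status_code)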
audio.py ADDED
@@ -0,0 +1,62 @@
+ import aiofiles
+ import hashlib
+ import os
+ from fastapi import APIRouter
+ from fastapi.responses import JSONResponse, FileResponse
+ from TTS.api import TTS
+ from pydantic import BaseModel
+
+ audio_router = APIRouter()
+
+ # Define the input model
+ class TTSInput(BaseModel):
+     input_text: str = 'olá tia divina'
+     emotion: str = "Happy"
+     language: str
+     speed: float = 1.5
+     key: str
+
+
+ async def generate_audio_file(input_data: TTSInput):
+     model_name = TTS.list_models()[0]
+
+     # Initialize the TTS model
+     tts = TTS(model_name=model_name, model_path="./model")
+
+     # Create a string with the input values for hashing
+     hash_input = f"{input_data.input_text}{input_data.emotion}{input_data.language}{input_data.speed}"
+
+     # Calculate the MD5 hash based on the input values
+     md5_hash = hashlib.md5(hash_input.encode()).hexdigest()
+
+     # Check if the audio file already exists
+     os.makedirs("audio", exist_ok=True)  # ensure the cache directory exists
+     audio_file_path = os.path.join("audio", f"{md5_hash}.wav")
+     if not os.path.exists(audio_file_path):
+         # Generate TTS audio and save to a file
+         tts.tts_to_file(
+             text=input_data.input_text,
+             speaker=tts.speakers[5],
+             language=tts.languages[2],  # note: hard-coded; input_data.language only affects the cache key
+             file_path=audio_file_path,
+             gpu=True,
+             emotion=input_data.emotion,
+             speed=input_data.speed,
+             progress_bar=True
+         )
+
+     return audio_file_path, md5_hash
+
+ @audio_router.post("/generate_audio", response_class=JSONResponse)
+ async def generate_audio(input_data: TTSInput):
+     audio_file, md5_hash = await generate_audio_file(input_data)
+     return {"message": "Audio generated successfully", "audio_file": audio_file, "md5_hash": md5_hash}
+
+ @audio_router.get("/download_audio/{md5_hash}")
+ async def download_audio(md5_hash: str):
+     audio_file = os.path.join("audio", f"{md5_hash}.wav")
+
+     if os.path.exists(audio_file):
+         return FileResponse(audio_file, media_type="audio/wav")
+     else:
+         return JSONResponse(content={"message": "Audio not found"}, status_code=404)
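For context, a sketch of how a client could exercise the two endpoints above with the requests package from requirements.txt; the base URL, language code, and key value are illustrative assumptions rather than values taken from this commit:

import requests

BASE_URL = "http://localhost:7860"  # assumption: container running locally with the port published

# POST /generate_audio with the fields declared on TTSInput;
# "language" and "key" have no defaults, so they must always be sent.
payload = {
    "input_text": "olá tia divina",
    "emotion": "Happy",
    "language": "pt",        # illustrative value
    "speed": 1.5,
    "key": "example-key",    # illustrative value
}
resp = requests.post(f"{BASE_URL}/generate_audio", json=payload)
resp.raise_for_status()
md5_hash = resp.json()["md5_hash"]

# GET /download_audio/{md5_hash} returns the cached WAV file.
audio = requests.get(f"{BASE_URL}/download_audio/{md5_hash}")
with open("output.wav", "wb") as f:
    f.write(audio.content)

Note that key is required by TTSInput but never checked anywhere in audio.py, and input_data.language only influences the cache key, since the speaker and language passed to tts_to_file are hard-coded indices.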
main.py ADDED
@@ -0,0 +1,44 @@
+ from fastapi import FastAPI
+ from fastapi.middleware.cors import CORSMiddleware
+ from audio import audio_router
+ from pydantic_settings import BaseSettings  # BaseSettings moved out of pydantic core in v2
+ from typing import Optional
+
+ class APISettings(BaseSettings):
+     max_input_length: int = 10000
+     config_path: str = "config/config.yaml"
+     version: Optional[str] = None
+
+     class Config:
+         env_file = 'config/.env'
+         env_prefix = 'api_'
+
+ api_settings = APISettings()
+
+ app = FastAPI(title="Text-to-Speech API",
+               docs_url="/",
+               version=api_settings.version if api_settings.version else "dev",
+               description="An API that provides text-to-speech using neural models. "
+                           "Developed by TartuNLP - the NLP research group of the University of Tartu.",
+               terms_of_service="https://www.tartunlp.ai/andmekaitsetingimused",
+               license_info={
+                   "name": "MIT license",
+                   "url": "https://github.com/TartuNLP/text-to-speech-api/blob/main/LICENSE"
+               },
+               contact={
+                   "name": "TartuNLP",
+                   "url": "https://tartunlp.ai",
+                   "email": "[email protected]",
+               })
+
+ app.include_router(audio_router)
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_methods=["GET", "POST"],
+     allow_headers=["*"],
+ )
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app="main:app", host="0.0.0.0", port=8000, reload=True, timeout_keep_alive=None)
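As a side note, a minimal sketch of how the api_ environment prefix in APISettings is expected to behave, written against pydantic-settings v2 using SettingsConfigDict (the v2-native equivalent of the class Config block above); the version value is purely illustrative:

import os
from typing import Optional
from pydantic_settings import BaseSettings, SettingsConfigDict

class ExampleSettings(BaseSettings):
    # Environment variables prefixed with "api_" override these defaults;
    # matching is case-insensitive, so API_VERSION works as well.
    model_config = SettingsConfigDict(env_prefix="api_")

    max_input_length: int = 10000
    version: Optional[str] = None

os.environ["API_VERSION"] = "1.2.3"  # hypothetical value, e.g. supplied via config/.env
settings = ExampleSettings()
print(settings.version)           # -> "1.2.3"
print(settings.max_input_length)  # -> 10000 (default, no override set)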
packages.txt ADDED
@@ -0,0 +1 @@
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,25 @@
+ annotated-types==0.5.0
+ anyio==3.7.1
+ certifi==2023.7.22
+ charset-normalizer==3.2.0
+ click==8.1.6
+ fastapi==0.100.1
+ gTTS==2.3.2
+ h11==0.14.0
+ idna==3.4
+ pydantic==2.1.1
+ pydantic_core==2.4.0
+ requests==2.31.0
+ sniffio==1.3.0
+ starlette==0.27.0
+ typing_extensions==4.7.1
+ urllib3==2.0.4
+ uvicorn==0.23.1
+ neon-tts-plugin-coqui==0.7.3a1
+ TTS[all,dev,notebooks]
+ gradio
+ # TTS==0.7.1
+ pydantic-settings  # provides BaseSettings for pydantic v2 (used in main.py)
+ stt
+ torch
+ transformers