# Test70 / app.py
# zyxciss's picture
# Create app.py
# f2bdb6d verified
import io

import numpy as np
import soundfile as sf
from flask import Flask, request, jsonify, Response
from kokoro import KPipeline
# Flask application object; the route decorator below registers against it.
app = Flask(import_name=__name__)

# A single Kokoro TTS pipeline is built at import time and shared by every
# request handled by this process.
# NOTE(review): lang_code "a" is presumably Kokoro's code for American
# English -- confirm against the Kokoro documentation.
pipeline = KPipeline(lang_code="a")
# Sample rate (Hz) written into the WAV header for the generated audio.
_SAMPLE_RATE = 24000

# Voice used when the request does not name one.
_DEFAULT_VOICE = "af_heart"


def _as_numpy(audio):
    """Return one pipeline audio chunk as a NumPy array.

    NOTE(review): depending on the Kokoro version the chunk may be a torch
    tensor rather than an ndarray -- handled defensively; confirm.
    """
    if hasattr(audio, "detach"):
        # Tensor-like object: drop autograd state and move to host memory
        # before converting.
        audio = audio.detach().cpu().numpy()
    return np.asarray(audio)


@app.route("/v1/audio/speech", methods=["POST"])
def generate_tts():
    """Synthesize speech for a JSON request and return it as WAV audio.

    Expected JSON body fields:
        input: text to synthesize (required, non-empty string).
        voice: one voice name, or several joined with "+"; the text is
            rendered once per voice and the renderings are concatenated
            in order. Defaults to "af_heart".
        speed: positive number forwarded to the pipeline. Defaults to 1.0.

    Returns:
        A ``audio/wav`` response on success; a JSON ``{"error": ...}`` body
        with status 400 for invalid input, or 500 when the pipeline
        produced no audio.
    """
    # silent=True yields None instead of raising when the body is missing
    # or is not valid JSON; anything that is not a JSON object is treated
    # as an empty request so the validation below reports it uniformly.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    text = payload.get("input", "")
    if not isinstance(text, str) or not text.strip():
        return jsonify({"error": "No input text provided"}), 400

    voice_combo = payload.get("voice") or _DEFAULT_VOICE
    if not isinstance(voice_combo, str):
        return jsonify({"error": "voice must be a string"}), 400
    # Drop empty names so inputs such as "af_heart+" do not pass "" on to
    # the pipeline.
    voices = [name.strip() for name in voice_combo.split("+") if name.strip()]
    if not voices:
        return jsonify({"error": "No voice provided"}), 400

    try:
        speed = float(payload.get("speed", 1.0))
    except (TypeError, ValueError):
        return jsonify({"error": "speed must be a number"}), 400
    # The chained comparison is also False for NaN and infinity.
    if not 0 < speed < float("inf"):
        return jsonify({"error": "speed must be a positive number"}), 400

    # The pipeline splits the text on newlines and yields one result per
    # segment. Every segment is kept so multi-line input is synthesized in
    # full rather than truncated after the first segment.
    segments = []
    for voice in voices:
        results = pipeline(text, voice=voice, speed=speed, split_pattern=r"\n+")
        for _, _, audio in results:
            if audio is not None:
                segments.append(_as_numpy(audio))

    if not segments:
        return jsonify({"error": "Failed to generate audio"}), 500

    # The chunks are sample arrays, not bytes, so they are concatenated
    # numerically before being encoded.
    final_audio = np.concatenate(segments)

    # Encode to WAV entirely in memory.
    buffer = io.BytesIO()
    sf.write(buffer, final_audio, _SAMPLE_RATE, format="WAV")

    return Response(buffer.getvalue(), mimetype="audio/wav")
if __name__ == "__main__":
    import os

    # Flask's debug mode enables the Werkzeug interactive debugger, which
    # lets anyone who can reach the server execute arbitrary Python. Since
    # the server binds to every interface, debug mode is opt-in through the
    # FLASK_DEBUG environment variable instead of always being on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)