import io

import numpy as np
import soundfile as sf
from flask import Flask, request, jsonify, Response
from kokoro import KPipeline
# Flask application object; the route below registers itself on it.
app = Flask(__name__)

# Built once at import time so every request reuses the same pipeline instead
# of constructing a new one per call.
# NOTE(review): lang_code "a" is presumably Kokoro's American English code —
# confirm against the Kokoro documentation.
pipeline = KPipeline(lang_code="a")
# Sample rate (Hz) handed to soundfile when encoding the WAV response.
_SAMPLE_RATE = 24000

# Voice used when the request does not name one.
_DEFAULT_VOICE = "af_heart"


def _parse_speech_request(data):
    """Validate the decoded JSON payload and return ``(text, voices, speed)``.

    Args:
        data: The decoded request body; expected to be a dict with an
            ``"input"`` string and optional ``"voice"`` / ``"speed"`` entries.

    Returns:
        A tuple ``(text, voices, speed)`` where ``voices`` is a non-empty list
        of stripped voice names and ``speed`` is a positive finite float.

    Raises:
        ValueError: If a field is missing or malformed. The message is safe to
            return to the client.
    """
    if not isinstance(data, dict):
        raise ValueError("Request body must be a JSON object")

    text = data.get("input", "")
    if not isinstance(text, str) or not text:
        raise ValueError("No input text provided")

    # A missing, null or empty voice falls back to the default voice.
    voice_combo = data.get("voice") or _DEFAULT_VOICE
    if not isinstance(voice_combo, str):
        raise ValueError("'voice' must be a string")
    # Several voices may be requested as "a+b"; drop empty components such as
    # the trailing one in "a+".
    voices = [name.strip() for name in voice_combo.split("+") if name.strip()]
    if not voices:
        raise ValueError("No voice provided")

    raw_speed = data.get("speed")
    if raw_speed is None:
        speed = 1.0
    else:
        try:
            speed = float(raw_speed)
        except (TypeError, ValueError):
            raise ValueError("'speed' must be a number") from None
    # The chained comparison rejects zero, negatives, NaN and infinity.
    if not 0 < speed < float("inf"):
        raise ValueError("'speed' must be a positive number")

    return text, voices, speed


def _to_numpy(audio):
    """Convert one audio segment from the pipeline into a NumPy array."""
    # NOTE(review): the pipeline presumably yields torch tensors (possibly on
    # an accelerator) in recent Kokoro releases — confirm. Anything exposing
    # ``detach`` is moved to host memory before conversion.
    if hasattr(audio, "detach"):
        audio = audio.detach().cpu()
    return np.asarray(audio)


@app.route("/v1/audio/speech", methods=["POST"])
def generate_tts():
    """Synthesize speech for the posted text and return it as WAV audio.

    The JSON body must contain ``"input"`` (the text). ``"voice"`` is optional
    (one voice, or several joined with ``+``; defaults to ``"af_heart"``) and
    so is ``"speed"`` (defaults to ``1.0``).

    Returns:
        An ``audio/wav`` response on success, a JSON error with status 400 for
        an invalid request, or a JSON error with status 500 when no audio was
        produced.
    """
    try:
        # silent=True yields None for a missing or malformed JSON body, so
        # the validator can answer with a JSON 400.
        text, voices, speed = _parse_speech_request(request.get_json(silent=True))
    except ValueError as err:
        return jsonify({"error": str(err)}), 400

    segments = []
    for voice in voices:
        generator = pipeline(text, voice=voice, speed=speed, split_pattern=r"\n+")
        for _, _, audio in generator:
            if audio is not None:
                segments.append(_to_numpy(audio))
            # NOTE(review): only the first segment per voice is kept, as in
            # the original code. Text containing newlines is therefore cut
            # off after its first segment — confirm whether that is intended.
            break

    if not segments:
        return jsonify({"error": "Failed to generate audio"}), 500

    # Segments are sample arrays, not bytes, so they are concatenated
    # numerically before being encoded.
    final_audio = np.concatenate(segments)

    buffer = io.BytesIO()
    sf.write(buffer, final_audio, _SAMPLE_RATE, format="WAV")

    return Response(buffer.getvalue(), mimetype="audio/wav")
if __name__ == "__main__":
    import os

    # The server listens on every interface, and Flask's debug mode enables
    # the Werkzeug interactive debugger, which lets anyone who can reach the
    # port execute code. Debug is therefore off unless FLASK_DEBUG opts in.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)