File size: 1,440 Bytes
f2bdb6d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
from flask import Flask, request, jsonify, Response
from kokoro import KPipeline
import soundfile as sf
import io
# Flask application object; the route decorator below registers its handler on it.
app = Flask(__name__)
# Initialize the Kokoro TTS pipeline once at import time so that every request
# handled by this module reuses the same pipeline object.
# NOTE(review): lang_code="a" presumably selects American English — confirm
# against the Kokoro documentation.
pipeline = KPipeline(lang_code="a")
@app.route("/v1/audio/speech", methods=["POST"])
def generate_tts():
    """Synthesize speech for a JSON request and return it as WAV audio.

    Expected JSON body fields:
        input: Text to speak (required, non-empty string).
        voice: Voice name, or several names joined with "+"; the text is
            rendered once per voice and the results are concatenated
            (default "af_heart").
        speed: Positive number forwarded to the pipeline (default 1.0).

    Returns:
        A chunked ``audio/wav`` response on success, or a JSON error body
        with status 400 (invalid request) or 500 (no audio was produced).
    """
    # Imported here so this handler is self-contained; the module itself does
    # not import numpy at top level.
    import numpy as np

    # silent=True returns None instead of raising when the body is missing or
    # is not valid JSON, so the request falls through to the 400 path below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    # Extract and validate text and parameters (all of them are untrusted).
    text = data.get("input", "")
    if not isinstance(text, str) or not text.strip():
        return jsonify({"error": "No input text provided"}), 400

    voice_combo = data.get("voice", "af_heart")
    if not isinstance(voice_combo, str):
        return jsonify({"error": "voice must be a string"}), 400
    # Support multiple voices; drop empty names produced by stray "+" signs.
    voices = [name.strip() for name in voice_combo.split("+") if name.strip()]
    if not voices:
        return jsonify({"error": "voice must be a string"}), 400

    speed = data.get("speed", 1.0)
    # bool is a subclass of int, so reject it explicitly.
    if (
        isinstance(speed, bool)
        or not isinstance(speed, (int, float))
        or speed <= 0
    ):
        return jsonify({"error": "speed must be a positive number"}), 400

    # Generate audio for each voice.
    segments = []
    for voice in voices:
        generator = pipeline(text, voice=voice, speed=speed, split_pattern=r"\n+")
        for _, _, audio in generator:
            if audio is not None:
                # Presumably a torch tensor or numpy array of samples — TODO
                # confirm. Tensors are detached and moved to CPU before the
                # conversion so that np.asarray always succeeds.
                if hasattr(audio, "detach"):
                    audio = audio.detach().cpu().numpy()
                segments.append(np.asarray(audio))
            # Take only the first segment.
            # NOTE(review): text after the first newline run is therefore
            # never synthesized — confirm this is intended.
            break

    if not segments:
        return jsonify({"error": "Failed to generate audio"}), 500

    # Combine audio segments sample-wise. The samples are arrays, not bytes,
    # so they must be concatenated as arrays for sf.write to accept them.
    final_audio = np.concatenate(segments)

    # Encode to WAV in an in-memory buffer.
    # NOTE(review): 24000 is presumably the pipeline's output sample rate — confirm.
    buffer = io.BytesIO()
    sf.write(buffer, final_audio, 24000, format="WAV")
    buffer.seek(0)

    def generate():
        # Yield fixed-size chunks so the body is actually streamed rather
        # than sent as a single read of the whole buffer.
        yield from iter(lambda: buffer.read(64 * 1024), b"")

    return Response(generate(), mimetype="audio/wav")
if __name__ == "__main__":
    import os

    # The server listens on every interface, so the interactive debugger
    # (which lets a client run arbitrary code) must stay off unless it is
    # explicitly requested via the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true"}
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)