"""Minimal Flask service exposing Kokoro TTS at ``POST /v1/audio/speech``."""

import io
import math
import os

import numpy as np
import soundfile as sf
from flask import Flask, Response, jsonify, request
from kokoro import KPipeline

app = Flask(__name__)

# Initialize Kokoro TTS pipeline
pipeline = KPipeline(lang_code="a")

# Sample rate (Hz) written into the WAV header.
SAMPLE_RATE = 24000
DEFAULT_VOICE = "af_heart"
DEFAULT_SPEED = 1.0


def _as_samples(audio):
    """Return *audio* as a flat float32 NumPy array.

    NOTE(review): the pipeline presumably yields either a torch tensor or a
    NumPy array depending on the Kokoro version -- confirm. Tensors are
    detached and moved to the CPU before conversion; flattening assumes the
    audio is mono.
    """
    if hasattr(audio, "detach"):
        audio = audio.detach().cpu().numpy()
    return np.asarray(audio, dtype=np.float32).reshape(-1)


@app.route("/v1/audio/speech", methods=["POST"])
def generate_tts():
    """Synthesize speech for the posted text and return it as a WAV file.

    Expected JSON body:
        input: text to speak (required, non-empty string).
        voice: one voice name, or several joined with ``+``
            (default ``"af_heart"``). The text is rendered once per voice
            and the renderings are concatenated in order.
        speed: positive number passed to the pipeline (default ``1.0``).

    Returns:
        ``audio/wav`` bytes on success; a JSON ``{"error": ...}`` body with
        status 400 for invalid input or 500 when no audio was produced.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON
    # body, so malformed requests get the same 400 as an empty input.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    # Extract text and parameters
    text = data.get("input", "")
    if not isinstance(text, str) or not text:
        return jsonify({"error": "No input text provided"}), 400

    voice_combo = data.get("voice", DEFAULT_VOICE)
    if not isinstance(voice_combo, str):
        return jsonify({"error": "Voice must be a string"}), 400
    # Support multiple voices; drop empty names produced by e.g. "a++b".
    voices = [name.strip() for name in voice_combo.split("+") if name.strip()]
    if not voices:
        return jsonify({"error": "No voice provided"}), 400

    try:
        speed = float(data.get("speed", DEFAULT_SPEED))
    except (TypeError, ValueError):
        return jsonify({"error": "Speed must be a number"}), 400
    if not math.isfinite(speed) or speed <= 0:
        return jsonify({"error": "Speed must be a positive number"}), 400

    # Generate audio for each voice.
    # NOTE(review): only the first segment yielded by the pipeline is used,
    # as in the original implementation. Presumably text containing newlines
    # is split into several segments (split_pattern), so later segments are
    # dropped -- confirm this truncation is intended.
    segments = []
    for voice in voices:
        generator = pipeline(
            text, voice=voice, speed=speed, split_pattern=r"\n+"
        )
        first = next(iter(generator), None)
        if first is None:
            continue
        _, _, audio = first
        if audio is None:
            continue
        segments.append(_as_samples(audio))

    if not segments:
        return jsonify({"error": "Failed to generate audio"}), 500

    # Combine audio segments. They are sample arrays, not bytes, so they
    # must be concatenated numerically rather than with b"".join().
    final_audio = np.concatenate(segments)

    # Encode to WAV in memory and send it in one piece.
    buffer = io.BytesIO()
    sf.write(buffer, final_audio, SAMPLE_RATE, format="WAV")
    return Response(buffer.getvalue(), mimetype="audio/wav")


if __name__ == "__main__":
    # The interactive debugger allows arbitrary code execution, so it must
    # not be on by default while listening on all interfaces. Opt in
    # explicitly with FLASK_DEBUG=1 for local development.
    debug = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(host="0.0.0.0", port=7860, debug=debug)