File size: 1,440 Bytes
f2bdb6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import io

import numpy as np
import soundfile as sf
from flask import Flask, request, jsonify, Response
from kokoro import KPipeline

# Flask application object; the route handler below registers itself on it.
app = Flask(__name__)

# Build the Kokoro TTS pipeline once at import time so that every request
# reuses the same instance instead of constructing a new one per call.
# NOTE(review): lang_code="a" is presumably Kokoro's American-English code —
# confirm against the Kokoro documentation.
pipeline = KPipeline(lang_code="a")

def _audio_to_numpy(audio):
    """Return one synthesized segment as a NumPy array.

    Objects exposing ``detach`` (e.g. torch tensors) are detached and moved
    to host memory first; anything else is passed through ``np.asarray``.
    """
    if hasattr(audio, "detach"):
        audio = audio.detach().cpu().numpy()
    return np.asarray(audio)


@app.route("/v1/audio/speech", methods=["POST"])
def generate_tts():
    """Synthesize speech for a JSON request and return it as a WAV file.

    Expected JSON body fields:
        input: Text to synthesize (required, non-empty string).
        voice: Voice name, or several names joined with ``+``. The text is
            synthesized once per voice and the results are concatenated in
            order. Defaults to ``"af_heart"``.
        speed: Positive number forwarded to the pipeline. Defaults to 1.0.

    Returns:
        An ``audio/wav`` response on success, or a JSON ``{"error": ...}``
        body with status 400 (invalid request) or 500 (synthesis failure).
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so every bad request gets the same JSON error shape.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    text = data.get("input", "")
    if not isinstance(text, str) or not text.strip():
        return jsonify({"error": "No input text provided"}), 400

    voice_combo = data.get("voice", "af_heart")
    if not isinstance(voice_combo, str):
        return jsonify({"error": "voice must be a string"}), 400
    # Drop empty names so inputs such as "a+" or "+" never reach the model.
    voices = [name.strip() for name in voice_combo.split("+") if name.strip()]
    if not voices:
        return jsonify({"error": "No voice provided"}), 400

    speed = data.get("speed", 1.0)
    # bool is a subclass of int, so reject it explicitly; `not (speed > 0)`
    # also rejects NaN, which compares False against everything.
    if (
        isinstance(speed, bool)
        or not isinstance(speed, (int, float))
        or not (speed > 0)
    ):
        return jsonify({"error": "speed must be a positive number"}), 400

    segments = []
    try:
        for voice in voices:
            generator = pipeline(
                text, voice=voice, speed=speed, split_pattern=r"\n+"
            )
            # NOTE(review): only the first segment that carries audio is
            # kept per voice, so any text after the first split is dropped
            # — confirm this truncation is intended.
            for _, _, audio in generator:
                if audio is None:
                    continue
                segments.append(_audio_to_numpy(audio))
                break
    except Exception:
        # Request boundary: log the traceback and answer with the JSON error
        # shape used everywhere else in this handler.
        app.logger.exception("Speech synthesis failed")
        return jsonify({"error": "Failed to generate audio"}), 500

    if not segments:
        return jsonify({"error": "Failed to generate audio"}), 500

    # Segments are sample arrays, not bytes, so join them as arrays.
    final_audio = np.concatenate(segments)

    # Sample rate written into the WAV header.
    # NOTE(review): presumably Kokoro's native output rate — confirm.
    sample_rate = 24000

    # Encode into an in-memory buffer; nothing is written to disk.
    buffer = io.BytesIO()
    sf.write(buffer, final_audio, sample_rate, format="WAV")

    return Response(buffer.getvalue(), mimetype="audio/wav")

if __name__ == "__main__":
    # Debug mode is intentionally not forced on: the server binds to all
    # interfaces, and the Werkzeug interactive debugger lets anyone who can
    # reach it execute arbitrary code. For local debugging, set FLASK_DEBUG=1
    # in the environment instead.
    app.run(host="0.0.0.0", port=7860)