"""Metaverse AI character API: turns an uploaded speech clip into a spoken AI reply."""
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
from faster_whisper import WhisperModel
from transformers import pipeline
from TTS.api import TTS
import tempfile
import os

app = Flask(__name__)
CORS(app)

# Load the speech-to-text, language-model, and text-to-speech models once at startup.
whisper_model = WhisperModel("small", device="cpu", compute_type="int8")
llm = pipeline("text-generation", model="tiiuae/falcon-rw-1b", max_new_tokens=100)
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)


@app.route("/talk", methods=["POST"])
def talk():
    if "audio" not in request.files:
        return jsonify({"error": "No audio file"}), 400

    # Save the uploaded audio to a temporary WAV file so the models can read it from disk.
    audio_file = request.files["audio"]
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        audio_path = tmp.name
    audio_file.save(audio_path)

    # Transcribe the recording with faster-whisper.
    segments, _ = whisper_model.transcribe(audio_path)
    transcription = "".join([seg.text for seg in segments]).strip()
    # Generate a text reply; return_full_text=False keeps only the newly
    # generated continuation instead of echoing the transcription back.
    response = llm(transcription, return_full_text=False)[0]["generated_text"]

    # Synthesize the reply to a new WAV file and send it back to the caller.
    tts_audio_path = audio_path.replace(".wav", "_reply.wav")
    tts.tts_to_file(text=response, file_path=tts_audio_path)
    os.remove(audio_path)  # the uploaded recording is no longer needed

    return send_file(tts_audio_path, mimetype="audio/wav")


@app.route("/")
def index():
    return "Metaverse AI Character API running."


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
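# Example client call (illustrative sketch, not part of the original source):
# assuming the server is running locally on port 7860 and "input.wav" holds a
# short speech recording, the /talk endpoint can be exercised with curl:
#
#   curl -X POST http://localhost:7860/talk -F "audio=@input.wav" --output reply.wav
#
# The response body is the synthesized WAV reply produced by Coqui TTS.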