Spaces:

mac9087
/

metanice

Build error

App Files Community

mac9087 committed about 1 month ago

Commit

ca5149c

verified ·

1 Parent(s): 0017945

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -14

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ import os
 app = Flask(__name__)
 CORS(app)
-# Load models
 whisper_model = WhisperModel("small", device="cpu", compute_type="int8")
 llm = pipeline("text-generation", model="tiiuae/falcon-rw-1b", max_new_tokens=100)
 tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
@@ -18,24 +18,38 @@ tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False,
 def talk():
     if "audio" not in request.files:
         return jsonify({"error": "No audio file"}), 400
     # Save audio
     audio_file = request.files["audio"]
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
         audio_path = tmp.name
         audio_file.save(audio_path)
     # Transcribe
     segments, _ = whisper_model.transcribe(audio_path)
     transcription = "".join([seg.text for seg in segments])
-    # Generate response
-    response_text = llm(transcription)[0]["generated_text"]
-    # Synthesize speech
     tts_audio_path = audio_path.replace(".wav", "_reply.wav")
-    tts.tts_to_file(text=response_text, file_path=tts_audio_path)
     return send_file(tts_audio_path, mimetype="audio/wav")
 @app.route("/chat", methods=["POST"])
@@ -43,11 +57,19 @@ def chat():
     data = request.get_json()
     if not data or "text" not in data:
         return jsonify({"error": "Missing 'text' in request body"}), 400
     user_input = data["text"]
-    response = llm(user_input)[0]["generated_text"]
-    return jsonify({"response": response})
 @app.route("/")
 def index():

 app = Flask(__name__)
 CORS(app)
+# Load models once at startup
 whisper_model = WhisperModel("small", device="cpu", compute_type="int8")
 llm = pipeline("text-generation", model="tiiuae/falcon-rw-1b", max_new_tokens=100)
 tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
 def talk():
     if "audio" not in request.files:
         return jsonify({"error": "No audio file"}), 400
     # Save audio
     audio_file = request.files["audio"]
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
         audio_path = tmp.name
         audio_file.save(audio_path)
     # Transcribe
     segments, _ = whisper_model.transcribe(audio_path)
     transcription = "".join([seg.text for seg in segments])
+    # Generate response with a clear prompt format
+    prompt = f"User: {transcription}\nAI:"
+    response_raw = llm(prompt)[0]["generated_text"]
+    # Extract only the AI's response (everything after "AI:")
+    try:
+        ai_response = response_raw.split("AI:")[1].strip()
+    except:
+        # Fallback if splitting fails
+        ai_response = response_raw
+    # Synthesize speech using only the AI's response
     tts_audio_path = audio_path.replace(".wav", "_reply.wav")
+    tts.tts_to_file(text=ai_response, file_path=tts_audio_path)
+    # Clean up the original audio file
+    try:
+        os.unlink(audio_path)
+    except:
+        pass
     return send_file(tts_audio_path, mimetype="audio/wav")
 @app.route("/chat", methods=["POST"])
     data = request.get_json()
     if not data or "text" not in data:
         return jsonify({"error": "Missing 'text' in request body"}), 400
     user_input = data["text"]
+    # Same improvement for text chat
+    prompt = f"User: {user_input}\nAI:"
+    response_raw = llm(prompt)[0]["generated_text"]
+    try:
+        ai_response = response_raw.split("AI:")[1].strip()
+    except:
+        ai_response = response_raw
+    return jsonify({"response": ai_response})
 @app.route("/")
 def index():