Spaces:

mac9087
/

metanice

Build error

App Files Files Community

mac9087 committed 23 days ago

Commit

af08117

verified ·

1 Parent(s): 7b16fc7

Update app.py

Browse files

Files changed (1) hide show

app.py +126 -30

app.py CHANGED Viewed

@@ -5,53 +5,149 @@ from transformers import pipeline
 from TTS.api import TTS
 import tempfile
 import os
 app = Flask(__name__)
 CORS(app)
-# Load models
 whisper_model = WhisperModel("small", device="cpu", compute_type="int8")
 llm = pipeline("text-generation", model="tiiuae/falcon-rw-1b", max_new_tokens=100)
 tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
 @app.route("/talk", methods=["POST"])
 def talk():
     if "audio" not in request.files:
         return jsonify({"error": "No audio file"}), 400
-    # Save audio
-    audio_file = request.files["audio"]
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-        audio_path = tmp.name
-        audio_file.save(audio_path)
-    # Transcribe
-    segments, _ = whisper_model.transcribe(audio_path)
-    transcription = "".join([seg.text for seg in segments])
-    # Generate response
-    response_text = llm(transcription)[0]["generated_text"]
-    # Synthesize speech
-    tts_audio_path = audio_path.replace(".wav", "_reply.wav")
-    tts.tts_to_file(text=response_text, file_path=tts_audio_path)
-    return send_file(tts_audio_path, mimetype="audio/wav")
 @app.route("/chat", methods=["POST"])
 def chat():
-    data = request.get_json()
-    if not data or "text" not in data:
-        return jsonify({"error": "Missing 'text' in request body"}), 400
-    user_input = data["text"]
-    response = llm(user_input)[0]["generated_text"]
-    return jsonify({"response": response})
 @app.route("/")
 def index():
-    return "Metaverse AI Character API running."
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860)

 from TTS.api import TTS
 import tempfile
 import os
+import re
 # Web application object, wrapped with CORS.
 app = Flask(__name__)
 CORS(app)

 # Every model is instantiated a single time at import so that the request
 # handlers below can reuse the same objects.
 print("Loading AI models...")
 whisper_model = WhisperModel(
     "small",
     device="cpu",
     compute_type="int8",
 )
 llm = pipeline(
     "text-generation",
     model="tiiuae/falcon-rw-1b",
     max_new_tokens=100,
 )
 tts = TTS(
     model_name="tts_models/en/ljspeech/tacotron2-DDC",
     progress_bar=False,
     gpu=False,
 )
 print("All models loaded successfully!")
def extract_ai_response(full_text, user_input):
    """Return only the AI's reply from text generated for a "User:/AI:" prompt.

    The text-generation output echoes the prompt, so the user's words must be
    stripped before the reply is spoken or returned.

    Strategies, in order:
      1. Take the text after the first "AI:" marker that follows the echoed
         user input, cut off at the next dialogue turn.
      2. Take everything after the echoed user input.
      3. Drop the first sentence (assumed to be the echoed input).
      4. Fall back to the whole text, stripped.

    Args:
        full_text: Complete generated text, normally including the prompt.
        user_input: The user's text that was embedded in the prompt.

    Returns:
        The extracted reply with surrounding whitespace removed. May be an
        empty string when the marker is present but nothing follows it.
    """
    marker = "AI:"

    # Locate the echoed user input first. Searching for the marker only after
    # the echo prevents an "AI:" inside the user's own words from being
    # mistaken for the start of the reply.
    echo_at = full_text.find(user_input)
    echo_end = echo_at + len(user_input) if echo_at != -1 else 0

    marker_at = full_text.find(marker, echo_end)
    if marker_at == -1 and echo_at > 0:
        # No marker after the echo; accept one that lies wholly before it.
        marker_at = full_text.find(marker, 0, echo_at)

    # Strategy 1: text after the marker, truncated at the next turn so that a
    # continued "User: ... / AI: ..." dialogue is not included in the reply.
    if marker_at != -1:
        reply = full_text[marker_at + len(marker):]
        return re.split(r"\n\s*User:|AI:", reply, maxsplit=1)[0].strip()

    # Strategy 2: everything after the echoed user input.
    if echo_at != -1:
        return full_text[echo_end:].strip()

    # Strategy 3: drop the first sentence. The lookbehind keeps each
    # sentence's terminal punctuation instead of consuming it.
    text = full_text.strip()
    parts = re.split(r"(?<=[.!?])\s+", text, maxsplit=1)
    if len(parts) > 1:
        return parts[1].strip()

    # Fallback: nothing recognisable to remove.
    return text
 @app.route("/talk", methods=["POST"])
 def talk():
     """Answer an uploaded audio clip with synthesized speech.

     Steps:
       1. Save the uploaded "audio" file to a temporary WAV file.
       2. Transcribe it with the Whisper model.
       3. Generate a reply with the language model.
       4. Synthesize the reply to a temporary WAV file.
       5. Return the audio as an attachment named "ai_response.wav".

     Returns:
         The WAV response on success; a JSON error with status 400 when the
         upload is missing or cannot be transcribed; a JSON error with status
         500 on any other failure.
     """
     # Local import: only this handler needs an in-memory buffer.
     import io

     if "audio" not in request.files:
         return jsonify({"error": "No audio file"}), 400

     input_audio_path = None
     output_audio_path = None
     try:
         # Reserve a unique file name, close the handle, then save by path so
         # the file is never opened twice at the same time.
         audio_file = request.files["audio"]
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
             input_audio_path = tmp.name
         audio_file.save(input_audio_path)

         # Transcribe the audio to text
         segments, _ = whisper_model.transcribe(input_audio_path)
         transcription = "".join(seg.text for seg in segments).strip()
         if not transcription:
             return jsonify({"error": "Could not transcribe audio"}), 400
         print(f"Transcribed: '{transcription}'")

         # Generate the reply and strip the echoed prompt from it
         prompt = f"User: {transcription}\nAI:"
         response_raw = llm(prompt)[0]["generated_text"]
         ai_response = extract_ai_response(response_raw, transcription)
         print(f"AI Response: '{ai_response}'")

         # NamedTemporaryFile creates the file atomically; the deprecated
         # tempfile.mktemp only returns a name and is open to races.
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
             output_audio_path = tmp.name
         tts.tts_to_file(text=ai_response, file_path=output_audio_path)

         # Read the audio into memory so the file can be removed in `finally`;
         # Flask does not delete files passed to send_file.
         with open(output_audio_path, "rb") as synthesized:
             audio_bytes = synthesized.read()

         return send_file(
             io.BytesIO(audio_bytes),
             mimetype="audio/wav",
             as_attachment=True,
             download_name="ai_response.wav",
         )
     except Exception as e:
         print(f"Error in /talk: {str(e)}")
         return jsonify({"error": str(e)}), 500
     finally:
         # Remove both temporary files regardless of the outcome.
         for temp_path in (input_audio_path, output_audio_path):
             if temp_path and os.path.exists(temp_path):
                 try:
                     os.unlink(temp_path)
                 except OSError as e:
                     print(f"Error deleting temporary file {temp_path}: {e}")
 @app.route("/chat", methods=["POST"])
 def chat():
     """Answer a text message with a generated reply.

     Expects a JSON object with a string field "text".

     Returns:
         JSON {"response": <reply>} on success; a JSON error with status 400
         when the body is not a JSON object, "text" is missing, is not a
         string, or is blank; a JSON error with status 500 when generation
         fails.
     """
     # silent=True yields None for a malformed or non-JSON body instead of
     # raising, so a client mistake is reported as 400 rather than being
     # converted into a 500 by a broad exception handler.
     data = request.get_json(silent=True)
     if not isinstance(data, dict) or "text" not in data:
         return jsonify({"error": "Missing 'text' in request body"}), 400

     text = data["text"]
     if not isinstance(text, str):
         return jsonify({"error": "'text' must be a string"}), 400

     user_input = text.strip()
     if not user_input:
         return jsonify({"error": "Empty input"}), 400

     try:
         # Generate the reply and strip the echoed prompt from it
         prompt = f"User: {user_input}\nAI:"
         response_raw = llm(prompt)[0]["generated_text"]
         ai_response = extract_ai_response(response_raw, user_input)
         return jsonify({"response": ai_response})
     except Exception as e:
         print(f"Error in /chat: {str(e)}")
         return jsonify({"error": str(e)}), 500
 @app.route("/")
 def index():
     """Health-check endpoint: reply with a fixed status string."""
     status_message = "Metaverse AI Character API running. Models loaded and ready."
     return status_message
 if __name__ == "__main__":
     # Debug mode is opt-in via FLASK_DEBUG. With host 0.0.0.0 the server is
     # reachable from other machines, and debug mode enables the interactive
     # debugger, which allows arbitrary code execution. Its reloader also
     # starts a second process that loads the models again.
     debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true")
     app.run(host="0.0.0.0", port=7860, debug=debug_enabled)