Spaces:

mac9087
/

metanice

Build error

App Files Community

mac9087 committed 23 days ago

Commit

d1d82fe

verified ·

1 Parent(s): af08117

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -126

app.py CHANGED Viewed

@@ -5,149 +5,53 @@ from transformers import pipeline
 from TTS.api import TTS
 import tempfile
 import os
-import re
 app = Flask(__name__)
 CORS(app)
-# Load models once at startup for better performance
-print("Loading AI models...")
 whisper_model = WhisperModel("small", device="cpu", compute_type="int8")
 llm = pipeline("text-generation", model="tiiuae/falcon-rw-1b", max_new_tokens=100)
 tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
-print("All models loaded successfully!")
-def extract_ai_response(full_text, user_input):
-    """
-    Extract only the AI's response from the generated text using multiple strategies.
-    This helps prevent the TTS engine from repeating the user's input.
-    """
-    # Strategy 1: Try to find text after "AI:" marker
-    if "AI:" in full_text:
-        try:
-            return full_text.split("AI:")[1].strip()
-        except IndexError:
-            pass  # Fall through to next strategy
-    # Strategy 2: Try to find text after the user input
-    if user_input in full_text:
-        try:
-            return full_text[full_text.find(user_input) + len(user_input):].strip()
-        except:
-            pass  # Fall through to next strategy
-    # Strategy 3: Try to split by sentences and remove the first one (likely the input)
-    try:
-        sentences = re.split(r'[.!?]\s+', full_text)
-        if len(sentences) > 1:
-            return ' '.join(sentences[1:]).strip()
-    except:
-        pass  # Fall through to fallback
-    # Fallback: Return the original text if all else fails
-    return full_text.strip()
 @app.route("/talk", methods=["POST"])
 def talk():
-    """
-    Process audio from the user:
-    1. Transcribe the audio to text
-    2. Generate an AI response to the transcription
-    3. Convert the AI response to speech
-    4. Return the speech audio file
-    """
     if "audio" not in request.files:
         return jsonify({"error": "No audio file"}), 400
-    # Create a temporary file for the input audio
-    input_audio_path = None
-    output_audio_path = None
-    try:
-        # Save the uploaded audio to a temporary file
-        audio_file = request.files["audio"]
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-            input_audio_path = tmp.name
-            audio_file.save(input_audio_path)
-        # Transcribe the audio to text
-        segments, _ = whisper_model.transcribe(input_audio_path)
-        transcription = "".join([seg.text for seg in segments]).strip()
-        # Check if transcription was successful
-        if not transcription:
-            return jsonify({"error": "Could not transcribe audio"}), 400
-        print(f"Transcribed: '{transcription}'")
-        # Generate AI response
-        prompt = f"User: {transcription}\nAI:"
-        response_raw = llm(prompt)[0]["generated_text"]
-        # Extract only the AI's response
-        ai_response = extract_ai_response(response_raw, transcription)
-        print(f"AI Response: '{ai_response}'")
-        # Generate speech from the AI response
-        output_audio_path = tempfile.mktemp(suffix=".wav")
-        tts.tts_to_file(text=ai_response, file_path=output_audio_path)
-        # Return the audio file
-        return send_file(
-            output_audio_path,
-            mimetype="audio/wav",
-            as_attachment=True,
-            download_name="ai_response.wav"
-        )
-    except Exception as e:
-        print(f"Error in /talk: {str(e)}")
-        return jsonify({"error": str(e)}), 500
-    finally:
-        # Clean up the input audio file
-        if input_audio_path and os.path.exists(input_audio_path):
-            try:
-                os.unlink(input_audio_path)
-            except Exception as e:
-                print(f"Error deleting input file: {e}")
-        # Note: We don't delete the output file here as Flask will handle that
-        # after the client has downloaded it
 @app.route("/chat", methods=["POST"])
 def chat():
-    """
-    Process text input from the user:
-    1. Generate an AI response to the input
-    2. Return the response as JSON
-    """
-    try:
-        data = request.get_json()
-        if not data or "text" not in data:
-            return jsonify({"error": "Missing 'text' in request body"}), 400
-        user_input = data["text"].strip()
-        if not user_input:
-            return jsonify({"error": "Empty input"}), 400
-        # Generate AI response
-        prompt = f"User: {user_input}\nAI:"
-        response_raw = llm(prompt)[0]["generated_text"]
-        # Extract only the AI's response
-        ai_response = extract_ai_response(response_raw, user_input)
-        return jsonify({"response": ai_response})
-    except Exception as e:
-        print(f"Error in /chat: {str(e)}")
-        return jsonify({"error": str(e)}), 500
 @app.route("/")
 def index():
-    """Simple route to check if the API is running"""
-    return "Metaverse AI Character API running. Models loaded and ready."
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860, debug=True)

 from TTS.api import TTS
 import tempfile
 import os
 app = Flask(__name__)
 CORS(app)
+# Load models
 whisper_model = WhisperModel("small", device="cpu", compute_type="int8")
 llm = pipeline("text-generation", model="tiiuae/falcon-rw-1b", max_new_tokens=100)
 tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
 @app.route("/talk", methods=["POST"])
 def talk():
     if "audio" not in request.files:
         return jsonify({"error": "No audio file"}), 400
+    # Save audio
+    audio_file = request.files["audio"]
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+        audio_path = tmp.name
+        audio_file.save(audio_path)
+    # Transcribe
+    segments, _ = whisper_model.transcribe(audio_path)
+    transcription = "".join([seg.text for seg in segments])
+    # Generate response
+    response_text = llm(transcription)[0]["generated_text"]
+    # Synthesize speech
+    tts_audio_path = audio_path.replace(".wav", "_reply.wav")
+    tts.tts_to_file(text=response_text, file_path=tts_audio_path)
+    return send_file(tts_audio_path, mimetype="audio/wav")
 @app.route("/chat", methods=["POST"])
 def chat():
+    data = request.get_json()
+    if not data or "text" not in data:
+        return jsonify({"error": "Missing 'text' in request body"}), 400
+    user_input = data["text"]
+    response = llm(user_input)[0]["generated_text"]
+    return jsonify({"response": response})
 @app.route("/")
 def index():
+    return "Metaverse AI Character API running."
 if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=7860)