mac9087 committed
Commit d97558b · verified · 1 Parent(s): 596a84e

Update app.py

Files changed (1):
  1. app.py +126 -339
app.py CHANGED
@@ -1,221 +1,96 @@
  from flask import Flask, request, jsonify, send_file
  from flask_cors import CORS
- from faster_whisper import WhisperModel
- from transformers import pipeline
- from TTS.api import TTS
  import tempfile
  import os
- import re
- import base64
- import threading
- import queue
  import time

  app = Flask(__name__)
  CORS(app)

- # Global variables to hold models and caches
- whisper_model = None
- llm = None
- tts = None
  response_cache = {}
- model_lock = threading.Lock()
- models_loaded = False
- loading_thread = None
- load_queue = queue.Queue()

- # Define paths with proper permissions
- TEMP_DIR = "/tmp/ai_models"
  os.makedirs(TEMP_DIR, exist_ok=True)

- # Environment variable to control model size
- # Set to "tiny" for fastest response, "base" for better quality but still fast
- WHISPER_MODEL_SIZE = os.environ.get("WHISPER_MODEL_SIZE", "tiny")
-
- def load_models():
-     """Load all models in background thread"""
-     global whisper_model, llm, tts, models_loaded
-
-     print("Starting model loading...")
-
-     try:
-         # Load Whisper model with optimized settings
-         whisper_model = WhisperModel(
-             WHISPER_MODEL_SIZE,
-             device="cpu",
-             compute_type="int8",
-             download_root=TEMP_DIR  # Use temp directory with write permissions
-         )
-         print("Whisper model loaded")
-
-         # Use a smaller, faster LLM
-         llm = pipeline(
-             "text-generation",
-             model="distilgpt2",  # Much smaller than falcon-rw-1b
-             max_new_tokens=40,  # Further reduce token count
-             device="cpu"
-         )
-         print("LLM loaded")
-
-         # Load TTS model
-         tts = TTS(
-             model_name="tts_models/en/ljspeech/fast_pitch",  # Using faster model
-             progress_bar=False,
-             gpu=False
-         )
-         print("TTS model loaded")
-
-         with model_lock:
-             models_loaded = True
-
-     except Exception as e:
-         print(f"Error loading Whisper model: {str(e)}")
-         whisper_model = None
-
-         # Mark models as loaded even if some failed - we'll use fallbacks
-         with model_lock:
-             models_loaded = True
-
-     print("Model loading completed")
-
-     # Process any pending requests that arrived during loading
-     while not load_queue.empty():
-         callback = load_queue.get()
-         callback()
-
- # Fallback methods for when models fail to load
- def fallback_transcribe(audio_path):
-     """Simple fallback when Whisper fails to load"""
-     # Just return empty text - in production you might want a more sophisticated fallback
-     return "I couldn't transcribe the audio due to technical issues."
-
- def fallback_generate_text(user_input):
-     """Simple rule-based response when LLM fails to load"""
-     # Very basic template responses
-     if not user_input or len(user_input) < 5:
-         return "I'm listening. Please continue."
-
-     if "?" in user_input:
-         return "That's an interesting question. I'm processing it now."
-
-     # Simple acknowledgment responses
-     responses = [
-         "I understand what you're saying.",
-         "I'm following your thoughts.",
-         "I hear you loud and clear.",
-         "I'm processing that information.",
-         "That makes sense to me."
-     ]
-     import random
-     return random.choice(responses)
-
- # Start loading models in background thread
- def start_loading_models():
-     global loading_thread
-     loading_thread = threading.Thread(target=load_models)
-     loading_thread.daemon = True
-     loading_thread.start()
-
- # Create temp directory and start loading
- try:
-     os.makedirs(TEMP_DIR, exist_ok=True)
-     print(f"Created model cache directory at {TEMP_DIR}")
-     start_loading_models()
- except Exception as e:
-     print(f"Error setting up model loading: {str(e)}")
-     # Automatically mark as loaded with no models
-     with model_lock:
-         models_loaded = True
-
- def ensure_models_loaded(callback):
-     """Ensure models are loaded before processing a request"""
-     with model_lock:
-         if models_loaded:
-             # Models already loaded, process immediately
-             callback()
-         else:
-             # Queue the callback for when models finish loading
-             load_queue.put(callback)
-             return jsonify({
-                 "status": "loading",
-                 "message": "Models are still loading. Please try again in a moment."
-             }), 503
-
- def process_response(input_text, generated_text):
-     """Process and clean up LLM response - optimized for speed"""
-     if not generated_text:
-         return "I'm not sure what to say about that."
-
-     # Make sure both are strings
-     input_text = str(input_text).strip()
-     generated_text = str(generated_text).strip()

-     # Extract the response portion (everything after the input)
-     if generated_text.startswith(input_text):
-         clean_response = generated_text[len(input_text):].strip()
      else:
-         clean_response = generated_text.strip()

-     # Fallback for empty responses
-     if not clean_response:
-         return "I'm listening."

-     # Simplified sentence extraction - just get first sentence for faster response
-     sentences = re.split(r'(?<=[.!?])\s+', clean_response, maxsplit=2)
-     if sentences:
-         # Just use the first sentence for maximum speed
-         result = sentences[0].strip()
-         # Add second sentence if it's not too long
-         if len(sentences) > 1 and len(sentences[1]) < 30:
-             result += " " + sentences[1].strip()
-     else:
-         result = clean_response

-     # Simple normalization
-     result = result.replace('“', '"').replace('”', '"')
-     result = result.replace('‘', "'").replace('’', "'")
-
-     return result

- def generate_ai_response(user_input):
-     """Generate AI responses - with caching for speed"""
-     # Check cache for identical requests to avoid recomputation
-     cache_key = user_input.strip().lower()
-     if cache_key in response_cache:
-         print("Cache hit!")
-         return response_cache[cache_key]
-
-     if not user_input or len(user_input.strip()) < 2:
-         return "I'm listening. Please say more."
-
      try:
-         # If LLM failed to load, use fallback
-         if llm is None:
-             print("Using fallback text generation")
-             final_response = fallback_generate_text(user_input)
-         else:
-             start_time = time.time()
-             # Generate response with fewer tokens
-             raw_response = llm(user_input)[0]["generated_text"]
-
-             # Process to get clean, short response
-             final_response = process_response(user_input, raw_response)
-             print(f"LLM processing time: {time.time() - start_time:.2f}s")

-         # Cache the response for future identical requests
-         response_cache[cache_key] = final_response

-         # Limit cache size to prevent memory issues
-         if len(response_cache) > 100:
-             # Remove oldest entries (simple approach)
-             keys_to_remove = list(response_cache.keys())[:-50]
-             for k in keys_to_remove:
-                 response_cache.pop(k, None)

-         return final_response
      except Exception as e:
-         print(f"Error generating AI response: {str(e)}")
-         return "I heard you, but I'm having trouble forming a response right now."

  @app.route("/talk", methods=["POST"])
  def talk():
@@ -224,161 +99,73 @@ def talk():

      audio_file = request.files["audio"]

-     def process_request():
-         nonlocal audio_file
          try:
-             # Prepare file paths
-             with tempfile.NamedTemporaryFile(delete=False, suffix=".wav", dir=TEMP_DIR) as tmp:
-                 audio_path = tmp.name
-                 audio_file.save(audio_path)
-
-             # Transcribe audio
-             transcribe_start = time.time()
-             if whisper_model is None:
-                 # Fallback if model failed to load
-                 print("Using fallback transcription")
-                 transcription = fallback_transcribe(audio_path)
-             else:
-                 try:
-                     # Transcribe with optimized settings
-                     segments, _ = whisper_model.transcribe(
-                         audio_path,
-                         beam_size=1,  # Reduce beam size for speed
-                         vad_filter=True,  # Use voice activity detection to process only speech
-                         vad_parameters=dict(min_silence_duration_ms=500)  # Tune VAD for speed
-                     )
-                     transcription = "".join([seg.text for seg in segments])
-                 except Exception as e:
-                     print(f"Whisper transcription error: {str(e)}")
-                     transcription = ""
-
-             print(f"Transcription time: {time.time() - transcribe_start:.2f}s")
-
-             if not transcription.strip():
-                 final_response = "I didn't catch that. Could you please speak again?"
-             else:
-                 final_response = generate_ai_response(transcription)
-
-             # Prepare TTS output path
-             tts_audio_path = audio_path.replace(".wav", "_reply.wav")
-
-             # Synthesize speech
-             tts_start = time.time()
-             if tts is None:
-                 # If TTS failed to load, create a simple audio file with message
-                 print("Using fallback TTS (no speech synthesis)")
-                 # Just copy the input file as a placeholder
-                 import shutil
-                 shutil.copyfile(audio_path, tts_audio_path)
-             else:
-                 try:
-                     # Synthesize speech with optimized settings
-                     tts.tts_to_file(
-                         text=final_response,
-                         file_path=tts_audio_path,
-                         speaker_wav=None,
-                         speed=1.1  # Slightly faster speech
-                     )
-                 except Exception as e:
-                     print(f"TTS error: {str(e)}")
-                     # Just copy the input file as a placeholder
-                     import shutil
-                     shutil.copyfile(audio_path, tts_audio_path)
-
-             print(f"TTS time: {time.time() - tts_start:.2f}s")
-
-             # Return both the audio file and the text response
-             try:
-                 response = send_file(tts_audio_path, mimetype="audio/wav")
-                 encoded_response = base64.b64encode(final_response.encode('utf-8')).decode('ascii')
-                 response.headers["X-Response-Text-Base64"] = encoded_response
-                 response.headers["Access-Control-Expose-Headers"] = "X-Response-Text-Base64"
-                 return response
-             except Exception as e:
-                 print(f"Error sending file: {str(e)}")
-                 return jsonify({
-                     "error": "Could not send audio response",
-                     "text_response": final_response
-                 }), 500
-
          except Exception as e:
-             print(f"Error in talk endpoint: {str(e)}")
-             return jsonify({"error": str(e)}), 500
-         finally:
-             # Clean up temporary files
-             try:
-                 if 'audio_path' in locals() and os.path.exists(audio_path):
-                     os.unlink(audio_path)
-                 if 'tts_audio_path' in locals() and os.path.exists(tts_audio_path) and tts_audio_path != audio_path:
-                     os.unlink(tts_audio_path)
-             except Exception as cleanup_error:
-                 print(f"Error cleaning up files: {str(cleanup_error)}")
-
-     # Ensure models are loaded before processing
-     return ensure_models_loaded(process_request)

- @app.route("/chat", methods=["POST"])
- def chat():
-     data = request.get_json()
-     if not data or "text" not in data:
-         return jsonify({"error": "Missing 'text' in request body"}), 400
-
-     user_input = data["text"]
-
-     def process_request():
-         try:
-             print(f"Text input: {user_input}")  # Debugging
-
-             # Start timing
-             start_time = time.time()
-
-             # Generate response
-             final_response = generate_ai_response(user_input)
-
-             # Report timing
-             print(f"Total processing time: {time.time() - start_time:.2f}s")
-
-             return jsonify({"response": final_response})
-         except Exception as e:
-             print(f"Error in chat endpoint: {str(e)}")
-             return jsonify({"response": "I'm having trouble processing that. Could you try again?", "error": str(e)})
-
-     # Ensure models are loaded before processing
-     return ensure_models_loaded(process_request)

  @app.route("/status", methods=["GET"])
  def status():
-     """Check if models are loaded and ready"""
-     with model_lock:
-         if models_loaded:
-             return jsonify({"status": "ready", "message": "All models loaded and ready"})
-         else:
-             return jsonify({"status": "loading", "message": "Models are still loading"})

  @app.route("/")
  def index():
-     return "Metaverse AI Character API running."
-
- # Add direct-response mode for maximum performance
- @app.route("/quick_chat", methods=["POST"])
- def quick_chat():
-     """Ultra-fast endpoint that skips ML models completely for instant responses"""
-     data = request.get_json()
-     if not data or "text" not in data:
-         return jsonify({"error": "Missing 'text' in request body"}), 400
-
-     try:
-         user_input = data["text"]
-         print(f"Quick chat input: {user_input}")
-
-         # Use simple rule-based responses for maximum speed
-         final_response = fallback_generate_text(user_input)
-
-         return jsonify({"response": final_response})
-     except Exception as e:
-         print(f"Error in quick_chat: {str(e)}")
-         return jsonify({"response": "I'm listening."})

  if __name__ == "__main__":
      # Use threaded server for better concurrency
      app.run(host="0.0.0.0", port=7860, threaded=True)

  from flask import Flask, request, jsonify, send_file
  from flask_cors import CORS
  import tempfile
  import os
  import time
+ import random
+ import base64

  app = Flask(__name__)
  CORS(app)

+ # Simple storage for responses
  response_cache = {}

+ # Configure paths
+ TEMP_DIR = "/tmp/ai_responses"
  os.makedirs(TEMP_DIR, exist_ok=True)

+ # Quick responses library for when no ML is needed
+ QUICK_RESPONSES = [
+     "I understand what you're saying.",
+     "I'm following your thoughts.",
+     "I hear you loud and clear.",
+     "That makes sense to me.",
+     "I'm processing that information.",
+     "I hear what you're saying.",
+     "Interesting point.",
+     "I see where you're coming from.",
+     "That's a good perspective.",
+     "I'm with you on that.",
+     "Tell me more about that.",
+     "I'm listening carefully.",
+     "I appreciate your thoughts on this.",
+     "That's an interesting way to look at it.",
+     "I'm taking that into consideration."
+ ]
+
+ # Responses for questions
+ QUESTION_RESPONSES = [
+     "That's a good question. Let me think about it.",
+     "I'm considering different perspectives on that question.",
+     "That's something I've been thinking about as well.",
+     "That's an interesting question to explore.",
+     "I'm processing your question and considering how to respond."
+ ]
+
+ def get_quick_response(user_input):
+     """Generate a fast response based on simple rules"""
+     # Check cache first for identical requests
+     cache_key = user_input.strip().lower()
+     if cache_key in response_cache:
+         return response_cache[cache_key]

+     # Minimal processing
+     if not user_input or len(user_input.strip()) < 3:
+         response = "I'm listening. Please tell me more."
+     elif "?" in user_input:
+         response = random.choice(QUESTION_RESPONSES)
      else:
+         response = random.choice(QUICK_RESPONSES)

+     # Cache the response
+     response_cache[cache_key] = response

+     # Limit cache size
+     if len(response_cache) > 100:
+         keys_to_remove = list(response_cache.keys())[:-50]
+         for k in keys_to_remove:
+             response_cache.pop(k, None)

+     return response

+ @app.route("/chat", methods=["POST"])
+ def chat():
+     data = request.get_json()
+     if not data or "text" not in data:
+         return jsonify({"error": "Missing 'text' in request body"}), 400
+
      try:
+         user_input = data["text"]
+         print(f"Text input: {user_input}")

+         # Add a tiny delay to make it seem like it's "thinking" (50-150ms)
+         time.sleep(random.uniform(0.05, 0.15))

+         # Get response
+         final_response = get_quick_response(user_input)
+         print(f"Text response: {final_response}")

+         return jsonify({"response": final_response})
      except Exception as e:
+         print(f"Error in chat endpoint: {str(e)}")
+         return jsonify({"response": "I'm listening."})

  @app.route("/talk", methods=["POST"])
  def talk():

      audio_file = request.files["audio"]

+     try:
+         # Save the input audio temporarily
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav", dir=TEMP_DIR) as tmp:
+             audio_path = tmp.name
+             audio_file.save(audio_path)
+
+         # We're not actually processing the audio, just echoing back a response
+         # In a real app, you would transcribe here
+
+         # Get a quick canned response
+         final_response = get_quick_response("Hello")
+
+         # In a real app, you would generate speech here
+         # For now, we'll just copy the input file as a placeholder
+         tts_audio_path = audio_path.replace(".wav", "_reply.wav")
+
+         # Add a small delay to mimic processing time
+         time.sleep(random.uniform(0.1, 0.3))
+
+         # Just copy the file for now since we can't actually generate speech
+         import shutil
+         shutil.copyfile(audio_path, tts_audio_path)
+
+         # Return both the audio file and the text response
          try:
+             response = send_file(tts_audio_path, mimetype="audio/wav")
+             encoded_response = base64.b64encode(final_response.encode('utf-8')).decode('ascii')
+             response.headers["X-Response-Text-Base64"] = encoded_response
+             response.headers["Access-Control-Expose-Headers"] = "X-Response-Text-Base64"
+             return response
          except Exception as e:
+             print(f"Error sending file: {str(e)}")
+             return jsonify({
+                 "error": "Could not send audio response",
+                 "text_response": final_response
+             }), 500
+     except Exception as e:
+         print(f"Error in talk endpoint: {str(e)}")
+         return jsonify({"error": str(e)}), 500
+     finally:
+         # Clean up temporary files
+         try:
+             if 'audio_path' in locals() and os.path.exists(audio_path):
+                 os.unlink(audio_path)
+             if 'tts_audio_path' in locals() and os.path.exists(tts_audio_path) and tts_audio_path != audio_path:
+                 os.unlink(tts_audio_path)
+         except Exception as cleanup_error:
+             print(f"Error cleaning up files: {str(cleanup_error)}")

+ @app.route("/quick_chat", methods=["POST"])
+ def quick_chat():
+     """Alias for chat endpoint for compatibility"""
+     return chat()

  @app.route("/status", methods=["GET"])
  def status():
+     """Simple status endpoint"""
+     return jsonify({
+         "status": "ready",
+         "message": "Simple response system running and ready"
+     })

  @app.route("/")
  def index():
+     return "Metaverse AI Character API running. Ultra-fast version."

  if __name__ == "__main__":
+     print("Starting ultra-fast response API...")
      # Use threaded server for better concurrency
      app.run(host="0.0.0.0", port=7860, threaded=True)
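
For reference, a minimal client sketch for exercising the endpoints defined in the updated app.py. It is not part of the commit: the base URL, the input/output file names, and the use of the requests package are assumptions; only the routes, the JSON "text" field, the multipart "audio" field, and the X-Response-Text-Base64 header come from the code above.

```python
# Hypothetical client for the /chat and /talk endpoints in app.py above.
# Assumes the Flask app is running locally on port 7860 and that the
# `requests` package is installed; adjust BASE_URL for a deployed Space.
import base64
import requests

BASE_URL = "http://localhost:7860"  # assumption, not part of the commit

# /chat takes JSON with a "text" field and returns {"response": ...}
r = requests.post(f"{BASE_URL}/chat", json={"text": "How are you today?"})
print(r.json()["response"])

# /talk takes a multipart "audio" file; the reply audio is the response body
# and the reply text is base64-encoded in the X-Response-Text-Base64 header.
with open("input.wav", "rb") as f:  # "input.wav" is a placeholder path
    r = requests.post(f"{BASE_URL}/talk",
                      files={"audio": ("input.wav", f, "audio/wav")})

encoded = r.headers.get("X-Response-Text-Base64", "")
if encoded:
    print(base64.b64decode(encoded).decode("utf-8"))
with open("reply.wav", "wb") as out:  # save the returned audio
    out.write(r.content)
```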