mac9087 committed on
Commit a77cb2e · verified · 1 Parent(s): d997bfa

Update app.py

Files changed (1): app.py (+203 −156)
app.py CHANGED
@@ -7,206 +7,234 @@ import tempfile
 import os
 import re
 import base64
 
 app = Flask(__name__)
 CORS(app)
 
-# Load models
-whisper_model = WhisperModel("small", device="cpu", compute_type="int8")
 
-# Configure the LLM for short, conversational responses
-llm = pipeline(
-    "text-generation",
-    model="tiiuae/falcon-rw-1b",
-    max_new_tokens=50,  # Reduced token count for shorter responses
-)
 
-tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
 
 def process_response(input_text, generated_text):
-    # Handle the case where generated_text might be None
     if not generated_text:
         return "I'm not sure what to say about that."
-
     # Make sure both are strings
     input_text = str(input_text).strip()
     generated_text = str(generated_text).strip()
 
-    # Skip empty input
-    if not input_text:
-        clean_response = generated_text
-    # Remove the input text from the beginning of the response
-    elif generated_text.startswith(input_text):
         clean_response = generated_text[len(input_text):].strip()
     else:
         clean_response = generated_text.strip()
 
-    # If we ended up with nothing, provide a default response
     if not clean_response:
         return "I'm listening."
 
-    # Split into sentences and take only the first 1-2 meaningful sentences
-    sentences = re.split(r'(?<=[.!?])\s+', clean_response)
-
-    # Filter out empty or very short sentences
-    meaningful_sentences = [s for s in sentences if len(s) > 5]
-
-    # Take just 1-2 sentences for a casual, human-like response
-    if meaningful_sentences:
-        if len(meaningful_sentences) > 2:
-            result = " ".join(meaningful_sentences[:2])
-        else:
-            result = " ".join(meaningful_sentences)
     else:
-        # If no meaningful sentences, but we have short sentences, use those
-        if sentences and any(s.strip() for s in sentences):
-            short_sentences = [s for s in sentences if s.strip()]
-            result = " ".join(short_sentences[:2])
-        else:
-            # Fallback if no good sentences were found
-            result = "I'm not sure what to say about that."
 
-    # Remove any repetitive phrases
-    result = remove_repetitions(result)
-
-    # Normalize quotes to ASCII equivalents
-    result = normalize_quotes(result)
 
     return result
 
-def normalize_quotes(text):
-    """Replace curly quotes and other problematic Unicode characters with ASCII equivalents"""
-    # Replace curly quotes with straight quotes
-    text = text.replace('\u201c', '"').replace('\u201d', '"')
-    text = text.replace('\u2018', "'").replace('\u2019', "'")
-    # Add more replacements as needed
-    return text
-
-def remove_repetitions(text):
-    # Simple repetition removal
-    words = text.split()
-    if len(words) <= 5:  # Don't process very short responses
-        return text
-
-    result = []
-    for i in range(len(words)):
-        # Check if this word starts a repeated phrase
-        if i < len(words) - 3:  # Need at least 3 words to check for repetition
-            # Check if next 3+ words appear earlier in the text
-            is_repetition = False
-            for j in range(3, min(10, len(words) - i)):  # Check phrases of length 3 to 10
-                phrase = " ".join(words[i:i+j])
-                if phrase in " ".join(result):
-                    is_repetition = True
-                    break
-
-            if not is_repetition:
-                result.append(words[i])
-        else:
-            result.append(words[i])
-
-    return " ".join(result)
-
 def generate_ai_response(user_input):
-    """
-    Centralized function to generate AI responses to ensure consistency
-    between text and voice responses.
-    """
-    # Handle empty or too short input
     if not user_input or len(user_input.strip()) < 2:
         return "I'm listening. Please say more."
 
     try:
-        # Generate response
         raw_response = llm(user_input)[0]["generated_text"]
 
         # Process to get clean, short response
         final_response = process_response(user_input, raw_response)
 
         return final_response
     except Exception as e:
         print(f"Error generating AI response: {str(e)}")
-        # Return a default response if anything goes wrong
         return "I heard you, but I'm having trouble forming a response right now."
 
 @app.route("/talk", methods=["POST"])
 def talk():
     if "audio" not in request.files:
         return jsonify({"error": "No audio file"}), 400
-
-    # Save audio
     audio_file = request.files["audio"]
 
-    try:
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-            audio_path = tmp.name
-            audio_file.save(audio_path)
-
-        # Transcribe
         try:
-            segments, _ = whisper_model.transcribe(audio_path)
-            transcription = "".join([seg.text for seg in segments])
 
-            print(f"Transcription: {transcription}")  # Debugging
 
             if not transcription.strip():
-                # Handle empty transcription
                 final_response = "I didn't catch that. Could you please speak again?"
             else:
-                # Use the centralized function to generate a response
                 final_response = generate_ai_response(transcription)
 
-            print(f"Voice response: {final_response}")  # Debugging
-        except Exception as e:
-            print(f"Transcription error: {str(e)}")
-            final_response = "I had trouble understanding that. Could you try again?"
-
-        # Prepare TTS output path
-        tts_audio_path = audio_path.replace(".wav", "_reply.wav")
-
-        try:
-            # Synthesize speech
-            tts.tts_to_file(text=final_response, file_path=tts_audio_path)
-
-            if not os.path.exists(tts_audio_path) or os.path.getsize(tts_audio_path) == 0:
-                raise Exception("TTS failed to generate audio file")
-
-        except Exception as e:
-            print(f"TTS error: {str(e)}")
-            # If TTS fails, generate a simple audio file with a message
-            # In a production app, you might want to have a pre-recorded fallback audio
-            tts_audio_path = audio_path  # Just reuse the input path for now
-            final_response = "Sorry, I couldn't generate audio right now."
-
-        # Return both the audio file and the text response
-        try:
-            response = send_file(tts_audio_path, mimetype="audio/wav")
 
-            # Base64 encode the response text to avoid Unicode issues in headers
-            encoded_response = base64.b64encode(final_response.encode('utf-8')).decode('ascii')
-            response.headers["X-Response-Text-Base64"] = encoded_response
-            response.headers["Access-Control-Expose-Headers"] = "X-Response-Text-Base64"
 
-            return response
         except Exception as e:
-            print(f"Error sending file: {str(e)}")
-            return jsonify({
-                "error": "Could not send audio response",
-                "text_response": final_response
-            }), 500
-
-    except Exception as e:
-        print(f"Error in talk endpoint: {str(e)}")
-        return jsonify({"error": str(e)}), 500
-    finally:
-        # Clean up temporary files
-        try:
-            if 'audio_path' in locals() and os.path.exists(audio_path):
-                os.unlink(audio_path)
-            if 'tts_audio_path' in locals() and os.path.exists(tts_audio_path) and tts_audio_path != audio_path:
-                os.unlink(tts_audio_path)
-        except Exception as cleanup_error:
-            print(f"Error cleaning up files: {str(cleanup_error)}")
 
 @app.route("/chat", methods=["POST"])
 def chat():
@@ -214,23 +242,42 @@ def chat():
     if not data or "text" not in data:
         return jsonify({"error": "Missing 'text' in request body"}), 400
 
-    try:
-        user_input = data["text"]
-        print(f"Text input: {user_input}")  # Debugging
-
-        # Use the centralized function to generate a response
-        final_response = generate_ai_response(user_input)
-
-        print(f"Text response: {final_response}")  # Debugging
-
-        return jsonify({"response": final_response})
-    except Exception as e:
-        print(f"Error in chat endpoint: {str(e)}")
-        return jsonify({"response": "I'm having trouble processing that. Could you try again?", "error": str(e)})
 
 @app.route("/")
 def index():
     return "Metaverse AI Character API running."
 
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860)
 import os
 import re
 import base64
+import threading
+import queue
+import time
 
 app = Flask(__name__)
 CORS(app)
 
+# Global variables to hold models and caches
+whisper_model = None
+llm = None
+tts = None
+response_cache = {}
+model_lock = threading.Lock()
+models_loaded = False
+loading_thread = None
+load_queue = queue.Queue()
 
+# Use a smaller Whisper model for faster inference
+WHISPER_MODEL_SIZE = "tiny"  # Changed from "small" to "tiny"
 
+def load_models():
+    """Load all models in background thread"""
+    global whisper_model, llm, tts, models_loaded
+
+    print("Starting model loading...")
+
+    # Load Whisper model with optimized settings
+    whisper_model = WhisperModel(
+        WHISPER_MODEL_SIZE,
+        device="cpu",
+        compute_type="int8",
+        download_root="./models"  # Cache models to disk
+    )
+    print("Whisper model loaded")
+
+    # Use a smaller, faster LLM
+    llm = pipeline(
+        "text-generation",
+        model="distilgpt2",  # Much smaller than falcon-rw-1b
+        max_new_tokens=40,  # Further reduce token count
+        device="cpu"
+    )
+    print("LLM loaded")
+
+    # Load TTS model
+    tts = TTS(
+        model_name="tts_models/en/ljspeech/fast_pitch",  # Using faster model
+        progress_bar=False,
+        gpu=False
+    )
+    print("TTS model loaded")
+
+    with model_lock:
+        models_loaded = True
+
+    print("All models loaded successfully")
+
+    # Drain any callbacks queued while loading; their return values are
+    # discarded, since the queuing clients already received a 503 and retry
+    while not load_queue.empty():
+        callback = load_queue.get()
+        callback()
+
+# Start loading models in background thread
+def start_loading_models():
+    global loading_thread
+    loading_thread = threading.Thread(target=load_models)
+    loading_thread.daemon = True
+    loading_thread.start()
+
+start_loading_models()
+
+def ensure_models_loaded(callback):
+    """Ensure models are loaded before processing a request"""
+    with model_lock:
+        if models_loaded:
+            # Models already loaded: run the handler and return its response
+            return callback()
+        else:
+            # Queue the callback for when models finish loading
+            load_queue.put(callback)
+            return jsonify({
+                "status": "loading",
+                "message": "Models are still loading. Please try again in a moment."
+            }), 503
 
 def process_response(input_text, generated_text):
+    """Process and clean up LLM response - optimized for speed"""
     if not generated_text:
         return "I'm not sure what to say about that."
+
     # Make sure both are strings
     input_text = str(input_text).strip()
     generated_text = str(generated_text).strip()
 
+    # Extract the response portion (everything after the input)
+    if generated_text.startswith(input_text):
         clean_response = generated_text[len(input_text):].strip()
     else:
         clean_response = generated_text.strip()
 
+    # Fallback for empty responses
     if not clean_response:
         return "I'm listening."
 
+    # Simplified sentence extraction - just get first sentence for faster response
+    sentences = re.split(r'(?<=[.!?])\s+', clean_response, maxsplit=2)
+    if sentences:
+        # Just use the first sentence for maximum speed
+        result = sentences[0].strip()
+        # Add second sentence if it's not too long
+        if len(sentences) > 1 and len(sentences[1]) < 30:
+            result += " " + sentences[1].strip()
     else:
+        result = clean_response
 
+    # Simple normalization (replace curly quotes with ASCII equivalents)
+    result = result.replace('\u201c', '"').replace('\u201d', '"')
+    result = result.replace('\u2018', "'").replace('\u2019', "'")
 
     return result
 
 def generate_ai_response(user_input):
+    """Generate AI responses - with caching for speed"""
+    # Check cache for identical requests to avoid recomputation
+    cache_key = user_input.strip().lower()
+    if cache_key in response_cache:
+        print("Cache hit!")
+        return response_cache[cache_key]
+
     if not user_input or len(user_input.strip()) < 2:
         return "I'm listening. Please say more."
 
     try:
+        start_time = time.time()
+        # Generate response with fewer tokens
         raw_response = llm(user_input)[0]["generated_text"]
 
         # Process to get clean, short response
         final_response = process_response(user_input, raw_response)
+        print(f"LLM processing time: {time.time() - start_time:.2f}s")
+
+        # Cache the response for future identical requests
+        response_cache[cache_key] = final_response
+
+        # Limit cache size to prevent memory issues
+        if len(response_cache) > 100:
+            # Remove oldest entries (simple approach)
+            keys_to_remove = list(response_cache.keys())[:-50]
+            for k in keys_to_remove:
+                response_cache.pop(k, None)
 
         return final_response
     except Exception as e:
         print(f"Error generating AI response: {str(e)}")
         return "I heard you, but I'm having trouble forming a response right now."
 
 @app.route("/talk", methods=["POST"])
 def talk():
     if "audio" not in request.files:
         return jsonify({"error": "No audio file"}), 400
+
     audio_file = request.files["audio"]
 
+    def process_request():
+        nonlocal audio_file
         try:
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+                audio_path = tmp.name
+                audio_file.save(audio_path)
 
+            # Transcribe with optimized settings
+            transcribe_start = time.time()
+            segments, _ = whisper_model.transcribe(
+                audio_path,
+                beam_size=1,  # Reduce beam size for speed
+                vad_filter=True,  # Use voice activity detection to process only speech
+                vad_parameters=dict(min_silence_duration_ms=500)  # Tune VAD for speed
+            )
+            transcription = "".join([seg.text for seg in segments])
+            print(f"Transcription time: {time.time() - transcribe_start:.2f}s")
 
             if not transcription.strip():
                 final_response = "I didn't catch that. Could you please speak again?"
             else:
                 final_response = generate_ai_response(transcription)
 
+            # Prepare TTS output path
+            tts_audio_path = audio_path.replace(".wav", "_reply.wav")
 
+            # Synthesize speech with optimized settings
+            tts_start = time.time()
+            tts.tts_to_file(
+                text=final_response,
+                file_path=tts_audio_path,
+                speaker_wav=None,
+                speed=1.1  # Slightly faster speech
+            )
+            print(f"TTS time: {time.time() - tts_start:.2f}s")
 
+            # Return both the audio file and the text response
+            try:
+                response = send_file(tts_audio_path, mimetype="audio/wav")
+                encoded_response = base64.b64encode(final_response.encode('utf-8')).decode('ascii')
+                response.headers["X-Response-Text-Base64"] = encoded_response
+                response.headers["Access-Control-Expose-Headers"] = "X-Response-Text-Base64"
+                return response
+            except Exception as e:
+                print(f"Error sending file: {str(e)}")
+                return jsonify({
+                    "error": "Could not send audio response",
+                    "text_response": final_response
+                }), 500
+
         except Exception as e:
+            print(f"Error in talk endpoint: {str(e)}")
+            return jsonify({"error": str(e)}), 500
+        finally:
+            # Clean up temporary files
+            try:
+                if 'audio_path' in locals() and os.path.exists(audio_path):
+                    os.unlink(audio_path)
+                if 'tts_audio_path' in locals() and os.path.exists(tts_audio_path) and tts_audio_path != audio_path:
+                    os.unlink(tts_audio_path)
+            except Exception as cleanup_error:
+                print(f"Error cleaning up files: {str(cleanup_error)}")
+
+    # Ensure models are loaded before processing
+    return ensure_models_loaded(process_request)
 
 @app.route("/chat", methods=["POST"])
 def chat():
     if not data or "text" not in data:
         return jsonify({"error": "Missing 'text' in request body"}), 400
 
+    user_input = data["text"]
+
+    def process_request():
+        try:
+            print(f"Text input: {user_input}")  # Debugging
+
+            # Start timing
+            start_time = time.time()
+
+            # Generate response
+            final_response = generate_ai_response(user_input)
+
+            # Report timing
+            print(f"Total processing time: {time.time() - start_time:.2f}s")
+
+            return jsonify({"response": final_response})
+        except Exception as e:
+            print(f"Error in chat endpoint: {str(e)}")
+            return jsonify({"response": "I'm having trouble processing that. Could you try again?", "error": str(e)})
+
+    # Ensure models are loaded before processing
+    return ensure_models_loaded(process_request)
+
+@app.route("/status", methods=["GET"])
+def status():
+    """Check if models are loaded and ready"""
+    with model_lock:
+        if models_loaded:
+            return jsonify({"status": "ready", "message": "All models loaded and ready"})
+        else:
+            return jsonify({"status": "loading", "message": "Models are still loading"})
 
 @app.route("/")
 def index():
     return "Metaverse AI Character API running."
 
 if __name__ == "__main__":
+    # Use threaded server for better concurrency
+    app.run(host="0.0.0.0", port=7860, threaded=True)
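
For reference, a minimal client-side sketch of how the endpoints in this revision can be exercised. The base URL and the input file name are assumptions for illustration; the /status polling, the /chat JSON contract, and the base64-encoded X-Response-Text-Base64 header on /talk responses come from the code above.

# client_example.py - hypothetical client for the API above
import base64
import time

import requests

BASE_URL = "http://localhost:7860"  # assumed local deployment; adjust as needed

# Poll /status until the background loader thread reports the models as ready
while requests.get(f"{BASE_URL}/status").json().get("status") != "ready":
    time.sleep(2)

# Text chat: POST JSON with a "text" field and read back the "response" field
r = requests.post(f"{BASE_URL}/chat", json={"text": "Hello there!"})
print(r.json()["response"])

# Voice chat: POST a WAV file under the "audio" field; the reply audio is the
# response body, and the reply text is base64-encoded in a response header
with open("question.wav", "rb") as f:  # hypothetical input recording
    r = requests.post(f"{BASE_URL}/talk", files={"audio": f})
if r.ok:
    reply_text = base64.b64decode(r.headers["X-Response-Text-Base64"]).decode("utf-8")
    print("Character said:", reply_text)
    with open("reply.wav", "wb") as out:
        out.write(r.content)
else:
    print("Request failed:", r.status_code, r.text)

Until load_models() finishes, both POST endpoints answer 503 with a "loading" status, which is why the client polls /status (or simply retries) before sending real traffic.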