mac9087 committed
Commit 0426b81 · verified · Parent: d97558b

Update app.py

Files changed (1): app.py (+428 −109)
app.py CHANGED
@@ -1,133 +1,364 @@
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
import tempfile
import os
- import time
- import random
import base64

app = Flask(__name__)
CORS(app)

- # Simple storage for responses
- response_cache = {}
-
- # Configure paths
- TEMP_DIR = "/tmp/ai_responses"
- os.makedirs(TEMP_DIR, exist_ok=True)
-
- # Quick responses library for when no ML is needed
- QUICK_RESPONSES = [
-     "I understand what you're saying.",
-     "I'm following your thoughts.",
-     "I hear you loud and clear.",
-     "That makes sense to me.",
-     "I'm processing that information.",
-     "I hear what you're saying.",
-     "Interesting point.",
-     "I see where you're coming from.",
-     "That's a good perspective.",
-     "I'm with you on that.",
-     "Tell me more about that.",
-     "I'm listening carefully.",
-     "I appreciate your thoughts on this.",
-     "That's an interesting way to look at it.",
-     "I'm taking that into consideration."
- ]
-
- # Responses for questions
- QUESTION_RESPONSES = [
-     "That's a good question. Let me think about it.",
-     "I'm considering different perspectives on that question.",
-     "That's something I've been thinking about as well.",
-     "That's an interesting question to explore.",
-     "I'm processing your question and considering how to respond."
- ]
-
- def get_quick_response(user_input):
-     """Generate a fast response based on simple rules"""
-     # Check cache first for identical requests
-     cache_key = user_input.strip().lower()
-     if cache_key in response_cache:
-         return response_cache[cache_key]
-
-     # Minimal processing
-     if not user_input or len(user_input.strip()) < 3:
-         response = "I'm listening. Please tell me more."
-     elif "?" in user_input:
-         response = random.choice(QUESTION_RESPONSES)
-     else:
-         response = random.choice(QUICK_RESPONSES)

-     # Cache the response
-     response_cache[cache_key] = response

-     # Limit cache size
-     if len(response_cache) > 100:
-         keys_to_remove = list(response_cache.keys())[:-50]
-         for k in keys_to_remove:
-             response_cache.pop(k, None)

-     return response
-
- @app.route("/chat", methods=["POST"])
- def chat():
-     data = request.get_json()
-     if not data or "text" not in data:
-         return jsonify({"error": "Missing 'text' in request body"}), 400

    try:
-         user_input = data["text"]
-         print(f"Text input: {user_input}")

-         # Add a tiny delay to make it seem like it's "thinking" (50-150ms)
-         time.sleep(random.uniform(0.05, 0.15))

-         # Get response
-         final_response = get_quick_response(user_input)
-         print(f"Text response: {final_response}")

-         return jsonify({"response": final_response})
    except Exception as e:
-         print(f"Error in chat endpoint: {str(e)}")
-         return jsonify({"response": "I'm listening."})

@app.route("/talk", methods=["POST"])
def talk():
    if "audio" not in request.files:
        return jsonify({"error": "No audio file"}), 400

    audio_file = request.files["audio"]

    try:
-         # Save the input audio temporarily
-         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav", dir=TEMP_DIR) as tmp:
            audio_path = tmp.name
            audio_file.save(audio_path)

-         # We're not actually processing the audio, just echoing back a response
-         # In a real app, you would transcribe here
-
-         # Get a quick canned response
-         final_response = get_quick_response("Hello")
-
-         # In a real app, you would generate speech here
-         # For now, we'll just copy the input file as a placeholder
-         tts_audio_path = audio_path.replace(".wav", "_reply.wav")
-
-         # Add a small delay to mimic processing time
-         time.sleep(random.uniform(0.1, 0.3))
-
-         # Just copy the file for now since we can't actually generate speech
-         import shutil
-         shutil.copyfile(audio_path, tts_audio_path)

        # Return both the audio file and the text response
        try:
            response = send_file(tts_audio_path, mimetype="audio/wav")
            encoded_response = base64.b64encode(final_response.encode('utf-8')).decode('ascii')
            response.headers["X-Response-Text-Base64"] = encoded_response
            response.headers["Access-Control-Expose-Headers"] = "X-Response-Text-Base64"
            return response
        except Exception as e:
            print(f"Error sending file: {str(e)}")
@@ -135,6 +366,7 @@ def talk():
                "error": "Could not send audio response",
                "text_response": final_response
            }), 500
    except Exception as e:
        print(f"Error in talk endpoint: {str(e)}")
        return jsonify({"error": str(e)}), 500
@@ -143,29 +375,116 @@ def talk():
        try:
            if 'audio_path' in locals() and os.path.exists(audio_path):
                os.unlink(audio_path)
-             if 'tts_audio_path' in locals() and os.path.exists(tts_audio_path) and tts_audio_path != audio_path:
                os.unlink(tts_audio_path)
        except Exception as cleanup_error:
            print(f"Error cleaning up files: {str(cleanup_error)}")

- @app.route("/quick_chat", methods=["POST"])
- def quick_chat():
-     """Alias for chat endpoint for compatibility"""
-     return chat()

- @app.route("/status", methods=["GET"])
- def status():
-     """Simple status endpoint"""
    return jsonify({
-         "status": "ready",
-         "message": "Simple response system running and ready"
    })

- @app.route("/")
- def index():
-     return "Metaverse AI Character API running. Ultra-fast version."

if __name__ == "__main__":
-     print("Starting ultra-fast response API...")
-     # Use threaded server for better concurrency
-     app.run(host="0.0.0.0", port=7860, threaded=True)
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
+ from faster_whisper import WhisperModel
+ from transformers import pipeline
+ from TTS.api import TTS
import tempfile
import os
+ import re
import base64
+ import threading
+ import functools
+ import time
+ from cachetools import LRUCache, cached, TTLCache
+ import gc
+ import psutil

app = Flask(__name__)
CORS(app)

+ # Global configuration for low CPU environment
+ MODEL_CACHE_SIZE = 200  # Increased cache size to reduce recomputation
+ MODEL_CACHE_TTL = 7200  # Increased cache TTL to 2 hours
+ USE_GPU = False  # No GPU available
+
+ # Load models lazily
+ whisper_model = None
+ llm = None
+ tts = None
+ models_loaded = False
+ models_lock = threading.Lock()
+
+ # Initialize caches
+ response_cache = TTLCache(maxsize=MODEL_CACHE_SIZE, ttl=MODEL_CACHE_TTL)
+
+ def load_models():
+     """Load models optimized for low CPU environments"""
+     global whisper_model, llm, tts, models_loaded

+     if models_loaded:
+         return
+
+     with models_lock:
+         if models_loaded:  # Double-check to avoid race condition
+             return
+
+         print("Loading models for low-resource environment...")
+         start_time = time.time()
+
+         # Force garbage collection before loading models
+         gc.collect()
+
+         # Choose smallest/fastest model options and optimize for CPU
+         device = "cpu"  # Force CPU for limited resources
+         compute_type = "int8"  # Use int8 quantization for faster inference
+
+         # Monitor memory usage
+         def log_memory():
+             process = psutil.Process(os.getpid())
+             memory_info = process.memory_info()
+             memory_mb = memory_info.rss / 1024 / 1024
+             print(f"Memory usage: {memory_mb:.2f} MB")
+
+         # Load whisper model first (most critical for voice input)
+         print("Loading whisper model...")
+         log_memory()
+         whisper_model = WhisperModel("tiny", device=device, compute_type=compute_type)
+
+         # Load LLM next
+         print("Loading language model...")
+         log_memory()
+         llm = pipeline(
+             "text-generation",
+             model="tiiuae/falcon-rw-1b",  # Consider switching to a smaller model if available
+             max_new_tokens=30,  # Reduced token count for faster generation
+             device=-1,  # Force CPU
+         )
+
+         # Finally load TTS
+         print("Loading TTS model...")
+         log_memory()
+         tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC",
+                   progress_bar=False,
+                   gpu=False)
+
+         # Force garbage collection again after loading
+         gc.collect()
+
+         models_loaded = True
+         log_memory()
+         print(f"Models loaded in {time.time() - start_time:.2f} seconds")
+
+ @cached(cache=response_cache)
+ def generate_ai_response(user_input):
+     """
+     Generate AI responses with caching to avoid repetitive processing.
+     Optimized for low CPU environments.
+     """
+     load_models()  # Ensure models are loaded

+     # Handle empty or too short input
+     if not user_input or len(user_input.strip()) < 2:
+         return "I'm listening. Please say more."

+     # Normalize and simplify input to improve cache hits
+     normalized_input = user_input.lower().strip()

+     # Check for very similar recent inputs to maximize cache usage
+     for cached_input in response_cache.keys():
+         if cached_input and normalized_input and (
+                 cached_input.lower() in normalized_input or
+                 normalized_input in cached_input.lower() or
+                 levenshtein_distance(normalized_input, cached_input.lower()) < 5):
+             print(f"Using cached similar response for: {cached_input}")
+             return response_cache[cached_input]
+
    try:
+         # Start with a small timeout for real-time experience
+         start_time = time.time()
+         timeout = 3.0  # 3 seconds max for real-time response

+         # Generate response with monitoring
+         raw_response = llm(user_input, max_new_tokens=30)[0]["generated_text"]

+         # Check if we're taking too long
+         elapsed = time.time() - start_time
+         if elapsed > timeout:
+             print(f"Response generation taking too long: {elapsed:.2f}s")
+             return "Let me think about that for a moment."

+         # Process to get clean, short response
+         final_response = process_response(user_input, raw_response)
+
+         # Force garbage collection after processing to keep memory usage low
+         gc.collect()
+
+         return final_response
    except Exception as e:
+         print(f"Error generating AI response: {str(e)}")
+         # Return a default response if anything goes wrong
+         return "I heard you, but I'm having trouble forming a response right now."
+
+ def levenshtein_distance(s1, s2):
+     """
+     Calculate simple string similarity for cache optimization.
+     A simpler implementation than full Levenshtein to save CPU cycles.
+     """
+     if len(s1) < len(s2):
+         return levenshtein_distance(s2, s1)
+
+     if not s2:
+         return len(s1)
+
+     previous_row = range(len(s2) + 1)
+     for i, c1 in enumerate(s1):
+         current_row = [i + 1]
+         for j, c2 in enumerate(s2):
+             insertions = previous_row[j + 1] + 1
+             deletions = current_row[j] + 1
+             substitutions = previous_row[j] + (c1 != c2)
+             current_row.append(min(insertions, deletions, substitutions))
+         previous_row = current_row
+
+     return previous_row[-1]
+
+ def process_response(input_text, generated_text):
+     """Optimized response processing function"""
+     # Handle the case where generated_text might be None
+     if not generated_text:
+         return "I'm not sure what to say about that."
+
+     # Make sure both are strings
+     input_text = str(input_text).strip()
+     generated_text = str(generated_text).strip()
+
+     # Skip empty input
+     if not input_text:
+         clean_response = generated_text
+     # Remove the input text from the beginning of the response
+     elif generated_text.startswith(input_text):
+         clean_response = generated_text[len(input_text):].strip()
+     else:
+         clean_response = generated_text.strip()
+
+     # If we ended up with nothing, provide a default response
+     if not clean_response:
+         return "I'm listening."
+
+     # Split into sentences more efficiently
+     sentences = re.split(r'(?<=[.!?])\s+', clean_response)
+
+     # Filter out empty or very short sentences
+     meaningful_sentences = [s for s in sentences if len(s) > 5]
+
+     # Take just 1-2 sentences for a casual, human-like response
+     if meaningful_sentences:
+         if len(meaningful_sentences) > 2:
+             result = " ".join(meaningful_sentences[:2])
+         else:
+             result = " ".join(meaningful_sentences)
+     else:
+         # If no meaningful sentences, but we have short sentences, use those
+         short_sentences = [s for s in sentences if s.strip()]
+         if short_sentences:
+             result = " ".join(short_sentences[:2])
+         else:
+             # Fallback if no good sentences were found
+             result = "I'm not sure what to say about that."
+
+     # Remove any repetitive phrases
+     result = remove_repetitions(result)
+
+     # Normalize quotes to ASCII equivalents
+     result = normalize_quotes(result)
+
+     return result
+
+ def normalize_quotes(text):
+     """Replace curly quotes with straight quotes - optimized version"""
+     replacements = {
+         '“': '"', '”': '"',
+         '‘': "'", '’': "'"
+     }
+     for old, new in replacements.items():
+         text = text.replace(old, new)
+     return text
+
+ def remove_repetitions(text):
+     """Optimized repetition removal function"""
+     words = text.split()
+     if len(words) <= 5:  # Don't process very short responses
+         return text
+
+     result = []
+     text_so_far = ""
+
+     for i in range(len(words)):
+         # Check if this word starts a repeated phrase
+         if i < len(words) - 3:  # Need at least 3 words to check for repetition
+             # Check if next 3+ words appear earlier in the text
+             is_repetition = False
+
+             for j in range(3, min(10, len(words) - i)):  # Check phrases of length 3 to 10
+                 phrase = " ".join(words[i:i+j])
+                 if phrase in text_so_far:
+                     is_repetition = True
+                     break
+
+             if not is_repetition:
+                 result.append(words[i])
+                 text_so_far += words[i] + " "
+         else:
+             result.append(words[i])
+             text_so_far += words[i] + " "
+
+     return " ".join(result)

@app.route("/talk", methods=["POST"])
def talk():
+     """Optimized voice API endpoint for low-resource environments"""
    if "audio" not in request.files:
        return jsonify({"error": "No audio file"}), 400

+     # Get current memory usage
+     process = psutil.Process(os.getpid())
+     memory_before = process.memory_info().rss / 1024 / 1024
+     print(f"Memory before processing: {memory_before:.2f} MB")
+
+     # Ensure models are loaded
+     load_models()
+
+     # Start timing for end-to-end processing
+     start_time = time.time()
+
+     # Save audio
    audio_file = request.files["audio"]

    try:
+         # Use in-memory processing when possible to avoid disk I/O
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            audio_path = tmp.name
            audio_file.save(audio_path)

+         # Transcribe with optimized settings
+         try:
+             # Set beam_size=1 for faster transcription with slight accuracy trade-off
+             segments, _ = whisper_model.transcribe(
+                 audio_path,
+                 beam_size=1,
+                 vad_filter=True,  # Filter out non-speech
+                 language="en"  # Specify language if known
+             )
+             transcription = "".join([seg.text for seg in segments])
+
+             print(f"Transcription: {transcription}")
+             print(f"Transcription time: {time.time() - start_time:.2f}s")
+
+             if not transcription.strip():
+                 final_response = "I didn't catch that. Could you please speak again?"
+             else:
+                 # Use the cached response generator
+                 final_response = generate_ai_response(transcription)
+
+             print(f"Voice response: {final_response}")
+             print(f"Response generation time: {time.time() - start_time:.2f}s")
+
+             # Cache frequently used responses as pre-synthesized audio files
+             response_hash = str(hash(final_response))
+             cached_audio_path = os.path.join(tempfile.gettempdir(), f"cached_response_{response_hash}.wav")
+
+             if os.path.exists(cached_audio_path):
+                 print("Using cached audio response")
+                 tts_audio_path = cached_audio_path
+             else:
+                 # Prepare TTS output path
+                 tts_audio_path = audio_path.replace(".wav", "_reply.wav")
+
+                 try:
+                     # Synthesize speech with optimized settings
+                     tts.tts_to_file(
+                         text=final_response,
+                         file_path=tts_audio_path,
+                         speed=1.1  # Slightly faster speech for quicker responses
+                     )
+
+                     if not os.path.exists(tts_audio_path) or os.path.getsize(tts_audio_path) == 0:
+                         raise Exception("TTS failed to generate audio file")
+
+                     # Cache this response for future use
+                     if len(final_response) < 100:  # Only cache short responses
+                         try:
+                             import shutil
+                             shutil.copy(tts_audio_path, cached_audio_path)
+                         except Exception as cache_error:
+                             print(f"Error caching audio: {str(cache_error)}")
+
+                 except Exception as e:
+                     print(f"TTS error: {str(e)}")
+                     tts_audio_path = audio_path
+                     final_response = "Sorry, I couldn't generate audio right now."
+         except Exception as e:
+             print(f"Transcription error: {str(e)}")
+             final_response = "I had trouble understanding that. Could you try again?"
+             tts_audio_path = audio_path

        # Return both the audio file and the text response
        try:
            response = send_file(tts_audio_path, mimetype="audio/wav")
+
+             # Base64 encode the response text
            encoded_response = base64.b64encode(final_response.encode('utf-8')).decode('ascii')
            response.headers["X-Response-Text-Base64"] = encoded_response
            response.headers["Access-Control-Expose-Headers"] = "X-Response-Text-Base64"
+
+             # Log total processing time
+             print(f"Total processing time: {time.time() - start_time:.2f}s")
+             memory_after = process.memory_info().rss / 1024 / 1024
+             print(f"Memory after processing: {memory_after:.2f} MB")
+
+             # Force garbage collection
+             gc.collect()
+
            return response
        except Exception as e:
            print(f"Error sending file: {str(e)}")

                "error": "Could not send audio response",
                "text_response": final_response
            }), 500
+
    except Exception as e:
        print(f"Error in talk endpoint: {str(e)}")
        return jsonify({"error": str(e)}), 500

        try:
            if 'audio_path' in locals() and os.path.exists(audio_path):
                os.unlink(audio_path)
+             if 'tts_audio_path' in locals() and tts_audio_path != cached_audio_path and tts_audio_path != audio_path and os.path.exists(tts_audio_path):
                os.unlink(tts_audio_path)
        except Exception as cleanup_error:
            print(f"Error cleaning up files: {str(cleanup_error)}")
+
+     # Final garbage collection
+     gc.collect()
+
+ @app.route("/chat", methods=["POST"])
+ def chat():
+     data = request.get_json()
+     if not data or "text" not in data:
+         return jsonify({"error": "Missing 'text' in request body"}), 400
+
+     # Ensure models are loaded
+     load_models()
+
+     try:
+         user_input = data["text"]
+         print(f"Text input: {user_input}")  # Debugging
+
+         # Use the cached response generator
+         final_response = generate_ai_response(user_input)
+
+         print(f"Text response: {final_response}")  # Debugging
+
+         return jsonify({"response": final_response})
+     except Exception as e:
+         print(f"Error in chat endpoint: {str(e)}")
+         return jsonify({"response": "I'm having trouble processing that. Could you try again?", "error": str(e)})
+
+ @app.route("/")
+ def index():
+     return "Metaverse AI Character API running."
+
+ # Cache for frequently used TTS responses
+ tts_audio_cache = {}

+ # Pre-cache common responses
+ def precache_common_responses():
+     """Pre-generate audio for common responses to save processing time"""
+     common_responses = [
+         "I didn't catch that. Could you please speak again?",
+         "I'm listening. Please say more.",
+         "I heard you, but I'm having trouble forming a response right now.",
+         "I'm not sure what to say about that.",
+         "Let me think about that for a moment."
+     ]
+
+     global tts
+     if tts is None:
+         load_models()
+
+     print("Pre-caching common audio responses...")
+     for response in common_responses:
+         try:
+             response_hash = str(hash(response))
+             cached_path = os.path.join(tempfile.gettempdir(), f"cached_response_{response_hash}.wav")
+
+             if not os.path.exists(cached_path):
+                 with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+                     tmp_path = tmp.name
+
+                 tts.tts_to_file(text=response, file_path=tmp_path)
+                 os.rename(tmp_path, cached_path)
+
+             tts_audio_cache[response] = cached_path
+             print(f"Cached: {response}")
+         except Exception as e:
+             print(f"Failed to cache response '{response}': {str(e)}")
+
+     print("Finished pre-caching")

+ # Health check endpoint to verify API is running properly
+ @app.route("/health", methods=["GET"])
+ def health_check():
+     """Health check endpoint to verify API is running"""
+     memory_usage = psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024
+
    return jsonify({
+         "status": "ok",
+         "models_loaded": models_loaded,
+         "memory_usage_mb": round(memory_usage, 2),
+         "cache_size": len(response_cache),
+         "uptime_seconds": time.time() - startup_time
    })

+ # Track startup time
+ startup_time = time.time()

if __name__ == "__main__":
+     print("Starting Metaverse AI Character API (Optimized for real-time on 2vCPU)...")
+
+     # Start loading models in a background thread
+     model_thread = threading.Thread(target=load_models)
+     model_thread.daemon = True  # Allow the thread to be terminated when the main program exits
+     model_thread.start()
+
+     # Start pre-caching in another thread
+     cache_thread = threading.Thread(target=precache_common_responses)
+     cache_thread.daemon = True
+     cache_thread.start()
+
+     # Optimize Flask for low-resource environment
+     # Use threaded=True with lower thread count to prevent CPU overload
+     app.run(
+         host="0.0.0.0",
+         port=7860,
+         threaded=True,
+         # Options below reduce resource usage
+         debug=False,  # Disable debug mode for production
+         use_reloader=False  # Disable reloader to prevent duplicate processes
+     )
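
For reference, a minimal client sketch (not part of the commit) showing how the updated endpoints could be exercised. The base URL, file names, and the local-port assumption are placeholders; only the routes, the "text" JSON field, the "audio" upload key, and the X-Response-Text-Base64 header come from the code above.

import base64
import requests

BASE_URL = "http://localhost:7860"  # placeholder; substitute your deployment's URL

# Text chat: POST JSON with a "text" field to /chat
r = requests.post(f"{BASE_URL}/chat", json={"text": "Hello there"})
print(r.json()["response"])

# Voice: POST a WAV file under the "audio" key to /talk.
# The response body is the synthesized WAV; the text reply is returned
# base64-encoded in the X-Response-Text-Base64 header.
with open("input.wav", "rb") as f:  # placeholder input file
    r = requests.post(f"{BASE_URL}/talk", files={"audio": f})
reply_text = base64.b64decode(r.headers["X-Response-Text-Base64"]).decode("utf-8")
with open("reply.wav", "wb") as out:
    out.write(r.content)
print(reply_text)

# Health check endpoint added in this commit
print(requests.get(f"{BASE_URL}/health").json())

The text reply travels in a response header because the body of the /talk response is the audio itself; the header is exposed to browsers via Access-Control-Expose-Headers.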