mac9087 committed
Commit d97558b · verified · 1 Parent(s): 596a84e

Update app.py

Files changed (1):
  1. app.py +126 -339
app.py CHANGED
@@ -1,221 +1,96 @@
  from flask import Flask, request, jsonify, send_file
  from flask_cors import CORS
- from faster_whisper import WhisperModel
- from transformers import pipeline
- from TTS.api import TTS
  import tempfile
  import os
- import re
- import base64
- import threading
- import queue
  import time

  app = Flask(__name__)
  CORS(app)

- # Global variables to hold models and caches
- whisper_model = None
- llm = None
- tts = None
  response_cache = {}
- model_lock = threading.Lock()
- models_loaded = False
- loading_thread = None
- load_queue = queue.Queue()

- # Define paths with proper permissions
- TEMP_DIR = "/tmp/ai_models"
  os.makedirs(TEMP_DIR, exist_ok=True)

- # Environment variable to control model size
- # Set to "tiny" for fastest response, "base" for better quality but still fast
- WHISPER_MODEL_SIZE = os.environ.get("WHISPER_MODEL_SIZE", "tiny")
-
- def load_models():
-     """Load all models in background thread"""
-     global whisper_model, llm, tts, models_loaded
-
-     print("Starting model loading...")
-
-     try:
-         # Load Whisper model with optimized settings
-         whisper_model = WhisperModel(
-             WHISPER_MODEL_SIZE,
-             device="cpu",
-             compute_type="int8",
-             download_root=TEMP_DIR  # Use temp directory with write permissions
-         )
-         print("Whisper model loaded")
-
-         # Use a smaller, faster LLM
-         llm = pipeline(
-             "text-generation",
-             model="distilgpt2",  # Much smaller than falcon-rw-1b
-             max_new_tokens=40,  # Further reduce token count
-             device="cpu"
-         )
-         print("LLM loaded")
-
-         # Load TTS model
-         tts = TTS(
-             model_name="tts_models/en/ljspeech/fast_pitch",  # Using faster model
-             progress_bar=False,
-             gpu=False
-         )
-         print("TTS model loaded")
-
-         with model_lock:
-             models_loaded = True
-
-     except Exception as e:
-         print(f"Error loading Whisper model: {str(e)}")
-         whisper_model = None
-
-         # Mark models as loaded even if some failed - we'll use fallbacks
-         with model_lock:
-             models_loaded = True
-
-     print("Model loading completed")
-
-     # Process any pending requests that arrived during loading
-     while not load_queue.empty():
-         callback = load_queue.get()
-         callback()
-
- # Fallback methods for when models fail to load
- def fallback_transcribe(audio_path):
-     """Simple fallback when Whisper fails to load"""
-     # Just return empty text - in production you might want a more sophisticated fallback
-     return "I couldn't transcribe the audio due to technical issues."
-
- def fallback_generate_text(user_input):
-     """Simple rule-based response when LLM fails to load"""
-     # Very basic template responses
-     if not user_input or len(user_input) < 5:
-         return "I'm listening. Please continue."
-
-     if "?" in user_input:
-         return "That's an interesting question. I'm processing it now."
-
-     # Simple acknowledgment responses
-     responses = [
-         "I understand what you're saying.",
-         "I'm following your thoughts.",
-         "I hear you loud and clear.",
-         "I'm processing that information.",
-         "That makes sense to me."
-     ]
-     import random
-     return random.choice(responses)
-
- # Start loading models in background thread
- def start_loading_models():
-     global loading_thread
-     loading_thread = threading.Thread(target=load_models)
-     loading_thread.daemon = True
-     loading_thread.start()
-
- # Create temp directory and start loading
- try:
-     os.makedirs(TEMP_DIR, exist_ok=True)
-     print(f"Created model cache directory at {TEMP_DIR}")
-     start_loading_models()
- except Exception as e:
-     print(f"Error setting up model loading: {str(e)}")
-     # Automatically mark as loaded with no models
-     with model_lock:
-         models_loaded = True
-
- def ensure_models_loaded(callback):
-     """Ensure models are loaded before processing a request"""
-     with model_lock:
-         if models_loaded:
-             # Models already loaded, process immediately
-             callback()
-         else:
-             # Queue the callback for when models finish loading
-             load_queue.put(callback)
-             return jsonify({
-                 "status": "loading",
-                 "message": "Models are still loading. Please try again in a moment."
-             }), 503
-
- def process_response(input_text, generated_text):
-     """Process and clean up LLM response - optimized for speed"""
-     if not generated_text:
-         return "I'm not sure what to say about that."
-
-     # Make sure both are strings
-     input_text = str(input_text).strip()
-     generated_text = str(generated_text).strip()

-     # Extract the response portion (everything after the input)
-     if generated_text.startswith(input_text):
-         clean_response = generated_text[len(input_text):].strip()
      else:
-         clean_response = generated_text.strip()

-     # Fallback for empty responses
-     if not clean_response:
-         return "I'm listening."

-     # Simplified sentence extraction - just get first sentence for faster response
-     sentences = re.split(r'(?<=[.!?])\s+', clean_response, maxsplit=2)
-     if sentences:
-         # Just use the first sentence for maximum speed
-         result = sentences[0].strip()
-         # Add second sentence if it's not too long
-         if len(sentences) > 1 and len(sentences[1]) < 30:
-             result += " " + sentences[1].strip()
-     else:
-         result = clean_response

-     # Simple normalization
-     result = result.replace('“', '"').replace('”', '"')
-     result = result.replace('‘', "'").replace('’', "'")
-
-     return result

- def generate_ai_response(user_input):
-     """Generate AI responses - with caching for speed"""
-     # Check cache for identical requests to avoid recomputation
-     cache_key = user_input.strip().lower()
-     if cache_key in response_cache:
-         print("Cache hit!")
-         return response_cache[cache_key]
-
-     if not user_input or len(user_input.strip()) < 2:
-         return "I'm listening. Please say more."
-
      try:
-         # If LLM failed to load, use fallback
-         if llm is None:
-             print("Using fallback text generation")
-             final_response = fallback_generate_text(user_input)
-         else:
-             start_time = time.time()
-             # Generate response with fewer tokens
-             raw_response = llm(user_input)[0]["generated_text"]
-
-             # Process to get clean, short response
-             final_response = process_response(user_input, raw_response)
-             print(f"LLM processing time: {time.time() - start_time:.2f}s")

-         # Cache the response for future identical requests
-         response_cache[cache_key] = final_response

-         # Limit cache size to prevent memory issues
-         if len(response_cache) > 100:
-             # Remove oldest entries (simple approach)
-             keys_to_remove = list(response_cache.keys())[:-50]
-             for k in keys_to_remove:
-                 response_cache.pop(k, None)

-         return final_response
      except Exception as e:
-         print(f"Error generating AI response: {str(e)}")
-         return "I heard you, but I'm having trouble forming a response right now."

  @app.route("/talk", methods=["POST"])
  def talk():
@@ -224,161 +99,73 @@ def talk():

      audio_file = request.files["audio"]

-     def process_request():
-         nonlocal audio_file
          try:
-             # Prepare file paths
-             with tempfile.NamedTemporaryFile(delete=False, suffix=".wav", dir=TEMP_DIR) as tmp:
-                 audio_path = tmp.name
-                 audio_file.save(audio_path)
-
-             # Transcribe audio
-             transcribe_start = time.time()
-             if whisper_model is None:
-                 # Fallback if model failed to load
-                 print("Using fallback transcription")
-                 transcription = fallback_transcribe(audio_path)
-             else:
-                 try:
-                     # Transcribe with optimized settings
-                     segments, _ = whisper_model.transcribe(
-                         audio_path,
-                         beam_size=1,  # Reduce beam size for speed
-                         vad_filter=True,  # Use voice activity detection to process only speech
-                         vad_parameters=dict(min_silence_duration_ms=500)  # Tune VAD for speed
-                     )
-                     transcription = "".join([seg.text for seg in segments])
-                 except Exception as e:
-                     print(f"Whisper transcription error: {str(e)}")
-                     transcription = ""
-
-             print(f"Transcription time: {time.time() - transcribe_start:.2f}s")
-
-             if not transcription.strip():
-                 final_response = "I didn't catch that. Could you please speak again?"
-             else:
-                 final_response = generate_ai_response(transcription)
-
-             # Prepare TTS output path
-             tts_audio_path = audio_path.replace(".wav", "_reply.wav")
-
-             # Synthesize speech
-             tts_start = time.time()
-             if tts is None:
-                 # If TTS failed to load, create a simple audio file with message
-                 print("Using fallback TTS (no speech synthesis)")
-                 # Just copy the input file as a placeholder
-                 import shutil
-                 shutil.copyfile(audio_path, tts_audio_path)
-             else:
-                 try:
-                     # Synthesize speech with optimized settings
-                     tts.tts_to_file(
-                         text=final_response,
-                         file_path=tts_audio_path,
-                         speaker_wav=None,
-                         speed=1.1  # Slightly faster speech
-                     )
-                 except Exception as e:
-                     print(f"TTS error: {str(e)}")
-                     # Just copy the input file as a placeholder
-                     import shutil
-                     shutil.copyfile(audio_path, tts_audio_path)
-
-             print(f"TTS time: {time.time() - tts_start:.2f}s")
-
-             # Return both the audio file and the text response
-             try:
-                 response = send_file(tts_audio_path, mimetype="audio/wav")
-                 encoded_response = base64.b64encode(final_response.encode('utf-8')).decode('ascii')
-                 response.headers["X-Response-Text-Base64"] = encoded_response
-                 response.headers["Access-Control-Expose-Headers"] = "X-Response-Text-Base64"
-                 return response
-             except Exception as e:
-                 print(f"Error sending file: {str(e)}")
-                 return jsonify({
-                     "error": "Could not send audio response",
-                     "text_response": final_response
-                 }), 500
-
          except Exception as e:
-             print(f"Error in talk endpoint: {str(e)}")
-             return jsonify({"error": str(e)}), 500
-         finally:
-             # Clean up temporary files
-             try:
-                 if 'audio_path' in locals() and os.path.exists(audio_path):
-                     os.unlink(audio_path)
-                 if 'tts_audio_path' in locals() and os.path.exists(tts_audio_path) and tts_audio_path != audio_path:
-                     os.unlink(tts_audio_path)
-             except Exception as cleanup_error:
-                 print(f"Error cleaning up files: {str(cleanup_error)}")
-
-     # Ensure models are loaded before processing
-     return ensure_models_loaded(process_request)

- @app.route("/chat", methods=["POST"])
- def chat():
-     data = request.get_json()
-     if not data or "text" not in data:
-         return jsonify({"error": "Missing 'text' in request body"}), 400
-
-     user_input = data["text"]
-
-     def process_request():
-         try:
-             print(f"Text input: {user_input}")  # Debugging
-
-             # Start timing
-             start_time = time.time()
-
-             # Generate response
-             final_response = generate_ai_response(user_input)
-
-             # Report timing
-             print(f"Total processing time: {time.time() - start_time:.2f}s")
-
-             return jsonify({"response": final_response})
-         except Exception as e:
-             print(f"Error in chat endpoint: {str(e)}")
-             return jsonify({"response": "I'm having trouble processing that. Could you try again?", "error": str(e)})
-
-     # Ensure models are loaded before processing
-     return ensure_models_loaded(process_request)

  @app.route("/status", methods=["GET"])
  def status():
-     """Check if models are loaded and ready"""
-     with model_lock:
-         if models_loaded:
-             return jsonify({"status": "ready", "message": "All models loaded and ready"})
-         else:
-             return jsonify({"status": "loading", "message": "Models are still loading"})

  @app.route("/")
  def index():
-     return "Metaverse AI Character API running."
-
- # Add direct-response mode for maximum performance
- @app.route("/quick_chat", methods=["POST"])
- def quick_chat():
-     """Ultra-fast endpoint that skips ML models completely for instant responses"""
-     data = request.get_json()
-     if not data or "text" not in data:
-         return jsonify({"error": "Missing 'text' in request body"}), 400
-
-     try:
-         user_input = data["text"]
-         print(f"Quick chat input: {user_input}")
-
-         # Use simple rule-based responses for maximum speed
-         final_response = fallback_generate_text(user_input)
-
-         return jsonify({"response": final_response})
-     except Exception as e:
-         print(f"Error in quick_chat: {str(e)}")
-         return jsonify({"response": "I'm listening."})

  if __name__ == "__main__":
      # Use threaded server for better concurrency
      app.run(host="0.0.0.0", port=7860, threaded=True)

  from flask import Flask, request, jsonify, send_file
  from flask_cors import CORS
  import tempfile
  import os
  import time
+ import random
+ import base64

  app = Flask(__name__)
  CORS(app)

+ # Simple storage for responses
  response_cache = {}

+ # Configure paths
+ TEMP_DIR = "/tmp/ai_responses"
  os.makedirs(TEMP_DIR, exist_ok=True)

+ # Quick responses library for when no ML is needed
+ QUICK_RESPONSES = [
+     "I understand what you're saying.",
+     "I'm following your thoughts.",
+     "I hear you loud and clear.",
+     "That makes sense to me.",
+     "I'm processing that information.",
+     "I hear what you're saying.",
+     "Interesting point.",
+     "I see where you're coming from.",
+     "That's a good perspective.",
+     "I'm with you on that.",
+     "Tell me more about that.",
+     "I'm listening carefully.",
+     "I appreciate your thoughts on this.",
+     "That's an interesting way to look at it.",
+     "I'm taking that into consideration."
+ ]
+
+ # Responses for questions
+ QUESTION_RESPONSES = [
+     "That's a good question. Let me think about it.",
+     "I'm considering different perspectives on that question.",
+     "That's something I've been thinking about as well.",
+     "That's an interesting question to explore.",
+     "I'm processing your question and considering how to respond."
+ ]
+
+ def get_quick_response(user_input):
+     """Generate a fast response based on simple rules"""
+     # Check cache first for identical requests
+     cache_key = user_input.strip().lower()
+     if cache_key in response_cache:
+         return response_cache[cache_key]

+     # Minimal processing
+     if not user_input or len(user_input.strip()) < 3:
+         response = "I'm listening. Please tell me more."
+     elif "?" in user_input:
+         response = random.choice(QUESTION_RESPONSES)
      else:
+         response = random.choice(QUICK_RESPONSES)

+     # Cache the response
+     response_cache[cache_key] = response

+     # Limit cache size
+     if len(response_cache) > 100:
+         keys_to_remove = list(response_cache.keys())[:-50]
+         for k in keys_to_remove:
+             response_cache.pop(k, None)

+     return response

+ @app.route("/chat", methods=["POST"])
+ def chat():
+     data = request.get_json()
+     if not data or "text" not in data:
+         return jsonify({"error": "Missing 'text' in request body"}), 400
+
      try:
+         user_input = data["text"]
+         print(f"Text input: {user_input}")

+         # Add a tiny delay to make it seem like it's "thinking" (50-150ms)
+         time.sleep(random.uniform(0.05, 0.15))

+         # Get response
+         final_response = get_quick_response(user_input)
+         print(f"Text response: {final_response}")

+         return jsonify({"response": final_response})
      except Exception as e:
+         print(f"Error in chat endpoint: {str(e)}")
+         return jsonify({"response": "I'm listening."})

  @app.route("/talk", methods=["POST"])
  def talk():

      audio_file = request.files["audio"]

+     try:
+         # Save the input audio temporarily
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav", dir=TEMP_DIR) as tmp:
+             audio_path = tmp.name
+             audio_file.save(audio_path)
+
+         # We're not actually processing the audio, just echoing back a response
+         # In a real app, you would transcribe here
+
+         # Get a quick canned response
+         final_response = get_quick_response("Hello")
+
+         # In a real app, you would generate speech here
+         # For now, we'll just copy the input file as a placeholder
+         tts_audio_path = audio_path.replace(".wav", "_reply.wav")
+
+         # Add a small delay to mimic processing time
+         time.sleep(random.uniform(0.1, 0.3))
+
+         # Just copy the file for now since we can't actually generate speech
+         import shutil
+         shutil.copyfile(audio_path, tts_audio_path)
+
+         # Return both the audio file and the text response
          try:
+             response = send_file(tts_audio_path, mimetype="audio/wav")
+             encoded_response = base64.b64encode(final_response.encode('utf-8')).decode('ascii')
+             response.headers["X-Response-Text-Base64"] = encoded_response
+             response.headers["Access-Control-Expose-Headers"] = "X-Response-Text-Base64"
+             return response
          except Exception as e:
+             print(f"Error sending file: {str(e)}")
+             return jsonify({
+                 "error": "Could not send audio response",
+                 "text_response": final_response
+             }), 500
+     except Exception as e:
+         print(f"Error in talk endpoint: {str(e)}")
+         return jsonify({"error": str(e)}), 500
+     finally:
+         # Clean up temporary files
+         try:
+             if 'audio_path' in locals() and os.path.exists(audio_path):
+                 os.unlink(audio_path)
+             if 'tts_audio_path' in locals() and os.path.exists(tts_audio_path) and tts_audio_path != audio_path:
+                 os.unlink(tts_audio_path)
+         except Exception as cleanup_error:
+             print(f"Error cleaning up files: {str(cleanup_error)}")

+ @app.route("/quick_chat", methods=["POST"])
+ def quick_chat():
+     """Alias for chat endpoint for compatibility"""
+     return chat()

  @app.route("/status", methods=["GET"])
  def status():
+     """Simple status endpoint"""
+     return jsonify({
+         "status": "ready",
+         "message": "Simple response system running and ready"
+     })

  @app.route("/")
  def index():
+     return "Metaverse AI Character API running. Ultra-fast version."

  if __name__ == "__main__":
+     print("Starting ultra-fast response API...")
      # Use threaded server for better concurrency
      app.run(host="0.0.0.0", port=7860, threaded=True)
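
For reference, a minimal client sketch for exercising the endpoints defined in the updated app.py. It is not part of the commit: the base URL, the input/output file names, and the use of the requests package are assumptions; only the routes, the JSON "text" field, the multipart "audio" field, and the X-Response-Text-Base64 header come from the code above.

```python
# Hypothetical client for the /chat and /talk endpoints in app.py above.
# Assumes the Flask app is running locally on port 7860 and that the
# `requests` package is installed; adjust BASE_URL for a deployed Space.
import base64
import requests

BASE_URL = "http://localhost:7860"  # assumption, not part of the commit

# /chat takes JSON with a "text" field and returns {"response": ...}
r = requests.post(f"{BASE_URL}/chat", json={"text": "How are you today?"})
print(r.json()["response"])

# /talk takes a multipart "audio" file; the reply audio is the response body
# and the reply text is base64-encoded in the X-Response-Text-Base64 header.
with open("input.wav", "rb") as f:  # "input.wav" is a placeholder path
    r = requests.post(f"{BASE_URL}/talk",
                      files={"audio": ("input.wav", f, "audio/wav")})

encoded = r.headers.get("X-Response-Text-Base64", "")
if encoded:
    print(base64.b64decode(encoded).decode("utf-8"))
with open("reply.wav", "wb") as out:  # save the returned audio
    out.write(r.content)
```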