mac9087 committed
Commit d5ed7cc · verified · 1 Parent(s): 9c8ecc3

Update app.py
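
This update converts 3D generation from a blocking request into an asynchronous job queue. /generate now validates the prompt, enqueues a job, and returns a job_id right away; a daemon worker thread executes the new process_job(), and a new /status/<job_id> endpoint serves the result. Sampling runs under torch.inference_mode() with karras_steps reduced from 16 to 8, trading mesh quality for speed on CPU (note that inference_mode() disables autograd tracking but, despite the "# Force lower precision" comment, does not change numeric precision). The psutil import moves to module scope, /health additionally reports CPU load, queue size, and worker state, the landing page documents the new three-step flow, and app.run() now sets debug=False.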

Files changed (1):
  1. app.py +209 -55
app.py CHANGED
@@ -11,6 +11,9 @@ import uuid
 import time
 import sys
 import gc  # For explicit garbage collection
+import threading
+import queue
+import psutil

 # Set environment variables before anything else
 os.environ['SHAPEE_NO_INTERACTIVE'] = '1'
@@ -92,11 +95,19 @@ xm = None
 model = None
 diffusion = None

+# Job queue and results dictionary
+job_queue = queue.Queue()
+job_results = {}
+generation_thread = None
+is_thread_running = False
+
 def load_models_if_needed():
     global xm, model, diffusion
     if xm is None or model is None or diffusion is None:
         print("Loading models for the first time...")
         try:
+            # Set lower precision for memory optimization
+            torch.set_default_dtype(torch.float32)  # Use float32 instead of float64
             xm = load_model('transmitter', device=device)
             model = load_model('text300M', device=device)
             diffusion = diffusion_from_config(load_config('diffusion'))
@@ -105,78 +116,103 @@ def load_models_if_needed():
             print(f"Error loading models: {e}")
             raise

-@app.route('/generate', methods=['POST'])
-def generate_3d():
+def worker_thread():
+    global is_thread_running
+    is_thread_running = True
+
+    try:
+        while True:
+            try:
+                # Get job from queue with a timeout
+                job_id, prompt = job_queue.get(timeout=1)
+                print(f"Processing job {job_id} with prompt: {prompt}")
+
+                # Process the job
+                result = process_job(job_id, prompt)
+
+                # Store the result
+                job_results[job_id] = result
+
+            except queue.Empty:
+                # No jobs in queue, continue waiting
+                pass
+            except Exception as e:
+                print(f"Error in worker thread: {e}")
+                import traceback
+                traceback.print_exc()
+                # If there was a job being processed, mark it as failed
+                if 'job_id' in locals():
+                    job_results[job_id] = {
+                        "success": False,
+                        "error": str(e)
+                    }
+    finally:
+        is_thread_running = False
+
+def process_job(job_id, prompt):
     try:
         # Load models if not already loaded
         load_models_if_needed()

-        # Get the prompt from the request
-        data = request.json
-        if not data or 'prompt' not in data:
-            return jsonify({"error": "No prompt provided"}), 400
-
-        prompt = data['prompt']
-        print(f"Received prompt: {prompt}")
-
         # Set parameters for CPU performance (reduced steps and other optimizations)
         batch_size = 1
         guidance_scale = 15.0

-        # *** OPTIMIZATION: Significantly reduce steps for low-memory environments ***
-        karras_steps = 16  # Reduced from 32 to 16 for better performance
+        # *** EXTREME OPTIMIZATION: Significantly reduce steps for low-memory environments ***
+        karras_steps = 8  # Reduced from 16 to 8 for even better performance

         # *** OPTIMIZATION: Run garbage collection before starting intensive task ***
         gc.collect()
         torch.cuda.empty_cache() if torch.cuda.is_available() else None

         # Generate latents with the text-to-3D model
-        print("Starting latent generation with reduced steps...")
-        latents = sample_latents(
-            batch_size=batch_size,
-            model=model,
-            diffusion=diffusion,
-            guidance_scale=guidance_scale,
-            model_kwargs=dict(texts=[prompt] * batch_size),
-            progress=True,
-            clip_denoised=True,
-            use_fp16=False,  # CPU doesn't support fp16
-            use_karras=True,
-            karras_steps=karras_steps,  # *** OPTIMIZATION: Reduced steps ***
-            sigma_min=1e-3,
-            sigma_max=160,
-            s_churn=0,
-        )
-        print("Latent generation complete!")
+        print(f"Starting latent generation for job {job_id} with {karras_steps} steps...")
+
+        # Force lower precision
+        with torch.inference_mode():
+            latents = sample_latents(
+                batch_size=batch_size,
+                model=model,
+                diffusion=diffusion,
+                guidance_scale=guidance_scale,
+                model_kwargs=dict(texts=[prompt] * batch_size),
+                progress=True,
+                clip_denoised=True,
+                use_fp16=False,  # CPU doesn't support fp16
+                use_karras=True,
+                karras_steps=karras_steps,
+                sigma_min=1e-3,
+                sigma_max=160,
+                s_churn=0,
+            )
+        print(f"Latent generation complete for job {job_id}!")

         # *** OPTIMIZATION: Run garbage collection after intensive step ***
         gc.collect()
-        torch.cuda.empty_cache() if torch.cuda.is_available() else None

         # Generate a unique filename
         unique_id = str(uuid.uuid4())
         filename = f"{output_dir}/{unique_id}"

-        # Convert latent to mesh
-        print("Decoding mesh...")
+        # Convert latent to mesh with optimization settings
+        print(f"Decoding mesh for job {job_id}...")
         t0 = time.time()

-        # *** OPTIMIZATION: Use simplified decoding for memory constraints ***
-        mesh = decode_latent_mesh(xm, latents[0]).tri_mesh()
+        # *** OPTIMIZATION: Use simplified decoding with lower resolution ***
+        mesh = decode_latent_mesh(xm, latents[0], max_points=4000).tri_mesh()  # Reduced point count
         print(f"Mesh decoded in {time.time() - t0:.2f} seconds")

         # *** OPTIMIZATION: Clear latents from memory as they're no longer needed ***
         del latents
         gc.collect()
-        torch.cuda.empty_cache() if torch.cuda.is_available() else None

         # Save as GLB
-        print("Saving as GLB...")
+        print(f"Saving job {job_id} as GLB...")
         glb_path = f"{filename}.glb"
         mesh.write_glb(glb_path)

         # Save as OBJ
-        print("Saving as OBJ...")
+        print(f"Saving job {job_id} as OBJ...")
         obj_path = f"{filename}.obj"
         with open(obj_path, 'w') as f:
             mesh.write_obj(f)
@@ -185,21 +221,74 @@ def generate_3d():
         del mesh
         gc.collect()

-        print("Files saved successfully!")
+        print(f"Files saved successfully for job {job_id}!")

         # Return paths to the generated files
-        return jsonify({
+        return {
             "success": True,
             "message": "3D model generated successfully",
             "glb_url": f"/download/{os.path.basename(glb_path)}",
             "obj_url": f"/download/{os.path.basename(obj_path)}"
-        })
+        }

     except Exception as e:
-        print(f"Error during generation: {str(e)}")
+        print(f"Error during generation for job {job_id}: {str(e)}")
         import traceback
         traceback.print_exc()
-        return jsonify({"error": str(e)}), 500
+        return {
+            "success": False,
+            "error": str(e)
+        }
+
+def ensure_worker_thread_running():
+    global generation_thread, is_thread_running
+
+    if generation_thread is None or not generation_thread.is_alive():
+        print("Starting worker thread...")
+        generation_thread = threading.Thread(target=worker_thread, daemon=True)
+        generation_thread.start()
+
+@app.route('/generate', methods=['POST'])
+def generate_3d():
+    # Get the prompt from the request
+    data = request.json
+    if not data or 'prompt' not in data:
+        return jsonify({"error": "No prompt provided"}), 400
+
+    prompt = data['prompt']
+    print(f"Received prompt: {prompt}")
+
+    # Generate a job ID
+    job_id = str(uuid.uuid4())
+
+    # Add job to queue
+    ensure_worker_thread_running()
+    job_queue.put((job_id, prompt))
+
+    # Return job ID immediately
+    return jsonify({
+        "success": True,
+        "message": "Job submitted successfully",
+        "job_id": job_id,
+        "status_url": f"/status/{job_id}"
+    })
+
+@app.route('/status/<job_id>', methods=['GET'])
+def job_status(job_id):
+    if job_id in job_results:
+        result = job_results[job_id]
+        # Clean up memory if the job is complete and successful
+        if result.get("success", False):
+            return jsonify(result)
+        else:
+            return jsonify({"error": result.get("error", "Unknown error")}), 500
+    else:
+        # Job is still in progress
+        return jsonify({
+            "success": None,
+            "message": "Job is still processing",
+            "job_id": job_id
+        })

 @app.route('/download/<filename>', methods=['GET'])
 def download_file(filename):
@@ -213,16 +302,31 @@ def health_check():
     """Simple health check endpoint to verify the app is running"""
     # Check available memory
     try:
-        import psutil
         memory_info = psutil.virtual_memory()
         memory_usage = f"{memory_info.percent}% (Available: {memory_info.available / (1024**3):.2f} GB)"
-    except ImportError:
-        memory_usage = "psutil not installed"
+
+        # Check CPU usage
+        cpu_usage = f"{psutil.cpu_percent(interval=0.1)}%"
+
+        # Get queue status
+        queue_size = job_queue.qsize()
+
+        # Get active jobs
+        active_jobs = len(job_results)
+    except Exception as e:
+        memory_usage = "Error getting system info"
+        cpu_usage = "Error getting CPU info"
+        queue_size = "Unknown"
+        active_jobs = "Unknown"

     return jsonify({
         "status": "ok",
         "message": "Service is running",
-        "memory_usage": memory_usage
+        "memory_usage": memory_usage,
+        "cpu_usage": cpu_usage,
+        "queue_size": queue_size,
+        "active_jobs": active_jobs,
+        "worker_running": is_thread_running
     })

 @app.route('/', methods=['GET'])
@@ -230,25 +334,75 @@ def home():
     """Landing page with usage instructions"""
     return """
     <html>
-    <head><title>Text to 3D API</title></head>
+    <head>
+        <title>Text to 3D API</title>
+        <style>
+            body { font-family: Arial, sans-serif; line-height: 1.6; margin: 0; padding: 20px; max-width: 800px; margin: 0 auto; }
+            pre { background: #f4f4f4; padding: 15px; border-radius: 5px; overflow-x: auto; }
+            code { background: #f4f4f4; padding: 2px 5px; border-radius: 3px; }
+            h1, h2 { color: #333; }
+        </style>
+    </head>
    <body>
        <h1>Text to 3D API</h1>
-       <p>This is a simple API that converts text prompts to 3D models.</p>
+       <p>This is an optimized API that converts text prompts to 3D models.</p>
+
        <h2>How to use:</h2>
+       <h3>Step 1: Submit a generation job</h3>
+       <pre>
+POST /generate
+Content-Type: application/json
+
+{
+    "prompt": "A futuristic building"
+}
+       </pre>
+       <p>Response:</p>
        <pre>
-POST /generate
-Content-Type: application/json
+{
+    "success": true,
+    "message": "Job submitted successfully",
+    "job_id": "123e4567-e89b-12d3-a456-426614174000",
+    "status_url": "/status/123e4567-e89b-12d3-a456-426614174000"
+}
+       </pre>

-{
-    "prompt": "A futuristic building"
-}
+       <h3>Step 2: Check job status</h3>
+       <pre>
+GET /status/123e4567-e89b-12d3-a456-426614174000
+       </pre>
+       <p>Response (while processing):</p>
+       <pre>
+{
+    "success": null,
+    "message": "Job is still processing",
+    "job_id": "123e4567-e89b-12d3-a456-426614174000"
+}
        </pre>
-       <p>The response will include URLs to download the generated models in GLB and OBJ formats.</p>
+       <p>Response (when complete):</p>
+       <pre>
+{
+    "success": true,
+    "message": "3D model generated successfully",
+    "glb_url": "/download/abc123.glb",
+    "obj_url": "/download/abc123.obj"
+}
+       </pre>
+
+       <h3>Step 3: Download the files</h3>
+       <p>Use the provided URLs to download the GLB and OBJ files.</p>
+
+       <h2>Health Check:</h2>
+       <pre>GET /health</pre>
+       <p>Provides information about the service status and resource usage.</p>
    </body>
    </html>
    """

 if __name__ == '__main__':
+    # Start the worker thread
+    ensure_worker_thread_running()
+
     # Recommended to run with gunicorn for production with increased timeout:
     # $ gunicorn app:app --bind 0.0.0.0:7860 --timeout 300 --workers 1
-    app.run(host='0.0.0.0', port=7860, debug=True)
+    app.run(host='0.0.0.0', port=7860, debug=False)  # Set debug=False in production
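
For reference, a minimal client for the new asynchronous flow could look like the sketch below. It only uses the endpoints and response fields shown in the diff; the base URL matches the app.run() defaults, while the polling interval and the third-party requests dependency are assumptions.

import time
import requests  # third-party HTTP client, assumed installed

BASE = "http://localhost:7860"  # host/port from app.run() above

# Step 1: submit a generation job
resp = requests.post(f"{BASE}/generate", json={"prompt": "A futuristic building"})
resp.raise_for_status()
job = resp.json()
print("Submitted job:", job["job_id"])

# Step 2: poll the status URL until "success" flips from null to true
while True:
    status = requests.get(f"{BASE}{job['status_url']}")
    if status.status_code == 500:  # failed jobs come back as {"error": ...}
        raise RuntimeError(status.json().get("error", "generation failed"))
    body = status.json()
    if body.get("success"):
        break
    time.sleep(5)  # arbitrary polling interval

# Step 3: download the GLB and OBJ files from the returned URLs
for key in ("glb_url", "obj_url"):
    data = requests.get(f"{BASE}{body[key]}")
    data.raise_for_status()
    name = body[key].rsplit("/", 1)[-1]
    with open(name, "wb") as f:
        f.write(data.content)
    print("Saved", name)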
 
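
One caveat in the new /status handler: despite the "Clean up memory if the job is complete and successful" comment, nothing is ever removed from job_results, so the dictionary (and the "active_jobs" count in /health) grows until the process restarts. A hypothetical variant that evicts an entry once its result has been delivered, assuming the same app, job_results, and jsonify names as in the diff:

@app.route('/status/<job_id>', methods=['GET'])
def job_status(job_id):
    if job_id in job_results:
        # pop() removes the finished entry, so completed jobs
        # no longer accumulate in memory after first retrieval
        result = job_results.pop(job_id)
        if result.get("success", False):
            return jsonify(result)
        return jsonify({"error": result.get("error", "Unknown error")}), 500
    # Not finished (or unknown job_id): report as still processing
    return jsonify({
        "success": None,
        "message": "Job is still processing",
        "job_id": job_id
    })

The tradeoff is that each result can be read only once; a second poll after completion would report the job as still processing, so a client should keep the download URLs from the first successful response.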