Update app.py

app.py
CHANGED
@@ -15,11 +15,10 @@ from huggingface_hub import snapshot_download, login, HfFileSystem
 from flask_cors import CORS
 import numpy as np
 import trimesh
-from transformers import pipeline, AutoImageProcessor, AutoModelForDepthEstimation
+from transformers import pipeline
 from scipy.ndimage import gaussian_filter
 from scipy import interpolate
 import cv2
-from rembg import remove
 
 app = Flask(__name__)
 CORS(app)
@@ -45,8 +44,6 @@ processing_jobs = {}
 
 # Model variables
 dpt_estimator = None
-depth_anything_model = None
-depth_anything_processor = None
 model_loaded = False
 model_loading = False
 
@@ -89,21 +86,36 @@ def allowed_file(filename):
 
 def remove_background(image_path):
     try:
-
-
-
-        img = Image.open(io.BytesIO(result)).convert("RGBA")
+        # Load image
+        img = cv2.imread(image_path)
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
 
-        #
-
-
-
+        # Initialize mask and models for GrabCut
+        mask = np.zeros(img.shape[:2], np.uint8)
+        bgd_model = np.zeros((1, 65), np.float64)
+        fgd_model = np.zeros((1, 65), np.float64)
+
+        # Define initial rectangle (10% border margin)
+        h, w = img.shape[:2]
+        margin = int(min(w, h) * 0.1)
+        rect = (margin, margin, w - 2 * margin, h - 2 * margin)
+
+        # Run GrabCut
+        cv2.grabCut(img, mask, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)
+
+        # Create final mask (0 for background, 1 for foreground)
+        mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
+
+        # Check if foreground exists
+        if np.sum(mask2) == 0:
+            print(f"Warning: No foreground detected in {image_path}")
             return None
 
-        #
-
-
-
+        # Apply mask and set background to black
+        img = img * mask2[:, :, np.newaxis]
+        img_pil = Image.fromarray(img).convert("RGB")
+
+        return img_pil
     except Exception as e:
         print(f"Error in remove_background for {image_path}: {str(e)}")
         raise
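The rewritten remove_background trades rembg's learned matting for classical GrabCut, dropping a heavyweight dependency. A minimal standalone sketch of the same technique, assuming a placeholder input path and a subject that sits inside the 10% border margin:

import cv2
import numpy as np

# "input.jpg" is a placeholder path, not a file from this repo.
img = cv2.imread("input.jpg")                       # BGR uint8
mask = np.zeros(img.shape[:2], np.uint8)            # per-pixel GrabCut labels
bgd_model = np.zeros((1, 65), np.float64)           # internal GMM state
fgd_model = np.zeros((1, 65), np.float64)

h, w = img.shape[:2]
m = int(min(w, h) * 0.1)                            # assume a centered subject
cv2.grabCut(img, mask, (m, m, w - 2 * m, h - 2 * m),
            bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)

# GC_BGD (0) and GC_PR_BGD (2) count as background; the rest is foreground.
fg = np.where((mask == cv2.GC_BGD) | (mask == cv2.GC_PR_BGD), 0, 1).astype("uint8")
cv2.imwrite("cutout.png", img * fg[:, :, None])     # background zeroed to black

The rectangle seed is the main trade-off: subjects touching the image border fall outside the assumed margin and can be cut into, a case rembg handled.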
@@ -111,7 +123,7 @@ def remove_background(image_path):
 def preprocess_image(image_path):
     img = remove_background(image_path)
     if img is None:
-        raise ValueError("
+        raise ValueError("No foreground detected in image")
 
     if img.width > MAX_DIMENSION or img.height > MAX_DIMENSION:
         if img.width > img.height:
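The context lines above lead into an aspect-preserving downscale whose body falls outside this hunk. The usual pattern looks roughly like the sketch below; MAX_DIMENSION's value here is a placeholder, since the real constant is defined earlier in app.py:

from PIL import Image

MAX_DIMENSION = 512  # placeholder value, not taken from this diff

def clamp_size(img: Image.Image) -> Image.Image:
    # Downscale so the longer side is MAX_DIMENSION, keeping aspect ratio.
    if img.width <= MAX_DIMENSION and img.height <= MAX_DIMENSION:
        return img
    if img.width > img.height:
        size = (MAX_DIMENSION, round(img.height * MAX_DIMENSION / img.width))
    else:
        size = (round(img.width * MAX_DIMENSION / img.height), MAX_DIMENSION)
    return img.resize(size, Image.LANCZOS)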
@@ -135,15 +147,15 @@ def preprocess_image(image_path):
     return img
 
 def load_models():
-    global dpt_estimator, depth_anything_model, depth_anything_processor, model_loaded, model_loading
+    global dpt_estimator, model_loaded, model_loading
 
     if model_loaded:
-        return dpt_estimator, depth_anything_model, depth_anything_processor
+        return dpt_estimator
 
     if model_loading:
         while model_loading and not model_loaded:
             time.sleep(0.5)
-        return dpt_estimator, depth_anything_model, depth_anything_processor
+        return dpt_estimator
 
     try:
         model_loading = True
@@ -155,7 +167,8 @@ def load_models():
             login(token=hf_token)
             print("Authenticated with Hugging Face token")
         else:
-            print("
+            print("Error: HF_TOKEN not found in environment. Intel/dpt-large requires authentication.")
+            raise ValueError("HF_TOKEN is required for Intel/dpt-large")
 
         dpt_model_name = "Intel/dpt-large"
         fs = HfFileSystem(token=hf_token)
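A missing token is now a hard failure rather than a logged warning, so the Space fails at startup instead of mid-request. Stripped of the surrounding model-download logic, the check reduces to this pattern (a sketch under that assumption):

import os
from huggingface_hub import login

hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)           # authenticates the whole process
    print("Authenticated with Hugging Face token")
else:
    raise ValueError("HF_TOKEN is required for Intel/dpt-large")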
@@ -195,49 +208,8 @@ def load_models():
         print("DPT-Large loaded")
         gc.collect()
 
-        da_model_name = "LiheYoung/depth-anything-v2-small"
-        da_model_cached = os.path.exists(os.path.join(CACHE_DIR, "hub", "models--LiheYoung--depth-anything-v2-small"))
-
-        if not da_model_cached:
-            for attempt in range(max_retries):
-                try:
-                    print(f"Attempting to download {da_model_name}, attempt {attempt+1}")
-                    snapshot_download(
-                        repo_id=da_model_name,
-                        cache_dir=CACHE_DIR,
-                        resume_download=True,
-                        token=hf_token
-                    )
-                    print(f"Successfully downloaded {da_model_name}")
-                    break
-                except Exception as e:
-                    if attempt < max_retries - 1:
-                        print(f"Depth Anything download attempt {attempt+1} failed: {str(e)}. Retrying after {retry_delay}s...")
-                        time.sleep(retry_delay)
-                        retry_delay *= 2
-                    else:
-                        print(f"Failed to load Depth Anything: {str(e)}. Falling back to DPT-Large only.")
-                        depth_anything_model = None
-                        depth_anything_processor = None
-                        model_loaded = True
-                        return dpt_estimator, None, None
-        else:
-            print(f"{da_model_name} already cached in {CACHE_DIR}")
-
-        depth_anything_processor = AutoImageProcessor.from_pretrained(
-            da_model_name,
-            cache_dir=CACHE_DIR,
-            token=hf_token
-        )
-        depth_anything_model = AutoModelForDepthEstimation.from_pretrained(
-            da_model_name,
-            cache_dir=CACHE_DIR,
-            token=hf_token
-        ).to("cpu")
-
         model_loaded = True
-
-        return dpt_estimator, depth_anything_model, depth_anything_processor
+        return dpt_estimator
 
     except Exception as e:
         print(f"Error loading models: {str(e)}")
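With the Depth Anything branch gone, load_models() manages a single estimator and returns it directly instead of a three-way tuple. The load path reduces to one transformers pipeline call; a sketch, with retry and cache-path kwargs omitted since they sit outside this hunk:

import os
from transformers import pipeline

# "Intel/dpt-large" matches dpt_model_name above; token comes from HF_TOKEN.
dpt_estimator = pipeline(
    "depth-estimation",
    model="Intel/dpt-large",
    token=os.environ.get("HF_TOKEN"),
    device=-1,  # CPU, matching the health check's "device": "cpu"
)

Calling dpt_estimator(image) returns a dict whose "depth" entry is a PIL image, which is what the dpt_result["depth"] access in the /convert hunk below relies on.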
@@ -246,38 +218,6 @@ def load_models():
     finally:
         model_loading = False
 
-def fuse_depth_maps(dpt_depth, da_depth, detail_level='medium'):
-    if isinstance(dpt_depth, Image.Image):
-        dpt_depth = np.array(dpt_depth)
-    if isinstance(da_depth, torch.Tensor):
-        da_depth = da_depth.cpu().numpy()
-    if len(dpt_depth.shape) > 2:
-        dpt_depth = np.mean(dpt_depth, axis=2)
-    if len(da_depth.shape) > 2:
-        da_depth = np.mean(da_depth, axis=2)
-
-    if dpt_depth.shape != da_depth.shape:
-        da_depth = cv2.resize(da_depth, (dpt_depth.shape[1], dpt_depth.shape[0]), interpolation=cv2.INTER_CUBIC)
-
-    p_low_dpt, p_high_dpt = np.percentile(dpt_depth, [1, 99])
-    p_low_da, p_high_da = np.percentile(da_depth, [1, 99])
-    dpt_depth = np.clip((dpt_depth - p_low_dpt) / (p_high_dpt - p_low_dpt), 0, 1) if p_high_dpt > p_low_dpt else dpt_depth
-    da_depth = np.clip((da_depth - p_low_da) / (p_high_da - p_low_da), 0, 1) if p_high_da > p_low_da else da_depth
-
-    if detail_level == 'high':
-        weight_da = 0.7
-        edges = cv2.Canny((da_depth * 255).astype(np.uint8), 50, 150)
-        edge_mask = (edges > 0).astype(np.float32)
-        dpt_weight = gaussian_filter(1 - edge_mask, sigma=1.0)
-        da_weight = gaussian_filter(edge_mask, sigma=1.0)
-        fused_depth = dpt_weight * dpt_depth + da_weight * da_depth * weight_da + (1 - weight_da) * dpt_depth
-    else:
-        weight_da = 0.5 if detail_level == 'medium' else 0.3
-        fused_depth = (1 - weight_da) * dpt_depth + weight_da * da_depth
-
-    fused_depth = np.clip(fused_depth, 0, 1)
-    return fused_depth
-
 def enhance_depth_map(depth_map, detail_level='medium'):
     enhanced_depth = depth_map.copy().astype(np.float32)
     p_low, p_high = np.percentile(enhanced_depth, [1, 99])
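fuse_depth_maps is removed outright, but its percentile normalization survives both in enhance_depth_map here and in the /convert route below. As a standalone helper, the shared pattern is (a sketch, not code from app.py):

import numpy as np

def percentile_stretch(depth: np.ndarray) -> np.ndarray:
    # Clip to the 1st/99th percentiles and rescale to [0, 1] so a few
    # outlier depth values don't flatten the useful range.
    d = depth.astype(np.float32)
    p_low, p_high = np.percentile(d, [1, 99])
    if p_high <= p_low:          # constant map: nothing to stretch
        return d
    return np.clip((d - p_low) / (p_high - p_low), 0.0, 1.0)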
@@ -420,7 +360,7 @@ def combine_meshes(meshes):
 def health_check():
     return jsonify({
         "status": "healthy",
-        "model": "DPT-Large
+        "model": "DPT-Large (Multi-View)",
         "device": "cpu"
     }), 200
 
@@ -527,7 +467,7 @@ def convert_image_to_3d():
         processing_jobs[job_id]['progress'] = 10
 
         try:
-            dpt_model, depth_anything_model, depth_anything_processor = load_models()
+            dpt_model = load_models()
             processing_jobs[job_id]['progress'] = 20
         except Exception as e:
             processing_jobs[job_id]['status'] = 'error'
@@ -543,26 +483,13 @@ def convert_image_to_3d():
             dpt_result = dpt_model(image)
             dpt_depth = dpt_result["depth"]
 
-            if
-
-
-
-
-                da_depth = torch.nn.functional.interpolate(
-                    da_depth.unsqueeze(0).unsqueeze(0),
-                    size=(image.height, image.width),
-                    mode='bicubic',
-                    align_corners=False
-                ).squeeze()
-                fused_depth = fuse_depth_maps(dpt_depth, da_depth, detail_level)
-            else:
-                fused_depth = np.array(dpt_depth) if isinstance(dpt_depth, Image.Image) else dpt_depth
-                if len(fused_depth.shape) > 2:
-                    fused_depth = np.mean(fused_depth, axis=2)
-                p_low, p_high = np.percentile(fused_depth, [1, 99])
-                fused_depth = np.clip((fused_depth - p_low) / (p_high - p_low), 0, 1) if p_high > p_low else fused_depth
+            depth_map = np.array(dpt_depth) if isinstance(dpt_depth, Image.Image) else dpt_depth
+            if len(depth_map.shape) > 2:
+                depth_map = np.mean(depth_map, axis=2)
+            p_low, p_high = np.percentile(depth_map, [1, 99])
+            depth_map = np.clip((depth_map - p_low) / (p_high - p_low), 0, 1) if p_high > p_low else depth_map
 
-            mesh = depth_to_mesh(
+            mesh = depth_to_mesh(depth_map, image, resolution=mesh_resolution, detail_level=detail_level, view_angle=view_angles[view])
             meshes.append(mesh)
             gc.collect()
 
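The replacement block reduces the per-view step to: run DPT, coerce the PIL depth output to a 2-D float array, percentile-normalize, then hand the result to depth_to_mesh. Gathered into one function for clarity (names mirror the diff; depth_to_mesh itself is defined elsewhere in app.py):

import numpy as np
from PIL import Image

def view_depth(dpt_model, image: Image.Image) -> np.ndarray:
    # dpt_model is the pipeline from load_models(); "depth" is a PIL image.
    dpt_depth = dpt_model(image)["depth"]
    depth_map = np.array(dpt_depth, dtype=np.float32)
    if depth_map.ndim > 2:                      # collapse any channel axis
        depth_map = depth_map.mean(axis=2)
    p_low, p_high = np.percentile(depth_map, [1, 99])
    if p_high > p_low:
        depth_map = np.clip((depth_map - p_low) / (p_high - p_low), 0, 1)
    return depth_map                            # input for depth_to_mesh(...)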
@@ -748,7 +675,7 @@ def model_info(job_id):
 @app.route('/', methods=['GET'])
 def index():
     return jsonify({
-        "message": "Multi-View Image to 3D API (DPT-Large
+        "message": "Multi-View Image to 3D API (DPT-Large)",
         "endpoints": [
             "/convert",
             "/progress/<job_id>",
@@ -766,7 +693,7 @@ def index():
             "detail_level": "low, medium, or high",
             "texture_quality": "low, medium, or high"
         },
-        "description": "Creates
+        "description": "Creates 3D models from multiple 2D images using Intel DPT-Large with custom background removal."
     }), 200
 
 if __name__ == '__main__':