Spaces:

mac9087
/

rightnight

Sleeping

App Files Files Community

mac9087 committed on Apr 26

Commit

f77b9b6

verified ·

1 Parent(s): 89bd619

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -75

app.py CHANGED Viewed

@@ -84,50 +84,8 @@ def process_with_timeout(function, args, timeout):
 def allowed_file(filename):
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
-def remove_background(image):
-    """Remove background using OpenCV GrabCut algorithm with improved precision"""
-    img_array = np.array(image)
-    # Convert to RGB if image has alpha channel
-    if img_array.shape[2] == 4:
-        img_array = cv2.cvtColor(img_array, cv2.COLOR_RGBA2RGB)
-    # Create mask for GrabCut
-    mask = np.zeros(img_array.shape[:2], np.uint8)
-    bgdModel = np.zeros((1, 65), np.float64)
-    fgdModel = np.zeros((1, 65), np.float64)
-    # Define a tighter rectangle for foreground, adjusting based on image content
-    height, width = img_array.shape[:2]
-    rect = (int(width * 0.1), int(height * 0.1), int(width * 0.8), int(height * 0.8))
-    # Run GrabCut with multiple iterations for better accuracy
-    cv2.grabCut(img_array, mask, rect, bgdModel, fgdModel, 10, cv2.GC_INIT_WITH_RECT)
-    # Refine mask using edge detection to preserve subject edges
-    mask2 = np.where((mask == cv2.GC_PR_FGD) | (mask == cv2.GC_FGD), 1, 0).astype('uint8')
-    edges = cv2.Canny(mask2 * 255, 50, 150)
-    mask2 = cv2.dilate(edges, np.ones((3, 3), np.uint8), iterations=1)
-    mask2 = cv2.erode(mask2, np.ones((3, 3), np.uint8), iterations=1)
-    # Apply mask to image
-    result = img_array * mask2[:, :, np.newaxis]
-    # Create alpha channel
-    alpha = mask2 * 255
-    result = np.dstack((result, alpha))
-    return Image.fromarray(result, 'RGBA')
 def preprocess_image(image_path):
     with Image.open(image_path) as img:
-        # Handle PNG transparency
-        if img.mode == 'RGBA':
-            # Create white background
-            background = Image.new('RGB', img.size, (255, 255, 255))
-            background.paste(img, mask=img.split()[3])
-            img = background
         img = img.convert("RGB")
         if img.width > MAX_DIMENSION or img.height > MAX_DIMENSION:
@@ -139,11 +97,7 @@ def preprocess_image(image_path):
                 new_width = int(img.width * (MAX_DIMENSION / img.height))
             img = img.resize((new_width, new_height), Image.LANCZOS)
-        # Remove background and convert back to RGB for processor
-        img_with_alpha = remove_background(img)
-        img_rgb = img_with_alpha.convert("RGB")  # Convert to RGB for processor
-        img_array = np.array(img_rgb)
         if len(img_array.shape) == 3 and img_array.shape[2] == 3:
             lab = cv2.cvtColor(img_array, cv2.COLOR_RGB2LAB)
             l, a, b = cv2.split(lab)
@@ -151,9 +105,9 @@ def preprocess_image(image_path):
             cl = clahe.apply(l)
             enhanced_lab = cv2.merge((cl, a, b))
             img_array = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)
-            img_rgb = Image.fromarray(img_array)
-        return img_rgb  # Return RGB image
 def load_models():
     global dpt_estimator, depth_anything_model, depth_anything_processor, model_loaded, model_loading
@@ -170,11 +124,13 @@ def load_models():
         model_loading = True
         print("Loading models...")
         hf_token = os.environ.get('HF_TOKEN')
         if hf_token:
             login(token=hf_token)
             print("Authenticated with Hugging Face token")
         dpt_model_name = "Intel/dpt-large"
         max_retries = 3
         retry_delay = 5
@@ -205,6 +161,7 @@ def load_models():
         print("DPT-Large loaded")
         gc.collect()
         da_model_name = "depth-anything/Depth-Anything-V2-Small-hf"
         for attempt in range(max_retries):
             try:
@@ -262,20 +219,20 @@ def fuse_depth_maps(dpt_depth, da_depth, detail_level='medium'):
     if dpt_depth.shape != da_depth.shape:
         da_depth = cv2.resize(da_depth, (dpt_depth.shape[1], dpt_depth.shape[0]), interpolation=cv2.INTER_CUBIC)
-    p_low_dpt, p_high_dpt = np.percentile(dpt_depth, [5, 95])
-    p_low_da, p_high_da = np.percentile(da_depth, [5, 95])
     dpt_depth = np.clip((dpt_depth - p_low_dpt) / (p_high_dpt - p_low_dpt), 0, 1) if p_high_dpt > p_low_dpt else dpt_depth
     da_depth = np.clip((da_depth - p_low_da) / (p_high_da - p_low_da), 0, 1) if p_high_da > p_low_da else da_depth
     if detail_level == 'high':
-        weight_da = 0.6
         edges = cv2.Canny((da_depth * 255).astype(np.uint8), 50, 150)
         edge_mask = (edges > 0).astype(np.float32)
         dpt_weight = gaussian_filter(1 - edge_mask, sigma=1.0)
         da_weight = gaussian_filter(edge_mask, sigma=1.0)
         fused_depth = dpt_weight * dpt_depth + da_weight * da_depth * weight_da + (1 - weight_da) * dpt_depth
     else:
-        weight_da = 0.4 if detail_level == 'medium' else 0.2
         fused_depth = (1 - weight_da) * dpt_depth + weight_da * da_depth
     fused_depth = np.clip(fused_depth, 0, 1)
@@ -283,25 +240,25 @@ def fuse_depth_maps(dpt_depth, da_depth, detail_level='medium'):
 def enhance_depth_map(depth_map, detail_level='medium'):
     enhanced_depth = depth_map.copy().astype(np.float32)
-    p_low, p_high = np.percentile(enhanced_depth, [5, 95])
     enhanced_depth = np.clip(enhanced_depth, p_low, p_high)
     enhanced_depth = (enhanced_depth - p_low) / (p_high - p_low) if p_high > p_low else enhanced_depth
     if detail_level == 'high':
-        blurred = gaussian_filter(enhanced_depth, sigma=1.0)
         mask = enhanced_depth - blurred
-        enhanced_depth = enhanced_depth + 1.0 * mask
-        smooth1 = gaussian_filter(enhanced_depth, sigma=0.3)
-        smooth2 = gaussian_filter(enhanced_depth, sigma=1.5)
         edge_mask = enhanced_depth - smooth2
-        enhanced_depth = smooth1 + 0.8 * edge_mask
     elif detail_level == 'medium':
-        blurred = gaussian_filter(enhanced_depth, sigma=0.7)
         mask = enhanced_depth - blurred
-        enhanced_depth = enhanced_depth + 0.6 * mask
-        enhanced_depth = gaussian_filter(enhanced_depth, sigma=0.4)
-    else:
         enhanced_depth = gaussian_filter(enhanced_depth, sigma=0.5)
     enhanced_depth = np.clip(enhanced_depth, 0, 1)
     return enhanced_depth
@@ -322,16 +279,16 @@ def depth_to_mesh(depth_map, image, resolution=100, detail_level='medium'):
         dx = np.gradient(z_values, axis=1)
         dy = np.gradient(z_values, axis=0)
         gradient_magnitude = np.sqrt(dx**2 + dy**2)
-        edge_mask = np.clip(gradient_magnitude * 2, 0, 0.1)
-        z_values = z_values + edge_mask * (z_values - gaussian_filter(z_values, sigma=0.5))
-    z_min, z_max = np.percentile(z_values, [10, 90])
-    z_values = np.clip((z_values - z_min) / (z_max - z_min), 0, 1) if z_max > z_min else z_values
-    z_scaling = 1.5 if detail_level == 'high' else 1.2 if detail_level == 'medium' else 1.0
     z_values = z_values * z_scaling
-    x_grid = (x_grid / w - 0.5) * 1.5
-    y_grid = (y_grid / h - 0.5) * 1.5
     vertices = np.vstack([x_grid.flatten(), -y_grid.flatten(), -z_values.flatten()]).T
     faces = []
@@ -358,7 +315,7 @@ def depth_to_mesh(depth_map, image, resolution=100, detail_level='medium'):
     mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
     if image:
-        img_array = np.array(image.convert("RGB"))  # Ensure RGB for consistency
         vertex_colors = np.zeros((vertices.shape[0], 4), dtype=np.uint8)
         for i in range(resolution):
             for j in range(resolution):
@@ -379,13 +336,17 @@ def depth_to_mesh(depth_map, image, resolution=100, detail_level='medium'):
                     vertex_colors[vertex_idx, :3] = [r, g, b]
                     vertex_colors[vertex_idx, 3] = 255
                 elif len(img_array.shape) == 3 and img_array.shape[2] == 4:
-                    for c in range(3):  # Use only RGB channels
                         vertex_colors[vertex_idx, c] = int((1-wx)*(1-wy)*img_array[y0, x0, c] +
                                                         wx*(1-wy)*img_array[y0, x1, c] +
                                                         (1-wx)*wy*img_array[y1, x0, c] +
                                                         wx*wy*img_array[y1, x1, c])
                     vertex_colors[vertex_idx, 3] = 255
-        mesh.visual.vertex_colors = vertex_colors
     if detail_level != 'high':
         mesh = mesh.smoothed(method='laplacian', iterations=1)
@@ -498,9 +459,11 @@ def convert_image_to_3d():
             try:
                 def estimate_depth():
                     with torch.no_grad():
                         dpt_result = dpt_model(image)
                         dpt_depth = dpt_result["depth"]
                         if da_model and da_processor:
                             inputs = da_processor(images=image, return_tensors="pt")
                             inputs = {k: v.to("cpu") for k, v in inputs.items()}
@@ -517,7 +480,7 @@ def convert_image_to_3d():
                             fused_depth = np.array(dpt_depth) if isinstance(dpt_depth, Image.Image) else dpt_depth
                             if len(fused_depth.shape) > 2:
                                 fused_depth = np.mean(fused_depth, axis=2)
-                            p_low, p_high = np.percentile(fused_depth, [5, 95])
                             fused_depth = np.clip((fused_depth - p_low) / (p_high - p_low), 0, 1) if p_high > p_low else fused_depth
                         return fused_depth
@@ -722,4 +685,4 @@ def index():
 if __name__ == '__main__':
     cleanup_old_jobs()
     port = int(os.environ.get('PORT', 7860))
-    app.run(host='0.0.0.0', port=port)

 def allowed_file(filename):
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
 def preprocess_image(image_path):
     with Image.open(image_path) as img:
         img = img.convert("RGB")
         if img.width > MAX_DIMENSION or img.height > MAX_DIMENSION:
                 new_width = int(img.width * (MAX_DIMENSION / img.height))
             img = img.resize((new_width, new_height), Image.LANCZOS)
+        img_array = np.array(img)
         if len(img_array.shape) == 3 and img_array.shape[2] == 3:
             lab = cv2.cvtColor(img_array, cv2.COLOR_RGB2LAB)
             l, a, b = cv2.split(lab)
             cl = clahe.apply(l)
             enhanced_lab = cv2.merge((cl, a, b))
             img_array = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)
+            img = Image.fromarray(img_array)
+        return img
 def load_models():
     global dpt_estimator, depth_anything_model, depth_anything_processor, model_loaded, model_loading
         model_loading = True
         print("Loading models...")
+        # Authenticate with Hugging Face
         hf_token = os.environ.get('HF_TOKEN')
         if hf_token:
             login(token=hf_token)
             print("Authenticated with Hugging Face token")
+        # DPT-Large
         dpt_model_name = "Intel/dpt-large"
         max_retries = 3
         retry_delay = 5
         print("DPT-Large loaded")
         gc.collect()
+        # Depth Anything
         da_model_name = "depth-anything/Depth-Anything-V2-Small-hf"
         for attempt in range(max_retries):
             try:
     if dpt_depth.shape != da_depth.shape:
         da_depth = cv2.resize(da_depth, (dpt_depth.shape[1], dpt_depth.shape[0]), interpolation=cv2.INTER_CUBIC)
+    p_low_dpt, p_high_dpt = np.percentile(dpt_depth, [1, 99])
+    p_low_da, p_high_da = np.percentile(da_depth, [1, 99])
     dpt_depth = np.clip((dpt_depth - p_low_dpt) / (p_high_dpt - p_low_dpt), 0, 1) if p_high_dpt > p_low_dpt else dpt_depth
     da_depth = np.clip((da_depth - p_low_da) / (p_high_da - p_low_da), 0, 1) if p_high_da > p_low_da else da_depth
     if detail_level == 'high':
+        weight_da = 0.7
         edges = cv2.Canny((da_depth * 255).astype(np.uint8), 50, 150)
         edge_mask = (edges > 0).astype(np.float32)
         dpt_weight = gaussian_filter(1 - edge_mask, sigma=1.0)
         da_weight = gaussian_filter(edge_mask, sigma=1.0)
         fused_depth = dpt_weight * dpt_depth + da_weight * da_depth * weight_da + (1 - weight_da) * dpt_depth
     else:
+        weight_da = 0.5 if detail_level == 'medium' else 0.3
         fused_depth = (1 - weight_da) * dpt_depth + weight_da * da_depth
     fused_depth = np.clip(fused_depth, 0, 1)
 def enhance_depth_map(depth_map, detail_level='medium'):
     enhanced_depth = depth_map.copy().astype(np.float32)
+    p_low, p_high = np.percentile(enhanced_depth, [1, 99])
     enhanced_depth = np.clip(enhanced_depth, p_low, p_high)
     enhanced_depth = (enhanced_depth - p_low) / (p_high - p_low) if p_high > p_low else enhanced_depth
     if detail_level == 'high':
+        blurred = gaussian_filter(enhanced_depth, sigma=1.5)
         mask = enhanced_depth - blurred
+        enhanced_depth = enhanced_depth + 1.5 * mask
+        smooth1 = gaussian_filter(enhanced_depth, sigma=0.5)
+        smooth2 = gaussian_filter(enhanced_depth, sigma=2.0)
         edge_mask = enhanced_depth - smooth2
+        enhanced_depth = smooth1 + 1.2 * edge_mask
     elif detail_level == 'medium':
+        blurred = gaussian_filter(enhanced_depth, sigma=1.0)
         mask = enhanced_depth - blurred
+        enhanced_depth = enhanced_depth + 0.8 * mask
         enhanced_depth = gaussian_filter(enhanced_depth, sigma=0.5)
+    else:
+        enhanced_depth = gaussian_filter(enhanced_depth, sigma=0.7)
     enhanced_depth = np.clip(enhanced_depth, 0, 1)
     return enhanced_depth
         dx = np.gradient(z_values, axis=1)
         dy = np.gradient(z_values, axis=0)
         gradient_magnitude = np.sqrt(dx**2 + dy**2)
+        edge_mask = np.clip(gradient_magnitude * 5, 0, 0.2)
+        z_values = z_values + edge_mask * (z_values - gaussian_filter(z_values, sigma=1.0))
+    z_min, z_max = np.percentile(z_values, [2, 98])
+    z_values = (z_values - z_min) / (z_max - z_min) if z_max > z_min else z_values
+    z_scaling = 2.5 if detail_level == 'high' else 2.0 if detail_level == 'medium' else 1.5
     z_values = z_values * z_scaling
+    x_grid = (x_grid / w - 0.5) * 2.0
+    y_grid = (y_grid / h - 0.5) * 2.0
     vertices = np.vstack([x_grid.flatten(), -y_grid.flatten(), -z_values.flatten()]).T
     faces = []
     mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
     if image:
+        img_array = np.array(image)
         vertex_colors = np.zeros((vertices.shape[0], 4), dtype=np.uint8)
         for i in range(resolution):
             for j in range(resolution):
                     vertex_colors[vertex_idx, :3] = [r, g, b]
                     vertex_colors[vertex_idx, 3] = 255
                 elif len(img_array.shape) == 3 and img_array.shape[2] == 4:
+                    for c in range(4):
                         vertex_colors[vertex_idx, c] = int((1-wx)*(1-wy)*img_array[y0, x0, c] +
                                                         wx*(1-wy)*img_array[y0, x1, c] +
                                                         (1-wx)*wy*img_array[y1, x0, c] +
                                                         wx*wy*img_array[y1, x1, c])
+                else:
+                    gray = int((1-wx)*(1-wy)*img_array[y0, x0] + wx*(1-wy)*img_array[y0, x1] +
+                              (1-wx)*wy*img_array[y1, x0] + wx*wy*img_array[y1, x1])
+                    vertex_colors[vertex_idx, :3] = [gray, gray, gray]
                     vertex_colors[vertex_idx, 3] = 255
+            mesh.visual.vertex_colors = vertex_colors
     if detail_level != 'high':
         mesh = mesh.smoothed(method='laplacian', iterations=1)
             try:
                 def estimate_depth():
                     with torch.no_grad():
+                        # DPT-Large
                         dpt_result = dpt_model(image)
                         dpt_depth = dpt_result["depth"]
+                        # Depth Anything (if loaded)
                         if da_model and da_processor:
                             inputs = da_processor(images=image, return_tensors="pt")
                             inputs = {k: v.to("cpu") for k, v in inputs.items()}
                             fused_depth = np.array(dpt_depth) if isinstance(dpt_depth, Image.Image) else dpt_depth
                             if len(fused_depth.shape) > 2:
                                 fused_depth = np.mean(fused_depth, axis=2)
+                            p_low, p_high = np.percentile(fused_depth, [1, 99])
                             fused_depth = np.clip((fused_depth - p_low) / (p_high - p_low), 0, 1) if p_high > p_low else fused_depth
                         return fused_depth
 if __name__ == '__main__':
     cleanup_old_jobs()
     port = int(os.environ.get('PORT', 7860))
+    app.run(host='0.0.0.0', port=port)