Update app.py

app.py CHANGED
@@ -8,9 +8,9 @@ from transformers import DPTImageProcessor, DPTForDepthEstimation
 import warnings
 warnings.filterwarnings("ignore")
 
-# Load segmentation model
-seg_processor = AutoImageProcessor.from_pretrained("
-seg_model = AutoModelForSemanticSegmentation.from_pretrained("
+# Load segmentation model - using SegFormer which is compatible with AutoModelForSemanticSegmentation
+seg_processor = AutoImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
+seg_model = AutoModelForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
 
 # Load depth estimation model
 depth_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
@@ -99,22 +99,44 @@ def apply_depth_blur(image, depth_map, max_sigma=25):
     return result
 
 def get_segmentation_mask(image_pil):
-    """Get segmentation mask for person
+    """Get segmentation mask for person/foreground from an image."""
+    # Resize the image to the size expected by the segmentation model
+    width, height = image_pil.size
+    image_pil_resized = image_pil.resize((512, 512))
+
     # Process the image with the segmentation model
-    inputs = seg_processor(images=
-
+    inputs = seg_processor(images=image_pil_resized, return_tensors="pt")
+    with torch.no_grad():
+        outputs = seg_model(**inputs)
 
     # Get the predicted segmentation mask
-
+    logits = outputs.logits
+    upsampled_logits = torch.nn.functional.interpolate(
+        logits,
+        size=(512, 512),
+        mode="bilinear",
+        align_corners=False,
+    )
+
+    # Get the predicted segmentation mask
+    predicted_mask = upsampled_logits.argmax(dim=1)[0]
 
     # Convert the mask to a numpy array
     mask_np = predicted_mask.cpu().numpy()
 
-    #
-
-
+    # Create a foreground mask - considering classes that are likely to be foreground
+    # The ADE20K dataset has 150 classes, so we need to choose which ones to consider as foreground
+    # Common foreground classes: person (12), animal classes, and objects like furniture
+    # This is a simplified approach - you may need to adjust based on your needs
+    foreground_classes = [12, 13, 14, 15, 16, 17, 18, 19, 20]  # Person and some objects
+    foreground_mask = np.zeros_like(mask_np)
+    for cls in foreground_classes:
+        foreground_mask[mask_np == cls] = 1
+
+    # Resize back to original image size
+    foreground_mask = cv2.resize(foreground_mask, (width, height))
 
-    return
+    return foreground_mask
 
 def get_depth_map(image_pil):
     """Get depth map from an image."""
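The hard-coded foreground_classes indices refer to the ADE20K label set that this SegFormer checkpoint was fine-tuned on, and the commit's own comment flags them as a simplified choice that may need adjusting. One way to tune the list is to read the id-to-label map out of the checkpoint's config; a minimal sketch, assuming the same checkpoint the commit loads:

from transformers import AutoConfig

# Same SegFormer checkpoint that app.py now loads
config = AutoConfig.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")

# Print every ADE20K class index with its label so foreground_classes can be
# adjusted; index 12 should print as "person", matching the comment in the diff.
for idx, label in sorted(config.id2label.items(), key=lambda kv: int(kv[0])):
    print(idx, label)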
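A quick way to sanity-check the new function end to end is sketched below. This is a hypothetical smoke test, not part of the commit: the filename is a placeholder, and it assumes the definitions from app.py above are in scope along with its imports (torch, numpy, cv2, PIL). One caveat worth knowing: argmax(dim=1) typically yields an int64 tensor, and cv2.resize rejects 64-bit integer arrays, so the final resize may fail until mask_np is cast to a supported dtype such as np.uint8.

import numpy as np
from PIL import Image

# "example.jpg" is a placeholder; any RGB photo works
img = Image.open("example.jpg").convert("RGB")

mask = get_segmentation_mask(img)

print(mask.shape)        # (height, width), matching the original image
print(np.unique(mask))   # subset of [0 1]; 1 marks foreground pixels

# Save the mask for visual inspection
Image.fromarray((mask * 255).astype(np.uint8)).save("mask.png")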