Spaces: Running on Zero
hugohabicht01 committed
Commit · dae4d1c
Parent(s): 335bcd6
automatically download sam weights
Browse files:
- blurnonymize.py +169 -63
- utils.py +1 -5
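
At a glance: ImageBlurnonymizer now downloads the SAM 2.1 checkpoint and config on first use instead of assuming they already exist on disk. A minimal usage sketch of the resulting flow (the image path and the BoundingBox keyword fields are hypothetical illustrations, not part of this commit):

import cv2
from blurnonymize import ImageBlurnonymizer
from utils import BoundingBox  # assumed: utils exposes the BoundingBox model

# First construction runs init_sam(), which fetches ./sam2.1_hiera_small.pt
# and ./sam2.1_hiera_s.yaml if they are missing, then builds the predictor.
anonymizer = ImageBlurnonymizer()

image = cv2.cvtColor(cv2.imread("example.jpg"), cv2.COLOR_BGR2RGB)
boxes = [BoundingBox(x_min=10, y_min=20, x_max=110, y_max=140)]  # hypothetical field names
censored = anonymizer.censor_image_blur_easy(image, boxes, method="segmentation")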
blurnonymize.py
CHANGED
@@ -1,4 +1,4 @@
-import
+import os
 import traceback
 from typing import Literal, Optional
 
@@ -7,7 +7,6 @@ import matplotlib.patches as patches
 import matplotlib.pyplot as plt
 import numpy as np
 import torch
-from pydantic import BaseModel
 from sam2.build_sam import build_sam2
 from sam2.sam2_image_predictor import SAM2ImagePredictor
 from utils import *
@@ -15,6 +14,7 @@ from utils import *
 
 # --- Utility Functions (kept outside the class) ---
 
+
 def blur_image(img: np.ndarray):
     """Applies Gaussian blur to an image."""
     return cv2.GaussianBlur(img, (35, 35), 50)
@@ -26,13 +26,14 @@ def plot_polygon_mask(image: np.ndarray, polygons: list[list[tuple[int, int]]]):
     """
     plt.imshow(image)
     for polygon in polygons:
-        if not polygon:
+        if not polygon:
+            continue  # Skip empty polygons
         polygon_array = np.array(polygon).reshape(-1, 2)
         x, y = zip(*polygon_array)
         x = list(x) + [x[0]]
         y = list(y) + [y[0]]
-        plt.plot(x, y,
-    plt.axis(
+        plt.plot(x, y, "-r", linewidth=2)
+    plt.axis("off")
     plt.tight_layout()
     plt.show()
 
@@ -41,12 +42,18 @@ def visualize_boxes(image, findings):
     """Visualizes bounding boxes on an image."""
     fig, ax = plt.subplots(1)
     ax.imshow(image)
-    colors = [
+    colors = ["r", "g", "b", "c", "m", "y", "k"]
     for i, finding in enumerate(findings):
         [x_min, y_min, x_max, y_max] = finding.bounding_box
         color = colors[i % len(colors)]
-        rect = patches.Rectangle(
-
+        rect = patches.Rectangle(
+            (x_min, y_min),
+            x_max - x_min,
+            y_max - y_min,
+            linewidth=2,
+            edgecolor=color,
+            facecolor="none",
+        )
         ax.add_patch(rect)
         print(f"Finding {i + 1} (Color: {color}):")
     if not findings:
@@ -55,8 +62,10 @@ def visualize_boxes(image, findings):
     plt.yticks(np.arange(0, image.shape[0], 50))
     plt.show()
 
+
 # --- SAM Visualization Helpers (kept outside the class) ---
 
+
 def show_mask(mask, ax, random_color=False, borders=True):
     """Displays a single mask on a matplotlib axis."""
     if random_color:
@@ -69,23 +78,54 @@ def show_mask(mask, ax, random_color=False, borders=True):
     if borders:
         contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
         # contours = [cv2.approxPolyDP(contour, epsilon=0.01, closed=True) for contour in contours]  # Optional smoothing
-        mask_image = cv2.drawContours(
+        mask_image = cv2.drawContours(
+            mask_image, contours, -1, (1, 1, 1, 0.5), thickness=2
+        )
     ax.imshow(mask_image)
 
+
 def show_points(coords, labels, ax, marker_size=375):
     """Displays points (positive/negative) on a matplotlib axis."""
     pos_points = coords[labels == 1]
     neg_points = coords[labels == 0]
-    ax.scatter(
-
+    ax.scatter(
+        pos_points[:, 0],
+        pos_points[:, 1],
+        color="green",
+        marker="*",
+        s=marker_size,
+        edgecolor="white",
+        linewidth=1.25,
+    )
+    ax.scatter(
+        neg_points[:, 0],
+        neg_points[:, 1],
+        color="red",
+        marker="*",
+        s=marker_size,
+        edgecolor="white",
+        linewidth=1.25,
+    )
+
 
 def show_box(box, ax):
     """Displays a bounding box on a matplotlib axis."""
     x0, y0 = box[0], box[1]
     w, h = box[2] - box[0], box[3] - box[1]
-    ax.add_patch(
-
-def show_masks(image, masks, scores, point_coords=None, box_coords=None, input_labels=None, borders=True):
+    ax.add_patch(
+        plt.Rectangle((x0, y0), w, h, edgecolor="green", facecolor=(0, 0, 0, 0), lw=2)
+    )
+
+
+def show_masks(
+    image,
+    masks,
+    scores,
+    point_coords=None,
+    box_coords=None,
+    input_labels=None,
+    borders=True,
+):
     """Displays multiple masks resulting from SAM prediction."""
     for i, (mask, score) in enumerate(zip(masks, scores)):
         plt.figure(figsize=(10, 10))
@@ -98,16 +138,48 @@ def show_masks(image, masks, scores, point_coords=None, box_coords=None, input_labels=None, borders=True):
             show_box(box_coords, plt.gca())
         if len(scores) > 1:
             plt.title(f"Mask {i + 1}, Score: {score:.3f}", fontsize=18)
-        plt.axis(
+        plt.axis("off")
         plt.show()
 
 
 # --- ImageBlurnonymizer Class ---
 
+
 class ImageBlurnonymizer:
-    def __init__(self
+    def __init__(self):
+        self.predictor = None
+        self.device = None
+        self.model_cfg = None
+
+        self.checkpoint_name = "./sam2.1_hiera_small.pt"
+        self.model_cfg_name = "./sam2.1_hiera_s.yaml"
+        self.init_sam()
+
+    def init_sam(self, force=False):
+        # only initialize SAM if it hasn't been initialized yet
+        if self.predictor is not None and not force:
+            return
+
+        self.download_weights()
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
-
+        sam = build_sam2(self.model_cfg_name, self.checkpoint_name, device=self.device)
+        self.predictor = SAM2ImagePredictor(sam)
+
+    def download_weights(self):
+        # Download the SAM 2.1 checkpoint and config if they are not present locally;
+        # the file names are held in self.checkpoint_name and self.model_cfg_name.
+        checkpoint_url = "https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_small.pt"
+        cfg_url = "https://raw.githubusercontent.com/facebookresearch/sam2/refs/heads/main/sam2/configs/sam2.1/sam2.1_hiera_s.yaml"
+
+        if not os.path.exists(self.checkpoint_name):
+            print(
+                f"Downloading checkpoint from {checkpoint_url} to {self.checkpoint_name}"
+            )
+            torch.hub.download_url_to_file(checkpoint_url, self.checkpoint_name)
+
+        if not os.path.exists(self.model_cfg_name):
+            print(f"Downloading config from {cfg_url} to {self.model_cfg_name}")
+            torch.hub.download_url_to_file(cfg_url, self.model_cfg_name)
 
     @staticmethod
     def _smoothen_mask(mask: np.ndarray):
@@ -118,18 +190,18 @@ class ImageBlurnonymizer:
     @staticmethod
     def _mask_from_bbox(image_shape, bbox: tuple[int, int, int, int]):
         """Creates a simple rectangular mask from a bounding box."""
-        height, width, *_ = image_shape
+        height, width, *_ = image_shape  # Allow for 2D or 3D shape tuple
         xmin, ymin, xmax, ymax = bbox
         mask = np.zeros((height, width), dtype=np.uint8)
         mask[ymin:ymax, xmin:xmax] = 1
-        return mask
+        return mask  # No need for np.array() conversion
 
     @staticmethod
     def _apply_blur_mask(image: np.ndarray, mask: np.ndarray):
         """Applies a blur to an image based on a mask."""
-        if mask.ndim == 2:
-
-        blurred = blur_image(image)
+        if mask.ndim == 2:  # Ensure mask is 3-channel for broadcasting
+            mask = np.stack((mask,) * image.shape[2], axis=-1)
+        blurred = blur_image(image)  # Use the utility function
         return np.where(mask, blurred, image)
 
     @staticmethod
@@ -138,19 +210,22 @@ class ImageBlurnonymizer:
        try:
            converted = (binary_mask * 255).astype(np.uint8)
            # Use RETR_EXTERNAL for outer contours, CHAIN_APPROX_SIMPLE for efficiency
-            contours, _ = cv2.findContours(
+            contours, _ = cv2.findContours(
+                converted, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
+            )
            polygons = []
            for contour in contours:
                approx_contour = cv2.approxPolyDP(contour, epsilon, True)
                # Ensure points are converted correctly
-                polygon = [
+                polygon = [
+                    (int(point[0][0]), int(point[0][1])) for point in approx_contour
+                ]
                polygons.append(polygon)
            return polygons
        except Exception as e:
            print(f"An error occurred during polygon conversion: {e}")
            print(traceback.format_exc())
-            return None
-
+            return None  # Return None on error
 
    def get_segmentation_mask(self, image: np.ndarray, bbox: tuple[int, int, int, int]):
        """
@@ -159,9 +234,17 @@ class ImageBlurnonymizer:
        Adds points within the bounding box to guide SAM towards the intended object (e.g., face)
        and away from surrounding elements (e.g., hair).
        """
+
+        if self.predictor is None:
+            raise Exception("[-] sam has not been initialized")
+
+        if torch.cuda.is_available() and self.device == "cpu":
+            # the instance was wrongly initialized to run on cpu, but a gpu is available
+            self.init_sam(force=True)
+
        x_min, y_min, x_max, y_max = bbox
        x_width = x_max - x_min
-        y_height = y_max - y_min
+        y_height = y_max - y_min  # Corrected variable name
 
        # Handle cases where box dimensions are too small for third calculations
        x_third = x_width // 3 if x_width >= 3 else 0
@@ -175,18 +258,17 @@ class ImageBlurnonymizer:
        points.append([center_point[0], center_point[1] - y_third])
        points.append([center_point[0], center_point[1] + y_third])
        if x_third > 0:
-
-
+            points.append([center_point[0] + x_third, center_point[1]])
+            points.append([center_point[0] - x_third, center_point[1]])
 
        # Ensure points are valid coordinates (e.g., non-negative)
        points = [[max(0, p[0]), max(0, p[1])] for p in points]
 
-
        self.predictor.set_image(image)
        masks, scores, _ = self.predictor.predict(
-            box=np.array(bbox),
+            box=np.array(bbox),  # Predictor might expect numpy array
            point_coords=np.array(points),
-            point_labels=np.ones(len(points)),
+            point_labels=np.ones(len(points)),  # Label 1 for inclusion
            multimask_output=True,
        )
 
@@ -197,11 +279,17 @@ class ImageBlurnonymizer:
 
        return self._smoothen_mask(best_mask), best_score
 
-    def censor_image_blur(
-
+    def censor_image_blur(
+        self,
+        image: np.ndarray,
+        raw_out: str,
+        method: Optional[Literal["segmentation", "bbox"]] = "segmentation",
+        verbose=False,
+    ):
        """
        Censors an image by blurring regions identified in the raw_out string (LLM output).
        """
+        self.init_sam()
        json_output = parse_json_response(raw_out)
        # Ensure json_output is a list before passing to parse_into_models
        if isinstance(json_output, dict):
@@ -209,75 +297,93 @@ class ImageBlurnonymizer:
        elif isinstance(json_output, list):
            findings_list = json_output
        else:
-
-
-
+            # Handle unexpected type or raise an error
+            print(
+                f"Warning: Unexpected output type from parse_json_response: {type(json_output)}"
+            )
+            findings_list = []
 
-        parsed = parse_into_models(findings_list)
+        parsed = parse_into_models(findings_list)  # type: ignore
        # Filter findings based on severity
        filtered = [entry for entry in parsed if entry.severity > 0]
 
        if verbose:
-            visualize_boxes(image, filtered)
+            visualize_boxes(image, filtered)  # Use external visualization
 
        masks = []
        for finding in filtered:
-            bbox =
-
-
+            bbox = (
+                finding.bounding_box
+            )  # Assuming finding has a 'bounding_box' attribute
+            if method == "segmentation":
+                mask, _ = self.get_segmentation_mask(image, bbox)  # Use instance method
                if verbose:
                    polygons = self._binary_mask_to_polygon(mask)
-                if polygons:
-
-            elif method ==
-
+                    if polygons:  # Check if polygon conversion was successful
+                        plot_polygon_mask(image, polygons)  # Use external visualization
+            elif method == "bbox":
+                mask = self._mask_from_bbox(image.shape, bbox)  # Use static method
            else:
-
-
+                print(
+                    f"Warning: Unknown method '{method}'. Defaulting to no mask for this finding."
+                )
+                continue  # Skip if method is invalid
 
            masks.append(mask)
 
-
-        if masks:  # Check if any masks were generated
+        if masks:  # Check if any masks were generated
            # Combine masks: logical OR ensures any pixel in any mask is included
            combined_mask = np.zeros_like(masks[0], dtype=np.uint8)
            for mask in masks:
                # Ensure masks are boolean or uint8 for logical_or
-                combined_mask = np.logical_or(combined_mask, mask.astype(bool)).astype(
-
-
-
-
-
-
+                combined_mask = np.logical_or(combined_mask, mask.astype(bool)).astype(
+                    np.uint8
+                )
+
+            return self._apply_blur_mask(image, combined_mask)  # Use static method
+        return image  # Return original image if no masks
+
+    def censor_image_blur_easy(
+        self,
+        image: np.ndarray,
+        boxes: list[BoundingBox],
+        method: Optional[Literal["segmentation", "bbox"]] = "segmentation",
+        verbose=False,
+    ):
        """
        Censors an image by blurring regions defined by a list of BoundingBox objects.
        """
+        self.init_sam()
        masks = []
        for box in boxes:
-            bbox_tuple = box.to_tuple()
-            if method ==
+            bbox_tuple = box.to_tuple()  # Convert BoundingBox object to tuple
+            if method == "segmentation":
                mask, _ = self.get_segmentation_mask(image, bbox_tuple)
                if verbose:
                    polygons = self._binary_mask_to_polygon(mask)
                    if polygons:
                        plot_polygon_mask(image, polygons)
-            elif method ==
+            elif method == "bbox":
                mask = self._mask_from_bbox(image.shape, bbox_tuple)
            else:
-
-
+                print(
+                    f"Warning: Unknown method '{method}'. Defaulting to no mask for this box."
+                )
+                continue
 
            masks.append(mask)
 
        if masks:
            combined_mask = np.zeros_like(masks[0], dtype=np.uint8)
            for mask in masks:
-                combined_mask = np.logical_or(combined_mask, mask.astype(bool)).astype(
+                combined_mask = np.logical_or(combined_mask, mask.astype(bool)).astype(
+                    np.uint8
+                )
 
            return self._apply_blur_mask(image, combined_mask)
        return image
 
+
 # Example Usage (Optional - keep outside class):
 # if __name__ == '__main__':
 #     # Load an image
utils.py
CHANGED
@@ -138,11 +138,7 @@ def parse_into_models(findings: list[dict]) -> list[Finding]:
     Returns:
         A list of validated Finding model instances.
     """
-    parsed = []
-    for box in findings:
-        model_finding = Finding.model_validate(box)
-        parsed.append(model_finding)
-    return parsed
+    return [Finding.model_validate(box) for box in findings]
 
 
 def parse_all_safe(out: str) -> list[Finding] | None:
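
The utils.py change is a pure refactor: the accumulate-and-return loop collapses into a list comprehension with identical behavior, including raising pydantic's ValidationError on the first invalid dict. A quick equivalence check, assuming a minimal stand-in for the real Finding model:

from pydantic import BaseModel

class Finding(BaseModel):  # minimal stand-in for the model defined in utils.py
    severity: int
    bounding_box: tuple[int, int, int, int]

findings = [{"severity": 1, "bounding_box": (0, 0, 10, 10)}]

parsed = []  # old style
for box in findings:
    parsed.append(Finding.model_validate(box))

assert parsed == [Finding.model_validate(box) for box in findings]  # new style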