Update app.py
app.py
CHANGED
@@ -264,6 +264,8 @@ def segment_with_points(
     # Define the circle radius as a fraction of the smaller dimension
     fraction = 0.01  # You can adjust this value as needed
     radius = int(fraction * min(w, h))
+    if radius < 3:
+        radius = 3  # Ensure minimum visibility
 
     # Create a transparent layer to draw on
     transparent_layer = np.zeros((h, w, 4), dtype=np.uint8)
@@ -280,9 +282,8 @@ def segment_with_points(
         transparent_background, transparent_layer
     )
 
-    #
+    # Use the clicked points and labels
     points = np.array(session_state["input_points"], dtype=np.float32)
-    # for labels, `1` means positive click and `0` means negative click
     labels = np.array(session_state["input_labels"], np.int32)
 
     try:
@@ -301,24 +302,52 @@ def segment_with_points(
             labels=labels,
         )
 
-        # Create the mask
-
+        # Create the mask and check dimensions first
+        out_mask = (out_mask_logits[0] > 0.0).cpu().numpy()
 
-        #
-
-
-            mask_array.astype(np.uint8),
-            (w, h),
-            interpolation=cv2.INTER_NEAREST
-        ).astype(bool)
+        # Convert to RGB for visualization
+        # Create an overlay with semi-transparent color
+        overlay = np.zeros((h, w, 3), dtype=np.uint8)
 
-
+        # Create a colored mask - blue with opacity
+        overlay_mask = np.zeros_like(overlay)
 
-        #
-        if
-
+        # Resize mask carefully if needed - handle empty dimensions
+        if out_mask.shape[0] > 0 and out_mask.shape[1] > 0:
+            # Check if dimensions differ
+            if out_mask.shape[:2] != (h, w):
+                print(f"Resizing mask from {out_mask.shape[:2]} to {h}x{w}")
+                # Use numpy/PIL for resizing to avoid OpenCV issues
+                from PIL import Image
+                mask_img = Image.fromarray(out_mask.astype(np.uint8) * 255)
+                mask_img = mask_img.resize((w, h), Image.NEAREST)
+                out_mask = np.array(mask_img) > 0
 
-
+            # Apply mask color
+            overlay_mask[out_mask] = [0, 120, 255]  # Blue color for mask
+
+        # Blend original frame with mask
+        alpha = 0.5  # Opacity
+        frame_with_mask = cv2.addWeighted(
+            first_frame, 1, overlay_mask, alpha, 0
+        )
+
+        # Add points on top of mask
+        points_overlay = np.zeros((h, w, 4), dtype=np.uint8)
+        for index, track in enumerate(session_state["input_points"]):
+            if session_state["input_labels"][index] == 1:
+                cv2.circle(points_overlay, track, radius, (0, 255, 0, 255), -1)  # Green
+            else:
+                cv2.circle(points_overlay, track, radius, (255, 0, 0, 255), -1)  # Red
+
+        # Convert to PIL for overlay
+        frame_with_mask_pil = Image.fromarray(frame_with_mask)
+        points_overlay_pil = Image.fromarray(points_overlay, "RGBA")
+
+        # Final composite
+        first_frame_output = Image.alpha_composite(
+            frame_with_mask_pil.convert("RGBA"), points_overlay_pil
+        )
     except Exception as e:
         print(f"Error in segmentation: {e}")
         import traceback
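For reference, the overlay technique introduced in this hunk can be reduced to a small standalone sketch. This is only an illustration under the assumption of an RGB uint8 frame and a boolean mask of matching height and width; the helper name blend_mask is not part of app.py:

    import numpy as np
    import cv2

    def blend_mask(frame_rgb: np.ndarray, mask: np.ndarray, alpha: float = 0.5) -> np.ndarray:
        """Blend a boolean mask onto an RGB frame as a semi-transparent blue overlay."""
        overlay = np.zeros_like(frame_rgb)
        overlay[mask] = [0, 120, 255]  # same mask color used in the commit
        # output = frame * 1.0 + overlay * alpha, saturated to uint8
        return cv2.addWeighted(frame_rgb, 1, overlay, alpha, 0)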
@@ -326,46 +355,66 @@ def segment_with_points(
         # Return just the points as fallback
         first_frame_output = selected_point_map
 
-    return selected_point_map, first_frame_output, session_state
+    return selected_point_map, np.array(first_frame_output), session_state
 
 def show_mask(mask, obj_id=None, random_color=False, convert_to_image=True):
     """Convert binary mask to RGBA image for visualization."""
-    if
-
-
-
-
-
+    # Check if mask is valid
+    if mask is None or mask.size == 0:
+        print("Warning: Empty mask provided to show_mask")
+        # Return an empty transparent mask
+        if convert_to_image:
+            return Image.new('RGBA', (100, 100), (0, 0, 0, 0))
+        else:
+            return np.zeros((100, 100, 4), dtype=np.uint8)
 
-    #
+    # Get mask dimensions
     if len(mask.shape) == 2:
         h, w = mask.shape
     else:
         h, w = mask.shape[-2:]
 
-
-
-
+    if h == 0 or w == 0:
+        print(f"Warning: Invalid mask dimensions: {h}x{w}")
+        # Return an empty transparent mask
+        if convert_to_image:
+            return Image.new('RGBA', (100, 100), (0, 0, 0, 0))
+        else:
+            return np.zeros((100, 100, 4), dtype=np.uint8)
 
-
-
-
-
-
-
-
-        proper_mask[:, :, :min(mask_rgba.shape[2], 4)] = mask_rgba[:, :, :min(mask_rgba.shape[2], 4)]
-        mask_rgba = proper_mask
-
-        # Create the PIL image
-        return Image.fromarray(mask_rgba, "RGBA")
-    except Exception as e:
-        print(f"Error converting mask to image: {e}")
-        # Fallback: create a blank transparent image of correct size
-        blank = np.zeros((h, w, 4), dtype=np.uint8)
-        return Image.fromarray(blank, "RGBA")
+    # Set the color for visualization
+    if random_color:
+        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
+    else:
+        cmap = plt.get_cmap("tab10")
+        cmap_idx = 0 if obj_id is None else obj_id
+        color = np.array([*cmap(cmap_idx)[:3], 0.6])
 
-
+    try:
+        # Create a colored visualization of the mask
+        colored_mask = np.zeros((h, w, 4), dtype=np.uint8)
+
+        # Apply color to mask areas (where mask is True)
+        for i in range(3):  # RGB channels
+            colored_mask[:, :, i] = (mask * color[i] * 255).astype(np.uint8)
+
+        # Set alpha channel
+        colored_mask[:, :, 3] = (mask * color[3] * 255).astype(np.uint8)
+
+        if convert_to_image:
+            return Image.fromarray(colored_mask, "RGBA")
+        else:
+            return colored_mask
+    except Exception as e:
+        print(f"Error in show_mask: {e}")
+        import traceback
+        traceback.print_exc()
+
+        # Return a fallback transparent image
+        if convert_to_image:
+            return Image.new('RGBA', (h, w), (0, 0, 0, 0))
+        else:
+            return np.zeros((h, w, 4), dtype=np.uint8)
 
 def update_progress(progress_percent, progress_bar):
     """Update progress bar during processing."""
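The rewritten show_mask builds the RGBA output channel by channel from a matplotlib colormap entry. A minimal sketch of just that coloring step, assuming a 2-D boolean mask (the toy shapes and values here are illustrative):

    import numpy as np
    import matplotlib.pyplot as plt

    mask = np.zeros((4, 4), dtype=bool)
    mask[1:3, 1:3] = True
    # RGB from the tab10 colormap plus a fixed 0.6 alpha, all in [0, 1]
    color = np.array([*plt.get_cmap("tab10")(0)[:3], 0.6])
    rgba = np.zeros((*mask.shape, 4), dtype=np.uint8)
    for i in range(4):  # R, G, B, A channels
        rgba[:, :, i] = (mask * color[i] * 255).astype(np.uint8)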
@@ -398,17 +447,11 @@ def propagate_to_all(
    video_segments = {}  # video_segments contains the per-frame segmentation results
    print("Starting propagate_in_video on CPU")
 
-    progress
-
-    # Get the count for progress reporting
-    all_frames_count = 0
-    for _ in predictor.propagate_in_video(session_state["inference_state"], count_only=True):
-        all_frames_count += 1
-
-    print(f"Total frames to process: {all_frames_count}")
-    progress.tqdm.total = all_frames_count
+    # Get the count for progress reporting (estimate)
+    all_frames_count = 300  # Reasonable estimate
 
    # Now do the actual processing with progress updates
+    current_frame = 0
    for out_frame_idx, out_obj_ids, out_mask_logits in predictor.propagate_in_video(
        session_state["inference_state"]
    ):
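This hunk drops the extra counting pass and instead calls the progress object directly with a fraction and a desc keyword, which matches Gradio's gr.Progress API. A hedged sketch of that pattern in isolation (the function body and loop are illustrative, not the app's actual processing):

    import gradio as gr

    def process(video_path, progress=gr.Progress()):
        total_estimate = 300  # fixed estimate, as in the commit
        for i in range(total_estimate):
            # ... one unit of work per frame ...
            progress(min(1.0, i / total_estimate), desc="Processing frames")
        return "done"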
@@ -420,12 +463,13 @@ def propagate_to_all(
        }
 
        # Update progress
-
-        progress_percent = min(
+        current_frame += 1
+        progress_percent = min(50, int((current_frame / all_frames_count) * 50))
        session_state["progress"] = progress_percent
+        progress(progress_percent/100, desc="Processing frames")
 
        if out_frame_idx % 10 == 0:
-            print(f"Processed frame {out_frame_idx}
+            print(f"Processed frame {out_frame_idx} ({progress_percent}%)")
 
        # Release memory periodically
        if out_frame_idx % chunk_size == 0:
@@ -445,10 +489,8 @@ def propagate_to_all(
    print(f"Total frames processed: {total_frames}")
 
    # Update progress to show rendering phase
-    progress.tqdm.reset()
-    progress.tqdm.total = 2  # Two phases: rendering and video creation
-    progress.tqdm.update(1)
    session_state["progress"] = 50
+    progress(0.5, desc="Rendering video")
 
    # Limit to max 50 frames for CPU processing
    max_output_frames = 50
@@ -464,12 +506,12 @@ def propagate_to_all(
 
    # Create output frames
    output_frames = []
-    progress.tqdm.reset()
-    progress.tqdm.total = (total_frames // vis_frame_stride) + 1
 
-
+    frame_indices = list(range(0, total_frames, vis_frame_stride))
+    total_output_frames = len(frame_indices)
+
+    for i, out_frame_idx in enumerate(frame_indices):
        if out_frame_idx not in video_segments or OBJ_ID not in video_segments[out_frame_idx]:
-            progress.tqdm.update(1)
            continue
 
        try:
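The new loop precomputes the output frame indices so rendering progress can be derived from the loop counter rather than from tqdm state. A worked example of the index and progress arithmetic with made-up numbers:

    total_frames = 120
    vis_frame_stride = 3
    frame_indices = list(range(0, total_frames, vis_frame_stride))  # 0, 3, 6, ..., 117
    total_output_frames = len(frame_indices)                        # 40
    # For output item i, rendering progress spans 50% -> 100%:
    # progress_percent = 50 + min(50, int((i / total_output_frames) * 50))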
@@ -481,41 +523,50 @@ def propagate_to_all(
            frame_idx = out_frame_idx
 
            frame = session_state["all_frames"][frame_idx]
-            transparent_background = Image.fromarray(frame).convert("RGBA")
 
-            #
+            # Create a colored overlay rather than using transparency
+            # Get the mask
            out_mask = video_segments[out_frame_idx][OBJ_ID]
 
-            # Ensure the mask is not empty and has
-            if out_mask.size == 0:
-                print(f"Warning:
-                #
-
+            # Ensure the mask is not empty and has valid dimensions
+            if out_mask.size == 0 or 0 in out_mask.shape:
+                print(f"Warning: Invalid mask for frame {out_frame_idx}")
+                # Skip this frame
+                continue
 
-            #
+            # Get dimensions
+            frame_h, frame_w = frame.shape[:2]
            mask_h, mask_w = out_mask.shape[:2]
-            if mask_h != h or mask_w != w:
-                print(f"Resizing mask from {mask_h}x{mask_w} to {h}x{w}")
-                out_mask = cv2.resize(
-                    out_mask.astype(np.uint8),
-                    (w, h),
-                    interpolation=cv2.INTER_NEAREST
-                ).astype(bool)
 
-
+            # Resize mask using PIL if dimensions don't match (avoid OpenCV)
+            if mask_h != frame_h or mask_w != frame_w:
+                print(f"Resizing mask from {mask_h}x{mask_w} to {frame_h}x{frame_w}")
+                try:
+                    mask_img = Image.fromarray(out_mask.astype(np.uint8) * 255)
+                    mask_img = mask_img.resize((frame_w, frame_h), Image.NEAREST)
+                    out_mask = np.array(mask_img) > 0
+                except Exception as e:
+                    print(f"Error resizing mask: {e}")
+                    # Skip this frame if resize fails
+                    continue
+
+            # Create an overlay with semi-transparent color
+            overlay = np.zeros_like(frame)
 
-            #
-
-            mask_image = mask_image.resize(transparent_background.size, Image.NEAREST)
+            # Set blue color for mask area
+            overlay[out_mask] = [0, 120, 255]  # BGR format for OpenCV
 
-
-
+            # Blend with original frame
+            alpha = 0.5
+            output_frame = cv2.addWeighted(frame, 1, overlay, alpha, 0)
+
+            # Add to output frames
            output_frames.append(output_frame)
 
            # Update progress
-
-            progress_percent = 50 + min(50, int((len(output_frames) / (total_frames // vis_frame_stride)) * 50))
+            progress_percent = 50 + min(50, int((i / total_output_frames) * 50))
            session_state["progress"] = progress_percent
+            progress(progress_percent/100, desc=f"Rendering video frames ({i}/{total_output_frames})")
 
            # Clear memory periodically
            if len(output_frames) % 10 == 0:
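Both rewritten code paths now resize boolean masks through PIL with nearest-neighbor interpolation instead of cv2.resize. A self-contained sketch of that round trip, with illustrative array shapes (note PIL's resize takes a (width, height) tuple, the reverse of NumPy's (h, w) shape order):

    import numpy as np
    from PIL import Image

    mask = np.zeros((240, 320), dtype=bool)        # (h, w) boolean mask
    mask[60:180, 80:240] = True
    target_w, target_h = 640, 480
    mask_img = Image.fromarray(mask.astype(np.uint8) * 255)
    mask_img = mask_img.resize((target_w, target_h), Image.NEAREST)
    resized = np.array(mask_img) > 0               # back to boolean, shape (480, 640)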