EdgeTAM

Runtime error

App Files Files Community

bla committed 8 days ago

Commit

fa0b563

verified ·

1 Parent(s): 2a466e4

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -7

app.py CHANGED Viewed

@@ -281,6 +281,7 @@ def segment_with_points(
     # Ensure we have a valid first frame and inference state
     if session_state["first_frame"] is None or session_state["inference_state"] is None:
          print("Error: Cannot segment. No video loaded or inference state missing.")
          return (
              session_state["first_frame"], # points_map remains unchanged
              None, # output_image remains unchanged or cleared
@@ -323,6 +324,7 @@ def segment_with_points(
     # Convert the transparent layer back to an image and composite onto the first frame
     transparent_layer_points_pil = Image.fromarray(transparent_layer_points, "RGBA")
     # Combine the first frame image with the points layer for the points_map output
     selected_point_map_img = Image.alpha_composite(
         first_frame_pil.copy(), transparent_layer_points_pil
     )
@@ -337,7 +339,9 @@ def segment_with_points(
     # Add new points to the predictor's state and get the mask for the first frame
     # This call performs segmentation on the current frame (frame_idx=0) using all accumulated points
     try:
         _, _, out_mask_logits = predictor.add_new_points(
             inference_state=session_state["inference_state"],
             frame_idx=0, # Always segment on the first frame initially
@@ -347,8 +351,7 @@ def segment_with_points(
         )
         # Process logits: detach from graph, move to CPU, apply threshold
-        # out_mask_logits is [batch_size, H, W] (batch_size=1 here)
-        # out_mask_logits[0] is the tensor for obj_id=OBJ_ID
         mask_tensor = (out_mask_logits[0][0].detach().cpu() > 0.0) # Apply threshold and get the single mask tensor [H, W]
         mask_numpy = mask_tensor.numpy() # Convert to numpy
@@ -356,12 +359,12 @@ def segment_with_points(
         mask_image_pil = show_mask(mask_numpy, obj_id=OBJ_ID) # show_mask returns RGBA PIL Image
         # Composite the mask onto the first frame for the output_image
         first_frame_output_img = Image.alpha_composite(first_frame_pil.copy(), mask_image_pil)
     except Exception as e:
         print(f"Error during segmentation on first frame: {e}")
-        # On error, return the points_map but clear the output_image
-        first_frame_output_img = None
     return selected_point_map_img, first_frame_output_img, session_state
@@ -399,7 +402,9 @@ def show_mask(mask, obj_id=None, random_color=False, convert_to_image=True):
     # Apply color where mask is True
     # Need to reshape color to be broadcastable [1, 1, 4]
     colored_mask = np.zeros((h, w, 4), dtype=np.float32) # Start with fully transparent black
-    colored_mask[mask] = color # Apply color where mask is True
     # Convert to uint8 [0-255]
     colored_mask_uint8 = (colored_mask * 255).astype(np.uint8)
@@ -446,6 +451,7 @@ def propagate_to_all(
              video_segments[out_frame_idx] = {
                  # out_mask_logits is a list of tensors (one per object tracked in this frame)
                  # Each tensor is [batch_size, H, W]. Batch size is 1 here.
                  out_obj_id: (out_mask_logits[i][0].detach().cpu() > 0.0).numpy()
                  for i, out_obj_id in enumerate(out_obj_ids)
              }
@@ -603,7 +609,7 @@ with gr.Blocks() as demo:
                 # points_map is where users click to add points. Needs to be interactive.
                 # Shows the first frame with points drawn on it.
                 points_map = gr.Image(
-                    label="Frame with Point Prompt",
                     type="numpy",
                     interactive=True, # Make interactive to capture clicks
                     height=400, # Set a fixed height for better UI
@@ -626,7 +632,7 @@ with gr.Blocks() as demo:
                 # output_image shows the segmentation mask prediction on the *first* frame
                 output_image = gr.Image(
-                    label="Reference Mask (First Frame)",
                     type="numpy",
                     interactive=False, # Not interactive, just displays the mask
                     height=400, # Match height of points_map
@@ -749,5 +755,6 @@ with gr.Blocks() as demo:
 # Launch the Gradio demo
 demo.queue() # Enable queuing for sequential processing under concurrency limits
 print("Gradio demo starting...")
 demo.launch()
 print("Gradio demo launched.")

     # Ensure we have a valid first frame and inference state
     if session_state["first_frame"] is None or session_state["inference_state"] is None:
          print("Error: Cannot segment. No video loaded or inference state missing.")
+         # Return current states to avoid errors, without changing UI much
          return (
              session_state["first_frame"], # points_map remains unchanged
              None, # output_image remains unchanged or cleared
     # Convert the transparent layer back to an image and composite onto the first frame
     transparent_layer_points_pil = Image.fromarray(transparent_layer_points, "RGBA")
     # Combine the first frame image with the points layer for the points_map output
+    # points_map shows the first frame *with the points you added*.
     selected_point_map_img = Image.alpha_composite(
         first_frame_pil.copy(), transparent_layer_points_pil
     )
     # Add new points to the predictor's state and get the mask for the first frame
     # This call performs segmentation on the current frame (frame_idx=0) using all accumulated points
+    first_frame_output_img = None # Initialize output mask image as None in case of error
     try:
+        # Note: predictor.add_new_points modifies the internal inference_state
         _, _, out_mask_logits = predictor.add_new_points(
             inference_state=session_state["inference_state"],
             frame_idx=0, # Always segment on the first frame initially
         )
         # Process logits: detach from graph, move to CPU, apply threshold
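+        # out_mask_logits is indexed per object; each entry is assumed to be [batch_size, H, W]
+        # with batch_size=1, so [0][0] is the [H, W] logits for the requested obj_id (TODO confirm shape)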
         mask_tensor = (out_mask_logits[0][0].detach().cpu() > 0.0) # Apply threshold and get the single mask tensor [H, W]
         mask_numpy = mask_tensor.numpy() # Convert to numpy
         mask_image_pil = show_mask(mask_numpy, obj_id=OBJ_ID) # show_mask returns RGBA PIL Image
         # Composite the mask onto the first frame for the output_image
+        # output_image shows the first frame *with the segmentation mask result*.
         first_frame_output_img = Image.alpha_composite(first_frame_pil.copy(), mask_image_pil)
     except Exception as e:
         print(f"Error during segmentation on first frame: {e}")
+        # On error, first_frame_output_img remains None
     return selected_point_map_img, first_frame_output_img, session_state
     # Apply color where mask is True
     # Need to reshape color to be broadcastable [1, 1, 4]
     colored_mask = np.zeros((h, w, 4), dtype=np.float32) # Start with fully transparent black
+    # Apply the color only where the mask is True.
+    # This directly creates the colored overlay with transparency.
+    colored_mask[mask] = color
     # Convert to uint8 [0-255]
     colored_mask_uint8 = (colored_mask * 255).astype(np.uint8)
              video_segments[out_frame_idx] = {
                  # out_mask_logits is a list of tensors (one per object tracked in this frame)
                  # Each tensor is [batch_size, H, W]. Batch size is 1 here.
+                 # Access the first element of the batch [0]
                  out_obj_id: (out_mask_logits[i][0].detach().cpu() > 0.0).numpy()
                  for i, out_obj_id in enumerate(out_obj_ids)
              }
                 # points_map is where users click to add points. Needs to be interactive.
                 # Shows the first frame with points drawn on it.
                 points_map = gr.Image(
+                    label="Click on the First Frame to Add Points", # Clearer label
                     type="numpy",
                     interactive=True, # Make interactive to capture clicks
                     height=400, # Set a fixed height for better UI
                 # output_image shows the segmentation mask prediction on the *first* frame
                 output_image = gr.Image(
+                    label="Segmentation Mask on First Frame", # Clearer label
                     type="numpy",
                     interactive=False, # Not interactive, just displays the mask
                     height=400, # Match height of points_map
 # Launch the Gradio demo
 demo.queue() # Enable queuing for sequential processing under concurrency limits
 print("Gradio demo starting...")
+# share=True was removed for local debugging; pass it to launch() only if you specifically need a public link
 demo.launch()
 print("Gradio demo launched.")