EdgeTAM

Runtime error

App Files Files Community

bla committed 7 days ago

Commit

5bc3a57

verified ·

1 Parent(s): e508568

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -74

app.py CHANGED Viewed

@@ -10,16 +10,16 @@ from datetime import datetime
 import gradio as gr
-# This line might be related to GPU, kept from original
-os.environ["TORCH_CUDNN_SDPA_ENABLED"] = "0,1,2,3,4,5,6,7"
 import tempfile
 import cv2
 import matplotlib.pyplot as plt
-# spaces import and decorators are for Hugging Face Spaces GPU allocation,
-# if running locally without spaces, these can be removed or will be ignored.
-import spaces
 import numpy as np
 import torch
 from moviepy.editor import ImageSequenceClip
@@ -38,7 +38,7 @@ description_p = """# Instructions
                 </ol>
               """
-# examples
 examples = [
     ["examples/01_dog.mp4"],
     ["examples/02_cups.mp4"],
@@ -75,33 +75,30 @@ OBJ_ID = 0
 sam2_checkpoint = "checkpoints/edgetam.pt"
 model_cfg = "edgetam.yaml"
-# Model built for CPU but immediately moved to CUDA in original code
 predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint, device="cpu")
-# *** Original code moves to CUDA ***
-predictor.to("cuda")
-print("predictor loaded on CUDA")
-# use bfloat16 for the entire demo - Original code uses CUDA bfloat16
-torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
-# Original CUDA settings
-if torch.cuda.is_available() and torch.cuda.get_device_properties(0).major >= 8:
-    # turn on tfloat32 for Ampere GPUs (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices)
-    torch.backends.cuda.matmul.allow_tf32 = True
-    torch.backends.cudnn.allow_tf32 = True
-elif not torch.cuda.is_available():
-    print("Warning: CUDA not available. The original code is configured for GPU.")
-    # Note: Without a GPU, the .to("cuda") calls will likely cause errors.
 def get_video_fps(video_path):
-    # Open the video file
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
-        print("Error: Could not open video.")
         return None
-    # Get the FPS of the video
     fps = cap.get(cv2.CAP_PROP_FPS)
     cap.release() # Release the capture object
     return fps
@@ -114,17 +111,15 @@ def reset(session_state):
     session_state["input_labels"] = []
     # Reset the predictor state if it exists
     if session_state["inference_state"] is not None:
-        # Assuming predictor.reset_state handles None or invalid states gracefully
-        # Or you might need to explicitly pass the state object if required
         try:
             predictor.reset_state(session_state["inference_state"])
              # Explicitly delete or re-init the state object if a full reset is intended
-             # This depends on how predictor.reset_state works
-             # session_state["inference_state"] = None # Example if reset_state doesn't fully clear
         except Exception as e:
              print(f"Error resetting predictor state: {e}")
-             # If reset fails, perhaps force-clear the state object
-             session_state["inference_state"] = None
     session_state["first_frame"] = None
     session_state["all_frames"] = None
@@ -132,13 +127,16 @@ def reset(session_state):
     # Also reset video path if stored
     session_state["video_path"] = None
-    # Resetting UI components
     return (
         None, # video_in (clears the video player)
         gr.update(open=True), # video_in_drawer (opens accordion)
         None, # points_map (clears the image)
         None, # output_image (clears the image)
         gr.update(value=None, visible=False), # output_video (hides and clears)
         session_state, # return updated session state
     )
@@ -158,11 +156,20 @@ def clear_points(session_state):
             print("Predictor state reset for clearing points.")
             # If you need to re-initialize the state for the *same* video after clearing points,
             # you might need to call predictor.init_state again here, using the stored video_path.
-            # session_state["inference_state"] = predictor.init_state(video_path=session_state["video_path"], device="cuda") # Or device="cpu" if modified earlier
         except Exception as e:
              print(f"Error resetting predictor state during clear_points: {e}")
-             # If reset fails, this might leave old masks. Depending on SAM2's behavior,
-             # you might need a more aggressive state clear or re-initialization.
     # Return the original first frame image for points_map and clear the output_image
     first_frame_img = session_state["first_frame"] if session_state["first_frame"] is not None else None
@@ -175,8 +182,7 @@ def clear_points(session_state):
     )
-# Added @spaces.GPU decorator back as it was in the original code
-@spaces.GPU
 def preprocess_video_in(video_path, session_state):
     """Loads video frames and initializes the predictor state."""
     print(f"Processing video: {video_path}")
@@ -230,30 +236,30 @@ def preprocess_video_in(video_path, session_state):
             }
         )
-    session_state["first_frame"] = copy.deepcopy(first_frame)
     session_state["all_frames"] = all_frames
     session_state["video_path"] = video_path # Store video path
     session_state["input_points"] = []
     session_state["input_labels"] = []
-    # Original code did NOT pass device here. It uses the device the predictor is on.
     session_state["inference_state"] = predictor.init_state(video_path=video_path)
-    print("Video loaded and predictor state initialized.")
     # Enable buttons after successful load
     return [
         gr.update(open=False),  # video_in_drawer
-        first_frame,  # points_map
-        None,  # output_image
-        gr.update(value=None, visible=False),  # output_video
-        gr.update(interactive=True), # propagate_btn
-        gr.update(interactive=True), # clear_points_btn
-        gr.update(interactive=True), # reset_btn
         session_state, # session_state
     ]
-# Added @spaces.GPU decorator back as it was in the original code
-@spaces.GPU
 def segment_with_points(
     point_type,
     session_state,
@@ -263,7 +269,7 @@ def segment_with_points(
     # Ensure we have state and first frame
     if session_state["first_frame"] is None or session_state["inference_state"] is None:
          print("Error: Cannot segment. No video loaded or inference state missing.")
-         # Return current images and state without changes
          return (
              session_state.get("first_frame"), # points_map (show first frame if exists)
              None, # output_image (keep cleared)
@@ -310,17 +316,16 @@ def segment_with_points(
         first_frame_pil.copy(), transparent_layer_points_pil
     )
-    # Prepare points and labels as tensors on the correct device (CUDA in original code)
     points = np.array(session_state["input_points"], dtype=np.float32)
     labels = np.array(session_state["input_labels"], np.int32)
-    # Ensure tensors are on the correct device (CUDA as per original code setup)
-    device = next(predictor.parameters()).device # Get the device the model is on
     points_tensor = torch.tensor(points, dtype=torch.float32, device=device).unsqueeze(0) # Add batch dim
     labels_tensor = torch.tensor(labels, dtype=torch.int32, device=device).unsqueeze(0) # Add batch dim
-    # Add new points to the predictor's state and get the mask for the first frame
-    # This call performs segmentation on the current frame (frame_idx=0) using all accumulated points
     first_frame_output_img = None # Initialize output mask image as None in case of error
     try:
         # Note: predictor.add_new_points modifies the internal inference_state
@@ -349,9 +354,9 @@ def segment_with_points(
         print(f"Error during segmentation on first frame: {e}")
         # On error, first_frame_output_img remains None
-    # Original code clears CUDA cache here
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
     return selected_point_map_img, first_frame_output_img, session_state
@@ -402,8 +407,7 @@ def show_mask(mask, obj_id=None, random_color=False, convert_to_image=True):
         return colored_mask_uint8
-# Added @spaces.GPU decorator back as it was in the original code
-@spaces.GPU
 def propagate_to_all(
     video_in, # Keep video_in path as in original
     session_state,
@@ -478,9 +482,9 @@ def propagate_to_all(
         output_frames.append(output_frame_np)
-    # Original code clears CUDA cache here
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
     # Define output path in a temporary directory
     unique_id = datetime.now().strftime("%Y%m%d%H%M%S%f") # Use microseconds for more uniqueness
@@ -514,10 +518,17 @@ def propagate_to_all(
         )
     # Write the result to a file. Use 'libx264' codec for broad compatibility.
     try:
-        print(f"Writing video file with codec='libx264', fps={fps}")
-        # Added basic moviepy writing parameters back, similar to original intent
-        clip.write_videofile(final_vid_output_path, codec="libx264", fps=fps)
         print("Video writing complete.")
         # Return the path and make the video player visible
         return (
@@ -541,7 +552,7 @@ def propagate_to_all(
         )
-def update_ui():
     """Simply returns a Gradio update to make the output video visible."""
     return gr.update(visible=True)
@@ -589,7 +600,7 @@ with gr.Blocks() as demo:
                 points_map = gr.Image(
                     label="Click on the First Frame to Add Points", # Clearer label
                     type="numpy",
-                    interactive=True, # <--- THIS WAS CHANGED FROM False TO True
                     height=400, # Set a fixed height for better UI
                     width="auto", # Let width adjust
                     show_share_button=False,
@@ -604,8 +615,7 @@ with gr.Blocks() as demo:
                     examples_per_page=8,
                     cache_examples=False, # Do not cache processed examples, as state is involved
                 )
-                # Add padding/space - removed extra lines as they take up a lot of space
-                # gr.Markdown("<br>")
                 # output_image shows the segmentation mask prediction on the *first* frame
                 output_image = gr.Image(
@@ -704,14 +714,16 @@ with gr.Blocks() as demo:
             output_video,  # Update output video player with result
             session_state, # Update session state
         ],
-        # concurrency_limit from original code (may need adjustment based on your hardware/GPU)
-        concurrency_limit=10,
-        queue=False, # queue from original code
     )
 # Launch the Gradio demo
-demo.queue() # Enable queuing
 print("Gradio demo starting...")
 demo.launch()
 print("Gradio demo launched.")

 import gradio as gr
+# Removed GPU-specific environment variable setting
+# os.environ["TORCH_CUDNN_SDPA_ENABLED"] = "0,1,2,3,4,5,6,7"
 import tempfile
 import cv2
 import matplotlib.pyplot as plt
 import numpy as np
+# Removed spaces decorator import for CPU-only demo
+# import spaces # Removed spaces import
 import torch
 from moviepy.editor import ImageSequenceClip
                 </ol>
               """
+# examples - Keep examples, they are input files
 examples = [
     ["examples/01_dog.mp4"],
     ["examples/02_cups.mp4"],
 sam2_checkpoint = "checkpoints/edgetam.pt"
 model_cfg = "edgetam.yaml"
+# Ensure predictor is explicitly built for CPU
 predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint, device="cpu")
+# Removed .to("cuda") - predictor is already on CPU from build_sam2_video_predictor
+# predictor.to("cuda")
+print("predictor loaded on CPU")
+# Removed CUDA specific autocast and backend settings
+# torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
+# if torch.cuda.is_available() and torch.cuda.get_device_properties(0).major >= 8:
+#     torch.backends.cuda.matmul.allow_tf32 = True
+#     torch.backends.cudnn.allow_tf32 = True
+# elif not torch.cuda.is_available():
+#     print("Warning: CUDA not available. Running on CPU.")
 def get_video_fps(video_path):
+    """Gets the frames per second of a video file."""
+    if video_path is None or not os.path.exists(video_path):
+         print(f"Warning: Video file not found at {video_path}")
+         return None
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
+        print(f"Error: Could not open video file {video_path}.")
         return None
     fps = cap.get(cv2.CAP_PROP_FPS)
     cap.release() # Release the capture object
     return fps
     session_state["input_labels"] = []
     # Reset the predictor state if it exists
     if session_state["inference_state"] is not None:
         try:
+            # Assuming predictor.reset_state handles clearing current masks/features
             predictor.reset_state(session_state["inference_state"])
              # Drop the state object as well when a full reset is intended.
+             # Whether predictor.reset_state fully clears the state is not verified here, so setting it to None is the safest full reset.
+            session_state["inference_state"] = None
         except Exception as e:
              print(f"Error resetting predictor state: {e}")
+             session_state["inference_state"] = None # Force-clear on error
     session_state["first_frame"] = None
     session_state["all_frames"] = None
     # Also reset video path if stored
     session_state["video_path"] = None
+    # Resetting UI components and disabling buttons
     return (
         None, # video_in (clears the video player)
         gr.update(open=True), # video_in_drawer (opens accordion)
         None, # points_map (clears the image)
         None, # output_image (clears the image)
         gr.update(value=None, visible=False), # output_video (hides and clears)
+        gr.update(interactive=False), # propagate_btn disabled
+        gr.update(interactive=False), # clear_points_btn disabled
+        gr.update(interactive=False), # reset_btn disabled
         session_state, # return updated session state
     )
             print("Predictor state reset for clearing points.")
             # Re-initialize the state for the *same* video after clearing points by calling
             # predictor.init_state again with the stored video_path.
+            # No device argument is passed; the predictor was built with device="cpu".
+            if session_state["video_path"] is not None:
+                 session_state["inference_state"] = predictor.init_state(video_path=session_state["video_path"])
+                 print("Predictor state re-initialized after clearing points.")
+            else:
+                 print("Warning: Could not re-initialize state after clear_points (video_path missing).")
+                 session_state["inference_state"] = None # Ensure state is None if video_path is gone
         except Exception as e:
              print(f"Error resetting predictor state during clear_points: {e}")
+             # If reset fails, this might leave old masks. Force-clear state on error.
+             session_state["inference_state"] = None
     # Return the original first frame image for points_map and clear the output_image
     first_frame_img = session_state["first_frame"] if session_state["first_frame"] is not None else None
     )
+# Removed @spaces.GPU decorator
 def preprocess_video_in(video_path, session_state):
     """Loads video frames and initializes the predictor state."""
     print(f"Processing video: {video_path}")
             }
         )
+    # Update session state with frames and path
+    session_state["first_frame"] = copy.deepcopy(first_frame) # Store a copy
     session_state["all_frames"] = all_frames
     session_state["video_path"] = video_path # Store video path
     session_state["input_points"] = []
     session_state["input_labels"] = []
+    # Initialize state WITHOUT the device argument (uses predictor's device, which is CPU)
     session_state["inference_state"] = predictor.init_state(video_path=video_path)
+    print("Video loaded and predictor state initialized on CPU.")
     # Enable buttons after successful load
     return [
         gr.update(open=False),  # video_in_drawer
+        first_frame,  # points_map (shows first frame)
+        None,  # output_image (cleared initially)
+        gr.update(value=None, visible=False),  # output_video (hidden initially)
+        gr.update(interactive=True), # propagate_btn enabled
+        gr.update(interactive=True), # clear_points_btn enabled
+        gr.update(interactive=True), # reset_btn enabled
         session_state, # session_state
     ]
+# Removed @spaces.GPU decorator
 def segment_with_points(
     point_type,
     session_state,
     # Ensure we have state and first frame
     if session_state["first_frame"] is None or session_state["inference_state"] is None:
          print("Error: Cannot segment. No video loaded or inference state missing.")
+         # Return current states to avoid errors, without changing UI much
          return (
              session_state.get("first_frame"), # points_map (show first frame if exists)
              None, # output_image (keep cleared)
         first_frame_pil.copy(), transparent_layer_points_pil
     )
+    # Prepare points and labels as tensors on the correct device (CPU in this version)
     points = np.array(session_state["input_points"], dtype=np.float32)
     labels = np.array(session_state["input_labels"], np.int32)
+    # Ensure tensors are on the correct device (CPU)
+    device = next(predictor.parameters()).device # Get the device the model is on (should be "cpu")
     points_tensor = torch.tensor(points, dtype=torch.float32, device=device).unsqueeze(0) # Add batch dim
     labels_tensor = torch.tensor(labels, dtype=torch.int32, device=device).unsqueeze(0) # Add batch dim
     first_frame_output_img = None # Initialize output mask image as None in case of error
     try:
         # Note: predictor.add_new_points modifies the internal inference_state
         print(f"Error during segmentation on first frame: {e}")
         # On error, first_frame_output_img remains None
+    # Removed CUDA cache clearing call
+    # if torch.cuda.is_available():
+    #     torch.cuda.empty_cache()
     return selected_point_map_img, first_frame_output_img, session_state
         return colored_mask_uint8
+# Removed @spaces.GPU decorator
 def propagate_to_all(
     video_in, # Keep video_in path as in original
     session_state,
         output_frames.append(output_frame_np)
+    # Removed CUDA cache clearing call
+    # if torch.cuda.is_available():
+    #     torch.cuda.empty_cache()
     # Define output path in a temporary directory
     unique_id = datetime.now().strftime("%Y%m%d%H%M%S%f") # Use microseconds for more uniqueness
         )
     # Write the result to a file. Use 'libx264' codec for broad compatibility.
+    # Added CPU optimization parameters for moviepy write
     try:
+        print(f"Writing video file with codec='libx264', fps={fps}, preset='medium', threads='auto'")
+        clip.write_videofile(
+            final_vid_output_path,
+            codec="libx264",
+            fps=fps, # Ensure correct FPS is used during writing
+            preset="medium", # CPU optimization: 'fast', 'faster', 'veryfast' are options for speed vs size
+            threads="auto", # CPU optimization: Use multiple cores
+            logger=None # Suppress moviepy output
+        )
         print("Video writing complete.")
         # Return the path and make the video player visible
         return (
         )
+def update_output_video_visibility():
     """Simply returns a Gradio update to make the output video visible."""
     return gr.update(visible=True)
                 points_map = gr.Image(
                     label="Click on the First Frame to Add Points", # Clearer label
                     type="numpy",
+                    interactive=True, # <--- CHANGED TO True to enable clicking
                     height=400, # Set a fixed height for better UI
                     width="auto", # Let width adjust
                     show_share_button=False,
                     examples_per_page=8,
                     cache_examples=False, # Do not cache processed examples, as state is involved
                 )
+                # Removed extra blank lines
                 # output_image shows the segmentation mask prediction on the *first* frame
                 output_image = gr.Image(
             output_video,  # Update output video player with result
             session_state, # Update session state
         ],
+        # CPU optimization: limit concurrency to 1 to prevent resource exhaustion.
+        # queue=True makes a request wait while another one is being processed.
+        concurrency_limit=1,
+        queue=True,
     )
 # Launch the Gradio demo
+demo.queue() # Enable queuing for sequential processing under concurrency limits
 print("Gradio demo starting...")
+# share=True is intentionally not passed to launch(); add it only if you need a public link.
 demo.launch()
 print("Gradio demo launched.")