EdgeTAM

Runtime error

App Files Files Community

bla committed 5 days ago

Commit

628bfb2

verified ·

1 Parent(s): 5dc8194

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -96

app.py CHANGED Viewed

@@ -126,7 +126,7 @@ def preprocess_video_in(video_path, session_state):
             session_state,
         )
-    # Read the first frame
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
         print(f"Error: Could not open video at {video_path}.")
@@ -139,65 +139,61 @@ def preprocess_video_in(video_path, session_state):
             session_state,
         )
-    # For CPU optimization - determine video properties
     frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     fps = cap.get(cv2.CAP_PROP_FPS)
     print(f"Video info: {frame_width}x{frame_height}, {total_frames} frames, {fps} FPS")
-    # Determine if we need to resize for CPU performance
     target_width = 640  # Target width for processing on CPU
     scale_factor = 1.0
     if frame_width > target_width:
         scale_factor = target_width / frame_width
         new_width = int(frame_width * scale_factor)
         new_height = int(frame_height * scale_factor)
         print(f"Resizing video for CPU processing: {frame_width}x{frame_height} -> {new_width}x{new_height}")
-    # Read frames - for CPU we'll be more selective about which frames to keep
     frame_number = 0
     first_frame = None
     all_frames = []
-    # For CPU optimization, skip frames if video is too long
-    frame_stride = 1
-    if total_frames > 300:  # If more than 300 frames
-        frame_stride = max(1, int(total_frames / 300))  # Process at most ~300 frames
-        print(f"Video has {total_frames} frames, using stride of {frame_stride} to reduce processing load")
     while True:
         ret, frame = cap.read()
         if not ret:
             break
-        if frame_number % frame_stride == 0:  # Process every frame_stride frames
             try:
                 # Resize the frame if needed
                 if scale_factor != 1.0:
                     frame = cv2.resize(
-                        frame,
-                        (int(frame_width * scale_factor), int(frame_height * scale_factor)),
                         interpolation=cv2.INTER_AREA
                     )
                 frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                 frame = np.array(frame)
-                # Store the first frame
                 if first_frame is None:
                     first_frame = frame
                 all_frames.append(frame)
             except Exception as e:
                 print(f"Error processing frame {frame_number}: {e}")
         frame_number += 1
     cap.release()
-    # Ensure we have at least one frame
     if first_frame is None or len(all_frames) == 0:
         print("Error: No frames could be extracted from the video.")
         return (
@@ -208,9 +204,9 @@ def preprocess_video_in(video_path, session_state):
             gr.update(value=0, visible=False),  # progress_bar
             session_state,
         )
     print(f"Successfully extracted {len(all_frames)} frames from video")
     session_state["first_frame"] = copy.deepcopy(first_frame)
     session_state["all_frames"] = all_frames
     session_state["frame_stride"] = frame_stride
@@ -227,7 +223,7 @@ def preprocess_video_in(video_path, session_state):
         import traceback
         traceback.print_exc()
         session_state["inference_state"] = None
     return [
         gr.update(open=False),  # video_in_drawer
         first_frame,  # points_map
@@ -320,6 +316,11 @@ def segment_with_points(
                 print(f"Resizing mask from {out_mask.shape[:2]} to {h}x{w}")
                 # Use numpy/PIL for resizing to avoid OpenCV issues
                 from PIL import Image
                 mask_img = Image.fromarray(out_mask.astype(np.uint8) * 255)
                 mask_img = mask_img.resize((w, h), Image.NEAREST)
                 out_mask = np.array(mask_img) > 0
@@ -449,7 +450,7 @@ def propagate_to_all(
         print("Starting propagate_in_video on CPU")
         # Get the count for progress reporting (estimate)
-        all_frames_count = 300  # Reasonable estimate
         # Now do the actual processing with progress updates
         current_frame = 0
@@ -494,7 +495,7 @@ def propagate_to_all(
         progress(0.5, desc="Rendering video")
         # Limit to max 50 frames for CPU processing
-        max_output_frames = 50
         vis_frame_stride = max(1, total_frames // max_output_frames)
         print(f"Using stride of {vis_frame_stride} for output video generation")
@@ -543,6 +544,10 @@ def propagate_to_all(
                 if mask_h != frame_h or mask_w != frame_w:
                     print(f"Resizing mask from {mask_h}x{mask_w} to {frame_h}x{frame_w}")
                     try:
                         mask_img = Image.fromarray(out_mask.astype(np.uint8) * 255)
                         mask_img = mask_img.resize((frame_w, frame_h), Image.NEAREST)
                         out_mask = np.array(mask_img) > 0
@@ -758,69 +763,4 @@ with gr.Blocks() as demo:
         queue=False,
     )
-    # triggered when we click on image to add new points
-    points_map.select(
-        fn=segment_with_points,
-        inputs=[
-            point_type,  # "include" or "exclude"
-            session_state,
-        ],
-        outputs=[
-            points_map,  # updated image with points
-            output_image,
-            session_state,
-        ],
-        queue=False,
-    )
-    # Clear every points clicked and added to the map
-    clear_points_btn.click(
-        fn=clear_points,
-        inputs=session_state,
-        outputs=[
-            points_map,
-            output_image,
-            output_video,
-            progress_bar,
-            session_state,
-        ],
-        queue=False,
-    )
-    reset_btn.click(
-        fn=reset,
-        inputs=session_state,
-        outputs=[
-            video_in,
-            video_in_drawer,
-            points_map,
-            output_image,
-            output_video,
-            progress_bar,
-            session_state,
-        ],
-        queue=False,
-    )
-    propagate_btn.click(
-        fn=update_ui,
-        inputs=[],
-        outputs=[output_video, progress_bar],
-        queue=False,
-    ).then(
-        fn=propagate_to_all,
-        inputs=[
-            video_in,
-            session_state,
-        ],
-        outputs=[
-            output_video,
-            progress_bar,
-            session_state,
-        ],
-        queue=True,  # Use queue for CPU processing
-    )
-demo.queue()
-demo.launch()

             session_state,
         )
+    # Read the video
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
         print(f"Error: Could not open video at {video_path}.")
             session_state,
         )
     frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     fps = cap.get(cv2.CAP_PROP_FPS)
     print(f"Video info: {frame_width}x{frame_height}, {total_frames} frames, {fps} FPS")
     target_width = 640  # Target width for processing on CPU
     scale_factor = 1.0
     if frame_width > target_width:
         scale_factor = target_width / frame_width
         new_width = int(frame_width * scale_factor)
         new_height = int(frame_height * scale_factor)
         print(f"Resizing video for CPU processing: {frame_width}x{frame_height} -> {new_width}x{new_height}")
+    # Even more aggressive frame skipping for very long videos on CPU
+    frame_stride = 1
+    max_frames = 150 # Maximum number of frames to process
+    if total_frames > max_frames:
+        frame_stride = max(1, int(total_frames / max_frames))
+        print(f"Video has {total_frames} frames, using stride of {frame_stride} to limit to {max_frames}")
     frame_number = 0
     first_frame = None
     all_frames = []
     while True:
         ret, frame = cap.read()
         if not ret:
             break
+        if frame_number % frame_stride == 0:
             try:
                 # Resize the frame if needed
                 if scale_factor != 1.0:
                     frame = cv2.resize(
+                        frame,
+                        (int(frame_width * scale_factor), int(frame_height * scale_factor)),
                         interpolation=cv2.INTER_AREA
                     )
                 frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                 frame = np.array(frame)
                 if first_frame is None:
                     first_frame = frame
                 all_frames.append(frame)
             except Exception as e:
                 print(f"Error processing frame {frame_number}: {e}")
         frame_number += 1
     cap.release()
     if first_frame is None or len(all_frames) == 0:
         print("Error: No frames could be extracted from the video.")
         return (
             gr.update(value=0, visible=False),  # progress_bar
             session_state,
         )
     print(f"Successfully extracted {len(all_frames)} frames from video")
     session_state["first_frame"] = copy.deepcopy(first_frame)
     session_state["all_frames"] = all_frames
     session_state["frame_stride"] = frame_stride
         import traceback
         traceback.print_exc()
         session_state["inference_state"] = None
     return [
         gr.update(open=False),  # video_in_drawer
         first_frame,  # points_map
                 print(f"Resizing mask from {out_mask.shape[:2]} to {h}x{w}")
                 # Use numpy/PIL for resizing to avoid OpenCV issues
                 from PIL import Image
+                # Ensure mask is boolean type
+                if out_mask.dtype != np.bool_:
+                    out_mask = out_mask > 0
                 mask_img = Image.fromarray(out_mask.astype(np.uint8) * 255)
                 mask_img = mask_img.resize((w, h), Image.NEAREST)
                 out_mask = np.array(mask_img) > 0
         print("Starting propagate_in_video on CPU")
         # Get the count for progress reporting (estimate)
+        all_frames_count = 100  # Reasonable estimate
         # Now do the actual processing with progress updates
         current_frame = 0
         progress(0.5, desc="Rendering video")
         # Limit to max 30 frames for CPU processing
+        max_output_frames = 30
         vis_frame_stride = max(1, total_frames // max_output_frames)
         print(f"Using stride of {vis_frame_stride} for output video generation")
                 if mask_h != frame_h or mask_w != frame_w:
                     print(f"Resizing mask from {mask_h}x{mask_w} to {frame_h}x{frame_w}")
                     try:
+                        # Ensure mask is boolean type
+                        if out_mask.dtype != np.bool_:
+                            out_mask = out_mask > 0
                         mask_img = Image.fromarray(out_mask.astype(np.uint8) * 255)
                         mask_img = mask_img.resize((frame_w, frame_h), Image.NEAREST)
                         out_mask = np.array(mask_img) > 0
         queue=False,
     )
+    # triggered when we click