
bla committed · Commit 5bc3a57 · verified · 1 Parent(s): e508568

Update app.py

Files changed (1)
  1. app.py +86 -74
app.py CHANGED
@@ -10,16 +10,16 @@ from datetime import datetime
 
 import gradio as gr
 
-# This line might be related to GPU, kept from original
-os.environ["TORCH_CUDNN_SDPA_ENABLED"] = "0,1,2,3,4,5,6,7"
+# Removed GPU-specific environment variable setting
+# os.environ["TORCH_CUDNN_SDPA_ENABLED"] = "0,1,2,3,4,5,6,7"
+
 import tempfile
 
 import cv2
 import matplotlib.pyplot as plt
-# spaces import and decorators are for Hugging Face Spaces GPU allocation,
-# if running locally without spaces, these can be removed or will be ignored.
-import spaces
 import numpy as np
+# Removed spaces decorator import for CPU-only demo
+# import spaces  # Removed spaces import
 import torch
 
 from moviepy.editor import ImageSequenceClip
@@ -38,7 +38,7 @@ description_p = """# Instructions
 </ol>
 """
 
-# examples
+# examples - Keep examples, they are input files
 examples = [
     ["examples/01_dog.mp4"],
     ["examples/02_cups.mp4"],
@@ -75,33 +75,30 @@ OBJ_ID = 0
 
 sam2_checkpoint = "checkpoints/edgetam.pt"
 model_cfg = "edgetam.yaml"
-# Model built for CPU but immediately moved to CUDA in original code
+# Ensure predictor is explicitly built for CPU
 predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint, device="cpu")
-# *** Original code moves to CUDA ***
-predictor.to("cuda")
-print("predictor loaded on CUDA")
-
-# use bfloat16 for the entire demo - Original code uses CUDA bfloat16
-torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
-# Original CUDA settings
-if torch.cuda.is_available() and torch.cuda.get_device_properties(0).major >= 8:
-    # turn on tfloat32 for Ampere GPUs (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices)
-    torch.backends.cuda.matmul.allow_tf32 = True
-    torch.backends.cudnn.allow_tf32 = True
-elif not torch.cuda.is_available():
-    print("Warning: CUDA not available. The original code is configured for GPU.")
-    # Note: Without a GPU, the .to("cuda") calls will likely cause errors.
+# Removed .to("cuda") - predictor is already on CPU from build_sam2_video_predictor
+# predictor.to("cuda")
+print("predictor loaded on CPU")
+
+# Removed CUDA-specific autocast and backend settings
+# torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
+# if torch.cuda.is_available() and torch.cuda.get_device_properties(0).major >= 8:
+#     torch.backends.cuda.matmul.allow_tf32 = True
+#     torch.backends.cudnn.allow_tf32 = True
+# elif not torch.cuda.is_available():
+#     print("Warning: CUDA not available. Running on CPU.")
 
 
 def get_video_fps(video_path):
-    # Open the video file
+    """Gets the frames per second of a video file."""
+    if video_path is None or not os.path.exists(video_path):
+        print(f"Warning: Video file not found at {video_path}")
+        return None
     cap = cv2.VideoCapture(video_path)
-
     if not cap.isOpened():
-        print("Error: Could not open video.")
+        print(f"Error: Could not open video file {video_path}.")
         return None
-
-    # Get the FPS of the video
     fps = cap.get(cv2.CAP_PROP_FPS)
     cap.release()  # Release the capture object
     return fps
@@ -114,17 +111,15 @@ def reset(session_state):
     session_state["input_labels"] = []
     # Reset the predictor state if it exists
    if session_state["inference_state"] is not None:
-        # Assuming predictor.reset_state handles None or invalid states gracefully
-        # Or you might need to explicitly pass the state object if required
         try:
+            # Assuming predictor.reset_state handles clearing current masks/features
             predictor.reset_state(session_state["inference_state"])
             # Explicitly delete or re-init the state object if a full reset is intended
-            # This depends on how predictor.reset_state works
-            # session_state["inference_state"] = None # Example if reset_state doesn't fully clear
+            # This depends on how predictor.reset_state works. Setting to None is safest for a full reset.
+            session_state["inference_state"] = None
         except Exception as e:
             print(f"Error resetting predictor state: {e}")
-            # If reset fails, perhaps force-clear the state object
-            session_state["inference_state"] = None
+            session_state["inference_state"] = None  # Force-clear on error
 
     session_state["first_frame"] = None
     session_state["all_frames"] = None
@@ -132,13 +127,16 @@ def reset(session_state):
     # Also reset video path if stored
     session_state["video_path"] = None
 
-    # Resetting UI components
+    # Resetting UI components and disabling buttons
     return (
         None,  # video_in (clears the video player)
         gr.update(open=True),  # video_in_drawer (opens accordion)
         None,  # points_map (clears the image)
         None,  # output_image (clears the image)
         gr.update(value=None, visible=False),  # output_video (hides and clears)
+        gr.update(interactive=False),  # propagate_btn disabled
+        gr.update(interactive=False),  # clear_points_btn disabled
+        gr.update(interactive=False),  # reset_btn disabled
         session_state,  # return updated session state
     )
 
@@ -158,11 +156,20 @@ def clear_points(session_state):
             print("Predictor state reset for clearing points.")
             # If you need to re-initialize the state for the *same* video after clearing points,
             # you might need to call predictor.init_state again here, using the stored video_path.
-            # session_state["inference_state"] = predictor.init_state(video_path=session_state["video_path"], device="cuda") # Or device="cpu" if modified earlier
+            # Since we are on CPU, device="cpu" is implicit now.
+            if session_state["video_path"] is not None:
+                session_state["inference_state"] = predictor.init_state(video_path=session_state["video_path"])
+                print("Predictor state re-initialized after clearing points.")
+            else:
+                print("Warning: Could not re-initialize state after clear_points (video_path missing).")
+                session_state["inference_state"] = None  # Ensure state is None if video_path is gone
+
+
         except Exception as e:
             print(f"Error resetting predictor state during clear_points: {e}")
-            # If reset fails, this might leave old masks. Depending on SAM2's behavior,
-            # you might need a more aggressive state clear or re-initialization.
+            # If reset fails, this might leave old masks. Force-clear state on error.
+            session_state["inference_state"] = None
+
 
     # Return the original first frame image for points_map and clear the output_image
     first_frame_img = session_state["first_frame"] if session_state["first_frame"] is not None else None
@@ -175,8 +182,7 @@ def clear_points(session_state):
     )
 
 
-# Added @spaces.GPU decorator back as it was in the original code
-@spaces.GPU
+# Removed @spaces.GPU decorator
 def preprocess_video_in(video_path, session_state):
     """Loads video frames and initializes the predictor state."""
     print(f"Processing video: {video_path}")
@@ -230,30 +236,30 @@ def preprocess_video_in(video_path, session_state):
         }
     )
 
-    session_state["first_frame"] = copy.deepcopy(first_frame)
+    # Update session state with frames and path
+    session_state["first_frame"] = copy.deepcopy(first_frame)  # Store a copy
     session_state["all_frames"] = all_frames
     session_state["video_path"] = video_path  # Store video path
     session_state["input_points"] = []
     session_state["input_labels"] = []
-    # Original code did NOT pass device here. It uses the device the predictor is on.
+    # Initialize state WITHOUT the device argument (uses the predictor's device, which is CPU)
     session_state["inference_state"] = predictor.init_state(video_path=video_path)
-    print("Video loaded and predictor state initialized.")
+    print("Video loaded and predictor state initialized on CPU.")
 
     # Enable buttons after successful load
     return [
         gr.update(open=False),  # video_in_drawer
-        first_frame,  # points_map
-        None,  # output_image
-        gr.update(value=None, visible=False),  # output_video
-        gr.update(interactive=True),  # propagate_btn
-        gr.update(interactive=True),  # clear_points_btn
-        gr.update(interactive=True),  # reset_btn
+        first_frame,  # points_map (shows first frame)
+        None,  # output_image (cleared initially)
+        gr.update(value=None, visible=False),  # output_video (hidden initially)
+        gr.update(interactive=True),  # propagate_btn enabled
+        gr.update(interactive=True),  # clear_points_btn enabled
+        gr.update(interactive=True),  # reset_btn enabled
         session_state,  # session_state
     ]
 
 
-# Added @spaces.GPU decorator back as it was in the original code
-@spaces.GPU
+# Removed @spaces.GPU decorator
 def segment_with_points(
     point_type,
     session_state,
@@ -263,7 +269,7 @@ def segment_with_points(
     # Ensure we have state and first frame
     if session_state["first_frame"] is None or session_state["inference_state"] is None:
         print("Error: Cannot segment. No video loaded or inference state missing.")
-        # Return current images and state without changes
+        # Return current states to avoid errors, without changing UI much
         return (
             session_state.get("first_frame"),  # points_map (show first frame if exists)
            None,  # output_image (keep cleared)
@@ -310,17 +316,16 @@ def segment_with_points(
         first_frame_pil.copy(), transparent_layer_points_pil
     )
 
-    # Prepare points and labels as tensors on the correct device (CUDA in original code)
+    # Prepare points and labels as tensors on the correct device (CPU in this version)
     points = np.array(session_state["input_points"], dtype=np.float32)
     labels = np.array(session_state["input_labels"], np.int32)
 
-    # Ensure tensors are on the correct device (CUDA as per original code setup)
-    device = next(predictor.parameters()).device  # Get the device the model is on
+    # Ensure tensors are on the correct device (CPU)
+    device = next(predictor.parameters()).device  # Get the device the model is on (should be "cpu")
     points_tensor = torch.tensor(points, dtype=torch.float32, device=device).unsqueeze(0)  # Add batch dim
     labels_tensor = torch.tensor(labels, dtype=torch.int32, device=device).unsqueeze(0)  # Add batch dim
 
-    # Add new points to the predictor's state and get the mask for the first frame
-    # This call performs segmentation on the current frame (frame_idx=0) using all accumulated points
+
     first_frame_output_img = None  # Initialize output mask image as None in case of error
     try:
         # Note: predictor.add_new_points modifies the internal inference_state
@@ -349,9 +354,9 @@
         print(f"Error during segmentation on first frame: {e}")
         # On error, first_frame_output_img remains None
 
-    # Original code clears CUDA cache here
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
+    # Removed CUDA cache clearing call
+    # if torch.cuda.is_available():
+    #     torch.cuda.empty_cache()
 
     return selected_point_map_img, first_frame_output_img, session_state
 
@@ -402,8 +407,7 @@ def show_mask(mask, obj_id=None, random_color=False, convert_to_image=True):
     return colored_mask_uint8
 
 
-# Added @spaces.GPU decorator back as it was in the original code
-@spaces.GPU
+# Removed @spaces.GPU decorator
 def propagate_to_all(
     video_in,  # Keep video_in path as in original
     session_state,
@@ -478,9 +482,9 @@
 
         output_frames.append(output_frame_np)
 
-    # Original code clears CUDA cache here
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
+    # Removed CUDA cache clearing call
+    # if torch.cuda.is_available():
+    #     torch.cuda.empty_cache()
 
     # Define output path in a temporary directory
     unique_id = datetime.now().strftime("%Y%m%d%H%M%S%f")  # Use microseconds for more uniqueness
@@ -514,10 +518,17 @@
     )
 
     # Write the result to a file. Use 'libx264' codec for broad compatibility.
+    # Added CPU optimization parameters for the moviepy write
     try:
-        print(f"Writing video file with codec='libx264', fps={fps}")
-        # Added basic moviepy writing parameters back, similar to original intent
-        clip.write_videofile(final_vid_output_path, codec="libx264", fps=fps)
+        print(f"Writing video file with codec='libx264', fps={fps}, preset='medium', threads='auto'")
+        clip.write_videofile(
+            final_vid_output_path,
+            codec="libx264",
+            fps=fps,  # Ensure correct FPS is used during writing
+            preset="medium",  # CPU optimization: 'fast', 'faster', 'veryfast' are options for speed vs size
+            threads="auto",  # CPU optimization: use multiple cores
+            logger=None  # Suppress moviepy output
+        )
         print("Video writing complete.")
         # Return the path and make the video player visible
         return (
@@ -541,7 +552,7 @@
     )
 
 
-def update_ui():
+def update_output_video_visibility():
     """Simply returns a Gradio update to make the output video visible."""
     return gr.update(visible=True)
 
@@ -589,7 +600,7 @@ with gr.Blocks() as demo:
             points_map = gr.Image(
                 label="Click on the First Frame to Add Points",  # Clearer label
                 type="numpy",
-                interactive=True,  # <--- THIS WAS CHANGED FROM False TO True
+                interactive=True,  # <--- CHANGED TO True to enable clicking
                 height=400,  # Set a fixed height for better UI
                 width="auto",  # Let width adjust
                 show_share_button=False,
@@ -604,8 +615,7 @@ with gr.Blocks() as demo:
                 examples_per_page=8,
                 cache_examples=False,  # Do not cache processed examples, as state is involved
             )
-            # Add padding/space - removed extra lines as they take up a lot of space
-            # gr.Markdown("<br>")
+            # Removed extra blank lines
 
             # output_image shows the segmentation mask prediction on the *first* frame
             output_image = gr.Image(
@@ -704,14 +714,16 @@ with gr.Blocks() as demo:
             output_video,  # Update output video player with result
             session_state,  # Update session state
         ],
-        # concurrency_limit from original code (may need adjustment based on your hardware/GPU)
-        concurrency_limit=10,
-        queue=False,  # queue from original code
+        # CPU Optimization: Limit concurrency to 1 to prevent resource exhaustion.
+        # Queue=True ensures requests wait if another is processing.
+        concurrency_limit=1,
+        queue=True,
     )
 
 
 # Launch the Gradio demo
-demo.queue()  # Enable queuing
+demo.queue()  # Enable queuing for sequential processing under concurrency limits
 print("Gradio demo starting...")
+# Removed share=True for local debugging unless you specifically need a public link
 demo.launch()
 print("Gradio demo launched.")
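
Note (a reviewer sketch, not part of the commit): the change hinges on the predictor genuinely living on the CPU, because segment_with_points derives its tensor device from next(predictor.parameters()).device. A minimal smoke test along the following lines can verify that outside Gradio. It assumes the EdgeTAM/SAM2 package used by app.py is installed, that the sam2.build_sam import path applies (an assumption; EdgeTAM forks may differ), and that the checkpoint and config paths from the diff exist locally.

# cpu_smoke_test.py - a minimal sketch, not part of this commit.
# Assumption: build_sam2_video_predictor is importable as in the SAM2 codebase.
import torch
from sam2.build_sam import build_sam2_video_predictor

sam2_checkpoint = "checkpoints/edgetam.pt"  # same paths as app.py above
model_cfg = "edgetam.yaml"

# Build on CPU, exactly as the updated app.py does (no .to("cuda") afterwards).
predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint, device="cpu")

# segment_with_points() looks up the device the same way, so this check
# confirms that the points/labels tensors it builds will also be CPU tensors.
device = next(predictor.parameters()).device
assert device.type == "cpu", f"expected cpu, got {device}"
print(f"predictor parameters live on: {device}")

If the assertion holds, the device=device tensor construction in segment_with_points lands on the CPU as intended, and no CUDA calls remain on the inference path.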