Spaces:

reab5555
/

Owlv2-Video-Object-Detection

Paused

reab5555 committed on Jul 24, 2024

Commit

5813a90

verified ·

1 Parent(s): a63d0d6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,7 +6,8 @@ from transformers import Owlv2Processor, Owlv2ForObjectDetection
 import numpy as np
 import os
-device = 'cuda'
 processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
 model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16").to(device)
@@ -32,7 +33,7 @@ def detect_objects_in_frame(image, target):
     boxes, scores, labels = results[i]["boxes"], results[i]["scores"], results[i]["labels"]
     for box, score, label in zip(boxes, scores, labels):
-        if score.item() >= 0.5:
             box = [round(i, 2) for i in box.tolist()]
             object_label = text[label]
             confidence = round(score.item(), 3)
@@ -58,10 +59,11 @@ def process_video(video_path, target, progress=gr.Progress()):
     frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     original_fps = int(cap.get(cv2.CAP_PROP_FPS))
     original_duration = frame_count / original_fps
     output_path = "output_video.mp4"
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out = cv2.VideoWriter(output_path, fourcc, original_fps, (int(cap.get(3)), int(cap.get(4))))
     batch_size = 64
     frames = []
@@ -71,6 +73,9 @@ def process_video(video_path, target, progress=gr.Progress()):
         if not ret:
             break
         pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
         frames.append(pil_img)

 import numpy as np
 import os
+# Check if CUDA is available, otherwise use CPU
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
 processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
 model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16").to(device)
     boxes, scores, labels = results[i]["boxes"], results[i]["scores"], results[i]["labels"]
     for box, score, label in zip(boxes, scores, labels):
+        if score.item() >= 0.25:
             box = [round(i, 2) for i in box.tolist()]
             object_label = text[label]
             confidence = round(score.item(), 3)
     frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     original_fps = int(cap.get(cv2.CAP_PROP_FPS))
     original_duration = frame_count / original_fps
+    output_fps = 5
     output_path = "output_video.mp4"
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    out = cv2.VideoWriter(output_path, fourcc, output_fps, (int(cap.get(3)), int(cap.get(4))))
     batch_size = 64
     frames = []
         if not ret:
             break
+        if frame % (original_fps // output_fps) != 0:
+            continue
         pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
         frames.append(pil_img)