from ultralytics import YOLO
import cv2
from PIL import Image
import time
import numpy as np
import uuid
import spaces
model = YOLO("model/yolo11n_6-2-25.pt")
SUBSAMPLE = 2
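# --- Optional smoke test (assumption; not part of the original file): run the
# loaded weights once on a blank frame to confirm they work before serving.
# Uncomment to use; the blank-frame input is purely illustrative. ---
# _dummy = np.zeros((640, 640, 3), dtype=np.uint8)
# _ = list(model(_dummy, stream=True))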
def draw_boxes(frame, results):
    """Draw detected boxes and class labels onto the frame in place."""
    for r in results:
        boxes = r.boxes
        for box in boxes:
            # Box corners come back as a tensor; cast to ints for OpenCV.
            x1, y1, x2, y2 = box.xyxy[0]
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 255), 3)

            cls = r.names[int(box.cls[0].item())]

            # object details
            org = (x1, y1)
            font = cv2.FONT_HERSHEY_SIMPLEX
            fontScale = 1
            color = (255, 0, 0)
            thickness = 2
            cv2.putText(frame, cls, org, font, fontScale, color, thickness)
    return frame
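# --- Usage sketch (assumption, not from the original file): how draw_boxes
# could be exercised on a single image; "sample.jpg" is a hypothetical path. ---
# img = cv2.imread("sample.jpg")
# annotated = draw_boxes(img, model(img, stream=True))
# cv2.imwrite("sample_annotated.jpg", annotated)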
#@spaces.GPU
def video_detection(cap):
    """Run detection frame by frame, yielding (preview_frame, chunk_path).

    chunk_path is None until a segment file is finished, at which point the
    path of the completed .mp4 chunk is yielded alongside the frame.
    """
    video_codec = cv2.VideoWriter_fourcc(*"mp4v")  # type: ignore
    #video_codec = cv2.VideoWriter_fourcc(*'h264')
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    #desired_fps = fps // SUBSAMPLE
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) // 2
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) // 2

    iterating, frame = cap.read()
    n_frames = 0
    n_chunks = 0

    #name = f"output_{uuid.uuid4()}.mp4"
    name = f"output_{n_chunks}.mp4"  #if stream_as_mp4 else '.ts'}
    segment_file = cv2.VideoWriter(name, video_codec, fps, (width, height))  # type: ignore
    batch = []

    while iterating:
        # Halve the resolution and convert to RGB for inference and for the
        # live preview frame that gets yielded to the caller.
        frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Leftovers from an earlier batched pipeline, kept for reference.
        #if n_frames % SUBSAMPLE == 0:
        #    batch.append(frame)
        #if len(batch) == 2 * desired_fps:
        #if len(batch) == 4:
        #    inputs = image_processor(images=batch, return_tensors="pt").to("cuda")
        print(f"starting batch of size {len(batch)}")
        start = time.time()
        #with torch.no_grad():
        #    outputs = model(**inputs)
        results = model(frame, stream=True)
        end = time.time()
        print("time taken for inference", end - start)

        start = time.time()
        #boxes = image_processor.post_process_object_detection(
        #    outputs,
        #    target_sizes=torch.tensor([(height, width)] * len(batch)),
        #    threshold=conf_threshold)
        """
        for i, (array, box) in enumerate(zip(batch, boxes)):
            pil_image = draw_bounding_boxes(Image.fromarray(array), box, model, conf_threshold)
            frame = np.array(pil_image)
            # Convert RGB to BGR
            frame = frame[:, :, ::-1].copy()
            segment_file.write(frame)
        """
        frame = draw_boxes(frame, results)
        # cv2.VideoWriter expects BGR frames, so convert back before writing;
        # the yielded preview frame stays in RGB.
        segment_file.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
        """
        for i, r in enumerate(results):
            # Plot results image
            im_bgr = r.plot()  # BGR-order numpy array
            im_rgb = Image.fromarray(im_bgr[..., ::-1])  # RGB-order PIL image
            frame = np.array(im_rgb)
            # Convert RGB to BGR
            frame = frame[:, :, ::-1].copy()
            segment_file.write(frame)
        """

        if n_frames == 16:
            # Close the current chunk, hand its path to the caller, and open
            # a new segment file for the next chunk.
            n_chunks += 1
            segment_file.release()
            n_frames = 0
            yield frame, name
            #name = f"output_{n_chunks}{'.mp4' if stream_as_mp4 else '.ts'}"
            name = f"output_{n_chunks}.mp4"
            segment_file = cv2.VideoWriter(name, video_codec, fps, (width, height))  # type: ignore
        else:
            yield frame, None
        #batch = []
        #segment_file.release()
        #yield None, name
        #end = time.time()
        #print("time taken for processing boxes", end - start)
        #name = f"output_{uuid.uuid4()}.mp4"
        #segment_file = cv2.VideoWriter(name, video_codec, fps, (width, height))  # type: ignore

        iterating, frame = cap.read()
        n_frames += 1

    cap.release()
    segment_file.release()
    cv2.destroyAllWindows()
    yield None, name
"""
#@spaces.GPU
def video_detection(cap):
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))
out = cv2.VideoWriter('output_video.mp4', cv2.VideoWriter_fourcc(*'h264'), fps, (frame_width, frame_height))
count = 0
while cap.isOpened():
success, frame = cap.read()
if not success:
break
#results = model(frame, stream=True, device='cuda', verbose=False)
results = model(frame, stream=True)
frame = draw_boxes(frame, results)
out.write(frame)
#if not count % 10:
yield frame, None
# print(count)
count += 1
cap.release()
out.release()
cv2.destroyAllWindows()
yield None, 'output_video.mp4'
"""