from ultralytics import YOLO
import cv2
import time

# import spaces  # needed only if the @spaces.GPU decorator below is re-enabled

model = YOLO("model/yolo11n_6-2-25.pt")
SUBSAMPLE = 2  # intended frame-subsampling factor; not applied in the current pipeline

def draw_boxes(frame, results):
    """Draw detection boxes and class labels onto a frame in place."""
    for r in results:
        for box in r.boxes:
            # Corner coordinates come back as a tensor; cast to ints for OpenCV.
            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 255), 3)

            # Look up the class name; r.names is keyed by integer class id.
            label = r.names[int(box.cls[0])]
            cv2.putText(frame, label, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=1, color=(255, 0, 0), thickness=2)

    return frame
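
# A quick single-image sketch of draw_boxes (the "test.jpg" path is a
# hypothetical example, not part of this repo):
#
#   img = cv2.imread("test.jpg")
#   cv2.imwrite("annotated.jpg", draw_boxes(img, model(img)))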
    
#@spaces.GPU
def video_detection(cap):
    """Run detection on every frame of an open cv2.VideoCapture.

    Yields (annotated RGB frame, segment_path): segment_path is None until a
    ~16-frame mp4 chunk has been finalized, at which point its filename is
    yielded so the caller can stream it.
    """
    video_codec = cv2.VideoWriter_fourcc(*"mp4v")  # type: ignore
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    # Frames are downscaled to half resolution before inference and writing.
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) // 2
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) // 2

    iterating, frame = cap.read()

    n_frames = 0
    n_chunks = 0

    name = f"output_{n_chunks}.mp4"
    segment_file = cv2.VideoWriter(name, video_codec, fps, (width, height))  # type: ignore

    while iterating:
        frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # A plain call runs inference eagerly; stream=True would defer it to
        # iteration time inside draw_boxes and make this timing meaningless.
        start = time.time()
        results = model(frame)
        print("time taken for inference", time.time() - start)

        frame = draw_boxes(frame, results)
        # VideoWriter expects BGR, so convert back before writing; the RGB
        # copy is kept for yielding to the UI.
        segment_file.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))

        if n_frames == 16:
            # Finalize the current segment and start a new one.
            n_chunks += 1
            segment_file.release()
            n_frames = 0
            yield frame, name
            name = f"output_{n_chunks}.mp4"
            segment_file = cv2.VideoWriter(name, video_codec, fps, (width, height))  # type: ignore
        else:
            yield frame, None

        iterating, frame = cap.read()
        n_frames += 1

    cap.release()
    segment_file.release()

    yield None, name
"""
#@spaces.GPU
def video_detection(cap):
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    out = cv2.VideoWriter('output_video.mp4', cv2.VideoWriter_fourcc(*'h264'), fps, (frame_width, frame_height))
    
    count = 0
    while cap.isOpened():
        success, frame = cap.read()

        if not success:
            break

        #results = model(frame, stream=True, device='cuda', verbose=False)
        results = model(frame, stream=True)

        frame = draw_boxes(frame, results)

        out.write(frame)
        #if not count % 10:
        yield frame, None
        # print(count)
        count += 1

    cap.release()
    out.release()
    cv2.destroyAllWindows()

    yield None, 'output_video.mp4'
"""