"""Gradio demo that runs YOLOv5 object detection on uploaded videos and images."""

import torch
import numpy as np
import gradio as gr
import cv2
import time
import os
from pathlib import Path
from PIL import Image

# Create cache directory for models
os.makedirs("models", exist_ok=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load YOLOv5 Nano model.
# The hub "custom" entry point loads a full checkpoint file from `path`, so
# the same call is used for the first run and for a cache hit. The YOLOv5
# loader is expected to download official release weights (such as
# yolov5n.pt) to that path when the file is missing.
# NOTE(review): confirm the auto-download against the YOLOv5 version in use.
# The earlier approach (saving only a state_dict and re-loading it with
# source="local") could not be read back by the hub loader.
model_path = Path("models/yolov5n.pt")
model = None
if model_path.exists():
    print(f"Loading model from cache: {model_path}")
    try:
        model = torch.hub.load("ultralytics/yolov5", "custom", path=str(model_path))
    except Exception as exc:
        # Broad on purpose: the loader's failure type is not a documented
        # contract. The likely cause is a stale state_dict-only cache file
        # from an earlier run, so discard it and fetch fresh weights below.
        print(f"Cached model could not be loaded ({exc}); re-downloading...")
        model_path.unlink()
if model is None:
    print("Downloading YOLOv5n model and caching...")
    model = torch.hub.load("ultralytics/yolov5", "custom", path=str(model_path))
model = model.to(device)

# Inference settings
model.conf = 0.3  # Lower confidence threshold
model.iou = 0.3   # Non-Maximum Suppression IoU threshold
model.classes = None  # No class filter: report every class the model knows

if device.type == "cuda":
    model.half()  # Use FP16 for faster inference
else:
    # os.cpu_count() may return None, which set_num_threads rejects.
    torch.set_num_threads(os.cpu_count() or 1)

model.eval()

# Pre-generate colors for bounding boxes (fixed seed keeps them stable
# between runs); one row of three uint8 values per class name.
np.random.seed(42)
colors = np.random.randint(0, 255, size=(len(model.names), 3), dtype=np.uint8)

def process_video(video_path):
    """Run the detector on every frame of a video and write an annotated copy.

    Each frame is converted from BGR to RGB for the model, detections are
    drawn back onto the original BGR frame together with a running average
    of inference FPS, and the frame is appended to the output file.

    Args:
        video_path: Path of the input video file, as supplied by the Gradio
            video input (``None`` when nothing was uploaded).

    Returns:
        The path of the annotated video, ``"output_video.mp4"``.

    Raises:
        gr.Error: If no video was supplied, the input cannot be opened, or
            the output file cannot be created. Raising (instead of returning
            a message string) keeps the message out of the video output,
            which would otherwise try to treat it as a file path.
    """
    if not video_path:
        raise gr.Error("Please upload a video first.")

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise gr.Error("Error: Could not open video file.")

    # NOTE(review): this fixed path is shared by every request; concurrent
    # users would overwrite each other's result.
    output_path = "output_video.mp4"
    out = None

    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Missing FPS metadata (0 or NaN) would give the writer an
            # unusable frame rate; fall back to a common default.
            fps = 30.0

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
        if not out.isOpened():
            raise gr.Error("Error: Could not create output video file.")

        total_frames = 0
        total_time = 0.0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            start_time = time.perf_counter()

            # Convert frame for YOLOv5
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = model(img, size=640)

            total_time += time.perf_counter() - start_time
            total_frames += 1

            # Each detection row unpacks as box corners, confidence, class id.
            detections = results.pred[0].cpu().numpy()

            for *xyxy, conf, cls in detections:
                x1, y1, x2, y2 = map(int, xyxy)
                class_id = int(cls)
                color = colors[class_id].tolist()
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 3, lineType=cv2.LINE_AA)
                label = f"{model.names[class_id]} {conf:.2f}"
                cv2.putText(frame, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2)

            # Running average of inference speed over the frames seen so far
            avg_fps = total_frames / total_time if total_time > 0 else 0
            cv2.putText(frame, f"FPS: {avg_fps:.2f}", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            out.write(frame)
    finally:
        # Release both handles even when inference or drawing fails mid-way.
        cap.release()
        if out is not None:
            out.release()

    return output_path

def process_image(image):
    """Run the detector on a single image and return an annotated copy.

    Args:
        image: A PIL image, as supplied by the Gradio image input
            (``None`` when nothing was uploaded).

    Returns:
        A new PIL image with a box and a "<class name> <confidence>" label
        drawn for every detection.

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")

    # Force three channels so the 3-component box colors match the array's
    # channel count; np.array() makes a copy, so the caller's image is not
    # drawn on.
    img = np.array(image.convert("RGB"))
    results = model(img, size=640)

    # Each detection row unpacks as box corners, confidence, class id.
    detections = results.pred[0].cpu().numpy()

    for *xyxy, conf, cls in detections:
        x1, y1, x2, y2 = map(int, xyxy)
        class_id = int(cls)
        color = colors[class_id].tolist()
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 3, lineType=cv2.LINE_AA)
        label = f"{model.names[class_id]} {conf:.2f}"
        cv2.putText(img, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2)

    return Image.fromarray(img)


# Custom stylesheet handed to gr.Blocks(css=...) below. The #id and .class
# selectors correspond to the elem_id / elem_classes values set on the
# components in the layout (#title, .tab-item, .button-row,
# #video-process-btn, #submit-btn, #clear-btn, .output-container, .footer).
# NOTE(review): .gradio-container is not set anywhere in this file; it is
# presumably a class Gradio puts on its own root element - confirm.
css = """
#title {
    text-align: center;
    color: #2C3E50;
    font-size: 2.5rem;
    margin: 1.5rem 0;
    text-shadow: 1px 1px 2px rgba(0,0,0,0.1);
}

.gradio-container {
    background-color: #F5F7FA;
}

.tab-item {
    background-color: white;
    border-radius: 10px;
    padding: 20px;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
    margin: 10px;
}

.button-row {
    display: flex;
    justify-content: space-around;
    margin: 1rem 0;
}

#video-process-btn, #submit-btn {
    background-color: #3498DB;
    border: none;
}

#clear-btn {
    background-color: #E74C3C;
    border: none;
}

.output-container {
    margin-top: 1.5rem;
    border: 2px dashed #3498DB;
    border-radius: 10px;
    padding: 10px;
}

.footer {
    text-align: center;
    margin-top: 2rem;
    font-size: 0.9rem;
    color: #7F8C8D;
}
"""

# Build the two-tab UI: one tab for video files, one for still images.
with gr.Blocks(css=css, title="Real-Time YOLOv5 Video & Image Object Detection") as demo:
    gr.Markdown("""# Real-Time YOLOv5 Object Detection""", elem_id="title")

    with gr.Tabs():
        with gr.TabItem("Video Detection", elem_classes="tab-item"):
            with gr.Row():
                video_input = gr.Video(
                    label="Upload Video",
                    interactive=True,
                    elem_id="video-input"
                )

            with gr.Row(elem_classes="button-row"):
                process_button = gr.Button(
                    "Process Video",
                    variant="primary",
                    elem_id="video-process-btn"
                )

            with gr.Row(elem_classes="output-container"):
                video_output = gr.Video(
                    label="Processed Video",
                    elem_id="video-output"
                )

            process_button.click(
                fn=process_video,
                inputs=video_input,
                outputs=video_output
            )

        # Everything for the image tab must be nested inside this TabItem;
        # with the body dedented the file does not even parse.
        with gr.TabItem("Image Detection", elem_classes="tab-item"):
            with gr.Row():
                image_input = gr.Image(
                    type="pil",
                    label="Upload Image",
                    interactive=True
                )

            # Buttons sit directly below the image input.
            with gr.Row(elem_classes="button-row"):
                clear_button = gr.Button(
                    "Clear",
                    variant="secondary",
                    elem_id="clear-btn"
                )
                submit_button = gr.Button(
                    "Detect Objects",
                    variant="primary",
                    elem_id="submit-btn"
                )

            # Output is placed below the input and the buttons.
            image_output = gr.Image(
                label="Detected Objects",
                elem_id="image-output"
            )

            # Returning None resets the output image.
            clear_button.click(
                fn=lambda: None,
                inputs=None,
                outputs=image_output
            )

            submit_button.click(
                fn=process_image,
                inputs=image_input,
                outputs=image_output
            )

    gr.Markdown("""
    ### Powered by YOLOv5.
    This application allows real-time object detection using the YOLOv5 model.
    """, elem_classes="footer")

demo.launch()