File size: 4,781 Bytes
caff61e
359afbb
 
 
 
679a693
 
bf9434d
679a693
 
 
 
359afbb
679a693
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3545274
679a693
359afbb
679a693
 
 
 
 
 
 
 
3545274
679a693
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bf9434d
 
 
679a693
bf9434d
679a693
bf9434d
 
 
 
 
 
 
679a693
bf9434d
679a693
18a593e
 
 
 
 
bf9434d
 
 
18a593e
 
 
bf9434d
 
 
3545274
eb084b3
18a593e
 
 
3545274
1e2d403
21b3635
 
 
bf9434d
 
18a593e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import torch
import numpy as np
import gradio as gr
import cv2
import time
import os
from pathlib import Path
from PIL import Image

# Create cache directory for models
os.makedirs("models", exist_ok=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load YOLOv5 Nano model.
# The cache has to hold a complete YOLOv5 checkpoint: the hub "custom" entry
# point rebuilds the network from the checkpoint file and cannot do that from
# a bare state_dict. The official weights file is therefore downloaded once
# and every run (first or cached) loads it through the same code path.
YOLOV5N_WEIGHTS_URL = "https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5n.pt"
model_path = Path("models/yolov5n.pt")
if model_path.exists():
    print(f"Loading model from cache: {model_path}")
else:
    print("Downloading YOLOv5n model and caching...")
    torch.hub.download_url_to_file(YOLOV5N_WEIGHTS_URL, str(model_path))

# No source="local" here: that option makes torch.hub treat the first argument
# as a path to a local checkout of the repository, which this script does not ship.
model = torch.hub.load("ultralytics/yolov5", "custom", path=str(model_path)).to(device)

# Optimize model for speed
model.conf = 0.3  # Lower confidence threshold
model.iou = 0.3   # Non-Maximum Suppression IoU threshold
model.classes = None  # No class filter: report every class the model knows

if device.type == "cuda":
    model.half()  # Use FP16 for faster inference
else:
    # os.cpu_count() may return None when the count cannot be determined.
    torch.set_num_threads(os.cpu_count() or 1)

model.eval()

# Pre-generate colors for bounding boxes (fixed seed keeps them stable across runs)
np.random.seed(42)
colors = np.random.randint(0, 255, size=(len(model.names), 3), dtype=np.uint8)

def process_video(video_path):
    """Run object detection on every frame of a video and write an annotated copy.

    Each frame is passed through the module-level ``model``; the detected
    boxes, class labels and confidences are drawn on the frame together with
    the running average inference rate, and the frame is appended to
    ``output_video.mp4``.

    Args:
        video_path: Path of the video file to process. ``None`` or an empty
            string (nothing uploaded) is accepted.

    Returns:
        The path of the annotated video, or ``None`` when no input was given.

    Raises:
        gr.Error: If the input cannot be opened or the output file cannot be
            created, so the UI shows the message instead of trying to play
            an error string as a video.
    """
    if not video_path:
        return None

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise gr.Error("Error: Could not open video file.")

    output_path = "output_video.mp4"
    out = None
    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Some containers report 0 (or NaN) for the frame rate; fall back
            # to a conventional rate rather than handing the writer an
            # unusable value.
            fps = 30.0

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
        if not out.isOpened():
            raise gr.Error("Error: Could not create output video file.")

        total_frames = 0
        total_time = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break  # No more frames

            start_time = time.time()

            # The frame is converted from OpenCV's BGR order to RGB for the model
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = model(img, size=640)

            total_time += time.time() - start_time
            total_frames += 1

            detections = results.pred[0].cpu().numpy()

            # Each detection row is unpacked as (x1, y1, x2, y2, confidence, class)
            for *xyxy, conf, cls in detections:
                x1, y1, x2, y2 = map(int, xyxy)
                class_id = int(cls)
                color = colors[class_id].tolist()
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 3, lineType=cv2.LINE_AA)
                label = f"{model.names[class_id]} {conf:.2f}"
                cv2.putText(frame, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2)

            # Running average of model throughput over the frames seen so far
            avg_fps = total_frames / total_time if total_time > 0 else 0
            cv2.putText(frame, f"FPS: {avg_fps:.2f}", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            out.write(frame)
    finally:
        # Release the capture and writer even if inference or drawing fails.
        cap.release()
        if out is not None:
            out.release()

    return output_path

def process_image(image):
    """Run object detection on a single image and return an annotated copy.

    Args:
        image: The image to analyse, as a PIL image (what the UI supplies) or
            anything ``np.array`` can turn into an image array. ``None``
            (nothing uploaded) is accepted.

    Returns:
        A PIL image with a box, class label and confidence drawn for every
        detection, or ``None`` when no input was given.
    """
    if image is None:
        return None

    # Normalise PIL inputs to 3-channel RGB. The box colours have three
    # components, so drawing on an RGBA array would write alpha 0 (transparent
    # boxes) and palette/greyscale arrays would not take colour at all.
    if isinstance(image, Image.Image):
        image = image.convert("RGB")

    img = np.array(image)
    results = model(img, size=640)

    detections = results.pred[0].cpu().numpy()

    # Each detection row is unpacked as (x1, y1, x2, y2, confidence, class)
    for *xyxy, conf, cls in detections:
        x1, y1, x2, y2 = map(int, xyxy)
        class_id = int(cls)
        color = colors[class_id].tolist()
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 3, lineType=cv2.LINE_AA)
        label = f"{model.names[class_id]} {conf:.2f}"
        cv2.putText(img, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2)

    return Image.fromarray(img)

# Two-tab UI: one tab annotates an uploaded video, the other a single image.
with gr.Blocks(title="Real-Time YOLOv5 Video & Image Object Detection") as demo:
    gr.Markdown("""
    # Real-Time YOLOv5 Object Detection
    """, elem_id="title")
    
    with gr.Tabs():
        with gr.TabItem("Video Detection"):
            with gr.Row():
                video_input = gr.Video(label="Upload Video", interactive=True, elem_id="video-input")
                process_button = gr.Button("Process Video", variant="primary", elem_id="video-process-btn")
            video_output = gr.Video(label="Processed Video", elem_id="video-output")
            process_button.click(fn=process_video, inputs=video_input, outputs=video_output)

        with gr.TabItem("Image Detection"):
            with gr.Row():
                image_input = gr.Image(type="pil", label="Upload Image", interactive=True)
            with gr.Row():
                clear_button = gr.Button("Clear", variant="secondary", elem_id="clear-btn")
                submit_button = gr.Button("Detect Objects", variant="primary", elem_id="submit-btn")
            with gr.Row():
                image_output = gr.Image(label="Detected Objects", elem_id="image-output")
            # Handlers are wired only after image_output exists; referring to
            # it before its creation raises NameError while the UI is built.
            clear_button.click(fn=lambda: None, inputs=None, outputs=image_output)
            submit_button.click(fn=process_image, inputs=image_input, outputs=image_output)
            

demo.launch()