File size: 7,629 Bytes
caff61e
bccf53b
dc80d48
2420aaa
0152e0c
a186d85
2420aaa
 
a186d85
d791bba
2420aaa
 
 
 
 
 
3006b90
 
2420aaa
 
3006b90
2420aaa
3006b90
 
2420aaa
36e1064
2420aaa
3006b90
 
2420aaa
 
0152e0c
2420aaa
 
 
 
 
 
 
 
d5e3d23
2420aaa
 
 
 
 
a4bd3f4
a186d85
8513c99
2420aaa
 
 
 
 
 
9b2d010
 
2420aaa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac43c04
2420aaa
 
 
 
 
 
 
 
ac43c04
0e19825
8513c99
a4bd3f4
 
 
2420aaa
a4bd3f4
 
3006b90
a4bd3f4
a83113c
a4bd3f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2420aaa
 
a4bd3f4
 
2420aaa
 
 
 
 
a4bd3f4
2420aaa
9b2d010
2420aaa
 
9b2d010
a186d85
2420aaa
 
 
 
 
 
a4bd3f4
 
 
 
 
 
 
 
 
 
 
9b2d010
2420aaa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8513c99
3006b90
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
import torch
import numpy as np
import gradio as gr
import cv2
import time
import os
import threading
from queue import Queue
from pathlib import Path

# Create cache directory for models
os.makedirs("models", exist_ok=True)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Use YOLOv5n (nano) for higher FPS
model_path = Path("models/yolov5n.pt")
if model_path.exists():
    print(f"Loading model from cache: {model_path}")
else:
    print("Downloading YOLOv5n model and caching...")

# Load through the hub's "custom" entrypoint, which takes a checkpoint path.
# The previous code combined source="local" with a GitHub slug (source="local"
# needs a local repo directory) and cached a bare state_dict, which is not a
# checkpoint the YOLOv5 loader can read back.
# NOTE(review): YOLOv5 is expected to download the official weights to
# `model_path` when the file is missing -- confirm against the pinned YOLOv5
# release. A models/yolov5n.pt written by the older version of this script is
# a bare state_dict and should be deleted so fresh weights are fetched.
model = torch.hub.load("ultralytics/yolov5", "custom", path=str(model_path)).to(device)

# Model configurations for better performance
model.conf = 0.5  # Confidence threshold
model.iou = 0.45  # IOU threshold
model.classes = None  # Detect all classes
model.max_det = 20  # Limit detections for speed

if device.type == "cuda":
    model.half()  # Half precision for CUDA
else:
    # os.cpu_count() may return None; torch.set_num_threads needs an int.
    torch.set_num_threads(os.cpu_count() or 1)

model.eval()

# Precompute colors for bounding boxes (fixed seed => same colour per class
# on every run)
np.random.seed(42)
colors = np.random.uniform(0, 255, size=(len(model.names), 3))

# Performance tracking
total_inference_time = 0  # Sum of per-call times recorded by detect_objects
inference_count = 0  # Number of detect_objects calls that ran the model
last_fps_values = []  # Store recent FPS values

def detect_objects(image):
    """Run the detector on a single image and return an annotated copy.

    Draws a bounding box and a "<class name> <confidence>" label for every
    detection, then overlays the FPS of this call and the average FPS over
    all calls made so far.

    Args:
        image: Image array to analyse (presumably H x W x 3 uint8 as
            delivered by the Gradio image component -- TODO confirm), or
            None.

    Returns:
        An annotated copy of ``image``; None when ``image`` is None.

    Side effects:
        Adds to the module-level ``total_inference_time`` and
        ``inference_count`` accumulators.
    """
    global total_inference_time, inference_count

    if image is None:
        return None

    start_time = time.time()
    output_image = image.copy()
    input_size = 640

    # Optimize input for inference
    with torch.no_grad():
        results = model(image, size=input_size)

    inference_time = time.time() - start_time
    total_inference_time += inference_time
    inference_count += 1
    avg_inference_time = total_inference_time / inference_count

    detections = results.pred[0].cpu().numpy()

    # Draw detections
    font_scale, font_thickness = 0.9, 2
    for *xyxy, conf, cls in detections:
        x1, y1, x2, y2 = map(int, xyxy)
        class_id = int(cls)
        color = colors[class_id].tolist()

        # Bounding box
        cv2.rectangle(output_image, (x1, y1), (x2, y2), color, 3, lineType=cv2.LINE_AA)

        # Label with class name and confidence
        label = f"{model.names[class_id]} {conf:.2f}"
        (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness)

        # The label normally sits just above the box; when the box touches
        # the top edge, shift it down so it stays inside the image.
        label_top = max(y1 - h - 10, 0)
        label_bottom = label_top + h + 10
        cv2.rectangle(output_image, (x1, label_top), (x1 + w + 10, label_bottom), color, -1)
        cv2.putText(output_image, label, (x1 + 5, label_bottom - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), font_thickness, lineType=cv2.LINE_AA)

    # A zero elapsed time is possible on coarse clocks; use the same guard
    # and fallback value as process_webcam_frame instead of dividing by zero.
    fps = 1 / inference_time if inference_time > 0 else 30
    avg_fps = 1 / avg_inference_time if avg_inference_time > 0 else 30

    # Stylish FPS display: blend a black panel into the image, then write on it
    overlay = output_image.copy()
    cv2.rectangle(overlay, (10, 10), (300, 80), (0, 0, 0), -1)
    output_image = cv2.addWeighted(overlay, 0.6, output_image, 0.4, 0)
    cv2.putText(output_image, f"FPS: {fps:.2f}", (20, 40),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, lineType=cv2.LINE_AA)
    cv2.putText(output_image, f"Avg FPS: {avg_fps:.2f}", (20, 70),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, lineType=cv2.LINE_AA)

    return output_image

def process_webcam_frame(frame):
    """Run the detector on one webcam frame and return an annotated copy.

    Uses a smaller inference size than ``detect_objects`` and draws thinner
    boxes and smaller labels. Overlays the FPS of this call and the mean of
    the most recent FPS values (at most 30 are kept).

    Args:
        frame: Frame array from the webcam component (presumably
            H x W x 3 uint8 -- TODO confirm), or None.

    Returns:
        An annotated copy of ``frame``; None when ``frame`` is None.

    Side effects:
        Appends to the module-level ``last_fps_values`` list and trims it in
        place.
    """
    if frame is None:
        return None

    start_time = time.time()

    # Use a smaller size for real-time
    input_size = 384

    # Process the frame
    with torch.no_grad():
        results = model(frame, size=input_size)

    # Calculate FPS
    inference_time = time.time() - start_time
    current_fps = 1 / inference_time if inference_time > 0 else 30

    # Update FPS history (keep last 30 values); the slice delete mutates the
    # shared list in place and is a no-op while it holds 30 or fewer entries.
    last_fps_values.append(current_fps)
    del last_fps_values[:-30]
    avg_fps = sum(last_fps_values) / len(last_fps_values)

    # Create output image
    output = frame.copy()

    # Draw detections
    font_scale, font_thickness = 0.6, 1
    detections = results.pred[0].cpu().numpy()
    for *xyxy, conf, cls in detections:
        x1, y1, x2, y2 = map(int, xyxy)
        class_id = int(cls)
        color = colors[class_id].tolist()

        # Draw rectangle and label
        cv2.rectangle(output, (x1, y1), (x2, y2), color, 2, lineType=cv2.LINE_AA)

        label = f"{model.names[class_id]} {conf:.2f}"
        (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness)

        # The label normally sits just above the box; when the box touches
        # the top edge, shift it down so it stays inside the frame.
        label_top = max(y1 - h - 5, 0)
        label_bottom = label_top + h + 5
        cv2.rectangle(output, (x1, label_top), (x1 + w + 5, label_bottom), color, -1)
        cv2.putText(output, label, (x1 + 3, label_bottom - 3),
                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), font_thickness, lineType=cv2.LINE_AA)

    # Add FPS counter on an opaque black panel
    cv2.rectangle(output, (10, 10), (210, 80), (0, 0, 0), -1)
    cv2.putText(output, f"FPS: {current_fps:.1f}", (20, 40),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2, lineType=cv2.LINE_AA)
    cv2.putText(output, f"Avg FPS: {avg_fps:.1f}", (20, 70),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2, lineType=cv2.LINE_AA)

    return output

def process_uploaded_image(image):
    """Handle an image from the upload tab.

    Delegates to ``detect_objects`` and returns its result unchanged.
    """
    annotated = detect_objects(image)
    return annotated

# Setup Gradio interface
# Offer only the example images that are actually present, so that a missing
# file cannot break example caching when the interface is built.
example_images = [
    name
    for name in ("spring_street_after.jpg", "pexels-hikaique-109919.jpg")
    if os.path.isfile(name)
]
os.makedirs("examples", exist_ok=True)

# Simplified interface with proper webcam handling
with gr.Blocks(title="YOLOv5 Object Detection - Real-time & Image Upload") as demo:
    gr.Markdown("""
    # YOLOv5 Object Detection
    ## Real-time webcam detection and image upload processing
    """)

    with gr.Tabs():
        with gr.TabItem("Real-time Detection"):
            # NOTE(review): the text below promises continuous 30+ FPS
            # detection, but the wiring in this tab processes one frame per
            # button click -- confirm the intended behaviour.
            gr.Markdown("""
            ### Real-time Object Detection
            Using your webcam for continuous object detection at 30+ FPS.
            """)
            # Use Gradio's webcam component with processing function
            webcam = gr.Webcam(label="Webcam Input")
            webcam_output = gr.Image(label="Real-time Detection")
            detect_button = gr.Button("Detect Objects")

            # Connect webcam to processor
            detect_button.click(
                fn=process_webcam_frame,
                inputs=webcam,
                outputs=webcam_output
            )

        with gr.TabItem("Image Upload"):
            gr.Markdown("""
            ### Image Upload Detection
            Upload an image to detect objects.
            """)
            with gr.Row():
                with gr.Column(scale=1):
                    input_image = gr.Image(label="Input Image", type="numpy")
                    submit_button = gr.Button("Submit", variant="primary")
                    clear_button = gr.Button("Clear")

                with gr.Column(scale=1):
                    output_image = gr.Image(label="Detected Objects", type="numpy")

            # Skip the examples widget entirely when no example file exists.
            if example_images:
                gr.Examples(
                    examples=example_images,
                    inputs=input_image,
                    outputs=output_image,
                    fn=process_uploaded_image,
                    cache_examples=True
                )

    # Set up event handlers
    submit_button.click(fn=process_uploaded_image, inputs=input_image, outputs=output_image)
    # Clearing resets both the input and the output image.
    clear_button.click(lambda: (None, None), None, [input_image, output_image])

demo.launch(share=False)