File size: 5,902 Bytes
caff61e
bccf53b
dc80d48
0152e0c
 
a186d85
 
caff61e
a186d85
 
 
 
0152e0c
0e19825
b5a364c
a186d85
 
 
 
 
 
 
 
 
 
 
36e1064
0e19825
 
 
 
 
a186d85
0152e0c
a186d85
 
 
 
 
0152e0c
0e19825
 
0152e0c
0e19825
 
 
e82b28e
a186d85
 
 
 
8513c99
0e19825
 
 
 
 
 
 
 
 
a186d85
0e19825
a186d85
 
 
 
0e19825
 
 
 
a186d85
0e19825
 
 
8513c99
a186d85
0e19825
3e3644e
a186d85
 
 
 
 
 
0e19825
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a186d85
 
0e19825
 
 
 
a186d85
 
0e19825
 
8513c99
a186d85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e19825
 
 
a186d85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8513c99
a186d85
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import torch
import numpy as np
import gradio as gr
import cv2
import time
import os
from pathlib import Path

# Create cache directory for models if it doesn't exist
os.makedirs("models", exist_ok=True)

# Check device availability - Hugging Face Spaces often provides GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load YOLOv5x model with caching for faster startup.
# The cache must hold a full checkpoint file: the hub "custom" entry point
# loads a checkpoint from a path, whereas a bare state_dict cannot be loaded
# that way. source="local" is not used because it expects the first argument
# to be a local directory containing hubconf.py, not a GitHub repo name.
model_path = Path("models/yolov5x.pt")
model = None
if model_path.exists():
    print(f"Loading model from cache: {model_path}")
    try:
        model = torch.hub.load("ultralytics/yolov5", "custom", path=str(model_path))
    except Exception as exc:
        # Startup boundary: a stale or corrupt cache file must not prevent
        # the app from starting, so report it and fall back to a download.
        print(f"Could not load cached weights from {model_path}: {exc}")

if model is None:
    print("Downloading YOLOv5x model and caching...")
    model = torch.hub.load("ultralytics/yolov5", "yolov5x", pretrained=True)
    # NOTE(review): the hub entry point appears to leave the downloaded
    # checkpoint in the working directory as "yolov5x.pt" - confirm. If it is
    # there, move it into the cache; otherwise caching is silently skipped.
    downloaded_weights = Path("yolov5x.pt")
    if downloaded_weights.exists():
        downloaded_weights.replace(model_path)

model = model.to(device)

# Optimization configurations
model.conf = 0.3  # Confidence threshold of 0.3 as specified
model.iou = 0.3   # NMS IoU threshold of 0.3 as specified
model.classes = None  # No class filter: detect every class the model knows

# Optimize for GPU if available
if device.type == "cuda":
    # Use half precision for performance boost
    model.half()
else:
    # On CPU, use all available cores; cpu_count() may return None
    torch.set_num_threads(os.cpu_count() or 1)

# Set model to evaluation mode for inference
model.eval()

# Assign fixed colors to each class for consistent visualization
np.random.seed(42)  # For reproducible colors
colors = np.random.uniform(0, 255, size=(len(model.names), 3))

# Track performance metrics (updated by detect_objects on every call)
total_inference_time = 0
inference_count = 0

def detect_objects(image):
    """
    Run YOLOv5 object detection on an image and draw the results on a copy.

    Args:
        image: Input image as numpy array, or None (e.g. when the input
            component has been cleared)

    Returns:
        output_image: Copy of the input with bounding boxes, class/confidence
            labels and FPS counters drawn on it; None if ``image`` is None
    """
    global total_inference_time, inference_count

    if image is None:
        return None

    # Draw on a copy so the caller's array is left untouched
    output_image = image.copy()

    # Fixed input size for optimal processing
    input_size = 640

    # Time only the model call. perf_counter is monotonic and high-resolution,
    # and the floor keeps the FPS divisions below from ever dividing by zero.
    start_time = time.perf_counter()
    with torch.no_grad():
        results = model(image, size=input_size)
    inference_time = max(time.perf_counter() - start_time, 1e-9)

    # Update running statistics shared across calls
    total_inference_time += inference_time
    inference_count += 1
    avg_inference_time = total_inference_time / inference_count

    # Extract detections from first (and only) image
    detections = results.pred[0].cpu().numpy()

    font = cv2.FONT_HERSHEY_SIMPLEX
    label_height = 20  # Height of the filled label background in pixels

    # Draw each detection on the output image
    for *xyxy, conf, cls in detections:
        # Extract coordinates and convert to integers
        x1, y1, x2, y2 = map(int, xyxy)
        class_id = int(cls)

        # Get color for this class
        color = colors[class_id].tolist()

        # Draw bounding box
        cv2.rectangle(output_image, (x1, y1), (x2, y2), color, 2)

        # Create label with class name and confidence score
        label = f"{model.names[class_id]} {conf:.2f}"

        # Only the text width is needed to size the background rectangle
        (text_width, _), _ = cv2.getTextSize(label, font, 0.5, 1)

        # Place the label above the box; when the box touches the top of the
        # image there is no room above, so put it just inside the box instead
        label_top = y1 - label_height if y1 >= label_height else y1
        label_bottom = label_top + label_height

        # Draw label background
        cv2.rectangle(output_image, (x1, label_top),
                      (x1 + text_width, label_bottom), color, -1)

        # Draw label text
        cv2.putText(output_image, label, (x1, label_bottom - 5),
                    font, 0.5, (255, 255, 255), 1)

    # Calculate FPS for this call and the running average
    fps = 1 / inference_time
    avg_fps = 1 / avg_inference_time

    # Add FPS counters to the image
    cv2.putText(output_image, f"FPS: {fps:.2f}", (10, 30),
                font, 1, (0, 255, 0), 2)
    cv2.putText(output_image, f"Avg FPS: {avg_fps:.2f}", (10, 70),
                font, 1, (0, 255, 0), 2)

    return output_image

# Define example images - these will be stored in the same directory as this script
example_images = [
    "examples/spring_street_after.jpg", 
    "examples/pexels-hikaique-109919.jpg"
]

# Make sure example directory exists
os.makedirs("examples", exist_ok=True)

# Only offer examples whose files are actually present: the directory may
# have just been created empty, and cached examples need the files on disk.
available_examples = [path for path in example_images if Path(path).is_file()]

# Create Gradio interface - optimized for Hugging Face Spaces
with gr.Blocks(title="Optimized YOLOv5 Object Detection") as demo:
    gr.Markdown("""
    # Optimized YOLOv5 Object Detection
    
    This system utilizes YOLOv5 to detect 80+ object types from the COCO dataset.
    
    **Performance Features:**
    - Processing speed: Optimized for 30+ FPS at 640x640 resolution
    - Confidence threshold: 0.3
    - IoU threshold: 0.3
    - Real-time FPS display
    
    Simply upload an image or take a photo with your camera to see the detections!
    """)
    
    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(label="Input Image", type="numpy")
            with gr.Row():
                camera_button = gr.Button("Take Photo from Camera")
                clear_button = gr.Button("Clear")
                
        with gr.Column(scale=1):
            output_image = gr.Image(label="Detected Objects", type="numpy")
    
    # Example gallery (skipped entirely when no example file is available)
    if available_examples:
        gr.Examples(
            examples=available_examples,
            inputs=input_image,
            outputs=output_image,
            fn=detect_objects,
            cache_examples=True  # Cache for faster response
        )
    
    # Run detection whenever the input image changes
    input_image.change(fn=detect_objects, inputs=input_image, outputs=output_image)
    
    # Event listeners for buttons
    # NOTE(review): the JS looks up 'button.webcam' in the page; confirm this
    # selector matches the installed Gradio version's markup.
    camera_button.click(lambda: None, None, input_image, js="() => {document.querySelector('button.webcam').click(); return null}")
    # Returning None for both components clears input and output
    clear_button.click(lambda: None, None, [input_image, output_image])

# Launch for Hugging Face Spaces
demo.launch()