import torch
import numpy as np
import gradio as gr
import cv2
import time
import os
from pathlib import Path

# Create cache directory for models if it doesn't exist
os.makedirs("models", exist_ok=True)

# Check device availability - Hugging Face Spaces provides a GPU only on GPU hardware tiers
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load YOLOv5x model with caching for faster startup
model_path = Path("models/yolov5x.pt")
if model_path.exists():
    print(f"Loading model from cache: {model_path}")
    # The "custom" hub entrypoint loads a YOLOv5 checkpoint from a local path
    model = torch.hub.load("ultralytics/yolov5", "custom", path=str(model_path)).to(device)
else:
    print("Downloading YOLOv5x model and caching...")
    model = torch.hub.load("ultralytics/yolov5", "yolov5x", pretrained=True).to(device)
    # The hub call typically leaves the downloaded yolov5x.pt in the working
    # directory; move it into the cache so the branch above can reuse it
    downloaded = Path("yolov5x.pt")
    if downloaded.exists():
        downloaded.replace(model_path)

# Optimization configurations
model.conf = 0.3  # Confidence threshold of 0.3 as specified
model.iou = 0.3   # NMS IoU threshold of 0.3 as specified
model.classes = None  # Detect all 80 COCO classes

# Optimize for GPU if available
if device.type == "cuda":
    # Use FP16 (half precision) weights for faster GPU inference
    model.half()
else:
    # On CPU, optimize operations
    torch.set_num_threads(os.cpu_count())

# Set model to evaluation mode for inference
model.eval()
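
# Warm-up pass (sketch, assuming a 640x640 RGB frame is representative input):
# one dummy inference so the first user request does not pay for CUDA/cuDNN
# initialization; the AutoShape wrapper accepts numpy arrays directly.
with torch.no_grad():
    _ = model(np.zeros((640, 640, 3), dtype=np.uint8), size=640)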

# Assign fixed colors to each class for consistent visualization
np.random.seed(42)  # For reproducible colors
colors = np.random.uniform(0, 255, size=(len(model.names), 3))

# Track performance metrics
total_inference_time = 0
inference_count = 0

def detect_objects(image):
    """
    Process input image for object detection using YOLOv5
    
    Args:
        image: Input image as numpy array
        
    Returns:
        output_image: Image with detection results visualized
    """
    global total_inference_time, inference_count
    
    if image is None:
        return None
    
    start_time = time.time()
    
    # Create a copy for drawing results
    output_image = image.copy()
    
    # Fixed input size for optimal processing
    input_size = 640
    
    # Perform inference with no gradient calculation
    with torch.no_grad():
        # The AutoShape wrapper handles numpy -> tensor conversion, resizing, and NMS internally
        results = model(image, size=input_size)
    
    # Record inference time (model processing only)
    inference_time = time.time() - start_time
    total_inference_time += inference_time
    inference_count += 1
    avg_inference_time = total_inference_time / inference_count
    
    # Extract detections from first (and only) image
    detections = results.pred[0].cpu().numpy()
    
    # Draw each detection on the output image
    for *xyxy, conf, cls in detections:
        # Extract coordinates and convert to integers
        x1, y1, x2, y2 = map(int, xyxy)
        class_id = int(cls)
        
        # Get color for this class
        color = colors[class_id].tolist()
        
        cv2.rectangle(output_image, (x1, y1), (x2, y2), color, 4)
        
        # Create label with class name and confidence score
        label = f"{model.names[class_id]} {conf:.2f}"
        
        font_scale = 0.8
        font_thickness = 2
        
        (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness)

        # Keep the label inside the frame when a box touches the top edge
        label_y = max(y1, h + 10)
        cv2.rectangle(output_image, (x1, label_y - h - 10), (x1 + w + 10, label_y), color, -1)

        # Draw the text twice (dark outline, then white fill) so it stays readable on any fill color
        cv2.putText(output_image, label, (x1 + 5, label_y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), font_thickness + 1)
        cv2.putText(output_image, label, (x1 + 5, label_y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), font_thickness)
    
    # Calculate FPS
    fps = 1 / inference_time
    
    fps_overlay = output_image.copy()
    cv2.rectangle(fps_overlay, (5, 5), (250, 80), (0, 0, 0), -1)
    # Apply the overlay with transparency
    alpha = 0.7
    output_image = cv2.addWeighted(fps_overlay, alpha, output_image, 1 - alpha, 0)
    
    # Display FPS with larger font
    cv2.putText(output_image, f"FPS: {fps:.2f}", (10, 35),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.putText(output_image, f"Avg FPS: {1/avg_inference_time:.2f}", (10, 70),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    
    return output_image

# Define example images - these live in the examples/ folder next to this script
example_images = [
    "examples/spring_street_after.jpg", 
    "examples/pexels-hikaique-109919.jpg"
]

# Make sure example directory exists
os.makedirs("examples", exist_ok=True)

# Create Gradio interface - optimized for Hugging Face Spaces
with gr.Blocks(title="Optimized YOLOv5 Object Detection") as demo:
    gr.Markdown("""
    # Optimized YOLOv5 Object Detection
    
    This system uses YOLOv5 to detect the 80 object classes of the COCO dataset.
    
    **Performance Features:**
    - Processing speed: targets 30+ FPS at 640x640 resolution on GPU
    - Confidence threshold: 0.3
    - IoU threshold: 0.3
    
    Simply upload an image and click Submit to see the detections!
    """)
    
    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(label="Input Image", type="numpy")
            with gr.Row():
                submit_button = gr.Button("Submit", variant="primary")
                clear_button = gr.Button("Clear")
                
        with gr.Column(scale=1):
            output_image = gr.Image(label="Detected Objects", type="numpy")
    
    # Example gallery
    gr.Examples(
        examples=example_images,
        inputs=input_image,
        outputs=output_image,
        fn=detect_objects,
        cache_examples=True  # Cache for faster response
    )
    
    # Set up button event handlers
    submit_button.click(fn=detect_objects, inputs=input_image, outputs=output_image)
    clear_button.click(lambda: (None, None), inputs=None, outputs=[input_image, output_image])
    

# Launch for Hugging Face Spaces
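# Queuing is optional (an assumption about traffic, not part of the original
# setup): it serializes concurrent requests so long inferences avoid timeouts.
demo.queue()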
demo.launch()