File size: 3,370 Bytes
caff61e
359afbb
 
 
 
 
 
 
 
 
 
 
 
 
fa9a701
359afbb
fa9a701
 
 
 
 
 
359afbb
fa9a701
 
 
 
6de980c
fa9a701
359afbb
fa9a701
 
359afbb
fa9a701
 
6de980c
359afbb
6de980c
fa9a701
 
6de980c
fa9a701
 
6de980c
359afbb
fa9a701
359afbb
 
 
 
fa9a701
359afbb
fa9a701
 
 
 
359afbb
fa9a701
 
 
 
 
 
 
359afbb
fa9a701
 
 
359afbb
fa9a701
 
 
 
 
 
 
359afbb
fa9a701
 
 
 
 
 
359afbb
fa9a701
 
 
359afbb
fa9a701
6de980c
fa9a701
 
6de980c
fa9a701
 
359afbb
fa9a701
 
 
 
6de980c
fa9a701
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import torch
import numpy as np
import gradio as gr
import cv2
import time
import os
from pathlib import Path

# Weights are kept in a local "models" folder so they survive between runs.
os.makedirs("models", exist_ok=True)

# Prefer the GPU whenever PyTorch reports one; fall back to the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# YOLOv5n checkpoint: fetched once, then reused from disk on later starts.
model_path = Path("models/yolov5n.pt")
if not model_path.exists():
    print("Downloading and caching YOLOv5n...")
    torch.hub.download_url_to_file(
        "https://github.com/ultralytics/yolov5/releases/download/v6.2/yolov5n.pt",
        "models/yolov5n.pt",
    )

# Build the network from the cached checkpoint. autoshape=False is passed
# through to the hub entry point, so detect_objects does its own pre- and
# post-processing around the raw model call.
model = torch.hub.load(
    "ultralytics/yolov5",
    "custom",
    path=str(model_path),
    autoshape=False,
)
model = model.to(device)

# Detection settings are stored as attributes on the model object;
# detect_objects reads model.conf and model.iou back when filtering boxes.
model.conf = 0.5      # confidence threshold
model.iou = 0.45      # IoU threshold used during non-maximum suppression
model.classes = None  # no class filter (not read anywhere in this file)

# Numeric precision: FP16 on CUDA, FP32 with a capped thread count on CPU.
if device.type != "cuda":
    model.float()
    torch.set_num_threads(2)
else:
    model.half()
    # Let cuDNN benchmark and pick convolution algorithms for the input size.
    torch.backends.cudnn.benchmark = True

# Switch to inference mode.
model.eval()

# One random colour triple per class name, scaled into the 0-255 range.
colors = 255 * np.random.rand(len(model.names), 3)

# Running totals updated by detect_objects on every processed frame.
total_time, frame_count = 0, 0

def _nms_indices(boxes, scores, iou_thres):
    """Greedy non-maximum suppression over one set of boxes.

    Args:
        boxes: float array of shape (n, 4) holding x1, y1, x2, y2 corners.
        scores: float array of shape (n,) with one score per box.
        iou_thres: a box is dropped when its IoU with an already kept,
            higher-scoring box is strictly greater than this value.

    Returns:
        int64 array of indices into ``boxes``, highest score first.
    """
    order = np.argsort(-scores)
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    kept = []
    while order.size:
        best, rest = order[0], order[1:]
        kept.append(best)
        overlap_w = (np.minimum(boxes[best, 2], boxes[rest, 2])
                     - np.maximum(boxes[best, 0], boxes[rest, 0]))
        overlap_h = (np.minimum(boxes[best, 3], boxes[rest, 3])
                     - np.maximum(boxes[best, 1], boxes[rest, 1]))
        inter = np.clip(overlap_w, 0, None) * np.clip(overlap_h, 0, None)
        union = areas[best] + areas[rest] - inter
        iou = inter / np.maximum(union, 1e-9)  # guard against zero-area boxes
        order = rest[iou <= iou_thres]
    return np.asarray(kept, dtype=np.int64)


def _decode_predictions(raw, conf_thres, iou_thres):
    """Convert one image's raw detector output into final detections.

    Assumes the YOLOv5 detection-head layout: each row of ``raw`` is
    ``[cx, cy, w, h, objectness, class_score_0, class_score_1, ...]`` in
    network-input pixels. NOTE(review): confirm against the loaded checkpoint.

    Args:
        raw: float array of shape (num_candidates, 5 + num_classes).
        conf_thres: minimum objectness and minimum objectness * class score.
        iou_thres: IoU threshold for the per-class suppression step.

    Returns:
        float32 array of shape (k, 6) with rows
        ``[x1, y1, x2, y2, confidence, class_index]``; k may be 0.
    """
    candidates = raw[raw[:, 4] > conf_thres]
    class_scores = candidates[:, 5:] * candidates[:, 4:5]
    if class_scores.size == 0:
        return np.zeros((0, 6), dtype=np.float32)

    cls = class_scores.argmax(axis=1)
    conf = class_scores.max(axis=1)
    strong = conf > conf_thres
    candidates, cls, conf = candidates[strong], cls[strong], conf[strong]

    # Centre/size boxes -> corner boxes.
    half_size = candidates[:, 2:4] / 2
    boxes = np.hstack((candidates[:, :2] - half_size,
                       candidates[:, :2] + half_size))

    # Suppress within each class separately so that overlapping objects of
    # different classes never eliminate one another.
    keep = []
    for class_id in np.unique(cls):
        members = np.flatnonzero(cls == class_id)
        winners = _nms_indices(boxes[members], conf[members], iou_thres)
        keep.extend(members[winners].tolist())
    keep = np.asarray(keep, dtype=np.int64)

    return np.column_stack((boxes[keep], conf[keep], cls[keep])).astype(np.float32)


def detect_objects(image):
    """Run the detector on one frame and return an annotated copy.

    The frame is resized to a fixed square network input, passed through the
    module-level ``model``, and the surviving boxes are drawn on a copy of
    the original frame together with the instantaneous FPS.

    Args:
        image: H x W x 3 uint8 array as delivered by the Gradio image input
            (assumed to be RGB, Gradio's default for numpy images), or None
            when the input is empty.

    Returns:
        A new array of the same shape as ``image`` with boxes and an FPS
        overlay drawn on it, or None when ``image`` is None.
    """
    global total_time, frame_count

    if image is None:
        return None

    start = time.perf_counter()

    # The frame is stretched (not letterboxed) to a small square input, so
    # boxes are later mapped back with independent x and y scale factors.
    # No colour conversion: the frame is fed to the model in the channel
    # order it arrives in.
    input_size = 320
    resized = cv2.resize(image, (input_size, input_size))

    with torch.no_grad():
        batch = torch.from_numpy(resized).to(device)
        # Match the precision the model was switched to at load time.
        batch = batch.half() if device.type == "cuda" else batch.float()
        batch = batch.permute(2, 0, 1).unsqueeze(0) / 255
        raw = model(batch, augment=False)[0]

    # Post-process in float32 on the host; raw[0] selects the single image
    # of the batch.
    detections = _decode_predictions(
        raw[0].float().cpu().numpy(), model.conf, model.iou
    )

    output = image.copy()
    height, width = output.shape[:2]
    if len(detections):
        # Map corners from the square network input back to frame pixels.
        detections[:, [0, 2]] *= width / input_size
        detections[:, [1, 3]] *= height / input_size
        detections[:, :4] = np.round(detections[:, :4])
        detections[:, [0, 2]] = np.clip(detections[:, [0, 2]], 0, width)
        detections[:, [1, 3]] = np.clip(detections[:, [1, 3]], 0, height)
        for x1, y1, x2, y2, _score, cls in detections.tolist():
            cv2.rectangle(
                output,
                (int(x1), int(y1)),
                (int(x2), int(y2)),
                colors[int(cls)].tolist(),
                2,
            )

    # Timing: per-frame FPS for the overlay, plus module-level running
    # totals (nothing in this file reads the totals back).
    dt = time.perf_counter() - start
    total_time += dt
    frame_count += 1
    fps = 1 / dt

    cv2.putText(output, f"FPS: {fps:.1f}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

    return output

# Example images offered below the input; the files are expected to sit
# next to this script.
example_images = ["pexels-hikaique-109919.jpg", "spring_street_after.jpg"]

with gr.Blocks(title="Optimized YOLOv5") as demo:
    gr.Markdown("# Real-Time YOLOv5 Object Detection")
    with gr.Row():
        # Webcam capture when SPACE_ID is set in the environment, file
        # upload otherwise. `source` has to name a real input source, so
        # the fallback is Gradio's default "upload" rather than None.
        input_img = gr.Image(
            label="Input",
            source="webcam" if os.getenv("SPACE_ID") else "upload",
        )
        output_img = gr.Image(label="Output")
    # Clicking an example fills the input; detect_objects produces the output.
    gr.Examples(examples=example_images, inputs=input_img, outputs=output_img, fn=detect_objects)
    # Re-run detection every time the input image changes.
    input_img.change(fn=detect_objects, inputs=input_img, outputs=output_img)

demo.launch()