"""Gradio demo that runs YOLOv5n object detection on uploaded or webcam images."""

import os
import time
from pathlib import Path

import cv2
import gradio as gr
import numpy as np
import torch

# Cache directory for downloaded model weights.
os.makedirs("models", exist_ok=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Use the small YOLOv5n checkpoint instead of the x-large one.
model_path = Path("models/yolov5n.pt")
if not model_path.exists():
    print("Downloading and caching YOLOv5n...")
    torch.hub.download_url_to_file(
        "https://github.com/ultralytics/yolov5/releases/download/v6.2/yolov5n.pt",
        "models/yolov5n.pt",
    )

# autoshape=False returns the raw detection model: this script does its own
# preprocessing and non-max suppression in detect_objects().
model = torch.hub.load(
    "ultralytics/yolov5", "custom", path=str(model_path), autoshape=False
).to(device)

# The NMS helper ships with the YOLOv5 repository that torch.hub.load() just
# imported, so it can only be imported after the model has been loaded.
# NOTE(review): relies on the hub repo's `utils` package remaining importable
# after torch.hub.load() -- confirm no other top-level `utils` package shadows it.
from utils.general import non_max_suppression  # noqa: E402

# Detection settings. They are stored on the model object for convenience and
# are read back by detect_objects() when it calls non_max_suppression().
model.conf = 0.5  # Minimum confidence for a detection to be kept
model.iou = 0.45  # IoU threshold used by NMS
model.classes = None  # None means "keep every class"

# Precision settings
if device.type == "cuda":
    model.half()  # FP16 inference
    torch.backends.cudnn.benchmark = True  # Let cuDNN pick the fastest kernels
else:
    model.float()
    torch.set_num_threads(2)  # Limit CPU threads for better resource management
model.eval()

# One random colour per class.
colors = np.random.rand(len(model.names), 3) * 255

# Running totals across all processed frames.
total_time = 0
frame_count = 0


def _rescale_boxes(boxes, input_size, image_shape):
    """Map xyxy boxes from the square network input back to the source image.

    The frame is resized to ``input_size`` x ``input_size`` without preserving
    the aspect ratio, so each axis is scaled back independently.

    Args:
        boxes: ``(n, 4)`` float array of ``x1, y1, x2, y2`` in network-input pixels.
        input_size: Side length of the square network input.
        image_shape: Shape of the source image; only ``(height, width)`` is used.

    Returns:
        ``(n, 4)`` integer array of boxes in source-image pixels, clipped to
        the image bounds.
    """
    height, width = image_shape[:2]
    scale = np.array([width, height, width, height], dtype=np.float32) / input_size
    scaled = boxes * scale
    scaled[:, [0, 2]] = scaled[:, [0, 2]].clip(0, width - 1)
    scaled[:, [1, 3]] = scaled[:, [1, 3]].clip(0, height - 1)
    return scaled.round().astype(int)


def detect_objects(image):
    """Run object detection on one frame and return an annotated copy.

    Args:
        image: RGB ``uint8`` array of shape ``(height, width, 3)`` as delivered
            by the Gradio image component, or ``None`` when no image is set.

    Returns:
        A copy of ``image`` with one rectangle per detection and an FPS
        overlay, or ``None`` when ``image`` is ``None``.

    Side effects:
        Updates the module-level ``total_time`` and ``frame_count`` counters.
    """
    global total_time, frame_count
    if image is None:
        return None

    start = time.perf_counter()

    input_size = 320  # Reduced from 640 for speed

    # Gradio already supplies RGB, the channel order the model expects, so no
    # colour conversion is applied here.
    resized = cv2.resize(image, (input_size, input_size))
    tensor = torch.from_numpy(resized).to(device)
    tensor = tensor.half() if device.type == "cuda" else tensor.float()
    tensor = tensor.permute(2, 0, 1).unsqueeze(0) / 255  # HWC -> 1xCxHxW in [0, 1]

    with torch.no_grad():
        pred = model(tensor, augment=False)[0]
        detections = non_max_suppression(
            pred,
            conf_thres=model.conf,
            iou_thres=model.iou,
            classes=model.classes,
            agnostic=False,
        )[0]

    output = image.copy()
    if detections is not None and len(detections):
        # Rows are (x1, y1, x2, y2, confidence, class).
        rows = detections.float().cpu().numpy()
        boxes = _rescale_boxes(rows[:, :4], input_size, output.shape)
        for (x1, y1, x2, y2), cls in zip(boxes.tolist(), rows[:, 5]):
            cv2.rectangle(output, (x1, y1), (x2, y2), colors[int(cls)].tolist(), 2)

    # FPS calculation
    dt = time.perf_counter() - start
    total_time += dt
    frame_count += 1
    fps = 1 / dt

    cv2.putText(
        output,
        f"FPS: {fps:.1f}",
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (0, 255, 0),
        2,
    )
    return output


# Use smaller example images
example_images = ["pexels-hikaique-109919.jpg", "spring_street_after.jpg"]

with gr.Blocks(title="Optimized YOLOv5") as demo:
    gr.Markdown("# Real-Time YOLOv5 Object Detection")
    with gr.Row():
        # Webcam input on Hugging Face Spaces; plain upload everywhere else.
        # "upload" is spelled out because None is not an accepted source.
        input_img = gr.Image(
            label="Input",
            source="webcam" if os.getenv('SPACE_ID') else "upload",
        )
        output_img = gr.Image(label="Output")
    gr.Examples(
        examples=example_images,
        inputs=input_img,
        outputs=output_img,
        fn=detect_objects,
    )
    input_img.change(fn=detect_objects, inputs=input_img, outputs=output_img)

demo.launch()