Spaces:

Aumkeshchy2003
/

Object_Detection

Running

App Files Files Community

Object_Detection / app.py

Aumkeshchy2003

Update app.py

a4bd3f4 verified 3 months ago

raw

history blame

7.63 kB

	import torch
	import numpy as np
	import gradio as gr
	import cv2
	import time
	import os
	import threading
	from queue import Queue
	from pathlib import Path

	# Directory that holds the locally cached YOLOv5 weights
	os.makedirs("models", exist_ok=True)

	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	print(f"Using device: {device}")

	# Use YOLOv5n (nano) for higher FPS
	model_path = Path("models/yolov5n.pt")
	if model_path.exists():
	    print(f"Loading model from cache: {model_path}")
	else:
	    print("Downloading YOLOv5n model and caching...")

	# Load through the hub's "custom" entry point with an explicit weights path so
	# the first run and every later run share one code path. torch.hub's
	# source="local" expects a directory containing hubconf.py, not a GitHub slug,
	# and a bare state_dict file is not a checkpoint the hub loader can read.
	# NOTE(review): relies on YOLOv5 fetching the official yolov5n.pt release
	# asset into `model_path` when the file is missing - confirm against the
	# YOLOv5 revision torch.hub pulls. A models/yolov5n.pt left behind by
	# state_dict-based caching must be deleted before starting.
	model = torch.hub.load("ultralytics/yolov5", "custom", path=str(model_path)).to(device)

	# Model configurations for better performance
	model.conf = 0.5  # Confidence threshold
	model.iou = 0.45  # IOU threshold
	model.classes = None  # Detect all classes
	model.max_det = 20  # Limit detections for speed

	if device.type == "cuda":
	    model.half()  # Half precision for CUDA
	else:
	    # os.cpu_count() returns None when the count cannot be determined, and
	    # torch.set_num_threads() needs an int, so fall back to a single thread.
	    torch.set_num_threads(os.cpu_count() or 1)

	# Switch to inference mode
	model.eval()

	# One color per class name, drawn once from the seeded global NumPy RNG so
	# the palette is reproducible
	np.random.seed(42)
	colors = np.random.uniform(low=0, high=255, size=(len(model.names), 3))

	# Running totals behind the average-FPS readout of detect_objects
	total_inference_time, inference_count = 0, 0
	# Recent per-frame FPS samples appended by process_webcam_frame
	last_fps_values = list()

	def detect_objects(image):
	    """Run the detector on one image and return an annotated copy.

	    Draws a box and a "<class name> <confidence>" label for every detection,
	    plus an overlay showing the FPS of this call and the running average FPS.
	    Updates the module-level ``total_inference_time`` and ``inference_count``.

	    Args:
	        image: Image array to analyze (presumably the numpy array handed over
	            by the Gradio image input - confirm against the caller), or None.

	    Returns:
	        A copy of ``image`` with the annotations drawn on it, or None when
	        ``image`` is None. The input array itself is not drawn on.
	    """
	    global total_inference_time, inference_count

	    if image is None:
	        return None

	    # perf_counter() is monotonic and high resolution; time.time() can jump
	    # or tick coarsely, which distorts (or zeroes) short intervals.
	    start_time = time.perf_counter()
	    output_image = image.copy()
	    input_size = 640

	    # Optimize input for inference
	    with torch.no_grad():
	        results = model(image, size=input_size)

	    inference_time = time.perf_counter() - start_time
	    total_inference_time += inference_time
	    inference_count += 1
	    avg_inference_time = total_inference_time / inference_count

	    detections = results.pred[0].cpu().numpy()

	    # Draw detections; each row unpacks as box corners, confidence, class id
	    for *xyxy, conf, cls in detections:
	        x1, y1, x2, y2 = map(int, xyxy)
	        class_id = int(cls)
	        color = colors[class_id].tolist()

	        # Bounding box
	        cv2.rectangle(output_image, (x1, y1), (x2, y2), color, 3, lineType=cv2.LINE_AA)

	        # Label with class name and confidence
	        label = f"{model.names[class_id]} {conf:.2f}"
	        font_scale, font_thickness = 0.9, 2
	        (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness)

	        # The label normally sits directly above the box. Clamp it to the top
	        # edge so it stays visible for boxes that touch the top of the image.
	        label_top = max(y1 - h - 10, 0)
	        label_bottom = label_top + h + 10
	        cv2.rectangle(output_image, (x1, label_top), (x1 + w + 10, label_bottom), color, -1)
	        cv2.putText(output_image, label, (x1 + 5, label_bottom - 5),
	                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), font_thickness,
	                    lineType=cv2.LINE_AA)

	    # Guard the divisions against a zero interval; the fallback of 30 matches
	    # the one used by process_webcam_frame.
	    fps = 1 / inference_time if inference_time > 0 else 30
	    avg_fps = 1 / avg_inference_time if avg_inference_time > 0 else 30

	    # Stylish FPS display: blend a black panel into the image, then write on it
	    overlay = output_image.copy()
	    cv2.rectangle(overlay, (10, 10), (300, 80), (0, 0, 0), -1)
	    output_image = cv2.addWeighted(overlay, 0.6, output_image, 0.4, 0)
	    cv2.putText(output_image, f"FPS: {fps:.2f}", (20, 40),
	                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, lineType=cv2.LINE_AA)
	    cv2.putText(output_image, f"Avg FPS: {avg_fps:.2f}", (20, 70),
	                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, lineType=cv2.LINE_AA)

	    return output_image

	def process_webcam_frame(frame):
	    """Run the detector on one webcam frame and return an annotated copy.

	    Draws a box and a "<class name> <confidence>" label for every detection,
	    plus a panel with the FPS of this call and the average over the most
	    recent (at most 30) calls. Appends to the module-level
	    ``last_fps_values`` list.

	    Args:
	        frame: Frame array to analyze (presumably the numpy array produced by
	            the Gradio webcam component - confirm against the caller), or None.

	    Returns:
	        A copy of ``frame`` with the annotations drawn on it, or None when
	        ``frame`` is None.
	    """
	    global last_fps_values

	    if frame is None:
	        return None

	    # perf_counter() is monotonic and high resolution; time.time() can jump
	    # or tick coarsely, which distorts short intervals.
	    start_time = time.perf_counter()

	    # Use a smaller size for real-time
	    input_size = 384

	    # Process the frame
	    with torch.no_grad():
	        results = model(frame, size=input_size)

	    # Calculate FPS
	    inference_time = time.perf_counter() - start_time
	    current_fps = 1 / inference_time if inference_time > 0 else 30

	    # Update FPS history (keep last 30 values)
	    last_fps_values.append(current_fps)
	    if len(last_fps_values) > 30:
	        last_fps_values.pop(0)
	    avg_fps = sum(last_fps_values) / len(last_fps_values)

	    # Create output image
	    output = frame.copy()

	    # Draw detections; each row unpacks as box corners, confidence, class id
	    detections = results.pred[0].cpu().numpy()
	    for *xyxy, conf, cls in detections:
	        x1, y1, x2, y2 = map(int, xyxy)
	        class_id = int(cls)
	        color = colors[class_id].tolist()

	        # Draw rectangle and label
	        cv2.rectangle(output, (x1, y1), (x2, y2), color, 2, lineType=cv2.LINE_AA)

	        label = f"{model.names[class_id]} {conf:.2f}"
	        font_scale, font_thickness = 0.6, 1
	        (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness)

	        # The label normally sits directly above the box. Clamp it to the top
	        # edge so it stays visible for boxes that touch the top of the frame.
	        label_top = max(y1 - h - 5, 0)
	        label_bottom = label_top + h + 5
	        cv2.rectangle(output, (x1, label_top), (x1 + w + 5, label_bottom), color, -1)
	        cv2.putText(output, label, (x1 + 3, label_bottom - 3),
	                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), font_thickness,
	                    lineType=cv2.LINE_AA)

	    # Add FPS counter
	    cv2.rectangle(output, (10, 10), (210, 80), (0, 0, 0), -1)
	    cv2.putText(output, f"FPS: {current_fps:.1f}", (20, 40),
	                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2, lineType=cv2.LINE_AA)
	    cv2.putText(output, f"Avg FPS: {avg_fps:.1f}", (20, 70),
	                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2, lineType=cv2.LINE_AA)

	    return output

	def process_uploaded_image(image):
	    """Run object detection on an uploaded image.

	    Delegates to detect_objects and hands back whatever it returns.
	    """
	    annotated = detect_objects(image)
	    return annotated

	# Setup Gradio interface
	example_images = ["spring_street_after.jpg", "pexels-hikaique-109919.jpg"]
	os.makedirs("examples", exist_ok=True)

	# With cache_examples=True Gradio runs the detector on the example files up
	# front, so a missing file would abort startup. Only offer files that exist.
	available_examples = [path for path in example_images if os.path.isfile(path)]

	# Simplified interface with proper webcam handling
	# NOTE(review): widget nesting below was reconstructed from a flattened
	# source - confirm the layout matches the intended UI.
	# The Markdown bodies keep their exact leading whitespace so the strings
	# passed to Gradio are unchanged.
	with gr.Blocks(title="YOLOv5 Object Detection - Real-time & Image Upload") as demo:
	    gr.Markdown("""
	# YOLOv5 Object Detection
	## Real-time webcam detection and image upload processing
	""")

	    with gr.Tabs():
	        with gr.TabItem("Real-time Detection"):
	            gr.Markdown("""
	### Real-time Object Detection
	Using your webcam for continuous object detection at 30+ FPS.
	""")
	            # Use Gradio's webcam component with processing function
	            webcam = gr.Webcam(label="Webcam Input")
	            webcam_output = gr.Image(label="Real-time Detection")
	            detect_button = gr.Button("Detect Objects")

	            # Connect webcam to processor (one frame per button click)
	            detect_button.click(
	                fn=process_webcam_frame,
	                inputs=webcam,
	                outputs=webcam_output,
	            )

	        with gr.TabItem("Image Upload"):
	            gr.Markdown("""
	### Image Upload Detection
	Upload an image to detect objects.
	""")
	            with gr.Row():
	                with gr.Column(scale=1):
	                    input_image = gr.Image(label="Input Image", type="numpy")
	                    submit_button = gr.Button("Submit", variant="primary")
	                    clear_button = gr.Button("Clear")

	                with gr.Column(scale=1):
	                    output_image = gr.Image(label="Detected Objects", type="numpy")

	            # Skip the examples widget entirely when no example file is present
	            if available_examples:
	                gr.Examples(
	                    examples=available_examples,
	                    inputs=input_image,
	                    outputs=output_image,
	                    fn=process_uploaded_image,
	                    cache_examples=True,
	                )

	            # Set up event handlers
	            submit_button.click(fn=process_uploaded_image, inputs=input_image, outputs=output_image)
	            # Clear resets both the input and the output image
	            clear_button.click(lambda: (None, None), None, [input_image, output_image])

	demo.launch(share=False)