Aumkeshchy2003 committed on
Commit
a186d85
·
verified ·
1 Parent(s): 0e19825

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -30
app.py CHANGED
@@ -3,22 +3,40 @@ import numpy as np
3
  import gradio as gr
4
  import cv2
5
  import time
 
 
6
 
7
- # Check device availability
 
 
 
8
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
9
  print(f"Using device: {device}")
10
 
11
- # Load YOLOv5x model (larger model for better accuracy)
12
- model = torch.hub.load("ultralytics/yolov5", "yolov5x", pretrained=True).to(device)
 
 
 
 
 
 
 
 
 
13
 
14
  # Optimization configurations
15
  model.conf = 0.3 # Confidence threshold of 0.3 as specified
16
  model.iou = 0.3 # NMS IoU threshold of 0.3 as specified
17
  model.classes = None # Detect all 80+ COCO classes
18
 
19
- # Enable half-precision for GPU acceleration
20
  if device.type == "cuda":
21
- model.half() # Use FP16 for performance boost
 
 
 
 
22
 
23
  # Set model to evaluation mode for inference
24
  model.eval()
@@ -27,6 +45,10 @@ model.eval()
27
  np.random.seed(42) # For reproducible colors
28
  colors = np.random.uniform(0, 255, size=(len(model.names), 3))
29
 
 
 
 
 
30
  def detect_objects(image):
31
  """
32
  Process input image for object detection using YOLOv5
@@ -37,22 +59,30 @@ def detect_objects(image):
37
  Returns:
38
  output_image: Image with detection results visualized
39
  """
40
- start_time = time.time()
41
 
42
- # Convert image to RGB if it's in BGR format
43
- if image.shape[2] == 3 and image[0,0,0] == image[0,0,2]:
44
- image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
 
45
 
46
  # Create a copy for drawing results
47
  output_image = image.copy()
48
 
49
- # Resize input to 640x640 for optimal processing speed
50
  input_size = 640
51
 
52
  # Perform inference with no gradient calculation
53
  with torch.no_grad():
 
54
  results = model(image, size=input_size)
55
 
 
 
 
 
 
 
56
  # Extract detections from first (and only) image
57
  detections = results.pred[0].cpu().numpy()
58
 
@@ -81,34 +111,67 @@ def detect_objects(image):
81
  cv2.putText(output_image, label, (x1, y1 - 5),
82
  cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
83
 
84
- # Calculate and display FPS
85
- fps = 1 / (time.time() - start_time)
86
 
87
  # Add FPS counter to the image
88
  cv2.putText(output_image, f"FPS: {fps:.2f}", (10, 30),
89
  cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
90
-
91
- print(f"Detection complete - FPS: {fps:.2f}")
92
 
93
  return output_image
94
 
95
- # Create Gradio interface
96
- iface = gr.Interface(
97
- fn=detect_objects,
98
- inputs=gr.Image(type="numpy", label="Upload Image"),
99
- outputs=gr.Image(type="numpy", label="Detected Objects"),
100
- title="Optimized Object Detection with YOLOv5x",
101
- description="""
102
- This system utilizes YOLOv5x to detect 80+ object types from the COCO dataset.
 
 
 
 
 
 
 
 
 
103
  - Processing speed: Optimized for 30+ FPS at 640x640 resolution
104
  - Confidence threshold: 0.3
105
  - IoU threshold: 0.3
106
- - Color-coded bounding boxes with confidence scores
107
- """,
108
- allow_flagging="never",
109
- examples=["spring_street_after.jpg", "pexels-hikaique-109919.jpg"],
110
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
- # Launch the interface
113
- if __name__ == "__main__":
114
- iface.launch()
 
3
  import gradio as gr
4
  import cv2
5
  import time
6
+ import os
7
+ from pathlib import Path
8
 
9
+ # Create cache directory for models if it doesn't exist
10
+ os.makedirs("models", exist_ok=True)
11
+
12
+ # Check device availability - use the GPU when one is present (e.g. on a GPU-backed Hugging Face Space), otherwise fall back to CPU
13
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
14
  print(f"Using device: {device}")
15
 
16
+ # Load YOLOv5x model with caching for faster startup
17
+ model_path = Path("models/yolov5x.pt")
18
+ if model_path.exists():
19
+ print(f"Loading model from cache: {model_path}")
20
+ model = torch.hub.load("ultralytics/yolov5", "yolov5x", pretrained=True,
21
+ source="local", path=str(model_path)).to(device)
22
+ else:
23
+ print("Downloading YOLOv5x model and caching...")
24
+ model = torch.hub.load("ultralytics/yolov5", "yolov5x", pretrained=True).to(device)
25
+ # Cache the model for faster startup next time
26
+ torch.save(model.state_dict(), model_path)
27
 
28
  # Optimization configurations
29
  model.conf = 0.3 # Confidence threshold of 0.3 as specified
30
  model.iou = 0.3 # NMS IoU threshold of 0.3 as specified
31
  model.classes = None # Detect all 80+ COCO classes
32
 
33
+ # Optimize for GPU if available
34
  if device.type == "cuda":
35
+ # Convert the model to half precision (FP16) for a performance boost
36
+ model.half()
37
+ else:
38
+ # On CPU, optimize operations
39
+ torch.set_num_threads(os.cpu_count())
40
 
41
  # Set model to evaluation mode for inference
42
  model.eval()
 
45
  np.random.seed(42) # For reproducible colors
46
  colors = np.random.uniform(0, 255, size=(len(model.names), 3))
47
 
48
+ # Track performance metrics
49
+ total_inference_time = 0
50
+ inference_count = 0
51
+
52
  def detect_objects(image):
53
  """
54
  Process input image for object detection using YOLOv5
 
59
  Returns:
60
  output_image: Image with detection results visualized
61
  """
62
+ global total_inference_time, inference_count
63
 
64
+ if image is None:
65
+ return None
66
+
67
+ start_time = time.time()
68
 
69
  # Create a copy for drawing results
70
  output_image = image.copy()
71
 
72
+ # Fixed input size for optimal processing
73
  input_size = 640
74
 
75
  # Perform inference with no gradient calculation
76
  with torch.no_grad():
77
+ # Pass the image directly to the model, requesting inference at input_size
78
  results = model(image, size=input_size)
79
 
80
+ # Record elapsed time since start_time (covers the image copy and the model call)
81
+ inference_time = time.time() - start_time
82
+ total_inference_time += inference_time
83
+ inference_count += 1
84
+ avg_inference_time = total_inference_time / inference_count
85
+
86
  # Extract detections from first (and only) image
87
  detections = results.pred[0].cpu().numpy()
88
 
 
111
  cv2.putText(output_image, label, (x1, y1 - 5),
112
  cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
113
 
114
+ # Calculate FPS
115
+ fps = 1 / inference_time
116
 
117
  # Add FPS counter to the image
118
  cv2.putText(output_image, f"FPS: {fps:.2f}", (10, 30),
119
  cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
120
+ cv2.putText(output_image, f"Avg FPS: {1/avg_inference_time:.2f}", (10, 70),
121
+ cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
122
 
123
  return output_image
124
 
125
+ # Define example images - these paths point into the "examples" directory
126
+ example_images = [
127
+ "examples/spring_street_after.jpg",
128
+ "examples/pexels-hikaique-109919.jpg"
129
+ ]
130
+
131
+ # Make sure example directory exists
132
+ os.makedirs("examples", exist_ok=True)
133
+
134
+ # Create Gradio interface - optimized for Hugging Face Spaces
135
+ with gr.Blocks(title="Optimized YOLOv5 Object Detection") as demo:
136
+ gr.Markdown("""
137
+ # Optimized YOLOv5 Object Detection
138
+
139
+ This system utilizes YOLOv5 to detect 80+ object types from the COCO dataset.
140
+
141
+ **Performance Features:**
142
  - Processing speed: Optimized for 30+ FPS at 640x640 resolution
143
  - Confidence threshold: 0.3
144
  - IoU threshold: 0.3
145
+ - Real-time FPS display
146
+
147
+ Simply upload an image or take a photo with your camera to see the detections!
148
+ """)
149
+
150
+ with gr.Row():
151
+ with gr.Column(scale=1):
152
+ input_image = gr.Image(label="Input Image", type="numpy")
153
+ with gr.Row():
154
+ camera_button = gr.Button("Take Photo from Camera")
155
+ clear_button = gr.Button("Clear")
156
+
157
+ with gr.Column(scale=1):
158
+ output_image = gr.Image(label="Detected Objects", type="numpy")
159
+
160
+ # Example gallery
161
+ gr.Examples(
162
+ examples=example_images,
163
+ inputs=input_image,
164
+ outputs=output_image,
165
+ fn=detect_objects,
166
+ cache_examples=True # Cache for faster response
167
+ )
168
+
169
+ # Set up the inference call
170
+ input_image.change(fn=detect_objects, inputs=input_image, outputs=output_image)
171
+
172
+ # Event listeners for buttons
173
+ camera_button.click(lambda: None, None, input_image, js="() => {document.querySelector('button.webcam').click(); return null}")
174
+ clear_button.click(lambda: None, None, [input_image, output_image])
175
 
176
+ # Launch for Hugging Face Spaces
177
+ demo.launch()