Alessio Grancini committed
Commit 1091c75 · verified · 1 Parent(s): b4b1c15

Update monocular_depth_estimator.py

Files changed (1)
  1. monocular_depth_estimator.py +122 -60
monocular_depth_estimator.py CHANGED
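At a glance: this change removes the `spaces` import, the `@spaces.GPU` decorators, and the lazy `load_model_if_needed()` path, loading the MiDaS model eagerly in `__init__` instead (downloading weights on demand and selecting CUDA only when `torch.cuda.is_available()`). It also restores the OpenCV video loop in `run()` and the `__main__` entry point.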
@@ -5,7 +5,6 @@ import time
 from midas.model_loader import default_models, load_model
 import os
 import urllib.request
-import spaces
 
 MODEL_FILE_URL = {
     "midas_v21_small_256" : "https://github.com/isl-org/MiDaS/releases/download/v2_1/midas_v21_small_256.pt",
@@ -16,101 +15,164 @@ MODEL_FILE_URL = {
16
  }
17
 
18
  class MonocularDepthEstimator:
19
- def __init__(self, model_type="midas_v21_small_256", device="cpu"):
20
- self.device = device
21
- self.model, self.transform, *_ = load_model(self.device, f"models/{model_type}.pt", model_type)
22
  optimize=False,
23
  side_by_side=False,
24
  height=None,
25
  square=False,
26
  grayscale=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
- # Don't initialize any CUDA/GPU stuff here
29
- self.model_type = model_type
30
- self.model_weights_path = model_weights_path
31
  self.is_optimize = optimize
32
  self.is_square = square
33
  self.is_grayscale = grayscale
34
  self.height = height
35
  self.side_by_side = side_by_side
36
- self.model = None
37
- self.transform = None
38
- self.net_w = None
39
- self.net_h = None
40
 
41
- print("Initializing parameters...")
 
 
 
 
42
  if not os.path.exists(model_weights_path+model_type+".pt"):
43
  print("Model file not found. Downloading...")
 
44
  urllib.request.urlretrieve(MODEL_FILE_URL[model_type], model_weights_path+model_type+".pt")
45
  print("Model file downloaded successfully.")
46
 
47
- @spaces.GPU
48
- def load_model_if_needed(self):
49
- if self.model is None:
50
- print("Loading MiDaS model...")
51
- self.model, self.transform, self.net_w, self.net_h = load_model(
52
- 'cuda',
53
- self.model_weights_path + self.model_type + ".pt",
54
- self.model_type,
55
- self.is_optimize,
56
- self.height,
57
- self.is_square
58
- )
59
- print("Model loaded successfully")
60
 
61
- @spaces.GPU
62
- def predict(self, image, target_size):
63
- self.load_model_if_needed()
64
- img_tensor = torch.from_numpy(image).to('cuda').unsqueeze(0)
65
 
66
- if self.is_optimize:
67
  img_tensor = img_tensor.to(memory_format=torch.channels_last)
68
  img_tensor = img_tensor.half()
69
 
70
- with torch.no_grad():
71
- prediction = self.model.forward(img_tensor)
72
- prediction = (
73
- torch.nn.functional.interpolate(
74
- prediction.unsqueeze(1),
75
- size=target_size[::-1],
76
- mode="bicubic",
77
- align_corners=False,
78
- )
79
- .squeeze()
80
- .cpu()
81
- .numpy()
82
  )
 
 
 
 
83
 
84
  return prediction
85
 
86
  def process_prediction(self, depth_map):
 
 
 
 
 
 
 
 
 
 
 
 
87
  depth_min = depth_map.min()
88
  depth_max = depth_map.max()
89
  normalized_depth = 255 * (depth_map - depth_min) / (depth_max - depth_min)
 
 
 
90
  grayscale_depthmap = np.repeat(np.expand_dims(normalized_depth, 2), 3, axis=2)
91
  depth_colormap = cv2.applyColorMap(np.uint8(grayscale_depthmap), cv2.COLORMAP_INFERNO)
 
92
  return normalized_depth/255, depth_colormap/255
93
 
94
- @spaces.GPU
95
  def make_prediction(self, image):
96
- try:
97
- print("Starting depth estimation...")
98
- image = image.copy()
99
- original_image_rgb = np.flip(image, 2)
100
- self.load_model_if_needed()
101
  image_tranformed = self.transform({"image": original_image_rgb/255})["image"]
102
- pred = self.predict(image_tranformed, target_size=original_image_rgb.shape[1::-1])
 
 
 
 
103
  depthmap, depth_colormap = self.process_prediction(pred)
104
- print("Depth estimation complete")
105
- return depthmap, depth_colormap
106
- except Exception as e:
107
- print(f"Error in make_prediction: {str(e)}")
108
- import traceback
109
- print(traceback.format_exc())
110
- raise
111
-
112
- if __name__ == "__main__":
113
- depth_estimator = MonocularDepthEstimator(model_type="dpt_hybrid_384")
114
- depth_estimator.run("assets/videos/testvideo2.mp4")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
 
116
 
 
 
 
 
 
 
 
5
  from midas.model_loader import default_models, load_model
6
  import os
7
  import urllib.request
 
8
 
9
  MODEL_FILE_URL = {
10
  "midas_v21_small_256" : "https://github.com/isl-org/MiDaS/releases/download/v2_1/midas_v21_small_256.pt",
 
15
  }
16
 
17
  class MonocularDepthEstimator:
18
+ def __init__(self,
19
+ model_type="midas_v21_small_256",
20
+ model_weights_path="models/",
21
  optimize=False,
22
  side_by_side=False,
23
  height=None,
24
  square=False,
25
  grayscale=False):
26
+
27
+ # model type
28
+ # MiDaS 3.1:
29
+ # For highest quality: dpt_beit_large_512
30
+ # For moderately less quality, but better speed-performance trade-off: dpt_swin2_large_384
31
+ # For embedded devices: dpt_swin2_tiny_256, dpt_levit_224
32
+ # For inference on Intel CPUs, OpenVINO may be used for the small legacy model: openvino_midas_v21_small .xml, .bin
33
+
34
+ # MiDaS 3.0:
35
+ # Legacy transformer models dpt_large_384 and dpt_hybrid_384
36
+
37
+ # MiDaS 2.1:
38
+ # Legacy convolutional models midas_v21_384 and midas_v21_small_256
39
 
40
+ # params
41
+ print("Initializing parameters and model...")
 
42
  self.is_optimize = optimize
43
  self.is_square = square
44
  self.is_grayscale = grayscale
45
  self.height = height
46
  self.side_by_side = side_by_side
 
 
 
 
47
 
48
+ # select device
49
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
50
+ print("Running inference on : %s" % self.device)
51
+
52
+ # loading model
53
  if not os.path.exists(model_weights_path+model_type+".pt"):
54
  print("Model file not found. Downloading...")
55
+ # Download the model file
56
  urllib.request.urlretrieve(MODEL_FILE_URL[model_type], model_weights_path+model_type+".pt")
57
  print("Model file downloaded successfully.")
58
 
59
+ self.model, self.transform, self.net_w, self.net_h = load_model(self.device, model_weights_path+model_type+".pt",
60
+ model_type, optimize, height, square)
61
+ print("Net width and height: ", (self.net_w, self.net_h))
62
+
63
+
64
+ def predict(self, image, model, target_size):
65
+
 
 
 
 
 
 
66
 
67
+ # convert img to tensor and load to gpu
68
+ img_tensor = torch.from_numpy(image).to(self.device).unsqueeze(0)
 
 
69
 
70
+ if self.is_optimize and self.device == torch.device("cuda"):
71
  img_tensor = img_tensor.to(memory_format=torch.channels_last)
72
  img_tensor = img_tensor.half()
73
 
74
+ prediction = model.forward(img_tensor)
75
+ prediction = (
76
+ torch.nn.functional.interpolate(
77
+ prediction.unsqueeze(1),
78
+ size=target_size[::-1],
79
+ mode="bicubic",
80
+ align_corners=False,
 
 
 
 
 
81
  )
82
+ .squeeze()
83
+ .cpu()
84
+ .numpy()
85
+ )
86
 
87
  return prediction
88
 
89
  def process_prediction(self, depth_map):
90
+ """
91
+ Take an RGB image and depth map and place them side by side. This includes a proper normalization of the depth map
92
+ for better visibility.
93
+ Args:
94
+ original_img: the RGB image
95
+ depth_img: the depth map
96
+ is_grayscale: use a grayscale colormap?
97
+ Returns:
98
+ the image and depth map place side by side
99
+ """
100
+
101
+ # normalizing depth image
102
  depth_min = depth_map.min()
103
  depth_max = depth_map.max()
104
  normalized_depth = 255 * (depth_map - depth_min) / (depth_max - depth_min)
105
+
106
+ # normalized_depth *= 3
107
+ # grayscale_depthmap = np.repeat(np.expand_dims(normalized_depth, 2), 3, axis=2) / 3
108
  grayscale_depthmap = np.repeat(np.expand_dims(normalized_depth, 2), 3, axis=2)
109
  depth_colormap = cv2.applyColorMap(np.uint8(grayscale_depthmap), cv2.COLORMAP_INFERNO)
110
+
111
  return normalized_depth/255, depth_colormap/255
112
 
 
113
  def make_prediction(self, image):
114
+ image = image.copy()
115
+ with torch.no_grad():
116
+ original_image_rgb = np.flip(image, 2) # in [0, 255] (flip required to get RGB)
117
+ # resizing the image to feed to the model
 
118
  image_tranformed = self.transform({"image": original_image_rgb/255})["image"]
119
+
120
+ # monocular depth prediction
121
+ pred = self.predict(image_tranformed, self.model, target_size=original_image_rgb.shape[1::-1])
122
+
123
+ # process the model predictions
124
  depthmap, depth_colormap = self.process_prediction(pred)
125
+ return depthmap, depth_colormap
126
+
127
+ def run(self, input_path):
128
+
129
+ # input video
130
+ cap = cv2.VideoCapture(input_path)
131
+
132
+ # Check if camera opened successfully
133
+ if not cap.isOpened():
134
+ print("Error opening video file")
135
+
136
+ with torch.no_grad():
137
+ while cap.isOpened():
138
+
139
+ # Capture frame-by-frame
140
+ inference_start_time = time.time()
141
+ ret, frame = cap.read()
142
+
143
+ if ret == True:
144
+ _, depth_colormap = self.make_prediction(frame)
145
+ inference_end_time = time.time()
146
+ fps = round(1/(inference_end_time - inference_start_time))
147
+ cv2.putText(depth_colormap, f'FPS: {fps}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (10, 255, 100), 2)
148
+ cv2.imshow('MiDaS Depth Estimation - Press Escape to close window ', depth_colormap)
149
+
150
+ # Press ESC on keyboard to exit
151
+ if cv2.waitKey(1) == 27: # Escape key
152
+ break
153
+
154
+ else:
155
+ break
156
+
157
+
158
+ # When everything done, release
159
+ # the video capture object
160
+ cap.release()
161
+
162
+ # Closes all the frames
163
+ cv2.destroyAllWindows()
164
+
165
+
166
+
167
+ if __name__ == "__main__":
168
+ # params
169
+ INPUT_PATH = "assets/videos/testvideo2.mp4"
170
 
171
+ os.environ['CUDA_VISIBLE_DEVICES'] = '0'
172
 
173
+ # set torch options
174
+ torch.backends.cudnn.enabled = True
175
+ torch.backends.cudnn.benchmark = True
176
+
177
+ depth_estimator = MonocularDepthEstimator(model_type="dpt_hybrid_384")
178
+ depth_estimator.run(INPUT_PATH)