Alessio Grancini committed on
Commit
2609a96
·
verified ·
1 Parent(s): 09543a7

Update monocular_depth_estimator.py

Browse files
Files changed (1) hide show
  1. monocular_depth_estimator.py +61 -46
monocular_depth_estimator.py CHANGED
@@ -17,54 +17,58 @@ MODEL_FILE_URL = {
17
 
18
  class MonocularDepthEstimator:
19
  def __init__(self,
20
- model_type="midas_v21_small_256",
21
- model_weights_path="models/",
22
- optimize=False,
23
- side_by_side=False,
24
- height=None,
25
- square=False,
26
- grayscale=False):
27
-
28
- # Store parameters but don't initialize CUDA
29
- self.model_type = model_type
30
- self.model_weights_path = model_weights_path
31
- self.is_optimize = optimize
32
- self.is_square = square
33
- self.is_grayscale = grayscale
34
- self.height = height
35
- self.side_by_side = side_by_side
36
- self.model = None # Model will be loaded in make_prediction
37
- self.transform = None
38
-
39
- # Download model if not exists
 
 
 
 
40
  if not os.path.exists(model_weights_path+model_type+".pt"):
41
  print("Model file not found. Downloading...")
42
  urllib.request.urlretrieve(MODEL_FILE_URL[model_type], model_weights_path+model_type+".pt")
43
  print("Model file downloaded successfully.")
44
 
45
  def load_model_if_needed(self):
46
- """Load model if not already loaded"""
47
  if self.model is None:
 
48
  self.model, self.transform, self.net_w, self.net_h = load_model(
49
- self.device,
50
  self.model_weights_path + self.model_type + ".pt",
51
- self.model_type,
52
- self.is_optimize,
53
- self.height,
54
  self.is_square
55
  )
 
56
  print("Net width and height: ", (self.net_w, self.net_h))
57
 
58
  @spaces.GPU
59
  def predict(self, image, target_size):
60
- """GPU-accelerated prediction"""
61
  # Load model if not loaded
62
  self.load_model_if_needed()
63
 
64
  # convert img to tensor and load to gpu
65
- img_tensor = torch.from_numpy(image).to(self.device).unsqueeze(0)
66
 
67
- if self.is_optimize and self.device == torch.device("cuda"):
68
  img_tensor = img_tensor.to(memory_format=torch.channels_last)
69
  img_tensor = img_tensor.half()
70
 
@@ -84,7 +88,7 @@ class MonocularDepthEstimator:
84
  return prediction
85
 
86
  def process_prediction(self, depth_map):
87
- """Process prediction (CPU operation, no GPU needed)"""
88
  depth_min = depth_map.min()
89
  depth_max = depth_map.max()
90
  normalized_depth = 255 * (depth_map - depth_min) / (depth_max - depth_min)
@@ -94,25 +98,32 @@ class MonocularDepthEstimator:
94
 
95
  return normalized_depth/255, depth_colormap/255
96
 
97
- @spaces.GPU(duration=30)
98
  def make_prediction(self, image):
99
- """Main prediction function with GPU acceleration"""
100
  image = image.copy()
101
- with torch.no_grad():
102
- original_image_rgb = np.flip(image, 2) # in [0, 255] (flip required to get RGB)
103
- # resizing the image to feed to the model
104
- image_tranformed = self.transform({"image": original_image_rgb/255})["image"]
105
-
106
- # monocular depth prediction
107
- pred = self.predict(image_tranformed, target_size=original_image_rgb.shape[1::-1])
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
- # process the model predictions
110
- depthmap, depth_colormap = self.process_prediction(pred)
111
- return depthmap, depth_colormap
112
-
113
- @spaces.GPU(duration=60)
114
  def run(self, input_path):
115
- """Video processing with GPU acceleration"""
116
  cap = cv2.VideoCapture(input_path)
117
 
118
  if not cap.isOpened():
@@ -139,6 +150,10 @@ class MonocularDepthEstimator:
139
  cap.release()
140
  cv2.destroyAllWindows()
141
 
142
- if __name__ == "__main__":
 
 
143
  depth_estimator = MonocularDepthEstimator(model_type="dpt_hybrid_384")
144
- depth_estimator.run("assets/videos/testvideo2.mp4")
 
 
 
17
 
18
  class MonocularDepthEstimator:
19
  def __init__(self,
20
+ model_type="midas_v21_small_256",
21
+ model_weights_path="models/",
22
+ optimize=False,
23
+ side_by_side=False,
24
+ height=None,
25
+ square=False,
26
+ grayscale=False):
27
+
28
+ # Store parameters but don't initialize CUDA
29
+ self.model_type = model_type
30
+ self.model_weights_path = model_weights_path
31
+ self.is_optimize = optimize
32
+ self.is_square = square
33
+ self.is_grayscale = grayscale
34
+ self.height = height
35
+ self.side_by_side = side_by_side
36
+ self.model = None
37
+ self.transform = None
38
+ self.net_w = None
39
+ self.net_h = None
40
+
41
+ print("Initializing parameters...")
42
+
43
+ # Download model if needed
44
  if not os.path.exists(model_weights_path+model_type+".pt"):
45
  print("Model file not found. Downloading...")
46
  urllib.request.urlretrieve(MODEL_FILE_URL[model_type], model_weights_path+model_type+".pt")
47
  print("Model file downloaded successfully.")
48
 
49
  def load_model_if_needed(self):
 
50
  if self.model is None:
51
+ print("Loading MiDaS model...")
52
  self.model, self.transform, self.net_w, self.net_h = load_model(
53
+ 'cuda',
54
  self.model_weights_path + self.model_type + ".pt",
55
+ self.model_type,
56
+ self.is_optimize,
57
+ self.height,
58
  self.is_square
59
  )
60
+ print("Model loaded successfully")
61
  print("Net width and height: ", (self.net_w, self.net_h))
62
 
63
  @spaces.GPU
64
  def predict(self, image, target_size):
 
65
  # Load model if not loaded
66
  self.load_model_if_needed()
67
 
68
  # convert img to tensor and load to gpu
69
+ img_tensor = torch.from_numpy(image).to('cuda').unsqueeze(0)
70
 
71
+ if self.is_optimize:
72
  img_tensor = img_tensor.to(memory_format=torch.channels_last)
73
  img_tensor = img_tensor.half()
74
 
 
88
  return prediction
89
 
90
  def process_prediction(self, depth_map):
91
+ # normalizing depth image
92
  depth_min = depth_map.min()
93
  depth_max = depth_map.max()
94
  normalized_depth = 255 * (depth_map - depth_min) / (depth_max - depth_min)
 
98
 
99
  return normalized_depth/255, depth_colormap/255
100
 
101
+ @spaces.GPU
102
  def make_prediction(self, image):
 
103
  image = image.copy()
104
+ try:
105
+ print("Starting depth estimation...")
106
+ with torch.no_grad():
107
+ original_image_rgb = np.flip(image, 2) # in [0, 255] (flip required to get RGB)
108
+ # resizing the image to feed to the model
109
+ self.load_model_if_needed()
110
+ image_tranformed = self.transform({"image": original_image_rgb/255})["image"]
111
+
112
+ # monocular depth prediction
113
+ pred = self.predict(image_tranformed, target_size=original_image_rgb.shape[1::-1])
114
+
115
+ # process the model predictions
116
+ depthmap, depth_colormap = self.process_prediction(pred)
117
+ print("Depth estimation complete")
118
+ return depthmap, depth_colormap
119
+ except Exception as e:
120
+ print(f"Error in make_prediction: {str(e)}")
121
+ import traceback
122
+ print(traceback.format_exc())
123
+ raise
124
 
125
+ @spaces.GPU
 
 
 
 
126
  def run(self, input_path):
 
127
  cap = cv2.VideoCapture(input_path)
128
 
129
  if not cap.isOpened():
 
150
  cap.release()
151
  cv2.destroyAllWindows()
152
 
153
+
154
# Script entry point: run the estimator on the bundled test video.
# The guard must test the module attribute ``__name__``; a bare ``name`` is
# undefined and raises NameError as soon as the module is imported or run.
if __name__ == "__main__":
    depth_estimator = MonocularDepthEstimator(model_type="dpt_hybrid_384")
    depth_estimator.run("assets/videos/testvideo2.mp4")
158
+
159
+