huntrezz committed on
Commit
f8b3886
·
verified ·
1 Parent(s): 9771925

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -97
app.py CHANGED
@@ -1,98 +1,96 @@
1
- import cv2
2
- import torch
3
- from transformers import DPTForDepthEstimation, DPTImageProcessor
4
- import numpy as np
5
- import time
6
- import warnings
7
- warnings.filterwarnings("ignore", message="It looks like you are trying to rescale already rescaled images.")
8
-
9
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
10
- model = DPTForDepthEstimation.from_pretrained("Intel/dpt-swinv2-tiny-256", torch_dtype=torch.float16).to(device)
11
- processor = DPTImageProcessor.from_pretrained("Intel/dpt-swinv2-tiny-256")
12
-
13
- cap = cv2.VideoCapture(0)
14
-
15
- def resize_image(image, target_size=(256, 256)):
16
- return cv2.resize(image, target_size)
17
-
18
-
19
-
20
- def manual_normalize(depth_map):
21
- min_val = np.min(depth_map)
22
- max_val = np.max(depth_map)
23
- if min_val != max_val:
24
- normalized = (depth_map - min_val) / (max_val - min_val)
25
- return (normalized * 255).astype(np.uint8)
26
- else:
27
- return np.zeros_like(depth_map, dtype=np.uint8)
28
-
29
- frame_skip = 4
30
- frame_count = 0
31
- color_map = cv2.applyColorMap(np.arange(256, dtype=np.uint8), cv2.COLORMAP_INFERNO)
32
-
33
- prev_frame_time = 0
34
-
35
- while True:
36
- ret, frame = cap.read()
37
- if not ret:
38
- break
39
-
40
- frame_count += 1
41
- if frame_count % frame_skip != 0:
42
- continue
43
-
44
- rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
45
- resized_frame = resize_image(rgb_frame)
46
-
47
- inputs = processor(images=resized_frame, return_tensors="pt").to(device)
48
- inputs = {k: v.to(torch.float16) for k, v in inputs.items()}
49
-
50
- with torch.no_grad():
51
- outputs = model(**inputs)
52
- predicted_depth = outputs.predicted_depth
53
-
54
- depth_map = predicted_depth.squeeze().cpu().numpy()
55
-
56
- # Check Input Data
57
- print(f"depth_map shape: {depth_map.shape}")
58
- print(f"depth_map min: {np.min(depth_map)}, max: {np.max(depth_map)}")
59
- print(f"depth_map dtype: {depth_map.dtype}")
60
-
61
- # Handle invalid values
62
- depth_map = np.nan_to_num(depth_map, nan=0.0, posinf=0.0, neginf=0.0)
63
-
64
- # Ensure depth_map is in float32 format
65
- depth_map = depth_map.astype(np.float32)
66
-
67
- # Check for zero-sized arrays
68
- if depth_map.size == 0:
69
- print("Error: depth_map is empty")
70
- depth_map = np.zeros((256, 256), dtype=np.uint8)
71
- else:
72
- # Handle empty or constant arrays
73
- if np.any(depth_map) and np.min(depth_map) != np.max(depth_map):
74
- depth_map = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
75
- else:
76
- depth_map = np.zeros_like(depth_map, dtype=np.uint8)
77
-
78
- # Use manual normalization as a fallback
79
- if np.all(depth_map == 0):
80
- depth_map = manual_normalize(depth_map)
81
-
82
- depth_map_colored = cv2.applyColorMap(depth_map, color_map)
83
- depth_map_colored = cv2.resize(depth_map_colored, (frame.shape[1], frame.shape[0]))
84
-
85
- combined = np.hstack((frame, depth_map_colored))
86
-
87
- new_frame_time = time.time()
88
- fps = 1 / (new_frame_time - prev_frame_time)
89
- prev_frame_time = new_frame_time
90
- cv2.putText(combined, f"FPS: {int(fps)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
91
-
92
- cv2.imshow('Webcam and Depth Map', combined)
93
-
94
- if cv2.waitKey(1) & 0xFF == ord('q'):
95
- break
96
-
97
- cap.release()
98
  cv2.destroyAllWindows()
 
1
+ import cv2
2
+ import torch
3
+ from transformers import DPTForDepthEstimation, DPTImageProcessor
4
+ import numpy as np
5
+ import time
6
+ import warnings
7
+ warnings.filterwarnings("ignore", message="It looks like you are trying to rescale already rescaled images.")
8
+
9
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
10
+ model = DPTForDepthEstimation.from_pretrained("Intel/dpt-swinv2-tiny-256", torch_dtype=torch.float16).to(device)
11
+ processor = DPTImageProcessor.from_pretrained("Intel/dpt-swinv2-tiny-256")
12
+
13
+ cap = cv2.VideoCapture(0)
14
+
15
+ def resize_image(image, target_size=(256, 256)):
16
+ return cv2.resize(image, target_size)
17
+
18
+ def manual_normalize(depth_map):
19
+ min_val = np.min(depth_map)
20
+ max_val = np.max(depth_map)
21
+ if min_val != max_val:
22
+ normalized = (depth_map - min_val) / (max_val - min_val)
23
+ return (normalized * 255).astype(np.uint8)
24
+ else:
25
+ return np.zeros_like(depth_map, dtype=np.uint8)
26
+
27
+ frame_skip = 4
28
+ frame_count = 0
29
+ color_map = cv2.applyColorMap(np.arange(256, dtype=np.uint8), cv2.COLORMAP_INFERNO)
30
+
31
+ prev_frame_time = 0
32
+
33
+ while True:
34
+ ret, frame = cap.read()
35
+ if not ret:
36
+ break
37
+
38
+ frame_count += 1
39
+ if frame_count % frame_skip != 0:
40
+ continue
41
+
42
+ rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
43
+ resized_frame = resize_image(rgb_frame)
44
+
45
+ inputs = processor(images=resized_frame, return_tensors="pt").to(device)
46
+ inputs = {k: v.to(torch.float16) for k, v in inputs.items()}
47
+
48
+ with torch.no_grad():
49
+ outputs = model(**inputs)
50
+ predicted_depth = outputs.predicted_depth
51
+
52
+ depth_map = predicted_depth.squeeze().cpu().numpy()
53
+
54
+ # Check Input Data
55
+ print(f"depth_map shape: {depth_map.shape}")
56
+ print(f"depth_map min: {np.min(depth_map)}, max: {np.max(depth_map)}")
57
+ print(f"depth_map dtype: {depth_map.dtype}")
58
+
59
+ # Handle invalid values
60
+ depth_map = np.nan_to_num(depth_map, nan=0.0, posinf=0.0, neginf=0.0)
61
+
62
+ # Ensure depth_map is in float32 format
63
+ depth_map = depth_map.astype(np.float32)
64
+
65
+ # Check for zero-sized arrays
66
+ if depth_map.size == 0:
67
+ print("Error: depth_map is empty")
68
+ depth_map = np.zeros((256, 256), dtype=np.uint8)
69
+ else:
70
+ # Handle empty or constant arrays
71
+ if np.any(depth_map) and np.min(depth_map) != np.max(depth_map):
72
+ depth_map = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
73
+ else:
74
+ depth_map = np.zeros_like(depth_map, dtype=np.uint8)
75
+
76
+ # Use manual normalization as a fallback
77
+ if np.all(depth_map == 0):
78
+ depth_map = manual_normalize(depth_map)
79
+
80
+ depth_map_colored = cv2.applyColorMap(depth_map, color_map)
81
+ depth_map_colored = cv2.resize(depth_map_colored, (frame.shape[1], frame.shape[0]))
82
+
83
+ combined = np.hstack((frame, depth_map_colored))
84
+
85
+ new_frame_time = time.time()
86
+ fps = 1 / (new_frame_time - prev_frame_time)
87
+ prev_frame_time = new_frame_time
88
+ cv2.putText(combined, f"FPS: {int(fps)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
89
+
90
+ cv2.imshow('Webcam and Depth Map', combined)
91
+
92
+ if cv2.waitKey(1) & 0xFF == ord('q'):
93
+ break
94
+
95
+ cap.release()
 
 
96
  cv2.destroyAllWindows()