Spaces:

huntrezz
/

RealtimeMonocularDepth

Runtime error

App Files Files Community

huntrezz committed on Dec 1, 2024

Commit

f8b3886

verified ·

1 Parent(s): 9771925

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -97

app.py CHANGED Viewed

@@ -1,98 +1,96 @@
-import cv2
-import torch
-from transformers import DPTForDepthEstimation, DPTImageProcessor
-import numpy as np
-import time
-import warnings
-warnings.filterwarnings("ignore", message="It looks like you are trying to rescale already rescaled images.")
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model = DPTForDepthEstimation.from_pretrained("Intel/dpt-swinv2-tiny-256", torch_dtype=torch.float16).to(device)
-processor = DPTImageProcessor.from_pretrained("Intel/dpt-swinv2-tiny-256")
-cap = cv2.VideoCapture(0)
-def resize_image(image, target_size=(256, 256)):
-    return cv2.resize(image, target_size)
-def manual_normalize(depth_map):
-    min_val = np.min(depth_map)
-    max_val = np.max(depth_map)
-    if min_val != max_val:
-        normalized = (depth_map - min_val) / (max_val - min_val)
-        return (normalized * 255).astype(np.uint8)
-    else:
-        return np.zeros_like(depth_map, dtype=np.uint8)
-frame_skip = 4
-frame_count = 0
-color_map = cv2.applyColorMap(np.arange(256, dtype=np.uint8), cv2.COLORMAP_INFERNO)
-prev_frame_time = 0
-while True:
-    ret, frame = cap.read()
-    if not ret:
-        break
-    frame_count += 1
-    if frame_count % frame_skip != 0:
-        continue
-    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    resized_frame = resize_image(rgb_frame)
-    inputs = processor(images=resized_frame, return_tensors="pt").to(device)
-    inputs = {k: v.to(torch.float16) for k, v in inputs.items()}
-    with torch.no_grad():
-        outputs = model(**inputs)
-        predicted_depth = outputs.predicted_depth
-    depth_map = predicted_depth.squeeze().cpu().numpy()
-    # Check Input Data
-    print(f"depth_map shape: {depth_map.shape}")
-    print(f"depth_map min: {np.min(depth_map)}, max: {np.max(depth_map)}")
-    print(f"depth_map dtype: {depth_map.dtype}")
-    # Handle invalid values
-    depth_map = np.nan_to_num(depth_map, nan=0.0, posinf=0.0, neginf=0.0)
-    # Ensure depth_map is in float32 format
-    depth_map = depth_map.astype(np.float32)
-    # Check for zero-sized arrays
-    if depth_map.size == 0:
-        print("Error: depth_map is empty")
-        depth_map = np.zeros((256, 256), dtype=np.uint8)
-    else:
-        # Handle empty or constant arrays
-        if np.any(depth_map) and np.min(depth_map) != np.max(depth_map):
-            depth_map = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
-        else:
-            depth_map = np.zeros_like(depth_map, dtype=np.uint8)
-    # Use manual normalization as a fallback
-    if np.all(depth_map == 0):
-        depth_map = manual_normalize(depth_map)
-    depth_map_colored = cv2.applyColorMap(depth_map, color_map)
-    depth_map_colored = cv2.resize(depth_map_colored, (frame.shape[1], frame.shape[0]))
-    combined = np.hstack((frame, depth_map_colored))
-    new_frame_time = time.time()
-    fps = 1 / (new_frame_time - prev_frame_time)
-    prev_frame_time = new_frame_time
-    cv2.putText(combined, f"FPS: {int(fps)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-    cv2.imshow('Webcam and Depth Map', combined)
-    if cv2.waitKey(1) & 0xFF == ord('q'):
-        break
-cap.release()
 cv2.destroyAllWindows()

+import cv2
+import torch
+from transformers import DPTForDepthEstimation, DPTImageProcessor
+import numpy as np
+import time
+import warnings
# Checkpoint name shared by the depth model and its image processor.
MODEL_ID = "Intel/dpt-swinv2-tiny-256"

# Ignore warnings whose message matches the rescaling notice below.
warnings.filterwarnings(
    "ignore",
    message="It looks like you are trying to rescale already rescaled images.",
)

# Use CUDA when PyTorch reports it as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Weights are loaded as float16 and moved to the selected device.
# NOTE(review): float16 is requested even when the device is the CPU; half
# precision on CPU may be slow or unsupported depending on the PyTorch
# build -- confirm on the deployment target.
model = DPTForDepthEstimation.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
).to(device)
processor = DPTImageProcessor.from_pretrained(MODEL_ID)

# Open video capture device index 0 (presumably the default webcam).
cap = cv2.VideoCapture(0)
def resize_image(image, target_size=(256, 256)):
    """Resize ``image`` with ``cv2.resize``.

    Args:
        image: Image array accepted by ``cv2.resize``.
        target_size: Output size forwarded as the ``dsize`` argument of
            ``cv2.resize``; defaults to ``(256, 256)``.

    Returns:
        The array returned by ``cv2.resize``.
    """
    resized = cv2.resize(image, dsize=target_size)
    return resized
def manual_normalize(depth_map):
    """Min-max scale ``depth_map`` into a ``uint8`` array spanning 0..255.

    The minimum maps to 0 and the maximum to 255; intermediate values are
    scaled linearly and truncated toward zero by the ``uint8`` cast.

    Args:
        depth_map: Array-like of numeric values, any shape.

    Returns:
        A ``uint8`` array with the same shape as ``depth_map``. The result is
        all zeros when the input is empty or holds a single constant value.
    """
    depth = np.asarray(depth_map)

    # np.min / np.max raise ValueError on zero-size input, so answer early.
    if depth.size == 0:
        return np.zeros_like(depth, dtype=np.uint8)

    if np.issubdtype(depth.dtype, np.floating):
        # NaN or inf would poison the min/max and make the uint8 cast
        # undefined; replace them with 0 before scaling.
        depth = np.nan_to_num(depth, nan=0.0, posinf=0.0, neginf=0.0)
    else:
        # Integer subtraction can wrap around (e.g. int8: 127 - (-128)) and
        # boolean arrays do not support ``-`` at all; do the math in float64,
        # which is also what integer true division produces.
        depth = depth.astype(np.float64)

    lowest = depth.min()
    highest = depth.max()
    if lowest == highest:
        # Constant input: there is no range to stretch.
        return np.zeros_like(depth, dtype=np.uint8)

    scaled = (depth - lowest) / (highest - lowest)
    return (scaled * 255).astype(np.uint8)
# Capture frames, estimate depth on every ``frame_skip``-th one, and show the
# camera frame next to its colorized depth map until the stream ends or the
# user presses "q".

# Only every ``frame_skip``-th captured frame is run through the model.
frame_skip = 4
frame_count = 0
# 256-entry INFERNO lookup table, built once and reused as a user colormap.
color_map = cv2.applyColorMap(np.arange(256, dtype=np.uint8), cv2.COLORMAP_INFERNO)
prev_frame_time = 0

if not cap.isOpened():
    # Without this the loop below would end on the first failed read with no
    # indication of why nothing was displayed.
    print("Error: could not open the video capture device")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        if frame_count % frame_skip != 0:
            continue

        # Convert the captured BGR frame to RGB before preprocessing.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        resized_frame = resize_image(rgb_frame)
        inputs = processor(images=resized_frame, return_tensors="pt").to(device)
        # Follow the model's own parameter dtype rather than hard-coding
        # float16, so inputs and weights always agree.
        inputs = {k: v.to(model.dtype) for k, v in inputs.items()}
        with torch.no_grad():
            predicted_depth = model(**inputs).predicted_depth
        depth_map = predicted_depth.squeeze().cpu().numpy()

        # Check Input Data
        print(f"depth_map shape: {depth_map.shape}")
        if depth_map.size:
            # np.min / np.max raise on zero-size arrays; skip the range line
            # so the empty-array handling below can actually run.
            print(f"depth_map min: {np.min(depth_map)}, max: {np.max(depth_map)}")
        print(f"depth_map dtype: {depth_map.dtype}")

        # Replace NaN/inf with 0 and work in float32 for cv2.normalize.
        depth_map = np.nan_to_num(depth_map, nan=0.0, posinf=0.0, neginf=0.0)
        depth_map = depth_map.astype(np.float32)

        if depth_map.size == 0:
            print("Error: depth_map is empty")
            depth_map = np.zeros((256, 256), dtype=np.uint8)
        elif np.min(depth_map) != np.max(depth_map):
            # Stretch the depth range onto 0..255 as 8-bit values.
            depth_map = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
        else:
            # Constant depth has no range to stretch; show it as all zeros.
            depth_map = np.zeros_like(depth_map, dtype=np.uint8)

        # Colorize, scale back to the camera frame size, and place the two
        # images side by side.
        depth_map_colored = cv2.applyColorMap(depth_map, color_map)
        depth_map_colored = cv2.resize(depth_map_colored, (frame.shape[1], frame.shape[0]))
        combined = np.hstack((frame, depth_map_colored))

        # FPS counts processed frames only; guard against a zero interval
        # when two timestamps coincide on a coarse clock.
        new_frame_time = time.time()
        elapsed = new_frame_time - prev_frame_time
        fps = 1 / elapsed if elapsed > 0 else 0
        prev_frame_time = new_frame_time

        cv2.putText(combined, f"FPS: {int(fps)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Webcam and Depth Map', combined)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture device and close windows even when the loop exits
    # through an exception or Ctrl-C.
    cap.release()
    cv2.destroyAllWindows()