Surn committed on
Commit
e9b0e9f
·
1 Parent(s): da61252

Update z axis code

Browse files
Files changed (6) hide show
  1. .gitignore +46 -45
  2. README.md +11 -3
  3. app.py +28 -14
  4. pre-requirements.txt +1 -0
  5. requirements.txt +9 -5
  6. web-ui.bat +1 -1
.gitignore CHANGED
@@ -1,47 +1,48 @@
1
- # Python build
2
- .eggs/
3
- gradio.egg-info/*
4
- !gradio.egg-info/requires.txt
5
- !gradio.egg-info/PKG-INFO
6
- dist/
7
- *.pyc
8
- __pycache__/
9
- *.py[cod]
10
- *$py.class
11
- build/
12
-
13
- # JS build
14
- gradio/templates/frontend
15
- # Secrets
16
- .env
17
-
18
- # Gradio run artifacts
19
- *.db
20
- *.sqlite3
21
- gradio/launches.json
22
- flagged/
23
- gradio_cached_examples/
24
-
25
- # Tests
26
- .coverage
27
- coverage.xml
28
- test.txt
29
-
30
- # Demos
31
- demo/tmp.zip
32
- demo/files/*.avi
33
- demo/files/*.mp4
34
-
35
- # Etc
36
- .idea/*
37
- .DS_Store
38
- *.bak
39
- workspace.code-workspace
40
- *.h5
41
- .vscode/
42
-
43
- # log files
44
- .pnpm-debug.log
45
- venv/
 
46
  *.db-journal
47
  /.vs
 
1
+ # Python build
2
+ .eggs/
3
+ gradio.egg-info/*
4
+ !gradio.egg-info/requires.txt
5
+ !gradio.egg-info/PKG-INFO
6
+ dist/
7
+ *.pyc
8
+ __pycache__/
9
+ *.py[cod]
10
+ *$py.class
11
+ build/
12
+
13
+ # JS build
14
+ gradio/templates/frontend
15
+ # Secrets
16
+ .env
17
+
18
+ # Gradio run artifacts
19
+ *.db
20
+ *.sqlite3
21
+ gradio/launches.json
22
+ flagged/
23
+ gradio_cached_examples/
24
+
25
+ # Tests
26
+ .coverage
27
+ coverage.xml
28
+ test.txt
29
+
30
+ # Demos
31
+ demo/tmp.zip
32
+ demo/files/*.avi
33
+ demo/files/*.mp4
34
+ models/
35
+
36
+ # Etc
37
+ .idea/*
38
+ .DS_Store
39
+ *.bak
40
+ workspace.code-workspace
41
+ *.h5
42
+ .vscode/
43
+
44
+ # log files
45
+ .pnpm-debug.log
46
+ venv/
47
  *.db-journal
48
  /.vs
README.md CHANGED
@@ -1,13 +1,21 @@
1
  ---
2
  title: DPT Depth Estimation + 3D
3
  emoji: ⚡
4
- colorFrom: blue
 
5
  colorTo: red
 
6
  sdk: gradio
7
  sdk_version: 5.16.1
8
  app_file: app.py
9
- pinned: false
10
- short_description: Image to 3D with DPT + 3D Point Cloud
 
 
 
 
 
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
 
1
  ---
2
  title: DPT Depth Estimation + 3D
3
  emoji: ⚡
4
+ short_description: Image to 3D with DPT + 3D Point Cloud
5
+ colorFrom: yellow
6
  colorTo: red
7
+ python_version: 3.10.13
8
  sdk: gradio
9
  sdk_version: 5.16.1
10
  app_file: app.py
11
+ license: apache-2.0
12
+ tags:
13
+ - depth
14
+ - 3d
15
+ hf_oauth: true
16
+ fullWidth: false
17
+ thumbnail: >-
18
+ https://cdn-uploads.huggingface.co/production/uploads/6346595c9e5f0fe83fc60444/s0fQvcoiSBlH36AXpVwPi.png
19
  ---
20
 
21
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py CHANGED
@@ -12,7 +12,9 @@ from transformers import DPTForDepthEstimation, DPTImageProcessor
12
  image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
13
  model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
14
 
 
15
 
 
16
  def process_image(image_path, resized_width=800, z_scale=208):
17
  """
18
  Processes the input image to generate a depth map and a 3D mesh reconstruction.
@@ -47,11 +49,14 @@ def process_image(image_path, resized_width=800, z_scale=208):
47
  predicted_depth.unsqueeze(1),
48
  size=(image.height, image.width),
49
  mode="bicubic",
50
- align_corners=True,
51
  ).squeeze()
52
 
53
  # Normalize the depth image to 8-bit
54
- prediction = prediction.cpu().numpy()
 
 
 
55
  depth_min, depth_max = prediction.min(), prediction.max()
56
  depth_image = ((prediction - depth_min) / (depth_max - depth_min) * 255).astype("uint8")
57
 
@@ -61,9 +66,13 @@ def process_image(image_path, resized_width=800, z_scale=208):
61
  gltf_path = create_3d_obj(np.array(image), prediction, image_path, depth=8, z_scale=z_scale)
62
 
63
  img = Image.fromarray(depth_image)
64
- return [img, gltf_path, gltf_path]
65
 
 
 
 
 
66
 
 
67
  def create_3d_obj(rgb_image, raw_depth, image_path, depth=10, z_scale=200):
68
  """
69
  Creates a 3D object from RGB and depth images.
@@ -94,8 +103,8 @@ def create_3d_obj(rgb_image, raw_depth, image_path, depth=10, z_scale=200):
94
  camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(
95
  width,
96
  height,
97
- fx=1.0,
98
- fy=1.0,
99
  cx=width / 2.0,
100
  cy=height / 2.0,
101
  )
@@ -105,16 +114,16 @@ def create_3d_obj(rgb_image, raw_depth, image_path, depth=10, z_scale=200):
105
 
106
  # Scale the Z dimension
107
  points = np.asarray(pcd.points)
108
- depth_scaled = ((raw_depth - raw_depth.min()) / (raw_depth.max() - raw_depth.min())) * z_scale
109
  z_values = depth_scaled.flatten()[:len(points)]
110
  points[:, 2] *= z_values
111
  pcd.points = o3d.utility.Vector3dVector(points)
112
 
113
  # Estimate and orient normals
114
  pcd.estimate_normals(
115
- search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30)
116
  )
117
- pcd.orient_normals_towards_camera_location(camera_location=np.array([0.0, 0.0, 2.0 ]))
118
 
119
  # Apply transformations
120
  pcd.transform([[1, 0, 0, 0],
@@ -160,8 +169,8 @@ description = (
160
  )
161
  # Create Gradio sliders for resized_width and z_scale
162
  resized_width_slider = gr.Slider(
163
- minimum=400,
164
- maximum=1600,
165
  step=16,
166
  value=800,
167
  label="Resized Width",
@@ -169,15 +178,17 @@ resized_width_slider = gr.Slider(
169
  )
170
 
171
  z_scale_slider = gr.Slider(
172
- minimum=160,
173
- maximum=1024,
174
- step=16,
175
- value=208,
176
  label="Z-Scale",
177
  info="Adjust the scaling factor for the Z-axis in the 3D model."
178
  )
179
  examples = [["examples/" + img] for img in os.listdir("examples/")]
180
 
 
 
181
  iface = gr.Interface(
182
  fn=process_image,
183
  inputs=[
@@ -193,8 +204,11 @@ iface = gr.Interface(
193
  title=title,
194
  description=description,
195
  examples=examples,
 
 
196
  allow_flagging="never",
197
  cache_examples=False,
 
198
  theme="Surn/Beeuty"
199
  )
200
 
 
12
  image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
13
  model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
14
 
15
+ import spaces
16
 
17
+ @spaces.GPU(duration=90,progress=gr.Progress(track_tqdm=True))
18
  def process_image(image_path, resized_width=800, z_scale=208):
19
  """
20
  Processes the input image to generate a depth map and a 3D mesh reconstruction.
 
49
  predicted_depth.unsqueeze(1),
50
  size=(image.height, image.width),
51
  mode="bicubic",
52
+ align_corners=False,
53
  ).squeeze()
54
 
55
  # Normalize the depth image to 8-bit
56
+ if torch.cuda.is_available():
57
+ prediction = prediction.numpy()
58
+ else:
59
+ prediction = prediction.cpu().numpy()
60
  depth_min, depth_max = prediction.min(), prediction.max()
61
  depth_image = ((prediction - depth_min) / (depth_max - depth_min) * 255).astype("uint8")
62
 
 
66
  gltf_path = create_3d_obj(np.array(image), prediction, image_path, depth=8, z_scale=z_scale)
67
 
68
  img = Image.fromarray(depth_image)
 
69
 
70
+ if torch.cuda.is_available():
71
+ torch.cuda.empty_cache()
72
+ torch.cuda.ipc_collect()
73
+ return [img, gltf_path, gltf_path]
74
 
75
+ @spaces.GPU()
76
  def create_3d_obj(rgb_image, raw_depth, image_path, depth=10, z_scale=200):
77
  """
78
  Creates a 3D object from RGB and depth images.
 
103
  camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(
104
  width,
105
  height,
106
+ fx=z_scale,
107
+ fy=z_scale,
108
  cx=width / 2.0,
109
  cy=height / 2.0,
110
  )
 
114
 
115
  # Scale the Z dimension
116
  points = np.asarray(pcd.points)
117
+ depth_scaled = ((raw_depth - raw_depth.min()) / (raw_depth.max() - raw_depth.min())) * (z_scale*100)
118
  z_values = depth_scaled.flatten()[:len(points)]
119
  points[:, 2] *= z_values
120
  pcd.points = o3d.utility.Vector3dVector(points)
121
 
122
  # Estimate and orient normals
123
  pcd.estimate_normals(
124
+ search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=60)
125
  )
126
+ pcd.orient_normals_towards_camera_location(camera_location=np.array([0.0, 0.0, 1.5 ]))
127
 
128
  # Apply transformations
129
  pcd.transform([[1, 0, 0, 0],
 
169
  )
170
  # Create Gradio sliders for resized_width and z_scale
171
  resized_width_slider = gr.Slider(
172
+ minimum=256,
173
+ maximum=1760,
174
  step=16,
175
  value=800,
176
  label="Resized Width",
 
178
  )
179
 
180
  z_scale_slider = gr.Slider(
181
+ minimum=0.2,
182
+ maximum=3.0,
183
+ step=0.01,
184
+ value=0.5,
185
  label="Z-Scale",
186
  info="Adjust the scaling factor for the Z-axis in the 3D model."
187
  )
188
  examples = [["examples/" + img] for img in os.listdir("examples/")]
189
 
190
+ process_image.zerogpu = True
191
+ gr.set_static_paths(paths=["models/","examples/"])
192
  iface = gr.Interface(
193
  fn=process_image,
194
  inputs=[
 
204
  title=title,
205
  description=description,
206
  examples=examples,
207
+ examples_per_page=15,
208
+ flagging_mode=None,
209
  allow_flagging="never",
210
  cache_examples=False,
211
+ delete_cache=(86400,86400),
212
  theme="Surn/Beeuty"
213
  )
214
 
pre-requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ pip>=25.0.1
requirements.txt CHANGED
@@ -1,7 +1,11 @@
1
- torch
2
- transformers
 
 
 
3
  numpy
4
- Pillow
5
- gradio>=5.16.0
6
  jinja2
7
- open3d
 
 
1
+ git+https://github.com/huggingface/diffusers.git
2
+ git+https://github.com/huggingface/transformers.git
3
+ safetensors
4
+ sentencepiece
5
+ git+https://github.com/huggingface/peft.git
6
  numpy
7
+ Pillow>=11.1.0
8
+ torch>=2.4.1
9
  jinja2
10
+ open3d
11
+ spaces
web-ui.bat CHANGED
@@ -1,2 +1,2 @@
1
- python311 -m app.py
2
  pause
 
1
+ python311 -m app
2
  pause