mac9087 committed on
Commit
f77b9b6
·
verified ·
1 Parent(s): 89bd619

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -75
app.py CHANGED
@@ -84,50 +84,8 @@ def process_with_timeout(function, args, timeout):
84
  def allowed_file(filename):
85
  return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
86
 
87
- def remove_background(image):
88
- """Remove background using OpenCV GrabCut algorithm with improved precision"""
89
- img_array = np.array(image)
90
-
91
- # Convert to RGB if image has alpha channel
92
- if img_array.shape[2] == 4:
93
- img_array = cv2.cvtColor(img_array, cv2.COLOR_RGBA2RGB)
94
-
95
- # Create mask for GrabCut
96
- mask = np.zeros(img_array.shape[:2], np.uint8)
97
- bgdModel = np.zeros((1, 65), np.float64)
98
- fgdModel = np.zeros((1, 65), np.float64)
99
-
100
- # Define a tighter rectangle for foreground, adjusting based on image content
101
- height, width = img_array.shape[:2]
102
- rect = (int(width * 0.1), int(height * 0.1), int(width * 0.8), int(height * 0.8))
103
-
104
- # Run GrabCut with multiple iterations for better accuracy
105
- cv2.grabCut(img_array, mask, rect, bgdModel, fgdModel, 10, cv2.GC_INIT_WITH_RECT)
106
-
107
- # Refine mask using edge detection to preserve subject edges
108
- mask2 = np.where((mask == cv2.GC_PR_FGD) | (mask == cv2.GC_FGD), 1, 0).astype('uint8')
109
- edges = cv2.Canny(mask2 * 255, 50, 150)
110
- mask2 = cv2.dilate(edges, np.ones((3, 3), np.uint8), iterations=1)
111
- mask2 = cv2.erode(mask2, np.ones((3, 3), np.uint8), iterations=1)
112
-
113
- # Apply mask to image
114
- result = img_array * mask2[:, :, np.newaxis]
115
-
116
- # Create alpha channel
117
- alpha = mask2 * 255
118
- result = np.dstack((result, alpha))
119
-
120
- return Image.fromarray(result, 'RGBA')
121
-
122
  def preprocess_image(image_path):
123
  with Image.open(image_path) as img:
124
- # Handle PNG transparency
125
- if img.mode == 'RGBA':
126
- # Create white background
127
- background = Image.new('RGB', img.size, (255, 255, 255))
128
- background.paste(img, mask=img.split()[3])
129
- img = background
130
-
131
  img = img.convert("RGB")
132
 
133
  if img.width > MAX_DIMENSION or img.height > MAX_DIMENSION:
@@ -139,11 +97,7 @@ def preprocess_image(image_path):
139
  new_width = int(img.width * (MAX_DIMENSION / img.height))
140
  img = img.resize((new_width, new_height), Image.LANCZOS)
141
 
142
- # Remove background and convert back to RGB for processor
143
- img_with_alpha = remove_background(img)
144
- img_rgb = img_with_alpha.convert("RGB") # Convert to RGB for processor
145
-
146
- img_array = np.array(img_rgb)
147
  if len(img_array.shape) == 3 and img_array.shape[2] == 3:
148
  lab = cv2.cvtColor(img_array, cv2.COLOR_RGB2LAB)
149
  l, a, b = cv2.split(lab)
@@ -151,9 +105,9 @@ def preprocess_image(image_path):
151
  cl = clahe.apply(l)
152
  enhanced_lab = cv2.merge((cl, a, b))
153
  img_array = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)
154
- img_rgb = Image.fromarray(img_array)
155
 
156
- return img_rgb # Return RGB image
157
 
158
  def load_models():
159
  global dpt_estimator, depth_anything_model, depth_anything_processor, model_loaded, model_loading
@@ -170,11 +124,13 @@ def load_models():
170
  model_loading = True
171
  print("Loading models...")
172
 
 
173
  hf_token = os.environ.get('HF_TOKEN')
174
  if hf_token:
175
  login(token=hf_token)
176
  print("Authenticated with Hugging Face token")
177
 
 
178
  dpt_model_name = "Intel/dpt-large"
179
  max_retries = 3
180
  retry_delay = 5
@@ -205,6 +161,7 @@ def load_models():
205
  print("DPT-Large loaded")
206
  gc.collect()
207
 
 
208
  da_model_name = "depth-anything/Depth-Anything-V2-Small-hf"
209
  for attempt in range(max_retries):
210
  try:
@@ -262,20 +219,20 @@ def fuse_depth_maps(dpt_depth, da_depth, detail_level='medium'):
262
  if dpt_depth.shape != da_depth.shape:
263
  da_depth = cv2.resize(da_depth, (dpt_depth.shape[1], dpt_depth.shape[0]), interpolation=cv2.INTER_CUBIC)
264
 
265
- p_low_dpt, p_high_dpt = np.percentile(dpt_depth, [5, 95])
266
- p_low_da, p_high_da = np.percentile(da_depth, [5, 95])
267
  dpt_depth = np.clip((dpt_depth - p_low_dpt) / (p_high_dpt - p_low_dpt), 0, 1) if p_high_dpt > p_low_dpt else dpt_depth
268
  da_depth = np.clip((da_depth - p_low_da) / (p_high_da - p_low_da), 0, 1) if p_high_da > p_low_da else da_depth
269
 
270
  if detail_level == 'high':
271
- weight_da = 0.6
272
  edges = cv2.Canny((da_depth * 255).astype(np.uint8), 50, 150)
273
  edge_mask = (edges > 0).astype(np.float32)
274
  dpt_weight = gaussian_filter(1 - edge_mask, sigma=1.0)
275
  da_weight = gaussian_filter(edge_mask, sigma=1.0)
276
  fused_depth = dpt_weight * dpt_depth + da_weight * da_depth * weight_da + (1 - weight_da) * dpt_depth
277
  else:
278
- weight_da = 0.4 if detail_level == 'medium' else 0.2
279
  fused_depth = (1 - weight_da) * dpt_depth + weight_da * da_depth
280
 
281
  fused_depth = np.clip(fused_depth, 0, 1)
@@ -283,25 +240,25 @@ def fuse_depth_maps(dpt_depth, da_depth, detail_level='medium'):
283
 
284
  def enhance_depth_map(depth_map, detail_level='medium'):
285
  enhanced_depth = depth_map.copy().astype(np.float32)
286
- p_low, p_high = np.percentile(enhanced_depth, [5, 95])
287
  enhanced_depth = np.clip(enhanced_depth, p_low, p_high)
288
  enhanced_depth = (enhanced_depth - p_low) / (p_high - p_low) if p_high > p_low else enhanced_depth
289
 
290
  if detail_level == 'high':
291
- blurred = gaussian_filter(enhanced_depth, sigma=1.0)
292
  mask = enhanced_depth - blurred
293
- enhanced_depth = enhanced_depth + 1.0 * mask
294
- smooth1 = gaussian_filter(enhanced_depth, sigma=0.3)
295
- smooth2 = gaussian_filter(enhanced_depth, sigma=1.5)
296
  edge_mask = enhanced_depth - smooth2
297
- enhanced_depth = smooth1 + 0.8 * edge_mask
298
  elif detail_level == 'medium':
299
- blurred = gaussian_filter(enhanced_depth, sigma=0.7)
300
  mask = enhanced_depth - blurred
301
- enhanced_depth = enhanced_depth + 0.6 * mask
302
- enhanced_depth = gaussian_filter(enhanced_depth, sigma=0.4)
303
- else:
304
  enhanced_depth = gaussian_filter(enhanced_depth, sigma=0.5)
 
 
305
 
306
  enhanced_depth = np.clip(enhanced_depth, 0, 1)
307
  return enhanced_depth
@@ -322,16 +279,16 @@ def depth_to_mesh(depth_map, image, resolution=100, detail_level='medium'):
322
  dx = np.gradient(z_values, axis=1)
323
  dy = np.gradient(z_values, axis=0)
324
  gradient_magnitude = np.sqrt(dx**2 + dy**2)
325
- edge_mask = np.clip(gradient_magnitude * 2, 0, 0.1)
326
- z_values = z_values + edge_mask * (z_values - gaussian_filter(z_values, sigma=0.5))
327
 
328
- z_min, z_max = np.percentile(z_values, [10, 90])
329
- z_values = np.clip((z_values - z_min) / (z_max - z_min), 0, 1) if z_max > z_min else z_values
330
- z_scaling = 1.5 if detail_level == 'high' else 1.2 if detail_level == 'medium' else 1.0
331
  z_values = z_values * z_scaling
332
 
333
- x_grid = (x_grid / w - 0.5) * 1.5
334
- y_grid = (y_grid / h - 0.5) * 1.5
335
  vertices = np.vstack([x_grid.flatten(), -y_grid.flatten(), -z_values.flatten()]).T
336
 
337
  faces = []
@@ -358,7 +315,7 @@ def depth_to_mesh(depth_map, image, resolution=100, detail_level='medium'):
358
  mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
359
 
360
  if image:
361
- img_array = np.array(image.convert("RGB")) # Ensure RGB for consistency
362
  vertex_colors = np.zeros((vertices.shape[0], 4), dtype=np.uint8)
363
  for i in range(resolution):
364
  for j in range(resolution):
@@ -379,13 +336,17 @@ def depth_to_mesh(depth_map, image, resolution=100, detail_level='medium'):
379
  vertex_colors[vertex_idx, :3] = [r, g, b]
380
  vertex_colors[vertex_idx, 3] = 255
381
  elif len(img_array.shape) == 3 and img_array.shape[2] == 4:
382
- for c in range(3): # Use only RGB channels
383
  vertex_colors[vertex_idx, c] = int((1-wx)*(1-wy)*img_array[y0, x0, c] +
384
  wx*(1-wy)*img_array[y0, x1, c] +
385
  (1-wx)*wy*img_array[y1, x0, c] +
386
  wx*wy*img_array[y1, x1, c])
 
 
 
 
387
  vertex_colors[vertex_idx, 3] = 255
388
- mesh.visual.vertex_colors = vertex_colors
389
 
390
  if detail_level != 'high':
391
  mesh = mesh.smoothed(method='laplacian', iterations=1)
@@ -498,9 +459,11 @@ def convert_image_to_3d():
498
  try:
499
  def estimate_depth():
500
  with torch.no_grad():
 
501
  dpt_result = dpt_model(image)
502
  dpt_depth = dpt_result["depth"]
503
 
 
504
  if da_model and da_processor:
505
  inputs = da_processor(images=image, return_tensors="pt")
506
  inputs = {k: v.to("cpu") for k, v in inputs.items()}
@@ -517,7 +480,7 @@ def convert_image_to_3d():
517
  fused_depth = np.array(dpt_depth) if isinstance(dpt_depth, Image.Image) else dpt_depth
518
  if len(fused_depth.shape) > 2:
519
  fused_depth = np.mean(fused_depth, axis=2)
520
- p_low, p_high = np.percentile(fused_depth, [5, 95])
521
  fused_depth = np.clip((fused_depth - p_low) / (p_high - p_low), 0, 1) if p_high > p_low else fused_depth
522
 
523
  return fused_depth
@@ -722,4 +685,4 @@ def index():
722
  if __name__ == '__main__':
723
  cleanup_old_jobs()
724
  port = int(os.environ.get('PORT', 7860))
725
- app.run(host='0.0.0.0', port=port)
 
84
  def allowed_file(filename):
85
  return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  def preprocess_image(image_path):
88
  with Image.open(image_path) as img:
 
 
 
 
 
 
 
89
  img = img.convert("RGB")
90
 
91
  if img.width > MAX_DIMENSION or img.height > MAX_DIMENSION:
 
97
  new_width = int(img.width * (MAX_DIMENSION / img.height))
98
  img = img.resize((new_width, new_height), Image.LANCZOS)
99
 
100
+ img_array = np.array(img)
 
 
 
 
101
  if len(img_array.shape) == 3 and img_array.shape[2] == 3:
102
  lab = cv2.cvtColor(img_array, cv2.COLOR_RGB2LAB)
103
  l, a, b = cv2.split(lab)
 
105
  cl = clahe.apply(l)
106
  enhanced_lab = cv2.merge((cl, a, b))
107
  img_array = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)
108
+ img = Image.fromarray(img_array)
109
 
110
+ return img
111
 
112
  def load_models():
113
  global dpt_estimator, depth_anything_model, depth_anything_processor, model_loaded, model_loading
 
124
  model_loading = True
125
  print("Loading models...")
126
 
127
+ # Authenticate with Hugging Face
128
  hf_token = os.environ.get('HF_TOKEN')
129
  if hf_token:
130
  login(token=hf_token)
131
  print("Authenticated with Hugging Face token")
132
 
133
+ # DPT-Large
134
  dpt_model_name = "Intel/dpt-large"
135
  max_retries = 3
136
  retry_delay = 5
 
161
  print("DPT-Large loaded")
162
  gc.collect()
163
 
164
+ # Depth Anything
165
  da_model_name = "depth-anything/Depth-Anything-V2-Small-hf"
166
  for attempt in range(max_retries):
167
  try:
 
219
  if dpt_depth.shape != da_depth.shape:
220
  da_depth = cv2.resize(da_depth, (dpt_depth.shape[1], dpt_depth.shape[0]), interpolation=cv2.INTER_CUBIC)
221
 
222
+ p_low_dpt, p_high_dpt = np.percentile(dpt_depth, [1, 99])
223
+ p_low_da, p_high_da = np.percentile(da_depth, [1, 99])
224
  dpt_depth = np.clip((dpt_depth - p_low_dpt) / (p_high_dpt - p_low_dpt), 0, 1) if p_high_dpt > p_low_dpt else dpt_depth
225
  da_depth = np.clip((da_depth - p_low_da) / (p_high_da - p_low_da), 0, 1) if p_high_da > p_low_da else da_depth
226
 
227
  if detail_level == 'high':
228
+ weight_da = 0.7
229
  edges = cv2.Canny((da_depth * 255).astype(np.uint8), 50, 150)
230
  edge_mask = (edges > 0).astype(np.float32)
231
  dpt_weight = gaussian_filter(1 - edge_mask, sigma=1.0)
232
  da_weight = gaussian_filter(edge_mask, sigma=1.0)
233
  fused_depth = dpt_weight * dpt_depth + da_weight * da_depth * weight_da + (1 - weight_da) * dpt_depth
234
  else:
235
+ weight_da = 0.5 if detail_level == 'medium' else 0.3
236
  fused_depth = (1 - weight_da) * dpt_depth + weight_da * da_depth
237
 
238
  fused_depth = np.clip(fused_depth, 0, 1)
 
240
 
241
  def enhance_depth_map(depth_map, detail_level='medium'):
242
  enhanced_depth = depth_map.copy().astype(np.float32)
243
+ p_low, p_high = np.percentile(enhanced_depth, [1, 99])
244
  enhanced_depth = np.clip(enhanced_depth, p_low, p_high)
245
  enhanced_depth = (enhanced_depth - p_low) / (p_high - p_low) if p_high > p_low else enhanced_depth
246
 
247
  if detail_level == 'high':
248
+ blurred = gaussian_filter(enhanced_depth, sigma=1.5)
249
  mask = enhanced_depth - blurred
250
+ enhanced_depth = enhanced_depth + 1.5 * mask
251
+ smooth1 = gaussian_filter(enhanced_depth, sigma=0.5)
252
+ smooth2 = gaussian_filter(enhanced_depth, sigma=2.0)
253
  edge_mask = enhanced_depth - smooth2
254
+ enhanced_depth = smooth1 + 1.2 * edge_mask
255
  elif detail_level == 'medium':
256
+ blurred = gaussian_filter(enhanced_depth, sigma=1.0)
257
  mask = enhanced_depth - blurred
258
+ enhanced_depth = enhanced_depth + 0.8 * mask
 
 
259
  enhanced_depth = gaussian_filter(enhanced_depth, sigma=0.5)
260
+ else:
261
+ enhanced_depth = gaussian_filter(enhanced_depth, sigma=0.7)
262
 
263
  enhanced_depth = np.clip(enhanced_depth, 0, 1)
264
  return enhanced_depth
 
279
  dx = np.gradient(z_values, axis=1)
280
  dy = np.gradient(z_values, axis=0)
281
  gradient_magnitude = np.sqrt(dx**2 + dy**2)
282
+ edge_mask = np.clip(gradient_magnitude * 5, 0, 0.2)
283
+ z_values = z_values + edge_mask * (z_values - gaussian_filter(z_values, sigma=1.0))
284
 
285
+ z_min, z_max = np.percentile(z_values, [2, 98])
286
+ z_values = (z_values - z_min) / (z_max - z_min) if z_max > z_min else z_values
287
+ z_scaling = 2.5 if detail_level == 'high' else 2.0 if detail_level == 'medium' else 1.5
288
  z_values = z_values * z_scaling
289
 
290
+ x_grid = (x_grid / w - 0.5) * 2.0
291
+ y_grid = (y_grid / h - 0.5) * 2.0
292
  vertices = np.vstack([x_grid.flatten(), -y_grid.flatten(), -z_values.flatten()]).T
293
 
294
  faces = []
 
315
  mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
316
 
317
  if image:
318
+ img_array = np.array(image)
319
  vertex_colors = np.zeros((vertices.shape[0], 4), dtype=np.uint8)
320
  for i in range(resolution):
321
  for j in range(resolution):
 
336
  vertex_colors[vertex_idx, :3] = [r, g, b]
337
  vertex_colors[vertex_idx, 3] = 255
338
  elif len(img_array.shape) == 3 and img_array.shape[2] == 4:
339
+ for c in range(4):
340
  vertex_colors[vertex_idx, c] = int((1-wx)*(1-wy)*img_array[y0, x0, c] +
341
  wx*(1-wy)*img_array[y0, x1, c] +
342
  (1-wx)*wy*img_array[y1, x0, c] +
343
  wx*wy*img_array[y1, x1, c])
344
+ else:
345
+ gray = int((1-wx)*(1-wy)*img_array[y0, x0] + wx*(1-wy)*img_array[y0, x1] +
346
+ (1-wx)*wy*img_array[y1, x0] + wx*wy*img_array[y1, x1])
347
+ vertex_colors[vertex_idx, :3] = [gray, gray, gray]
348
  vertex_colors[vertex_idx, 3] = 255
349
+ mesh.visual.vertex_colors = vertex_colors
350
 
351
  if detail_level != 'high':
352
  mesh = mesh.smoothed(method='laplacian', iterations=1)
 
459
  try:
460
  def estimate_depth():
461
  with torch.no_grad():
462
+ # DPT-Large
463
  dpt_result = dpt_model(image)
464
  dpt_depth = dpt_result["depth"]
465
 
466
+ # Depth Anything (if loaded)
467
  if da_model and da_processor:
468
  inputs = da_processor(images=image, return_tensors="pt")
469
  inputs = {k: v.to("cpu") for k, v in inputs.items()}
 
480
  fused_depth = np.array(dpt_depth) if isinstance(dpt_depth, Image.Image) else dpt_depth
481
  if len(fused_depth.shape) > 2:
482
  fused_depth = np.mean(fused_depth, axis=2)
483
+ p_low, p_high = np.percentile(fused_depth, [1, 99])
484
  fused_depth = np.clip((fused_depth - p_low) / (p_high - p_low), 0, 1) if p_high > p_low else fused_depth
485
 
486
  return fused_depth
 
685
  if __name__ == '__main__':
686
  cleanup_old_jobs()
687
  port = int(os.environ.get('PORT', 7860))
688
+ app.run(host='0.0.0.0', port=port)