mac9087 committed
Commit 8c9f945 · verified · Parent: d33be4e

Update app.py

Files changed (1): app.py (+184 −196)
app.py CHANGED
@@ -1,3 +1,4 @@
+```python
 import os
 import torch
 import time
@@ -11,11 +12,11 @@ import io
 import zipfile
 import uuid
 import traceback
-from huggingface_hub import snapshot_download, login, HfFileSystem
+from huggingface_hub import snapshot_download, login
 from flask_cors import CORS
 import numpy as np
 import trimesh
-from transformers import pipeline
+from transformers import pipeline, AutoImageProcessor, AutoModelForDepthEstimation
 from scipy.ndimage import gaussian_filter
 from scipy import interpolate
 import cv2
@@ -34,8 +35,6 @@ os.makedirs(RESULTS_FOLDER, exist_ok=True)
 os.makedirs(CACHE_DIR, exist_ok=True)
 
 os.environ['HF_HOME'] = CACHE_DIR
-os.environ['TRANSFORMERS_CACHE'] = os.path.join(CACHE_DIR, 'transformers')
-os.environ['HF_DATASETS_CACHE'] = os.path.join(CACHE_DIR, 'datasets')
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
 
@@ -44,6 +43,8 @@ processing_jobs = {}
 
 # Model variables
 dpt_estimator = None
+depth_anything_model = None
+depth_anything_processor = None
 model_loaded = False
 model_loading = False
 
@@ -84,119 +85,72 @@ def process_with_timeout(function, args, timeout):
 def allowed_file(filename):
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
 
-def remove_background(image_path):
-    try:
-        # Load image
-        img = cv2.imread(image_path)
-        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-
-        # Initialize mask and models for GrabCut
-        mask = np.zeros(img.shape[:2], np.uint8)
-        bgd_model = np.zeros((1, 65), np.float64)
-        fgd_model = np.zeros((1, 65), np.float64)
-
-        # Define initial rectangle (10% border margin)
-        h, w = img.shape[:2]
-        margin = int(min(w, h) * 0.1)
-        rect = (margin, margin, w - 2 * margin, h - 2 * margin)
-
-        # Run GrabCut
-        cv2.grabCut(img, mask, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)
-
-        # Create final mask (0 for background, 1 for foreground)
-        mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
-
-        # Check if foreground exists
-        if np.sum(mask2) == 0:
-            print(f"Warning: No foreground detected in {image_path}")
-            return None
-
-        # Apply mask and set background to black
-        img = img * mask2[:, :, np.newaxis]
-        img_pil = Image.fromarray(img).convert("RGB")
-
-        return img_pil
-    except Exception as e:
-        print(f"Error in remove_background for {image_path}: {str(e)}")
-        raise
-
-def preprocess_image(image_path):
-    img = remove_background(image_path)
-    if img is None:
-        raise ValueError("No foreground detected in image")
-
-    if img.width > MAX_DIMENSION or img.height > MAX_DIMENSION:
-        if img.width > img.height:
-            new_width = MAX_DIMENSION
-            new_height = int(img.height * (MAX_DIMENSION / img.width))
-        else:
-            new_height = MAX_DIMENSION
-            new_width = int(img.width * (MAX_DIMENSION / img.height))
-        img = img.resize((new_width, new_height), Image.LANCZOS)
-
-    img_array = np.array(img)
-    if len(img_array.shape) == 3 and img_array.shape[2] == 3:
-        lab = cv2.cvtColor(img_array, cv2.COLOR_RGB2LAB)
-        l, a, b = cv2.split(lab)
-        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
-        cl = clahe.apply(l)
-        enhanced_lab = cv2.merge((cl, a, b))
-        img_array = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)
-        img = Image.fromarray(img_array)
-
-    return img
+def preprocess_image(image_path):
+    with Image.open(image_path) as img:
+        img = img.convert("RGB")
+
+    if img.width > MAX_DIMENSION or img.height > MAX_DIMENSION:
+        if img.width > img.height:
+            new_width = MAX_DIMENSION
+            new_height = int(img.height * (MAX_DIMENSION / img.width))
+        else:
+            new_height = MAX_DIMENSION
+            new_width = int(img.width * (MAX_DIMENSION / img.height))
+        img = img.resize((new_width, new_height), Image.LANCZOS)
+
+    img_array = np.array(img)
+    if len(img_array.shape) == 3 and img_array.shape[2] == 3:
+        lab = cv2.cvtColor(img_array, cv2.COLOR_RGB2LAB)
+        l, a, b = cv2.split(lab)
+        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
+        cl = clahe.apply(l)
+        enhanced_lab = cv2.merge((cl, a, b))
+        img_array = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)
+        img = Image.fromarray(img_array)
+
+    return img
 
 def load_models():
-    global dpt_estimator, model_loaded, model_loading
+    global dpt_estimator, depth_anything_model, depth_anything_processor, model_loaded, model_loading
 
     if model_loaded:
-        return dpt_estimator
+        return dpt_estimator, depth_anything_model, depth_anything_processor
 
     if model_loading:
         while model_loading and not model_loaded:
             time.sleep(0.5)
-        return dpt_estimator
+        return dpt_estimator, depth_anything_model, depth_anything_processor
 
     try:
         model_loading = True
         print("Loading models...")
 
+        # Authenticate with Hugging Face
         hf_token = os.environ.get('HF_TOKEN')
         if hf_token:
-            print("HF_TOKEN found, attempting login...")
             login(token=hf_token)
             print("Authenticated with Hugging Face token")
-        else:
-            print("Error: HF_TOKEN not found in environment. Intel/dpt-large requires authentication.")
-            raise ValueError("HF_TOKEN is required for Intel/dpt-large")
 
+        # DPT-Large
         dpt_model_name = "Intel/dpt-large"
-        fs = HfFileSystem(token=hf_token)
-        model_cached = os.path.exists(os.path.join(CACHE_DIR, "hub", "models--Intel--dpt-large"))
-
-        if not model_cached:
-            max_retries = 3
-            retry_delay = 5
-            for attempt in range(max_retries):
-                try:
-                    print(f"Attempting to download {dpt_model_name}, attempt {attempt+1}")
-                    snapshot_download(
-                        repo_id=dpt_model_name,
-                        cache_dir=CACHE_DIR,
-                        resume_download=True,
-                        token=hf_token
-                    )
-                    print(f"Successfully downloaded {dpt_model_name}")
-                    break
-                except Exception as e:
-                    if attempt < max_retries - 1:
-                        print(f"DPT download attempt {attempt+1} failed: {str(e)}. Retrying after {retry_delay}s...")
-                        time.sleep(retry_delay)
-                        retry_delay *= 2
-                    else:
-                        raise
-        else:
-            print(f"{dpt_model_name} already cached in {CACHE_DIR}")
+        max_retries = 3
+        retry_delay = 5
+        for attempt in range(max_retries):
+            try:
+                snapshot_download(
+                    repo_id=dpt_model_name,
+                    cache_dir=CACHE_DIR,
+                    resume_download=True,
+                    token=hf_token
+                )
+                break
+            except Exception as e:
+                if attempt < max_retries - 1:
+                    print(f"DPT download attempt {attempt+1} failed: {str(e)}. Retrying...")
+                    time.sleep(retry_delay)
+                    retry_delay *= 2
+                else:
+                    raise
 
         dpt_estimator = pipeline(
             "depth-estimation",
@@ -208,8 +162,43 @@ def load_models():
         print("DPT-Large loaded")
         gc.collect()
 
+        # Depth Anything
+        da_model_name = "depth-anything/Depth-Anything-V2-Small-hf"
+        for attempt in range(max_retries):
+            try:
+                snapshot_download(
+                    repo_id=da_model_name,
+                    cache_dir=CACHE_DIR,
+                    resume_download=True,
+                    token=hf_token
+                )
+                break
+            except Exception as e:
+                if attempt < max_retries - 1:
+                    print(f"Depth Anything download attempt {attempt+1} failed: {str(e)}. Retrying...")
+                    time.sleep(retry_delay)
+                    retry_delay *= 2
+                else:
+                    print(f"Failed to load Depth Anything: {str(e)}. Falling back to DPT-Large only.")
+                    depth_anything_model = None
+                    depth_anything_processor = None
+                    model_loaded = True
+                    return dpt_estimator, None, None
+
+        depth_anything_processor = AutoImageProcessor.from_pretrained(
+            da_model_name,
+            cache_dir=CACHE_DIR,
+            token=hf_token
+        )
+        depth_anything_model = AutoModelForDepthEstimation.from_pretrained(
+            da_model_name,
+            cache_dir=CACHE_DIR,
+            token=hf_token
+        ).to("cpu")
+
         model_loaded = True
-        return dpt_estimator
+        print("Depth Anything loaded")
+        return dpt_estimator, depth_anything_model, depth_anything_processor
 
     except Exception as e:
         print(f"Error loading models: {str(e)}")
@@ -218,6 +207,38 @@ def load_models():
     finally:
         model_loading = False
 
+def fuse_depth_maps(dpt_depth, da_depth, detail_level='medium'):
+    if isinstance(dpt_depth, Image.Image):
+        dpt_depth = np.array(dpt_depth)
+    if isinstance(da_depth, torch.Tensor):
+        da_depth = da_depth.cpu().numpy()
+    if len(dpt_depth.shape) > 2:
+        dpt_depth = np.mean(dpt_depth, axis=2)
+    if len(da_depth.shape) > 2:
+        da_depth = np.mean(da_depth, axis=2)
+
+    if dpt_depth.shape != da_depth.shape:
+        da_depth = cv2.resize(da_depth, (dpt_depth.shape[1], dpt_depth.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+    p_low_dpt, p_high_dpt = np.percentile(dpt_depth, [1, 99])
+    p_low_da, p_high_da = np.percentile(da_depth, [1, 99])
+    dpt_depth = np.clip((dpt_depth - p_low_dpt) / (p_high_dpt - p_low_dpt), 0, 1) if p_high_dpt > p_low_dpt else dpt_depth
+    da_depth = np.clip((da_depth - p_low_da) / (p_high_da - p_low_da), 0, 1) if p_high_da > p_low_da else da_depth
+
+    if detail_level == 'high':
+        weight_da = 0.7
+        edges = cv2.Canny((da_depth * 255).astype(np.uint8), 50, 150)
+        edge_mask = (edges > 0).astype(np.float32)
+        dpt_weight = gaussian_filter(1 - edge_mask, sigma=1.0)
+        da_weight = gaussian_filter(edge_mask, sigma=1.0)
+        fused_depth = dpt_weight * dpt_depth + da_weight * da_depth * weight_da + (1 - weight_da) * dpt_depth
+    else:
+        weight_da = 0.5 if detail_level == 'medium' else 0.3
+        fused_depth = (1 - weight_da) * dpt_depth + weight_da * da_depth
+
+    fused_depth = np.clip(fused_depth, 0, 1)
+    return fused_depth
+
 def enhance_depth_map(depth_map, detail_level='medium'):
     enhanced_depth = depth_map.copy().astype(np.float32)
     p_low, p_high = np.percentile(enhanced_depth, [1, 99])
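Note on the new `fuse_depth_maps`: after normalizing each map to its 1st–99th percentile range, it blends the two estimates with a fixed convex weight for 'medium' and 'low' detail (weight_da = 0.5 and 0.3) and an edge-weighted blend toward Depth Anything for 'high'. A minimal standalone sketch of the fixed-blend path, with synthetic arrays standing in for the real model outputs:

```python
import numpy as np

def normalize(depth):
    # Same 1st/99th-percentile normalization the commit applies to each map
    p_low, p_high = np.percentile(depth, [1, 99])
    if p_high <= p_low:
        return depth
    return np.clip((depth - p_low) / (p_high - p_low), 0, 1)

# Synthetic stand-ins for the DPT-Large and Depth Anything outputs
rng = np.random.default_rng(0)
dpt_depth = normalize(rng.random((64, 64)))
da_depth = normalize(rng.random((64, 64)))

weight_da = 0.5  # detail_level='medium'; 'low' uses 0.3
fused = np.clip((1 - weight_da) * dpt_depth + weight_da * da_depth, 0, 1)
print(fused.shape, float(fused.min()), float(fused.max()))
```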
@@ -243,7 +264,7 @@ def enhance_depth_map(depth_map, detail_level='medium'):
     enhanced_depth = np.clip(enhanced_depth, 0, 1)
     return enhanced_depth
 
-def depth_to_mesh(depth_map, image, resolution=80, detail_level='medium', view_angle=0):
+def depth_to_mesh(depth_map, image, resolution=100, detail_level='medium'):
     enhanced_depth = enhance_depth_map(depth_map, detail_level)
     h, w = enhanced_depth.shape
     x = np.linspace(0, w-1, resolution)
@@ -271,10 +292,6 @@ def depth_to_mesh(depth_map, image, resolution=80, detail_level='medium', view_angle=0):
     y_grid = (y_grid / h - 0.5) * 2.0
     vertices = np.vstack([x_grid.flatten(), -y_grid.flatten(), -z_values.flatten()]).T
 
-    if view_angle != 0:
-        rotation_matrix = trimesh.transformations.rotation_matrix(view_angle, [0, 1, 0])
-        vertices = trimesh.transform_points(vertices, rotation_matrix)
-
     faces = []
     for i in range(resolution-1):
         for j in range(resolution-1):
@@ -319,48 +336,29 @@ def depth_to_mesh(depth_map, image, resolution=80, detail_level='medium', view_angle=0):
                        (1-wx)*wy*img_array[y1, x0, 2] + wx*wy*img_array[y1, x1, 2])
             vertex_colors[vertex_idx, :3] = [r, g, b]
             vertex_colors[vertex_idx, 3] = 255
+        elif len(img_array.shape) == 3 and img_array.shape[2] == 4:
+            for c in range(4):
+                vertex_colors[vertex_idx, c] = int((1-wx)*(1-wy)*img_array[y0, x0, c] +
+                                                   wx*(1-wy)*img_array[y0, x1, c] +
+                                                   (1-wx)*wy*img_array[y1, x0, c] +
+                                                   wx*wy*img_array[y1, x1, c])
         else:
             gray = int((1-wx)*(1-wy)*img_array[y0, x0] + wx*(1-wy)*img_array[y0, x1] +
                        (1-wx)*wy*img_array[y1, x0] + wx*wy*img_array[y1, x1])
             vertex_colors[vertex_idx, :3] = [gray, gray, gray]
             vertex_colors[vertex_idx, 3] = 255
-        mesh.visual.vertex_colors = vertex_colors
+    mesh.visual.vertex_colors = vertex_colors
 
     if detail_level != 'high':
         mesh = mesh.smoothed(method='laplacian', iterations=1)
     mesh.fix_normals()
     return mesh
 
-def combine_meshes(meshes):
-    if len(meshes) == 1:
-        return meshes[0]
-
-    combined_vertices = []
-    combined_faces = []
-    vertex_offset = 0
-
-    for mesh in meshes:
-        combined_vertices.append(mesh.vertices)
-        combined_faces.append(mesh.faces + vertex_offset)
-        vertex_offset += len(mesh.vertices)
-
-    combined_vertices = np.vstack(combined_vertices)
-    combined_faces = np.vstack(combined_faces)
-
-    combined_mesh = trimesh.Trimesh(vertices=combined_vertices, faces=combined_faces)
-
-    combined_mesh = combined_mesh.subdivide_to_size(max_edge=0.05)
-    combined_mesh = combined_mesh.smoothed(method='laplacian', iterations=2)
-    combined_mesh.fill_holes()
-    combined_mesh.fix_normals()
-
-    return combined_mesh
-
 @app.route('/health', methods=['GET'])
 def health_check():
     return jsonify({
         "status": "healthy",
-        "model": "DPT-Large (Multi-View)",
+        "model": "DPT-Large + Depth Anything",
         "device": "cpu"
     }), 200
 
@@ -398,23 +396,18 @@
 
 @app.route('/convert', methods=['POST'])
 def convert_image_to_3d():
-    required_views = ['front', 'back']
-    optional_views = ['left', 'right']
-    view_files = {}
-
-    for view in required_views + optional_views:
-        if view in request.files and request.files[view].filename != '':
-            view_files[view] = request.files[view]
+    if 'image' not in request.files:
+        return jsonify({"error": "No image provided"}), 400
 
-    if not all(view in view_files for view in required_views):
-        return jsonify({"error": "Front and back images are required"}), 400
+    file = request.files['image']
+    if file.filename == '':
+        return jsonify({"error": "No image selected"}), 400
 
-    for view, file in view_files.items():
-        if not allowed_file(file.filename):
-            return jsonify({"error": f"File type not allowed for {view}. Supported types: {', '.join(ALLOWED_EXTENSIONS)}"}), 400
+    if not allowed_file(file.filename):
+        return jsonify({"error": f"File type not allowed. Supported types: {', '.join(ALLOWED_EXTENSIONS)}"}), 400
 
     try:
-        mesh_resolution = min(int(request.form.get('mesh_resolution', 80)), 120)
+        mesh_resolution = min(int(request.form.get('mesh_resolution', 100)), 150)
         output_format = request.form.get('output_format', 'glb').lower()
         detail_level = request.form.get('detail_level', 'medium').lower()
         texture_quality = request.form.get('texture_quality', 'medium').lower()
@@ -425,7 +418,7 @@
         return jsonify({"error": "Unsupported output format. Use 'obj' or 'glb'"}), 400
 
     if detail_level == 'high':
-        mesh_resolution = min(int(mesh_resolution * 1.5), 120)
+        mesh_resolution = min(int(mesh_resolution * 1.5), 150)
     elif detail_level == 'low':
         mesh_resolution = max(int(mesh_resolution * 0.7), 50)
 
@@ -433,12 +426,9 @@
     output_dir = os.path.join(RESULTS_FOLDER, job_id)
     os.makedirs(output_dir, exist_ok=True)
 
-    filepaths = {}
-    for view, file in view_files.items():
-        filename = secure_filename(file.filename)
-        filepath = os.path.join(app.config['UPLOAD_FOLDER'], f"{job_id}_{view}_{filename}")
-        file.save(filepath)
-        filepaths[view] = filepath
+    filename = secure_filename(file.filename)
+    filepath = os.path.join(app.config['UPLOAD_FOLDER'], f"{job_id}_{filename}")
+    file.save(filepath)
 
     processing_jobs[job_id] = {
         'status': 'processing',
@@ -450,53 +440,53 @@
         'created_at': time.time()
     }
 
-    def process_images():
+    def process_image():
         thread = threading.current_thread()
         processing_jobs[job_id]['thread_alive'] = lambda: thread.is_alive()
 
         try:
            processing_jobs[job_id]['progress'] = 5
-            images = {}
-            for view, filepath in filepaths.items():
-                try:
-                    images[view] = preprocess_image(filepath)
-                except ValueError as e:
-                    processing_jobs[job_id]['status'] = 'error'
-                    processing_jobs[job_id]['error'] = f"Error preprocessing {view} image: {str(e)}"
-                    return
+            image = preprocess_image(filepath)
            processing_jobs[job_id]['progress'] = 10
 
            try:
-                dpt_model = load_models()
-                processing_jobs[job_id]['progress'] = 20
+                dpt_model, da_model, da_processor = load_models()
+                processing_jobs[job_id]['progress'] = 30
            except Exception as e:
                processing_jobs[job_id]['status'] = 'error'
                processing_jobs[job_id]['error'] = f"Error loading models: {str(e)}"
                return
 
            try:
-                def estimate_depths():
-                    meshes = []
-                    view_angles = {'front': 0, 'back': np.pi, 'left': np.pi/2, 'right': -np.pi/2}
+                def estimate_depth():
                     with torch.no_grad():
-                        for view, image in images.items():
-                            dpt_result = dpt_model(image)
-                            dpt_depth = dpt_result["depth"]
-
-                            depth_map = np.array(dpt_depth) if isinstance(dpt_depth, Image.Image) else dpt_depth
-                            if len(depth_map.shape) > 2:
-                                depth_map = np.mean(depth_map, axis=2)
-                            p_low, p_high = np.percentile(depth_map, [1, 99])
-                            depth_map = np.clip((depth_map - p_low) / (p_high - p_low), 0, 1) if p_high > p_low else depth_map
-
-                            mesh = depth_to_mesh(depth_map, image, resolution=mesh_resolution, detail_level=detail_level, view_angle=view_angles[view])
-                            meshes.append(mesh)
-                            gc.collect()
-
-                    combined_mesh = combine_meshes(meshes)
-                    return combined_mesh
+                        # DPT-Large
+                        dpt_result = dpt_model(image)
+                        dpt_depth = dpt_result["depth"]
+
+                        # Depth Anything (if loaded)
+                        if da_model and da_processor:
+                            inputs = da_processor(images=image, return_tensors="pt")
+                            inputs = {k: v.to("cpu") for k, v in inputs.items()}
+                            outputs = da_model(**inputs)
+                            da_depth = outputs.predicted_depth.squeeze()
+                            da_depth = torch.nn.functional.interpolate(
+                                da_depth.unsqueeze(0).unsqueeze(0),
+                                size=(image.height, image.width),
+                                mode='bicubic',
+                                align_corners=False
+                            ).squeeze()
+                            fused_depth = fuse_depth_maps(dpt_depth, da_depth, detail_level)
+                        else:
+                            fused_depth = np.array(dpt_depth) if isinstance(dpt_depth, Image.Image) else dpt_depth
+                            if len(fused_depth.shape) > 2:
+                                fused_depth = np.mean(fused_depth, axis=2)
+                            p_low, p_high = np.percentile(fused_depth, [1, 99])
+                            fused_depth = np.clip((fused_depth - p_low) / (p_high - p_low), 0, 1) if p_high > p_low else fused_depth
+
+                    return fused_depth
 
-                combined_mesh, error = process_with_timeout(estimate_depths, [], TIMEOUT_SECONDS)
+                fused_depth, error = process_with_timeout(estimate_depth, [], TIMEOUT_SECONDS)
 
                if error:
                    if isinstance(error, TimeoutError):
@@ -506,11 +496,14 @@ def convert_image_to_3d():
                    else:
                        raise error
 
+                processing_jobs[job_id]['progress'] = 60
+                mesh_resolution_int = int(mesh_resolution)
+                mesh = depth_to_mesh(fused_depth, image, resolution=mesh_resolution_int, detail_level=detail_level)
                processing_jobs[job_id]['progress'] = 80
 
                if output_format == 'obj':
                    obj_path = os.path.join(output_dir, "model.obj")
-                    combined_mesh.export(
+                    mesh.export(
                        obj_path,
                        file_type='obj',
                        include_normals=True,
@@ -531,7 +524,7 @@
 
                elif output_format == 'glb':
                    glb_path = os.path.join(output_dir, "model.glb")
-                    combined_mesh.export(
+                    mesh.export(
                        glb_path,
                        file_type='glb'
                    )
@@ -550,9 +543,8 @@
                    print(error_details)
                    return
 
-            for filepath in filepaths.values():
-                if os.path.exists(filepath):
-                    os.remove(filepath)
+            if os.path.exists(filepath):
+                os.remove(filepath)
            gc.collect()
 
        except Exception as e:
@@ -561,11 +553,10 @@
            processing_jobs[job_id]['error'] = f"{str(e)}\n{error_details}"
            print(f"Error processing job {job_id}: {str(e)}")
            print(error_details)
-            for filepath in filepaths.values():
-                if os.path.exists(filepath):
-                    os.remove(filepath)
+            if os.path.exists(filepath):
+                os.remove(filepath)
 
-    processing_thread = threading.Thread(target=process_images)
+    processing_thread = threading.Thread(target=process_image)
    processing_thread.daemon = True
    processing_thread.start()
 
@@ -675,7 +666,7 @@ def model_info(job_id):
 @app.route('/', methods=['GET'])
 def index():
     return jsonify({
-        "message": "Multi-View Image to 3D API (DPT-Large)",
+        "message": "Image to 3D API (DPT-Large + Depth Anything)",
         "endpoints": [
             "/convert",
             "/progress/<job_id>",
@@ -684,19 +675,16 @@
             "/model-info/<job_id>"
         ],
         "parameters": {
-            "front": "Image file (required)",
-            "back": "Image file (required)",
-            "left": "Image file (optional)",
-            "right": "Image file (optional)",
-            "mesh_resolution": "Integer (50-120)",
+            "mesh_resolution": "Integer (50-150)",
             "output_format": "obj or glb",
             "detail_level": "low, medium, or high",
             "texture_quality": "low, medium, or high"
         },
-        "description": "Creates 3D models from multiple 2D images using Intel DPT-Large with custom background removal."
+        "description": "Creates high-quality 3D models from 2D images using DPT-Large and Depth Anything."
     }), 200
 
 if __name__ == '__main__':
     cleanup_old_jobs()
     port = int(os.environ.get('PORT', 7860))
-    app.run(host='0.0.0.0', port=port)
+    app.run(host='0.0.0.0', port=port)
+```
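With this change, /convert accepts a single 'image' file field instead of the earlier front/back/left/right views. A minimal client sketch against the endpoints above; the base URL assumes a local run on the default port, and the job_id/status response fields are inferred from the processing_jobs structure rather than shown in this diff:

```python
import time
import requests

BASE_URL = "http://localhost:7860"  # assumed deployment address; adjust as needed

# Upload a single image; the form fields mirror the /convert parameters
with open("photo.jpg", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/convert",
        files={"image": f},
        data={
            "mesh_resolution": "100",   # clamped to 150 server-side
            "output_format": "glb",     # or "obj"
            "detail_level": "medium",   # low / medium / high
            "texture_quality": "medium",
        },
        timeout=60,
    )
resp.raise_for_status()
job_id = resp.json()["job_id"]  # assumed response field

# Poll the progress endpoint until the background job finishes
while True:
    job = requests.get(f"{BASE_URL}/progress/{job_id}", timeout=10).json()
    if job.get("status") in ("completed", "error"):
        break
    time.sleep(2)
print(job)
```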