hujiecpp committed
Commit 2da96e5 · 1 Parent(s): ea05641

init project

Files changed (1)
app.py +17 -16
app.py CHANGED
@@ -38,7 +38,8 @@ from modules.pe3r.models import Models
 import torchvision.transforms as tvf
 
 silent = False
-pe3r = Models('cpu') #
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+pe3r = Models(device) #
 
 def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, cam_size=0.05,
                                  cam_color=None, as_pointcloud=False,
@@ -241,14 +242,14 @@ def slerp_multiple(vectors, t_values):
     return interpolated_vector
 
 @torch.no_grad
-def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, original_size, input_size, transform):
+def get_mask_from_img_sam1(sam1_image, yolov8_image, original_size, input_size, transform):
 
-    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    # device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
     sam_mask=[]
     img_area = original_size[0] * original_size[1]
 
-    obj_results = yolov8(yolov8_image,device=device,retina_masks=False,imgsz=1024,conf=0.25,iou=0.95,verbose=False)
+    obj_results = pe3r.yolov8(yolov8_image,device=device,retina_masks=False,imgsz=1024,conf=0.25,iou=0.95,verbose=False)
     input_boxes1 = obj_results[0].boxes.xyxy
     input_boxes1 = input_boxes1.cpu().numpy()
     input_boxes1 = transform.apply_boxes(input_boxes1, original_size)
@@ -262,21 +263,21 @@ def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, origin
 
     # input_boxes = torch.cat((input_boxes1, input_boxes2), dim=0)
 
-    input_image = mobilesamv2.preprocess(sam1_image)
-    image_embedding = mobilesamv2.image_encoder(input_image)['last_hidden_state']
+    input_image = pe3r.mobilesamv2.preprocess(sam1_image)
+    image_embedding = pe3r.mobilesamv2.image_encoder(input_image)['last_hidden_state']
 
     image_embedding=torch.repeat_interleave(image_embedding, 320, dim=0)
-    prompt_embedding=mobilesamv2.prompt_encoder.get_dense_pe()
+    prompt_embedding=pe3r.mobilesamv2.prompt_encoder.get_dense_pe()
     prompt_embedding=torch.repeat_interleave(prompt_embedding, 320, dim=0)
     for (boxes,) in batch_iterator(320, input_boxes):
         with torch.no_grad():
            image_embedding=image_embedding[0:boxes.shape[0],:,:,:]
            prompt_embedding=prompt_embedding[0:boxes.shape[0],:,:,:]
-            sparse_embeddings, dense_embeddings = mobilesamv2.prompt_encoder(
+            sparse_embeddings, dense_embeddings = pe3r.mobilesamv2.prompt_encoder(
                points=None,
                boxes=boxes,
                masks=None,)
-            low_res_masks, _ = mobilesamv2.mask_decoder(
+            low_res_masks, _ = pe3r.mobilesamv2.mask_decoder(
                image_embeddings=image_embedding,
                image_pe=prompt_embedding,
                sparse_prompt_embeddings=sparse_embeddings,
@@ -284,8 +285,8 @@ def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, origin
                multimask_output=False,
                simple_type=True,
            )
-            low_res_masks=mobilesamv2.postprocess_masks(low_res_masks, input_size, original_size)
-            sam_mask_pre = (low_res_masks > mobilesamv2.mask_threshold)
+            low_res_masks=pe3r.mobilesamv2.postprocess_masks(low_res_masks, input_size, original_size)
+            sam_mask_pre = (low_res_masks > pe3r.mobilesamv2.mask_threshold)
            for mask in sam_mask_pre:
                if mask.sum() / img_area > 0.002:
                    sam_mask.append(mask.squeeze(1))
@@ -299,7 +300,7 @@ def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, origin
 @torch.no_grad
 def get_cog_feats(images):
 
-    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    # device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
     cog_seg_maps = []
     rev_cog_seg_maps = []
@@ -311,7 +312,7 @@ def get_cog_feats(images):
     np_images = images.np_images
     np_images_size = images.np_images_size
 
-    sam1_masks = get_mask_from_img_sam1(pe3r.mobilesamv2, pe3r.yolov8, sam1_images[0], np_images[0], np_images_size[0], sam1_images_size[0], images.sam1_transform)
+    sam1_masks = get_mask_from_img_sam1(sam1_images[0], np_images[0], np_images_size[0], sam1_images_size[0], images.sam1_transform)
     for mask in sam1_masks:
         _, _, _ = pe3r.sam2.add_new_mask(
             inference_state=inference_state,
@@ -333,7 +334,7 @@ def get_cog_feats(images):
         if out_frame_idx == 0:
             continue
 
-        sam1_masks = get_mask_from_img_sam1(pe3r.mobilesamv2, pe3r.yolov8, sam1_images[out_frame_idx], np_images[out_frame_idx], np_images_size[out_frame_idx], sam1_images_size[out_frame_idx], images.sam1_transform)
+        sam1_masks = get_mask_from_img_sam1(sam1_images[out_frame_idx], np_images[out_frame_idx], np_images_size[out_frame_idx], sam1_images_size[out_frame_idx], images.sam1_transform)
 
         for sam1_mask in sam1_masks:
             flg = 1
@@ -445,7 +446,7 @@ def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
     then run get_3D_model_from_scene
     """
 
-    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    # device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
     if len(filelist) < 2:
         raise gradio.Error("Please input at least 2 images.")
@@ -510,7 +511,7 @@ def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
 def get_3D_object_from_scene(outdir, text, threshold, scene, min_conf_thr, as_pointcloud,
                              mask_sky, clean_depth, transparent_cams, cam_size):
 
-    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    # device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
     texts = [text]
     inputs = pe3r.siglip_tokenizer(text=texts, padding="max_length", return_tensors="pt")
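
For reference, a minimal sketch of the device-selection pattern this commit adopts: the device string is resolved once at module load, passed to the Models wrapper, and reused by handlers, which is why the per-function device lookups are commented out above. The import path, Models(device), and the pe3r.yolov8 call mirror the diff; the handler name run_detection is hypothetical and only illustrates how request handlers share the module-level models.

import torch
from modules.pe3r.models import Models

# Resolve the compute device once, at import time, with a CPU fallback.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Instantiate every model on that device; handlers reuse this single instance.
pe3r = Models(device)

def run_detection(image):  # hypothetical handler, for illustration only
    # Inference only: disable gradient tracking.
    with torch.no_grad():
        results = pe3r.yolov8(image, device=device, retina_masks=False,
                              imgsz=1024, conf=0.25, iou=0.95, verbose=False)
    # Return the detected boxes in xyxy format, as the diff does.
    return results[0].boxes.xyxy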