mike23415 committed on
Commit
b33bab2
·
verified ·
1 Parent(s): 5d40874

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -65
app.py CHANGED
@@ -4,32 +4,74 @@ import gradio as gr
4
  import numpy as np
5
  from PIL import Image
6
  import tempfile
7
- from tqdm.auto import tqdm
8
 
9
  # Check if CUDA is available, otherwise use CPU
10
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
11
  print(f"Using device: {device}")
12
 
13
- # Import Shape-E related modules after installing them
14
- print("Loading necessary modules...")
15
- from shap_e.diffusion.sample import sample_latents
16
- from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
17
- from shap_e.models.download import load_model, load_config
18
- from shap_e.util.image_util import load_image
19
- from shap_e.util.notebooks import create_pan_cameras, decode_latent_mesh
 
 
 
 
20
 
21
- print("Loading Shap-E model...")
22
- xm = load_model('transmitter', device=device)
23
- diffusion = diffusion_from_config(load_config('diffusion'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def preprocess_image(image):
26
  # Resize to match expected input size
27
  image = image.resize((256, 256))
28
  return image
29
 
30
- def image_to_3d(image, guidance_scale=15.0, num_inference_steps=64):
31
  """
32
- Convert a single image to a 3D model using Shap-E
33
  """
34
  if image is None:
35
  return None, "No image provided"
@@ -38,72 +80,44 @@ def image_to_3d(image, guidance_scale=15.0, num_inference_steps=64):
38
  # Preprocess image
39
  processed_image = preprocess_image(image)
40
 
41
- # Convert PIL image to Shap-E format
42
- shap_e_image = load_image(processed_image)
 
 
43
 
44
- # Generate latents
45
- latents = sample_latents(
46
- batch_size=1,
47
- model=xm,
48
- diffusion=diffusion,
49
- guidance_scale=guidance_scale,
50
- model_kwargs=dict(images=[shap_e_image]),
51
- progress=True,
52
- clip_denoised=True,
53
- use_fp16=device.type == 'cuda',
54
- use_karras=True,
55
- karras_steps=num_inference_steps,
56
- sigma_min=1e-3,
57
- sigma_max=160,
58
- s_churn=0,
59
- )
60
 
61
- # Create mesh
62
- render_mode = 'nerf' # you can also use 'stf' for faster rendering
63
- mesh = decode_latent_mesh(xm, latents[0], render_mode).tri_mesh()
64
 
65
- # Save mesh to OBJ file
66
  with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as obj_file:
67
  obj_path = obj_file.name
68
- with open(obj_path, 'w') as f:
69
- for v in mesh.verts:
70
- f.write(f'v {v[0]} {v[1]} {v[2]}\n')
71
- for face in mesh.faces:
72
- f.write(f'f {face[0]+1} {face[1]+1} {face[2]+1}\n')
73
 
74
- # Save mesh to PLY file for better Unity compatibility
75
  with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as ply_file:
76
  ply_path = ply_file.name
77
- with open(ply_path, 'w') as f:
78
- f.write('ply\n')
79
- f.write('format ascii 1.0\n')
80
- f.write(f'element vertex {len(mesh.verts)}\n')
81
- f.write('property float x\n')
82
- f.write('property float y\n')
83
- f.write('property float z\n')
84
- f.write(f'element face {len(mesh.faces)}\n')
85
- f.write('property list uchar int vertex_indices\n')
86
- f.write('end_header\n')
87
-
88
- for v in mesh.verts:
89
- f.write(f'{v[0]} {v[1]} {v[2]}\n')
90
-
91
- for face in mesh.faces:
92
- f.write(f'3 {face[0]} {face[1]} {face[2]}\n')
93
 
94
  return [obj_path, ply_path], "3D model generated successfully!"
95
  except Exception as e:
96
  return None, f"Error: {str(e)}"
97
 
98
- def process_image(image, guidance_scale, num_steps):
99
  try:
100
  if image is None:
101
  return None, None, "Please upload an image first."
102
 
103
  results, message = image_to_3d(
104
  image,
105
- guidance_scale=guidance_scale,
106
- num_inference_steps=num_steps
107
  )
108
 
109
  if results:
@@ -114,14 +128,13 @@ def process_image(image, guidance_scale, num_steps):
114
  return None, None, f"Error: {str(e)}"
115
 
116
  # Create Gradio interface
117
- with gr.Blocks(title="Image to 3D Model Converter") as demo:
118
- gr.Markdown("# Image to 3D Model Converter")
119
- gr.Markdown("Upload an image to convert it to a 3D model that you can use in Unity or other engines.")
120
 
121
  with gr.Row():
122
  with gr.Column(scale=1):
123
  input_image = gr.Image(type="pil", label="Input Image")
124
- guidance = gr.Slider(minimum=5.0, maximum=20.0, value=15.0, step=0.5, label="Guidance Scale")
125
  num_steps = gr.Slider(minimum=16, maximum=128, value=64, step=8, label="Number of Inference Steps")
126
  submit_btn = gr.Button("Convert to 3D")
127
 
@@ -132,7 +145,7 @@ with gr.Blocks(title="Image to 3D Model Converter") as demo:
132
 
133
  submit_btn.click(
134
  fn=process_image,
135
- inputs=[input_image, guidance, num_steps],
136
  outputs=[obj_file, ply_file, output_message]
137
  )
138
 
 
4
  import numpy as np
5
  from PIL import Image
6
  import tempfile
7
+ import trimesh
8
 
9
  # Check if CUDA is available, otherwise use CPU
10
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
11
  print(f"Using device: {device}")
12
 
13
+ # Import Point-E modules
14
+ try:
15
+ print("Loading Point-E model...")
16
+ from point_e.diffusion.configs import DIFFUSION_CONFIGS, diffusion_from_config
17
+ from point_e.diffusion.sampler import PointCloudSampler
18
+ from point_e.models.configs import MODEL_CONFIGS, model_from_config
19
+ from point_e.models.download import load_checkpoint
20
+ from point_e.util.plotting import plot_point_cloud
21
+ except ImportError:
22
+ print("Point-E modules not available. Please make sure Point-E is installed.")
23
+ raise
24
 
25
+ # Create the base model from the text-conditioned 'base40M-textvec' config (not an image encoder)
26
+ base_name = 'base40M-textvec'
27
+ base_model = model_from_config(MODEL_CONFIGS[base_name], device)
28
+ base_model.eval()
29
+ base_diffusion = diffusion_from_config(DIFFUSION_CONFIGS[base_name])
30
+
31
+ # Create upsampler model
32
+ upsampler_model = model_from_config(MODEL_CONFIGS['upsample'], device)
33
+ upsampler_model.eval()
34
+ upsampler_diffusion = diffusion_from_config(DIFFUSION_CONFIGS['upsample'])
35
+
36
+ # Create image to point cloud model
37
+ img2pc_name = 'base300M'
38
+ img2pc_model = model_from_config(MODEL_CONFIGS[img2pc_name], device)
39
+ img2pc_model.eval()
40
+ img2pc_diffusion = diffusion_from_config(DIFFUSION_CONFIGS[img2pc_name])
41
+
42
+ # Load checkpoints
43
+ print("Loading model checkpoints...")
44
+ base_model.load_state_dict(load_checkpoint(base_name, device))
45
+ upsampler_model.load_state_dict(load_checkpoint('upsample', device))
46
+ img2pc_model.load_state_dict(load_checkpoint(img2pc_name, device))
47
+
48
+ # Create samplers
49
+ sampler = PointCloudSampler(
50
+ device=device,
51
+ models=[base_model, upsampler_model],
52
+ diffusions=[base_diffusion, upsampler_diffusion],
53
+ num_points=[1024, 4096],
54
+ aux_channels=['R', 'G', 'B'],
55
+ guidance_scale=[3.0, 0.0],
56
+ )
57
+
58
+ img2pc_sampler = PointCloudSampler(
59
+ device=device,
60
+ models=[img2pc_model],
61
+ diffusions=[img2pc_diffusion],
62
+ num_points=[1024],
63
+ aux_channels=['R', 'G', 'B'],
64
+ guidance_scale=[3.0],
65
+ )
66
 
67
  def preprocess_image(image):
68
  # Resize to match expected input size
69
  image = image.resize((256, 256))
70
  return image
71
 
72
+ def image_to_3d(image, num_steps=64):
73
  """
74
+ Convert a single image to a 3D model using Point-E
75
  """
76
  if image is None:
77
  return None, "No image provided"
 
80
  # Preprocess image
81
  processed_image = preprocess_image(image)
82
 
83
+ # Generate samples
84
+ samples = None
85
+ for i, x in enumerate(img2pc_sampler.sample_batch_progressive(batch_size=1, model_kwargs=dict(images=[processed_image]))):
86
+ samples = x
87
 
88
+ # Extract point cloud
89
+ pc = samples[-1]['pred_pc']
90
+ colors = samples[-1]['pred_pc_aux']['R', 'G', 'B']
91
+
92
+ # Create colored point cloud
93
+ points = pc.cpu().numpy()[0]
94
+ colors_np = colors.cpu().numpy()[0]
 
 
 
 
 
 
 
 
 
95
 
96
+ # Wrap the points and colors in a trimesh PointCloud (no surface mesh is built)
97
+ point_cloud = trimesh.PointCloud(vertices=points, colors=colors_np)
 
98
 
99
+ # Save as OBJ
100
  with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as obj_file:
101
  obj_path = obj_file.name
102
+ point_cloud.export(obj_path)
 
 
 
 
103
 
104
+ # Save as PLY for better Unity compatibility
105
  with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as ply_file:
106
  ply_path = ply_file.name
107
+ point_cloud.export(ply_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  return [obj_path, ply_path], "3D model generated successfully!"
110
  except Exception as e:
111
  return None, f"Error: {str(e)}"
112
 
113
+ def process_image(image, num_steps):
114
  try:
115
  if image is None:
116
  return None, None, "Please upload an image first."
117
 
118
  results, message = image_to_3d(
119
  image,
120
+ num_steps=num_steps
 
121
  )
122
 
123
  if results:
 
128
  return None, None, f"Error: {str(e)}"
129
 
130
  # Create Gradio interface
131
+ with gr.Blocks(title="Image to 3D Point Cloud Converter") as demo:
132
+ gr.Markdown("# Image to 3D Point Cloud Converter")
133
+ gr.Markdown("Upload an image to convert it to a 3D point cloud that you can use in Unity or other engines.")
134
 
135
  with gr.Row():
136
  with gr.Column(scale=1):
137
  input_image = gr.Image(type="pil", label="Input Image")
 
138
  num_steps = gr.Slider(minimum=16, maximum=128, value=64, step=8, label="Number of Inference Steps")
139
  submit_btn = gr.Button("Convert to 3D")
140
 
 
145
 
146
  submit_btn.click(
147
  fn=process_image,
148
+ inputs=[input_image, num_steps],
149
  outputs=[obj_file, ply_file, output_message]
150
  )
151