Spaces:

mike23415
/

Rightlight

Runtime error

App Files Files Community

mike23415 committed 11 days ago

Commit

b33bab2

verified ·

1 Parent(s): 5d40874

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -65

app.py CHANGED Viewed

@@ -4,32 +4,74 @@ import gradio as gr
 import numpy as np
 from PIL import Image
 import tempfile
-from tqdm.auto import tqdm
 # Check if CUDA is available, otherwise use CPU
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
-# Import Shape-E related modules after installing them
-print("Loading necessary modules...")
-from shap_e.diffusion.sample import sample_latents
-from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
-from shap_e.models.download import load_model, load_config
-from shap_e.util.image_util import load_image
-from shap_e.util.notebooks import create_pan_cameras, decode_latent_mesh
-print("Loading Shap-E model...")
-xm = load_model('transmitter', device=device)
-diffusion = diffusion_from_config(load_config('diffusion'))
 def preprocess_image(image):
     """Return a copy of ``image`` resized to 256x256.

     ``image`` only needs to expose a ``resize((width, height))`` method
     (the Gradio input is configured with ``type="pil"``); whatever that
     method returns is handed back unchanged.
     """
     target_size = (256, 256)
     return image.resize(target_size)
-def image_to_3d(image, guidance_scale=15.0, num_inference_steps=64):
     """
-    Convert a single image to a 3D model using Shap-E
     """
     if image is None:
         return None, "No image provided"
@@ -38,72 +80,44 @@ def image_to_3d(image, guidance_scale=15.0, num_inference_steps=64):
         # Preprocess image
         processed_image = preprocess_image(image)
-        # Convert PIL image to Shap-E format
-        shap_e_image = load_image(processed_image)
-        # Generate latents
-        latents = sample_latents(
-            batch_size=1,
-            model=xm,
-            diffusion=diffusion,
-            guidance_scale=guidance_scale,
-            model_kwargs=dict(images=[shap_e_image]),
-            progress=True,
-            clip_denoised=True,
-            use_fp16=device.type == 'cuda',
-            use_karras=True,
-            karras_steps=num_inference_steps,
-            sigma_min=1e-3,
-            sigma_max=160,
-            s_churn=0,
-        )
-        # Create mesh
-        render_mode = 'nerf' # you can also use 'stf' for faster rendering
-        mesh = decode_latent_mesh(xm, latents[0], render_mode).tri_mesh()
-        # Save mesh to OBJ file
         with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as obj_file:
             obj_path = obj_file.name
-            with open(obj_path, 'w') as f:
-                for v in mesh.verts:
-                    f.write(f'v {v[0]} {v[1]} {v[2]}\n')
-                for face in mesh.faces:
-                    f.write(f'f {face[0]+1} {face[1]+1} {face[2]+1}\n')
-        # Save mesh to PLY file for better Unity compatibility
         with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as ply_file:
             ply_path = ply_file.name
-            with open(ply_path, 'w') as f:
-                f.write('ply\n')
-                f.write('format ascii 1.0\n')
-                f.write(f'element vertex {len(mesh.verts)}\n')
-                f.write('property float x\n')
-                f.write('property float y\n')
-                f.write('property float z\n')
-                f.write(f'element face {len(mesh.faces)}\n')
-                f.write('property list uchar int vertex_indices\n')
-                f.write('end_header\n')
-                for v in mesh.verts:
-                    f.write(f'{v[0]} {v[1]} {v[2]}\n')
-                for face in mesh.faces:
-                    f.write(f'3 {face[0]} {face[1]} {face[2]}\n')
         return [obj_path, ply_path], "3D model generated successfully!"
     except Exception as e:
         return None, f"Error: {str(e)}"
-def process_image(image, guidance_scale, num_steps):
     try:
         if image is None:
             return None, None, "Please upload an image first."
         results, message = image_to_3d(
             image,
-            guidance_scale=guidance_scale,
-            num_inference_steps=num_steps
         )
         if results:
@@ -114,14 +128,13 @@ def process_image(image, guidance_scale, num_steps):
         return None, None, f"Error: {str(e)}"
 # Create Gradio interface
-with gr.Blocks(title="Image to 3D Model Converter") as demo:
-    gr.Markdown("# Image to 3D Model Converter")
-    gr.Markdown("Upload an image to convert it to a 3D model that you can use in Unity or other engines.")
     with gr.Row():
         with gr.Column(scale=1):
             input_image = gr.Image(type="pil", label="Input Image")
-            guidance = gr.Slider(minimum=5.0, maximum=20.0, value=15.0, step=0.5, label="Guidance Scale")
             num_steps = gr.Slider(minimum=16, maximum=128, value=64, step=8, label="Number of Inference Steps")
             submit_btn = gr.Button("Convert to 3D")
@@ -132,7 +145,7 @@ with gr.Blocks(title="Image to 3D Model Converter") as demo:
     submit_btn.click(
         fn=process_image,
-        inputs=[input_image, guidance, num_steps],
         outputs=[obj_file, ply_file, output_message]
     )

 import numpy as np
 from PIL import Image
 import tempfile
+import trimesh
 # Check if CUDA is available, otherwise use CPU
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
+# Import Point-E modules
+try:
+    print("Loading Point-E model...")
+    from point_e.diffusion.configs import DIFFUSION_CONFIGS, diffusion_from_config
+    from point_e.diffusion.sampler import PointCloudSampler
+    from point_e.models.configs import MODEL_CONFIGS, model_from_config
+    from point_e.models.download import load_checkpoint
+    from point_e.util.plotting import plot_point_cloud
+except ImportError:
+    print("Point-E modules not available. Please make sure Point-E is installed.")
+    raise
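+# Create base model ('base40M-textvec' is, by its name, a text-conditioned checkpoint rather than an image encoder; TODO confirm it is needed here)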
+base_name = 'base40M-textvec'
+base_model = model_from_config(MODEL_CONFIGS[base_name], device)
+base_model.eval()
+base_diffusion = diffusion_from_config(DIFFUSION_CONFIGS[base_name])
+# Create upsampler model
+upsampler_model = model_from_config(MODEL_CONFIGS['upsample'], device)
+upsampler_model.eval()
+upsampler_diffusion = diffusion_from_config(DIFFUSION_CONFIGS['upsample'])
+# Create image to point cloud model
+img2pc_name = 'base300M'
+img2pc_model = model_from_config(MODEL_CONFIGS[img2pc_name], device)
+img2pc_model.eval()
+img2pc_diffusion = diffusion_from_config(DIFFUSION_CONFIGS[img2pc_name])
+# Load checkpoints
+print("Loading model checkpoints...")
+base_model.load_state_dict(load_checkpoint(base_name, device))
+upsampler_model.load_state_dict(load_checkpoint('upsample', device))
+img2pc_model.load_state_dict(load_checkpoint(img2pc_name, device))
+# Create samplers
+sampler = PointCloudSampler(
+    device=device,
+    models=[base_model, upsampler_model],
+    diffusions=[base_diffusion, upsampler_diffusion],
+    num_points=[1024, 4096],
+    aux_channels=['R', 'G', 'B'],
+    guidance_scale=[3.0, 0.0],
+)
+img2pc_sampler = PointCloudSampler(
+    device=device,
+    models=[img2pc_model],
+    diffusions=[img2pc_diffusion],
+    num_points=[1024],
+    aux_channels=['R', 'G', 'B'],
+    guidance_scale=[3.0],
+)
 def preprocess_image(image):
     """Return a copy of ``image`` resized to 256x256.

     ``image`` only needs to expose a ``resize((width, height))`` method
     (the Gradio input is configured with ``type="pil"``); whatever that
     method returns is handed back unchanged.
     """
     target_size = (256, 256)
     return image.resize(target_size)
+def image_to_3d(image, num_steps=64):
     """
+    Convert a single image to a 3D model using Point-E
     """
     if image is None:
         return None, "No image provided"
         # Preprocess image
         processed_image = preprocess_image(image)
+        # Generate samples
+        samples = None
+        for i, x in enumerate(img2pc_sampler.sample_batch_progressive(batch_size=1, model_kwargs=dict(images=[processed_image]))):
+            samples = x
+        # Extract point cloud
+        pc = samples[-1]['pred_pc']
+        colors = samples[-1]['pred_pc_aux']['R', 'G', 'B']
+        # Create colored point cloud
+        points = pc.cpu().numpy()[0]
+        colors_np = colors.cpu().numpy()[0]
+        # Wrap the points and colors in a trimesh.PointCloud (no surface mesh is built)
+        point_cloud = trimesh.PointCloud(vertices=points, colors=colors_np)
+        # Save as OBJ
         with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as obj_file:
             obj_path = obj_file.name
+            point_cloud.export(obj_path)
+        # Save as PLY for better Unity compatibility
         with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as ply_file:
             ply_path = ply_file.name
+            point_cloud.export(ply_path)
         return [obj_path, ply_path], "3D model generated successfully!"
     except Exception as e:
         return None, f"Error: {str(e)}"
+def process_image(image, num_steps):
     try:
         if image is None:
             return None, None, "Please upload an image first."
         results, message = image_to_3d(
             image,
+            num_steps=num_steps
         )
         if results:
         return None, None, f"Error: {str(e)}"
 # Create Gradio interface
+with gr.Blocks(title="Image to 3D Point Cloud Converter") as demo:
+    gr.Markdown("# Image to 3D Point Cloud Converter")
+    gr.Markdown("Upload an image to convert it to a 3D point cloud that you can use in Unity or other engines.")
     with gr.Row():
         with gr.Column(scale=1):
             input_image = gr.Image(type="pil", label="Input Image")
             num_steps = gr.Slider(minimum=16, maximum=128, value=64, step=8, label="Number of Inference Steps")
             submit_btn = gr.Button("Convert to 3D")
     submit_btn.click(
         fn=process_image,
+        inputs=[input_image, num_steps],
         outputs=[obj_file, ply_file, output_message]
     )