File size: 4,590 Bytes
4e31b1a
aaa6458
 
 
 
 
 
 
81914fc
aaa6458
 
 
1087492
aaa6458
 
 
 
 
388cf5c
aaa6458
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388cf5c
aaa6458
388cf5c
aaa6458
 
 
 
 
388cf5c
aaa6458
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1087492
aaa6458
48056a7
aaa6458
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import os
import gradio as gr
import torch
import numpy as np
from PIL import Image
import trimesh
from diffusers import Zero123Pipeline
import tempfile

# Pick the compute device: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Load the Zero123 weights once at import time. Half precision is requested
# only on CUDA; the CPU path uses full float32.
pipe = Zero123Pipeline.from_pretrained(
    "bennyguo/zero123-xl-diffusers",
    torch_dtype=torch.float32 if device.type != "cuda" else torch.float16,
)
pipe = pipe.to(device)

def _views_to_point_cloud(views, stride=4):
    """
    Turn rendered views into a single (N, 3) array of pseudo-3D points.

    Args:
        views: iterable of ``(elevation_deg, azimuth_deg, image)`` tuples,
            where ``image`` is an H x W x C array (channels on axis 2).
        stride: keep every ``stride``-th pixel along both image axes.

    Returns:
        Float array of shape (N, 3) with columns x, y, z. Views are
        concatenated in input order and pixels in row-major order.
    """
    clouds = []
    for elevation, azimuth, img in views:
        # Mean over channels is used as a stand-in for depth (simplified).
        gray = np.mean(img, axis=2)
        depth = gray[::stride, ::stride].ravel() / 255.0

        elevation_rad = elevation * np.pi / 180
        azimuth_rad = azimuth * np.pi / 180

        # Spherical -> Cartesian along the view direction.
        # NOTE(review): the pixel position is not used, so every point of a
        # view lies on one ray; only the brightness moves it along that ray.
        xs = depth * np.cos(elevation_rad) * np.sin(azimuth_rad)
        ys = depth * np.sin(elevation_rad)
        zs = depth * np.cos(elevation_rad) * np.cos(azimuth_rad)
        clouds.append(np.column_stack((xs, ys, zs)))

    if not clouds:
        return np.empty((0, 3))
    return np.concatenate(clouds, axis=0)


def image_to_3d(input_image, num_inference_steps=75, guidance_scale=3.0):
    """
    Convert a single image to a 3D point cloud saved as OBJ and PLY files.

    Eight views (elevations 0 and 30 degrees, azimuths 0/90/180/270 degrees)
    are generated with the module-level ``pipe``; every 4th pixel of each
    view becomes one point whose distance from the origin is the pixel
    brightness.

    Args:
        input_image: PIL image, or None.
        num_inference_steps: diffusion steps per generated view.
        guidance_scale: guidance scale forwarded to the pipeline.

    Returns:
        ``[obj_path, ply_path]`` pointing at newly created temporary files
        (not deleted automatically), or None when ``input_image`` is None.
    """
    if input_image is None:
        return None

    input_image = input_image.convert("RGB").resize((256, 256))
    # Presumably UI sliders can deliver whole numbers as floats; the step
    # count is a count, so normalise it before it reaches the pipeline.
    num_inference_steps = int(num_inference_steps)

    # Generate the views, keeping each image together with its camera angles
    # so the reconstruction never has to re-derive them from a list index.
    views = []
    for elevation in [0, 30]:
        for azimuth in [0, 90, 180, 270]:
            print(f"Generating view: elevation={elevation}, azimuth={azimuth}")
            with torch.no_grad():
                image = pipe(
                    image=input_image,
                    elevation=elevation,
                    azimuth=azimuth,
                    num_inference_steps=num_inference_steps,
                    guidance_scale=guidance_scale,
                ).images[0]
            views.append((elevation, azimuth, np.array(image)))

    # This is a simplified approach - in production you might want to use a
    # more sophisticated reconstruction method.
    point_cloud = _views_to_point_cloud(views)

    # Reserve a unique path, then close the handle before anything writes to
    # it: exporting by name while the handle is open fails on Windows.
    with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as tmp_file:
        obj_path = tmp_file.name
    # Swap only the extension; str.replace would also rewrite a ".obj" that
    # happens to appear in a directory name.
    ply_path = os.path.splitext(obj_path)[0] + '.ply'

    mesh = trimesh.points.PointCloud(point_cloud)
    mesh.export(obj_path)
    # Also export as PLY for better compatibility
    mesh.export(ply_path)

    return [obj_path, ply_path]

def process_image(image, num_steps, guidance):
    """
    UI callback: run the conversion and report the outcome as a status line.

    Returns a 3-tuple ``(obj_path, ply_path, message)``. Both paths are None
    when the conversion produced nothing or raised; any exception is caught
    and turned into the message instead of propagating to the UI.
    """
    try:
        paths = image_to_3d(
            image,
            num_inference_steps=num_steps,
            guidance_scale=guidance,
        )
        # Guard clause: no result means there was nothing to convert.
        if not paths:
            return None, None, "Failed to process the image."
        obj_path, ply_path = paths[0], paths[1]
        return obj_path, ply_path, "3D model generated successfully!"
    except Exception as err:
        return None, None, f"Error: {str(err)}"

# Build the Gradio UI: controls on the left, generated files and status on the right.
with gr.Blocks(title="Image to 3D Model Converter") as demo:
    gr.Markdown("# Image to 3D Model Converter")
    gr.Markdown("Upload an image to convert it to a 3D model that you can use in Unity or other engines.")

    with gr.Row():
        # Left column: source image plus the two sampling controls.
        with gr.Column(scale=1):
            input_image = gr.Image(label="Input Image", type="pil")
            num_steps = gr.Slider(
                label="Number of Inference Steps",
                minimum=20,
                maximum=100,
                step=5,
                value=75,
            )
            guidance = gr.Slider(
                label="Guidance Scale",
                minimum=1.0,
                maximum=7.0,
                step=0.5,
                value=3.0,
            )
            submit_btn = gr.Button("Convert to 3D")

        # Right column: downloadable results and a status message.
        with gr.Column(scale=1):
            obj_file = gr.File(label="OBJ File")
            ply_file = gr.File(label="PLY File")
            output_message = gr.Textbox(label="Output Message")

    # The callback's 3-tuple maps positionally onto the three output widgets.
    submit_btn.click(
        fn=process_image,
        inputs=[input_image, num_steps, guidance],
        outputs=[obj_file, ply_file, output_message],
    )

# Start the server only when run as a script; listens on all interfaces, port 7860.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )