import os
import gradio as gr
import torch
import numpy as np
from PIL import Image
import trimesh
from diffusers import Zero123Pipeline
import tempfile
# Check if CUDA is available, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Initialize the Zero123 pipeline (fp16 on GPU, fp32 on CPU)
pipe = Zero123Pipeline.from_pretrained(
    "bennyguo/zero123-xl-diffusers",
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
).to(device)
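
# Optional memory tweak (a sketch, not part of the original Space): diffusers
# pipelines generally support attention slicing, which trades a little speed
# for a lower peak memory footprint. Uncomment if the GPU runs out of VRAM:
# pipe.enable_attention_slicing()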
def image_to_3d(input_image, num_inference_steps=75, guidance_scale=3.0):
    """
    Convert a single image to a 3D model.
    """
    if input_image is None:
        return None

    # Preprocess: Zero123 expects a square RGB image
    input_image = input_image.convert("RGB").resize((256, 256))

    # Generate multiple views using Zero123: two elevations x four azimuths,
    # eight views in total
    images = []
    for elevation in [0, 30]:
        for azimuth in [0, 90, 180, 270]:
            print(f"Generating view: elevation={elevation}, azimuth={azimuth}")
            with torch.no_grad():
                image = pipe(
                    image=input_image,
                    elevation=elevation,
                    azimuth=azimuth,
                    # Gradio sliders return floats; the scheduler expects an int
                    num_inference_steps=int(num_inference_steps),
                    guidance_scale=guidance_scale,
                ).images[0]
            images.append(np.array(image))
    # Create a point cloud from the generated views.
    # This is a simplified approach - in production you might want to use a
    # more sophisticated multi-view reconstruction method.
    points = []
    for i, img in enumerate(images):
        # Use mean pixel intensity as a crude depth proxy
        gray = np.mean(img, axis=2)

        # Sample every 4th pixel of the image
        h, w = gray.shape
        for y in range(0, h, 4):
            for x in range(0, w, 4):
                depth = gray[y, x] / 255.0  # Normalize depth to [0, 1]

                # Recover the view angles from the generation order
                # (elevation-major, azimuth-minor)
                angle_idx = i % 4
                elevation = 0 if i < 4 else 30
                azimuth = angle_idx * 90

                # Convert to radians
                elevation_rad = elevation * np.pi / 180
                azimuth_rad = azimuth * np.pi / 180

                # Spherical-to-Cartesian conversion; use dedicated names so the
                # pixel loop variables x and y are not shadowed
                z = depth * np.cos(elevation_rad) * np.cos(azimuth_rad)
                x3d = depth * np.cos(elevation_rad) * np.sin(azimuth_rad)
                y3d = depth * np.sin(elevation_rad)
                points.append([x3d, y3d, z])

    point_cloud = np.array(points)
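
    # Worked check of the conversion above (a full-brightness pixel, depth = 1.0,
    # seen from the elevation=0, azimuth=90 view):
    #   x3d = 1 * cos(0) * sin(90 deg) = 1
    #   y3d = 1 * sin(0)               = 0
    #   z   = 1 * cos(0) * cos(90 deg) = 0
    # i.e. the point lands on the +x axis, as expected for a side-on view.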
    # Save the point cloud to an OBJ file
    with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as tmp_file:
        mesh = trimesh.points.PointCloud(point_cloud)
        mesh.export(tmp_file.name)

    # Also export as PLY for better compatibility
    ply_path = tmp_file.name.replace('.obj', '.ply')
    mesh.export(ply_path)

    return [tmp_file.name, ply_path]
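
# Quick sanity check of the export (a sketch; not wired into the app): the
# returned files can be loaded back with trimesh to confirm they contain the
# expected points, e.g.:
#   cloud = trimesh.load(ply_path)
#   print(cloud.vertices.shape)  # (num_sampled_points, 3)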
def process_image(image, num_steps, guidance):
    try:
        model_paths = image_to_3d(image, num_inference_steps=num_steps, guidance_scale=guidance)
        if model_paths:
            return model_paths[0], model_paths[1], "3D model generated successfully!"
        else:
            return None, None, "Failed to process the image."
    except Exception as e:
        return None, None, f"Error: {str(e)}"
# Create the Gradio interface
with gr.Blocks(title="Image to 3D Model Converter") as demo:
    gr.Markdown("# Image to 3D Model Converter")
    gr.Markdown("Upload an image to convert it into a 3D model that you can use in Unity or other engines.")

    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(type="pil", label="Input Image")
            num_steps = gr.Slider(minimum=20, maximum=100, value=75, step=5, label="Number of Inference Steps")
            guidance = gr.Slider(minimum=1.0, maximum=7.0, value=3.0, step=0.5, label="Guidance Scale")
            submit_btn = gr.Button("Convert to 3D")

        with gr.Column(scale=1):
            obj_file = gr.File(label="OBJ File")
            ply_file = gr.File(label="PLY File")
            output_message = gr.Textbox(label="Output Message")

    submit_btn.click(
        fn=process_image,
        inputs=[input_image, num_steps, guidance],
        outputs=[obj_file, ply_file, output_message],
    )
# Launch the app
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
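
# Suggested requirements.txt for this Space (a sketch inferred from the imports
# above; no version pins, since the original does not specify any):
#   gradio
#   torch
#   numpy
#   Pillow
#   trimesh
#   diffusers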