import os
import gradio as gr
import torch
import numpy as np
from PIL import Image
import trimesh
from diffusers import Zero123Pipeline
import tempfile

# Check if CUDA is available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Initialize the pipeline
pipe = Zero123Pipeline.from_pretrained(
    "bennyguo/zero123-xl-diffusers",
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
).to(device)


def image_to_3d(input_image, num_inference_steps=75, guidance_scale=3.0):
    """
    Convert a single image into a rough 3D point cloud and return the paths
    of the exported OBJ and PLY files.
    """
    if input_image is None:
        return None

    # Preprocess: Zero123 expects a square RGB input
    input_image = input_image.convert("RGB").resize((256, 256))
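
    # Eight fixed viewpoints (2 elevations x 4 azimuths); the index math in
    # the point-cloud loop below depends on this exact ordering.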
    # Generate multiple views from different angles using Zero123
    images = []
    for elevation in [0, 30]:
        for azimuth in [0, 90, 180, 270]:
            print(f"Generating view: elevation={elevation}, azimuth={azimuth}")
            with torch.no_grad():
                image = pipe(
                    image=input_image,
                    elevation=elevation,
                    azimuth=azimuth,
                    num_inference_steps=num_inference_steps,
                    guidance_scale=guidance_scale,
                ).images[0]
            images.append(np.array(image))

    # Create a point cloud from the multiple views (simplified approach - in
    # production you might want a proper multi-view reconstruction method)
    points = []
    for i, img in enumerate(images):
        # Use mean pixel intensity as a crude depth proxy
        gray = np.mean(img, axis=2)

        # Sample every 4th pixel from the image
        h, w = gray.shape
        for y in range(0, h, 4):
            for x in range(0, w, 4):
                depth = gray[y, x] / 255.0  # Normalize depth to [0, 1]

                # Recover the angles this view was generated from
                angle_idx = i % 4
                elevation = 0 if i < 4 else 30
                azimuth = angle_idx * 90

                # Convert to radians
                elevation_rad = elevation * np.pi / 180
                azimuth_rad = azimuth * np.pi / 180

                # Spherical-to-Cartesian conversion; distinct names so the
                # pixel loop variables x and y are not clobbered
                pz = depth * np.cos(elevation_rad) * np.cos(azimuth_rad)
                px = depth * np.cos(elevation_rad) * np.sin(azimuth_rad)
                py = depth * np.sin(elevation_rad)
                points.append([px, py, pz])
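
    # NOTE: all views are projected into one spherical frame with no real
    # camera extrinsics, and brightness stands in for depth, so the output is
    # a rough shell rather than a fused reconstruction.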

    # Save the point cloud to an OBJ file
    point_cloud = np.array(points)
    with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as tmp_file:
        mesh = trimesh.points.PointCloud(point_cloud)
        mesh.export(tmp_file.name)

        # Also export as PLY for better compatibility
        ply_path = tmp_file.name.replace('.obj', '.ply')
        mesh.export(ply_path)

    return [tmp_file.name, ply_path]


def process_image(image, num_steps, guidance):
    try:
        # Gradio sliders return floats; the pipeline expects an integer step count
        model_paths = image_to_3d(image, num_inference_steps=int(num_steps), guidance_scale=guidance)
        if model_paths:
            return model_paths[0], model_paths[1], "3D model generated successfully!"
        else:
            return None, None, "Failed to process the image."
    except Exception as e:
        return None, None, f"Error: {str(e)}"


# Create Gradio interface
with gr.Blocks(title="Image to 3D Model Converter") as demo:
    gr.Markdown("# Image to 3D Model Converter")
    gr.Markdown("Upload an image to convert it to a 3D model that you can use in Unity or other engines.")

    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(type="pil", label="Input Image")
            num_steps = gr.Slider(minimum=20, maximum=100, value=75, step=5, label="Number of Inference Steps")
            guidance = gr.Slider(minimum=1.0, maximum=7.0, value=3.0, step=0.5, label="Guidance Scale")
            submit_btn = gr.Button("Convert to 3D")

        with gr.Column(scale=1):
            obj_file = gr.File(label="OBJ File")
            ply_file = gr.File(label="PLY File")
            output_message = gr.Textbox(label="Output Message")

    submit_btn.click(
        fn=process_image,
        inputs=[input_image, num_steps, guidance],
        outputs=[obj_file, ply_file, output_message],
    )
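
# Generation runs eight diffusion passes and can take minutes on CPU;
# demo.queue() could be enabled here if concurrent users are expected.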
# Launch the app
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)