import os
import gradio as gr
import torch
import numpy as np
from PIL import Image
import trimesh
from diffusers import Zero123Pipeline
import tempfile
# Check if CUDA is available, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Initialize the Zero123 pipeline (fp16 on GPU, fp32 on CPU)
pipe = Zero123Pipeline.from_pretrained(
    "bennyguo/zero123-xl-diffusers",
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
).to(device)
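
# Optional memory tweak (a sketch, not part of the original Space): diffusers
# pipelines generally support attention slicing, which trades a little speed
# for a lower peak memory footprint. Uncomment if the GPU runs out of VRAM:
# pipe.enable_attention_slicing()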
def image_to_3d(input_image, num_inference_steps=75, guidance_scale=3.0):
    """
    Convert a single image to a 3D model.
    """
    if input_image is None:
        return None

    # Preprocess: Zero123 expects a square RGB image
    input_image = input_image.convert("RGB").resize((256, 256))

    # Generate multiple views using Zero123: two elevations x four azimuths,
    # eight views in total
    images = []
    for elevation in [0, 30]:
        for azimuth in [0, 90, 180, 270]:
            print(f"Generating view: elevation={elevation}, azimuth={azimuth}")
            with torch.no_grad():
                image = pipe(
                    image=input_image,
                    elevation=elevation,
                    azimuth=azimuth,
                    # Gradio sliders return floats; the scheduler expects an int
                    num_inference_steps=int(num_inference_steps),
                    guidance_scale=guidance_scale,
                ).images[0]
            images.append(np.array(image))
    # Create a point cloud from the generated views.
    # This is a simplified approach - in production you might want to use a
    # more sophisticated multi-view reconstruction method.
    points = []
    for i, img in enumerate(images):
        # Use mean pixel intensity as a crude depth proxy
        gray = np.mean(img, axis=2)

        # Sample every 4th pixel of the image
        h, w = gray.shape
        for y in range(0, h, 4):
            for x in range(0, w, 4):
                depth = gray[y, x] / 255.0  # Normalize depth to [0, 1]

                # Recover the view angles from the generation order
                # (elevation-major, azimuth-minor)
                angle_idx = i % 4
                elevation = 0 if i < 4 else 30
                azimuth = angle_idx * 90

                # Convert to radians
                elevation_rad = elevation * np.pi / 180
                azimuth_rad = azimuth * np.pi / 180

                # Spherical-to-Cartesian conversion; use dedicated names so the
                # pixel loop variables x and y are not shadowed
                z = depth * np.cos(elevation_rad) * np.cos(azimuth_rad)
                x3d = depth * np.cos(elevation_rad) * np.sin(azimuth_rad)
                y3d = depth * np.sin(elevation_rad)
                points.append([x3d, y3d, z])

    point_cloud = np.array(points)
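
    # Worked check of the conversion above (a full-brightness pixel, depth = 1.0,
    # seen from the elevation=0, azimuth=90 view):
    #   x3d = 1 * cos(0) * sin(90 deg) = 1
    #   y3d = 1 * sin(0)               = 0
    #   z   = 1 * cos(0) * cos(90 deg) = 0
    # i.e. the point lands on the +x axis, as expected for a side-on view.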
    # Save the point cloud to an OBJ file
    with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as tmp_file:
        mesh = trimesh.points.PointCloud(point_cloud)
        mesh.export(tmp_file.name)

    # Also export as PLY for better compatibility
    ply_path = tmp_file.name.replace('.obj', '.ply')
    mesh.export(ply_path)

    return [tmp_file.name, ply_path]
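
# Quick sanity check of the export (a sketch; not wired into the app): the
# returned files can be loaded back with trimesh to confirm they contain the
# expected points, e.g.:
#   cloud = trimesh.load(ply_path)
#   print(cloud.vertices.shape)  # (num_sampled_points, 3)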
def process_image(image, num_steps, guidance):
    try:
        model_paths = image_to_3d(image, num_inference_steps=num_steps, guidance_scale=guidance)
        if model_paths:
            return model_paths[0], model_paths[1], "3D model generated successfully!"
        else:
            return None, None, "Failed to process the image."
    except Exception as e:
        return None, None, f"Error: {str(e)}"
# Create the Gradio interface
with gr.Blocks(title="Image to 3D Model Converter") as demo:
    gr.Markdown("# Image to 3D Model Converter")
    gr.Markdown("Upload an image to convert it into a 3D model that you can use in Unity or other engines.")

    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(type="pil", label="Input Image")
            num_steps = gr.Slider(minimum=20, maximum=100, value=75, step=5, label="Number of Inference Steps")
            guidance = gr.Slider(minimum=1.0, maximum=7.0, value=3.0, step=0.5, label="Guidance Scale")
            submit_btn = gr.Button("Convert to 3D")

        with gr.Column(scale=1):
            obj_file = gr.File(label="OBJ File")
            ply_file = gr.File(label="PLY File")
            output_message = gr.Textbox(label="Output Message")

    submit_btn.click(
        fn=process_image,
        inputs=[input_image, num_steps, guidance],
        outputs=[obj_file, ply_file, output_message],
    )
# Launch the app
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
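
# Suggested requirements.txt for this Space (a sketch inferred from the imports
# above; no version pins, since the original does not specify any):
#   gradio
#   torch
#   numpy
#   Pillow
#   trimesh
#   diffusers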