InstantMesh

Runtime error

App Files Files Community

InstantMesh / app.py

DonPab1o

Update app.py

600b7aa verified about 1 month ago

raw

history blame contribute delete

14.8 kB

	import spaces

	import os
	import imageio
	import numpy as np
	import torch
	import rembg
	from PIL import Image, ImageDraw, ImageFont
	from torchvision.transforms import v2
	from pytorch_lightning import seed_everything
	from omegaconf import OmegaConf
	from einops import rearrange, repeat
	from tqdm import tqdm
	from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler

	from src.utils.train_util import instantiate_from_config
	from src.utils.camera_util import (
	FOV_to_intrinsics,
	get_zero123plus_input_cameras,
	get_circular_camera_poses,
	)
	from src.utils.mesh_util import save_obj, save_glb
	from src.utils.infer_util import remove_background, resize_foreground, images_to_video

	import tempfile
	from functools import partial

	from huggingface_hub import hf_hub_download

	import gradio as gr


	def get_render_cameras(batch_size=1, M=120, radius=2.5, elevation=10.0, is_flexicubes=False):
	    """
	    Build the batch of camera parameters used for rendering.

	    Camera-to-world poses come from ``get_circular_camera_poses`` with the
	    given ``M``, ``radius`` and ``elevation``.

	    * ``is_flexicubes=True``: the poses are inverted with
	      ``torch.linalg.inv`` and returned with a leading batch dimension.
	    * otherwise: each pose is flattened and concatenated with the flattened
	      intrinsics from ``FOV_to_intrinsics(50.0)``, one row per view, and
	      returned with a leading batch dimension.

	    In both cases the leading dimension is repeated ``batch_size`` times.
	    """
	    poses = get_circular_camera_poses(M=M, radius=radius, elevation=elevation)

	    if is_flexicubes:
	        inverted = torch.linalg.inv(poses)
	        return inverted.unsqueeze(0).repeat(batch_size, 1, 1, 1)

	    flat_extrinsics = poses.flatten(-2)
	    # The same intrinsics are shared by all M views.
	    flat_intrinsics = (
	        FOV_to_intrinsics(50.0).unsqueeze(0).repeat(M, 1, 1).float().flatten(-2)
	    )
	    per_view = torch.cat([flat_extrinsics, flat_intrinsics], dim=-1)
	    return per_view.unsqueeze(0).repeat(batch_size, 1, 1)


	def images_to_video(images, output_path, fps=30):
	    """Encode a batch of frames as an H.264 video at ``output_path``.

	    NOTE: this definition shadows the ``images_to_video`` imported from
	    ``src.utils.infer_util`` at the top of the file.

	    Args:
	        images: tensor indexed as (N, C, H, W). Values are multiplied by 255
	            before the uint8 cast, so they are presumably floats in [0, 1]
	            (TODO confirm against callers); anything outside that range is
	            saturated to 0 / 255.
	        output_path: destination file. Its parent directory is created when
	            the path has one.
	        fps: frame rate passed to ``imageio.mimwrite``.
	    """
	    # os.makedirs('') raises, so only create a directory when one is named.
	    out_dir = os.path.dirname(output_path)
	    if out_dir:
	        os.makedirs(out_dir, exist_ok=True)

	    # (N, C, H, W) -> (N, H, W, C), the channel-last layout the writer gets.
	    frames = images.permute(0, 2, 3, 1).cpu().numpy()
	    # Clip BEFORE casting: casting first lets out-of-range values wrap
	    # around, after which a clip to [0, 255] can no longer repair them.
	    frames = np.clip(frames * 255, 0, 255).astype(np.uint8)

	    imageio.mimwrite(output_path, frames, fps=fps, codec='h264')


	###############################################################################
	# Configuration.
	###############################################################################

	import shutil

	def find_cuda():
	    """Locate the CUDA installation directory.

	    Lookup order:

	    1. ``CUDA_HOME`` then ``CUDA_PATH``; the first one that is set AND names
	       an existing path wins. A stale ``CUDA_HOME`` therefore no longer
	       hides a valid ``CUDA_PATH``.
	    2. The ``nvcc`` executable on ``PATH``; the installation root is taken to
	       be two directory levels above it (``<root>/bin/nvcc``).

	    Returns:
	        The installation path as a string, or ``None`` when nothing is found.
	    """
	    for env_var in ('CUDA_HOME', 'CUDA_PATH'):
	        candidate = os.environ.get(env_var)
	        if candidate and os.path.exists(candidate):
	            return candidate

	    # Search for the nvcc executable in the system's PATH
	    nvcc_path = shutil.which('nvcc')
	    if nvcc_path:
	        # Remove the 'bin/nvcc' part to get the CUDA installation path
	        return os.path.dirname(os.path.dirname(nvcc_path))

	    return None

	# Probe for CUDA once at import time and report the outcome on stdout.
	cuda_path = find_cuda()
	print(
	    f"CUDA installation found at: {cuda_path}"
	    if cuda_path
	    else "CUDA installation not found"
	)

	# Load the YAML config and split it into the two sections used below.
	config_path = 'configs/instant-mesh-large.yaml'
	config = OmegaConf.load(config_path)
	config_name = os.path.basename(config_path).replace('.yaml', '')
	model_config = config.model_config
	infer_config = config.infer_config

	# startswith() already yields a bool; no conditional expression needed.
	IS_FLEXICUBES = config_name.startswith('instant-mesh')

	# Set device to CPU
	device = torch.device('cpu')

	# load diffusion model
	print('Loading diffusion model ...')
	pipeline = DiffusionPipeline.from_pretrained(
	    "sudo-ai/zero123plus-v1.2",
	    custom_pipeline="zero123plus",
	    torch_dtype=torch.float32,  # Changed from float16 to float32 for CPU
	)
	pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
	    pipeline.scheduler.config, timestep_spacing='trailing'
	)

	# load custom white-background UNet
	unet_ckpt_path = hf_hub_download(
	    repo_id="TencentARC/InstantMesh",
	    filename="diffusion_pytorch_model.bin",
	    repo_type="model",
	)
	# NOTE(review): torch.load unpickles the downloaded file. Consider
	# weights_only=True if the installed torch version and this checkpoint
	# support it.
	state_dict = torch.load(unet_ckpt_path, map_location='cpu')
	pipeline.unet.load_state_dict(state_dict, strict=True)

	pipeline = pipeline.to(device)

	# load reconstruction model
	print('Loading reconstruction model ...')
	model_ckpt_path = hf_hub_download(
	    repo_id="TencentARC/InstantMesh",
	    filename="instant_mesh_large.ckpt",
	    repo_type="model",
	)
	model = instantiate_from_config(model_config)
	# NOTE(review): same unpickling caveat as the UNet checkpoint above.
	state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
	# Keep only the weights stored under the 'lrm_generator.' prefix, strip that
	# prefix so the keys match `model`, and drop every key that mentions
	# 'source_camera'. The slice length is derived from the prefix itself
	# rather than hard-coded.
	_LRM_PREFIX = 'lrm_generator.'
	state_dict = {
	    k[len(_LRM_PREFIX):]: v
	    for k, v in state_dict.items()
	    if k.startswith(_LRM_PREFIX) and 'source_camera' not in k
	}
	model.load_state_dict(state_dict, strict=True)

	model = model.to(device)

	print('Loading Finished!')


	def check_input_image(input_image):
	    """Abort with a Gradio error when no image was supplied.

	    Returns ``None`` for any value other than ``None``; raises ``gr.Error``
	    otherwise.
	    """
	    if input_image is not None:
	        return
	    raise gr.Error("No image uploaded!")


	def preprocess(input_image, do_remove_background):
	    """Optionally strip the background from ``input_image``.

	    When ``do_remove_background`` is falsy the image is returned untouched.
	    Otherwise a fresh rembg session is created, the background is removed
	    with ``remove_background`` and the result is passed through
	    ``resize_foreground`` with a ratio of 0.85.
	    """
	    if not do_remove_background:
	        return input_image

	    session = rembg.new_session()
	    cut_out = remove_background(input_image, session)
	    return resize_foreground(cut_out, 0.85)


	def generate_mvs(input_image, sample_steps, sample_seed):
	    """Run the diffusion pipeline and return the multi-view image twice.

	    Returns:
	        ``(z123_image, show_image)``: the first image produced by the
	        pipeline, and a display copy in which the same tiles are rearranged
	        from a 3-row x 2-column grid into a 2-row x 3-column grid.
	    """
	    seed_everything(sample_seed)

	    # sampling
	    sampled = pipeline(input_image, num_inference_steps=sample_steps)
	    z123_image = sampled.images[0]

	    # Original note gives the expected grid size as (960, 640, 3).
	    grid = torch.from_numpy(np.asarray(z123_image, dtype=np.uint8))
	    # Cut the 3x2 grid into individual tiles ...
	    tiles = rearrange(grid, '(n h) (m w) c -> (n m) h w c', n=3, m=2)
	    # ... and lay the same tiles out again as a 2x3 grid.
	    wide_grid = rearrange(tiles, '(n m) h w c -> (n h) (m w) c', n=2, m=3)
	    show_image = Image.fromarray(wide_grid.numpy())

	    return z123_image, show_image


	def make3d(images):
	    """Reconstruct a mesh from the multi-view grid image and save it to disk.

	    Args:
	        images: the multi-view grid image. It is converted with
	            ``np.asarray`` and split into a 3-row x 2-column set of six
	            views.

	    Returns:
	        ``(mesh_fpath, mesh_glb_fpath)``: paths of the written ``.obj`` and
	        ``.glb`` files, which share one temporary directory and base name.

	    Side effects:
	        Rebinds the module-level ``model`` to its eval-mode instance and,
	        when ``IS_FLEXICUBES`` is set, calls
	        ``model.init_flexicubes_geometry`` on every invocation.
	    """
	    global model
	    if IS_FLEXICUBES:
	        model.init_flexicubes_geometry(device, use_renderer=False)
	    model = model.eval()

	    images = np.asarray(images, dtype=np.float32) / 255.0
	    # channel-last -> channel-first; original note: expected (3, 960, 640)
	    images = torch.from_numpy(images).permute(2, 0, 1).contiguous().float()
	    # split the grid into views; original note: expected (6, 3, 320, 320)
	    images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=3, m=2)

	    input_cameras = get_zero123plus_input_cameras(batch_size=1, radius=4.0).to(device)

	    images = images.unsqueeze(0).to(device)
	    # NOTE(review): interpolation=3 is presumably the bicubic mode code;
	    # confirm against torchvision.
	    images = v2.functional.resize(images, (320, 320), interpolation=3, antialias=True).clamp(0, 1)

	    # Reserve a unique file name, then close the handle right away so it is
	    # not leaked; delete=False keeps the file for save_obj to overwrite.
	    with tempfile.NamedTemporaryFile(suffix=".obj", delete=False) as tmp_file:
	        mesh_fpath = tmp_file.name
	    print(mesh_fpath)
	    mesh_basename = os.path.basename(mesh_fpath).split('.')[0]
	    mesh_dirname = os.path.dirname(mesh_fpath)
	    mesh_glb_fpath = os.path.join(mesh_dirname, f"{mesh_basename}.glb")

	    with torch.no_grad():
	        # get triplane
	        planes = model.forward_planes(images, input_cameras)

	        # get mesh
	        mesh_out = model.extract_mesh(
	            planes,
	            use_texture_map=False,
	            **infer_config,
	        )

	        vertices, faces, vertex_colors = mesh_out
	        # Reorder the coordinate columns (x, y, z) -> (y, z, x) before export.
	        vertices = vertices[:, [1, 2, 0]]

	        save_glb(vertices, faces, vertex_colors, mesh_glb_fpath)
	        save_obj(vertices, faces, vertex_colors, mesh_fpath)

	        print(f"Mesh saved to {mesh_fpath}")

	    return mesh_fpath, mesh_glb_fpath


	# New function to generate 2D pixel art sprites
	def generate_pixel_art(prompt, remove_background=True, sample_steps=75, seed=42):
	    """Produce a sprite image for ``prompt`` and return the path of a PNG.

	    The prompt is drawn as black text ("Pixel art: <prompt>") onto a white
	    512x512 canvas. That canvas goes through ``preprocess`` and
	    ``generate_mvs``, and the first image returned by ``generate_mvs`` is
	    saved. Nothing in this function passes the prompt text itself to the
	    diffusion pipeline; only the rendered canvas is used.

	    Args:
	        prompt: text to render onto the starting canvas.
	        remove_background: forwarded to ``preprocess``.
	        sample_steps: forwarded to ``generate_mvs``.
	        seed: random seed, applied here and again inside ``generate_mvs``.

	    Returns:
	        Path of the temporary ``.png`` file holding the result.
	    """
	    seed_everything(seed)

	    # Create a simple image with text as starting point
	    text_img = Image.new('RGB', (512, 512), color=(255, 255, 255))
	    draw = ImageDraw.Draw(text_img)
	    # Try to load a font, use default if not available. Only the failures
	    # that mean "font or FreeType unavailable" are caught, so unrelated
	    # errors (e.g. KeyboardInterrupt) are no longer swallowed.
	    try:
	        font = ImageFont.truetype("Arial", 20)
	    except (OSError, ImportError):
	        font = ImageFont.load_default()

	    # Add prompt as text
	    pixel_prompt = f"Pixel art: {prompt}"
	    draw.text((10, 10), pixel_prompt, fill=(0, 0, 0), font=font)

	    # Process through the pipeline
	    processed_img = preprocess(text_img, remove_background)

	    # Generate the pixel art
	    result_img, _ = generate_mvs(processed_img, sample_steps, seed)

	    # Reserve a unique file name and close the handle immediately so it is
	    # not leaked; delete=False keeps the file for the save below.
	    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_file:
	        sprite_path = tmp_file.name
	    result_img.save(sprite_path)

	    return sprite_path


	# Markdown/HTML banner text: project title, links and usage notes.
	_HEADER_ = '''
	<h2><b>Official 🤗 Gradio Demo</b></h2><h2><a href='https://github.com/TencentARC/InstantMesh' target='_blank'><b>InstantMesh: Efficient 3D Mesh Generation from a Single Image with Sparse-view Large Reconstruction Models</b></a></h2>

	InstantMesh is a feed-forward framework for efficient 3D mesh generation from a single image based on the LRM/Instant3D architecture.

	Code: <a href='https://github.com/TencentARC/InstantMesh' target='_blank'>GitHub</a>. Technical report: <a href='https://arxiv.org/abs/2404.07191' target='_blank'>ArXiv</a>.

	❗️❗️❗️Important Notes:
	- Our demo can export a .obj mesh with vertex colors or a .glb mesh now. If you prefer to export a .obj mesh with a texture map, please refer to our <a href='https://github.com/TencentARC/InstantMesh?tab=readme-ov-file#running-with-command-line' target='_blank'>Github Repo</a>.
	- The 3D mesh generation results highly depend on the quality of generated multi-view images. Please try a different seed value if the result is unsatisfying (Default: 42).
	'''

	# Markdown footer text: star request, BibTeX citation, license and contact
	# details. Written as a raw string literal.
	# NOTE(review): nothing in the visible part of this file references this
	# constant — confirm whether it is still meant to be rendered.
	_CITE_ = r"""
	If InstantMesh is helpful, please help to ⭐ the <a href='https://github.com/TencentARC/InstantMesh' target='_blank'>Github Repo</a>. Thanks! [![GitHub Stars](https://img.shields.io/github/stars/TencentARC/InstantMesh?style=social)](https://github.com/TencentARC/InstantMesh)
	---
	📝 Citation

	If you find our work useful for your research or applications, please cite using this bibtex:
	```bibtex
	@article{xu2024instantmesh,
	title={InstantMesh: Efficient 3D Mesh Generation from a Single Image with Sparse-view Large Reconstruction Models},
	author={Xu, Jiale and Cheng, Weihao and Gao, Yiming and Wang, Xintao and Gao, Shenghua and Shan, Ying},
	journal={arXiv preprint arXiv:2404.07191},
	year={2024}
	}
	```

	📋 License

	Apache-2.0 LICENSE. Please refer to the [LICENSE file](https://huggingface.co/spaces/TencentARC/InstantMesh/blob/main/LICENSE) for details.

	📧 Contact

	If you have any questions, feel free to open a discussion or contact us at <b>[email protected]</b>.
	"""


	# Build the Gradio UI: one tab for image-to-3D generation and one tab for the
	# prompt-driven sprite generator, followed by the event wiring.
	# NOTE(review): indentation was lost in this copy of the file, so the exact
	# nesting of the `with` containers below cannot be read from the text;
	# confirm the layout hierarchy against the deployed app.py before editing.
	with gr.Blocks() as demo:
	gr.Markdown("# InstantMesh and Pixel Art Generator")

	# ---- Tab 1: 3D model generation -----------------------------------------
	with gr.Tab("3D Model Generation"):
	with gr.Row(variant="panel"):
	with gr.Column():
	with gr.Row():
	# Uploaded source image, delivered to the callbacks as a PIL image.
	input_image = gr.Image(
	label="Input Image",
	image_mode="RGBA",
	sources="upload",
	type="pil",
	elem_id="content_image",
	)
	# Read-only preview of the output of `preprocess`.
	processed_image = gr.Image(
	label="Processed Image",
	image_mode="RGBA",
	type="pil",
	interactive=False
	)
	with gr.Row():
	with gr.Group():
	# Sampling controls fed to `preprocess` / `generate_mvs`.
	do_remove_background = gr.Checkbox(
	label="Remove Background", value=True
	)
	sample_seed = gr.Number(value=42, label="Seed Value", precision=0)
	sample_steps = gr.Slider(
	label="Sample Steps",
	minimum=30,
	maximum=75,
	value=75,
	step=5
	)

	with gr.Row():
	submit_3d = gr.Button("Generate 3D Model", elem_id="generate", variant="primary")

	with gr.Column():
	with gr.Row():
	with gr.Column():
	# Display copy of the multi-view grid (second output of `generate_mvs`).
	mv_show_images = gr.Image(
	label="Generated Multi-views",
	type="pil",
	width=379,
	interactive=False
	)

	with gr.Row():
	# The two mesh files returned by `make3d`, one viewer per format.
	with gr.Tab("OBJ"):
	output_model_obj = gr.Model3D(
	label="Output Model (OBJ Format)",
	interactive=False,
	)
	with gr.Tab("GLB"):
	output_model_glb = gr.Model3D(
	label="Output Model (GLB Format)",
	interactive=False,
	)

	# ---- Tab 2: pixel art generation ----------------------------------------
	with gr.Tab("Pixel Art Generation"):
	with gr.Row(variant="panel"):
	with gr.Column():
	pixel_prompt = gr.Textbox(
	label="Describe your pixel art sprite",
	placeholder="green cactus snake character, side view, game sprite",
	lines=3
	)
	with gr.Row():
	pixel_bg_remove = gr.Checkbox(label="Remove Background", value=True)
	pixel_seed = gr.Number(value=42, label="Seed Value", precision=0)
	pixel_steps = gr.Slider(
	label="Sample Steps",
	minimum=30,
	maximum=75,
	value=75,
	step=5
	)
	submit_pixel = gr.Button("Generate Pixel Art", variant="primary")

	with gr.Column():
	pixel_output = gr.Image(
	label="Generated Pixel Art Sprite",
	type="pil",
	interactive=False
	)

	# Set up event handlers
	# `mv_images` holds the first output of `generate_mvs` between steps so that
	# `make3d` receives it as its input.
	mv_images = gr.State()

	# 3D Model generation flow: check_input_image -> preprocess -> generate_mvs
	# -> make3d, each step chained with `.success(...)` on the previous one.
	submit_3d.click(fn=check_input_image, inputs=[input_image]).success(
	fn=preprocess,
	inputs=[input_image, do_remove_background],
	outputs=[processed_image],
	).success(
	fn=generate_mvs,
	inputs=[processed_image, sample_steps, sample_seed],
	outputs=[mv_images, mv_show_images]
	).success(
	fn=make3d,
	inputs=[mv_images],
	outputs=[output_model_obj, output_model_glb]
	)

	# Pixel Art generation flow. The input order (prompt, background flag,
	# steps, seed) follows the positional parameters of `generate_pixel_art`.
	submit_pixel.click(
	fn=generate_pixel_art,
	inputs=[pixel_prompt, pixel_bg_remove, pixel_steps, pixel_seed],
	outputs=[pixel_output]
	)

	# Enable the request queue with its default settings. The former
	# `concurrency_count=1` argument is rejected by Gradio 4+, and the queue
	# already defaults to one concurrent worker per event.
	demo.queue()

	# Start the server without a public share link. `launch()` has no
	# `enable_api` keyword, so passing it raised a TypeError at startup; the API
	# endpoints are exposed by default and need no flag.
	demo.launch(share=False)