# genzoo/app.py
import os
if "PYOPENGL_PLATFORM" not in os.environ:
os.environ["PYOPENGL_PLATFORM"] = "egl"
import math
import numpy as np
import pyrender
import torch
import trimesh
import cv2
import gradio as gr

from src.datasets.vitdet_dataset import ViTDetDataset
from src.models import load_hmr2

# RGB color of the rendered mesh, as floats in [0, 1].
LIGHT_BLUE = (0.65098039, 0.74117647, 0.85882353)
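

# pyrender only ships perspective and orthographic cameras, so the
# weak-perspective model predicted by HMR2-style regressors (a per-axis scale
# plus an image-plane translation) is implemented as a custom camera below,
# similar to the one used in VIBE's renderer. get_projection_matrix maps
# x to sx * (x + tx) and y to sy * (y - ty); the depth entry only needs to
# preserve ordering for the z-buffer.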
class WeakPerspectiveCamera(pyrender.Camera):
    def __init__(
        self,
        scale,
        translation,
        znear=10.0,
        zfar=1000.0,
        name=None,
    ):
        super(WeakPerspectiveCamera, self).__init__(
            znear=znear,
            zfar=zfar,
            name=name,
        )
        self.scale = scale
        self.translation = translation

    def get_projection_matrix(self, width=None, height=None):
        P = np.eye(4)
        P[0, 0] = self.scale[0]
        P[1, 1] = self.scale[1]
        P[0, 3] = self.translation[0] * self.scale[0]
        P[1, 3] = -self.translation[1] * self.scale[1]
        P[2, 2] = -0.1
        return P
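

# Offscreen renderer that keeps one pyrender scene (with three directional
# lights) alive, and adds/removes a mesh node and a camera node on every
# render() call.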
class Renderer:
    def __init__(self, faces, resolution=(1024, 1024), orig_img=False):
        self.resolution = resolution
        self.faces = faces
        self.orig_img = orig_img
        self.renderer = pyrender.OffscreenRenderer(
            viewport_width=self.resolution[0],
            viewport_height=self.resolution[1],
            point_size=1.0,
        )
        self.scene = pyrender.Scene(bg_color=[0.0, 0.0, 0.0, 0.0], ambient_light=(0.3, 0.3, 0.3))
        # Three directional lights placed around the subject for even illumination.
        light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=0.8)
        light_pose = np.eye(4)
        light_pose[:3, 3] = [0, -1, 1]
        self.scene.add(light, pose=light_pose)
        light_pose[:3, 3] = [0, 1, 1]
        self.scene.add(light, pose=light_pose)
        light_pose[:3, 3] = [1, 1, 2]
        self.scene.add(light, pose=light_pose)

    def render(self, verts, cam, color=LIGHT_BLUE, znear=1.0, zfar=10000.0):
        mesh = trimesh.Trimesh(vertices=verts, faces=self.faces, process=False)
        # Rotate 180 degrees about x to match pyrender's camera convention.
        Rx = trimesh.transformations.rotation_matrix(math.radians(180), [1, 0, 0])
        mesh.apply_transform(Rx)
        sx, sy, tx, ty = cam
        camera = WeakPerspectiveCamera(scale=[sx, sy], translation=[tx, ty], znear=znear, zfar=zfar)
        material = pyrender.MetallicRoughnessMaterial(
            metallicFactor=0.0, alphaMode="OPAQUE", baseColorFactor=color
        )
        mesh = pyrender.Mesh.from_trimesh(mesh, material=material, smooth=True)
        mesh_node = self.scene.add(mesh, "mesh")
        camera_pose = np.eye(4)
        cam_node = self.scene.add(camera, pose=camera_pose)
        render_flags = pyrender.RenderFlags.RGBA
        rgb, depth = self.renderer.render(self.scene, flags=render_flags)
        # Remove the per-call nodes so the scene can be reused.
        self.scene.remove_node(mesh_node)
        self.scene.remove_node(cam_node)
        return rgb, depth
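

# The OBJ is written to the working directory so Gradio's Model3D output can
# load it; vertex colors bake the mesh color into the exported file.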
def create_temp_obj(vertices, faces):
    mesh = trimesh.Trimesh(
        vertices=vertices,
        faces=faces,
        vertex_colors=np.tile(np.array(LIGHT_BLUE + (1.0,)), (len(vertices), 1)),
    )
    temp_path = os.path.join(os.getcwd(), "out_mesh.obj")
    mesh.export(temp_path)
    return temp_path
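

# Letterbox to a 1024x1024 canvas. For example, a 640x480 input is scaled by
# min(1024/640, 1024/480) = 1.6 to 1024x768, then centered with 128-pixel
# black bands above and below.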
def resize_and_pad(img):
    original_type = img.dtype
    img_to_process = img.copy()
    h, w = img_to_process.shape[:2]
    target_size = 1024
    scale = min(target_size / w, target_size / h)
    new_w = int(w * scale)
    new_h = int(h * scale)
    resized = cv2.resize(img_to_process, (new_w, new_h), interpolation=cv2.INTER_AREA)
    if len(img.shape) == 3:
        canvas = np.zeros((target_size, target_size, img.shape[2]), dtype=original_type)
    else:
        canvas = np.zeros((target_size, target_size), dtype=original_type)
    x_offset = (target_size - new_w) // 2
    y_offset = (target_size - new_h) // 2
    canvas[y_offset : y_offset + new_h, x_offset : x_offset + new_w] = resized
    return canvas
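

# End-to-end pipeline for one image: letterbox it, run the HMR2-style
# regressor with the full frame as the bounding box, pose the SMPL mesh,
# export it as an OBJ, and alpha-composite a weak-perspective render over
# the input.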
def process_image(input_image):
    img = resize_and_pad(input_image["composite"])
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    hmr2, hmr_2_cfg = load_hmr2()
    device = torch.device("cpu")
    hmr2 = hmr2.to(device)
    hmr2.eval()
    # Use the whole (square) image as the detection box.
    bbox = [0, 0, img.shape[1], img.shape[0]]
    dataset = ViTDetDataset(hmr_2_cfg, img, np.array([bbox]))
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0)
    batch = next(iter(dataloader))
    with torch.inference_mode():
        out = hmr2(batch)
    pred_verts = hmr2.smpl(**{k: v.float() for k, v in out["pred_smpl_params"].items()}, pose2rot=False).vertices[0]
    scale, tx, ty = out["scale"], out["tx"], out["ty"]
    # Negate x and y for the exported OBJ (viewer orientation).
    obj_verts = pred_verts.detach().cpu().numpy()
    obj_verts[:, 1] = -obj_verts[:, 1]
    obj_verts[:, 0] = -obj_verts[:, 0]
    obj_path = create_temp_obj(obj_verts, hmr2.smpl.faces)
    if str(device) == "cpu":
        # Sign flips to match the renderer's coordinate convention.
        pred_verts = pred_verts * torch.tensor([-1, -1, 1])[None]
    renderer = Renderer(hmr2.smpl.faces, resolution=(img.shape[1], img.shape[0]))
    factor = 2.0
    rendered, depth = renderer.render(
        pred_verts.detach().cpu().numpy(),
        (scale * factor, scale * factor, tx / scale, ty / scale),
    )
    # Alpha-composite the RGBA render over the input image.
    rendered_float = rendered.astype(np.float32) / 255.0
    out_img_float = img.astype(np.float32) / 255.0
    mask = rendered_float[:, :, 3]
    mask = np.stack([mask] * 3, axis=-1)
    rendered_rgb = rendered_float[:, :, :3]
    mesh_overlay = out_img_float * (1 - mask) + rendered_rgb * mask
    mesh_overlay = (mesh_overlay * 255).astype(np.uint8)
    return cv2.cvtColor(mesh_overlay, cv2.COLOR_RGB2BGR), obj_path
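

# Gradio UI: an ImageEditor input fixed to a square crop, and two outputs,
# the 2D mesh overlay and the exported OBJ shown as a point cloud.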
iface = gr.Interface(
    fn=process_image,
    analytics_enabled=False,
    inputs=gr.ImageEditor(
        sources=("upload", "clipboard"),
        brush=False,
        eraser=False,
        crop_size="1:1",
        layers=False,
        placeholder="Upload an image or select from the examples.",
    ),
    outputs=[
        gr.Image(label="Mesh overlay"),
        gr.Model3D(
            clear_color=[0.0, 0.0, 0.0, 0.0],
            label="3D Model",
            display_mode="point_cloud",
        ),
    ],
    title="GenZoo",
    description="""
# Generative Zoo
https://genzoo.is.tue.mpg.de
## Usage
1. **Input**: Select an example image or upload your own.
2. **Processing**: Crop the image to a square.
3. **Output**:
- 2D mesh overlay on the original image
- Interactive 3D model visualization

The demo is provided for non-commercial purposes, and its use is governed by the [LICENSE](https://genzoo.is.tue.mpg.de/license.html). \n
We thank the authors of [Humans in 4D: Reconstructing and Tracking Humans with Transformers](https://shubham-goel.github.io/4dhumans/), from which we borrowed components.
""",
    examples=[
        "gradio_example_images/000014.png",
        "gradio_example_images/000018.png",
        "gradio_example_images/000247.png",
        "gradio_example_images/000315.png",
        "gradio_example_images/001114.png",
    ],
)
iface.launch()
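
# To run locally (assuming the src/ package, model weights, and the example
# images are available): python app.py. By default, launch() serves the demo
# at http://127.0.0.1:7860.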