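"""Gradio demo for PIFuHD 3D human digitization (CVPR 2020).

Reconstructs a 3D human model from a single image, or per frame from a
video. Pretrained weights are downloaded from the Hugging Face Hub repo
"cxeep/PIFuHD"; a CPU pose estimator crops the subject before inference.
"""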
import gradio as gr
import numpy as np
import cv2
from PIL import Image

# PIFuHD reconstruction pipeline (the package spells the module "recontructor")
from PIFuHD.data import EvalWMetaDataset
from PIFuHD.data.ImageBundle import ImageBundle
from PIFuHD.options import BaseOptions
from PIFuHD.recontructor import Reconstructor

from huggingface_hub import hf_hub_download

# Local helpers: keypoint detection and subject cropping
from human_pose_estimator import PoseEstimator
from estimator import rect

REPO_ID = "cxeep/PIFuHD"

# Keypoint detector for cropping the subject; PIFuHD weights come from the Hub.
pose_estimator = PoseEstimator("cpu")
checkpoint_path = hf_hub_download(repo_id=REPO_ID, filename="pifuhd.pt")

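# argv-style options consumed by PIFuHD's BaseOptions parser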
cmd = [
    '--dataroot', './data',
    '--results_path', './results',
    '--loadSize', '1024',
    '--resolution', '256',
    '--load_netMR_checkpoint_path', checkpoint_path,
    '--start_id', '-1',
    '--end_id', '-1'
]
parser = BaseOptions()
opts = parser.parse(cmd)
reconstructor = Reconstructor(opts)

def make_bundle(image, name):
    """Detect the subject, crop around it, and wrap the crop for PIFuHD."""
    image, rects = rect(pose_estimator, image)
    return ImageBundle(img=image, name=name, meta=rects)

def process_video(video_path):
    """Reconstruct a 3D model for every frame of the input video."""
    # Decode all frames up front; OpenCV yields BGR, the pipeline expects RGB.
    frames = []
    cap = cv2.VideoCapture(video_path)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frames.append(Image.fromarray(frame))
    cap.release()

    # Run PIFuHD on each frame independently.
    models = []
    for frame in frames:
        bundle = make_bundle(np.array(frame), "Model3D")
        dataset = EvalWMetaDataset(opts, [bundle])
        models.append(reconstructor.evaluate(dataset))

    # TODO: Combine models into animation
    return models[0]  # Placeholder: first frame's result stands in for the animation

def predict(image, video):
    """Handle a run: an uploaded video takes precedence over the image.

    reconstructor.evaluate is assumed to return a (preview image, 3D model)
    pair, matching the two output components wired to run_btn below.
    """
    if video is not None:
        return process_video(video)
    bundle = make_bundle(image, "Model3D")
    dataset = EvalWMetaDataset(opts, [bundle])
    return reconstructor.evaluate(dataset)

footer = r"""
<center>
<b>Demo for <a href='https://github.com/facebookresearch/pifuhd'>PIFuHD</a></b>
</center>
"""

with gr.Blocks(title="PIFuHD") as app:
    gr.HTML("<center><h1>3D Human Digitization</h1></center>")
    gr.HTML("<center><h3>PIFuHD: Multi-Level Pixel-Aligned Implicit Function for High-Resolution 3D Human Digitization (CVPR 2020)</h3></center>")
    
    with gr.Row(equal_height=False):
        with gr.Column():
            input_img = gr.Image(type="numpy", label="Input image")
            # gr.Video takes no `type` parameter; it hands the handler a filepath by default
            input_video = gr.Video(label="Input Video")
            run_btn = gr.Button(variant="primary")
        with gr.Column():
            output_obj = gr.Model3D(label="Output model")
            output_img = gr.Image(type="filepath", label="Output image")
            gr.ClearButton(components=[input_img, input_video, output_img, output_obj], variant="stop")
    
    run_btn.click(predict, [input_img, input_video], [output_img, output_obj])
    
    with gr.Row():
        blobs = [[f"examples/{x:02d}.png"] for x in range(1, 4)]
        examples = gr.Dataset(components=[input_img], samples=blobs)
        examples.click(lambda x: x[0], [examples], [input_img])
    
    with gr.Row():
        gr.HTML(footer)

# Queuing must be enabled before launch; reconstruction jobs are long-running.
app.queue()
app.launch(share=False, debug=True, show_error=True)
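# Typical usage (assuming this file is saved as app.py, as on Hugging Face
# Spaces): run `python app.py` and open http://127.0.0.1:7860, Gradio's default port.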