xinjie.wang committed
Commit 4a6c4ad · 1 Parent(s): c417f1a

Files changed (2):
  1. app.py (+18 -29)
  2. common.py (+23 -8)
app.py CHANGED

@@ -10,7 +10,9 @@ from common import (
     extract_3d_representations_v2,
     extract_urdf,
     get_seed,
+    image_css,
     image_to_3d,
+    lighting_css,
     preprocess_image_fn,
     preprocess_sam_image_fn,
     select_point,
@@ -29,6 +31,8 @@ with gr.Blocks(
     The service is temporarily deployed on `dev015-10.34.8.82: CUDA 4`.
     """
 )
+gr.HTML(image_css)
+gr.HTML(lighting_css)
 with gr.Row():
     with gr.Column(scale=2):
         with gr.Tabs() as input_tabs:
@@ -41,23 +45,13 @@
     type="pil",
     visible=False,
 )
-
-image_css = """
-<style>
-#img-fit .image-frame {
-    object-fit: contain !important;
-    height: 100% !important;
-}
-</style>
-"""
-gr.HTML(image_css)
 image_prompt = gr.Image(
     label="Input Image",
     format="png",
     image_mode="RGBA",
     type="pil",
-    height=300,
-    elem_id="img-fit",
+    height=400,
+    elem_classes=["image_fit"],
 )
 gr.Markdown(
     """
@@ -70,7 +64,10 @@
 with gr.Row():
     with gr.Column(scale=1):
         image_prompt_sam = gr.Image(
-            label="Input Image", type="numpy", height=500
+            label="Input Image",
+            type="numpy",
+            height=400,
+            elem_classes=["image_fit"],
         )
         image_seg_sam = gr.Image(
             label="SAM Seg Image",
@@ -80,7 +77,9 @@
         visible=False,
     )
 with gr.Column(scale=1):
-    image_mask_sam = gr.AnnotatedImage()
+    image_mask_sam = gr.AnnotatedImage(
+        elem_classes=["image_fit"]
+    )

     fg_bg_radio = gr.Radio(
         ["foreground_point", "background_point"],
@@ -238,26 +237,17 @@
     label="Gaussian Representation", height=300, interactive=False
 )
 aligned_gs = gr.Textbox(visible=False)
-
-lighting_css = """
-<style>
-#lighter_mesh canvas {
-    filter: brightness(2.8) !important;
-}
-</style>
-"""
-gr.HTML(lighting_css)
+gr.Markdown(
+    """ The rendering of `Gaussian Representation` takes additional 10s. """ # noqa
+)
 with gr.Row():
     model_output_mesh = gr.Model3D(
         label="Mesh Representation",
         height=300,
         interactive=False,
-        clear_color=[1, 1, 1, 1],
-        elem_id="lighter_mesh"
+        clear_color=[0.9, 0.9, 0.9, 1],
+        elem_id="lighter_mesh",
     )
-gr.Markdown(
-    """ The rendering of `Gaussian Representation` takes additional 10s. """ # noqa
-)

 is_samimage = gr.State(False)
 output_buf = gr.State()
@@ -456,4 +446,3 @@

 if __name__ == "__main__":
     demo.launch()
-
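The net effect of the app.py changes: the two inline `<style>` blocks that used to sit inside the layout are removed, the stylesheets are imported from common.py and injected once near the top of the Blocks, and image components opt in through a shared `image_fit` class instead of a one-off `img-fit` id. Below is a minimal sketch of that pattern, assuming only what the diff shows (the component names and CSS rules come from the diff; the surrounding layout is trimmed):

import gradio as gr

# Shared CSS strings, as now defined in common.py: a class-based rule that
# letterboxes image previews, and an id-based rule that brightens the mesh canvas.
image_css = """
<style>
.image_fit .image-frame {
    object-fit: contain !important;
    height: 100% !important;
}
</style>
"""

lighting_css = """
<style>
#lighter_mesh canvas {
    filter: brightness(2.5) !important;
}
</style>
"""

with gr.Blocks() as demo:
    # Inject both stylesheets once, near the top of the layout.
    gr.HTML(image_css)
    gr.HTML(lighting_css)

    # Any image component can reuse the class-based rule ...
    image_prompt = gr.Image(
        label="Input Image",
        type="pil",
        height=400,
        elem_classes=["image_fit"],
    )
    # ... while the id-based rule targets exactly one Model3D viewer.
    model_output_mesh = gr.Model3D(
        label="Mesh Representation",
        height=300,
        interactive=False,
        clear_color=[0.9, 0.9, 0.9, 1],
        elem_id="lighter_mesh",
    )

if __name__ == "__main__":
    demo.launch()

An alternative would be passing the same rules through `gr.Blocks(css=...)`; keeping them as plain strings in common.py and injecting them with `gr.HTML` lets both be reused without touching the Blocks constructor.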
common.py CHANGED

@@ -127,6 +127,24 @@ elif os.getenv("GRADIO_APP") == "texture_edit":
 os.makedirs(TMP_DIR, exist_ok=True)


+lighting_css = """
+<style>
+#lighter_mesh canvas {
+    filter: brightness(2.5) !important;
+}
+</style>
+"""
+
+image_css = """
+<style>
+.image_fit .image-frame {
+    object-fit: contain !important;
+    height: 100% !important;
+}
+</style>
+"""
+
+
 def start_session(req: gr.Request) -> None:
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     os.makedirs(user_dir, exist_ok=True)
@@ -193,13 +211,13 @@ def render_video(

 @spaces.GPU
 def preprocess_image_fn(
-    image: str | np.ndarray | Image.Image
+    image: str | np.ndarray | Image.Image,
 ) -> tuple[Image.Image, Image.Image]:
     if isinstance(image, str):
         image = Image.open(image)
     elif isinstance(image, np.ndarray):
         image = Image.fromarray(image)
-
+
     image_cache = image.copy().resize((512, 512))

     image = RBG_REMOVER(image)
@@ -208,9 +226,8 @@ def preprocess_image_fn(
     return image, image_cache


-# @spaces.GPU
 def preprocess_sam_image_fn(
-    image: Image.Image
+    image: Image.Image,
 ) -> tuple[Image.Image, Image.Image]:
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
@@ -304,7 +321,6 @@ def get_seed(randomize_seed: bool, seed: int, max_seed: int = MAX_SEED) -> int:
     return np.random.randint(0, max_seed) if randomize_seed else seed


-# @spaces.GPU
 def select_point(
     image: np.ndarray,
     sel_pix: list,
@@ -333,7 +349,7 @@ def select_point(
         thickness=10,
     )

-    # torch.cuda.empty_cache()
+    torch.cuda.empty_cache()

     return (image, masks), seg_image

@@ -387,8 +403,7 @@ def image_to_3d(
     mesh_model = outputs["mesh"][0]
     color_images = render_video(gs_model)["color"]
     normal_images = render_video(mesh_model)["normal"]
-
-
+
     video_path = os.path.join(output_root, "gs_mesh.mp4")
     merge_images_video(color_images, normal_images, video_path)
     state = pack_state(gs_model, mesh_model)
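On the common.py side, the formerly commented-out `torch.cuda.empty_cache()` is re-enabled after the point-selection/segmentation step in `select_point`. A rough sketch of what that buys, with a placeholder `predictor` standing in for the segmentation model (the predictor name and call signature are assumptions, not the repo's API):

import numpy as np
import torch


def segment_once(predictor, image: np.ndarray, point: tuple[int, int]) -> np.ndarray:
    """Run one GPU-backed segmentation call, then release cached GPU memory."""
    with torch.no_grad():                  # inference only; keep no autograd graph
        masks = predictor(image, point)    # placeholder for the real segmentation call
    result = masks.detach().cpu().numpy()  # copy the result off the GPU
    del masks                              # drop the last GPU tensor reference first,
    torch.cuda.empty_cache()               # then hand cached blocks back to the driver
    return result

Note that `empty_cache()` only releases allocator blocks whose tensors are no longer referenced, so it does not speed up the current call; it mainly makes the freed memory visible outside the process (e.g. in nvidia-smi or to other jobs on the same GPU).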