lzyhha committed
Commit 42002e4 · 1 Parent(s): 808cfcf
Files changed (2):
  1. app.py +6 -11
  2. visualcloze.py +5 -4
app.py CHANGED
@@ -71,10 +71,6 @@ def create_demo(model):
         """)
 
         gr.Markdown(GUIDANCE)
-
-        # gr.Markdown("<div style='font-size: 24px; font-weight: bold; color: #FF9999;'>" +
-        #             "Note: Click the task button in the right bottom to acquire examples of tasks." +
-        #             "</div>", )
 
         # Pre-create all possible image components
         all_image_inputs = []
@@ -82,9 +78,8 @@ def create_demo(model):
         row_texts = []
         with gr.Row():
 
-            # Left column: image grid and prompt inputs
             with gr.Column(scale=2):
-                # Image grid section
+                # Image grid
                 for i in range(max_grid_h):
                     # Add row label before each row
                     row_texts.append(gr.Markdown(
@@ -106,7 +101,7 @@ def create_demo(model):
                     )
                     all_image_inputs.append(img_input)
 
-                # Prompt input section
+                # Prompts
                 layout_prompt = gr.Textbox(
                     label="Layout Description (Auto-filled, Read-only)",
                     placeholder="Layout description will be automatically filled based on grid size...",
@@ -143,17 +138,17 @@ def create_demo(model):
 
                 gr.Markdown(CITATION)
 
-            # Right column: output images
+            # Output
             with gr.Column(scale=2):
                 output_gallery = gr.Gallery(
                     label="Generated Results",
                     show_label=True,
                     elem_id="output_gallery",
-                    columns=None,  # set to None to allow auto-adjustment
-                    rows=None,  # set to None to allow auto-adjustment
+                    columns=None,
+                    rows=None,
                     height="auto",
                     allow_preview=True,
-                    object_fit="contain"  # ensure images are displayed in full
+                    object_fit="contain"
                 )
 
         gr.Markdown("# Task Examples")
visualcloze.py CHANGED
@@ -241,7 +241,8 @@ class VisualClozeModel:
         return output_image
 
     def process_images(
-        self, images: list[list[Image.Image]], text_prompt: list[str],
+        self, images: list[list[Image.Image]],
+        prompts: list[str],
         seed: int = 0,
         cfg: int = 30,
         steps: int = 30,
@@ -256,7 +257,7 @@ class VisualClozeModel:
             images (list[list[Image.Image]]): A grid-layout image collection, where each row represents an in-context example or the current query,
                 and the current query should be placed in the last row.
                 The target image can be None in the input; the other images should be PIL images (Image.Image).
-            text_prompt (list[str]): Three prompts, representing the layout prompt, task prompt, and content prompt respectively.
+            prompts (list[str]): Three prompts, representing the layout prompt, task prompt, and content prompt respectively.
             seed (int): A fixed integer seed to ensure reproducibility of the random elements in the processing.
             cfg (int): The strength of Classifier-Free Diffusion Guidance.
             steps (int): The number of sampling steps.
@@ -388,7 +389,7 @@ class VisualClozeModel:
         x = [noise]
 
         with torch.no_grad():
-            inp = prepare_modified(t5=self.t5, clip=self.clip, img=x, prompt=[' '.join(text_prompt)], proportion_empty_prompts=0.0)
+            inp = prepare_modified(t5=self.t5, clip=self.clip, img=x, prompt=[' '.join(prompts)], proportion_empty_prompts=0.0)
 
         model_kwargs = dict(
             txt=inp["txt"],
@@ -445,7 +446,7 @@ class VisualClozeModel:
                 upsampling_steps=upsampling_steps,
                 upsampling_noise=upsampling_noise,
                 generator=rng,
-                content_prompt=text_prompt[2])
+                content_prompt=prompts[2])
             ret.append(upsampled)
 
         return ret
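The rename from text_prompt to prompts is applied consistently across the signature, the docstring, and both call sites. A hedged usage sketch of the new signature follows; the model's construction is outside this diff, so model is assumed to be an already-initialized VisualClozeModel, and the file names and prompt strings are illustrative:

from PIL import Image

# Assumption: `model` is an initialized VisualClozeModel;
# its constructor is not part of this commit.

# Grid layout per the docstring: each row is an in-context example or
# the query, with the query in the last row and None marking the target
# image to be generated.
grid = [
    [Image.open("example_condition.png"), Image.open("example_target.png")],
    [Image.open("query_condition.png"), None],
]

# Three prompts, in the documented order: layout, task, content.
prompts = [
    "A grid with 2 rows and 2 columns.",    # layout prompt
    "Apply the example's transformation.",  # task prompt
    "A snowy mountain village at dusk.",    # content prompt
]

results = model.process_images(
    images=grid,
    prompts=prompts,  # was `text_prompt` before this commit
    seed=0,
    cfg=30,
    steps=30,
)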