lzyhha committed
Commit 42002e4 · 1 Parent(s): 808cfcf
Files changed (2):
  1. app.py +6 -11
  2. visualcloze.py +5 -4
app.py CHANGED
@@ -71,10 +71,6 @@ def create_demo(model):
         """)
 
         gr.Markdown(GUIDANCE)
-
-        # gr.Markdown("<div style='font-size: 24px; font-weight: bold; color: #FF9999;'>" +
-        #             "Note: Click the task button in the right bottom to acquire examples of tasks." +
-        #             "</div>", )
 
         # Pre-create all possible image components
         all_image_inputs = []
@@ -82,9 +78,8 @@ def create_demo(model):
         row_texts = []
         with gr.Row():
 
-            # Left column: image grid and prompt inputs
             with gr.Column(scale=2):
-                # Image grid section
+                # Image grid
                 for i in range(max_grid_h):
                     # Add row label before each row
                     row_texts.append(gr.Markdown(
@@ -106,7 +101,7 @@ def create_demo(model):
                     )
                     all_image_inputs.append(img_input)
 
-                # Prompt input section
+                # Prompts
                 layout_prompt = gr.Textbox(
                     label="Layout Description (Auto-filled, Read-only)",
                     placeholder="Layout description will be automatically filled based on grid size...",
@@ -143,17 +138,17 @@ def create_demo(model):
 
                 gr.Markdown(CITATION)
 
-            # Right column: output images
+            # Output
             with gr.Column(scale=2):
                 output_gallery = gr.Gallery(
                     label="Generated Results",
                     show_label=True,
                     elem_id="output_gallery",
-                    columns=None,  # set to None to allow auto-adjustment
-                    rows=None,  # set to None to allow auto-adjustment
+                    columns=None,
+                    rows=None,
                     height="auto",
                     allow_preview=True,
-                    object_fit="contain"  # ensure images are displayed in full
+                    object_fit="contain"
                 )
 
         gr.Markdown("# Task Examples")
visualcloze.py CHANGED
@@ -241,7 +241,8 @@ class VisualClozeModel:
         return output_image
 
     def process_images(
-        self, images: list[list[Image.Image]], text_prompt: list[str],
+        self, images: list[list[Image.Image]],
+        prompts: list[str],
         seed: int = 0,
         cfg: int = 30,
         steps: int = 30,
@@ -256,7 +257,7 @@ class VisualClozeModel:
             images (list[list[Image.Image]]): A grid-layout image collection, where each row represents an in-context example or the current query,
                 and the current query should be placed in the last row.
                 The target image can be None in the input; the other images should be PIL images (Image.Image).
-            text_prompt (list[str]): Three prompts, representing the layout prompt, task prompt, and content prompt respectively.
+            prompts (list[str]): Three prompts, representing the layout prompt, task prompt, and content prompt respectively.
             seed (int): A fixed integer seed to ensure reproducibility of the random elements in the processing.
             cfg (int): The strength of Classifier-Free Diffusion Guidance.
             steps (int): The number of sampling steps.
@@ -388,7 +389,7 @@ class VisualClozeModel:
         x = [noise]
 
         with torch.no_grad():
-            inp = prepare_modified(t5=self.t5, clip=self.clip, img=x, prompt=[' '.join(text_prompt)], proportion_empty_prompts=0.0)
+            inp = prepare_modified(t5=self.t5, clip=self.clip, img=x, prompt=[' '.join(prompts)], proportion_empty_prompts=0.0)
 
         model_kwargs = dict(
             txt=inp["txt"],
@@ -445,7 +446,7 @@ class VisualClozeModel:
                 upsampling_steps=upsampling_steps,
                 upsampling_noise=upsampling_noise,
                 generator=rng,
-                content_prompt=text_prompt[2])
+                content_prompt=prompts[2])
             ret.append(upsampled)
 
         return ret
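The rename from text_prompt to prompts is applied consistently across the signature, the docstring, and both call sites. A hedged usage sketch of the new signature follows; the model's construction is outside this diff, so model is assumed to be an already-initialized VisualClozeModel, and the file names and prompt strings are illustrative:

from PIL import Image

# Assumption: `model` is an initialized VisualClozeModel;
# its constructor is not part of this commit.

# Grid layout per the docstring: each row is an in-context example or
# the query, with the query in the last row and None marking the target
# image to be generated.
grid = [
    [Image.open("example_condition.png"), Image.open("example_target.png")],
    [Image.open("query_condition.png"), None],
]

# Three prompts, in the documented order: layout, task, content.
prompts = [
    "A grid with 2 rows and 2 columns.",    # layout prompt
    "Apply the example's transformation.",  # task prompt
    "A snowy mountain village at dusk.",    # content prompt
]

results = model.process_images(
    images=grid,
    prompts=prompts,  # was `text_prompt` before this commit
    seed=0,
    cfg=30,
    steps=30,
)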