Spaces: Running on Zero

Commit 42002e4, committed by lzyhha
Parent: 808cfcf
Commit message: space

Files changed:
- app.py (+6, -11)
- visualcloze.py (+5, -4)
app.py
CHANGED
@@ -71,10 +71,6 @@ def create_demo(model):
                 """)
 
         gr.Markdown(GUIDANCE)
-
-        # gr.Markdown("<div style='font-size: 24px; font-weight: bold; color: #FF9999;'>" +
-        #             "Note: Click the task button in the right bottom to acquire examples of tasks." +
-        #             "</div>", )
 
         # Pre-create all possible image components
         all_image_inputs = []
@@ -82,9 +78,8 @@ def create_demo(model):
         row_texts = []
         with gr.Row():
 
-            # Left column: image grid and prompt inputs
             with gr.Column(scale=2):
-                #
+                # Image grid
                 for i in range(max_grid_h):
                     # Add row label before each row
                     row_texts.append(gr.Markdown(
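The "pre-create all possible image components" comment reflects a standard Gradio constraint: components cannot be created after the Blocks layout is built, so the demo allocates the largest possible grid up front and toggles cell visibility when the user resizes it. A minimal sketch of that pattern, assuming hypothetical MAX_H/MAX_W bounds and Rows/Columns sliders (stand-ins for max_grid_h and whatever resize controls the Space actually uses):

import gradio as gr

MAX_H, MAX_W = 3, 4  # assumed upper bounds, standing in for max_grid_h etc.

with gr.Blocks() as demo:
    grid_h = gr.Slider(1, MAX_H, value=2, step=1, label="Rows")
    grid_w = gr.Slider(1, MAX_W, value=2, step=1, label="Columns")

    all_image_inputs = []
    for i in range(MAX_H):
        with gr.Row():
            for j in range(MAX_W):
                # Every cell exists from the start; only its visibility changes.
                all_image_inputs.append(
                    gr.Image(label=f"cell ({i}, {j})", type="pil",
                             visible=(i < 2 and j < 2)))

    def resize(h, w):
        # Reveal exactly the cells inside the requested h x w grid.
        return [gr.update(visible=(k // MAX_W < h and k % MAX_W < w))
                for k in range(MAX_H * MAX_W)]

    grid_h.change(resize, [grid_h, grid_w], all_image_inputs)
    grid_w.change(resize, [grid_h, grid_w], all_image_inputs)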
@@ -106,7 +101,7 @@ def create_demo(model):
                     )
                     all_image_inputs.append(img_input)
 
-                #
+                # Prompts
                 layout_prompt = gr.Textbox(
                     label="Layout Description (Auto-filled, Read-only)",
                     placeholder="Layout description will be automatically filled based on grid size...",
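Per its label, the layout prompt is auto-filled and read-only rather than user-edited. Continuing the sketch above, one plausible wiring (describe_layout and its template string are assumptions, not the Space's code):

def describe_layout(h, w):
    # Hypothetical template; the real phrasing in the Space may differ.
    return (f"A grid layout with {int(h)} rows and {int(w)} columns, "
            f"containing {int(h) * int(w)} images.")

layout_prompt = gr.Textbox(
    label="Layout Description (Auto-filled, Read-only)",
    interactive=False,  # read-only from the user's point of view
)
grid_h.change(describe_layout, [grid_h, grid_w], layout_prompt)
grid_w.change(describe_layout, [grid_h, grid_w], layout_prompt)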
@@ -143,17 +138,17 @@ def create_demo(model):
 
                 gr.Markdown(CITATION)
 
-            #
+            # Output
             with gr.Column(scale=2):
                 output_gallery = gr.Gallery(
                     label="Generated Results",
                     show_label=True,
                     elem_id="output_gallery",
-                    columns=None,
-                    rows=None,
+                    columns=None,
+                    rows=None,
                     height="auto",
                     allow_preview=True,
-                    object_fit="contain"
+                    object_fit="contain"
                 )
 
                 gr.Markdown("# Task Examples")
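On the Gallery settings: columns=None and rows=None leave the grid arrangement to Gradio's defaults, height="auto" lets the component size itself to its contents, object_fit="contain" scales each generated image to fit its cell without cropping, and allow_preview=True keeps the click-to-enlarge preview available.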
visualcloze.py
CHANGED
@@ -241,7 +241,8 @@ class VisualClozeModel:
         return output_image
 
     def process_images(
-        self, images: list[list[Image.Image]],
+        self, images: list[list[Image.Image]],
+        prompts: list[str],
         seed: int = 0,
         cfg: int = 30,
         steps: int = 30,
@@ -256,7 +257,7 @@ class VisualClozeModel:
             images (list[list[Image.Image]]): A grid-layout image collection, each row represents an in-context example or the current query,
                 where the current query should be placed in the last row.
                 The target image can be None in the input. The other images should be the PIL Image class (Image.Image).
-
+            prompts (list[str]): Three prompts, representing the layout prompt, task prompt, and content prompt respectively.
             seed (int): A fixed integer seed to ensure reproducibility of the random elements in the processing.
             cfg (int): The strength of Classifier-Free Diffusion Guidance.
             steps (int): The number of sampling steps.
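Taken together, process_images now expects the image grid plus a three-element prompt list. A hedged usage sketch, assuming model is an already-initialized VisualClozeModel; the file names and prompt strings are illustrative only:

from PIL import Image

grid = [
    [Image.open("example_input.png"), Image.open("example_output.png")],  # in-context example row
    [Image.open("query.png"), None],  # query row; the target slot is None
]
prompts = [
    "A grid layout with 2 rows and 2 columns.",       # layout prompt (illustrative)
    "Every row depicts an image and its depth map.",  # task prompt (illustrative)
    "A depth map of a city street.",                  # content prompt (illustrative)
]
results = model.process_images(grid, prompts, seed=0, cfg=30, steps=30)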
@@ -388,7 +389,7 @@ class VisualClozeModel:
         x = [noise]
 
         with torch.no_grad():
-            inp = prepare_modified(t5=self.t5, clip=self.clip, img=x, prompt=[' '.join(
+            inp = prepare_modified(t5=self.t5, clip=self.clip, img=x, prompt=[' '.join(prompts)], proportion_empty_prompts=0.0)
 
             model_kwargs = dict(
                 txt=inp["txt"],
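For the first stage, the three prompts are collapsed into a single conditioning string before text encoding, so the T5 and CLIP encoders see the layout, task, and content descriptions as one prompt:

prompts = ["<layout prompt>", "<task prompt>", "<content prompt>"]
assert ' '.join(prompts) == "<layout prompt> <task prompt> <content prompt>"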
@@ -445,7 +446,7 @@ class VisualClozeModel:
                     upsampling_steps=upsampling_steps,
                     upsampling_noise=upsampling_noise,
                     generator=rng,
-                    content_prompt=
+                    content_prompt=prompts[2])
                 ret.append(upsampled)
 
         return ret
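Note the asymmetry: the joined three-part prompt conditions the first-stage generation above, while the upsampling call receives only the content prompt, prompts[2].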