helblazer811 committed on
Commit
4f83196
·
1 Parent(s): e6d4da8

App changes

Browse files
Files changed (1) hide show
  1. app.py +94 -100
app.py CHANGED
@@ -1,44 +1,26 @@
1
  import base64
2
  import io
3
-
4
  import spaces
5
  import gradio as gr
6
  from PIL import Image
7
- import requests
8
- import numpy as np
9
- import PIL
10
 
11
  from concept_attention import ConceptAttentionFluxPipeline
12
 
13
- # concept_attention_default_args = {
14
- # "model_name": "flux-schnell",
15
- # "device": "cuda",
16
- # "layer_indices": list(range(10, 19)),
17
- # "timesteps": list(range(2, 4)),
18
- # "num_samples": 4,
19
- # "num_inference_steps": 4
20
- # }
21
  IMG_SIZE = 250
22
 
23
- def download_image(url):
24
- return Image.open(io.BytesIO(requests.get(url).content))
25
-
26
  EXAMPLES = [
27
  [
28
  "A dog by a tree", # prompt
29
- download_image("https://github.com/helblazer811/ConceptAttention/blob/master/images/dog_by_tree.png?raw=true"),
30
  "tree, dog, grass, background", # words
31
  42, # seed
32
  ],
33
  [
34
  "A dragon", # prompt
35
- download_image("https://github.com/helblazer811/ConceptAttention/blob/master/images/dragon_image.png?raw=true"),
36
  "dragon, sky, rock, cloud", # words
37
  42, # seed
38
  ],
39
- [
40
  "A hot air balloon", # prompt
41
- download_image("https://github.com/helblazer811/ConceptAttention/blob/master/images/hot_air_balloon.png?raw=true"),
42
  "balloon, sky, water, tree", # words
43
  42, # seed
44
  ]
@@ -47,67 +29,68 @@ EXAMPLES = [
47
  pipeline = ConceptAttentionFluxPipeline(model_name="flux-schnell", device="cuda")
48
 
49
  @spaces.GPU(duration=60)
50
- def process_inputs(prompt, input_image, word_list, seed, num_samples, layer_start_index, timestep_start_index):
51
  print("Processing inputs")
 
 
 
52
  prompt = prompt.strip()
53
  if not word_list.strip():
54
- return None, "Please enter comma-separated words"
55
 
56
  concepts = [w.strip() for w in word_list.split(",")]
57
 
58
- if input_image is not None:
59
- if isinstance(input_image, np.ndarray):
60
- input_image = Image.fromarray(input_image)
61
- input_image = input_image.convert("RGB")
62
- input_image = input_image.resize((1024, 1024))
63
- elif isinstance(input_image, PIL.Image.Image):
64
- input_image = input_image.convert("RGB")
65
- input_image = input_image.resize((1024, 1024))
66
-
67
- pipeline_output = pipeline.encode_image(
68
- image=input_image,
69
- concepts=concepts,
70
- prompt=prompt,
71
- width=1024,
72
- height=1024,
73
- seed=seed,
74
- num_samples=num_samples,
75
- layer_indices=list(range(layer_start_index, 19)),
76
- )
77
-
78
- else:
79
- pipeline_output = pipeline.generate_image(
80
- prompt=prompt,
81
- concepts=concepts,
82
- width=1024,
83
- height=1024,
84
- seed=seed,
85
- timesteps=list(range(timestep_start_index, 4)),
86
- num_inference_steps=4,
87
- layer_indices=list(range(layer_start_index, 19)),
88
- )
89
 
90
  output_image = pipeline_output.image
91
  concept_heatmaps = pipeline_output.concept_heatmaps
 
92
 
93
- html_elements = []
94
- for concept, heatmap in zip(concepts, concept_heatmaps):
95
- img = heatmap.resize((IMG_SIZE, IMG_SIZE), resample=Image.NEAREST)
96
- buffered = io.BytesIO()
97
- img.save(buffered, format="PNG")
98
- img_str = base64.b64encode(buffered.getvalue()).decode()
99
 
100
- html = f"""
101
- <div style='text-align: center; margin: 5px; padding: 5px; overflow-x: auto; white-space: nowrap;'>
102
- <h1 style='margin-bottom: 10px;'>{concept}</h1>
103
- <img src='data:image/png;base64,{img_str}' style='width: {IMG_SIZE}px; display: inline-block; height: {IMG_SIZE}px;'>
104
- </div>
105
- """
106
- html_elements.append(html)
107
 
108
- combined_html = "<div style='display: flex; flex-wrap: wrap; justify-content: center;'>" + "".join(html_elements) + "</div>"
109
- return output_image, combined_html, None # None fills input_image with None
 
 
 
110
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  with gr.Blocks(
113
  css="""
@@ -115,10 +98,8 @@ with gr.Blocks(
115
  .title { text-align: center; margin-bottom: 10px; }
116
  .authors { text-align: center; margin-bottom: 10px; }
117
  .affiliations { text-align: center; color: #666; margin-bottom: 10px; }
118
- .content { display: grid; grid-template-columns: 1fr 1fr; gap: 20px; }
119
- .section { }
120
- .input-image { width: 100%; height: 200px; }
121
  .abstract { text-align: center; margin-bottom: 40px; }
 
122
  """
123
  ) as demo:
124
  with gr.Column(elem_classes="container"):
@@ -134,41 +115,54 @@ with gr.Blocks(
134
  elem_classes="abstract"
135
  )
136
 
137
- with gr.Row(elem_classes="content"):
138
- with gr.Column(elem_classes="section"):
139
- gr.Markdown("### Input")
140
- prompt = gr.Textbox(label="Enter your prompt")
141
- words = gr.Textbox(label="Enter a list of concepts (comma-separated)")
142
- # gr.HTML("<div style='text-align: center;'> <h3> Or </h3> </div>")
143
- image_input = gr.Image(type="numpy", label="Upload image (optional)", elem_classes="input-image")
144
- # Set up advanced options
145
- with gr.Accordion("Advanced Settings", open=False):
146
- seed = gr.Slider(minimum=0, maximum=10000, step=1, label="Seed", value=42)
147
- num_samples = gr.Slider(minimum=1, maximum=10, step=1, label="Number of Samples", value=4)
148
- layer_start_index = gr.Slider(minimum=0, maximum=18, step=1, label="Layer Start Index", value=10)
149
- timestep_start_index = gr.Slider(minimum=0, maximum=4, step=1, label="Timestep Start Index", value=2)
150
-
151
- with gr.Column(elem_classes="section"):
152
- gr.Markdown("### Output")
153
- output_image = gr.Image(type="numpy", label="Output image")
154
-
155
- with gr.Row():
156
- submit_btn = gr.Button("Process")
157
-
158
- with gr.Row(elem_classes="section"):
159
- saliency_display = gr.HTML(label="Saliency Maps")
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
  submit_btn.click(
162
  fn=process_inputs,
163
- inputs=[prompt, image_input, words, seed, num_samples, layer_start_index, timestep_start_index], outputs=[output_image, saliency_display, image_input]
 
164
  )
165
- # .then(
166
- # fn=lambda component: gr.update(value=None),
167
- # inputs=[image_input],
168
- # outputs=[]
169
- # )
170
 
171
- gr.Examples(examples=EXAMPLES, inputs=[prompt, image_input, words, seed], outputs=[output_image, saliency_display], fn=process_inputs, cache_examples=False)
 
 
 
 
172
 
173
  if __name__ == "__main__":
174
  demo.launch(max_threads=1)
 
1
  import base64
2
  import io
 
3
  import spaces
4
  import gradio as gr
5
  from PIL import Image
 
 
 
6
 
7
  from concept_attention import ConceptAttentionFluxPipeline
8
 
 
 
 
 
 
 
 
 
9
  IMG_SIZE = 250
10
 
 
 
 
11
  EXAMPLES = [
12
  [
13
  "A dog by a tree", # prompt
 
14
  "tree, dog, grass, background", # words
15
  42, # seed
16
  ],
17
  [
18
  "A dragon", # prompt
 
19
  "dragon, sky, rock, cloud", # words
20
  42, # seed
21
  ],
22
+ [
23
  "A hot air balloon", # prompt
 
24
  "balloon, sky, water, tree", # words
25
  42, # seed
26
  ]
 
29
# Build the pipeline once at import time so every GPU request reuses it.
pipeline = ConceptAttentionFluxPipeline(model_name="flux-schnell", device="cuda")


@spaces.GPU(duration=60)
def process_inputs(prompt, word_list, seed, layer_start_index, timestep_start_index):
    """Generate an image for ``prompt`` plus one saliency heatmap per concept.

    Args:
        prompt: Text prompt for image generation.
        word_list: Comma-separated concept words to attribute attention to.
        seed: RNG seed for reproducible generation.
        layer_start_index: First layer index aggregated into the heatmaps
            (layers ``layer_start_index`` .. 18 are used).
        timestep_start_index: First timestep aggregated into the heatmaps
            (timesteps ``timestep_start_index`` .. 3 are used).

    Returns:
        A list of ``(PIL.Image, caption)`` pairs for a ``gr.Gallery``: the
        generated image first, then one resized heatmap per concept.

    Raises:
        gr.exceptions.InputError: If the concept list is empty or contains
            more than 9 entries.
    """
    print("Processing inputs")
    # Slider components always supply values; these guard against a wiring bug.
    assert layer_start_index is not None
    assert timestep_start_index is not None

    prompt = prompt.strip()
    if not word_list.strip():
        # BUG FIX: the exception was previously constructed but never raised,
        # so empty input fell through to the pipeline as a single "" concept.
        raise gr.exceptions.InputError("words", "Please enter comma-separated words")

    # Drop blank entries so trailing/double commas don't yield empty concepts
    # (also makes the emptiness check below actually reachable).
    concepts = [w.strip() for w in word_list.split(",") if w.strip()]

    if not concepts:
        raise gr.exceptions.InputError("words", "Please enter at least 1 concept")

    if len(concepts) > 9:
        raise gr.exceptions.InputError("words", "Please enter at most 9 concepts")

    pipeline_output = pipeline.generate_image(
        prompt=prompt,
        concepts=concepts,
        width=1024,
        height=1024,
        seed=seed,
        timesteps=list(range(timestep_start_index, 4)),
        num_inference_steps=4,
        layer_indices=list(range(layer_start_index, 19)),
        # Normalizing across concepts only makes sense with more than one.
        softmax=len(concepts) > 1,
    )

    output_image = pipeline_output.image
    # Downscale heatmaps for display; NEAREST keeps the attention grid crisp.
    concept_heatmaps = [
        heatmap.resize((IMG_SIZE, IMG_SIZE), resample=Image.NEAREST)
        for heatmap in pipeline_output.concept_heatmaps
    ]

    # Gallery format: (image, caption) pairs, generated image first.
    return [(output_image, "Generated Image")] + list(zip(concept_heatmaps, concepts))
94
 
95
  with gr.Blocks(
96
  css="""
 
98
  .title { text-align: center; margin-bottom: 10px; }
99
  .authors { text-align: center; margin-bottom: 10px; }
100
  .affiliations { text-align: center; color: #666; margin-bottom: 10px; }
 
 
 
101
  .abstract { text-align: center; margin-bottom: 40px; }
102
+ .input-row { height: 60px; }
103
  """
104
  ) as demo:
105
  with gr.Column(elem_classes="container"):
 
115
  elem_classes="abstract"
116
  )
117
 
118
+ with gr.Row(equal_height=True, elem_classes="input-row"):
119
+ prompt = gr.Textbox(
120
+ label="Enter your prompt",
121
+ placeholder="Enter your prompt",
122
+ value=EXAMPLES[0][0],
123
+ scale=4,
124
+ # show_label=False
125
+ )
126
+ words = gr.Textbox(
127
+ label="Enter a list of concepts (comma-separated)",
128
+ placeholder="Enter a list of concepts (comma-separated)",
129
+ value=EXAMPLES[0][1],
130
+ scale=4,
131
+ # show_label=False
132
+ )
133
+ submit_btn = gr.Button(
134
+ "Run",
135
+ min_width="100px",
136
+ scale=1
137
+ )
138
+
139
+ # generated_image = gr.Image(label="Generated Image", elem_classes="input-image")
140
+ gallery = gr.Gallery(
141
+ label="Generated images",
142
+ show_label=True,
143
+ elem_id="gallery",
144
+ columns=[5],
145
+ # rows=[1],
146
+ object_fit="contain",
147
+ # height="auto"
148
+ )
149
+ with gr.Accordion("Advanced Settings", open=False):
150
+ seed = gr.Slider(minimum=0, maximum=10000, step=1, label="Seed", value=42)
151
+ layer_start_index = gr.Slider(minimum=0, maximum=18, step=1, label="Layer Start Index", value=10)
152
+ timestep_start_index = gr.Slider(minimum=0, maximum=4, step=1, label="Timestep Start Index", value=2)
153
+
154
 
155
  submit_btn.click(
156
  fn=process_inputs,
157
+ inputs=[prompt, words, seed, layer_start_index, timestep_start_index],
158
+ outputs=[gallery]
159
  )
 
 
 
 
 
160
 
161
+ gr.Examples(examples=EXAMPLES, inputs=[prompt, words, seed, layer_start_index, timestep_start_index], outputs=[gallery], fn=process_inputs, cache_examples=True)
162
+
163
+ # Automatically process the first example on launch
164
+ demo.load(process_inputs, inputs=[prompt, words, seed, layer_start_index, timestep_start_index], outputs=[gallery])
165
+
166
 
167
if __name__ == "__main__":
    # max_threads=1 — presumably to keep requests serialized on the single
    # shared GPU; confirm before raising it.
    demo.launch(max_threads=1)