add sliders for cam alpha/thresholding
Browse files
app.py
CHANGED
@@ -12,7 +12,7 @@ from torchvision.transforms import InterpolationMode
|
|
12 |
import torchvision.transforms.functional as TF
|
13 |
from huggingface_hub import hf_hub_download
|
14 |
import numpy as np
|
15 |
-
import matplotlib.
|
16 |
|
17 |
class Fit(torch.nn.Module):
|
18 |
def __init__(
|
@@ -178,9 +178,9 @@ def create_tags(threshold, sorted_tag_score: dict):
|
|
178 |
return text_no_impl, filtered_tag_score
|
179 |
|
180 |
def clear_image():
|
181 |
-
return "", {}, None, {}
|
182 |
|
183 |
-
def cam_inference(img, threshold, evt: gr.SelectData):
|
184 |
target_tag = evt.value
|
185 |
tensor = transform(img).unsqueeze(0)
|
186 |
|
@@ -224,7 +224,7 @@ def cam_inference(img, threshold, evt: gr.SelectData):
|
|
224 |
gradients = {}
|
225 |
activations = {}
|
226 |
|
227 |
-
return create_cam_visualization_pil(img, cam, vis_threshold=threshold)
|
228 |
|
229 |
def create_cam_visualization_pil(image_pil, cam, alpha=0.6, vis_threshold=0.2):
|
230 |
"""
|
@@ -275,10 +275,13 @@ with gr.Blocks(css=".output-class { display: none; }") as demo:
|
|
275 |
""")
|
276 |
original_image_state = gr.State() # stash a copy of the input image
|
277 |
sorted_tag_score_state = gr.State(value={})  # stash the sorted tag→score mapping (not the image)
|
|
|
278 |
with gr.Row():
|
279 |
with gr.Column():
|
280 |
image_input = gr.Image(label="Source", sources=['upload'], type='pil', height=512, show_label=False)
|
281 |
-
threshold_slider = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.20, label="Threshold")
|
|
|
|
|
282 |
with gr.Column():
|
283 |
tag_string = gr.Textbox(label="Tag String")
|
284 |
label_box = gr.Label(label="Tag Predictions", num_top_classes=250, show_label=False)
|
@@ -292,7 +295,7 @@ with gr.Blocks(css=".output-class { display: none; }") as demo:
|
|
292 |
image_input.clear(
|
293 |
fn=clear_image,
|
294 |
inputs=[],
|
295 |
-
outputs=[tag_string, label_box, original_image_state, sorted_tag_score_state]
|
296 |
)
|
297 |
|
298 |
threshold_slider.input(
|
@@ -303,7 +306,19 @@ with gr.Blocks(css=".output-class { display: none; }") as demo:
|
|
303 |
|
304 |
label_box.select(
|
305 |
fn=cam_inference,
|
306 |
-
inputs=[original_image_state,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
307 |
outputs=[image_input]
|
308 |
)
|
309 |
|
|
|
12 |
import torchvision.transforms.functional as TF
|
13 |
from huggingface_hub import hf_hub_download
|
14 |
import numpy as np
|
15 |
+
import matplotlib.colormaps as cm
|
16 |
|
17 |
class Fit(torch.nn.Module):
|
18 |
def __init__(
|
|
|
178 |
return text_no_impl, filtered_tag_score
|
179 |
|
180 |
def clear_image():
    """Reset every UI output after the source image is removed.

    Returns empty values for, in order: the tag string, the label box,
    the stashed original image, the stashed tag/score mapping, and the
    stashed CAM.
    """
    cleared = ("", {}, None, {}, None)
    return cleared
|
182 |
|
183 |
+
def cam_inference(img, threshold, alpha, evt: gr.SelectData):
|
184 |
target_tag = evt.value
|
185 |
tensor = transform(img).unsqueeze(0)
|
186 |
|
|
|
224 |
gradients = {}
|
225 |
activations = {}
|
226 |
|
227 |
+
return create_cam_visualization_pil(img, cam, alpha=alpha, vis_threshold=threshold), cam
|
228 |
|
229 |
def create_cam_visualization_pil(image_pil, cam, alpha=0.6, vis_threshold=0.2):
|
230 |
"""
|
|
|
275 |
""")
|
276 |
original_image_state = gr.State() # stash a copy of the input image
|
277 |
sorted_tag_score_state = gr.State(value={})  # stash the sorted tag→score mapping (not the image)
|
278 |
+
cam_state = gr.State()
|
279 |
with gr.Row():
|
280 |
with gr.Column():
|
281 |
image_input = gr.Image(label="Source", sources=['upload'], type='pil', height=512, show_label=False)
|
282 |
+
threshold_slider = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.20, label="Tag Threshold")
|
283 |
+
cam_slider = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.20, label="CAM Threshold")
|
284 |
+
alpha_slider = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.60, label="CAM Alpha")
|
285 |
with gr.Column():
|
286 |
tag_string = gr.Textbox(label="Tag String")
|
287 |
label_box = gr.Label(label="Tag Predictions", num_top_classes=250, show_label=False)
|
|
|
295 |
image_input.clear(
|
296 |
fn=clear_image,
|
297 |
inputs=[],
|
298 |
+
outputs=[tag_string, label_box, original_image_state, sorted_tag_score_state, cam_state]
|
299 |
)
|
300 |
|
301 |
threshold_slider.input(
|
|
|
306 |
|
307 |
label_box.select(
    fn=cam_inference,
    inputs=[original_image_state, cam_slider, alpha_slider],
    # BUG FIX: cam_inference returns TWO values, (visualization, cam).
    # The original wiring listed only image_input, so the computed CAM was
    # dropped and cam_state stayed None — the two sliders below would then
    # re-render with cam=None. Capture the CAM into cam_state as well.
    outputs=[image_input, cam_state]
)

# Re-render the stored CAM (no recomputation) when the CAM threshold moves.
# Positional mapping: (image_pil, cam, alpha, vis_threshold).
cam_slider.input(
    fn=create_cam_visualization_pil,
    inputs=[original_image_state, cam_state, alpha_slider, cam_slider],
    outputs=[image_input]
)

# Same re-render path when the overlay alpha moves.
alpha_slider.input(
    fn=create_cam_visualization_pil,
    inputs=[original_image_state, cam_state, alpha_slider, cam_slider],
    outputs=[image_input]
)
|
324 |
|