JointTaggerProject-Inference-Beta-AttnVis

Running

App Files Community

drhead committed 8 days ago

Commit

3cb1c16

verified ·

1 Parent(s): 02a9646

fix leaky globals

Browse files

Files changed (1) hide show

app.py +41 -66

app.py CHANGED Viewed

@@ -154,19 +154,15 @@ allowed_tags = list(tags.keys())
 for idx, tag in enumerate(allowed_tags):
     allowed_tags[idx] = tag.replace("_", " ")
-sorted_tag_score = {}
-input_image = None
 @spaces.GPU(duration=5)
-def run_classifier(image, threshold):
-    global sorted_tag_score, input_image
-    input_image = image.convert('RGBA')
-    img = input_image
     tensor = transform(img).unsqueeze(0)
     with torch.no_grad():
-        probits = model(tensor)[0]
         values, indices = probits.topk(250)
     tag_score = dict()
@@ -174,37 +170,18 @@ def run_classifier(image, threshold):
         tag_score[allowed_tags[indices[i]]] = values[i].item()
     sorted_tag_score = dict(sorted(tag_score.items(), key=lambda item: item[1], reverse=True))
-    return create_tags(threshold)
-def create_tags(threshold):
-    global sorted_tag_score
     filtered_tag_score = {key: value for key, value in sorted_tag_score.items() if value > threshold}
     text_no_impl = ", ".join(filtered_tag_score.keys())
     return text_no_impl, filtered_tag_score
 def clear_image():
-    global sorted_tag_score, input_image
-    input_image = None
-    sorted_tag_score = {}
-    return "", {}
-target_tag_index = None
-# Store hooks and intermediate values
-gradients = {}
-activations = {}
-def hook_forward(module, input, output):
-    activations['value'] = output
-def hook_backward(module, grad_in, grad_out):
-    gradients['value'] = grad_out[0]
-def cam_inference(threshold, evt: gr.SelectData):
     target_tag = evt.value
-    print(f"target_tag: {target_tag}")
-    global input_image, sorted_tag_score, target_tag_index, gradients, activations
-    img = input_image
     tensor = transform(img).unsqueeze(0)
     gradients = {}
@@ -212,46 +189,44 @@ def cam_inference(threshold, evt: gr.SelectData):
     cam = None
     target_tag_index = None
-    if target_tag:
-        if target_tag not in allowed_tags:
-            print(f"Warning: Target tag '{target_tag}' not found in allowed tags.")
-            target_tag = None
-        else:
-            target_tag_index = allowed_tags.index(target_tag)
-            handle_forward = model.norm.register_forward_hook(hook_forward)
-            handle_backward = model.norm.register_full_backward_hook(hook_backward)
-    probits = model(tensor)[0].cpu()
-    if target_tag is not None and target_tag_index is not None:
-        model.zero_grad()
-        target_score = probits[target_tag_index]
-        target_score.backward(retain_graph=True)
-        grads = gradients.get('value')
-        acts = activations.get('value')
-        if grads is not None and acts is not None:
-            patch_grads = grads
-            patch_acts = acts
-            weights = torch.mean(patch_grads, dim=1).squeeze(0)
-            cam_1d = torch.einsum('pe,e->p', patch_acts.squeeze(0), weights)
-            cam_1d = torch.relu(cam_1d)
-            cam = cam_1d.reshape(27, 27).detach().cpu().numpy()
-        handle_forward.remove()
-        handle_backward.remove()
-        gradients = {}
-        activations = {}
-    return create_cam_visualization_pil(cam, vis_threshold=threshold)
-def create_cam_visualization_pil(cam, alpha=0.6, vis_threshold=0.2):
     """
     Overlays CAM on image and returns a PIL image.
@@ -265,9 +240,6 @@ def create_cam_visualization_pil(cam, alpha=0.6, vis_threshold=0.2):
         PIL.Image.Image with overlay
     """
-    global input_image
-    # Convert to RGB (in case RGBA or others)
-    image_pil = input_image
     w, h = image_pil.size
     # Resize CAM to match image
@@ -297,8 +269,11 @@ with gr.Blocks(css=".output-class { display: none; }") as demo:
     This tagger is the result of joint efforts between members of the RedRocket team, with distinctions given to Thessalo for creating the foundation for this project with his efforts, RedHotTensors for redesigning the process into a second-order method that models information expectation, and drhead for dataset prep, creation of training code and supervision of training runs.
     Special thanks to Minotoro at frosting.ai for providing the compute power for this project.
     """)
     with gr.Row():
         with gr.Column():
             image_input = gr.Image(label="Source", sources=['upload'], type='pil', height=512, show_label=False)
@@ -310,13 +285,13 @@ with gr.Blocks(css=".output-class { display: none; }") as demo:
     image_input.upload(
         fn=run_classifier,
         inputs=[image_input, threshold_slider],
-        outputs=[tag_string, label_box]
     )
     image_input.clear(
         fn=clear_image,
         inputs=[],
-        outputs=[tag_string, label_box]
     )
     threshold_slider.input(
@@ -327,7 +302,7 @@ with gr.Blocks(css=".output-class { display: none; }") as demo:
     label_box.select(
         fn=cam_inference,
-        inputs=[threshold_slider],
         outputs=[image_input]
     )

 for idx, tag in enumerate(allowed_tags):
     allowed_tags[idx] = tag.replace("_", " ")
 @spaces.GPU(duration=5)
+def run_classifier(image: Image.Image, threshold):
+    img = image.convert('RGBA')
     tensor = transform(img).unsqueeze(0)
     with torch.no_grad():
+        probits = model(tensor)[0] # type: torch.Tensor
         values, indices = probits.topk(250)
     tag_score = dict()
         tag_score[allowed_tags[indices[i]]] = values[i].item()
     sorted_tag_score = dict(sorted(tag_score.items(), key=lambda item: item[1], reverse=True))
+    return *create_tags(threshold, sorted_tag_score), img
+def create_tags(threshold, sorted_tag_score: dict):
     filtered_tag_score = {key: value for key, value in sorted_tag_score.items() if value > threshold}
     text_no_impl = ", ".join(filtered_tag_score.keys())
     return text_no_impl, filtered_tag_score
 def clear_image():
+    return "", {}, None
+def cam_inference(img, threshold, evt: gr.SelectData):
     target_tag = evt.value
     tensor = transform(img).unsqueeze(0)
     gradients = {}
     cam = None
     target_tag_index = None
+    def hook_forward(module, input, output):
+        activations['value'] = output
+    def hook_backward(module, grad_in, grad_out):
+        gradients['value'] = grad_out[0]
+    target_tag_index = allowed_tags.index(target_tag)
+    handle_forward = model.norm.register_forward_hook(hook_forward)
+    handle_backward = model.norm.register_full_backward_hook(hook_backward)
+    probits = model(tensor)[0].cpu()
+    model.zero_grad()
+    target_score = probits[target_tag_index]
+    target_score.backward(retain_graph=True)
+    grads = gradients.get('value')
+    acts = activations.get('value')
+    patch_grads = grads
+    patch_acts = acts
+    weights = torch.mean(patch_grads, dim=1).squeeze(0)
+    cam_1d = torch.einsum('pe,e->p', patch_acts.squeeze(0), weights)
+    cam_1d = torch.relu(cam_1d)
+    cam = cam_1d.reshape(27, 27).detach().cpu().numpy()
+    handle_forward.remove()
+    handle_backward.remove()
+    gradients = {}
+    activations = {}
+    return create_cam_visualization_pil(img, cam, vis_threshold=threshold)
+def create_cam_visualization_pil(image_pil, cam, alpha=0.6, vis_threshold=0.2):
     """
     Overlays CAM on image and returns a PIL image.
         PIL.Image.Image with overlay
     """
     w, h = image_pil.size
     # Resize CAM to match image
     This tagger is the result of joint efforts between members of the RedRocket team, with distinctions given to Thessalo for creating the foundation for this project with his efforts, RedHotTensors for redesigning the process into a second-order method that models information expectation, and drhead for dataset prep, creation of training code and supervision of training runs.
+    Thanks to metal63 for providing initial code for attention visualization (click a tag in the tag list to try it out!)
     Special thanks to Minotoro at frosting.ai for providing the compute power for this project.
     """)
+    original_image_state = gr.State() # stash a copy of the input image
     with gr.Row():
         with gr.Column():
             image_input = gr.Image(label="Source", sources=['upload'], type='pil', height=512, show_label=False)
     image_input.upload(
         fn=run_classifier,
         inputs=[image_input, threshold_slider],
+        outputs=[tag_string, label_box, original_image_state]
     )
     image_input.clear(
         fn=clear_image,
         inputs=[],
+        outputs=[tag_string, label_box, original_image_state]
     )
     threshold_slider.input(
     label_box.select(
         fn=cam_inference,
+        inputs=[original_image_state, threshold_slider],
         outputs=[image_input]
     )