MCut

app.py CHANGED
@@ -10,7 +10,7 @@ MODEL_REPO = "AngelBottomless/camie-tagger-onnxruntime"
 MODEL_FILE = "camie_tagger_initial.onnx"
 META_FILE = "metadata.json"
 IMAGE_SIZE = (512, 512)
-DEFAULT_THRESHOLD = 0.35  # Default
+DEFAULT_THRESHOLD = 0.35  # Default threshold if slider is used
 
 # Download model and metadata from Hugging Face Hub
 model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, cache_dir=".")
@@ -35,7 +35,6 @@ def preprocess_image(pil_image: Image.Image) -> np.ndarray:
 def run_inference(pil_image: Image.Image) -> np.ndarray:
     """
     Preprocess the image and run the ONNX model inference.
-
     Returns the refined logits as a numpy array.
     """
     input_tensor = preprocess_image(pil_image)
@@ -44,13 +43,26 @@ def run_inference(pil_image: Image.Image) -> np.ndarray:
     _, refined_logits = session.run(None, {input_name: input_tensor})
     return refined_logits[0]
 
+def mcut_threshold(probs: np.ndarray) -> float:
+    """
+    Compute the MCut threshold from the given probabilities.
+    Uses the MCut method described in:
+    Largeron, C., Moulin, C., & Gery, M. (2012).
+    """
+    sorted_probs = probs[probs.argsort()[::-1]]
+    diffs = sorted_probs[:-1] - sorted_probs[1:]
+    t = diffs.argmax()
+    thresh = (sorted_probs[t] + sorted_probs[t + 1]) / 2
+    return thresh
+
 def get_tags(refined_logits: np.ndarray, metadata: dict, default_threshold: float):
     """
     Compute probabilities from logits and collect tag predictions.
 
     Returns:
-        results_by_cat: Dictionary mapping each category to a list of (tag, probability)
-
+        results_by_cat: Dictionary mapping each category to a list of (tag, probability)
+            above its threshold.
+        prompt_tags_by_cat: Dictionary for prompt-style output (character and general tags).
         all_artist_tags: All artist tags (with probabilities) regardless of threshold.
     """
     probs = 1 / (1 + np.exp(-refined_logits))
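Aside (not part of the diff): a quick sketch of what the new mcut_threshold does. MCut places the cutoff in the middle of the largest gap between consecutive sorted probabilities; the values below are invented for illustration.

import numpy as np

probs = np.array([0.92, 0.85, 0.40, 0.12, 0.05])      # invented tag probabilities
sorted_probs = probs[probs.argsort()[::-1]]           # descending: 0.92, 0.85, 0.40, 0.12, 0.05
diffs = sorted_probs[:-1] - sorted_probs[1:]          # gaps: 0.07, 0.45, 0.28, 0.07
t = diffs.argmax()                                    # largest gap lies between 0.85 and 0.40
thresh = (sorted_probs[t] + sorted_probs[t + 1]) / 2  # midpoint of that gap
print(thresh)                                         # 0.625, so only the top two tags pass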
@@ -59,7 +71,7 @@ def get_tags(refined_logits: np.ndarray, metadata: dict, default_threshold: floa
     category_thresholds = metadata.get("category_thresholds", {})
 
     results_by_cat = {}
-    # For prompt
+    # For prompt-style output, only include character and general tags (artists handled separately)
     prompt_tags_by_cat = {"character": [], "general": []}
     all_artist_tags = []
 
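Aside (not part of the diff): the body of get_tags that applies these thresholds is unchanged and therefore not shown in the hunks. A hedged sketch of the usual lookup pattern, with invented example values; each category falls back to default_threshold when metadata carries no per-category entry.

# Hypothetical illustration only; the real selection code lives in unchanged lines of app.py.
category_thresholds = {"character": 0.30, "general": 0.40}  # invented values
default_threshold = 0.35
for category in ("character", "general", "artist"):
    threshold = category_thresholds.get(category, default_threshold)
    print(category, threshold)  # "artist" has no entry and falls back to 0.35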
@@ -78,7 +90,8 @@ def get_tags(refined_logits: np.ndarray, metadata: dict, default_threshold: floa
 def format_prompt_tags(prompt_tags_by_cat: dict, all_artist_tags: list) -> str:
     """
     Format the tags for prompt-style output.
-    Only the top artist tag is shown (regardless of threshold),
+    Only the top artist tag is shown (regardless of threshold),
+    and all character and general tags are shown.
 
     Returns a comma-separated string of escaped tags.
     """
@@ -106,13 +119,12 @@ def format_prompt_tags(prompt_tags_by_cat: dict, all_artist_tags: list) -> str:
 def format_detailed_output(results_by_cat: dict, all_artist_tags: list) -> str:
     """
     Format the tags for detailed output.
-
     Returns a Markdown-formatted string listing tags by category.
     """
     if not results_by_cat:
         return "No tags predicted for this image."
 
-    # Include an artist tag even if below threshold
+    # Include an artist tag even if below threshold.
     if "artist" not in results_by_cat and all_artist_tags:
         best_artist_tag, best_artist_prob = max(all_artist_tags, key=lambda item: item[1])
         results_by_cat["artist"] = [(best_artist_tag, best_artist_prob)]
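Aside (not part of the diff): the fallback above keeps the single highest-probability artist tag even when none passed the threshold, for example:

all_artist_tags = [("artist_a", 0.21), ("artist_b", 0.34)]  # invented sub-threshold scores
best_artist_tag, best_artist_prob = max(all_artist_tags, key=lambda item: item[1])
print(best_artist_tag, best_artist_prob)  # artist_b 0.34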
@@ -126,17 +138,24 @@ def format_detailed_output(results_by_cat: dict, all_artist_tags: list) -> str:
         lines.append("")  # blank line between categories
     return "\n".join(lines)
 
-def tag_image(pil_image: Image.Image, output_format: str, threshold: float) -> str:
+def tag_image(pil_image: Image.Image, output_format: str, threshold: float, mcut_enabled: bool) -> str:
     """
     Run inference on the image and return formatted tags based on the chosen output format.
-
-
+    The slider value (threshold) normally overrides the default threshold for tag selection.
+    If mcut_enabled is True, compute a new threshold using MCut from all probabilities.
     """
     if pil_image is None:
         return "Please upload an image."
 
     refined_logits = run_inference(pil_image)
-    results_by_cat, prompt_tags_by_cat, all_artist_tags = get_tags(refined_logits, metadata, default_threshold=threshold)
+    # Compute probabilities from logits
+    probs = 1 / (1 + np.exp(-refined_logits))
+    # If MCut is enabled, override the threshold using the MCut method.
+    computed_threshold = mcut_threshold(probs) if mcut_enabled else threshold
+
+    results_by_cat, prompt_tags_by_cat, all_artist_tags = get_tags(
+        refined_logits, metadata, default_threshold=computed_threshold
+    )
 
     if output_format == "Prompt-style Tags":
         return format_prompt_tags(prompt_tags_by_cat, all_artist_tags)
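Aside (not part of the diff): with the new signature, the checkbox only decides which threshold reaches get_tags; when MCut is enabled the slider value is ignored. A self-contained check using the same invented probabilities as above (mcut_threshold is repeated here so the snippet runs on its own):

import numpy as np

def mcut_threshold(probs: np.ndarray) -> float:
    # Same rule as the function added in the diff.
    sorted_probs = probs[probs.argsort()[::-1]]
    diffs = sorted_probs[:-1] - sorted_probs[1:]
    t = diffs.argmax()
    return (sorted_probs[t] + sorted_probs[t + 1]) / 2

probs = np.array([0.92, 0.85, 0.40, 0.12, 0.05])  # invented probabilities
slider_threshold = 0.35                           # value taken from the slider
for mcut_enabled in (False, True):
    computed_threshold = mcut_threshold(probs) if mcut_enabled else slider_threshold
    kept = int((probs >= computed_threshold).sum())
    print(mcut_enabled, float(computed_threshold), kept)  # False 0.35 3, then True 0.625 2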
@@ -153,7 +172,8 @@ with demo:
         "Upload an image, adjust the threshold, and click **Tag Image** to see predictions."
     )
     gr.Markdown(
-        "*(Note: In prompt-style output, only the top artist tag is displayed along with all character and general tags.)*"
+        "*(Note: In prompt-style output, only the top artist tag is displayed along with all character and general tags. "
+        "If MCut is enabled, its computed threshold overrides the default slider value.)*"
     )
     with gr.Row():
         with gr.Column():
@@ -163,26 +183,32 @@
                 value="Prompt-style Tags",
                 label="Output Format"
             )
-            # Slider to modify the default threshold value used in inference.
             threshold_slider = gr.Slider(
                 minimum=0.0,
                 maximum=1.0,
                 step=0.05,
                 value=DEFAULT_THRESHOLD,
-                label="Threshold"
+                label="Default Threshold"
+            )
+            mcut_checkbox = gr.Checkbox(
+                value=False,
+                label="Use MCut threshold"
             )
             tag_button = gr.Button("🔍 Tag Image")
         with gr.Column():
             output_box = gr.Markdown("")  # Markdown output for formatted results
 
-    # Pass the threshold_slider
-    tag_button.click(
+    # Pass the threshold_slider and mcut_checkbox values into the tag_image function
+    tag_button.click(
+        fn=tag_image,
+        inputs=[image_in, format_choice, threshold_slider, mcut_checkbox],
+        outputs=output_box
+    )
 
     gr.Markdown(
         "----\n"
         "**Model:** [Camie Tagger ONNX](https://huggingface.co/AngelBottomless/camie-tagger-onnxruntime) • "
-        "**Base Model:** Camais03/camie-tagger (61% F1 on 70k tags) • "
-        "**ONNX Runtime:** for efficient CPU inference • "
+        "**Base Model:** Camais03/camie-tagger (61% F1 on 70k tags) • **ONNX Runtime:** for efficient CPU inference • "
         "*Demo built with Gradio Blocks.*"
     )
 
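Aside (not part of the diff): in gr.Blocks, the inputs list handed to .click() maps positionally onto the handler's parameters, so mcut_checkbox arrives in tag_image as the mcut_enabled bool. A minimal standalone sketch of the same wiring pattern; the handler is a stand-in for tag_image and the Radio choices are placeholders, not the app's exact values.

import gradio as gr

def handler(image, output_format, threshold, use_mcut):
    # The fourth input, the checkbox, arrives as a plain bool.
    return f"format={output_format}, threshold={threshold}, use_mcut={use_mcut}"

with gr.Blocks() as demo:
    image_in = gr.Image(type="pil")
    format_choice = gr.Radio(["Prompt-style Tags", "Detailed Output"], value="Prompt-style Tags")
    threshold_slider = gr.Slider(0.0, 1.0, value=0.35, step=0.05, label="Default Threshold")
    mcut_checkbox = gr.Checkbox(value=False, label="Use MCut threshold")
    output_box = gr.Markdown("")
    tag_button = gr.Button("Tag Image")
    tag_button.click(fn=handler, inputs=[image_in, format_choice, threshold_slider, mcut_checkbox], outputs=output_box)

# demo.launch()  # uncomment to run locally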