"""Gradio demo that runs YOLO models on an uploaded image and shows the result.

Every weight file listed in ``MODEL_OPTIONS`` is downloaded from the Hugging
Face Hub and loaded into memory when this module is imported.
"""

import traceback
from typing import Dict, Optional, Tuple

import cv2
import gradio as gr
import numpy as np
import supervision as sv
from huggingface_hub import hf_hub_download
from PIL import Image
from ultralytics import YOLO

# UI display name -> weight file name inside the Hub repository.
MODEL_OPTIONS = {
    "YOLOv11-Nano": "yolov11n-seg.pt",
    "YOLOv11-Small": "yolov11s-seg.pt",
    "YOLOv11-Medium": "yolov11m-seg.pt",
    "YOLOv11-Large": "yolov11l-seg.pt",
    "YOLOv11-XLarge": "yolov11x-seg.pt",
}
print(MODEL_OPTIONS)

# Loaded models, keyed by the same display names as MODEL_OPTIONS.
models: Dict[str, YOLO] = {}

# Eagerly download and load every model so switching in the UI is instant.
for name, model_file in MODEL_OPTIONS.items():
    model_path = hf_hub_download(
        repo_id="wjbmattingly/kraken-yiddish",
        filename=model_file,
    )
    models[name] = YOLO(model_path)

# Supervision annotators. NOTE(review): nothing in this file references them;
# drawing is done by ``Results.plot()`` in ``detect_and_annotate``.
LABEL_ANNOTATOR = sv.LabelAnnotator(text_color=sv.Color.BLACK)
MASK_ANNOTATOR = sv.MaskAnnotator()
BOX_ANNOTATOR = sv.BoxAnnotator()


def detect_and_annotate(
    image: np.ndarray,
    model_name: str,
    conf_threshold: float,
    iou_threshold: float,
) -> np.ndarray:
    """Run the selected model on *image* and return the annotated picture.

    Args:
        image: RGB image as delivered by ``gr.Image(type='numpy')``.
        model_name: Key into ``models`` / ``MODEL_OPTIONS``.
        conf_threshold: Minimum confidence passed to the model as ``conf``.
        iou_threshold: IoU threshold passed to the model as ``iou``.

    Returns:
        The annotated image in RGB channel order. If inference or plotting
        fails, the error is printed and an unannotated copy of *image* is
        returned instead.

    Raises:
        KeyError: If *model_name* is not a key of ``models``.
    """
    model = models[model_name]

    try:
        # Ultralytics interprets numpy inputs as BGR, while Gradio hands us
        # RGB; convert so the model sees the channel order it expects.
        bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        results = model(
            bgr_image,
            conf=conf_threshold,
            iou=iou_threshold,
            verbose=False,
            device='cpu',
        )

        # A single input image yields a single-element result list.
        result = results[0]
        annotated_bgr = result.plot(
            conf=True,  # Show confidence scores
            line_width=None,  # Auto-scale line width
            font_size=None,  # Auto-scale font size
            boxes=True,  # Draw boxes
            masks=True,  # Draw masks if available
            probs=True,  # Draw classification probabilities if available
            labels=True,  # Draw labels
        )

        # ``plot()`` returns BGR; Gradio displays numpy arrays as RGB.
        return cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)

    except Exception as e:
        # Best-effort UI: report the failure and show the untouched input.
        print(f"\nError during detection: {str(e)}")
        traceback.print_exc()
        return image.copy()


# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Yiddish Document Line Parser")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(
                label="Input Image",
                type='numpy',
            )
            with gr.Accordion("Detection Settings", open=True):
                model_selector = gr.Dropdown(
                    choices=list(MODEL_OPTIONS),
                    value=next(iter(MODEL_OPTIONS)),
                    label="Model",
                    info="Select YOLO model variant",
                )
                with gr.Row():
                    conf_threshold = gr.Slider(
                        label="Confidence Threshold",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.05,
                        value=0.25,
                    )
                    iou_threshold = gr.Slider(
                        label="IoU Threshold",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.05,
                        value=0.45,
                        info="Decrease for stricter detection, increase for more overlapping boxes",
                    )
            with gr.Row():
                clear_btn = gr.Button("Clear")
                detect_btn = gr.Button("Detect", variant="primary")

        with gr.Column():
            output_image = gr.Image(
                label="Detection Result",
                type='numpy',
            )

    def process_image(
        image: Optional[np.ndarray],
        model_name: str,
        conf_threshold: float,
        iou_threshold: float,
    ) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
        """Return ``(input image, annotated image)`` for the two image widgets.

        Returns ``(None, None)`` when no image has been supplied.
        """
        if image is None:
            return None, None

        annotated_image = detect_and_annotate(
            image, model_name, conf_threshold, iou_threshold
        )
        return image, annotated_image

    def clear() -> Tuple[None, None]:
        """Reset both image widgets to empty."""
        return None, None

    # Connect buttons to functions
    detect_btn.click(
        process_image,
        inputs=[input_image, model_selector, conf_threshold, iou_threshold],
        outputs=[input_image, output_image],
    )
    clear_btn.click(
        clear,
        inputs=None,
        outputs=[input_image, output_image],
    )

if __name__ == "__main__":
    demo.launch(debug=True, show_error=True)