Spaces:

wjbmattingly
/

yiddish-yolo

Sleeping

File size: 4,437 Bytes

e347edc

from typing import Tuple, Dict
import gradio as gr
import supervision as sv
import numpy as np
from PIL import Image
from huggingface_hub import hf_hub_download
from ultralytics import YOLO
import cv2

# Dropdown label -> weights filename inside the Hugging Face Hub repository
MODEL_OPTIONS = {
    "YOLOv11-Nano": "yolov11n-seg.pt",
    "YOLOv11-Small": "yolov11s-seg.pt",
    "YOLOv11-Medium": "yolov11m-seg.pt",
    "YOLOv11-Large": "yolov11l-seg.pt",
    "YOLOv11-XLarge": "yolov11x-seg.pt",
}
print(MODEL_OPTIONS)

# Loaded YOLO instances, keyed by the same labels as MODEL_OPTIONS.
# Every variant is resolved through hf_hub_download and instantiated here,
# at import time, so detect_and_annotate only has to index this dictionary.
models: Dict[str, YOLO] = {}
for name, model_file in MODEL_OPTIONS.items():
    model_path = hf_hub_download(repo_id="wjbmattingly/kraken-yiddish", filename=model_file)
    models[name] = YOLO(model_path)

# Module-level supervision annotators (box, mask, and black-text label)
BOX_ANNOTATOR = sv.BoxAnnotator()
MASK_ANNOTATOR = sv.MaskAnnotator()
LABEL_ANNOTATOR = sv.LabelAnnotator(text_color=sv.Color.BLACK)

def detect_and_annotate(
    image: np.ndarray,
    model_name: str,
    conf_threshold: float,
    iou_threshold: float
) -> np.ndarray:
    """Run the selected YOLO model on an image and draw its predictions.

    The image arrives from a Gradio ``Image`` component in RGB channel order,
    whereas Ultralytics interprets NumPy input as BGR (the OpenCV convention)
    and ``Results.plot()`` returns a BGR array as well. Three-channel images
    are therefore flipped to BGR before inference and the rendered result is
    flipped back, so the model sees the channel order it expects and the
    overlay colours display correctly.

    Args:
        image: Input image array; a three-channel image is assumed to be RGB.
        model_name: Key into the module-level ``models`` dictionary.
        conf_threshold: Forwarded to the model as its ``conf`` argument.
        iou_threshold: Forwarded to the model as its ``iou`` argument.

    Returns:
        The annotated image, in the same channel order as ``image``. If
        inference or plotting raises, an unannotated copy of ``image`` is
        returned instead (the error is printed with its traceback).

    Raises:
        KeyError: If ``model_name`` is not a key of ``models``.
    """
    # Get the selected model
    model = models[model_name]

    # Only plain three-channel images are channel-swapped; anything else
    # (e.g. a 2-D grayscale array) is passed through unchanged.
    is_rgb = image.ndim == 3 and image.shape[2] == 3
    # ascontiguousarray materialises the reversed view, since a negative-stride
    # view is not accepted by every downstream OpenCV/torch call.
    model_input = np.ascontiguousarray(image[..., ::-1]) if is_rgb else image

    try:
        # Perform inference with YOLO standard settings (pinned to the CPU)
        results = model(
            model_input,
            conf=conf_threshold,
            iou=iou_threshold,
            verbose=False,
            device='cpu'
        )

        # Get the first result and create annotated image
        result = results[0]  # Get first result from list
        annotated_image = result.plot(
            conf=True,  # Show confidence scores
            line_width=None,  # Auto-scale line width
            font_size=None,  # Auto-scale font size
            boxes=True,  # Draw boxes
            masks=True,  # Draw masks if available
            probs=True,  # Draw classification probabilities if available
            labels=True  # Draw labels
        )
    except Exception as e:
        # Best-effort UI: log the failure and hand back the untouched input
        # rather than breaking the request.
        print(f"\nError during detection: {e}")
        import traceback
        traceback.print_exc()
        return image.copy()

    if is_rgb:
        # Convert the BGR rendering back to RGB for display in Gradio.
        annotated_image = np.ascontiguousarray(annotated_image[..., ::-1])
    return annotated_image

# Build the Gradio UI: handlers first, then the layout, then the event wiring
with gr.Blocks() as demo:

    def process_image(
        image: np.ndarray,
        model_name: str,
        conf_threshold: float,
        iou_threshold: float
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Handle a click on the "Detect" button.

        Returns a pair ``(input image, annotated image)`` matching the two
        output components. When no image has been provided, both entries
        are ``None``.
        """
        if image is not None:
            return image, detect_and_annotate(image, model_name, conf_threshold, iou_threshold)
        return None, None

    def clear():
        """Reset both image components by returning ``None`` for each."""
        return (None, None)

    gr.Markdown("# Yiddish Document Line Parser")

    with gr.Row():
        # Left column: input image, detection settings and action buttons
        with gr.Column():
            input_image = gr.Image(type='numpy', label="Input Image")
            with gr.Accordion("Detection Settings", open=True):
                model_selector = gr.Dropdown(
                    label="Model",
                    info="Select YOLO model variant",
                    choices=list(MODEL_OPTIONS),
                    # Default to the first entry of MODEL_OPTIONS
                    value=list(MODEL_OPTIONS)[0],
                )
                with gr.Row():
                    conf_threshold = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        step=0.05,
                        value=0.25,
                        label="Confidence Threshold",
                    )
                    iou_threshold = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        step=0.05,
                        value=0.45,
                        label="IoU Threshold",
                        info="Decrease for stricter detection, increase for more overlapping boxes",
                    )
            with gr.Row():
                clear_btn = gr.Button("Clear")
                detect_btn = gr.Button("Detect", variant="primary")

        # Right column: annotated result
        with gr.Column():
            output_image = gr.Image(type='numpy', label="Detection Result")

    # Both handlers write to the same pair of image components
    detect_btn.click(
        fn=process_image,
        inputs=[input_image, model_selector, conf_threshold, iou_threshold],
        outputs=[input_image, output_image],
    )
    clear_btn.click(
        fn=clear,
        inputs=None,
        outputs=[input_image, output_image],
    )

if __name__ == "__main__":
    # Start the app only when executed as a script, with debug and
    # show_error both enabled.
    demo.launch(show_error=True, debug=True)