# File size: 4,437 Bytes
# e347edc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
from typing import Tuple, Dict
import gradio as gr
import supervision as sv
import numpy as np
from PIL import Image
from huggingface_hub import hf_hub_download
from ultralytics import YOLO
import cv2

# Display name -> checkpoint filename for every selectable model variant.
# Each entry is derived from a (size label, one-letter size code) pair; the
# insertion order below is the order in which the names are listed.
MODEL_OPTIONS = {
    f"YOLOv11-{size_label}": f"yolov11{size_code}-seg.pt"
    for size_label, size_code in (
        ("Nano", "n"),
        ("Small", "s"),
        ("Medium", "m"),
        ("Large", "l"),
        ("XLarge", "x"),
    )
}
print(MODEL_OPTIONS)
# Eagerly fetch every checkpoint listed in MODEL_OPTIONS from the Hugging Face
# Hub and wrap it in a YOLO instance, keyed by the same display name.
# This runs at import time, so a failed download aborts start-up.
models: Dict[str, YOLO] = {
    display_name: YOLO(
        hf_hub_download(
            repo_id="wjbmattingly/kraken-yiddish",
            filename=weights_file,
        )
    )
    for display_name, weights_file in MODEL_OPTIONS.items()
}

# Supervision annotators (labels, masks, boxes).
# NOTE(review): none of these is referenced by the code visible in this file.
LABEL_ANNOTATOR = sv.LabelAnnotator(text_color=sv.Color.BLACK)
MASK_ANNOTATOR = sv.MaskAnnotator()
BOX_ANNOTATOR = sv.BoxAnnotator()

def detect_and_annotate(
    image: np.ndarray,
    model_name: str,
    conf_threshold: float,
    iou_threshold: float
) -> np.ndarray:
    """Run the selected YOLO model on an image and return the annotated result.

    The input is treated as RGB, which is the channel order delivered by a
    Gradio ``type='numpy'`` image component. Ultralytics interprets raw NumPy
    arrays as BGR and ``Results.plot()`` returns a BGR array, so three-channel
    images are flipped to BGR before inference and the rendered overlay is
    flipped back to RGB before being returned.

    Args:
        image: Input image as a NumPy array (RGB when three-channel).
        model_name: Key into the module-level ``models`` dictionary.
        conf_threshold: Value forwarded to the model as ``conf``.
        iou_threshold: Value forwarded to the model as ``iou``.

    Returns:
        The annotated image in RGB order. If inference or plotting raises,
        the error is printed with its traceback and an unannotated copy of
        ``image`` is returned instead.

    Raises:
        KeyError: If ``model_name`` is not present in ``models`` (the lookup
            happens outside the best-effort ``try`` block).
    """
    # Get the selected model
    model = models[model_name]

    try:
        # Ultralytics assumes BGR for ndarray sources; only flip when the
        # array really has three colour channels.
        if image.ndim == 3 and image.shape[2] == 3:
            model_input = np.ascontiguousarray(image[..., ::-1])
        else:
            model_input = image

        # Perform inference with YOLO standard settings
        results = model(
            model_input,
            conf=conf_threshold,
            iou=iou_threshold,
            verbose=False,
            device='cpu'
        )

        # Get the first result and create annotated image
        result = results[0]  # Get first result from list
        annotated_image = result.plot(
            conf=True,  # Show confidence scores
            line_width=None,  # Auto-scale line width
            font_size=None,  # Auto-scale font size
            boxes=True,  # Draw boxes
            masks=True,  # Draw masks if available
            probs=True,  # Draw classification probabilities if available
            labels=True  # Draw labels
        )

        # plot() renders in BGR; convert back so the caller receives RGB.
        if annotated_image.ndim == 3 and annotated_image.shape[2] == 3:
            annotated_image = np.ascontiguousarray(annotated_image[..., ::-1])

        return annotated_image

    except Exception as e:
        # Deliberate best-effort: report the failure on the console and hand
        # back the untouched input so the UI still shows something.
        print(f"\nError during detection: {str(e)}")
        import traceback
        traceback.print_exc()
        return image.copy()

# Build the Gradio UI: inputs and settings on the left, result on the right.
with gr.Blocks() as demo:
    gr.Markdown("# Yiddish Document Line Parser")

    # Model names offered in the dropdown; the first one is preselected.
    available_models = list(MODEL_OPTIONS)
    # Both threshold sliders share the same 0..1 range and step.
    slider_range = dict(minimum=0.0, maximum=1.0, step=0.05)

    with gr.Row():
        # Left column: source image, detection settings, action buttons.
        with gr.Column():
            input_image = gr.Image(label="Input Image", type='numpy')

            with gr.Accordion("Detection Settings", open=True):
                model_selector = gr.Dropdown(
                    label="Model",
                    info="Select YOLO model variant",
                    choices=available_models,
                    value=available_models[0],
                )
                with gr.Row():
                    conf_threshold = gr.Slider(
                        label="Confidence Threshold",
                        value=0.25,
                        **slider_range,
                    )
                    iou_threshold = gr.Slider(
                        label="IoU Threshold",
                        value=0.45,
                        info="Decrease for stricter detection, increase for more overlapping boxes",
                        **slider_range,
                    )

            with gr.Row():
                clear_btn = gr.Button("Clear")
                detect_btn = gr.Button("Detect", variant="primary")

        # Right column: annotated output.
        with gr.Column():
            output_image = gr.Image(label="Detection Result", type='numpy')

    def process_image(
        image: np.ndarray,
        model_name: str,
        conf_threshold: float,
        iou_threshold: float
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Callback for the Detect button.

        Returns a pair (input image, annotated image) that is written back to
        the input and output image components. When no image is present, both
        entries are ``None``.
        """
        if image is not None:
            return image, detect_and_annotate(
                image, model_name, conf_threshold, iou_threshold
            )
        return None, None

    def clear():
        """Callback for the Clear button: empty both image components."""
        return None, None

    # Wire the buttons to their callbacks (Detect first, then Clear).
    detect_btn.click(
        fn=process_image,
        inputs=[input_image, model_selector, conf_threshold, iou_threshold],
        outputs=[input_image, output_image],
    )
    clear_btn.click(
        fn=clear,
        inputs=None,
        outputs=[input_image, output_image],
    )

# Start the app only when executed as a script.
if __name__ == "__main__":
    demo.launch(show_error=True, debug=True)