# File size: 9,238 Bytes
# ab78124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
import json
from typing import Dict, Union, List
from gliner import GLiNER
import gradio as gr
import os

# Selectable models: display name shown in the UI -> identifier handed to
# GLiNER.from_pretrained. The first entry is the UI's default selection.
MODELS: Dict[str, str] = {
    "GLiNER Medium v2.1": "urchade/gliner_medium-v2.1",
    "NuNER Zero": "numind/NuZero_token",
    "GLiNER Multi PII": "urchade/gliner_multi_pii-v1",
}

# Example sets: display name shown in the UI -> JSON file containing the examples.
EXAMPLE_SETS: Dict[str, str] = {
    "General NER": "examples.json",
    "NuNER Zero": "examples-nuner.json",
    "PII Detection": "examples-pii.json",
}

# Cache of already-instantiated models, keyed by display name (filled on demand).
loaded_models: dict = {}

# Examples of the currently selected example set.
current_examples: list = []

def load_example_set(example_set_name):
    """Read and return the examples belonging to ``example_set_name``.

    The name is looked up in ``EXAMPLE_SETS`` to find a JSON file, which is
    opened as UTF-8 and parsed. An unknown name, a missing file, or invalid
    JSON is reported on stdout and results in an empty list instead of an
    exception.
    """
    try:
        with open(EXAMPLE_SETS[example_set_name], "r", encoding="utf-8") as handle:
            return json.load(handle)
    except (KeyError, FileNotFoundError, json.JSONDecodeError) as err:
        print(f"Error loading example set {example_set_name}: {err}")
    # Only reached when loading failed.
    return []

# Load the default example set at import time so the widgets built further down
# can be pre-filled from it. load_example_set returns [] (after printing the
# error) when the set cannot be loaded, so this may leave the list empty.
current_examples = load_example_set("General NER")

def get_model(model_name):
    """Return the model registered under ``model_name``, loading it on first use.

    Models are instantiated with ``GLiNER.from_pretrained`` using the
    identifier stored in ``MODELS`` and then kept in ``loaded_models`` so that
    later calls with the same name reuse the same object. An unknown name
    raises ``KeyError`` from the ``MODELS`` lookup.
    """
    # Fast path: already instantiated earlier.
    if model_name in loaded_models:
        return loaded_models[model_name]

    model = GLiNER.from_pretrained(MODELS[model_name])
    loaded_models[model_name] = model
    return model

def merge_entities(entities):
    """Merge adjacent entities of the same type.

    Two consecutive entities are merged when they share the same ``entity``
    label and the second one starts either exactly where the first one ends
    or one character after it. The list is processed in the order given, so
    it is expected to already be ordered by position.

    Args:
        entities: List of dicts with at least the keys ``entity``, ``word``,
            ``start`` and ``end``.

    Returns:
        A new list of new dicts. The input list and its dicts are left
        untouched. A merged entity keeps every other field (e.g. ``score``)
        of the first entity in its run.
    """
    if not entities:
        return []
    merged = []
    # Work on copies so the caller's dicts are never modified in place.
    current = dict(entities[0])
    for next_entity in entities[1:]:
        gap = next_entity['start'] - current['end']
        if next_entity['entity'] == current['entity'] and gap in (0, 1):
            # A gap of one character is taken to be a space; spans that touch
            # directly have nothing between them, so no separator is inserted.
            separator = ' ' if gap == 1 else ''
            current['word'] += separator + next_entity['word']
            current['end'] = next_entity['end']
        else:
            merged.append(current)
            current = dict(next_entity)
    merged.append(current)
    return merged

def ner(
    text: str,
    labels: str,
    model_name: str,
    threshold: float,
    nested_ner: bool,
    merge_entities_toggle: bool
) -> Dict[str, Union[str, List]]:
    """Run named entity recognition with the selected model and parameters.

    Args:
        text: Text to analyse.
        labels: Comma-separated entity labels. Surrounding whitespace is
            stripped and blank entries (e.g. from a trailing comma) are ignored.
        model_name: Key of ``MODELS`` selecting the model to use.
        threshold: Confidence threshold forwarded to ``predict_entities``.
        nested_ner: When true, nested entities are allowed (the model is
            called with ``flat_ner=False``).
        merge_entities_toggle: When true, the predictions are post-processed
            with ``merge_entities``.

    Returns:
        A dict with the input ``text`` and an ``entities`` list whose items
        have the keys ``entity``, ``word``, ``start``, ``end`` and ``score``.
        If the text is blank or no usable label was given, the entity list is
        empty and no model is loaded or run.
    """
    text = text or ""

    # Split labels, dropping blanks so the model is never asked for an "" label.
    label_list = [
        label.strip() for label in (labels or "").split(",") if label.strip()
    ]

    # Nothing to predict: avoid loading a model and running inference at all.
    if not text.strip() or not label_list:
        return {
            "text": text,
            "entities": [],
        }

    # Get the selected model (loaded on first use).
    model = get_model(model_name)

    # Predict entities and map them to the output item format.
    entities = [
        {
            "entity": entity["label"],
            "word": entity["text"],
            "start": entity["start"],
            "end": entity["end"],
            "score": entity.get("score", 0),
        }
        for entity in model.predict_entities(
            text, label_list, flat_ner=not nested_ner, threshold=threshold
        )
    ]

    # Merge entities if enabled
    if merge_entities_toggle:
        entities = merge_entities(entities)

    return {
        "text": text,
        "entities": entities,
    }

def load_example(example_idx):
    """Return the widget values for one example of the current example set.

    Args:
        example_idx: Zero-based position in ``current_examples``.

    Returns:
        A 5-tuple ``(text, labels, threshold, nested_ner, merge_entities)``.
        The first four values are elements 0-3 of the selected example and
        the merge flag is always ``False``. If the index is out of range
        (including negative) or no examples are loaded, the defaults
        ``("", "", 0.3, False, False)`` are returned.
    """
    # Reject negative indices explicitly: Python would otherwise count from
    # the end of the list and silently return the wrong example.
    if not 0 <= example_idx < len(current_examples):
        return "", "", 0.3, False, False

    example = current_examples[example_idx]
    return example[0], example[1], example[2], example[3], False

def switch_example_set(example_set_name):
    """Switch to a different example set and update the interface.

    Replaces the module-level ``current_examples`` with the named set and
    returns new values for the widgets.

    Args:
        example_set_name: Key of ``EXAMPLE_SETS`` to load.

    Returns:
        A 6-tuple ``(text, labels, threshold, nested_ner, merge_entities,
        dropdown_update)``. The first four values come from the first example
        of the new set, the merge flag is always ``False`` and the last item
        is an update for the example dropdown listing "Example 1" ..
        "Example N". If the set is empty or failed to load, default widget
        values and an empty dropdown are returned.
    """
    # NOTE(review): current_examples is module-level state, so it appears to be
    # shared by everything running in this process - confirm that is intended.
    global current_examples
    current_examples = load_example_set(example_set_name)

    if not current_examples:
        return "", "", 0.3, False, False, gr.update(choices=[], value=None)

    # Show the first example from the new set.
    example = current_examples[0]
    example_names = [f"Example {n}" for n in range(1, len(current_examples) + 1)]
    # gr.update is the version-independent way to patch a component; the
    # per-component Dropdown.update classmethod is not available in Gradio 4.
    return (
        example[0],
        example[1],
        example[2],
        example[3],
        False,
        gr.update(choices=example_names, value="Example 1"),
    )

# Build the Gradio UI: model / example pickers, the input widgets, the
# highlighted output, and the event wiring that re-runs `ner` on changes.
with gr.Blocks(title="Unified NER Interface") as demo:
    gr.Markdown(
        """
        # Unified Zero-shot Named Entity Recognition Interface
        
        This interface allows you to compare different zero-shot Named Entity Recognition models.
        
        ## Models Available:
        - **GLiNER Medium v2.1**: The original GLiNER medium model
        - **NuNER Zero**: A specialized token-based NER model
        - **GLiNER Multi PII**: Fine-tuned for detecting personally identifiable information across multiple languages
        
        ## Features:
        - Select different models
        - Switch between example sets for different use cases
        - Toggle nested entity recognition
        - Toggle entity merging (combining adjacent entities of the same type)
        - Select from various examples within each set
        """
    )

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=list(MODELS.keys()),
            value=list(MODELS.keys())[0],
            label="Model",
            info="Select the NER model to use"
        )
        example_set_dropdown = gr.Dropdown(
            choices=list(EXAMPLE_SETS.keys()),
            value="General NER",
            label="Example Set",
            info="Select a set of example texts"
        )

    # Entries of the example picker; empty when the default set failed to load.
    example_choices = [f"Example {i+1}" for i in range(len(current_examples))]

    with gr.Row():
        example_dropdown = gr.Dropdown(
            choices=example_choices,
            # Only preselect "Example 1" when it actually exists in the choices.
            value=example_choices[0] if example_choices else None,
            label="Example",
            info="Select a specific example text"
        )

    input_text = gr.Textbox(
        value=current_examples[0][0] if current_examples else "",
        label="Text input",
        placeholder="Enter your text here",
        lines=5
    )

    with gr.Row():
        labels = gr.Textbox(
            value=current_examples[0][1] if current_examples else "",
            label="Entity Labels",
            placeholder="Enter your labels here (comma separated)",
            scale=2,
        )
        threshold = gr.Slider(
            0,
            1,
            value=current_examples[0][2] if current_examples else 0.3,
            step=0.01,
            label="Confidence Threshold",
            info="Lower the threshold to increase how many entities get predicted.",
            scale=1,
        )

    with gr.Row():
        nested_ner = gr.Checkbox(
            value=current_examples[0][3] if current_examples else False,
            label="Nested NER",
            info="Allow entities to be contained within other entities",
        )
        merge_entities_toggle = gr.Checkbox(
            value=False,
            label="Merge Adjacent Entities",
            info="Combine adjacent entities of the same type into a single entity",
        )

    output = gr.HighlightedText(label="Predicted Entities")
    submit_btn = gr.Button("Submit")

    # Widgets that are overwritten when an example is loaded.
    example_outputs = [input_text, labels, threshold, nested_ner, merge_entities_toggle]

    # Handling example set selection
    example_set_dropdown.change(
        fn=switch_example_set,
        inputs=[example_set_dropdown],
        outputs=example_outputs + [example_dropdown]
    )

    def select_example(choice):
        """Load the example named by a dropdown entry such as "Example 3"."""
        if not choice:
            # The dropdown was cleared (switch_example_set does this when a set
            # cannot be loaded): there is nothing to parse, leave widgets as is.
            return tuple(gr.update() for _ in example_outputs)
        # Entries are 1-based in the UI, load_example expects a 0-based index.
        return load_example(int(choice.split()[1]) - 1)

    # Handling example selection within a set
    example_dropdown.change(
        fn=select_example,
        inputs=[example_dropdown],
        outputs=example_outputs
    )

    # Example sets that have a dedicated model; every other set falls back to
    # the general-purpose model.
    recommended_models = {
        "PII Detection": "GLiNER Multi PII",
        "NuNER Zero": "NuNER Zero",
    }

    # Add a model recommendation for the example set
    def recommend_model(example_set_name):
        """Recommend appropriate model based on example set"""
        # gr.update works across Gradio versions, unlike Dropdown.update,
        # which is not available in Gradio 4.
        return gr.update(
            value=recommended_models.get(example_set_name, "GLiNER Medium v2.1")
        )

    # Auto-suggest model when changing example set
    example_set_dropdown.change(
        fn=recommend_model,
        inputs=[example_set_dropdown],
        outputs=[model_dropdown]
    )

    # Every event below re-runs the prediction with the same inputs/outputs:
    # explicit submission (button, Enter in the text box) and any change of the
    # model or of a prediction option.
    ner_inputs = [input_text, labels, model_dropdown, threshold, nested_ner, merge_entities_toggle]
    ner_triggers = (
        submit_btn.click,
        input_text.submit,
        model_dropdown.change,
        threshold.release,
        nested_ner.change,
        merge_entities_toggle.change,
    )
    for register in ner_triggers:
        register(fn=ner, inputs=ner_inputs, outputs=output)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    # Call queue() on the Blocks app before launching it.
    demo.queue()
    demo.launch(debug=True)