dwb2023 committed on
Commit
ab78124
·
verified ·
1 Parent(s): 5efb400

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +279 -0
app.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import Dict, Union, List
3
+ from gliner import GLiNER
4
+ import gradio as gr
5
+ import os
6
+
7
# Selectable models: UI display name -> Hugging Face model identifier.
# Insertion order matters: the first entry is the default dropdown value.
MODELS: Dict[str, str] = {
    "GLiNER Medium v2.1": "urchade/gliner_medium-v2.1",
    "NuNER Zero": "numind/NuZero_token",
    "GLiNER Multi PII": "urchade/gliner_multi_pii-v1",
}

# Example sets: UI display name -> JSON file holding that set's examples.
EXAMPLE_SETS: Dict[str, str] = {
    "General NER": "examples.json",
    "NuNER Zero": "examples-nuner.json",
    "PII Detection": "examples-pii.json",
}

# Cache of models that have already been loaded, keyed by display name.
# Starts empty; entries are added on demand.
loaded_models = dict()

# Examples of the currently selected example set.
current_examples = list()
26
+
27
def load_example_set(example_set_name):
    """Load a set of examples from the specified file.

    Args:
        example_set_name: Key into ``EXAMPLE_SETS`` naming the JSON file to read.

    Returns:
        The parsed list of examples. An empty list is returned (and a message
        printed) when the set name is unknown, the file cannot be opened or
        decoded, or the JSON document is not a list.
    """
    try:
        file_path = EXAMPLE_SETS[example_set_name]
        with open(file_path, "r", encoding="utf-8") as f:
            examples = json.load(f)
    except (KeyError, OSError, ValueError) as e:
        # OSError covers FileNotFoundError plus permission/directory errors;
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error loading example set {example_set_name}: {e}")
        return []

    # Callers index the result positionally, so anything but a list is unusable.
    if not isinstance(examples, list):
        print(f"Error loading example set {example_set_name}: expected a JSON list")
        return []
    return examples


# Load default example set
current_examples = load_example_set("General NER")
40
+
41
def get_model(model_name):
    """Return the model registered under ``model_name``, loading it on first use.

    Loaded models are kept in ``loaded_models`` so each one is fetched with
    ``GLiNER.from_pretrained`` at most once. An unknown ``model_name`` raises
    ``KeyError`` from the ``MODELS`` lookup.
    """
    # Fast path: the model was loaded by an earlier call.
    if model_name in loaded_models:
        return loaded_models[model_name]

    instance = GLiNER.from_pretrained(MODELS[model_name])
    loaded_models[model_name] = instance
    return loaded_models[model_name]
47
+
48
def merge_entities(entities):
    """Merge adjacent entities of the same type.

    Two consecutive entities are merged when they share the same ``'entity'``
    label and the second one starts either exactly where the first ends or one
    character after it.

    Args:
        entities: List of dicts with at least the keys ``'entity'``,
            ``'word'``, ``'start'`` and ``'end'``, in the order they should be
            considered for merging.

    Returns:
        A new list of new dicts; the input list and its dicts are left
        untouched. A merged entity keeps every other field (e.g. ``'score'``)
        of the first entity in its run.
    """
    if not entities:
        return []

    merged = []
    # Copy so that extending 'word'/'end' never modifies the caller's dicts.
    current = dict(entities[0])
    for next_entity in entities[1:]:
        gap = next_entity['start'] - current['end']
        if next_entity['entity'] == current['entity'] and gap in (0, 1):
            # Touching spans are joined directly; a one-character gap is
            # rendered as a space (assumes the skipped character is whitespace).
            separator = ' ' if gap == 1 else ''
            current['word'] += separator + next_entity['word']
            current['end'] = next_entity['end']
        else:
            merged.append(current)
            current = dict(next_entity)
    merged.append(current)
    return merged
64
+
65
def ner(
    text: str,
    labels: str,
    model_name: str,
    threshold: float,
    nested_ner: bool,
    merge_entities_toggle: bool
) -> Dict[str, Union[str, List]]:
    """Run named entity recognition with selected model and parameters.

    Args:
        text: The text to analyse.
        labels: Comma-separated entity labels; surrounding whitespace and
            blank entries are ignored.
        model_name: Key into ``MODELS`` selecting the model to use.
        threshold: Confidence threshold forwarded to ``predict_entities``.
        nested_ner: When true, prediction runs with ``flat_ner=False``.
        merge_entities_toggle: When true, the predictions are post-processed
            with ``merge_entities``.

    Returns:
        A dict with the input ``"text"`` and an ``"entities"`` list whose items
        have the keys ``entity``, ``word``, ``start``, ``end`` and ``score``.
    """
    text = text or ""

    # Split labels, dropping blanks such as those produced by "a,,b" or "".
    label_list = [label.strip() for label in (labels or "").split(",") if label.strip()]

    # Nothing to predict: skip loading and running a model entirely.
    if not text.strip() or not label_list:
        return {
            "text": text,
            "entities": [],
        }

    # Get the selected model
    model = get_model(model_name)

    # Predict entities and convert them to the key names used in the result.
    entities = [
        {
            "entity": entity["label"],
            "word": entity["text"],
            "start": entity["start"],
            "end": entity["end"],
            "score": entity.get("score", 0),
        }
        for entity in model.predict_entities(
            text, label_list, flat_ner=not nested_ner, threshold=threshold
        )
    ]

    # Merge entities if enabled
    if merge_entities_toggle:
        entities = merge_entities(entities)

    # Return results
    return {
        "text": text,
        "entities": entities,
    }
104
+
105
def load_example(example_idx):
    """Return the UI field values for one example of the current example set.

    The result is a 5-tuple ``(text, labels, threshold, nested_ner, merge)``.
    The first four values are taken from positions 0-3 of the selected example
    and the merge flag is always ``False``. When no examples are loaded, or
    ``example_idx`` is at or beyond the end of the set, the defaults
    ``("", "", 0.3, False, False)`` are returned instead.
    """
    if current_examples and example_idx < len(current_examples):
        entry = current_examples[example_idx]
        text_value = entry[0]
        labels_value = entry[1]
        threshold_value = entry[2]
        nested_value = entry[3]
        return text_value, labels_value, threshold_value, nested_value, False

    return "", "", 0.3, False, False
112
+
113
def switch_example_set(example_set_name):
    """Switch to a different example set and update the interface.

    Reloads the module-level ``current_examples`` from the file registered for
    ``example_set_name``.

    Args:
        example_set_name: Key into ``EXAMPLE_SETS``.

    Returns:
        A 6-tuple ``(text, labels, threshold, nested_ner, merge, dropdown_update)``.
        The first five values come from the first example of the new set (merge
        is always ``False``), or are the defaults ``"", "", 0.3, False, False``
        when the set is empty. ``dropdown_update`` refreshes the example
        dropdown's choices and selection.
    """
    global current_examples
    current_examples = load_example_set(example_set_name)

    # gr.update() is used instead of gr.Dropdown.update(), which no longer
    # exists in Gradio 4 and later.
    if current_examples:
        # Show the first example from the new set.
        example = current_examples[0]
        example_names = [f"Example {i+1}" for i in range(len(current_examples))]
        return (
            example[0],
            example[1],
            example[2],
            example[3],
            False,
            gr.update(choices=example_names, value="Example 1"),
        )
    else:
        return "", "", 0.3, False, False, gr.update(choices=[], value=None)
126
+
127
# Build the Gradio UI: model/example selectors, text and label inputs,
# prediction options, highlighted output, and the event wiring between them.
with gr.Blocks(title="Unified NER Interface") as demo:
    gr.Markdown(
        """
        # Unified Zero-shot Named Entity Recognition Interface

        This interface allows you to compare different zero-shot Named Entity Recognition models.

        ## Models Available:
        - **GLiNER Medium v2.1**: The original GLiNER medium model
        - **NuNER Zero**: A specialized token-based NER model
        - **GLiNER Multi PII**: Fine-tuned for detecting personally identifiable information across multiple languages

        ## Features:
        - Select different models
        - Switch between example sets for different use cases
        - Toggle nested entity recognition
        - Toggle entity merging (combining adjacent entities of the same type)
        - Select from various examples within each set
        """
    )

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=list(MODELS.keys()),
            value=list(MODELS.keys())[0],
            label="Model",
            info="Select the NER model to use"
        )
        example_set_dropdown = gr.Dropdown(
            choices=list(EXAMPLE_SETS.keys()),
            value="General NER",
            label="Example Set",
            info="Select a set of example texts"
        )

    with gr.Row():
        example_dropdown = gr.Dropdown(
            choices=[f"Example {i+1}" for i in range(len(current_examples))],
            # Only preselect "Example 1" when that choice actually exists.
            value="Example 1" if current_examples else None,
            label="Example",
            info="Select a specific example text"
        )

    input_text = gr.Textbox(
        value=current_examples[0][0] if current_examples else "",
        label="Text input",
        placeholder="Enter your text here",
        lines=5
    )

    with gr.Row():
        labels = gr.Textbox(
            value=current_examples[0][1] if current_examples else "",
            label="Entity Labels",
            placeholder="Enter your labels here (comma separated)",
            scale=2,
        )
        threshold = gr.Slider(
            0,
            1,
            value=current_examples[0][2] if current_examples else 0.3,
            step=0.01,
            label="Confidence Threshold",
            info="Lower the threshold to increase how many entities get predicted.",
            scale=1,
        )

    with gr.Row():
        nested_ner = gr.Checkbox(
            value=current_examples[0][3] if current_examples else False,
            label="Nested NER",
            info="Allow entities to be contained within other entities",
        )
        merge_entities_toggle = gr.Checkbox(
            value=False,
            label="Merge Adjacent Entities",
            info="Combine adjacent entities of the same type into a single entity",
        )

    output = gr.HighlightedText(label="Predicted Entities")
    submit_btn = gr.Button("Submit")

    # Handling example set selection
    example_set_dropdown.change(
        fn=switch_example_set,
        inputs=[example_set_dropdown],
        outputs=[input_text, labels, threshold, nested_ner, merge_entities_toggle, example_dropdown]
    )

    def select_example(example_label):
        """Load the example named by a dropdown value such as "Example 3".

        Returns the default field values when the dropdown has no selection,
        which happens after switching to an example set that failed to load.
        """
        if not example_label:
            return "", "", 0.3, False, False
        # Labels are 1-based ("Example 1"); load_example expects a 0-based index.
        return load_example(int(example_label.split()[1]) - 1)

    # Handling example selection within a set
    example_dropdown.change(
        fn=select_example,
        inputs=[example_dropdown],
        outputs=[input_text, labels, threshold, nested_ner, merge_entities_toggle]
    )

    # Add a model recommendation for the example set
    def recommend_model(example_set_name):
        """Recommend appropriate model based on example set.

        Returns a component update selecting "GLiNER Multi PII" for the
        "PII Detection" set, "NuNER Zero" for the "NuNER Zero" set, and
        "GLiNER Medium v2.1" for anything else.
        """
        # gr.update() replaces gr.Dropdown.update(), removed in Gradio 4.
        if example_set_name == "PII Detection":
            return gr.update(value="GLiNER Multi PII")
        elif example_set_name == "NuNER Zero":
            return gr.update(value="NuNER Zero")
        else:
            return gr.update(value="GLiNER Medium v2.1")

    # Auto-suggest model when changing example set
    example_set_dropdown.change(
        fn=recommend_model,
        inputs=[example_set_dropdown],
        outputs=[model_dropdown]
    )

    # Every interaction below re-runs prediction with the same inputs/outputs:
    # explicit submission first, then changes to the model or to any option.
    ner_inputs = [input_text, labels, model_dropdown, threshold, nested_ner, merge_entities_toggle]
    for register_event in (
        submit_btn.click,
        input_text.submit,
        model_dropdown.change,
        threshold.release,
        nested_ner.change,
        merge_entities_toggle.change,
    ):
        register_event(fn=ner, inputs=ner_inputs, outputs=output)
276
+
277
# Script entry point: enable request queuing, then start the app in debug mode.
if __name__ == "__main__":
    demo.queue().launch(debug=True)