# Source: Hugging Face Space file view (revision ab78124, file size 9,238 bytes).
import json
from typing import Dict, Union, List
from gliner import GLiNER
import gradio as gr
import os
# Selectable models: display name -> identifier handed to GLiNER.from_pretrained.
MODELS: Dict[str, str] = {
    "GLiNER Medium v2.1": "urchade/gliner_medium-v2.1",
    "NuNER Zero": "numind/NuZero_token",
    "GLiNER Multi PII": "urchade/gliner_multi_pii-v1",
}

# Example sets: display name -> JSON file that load_example_set() opens.
EXAMPLE_SETS: Dict[str, str] = {
    "General NER": "examples.json",
    "NuNER Zero": "examples-nuner.json",
    "PII Detection": "examples-pii.json",
}

# Model cache keyed by display name; starts empty and is filled on demand.
loaded_models: dict = {}

# Examples of the example set that is currently selected.
current_examples: list = []
def load_example_set(example_set_name):
    """Read and parse the JSON file registered for *example_set_name*.

    Returns the parsed JSON content. When the name is not a key of
    ``EXAMPLE_SETS``, the file does not exist, or the file is not valid
    JSON, the error is printed and an empty list is returned instead.
    """
    try:
        with open(EXAMPLE_SETS[example_set_name], "r", encoding="utf-8") as handle:
            return json.load(handle)
    except (KeyError, FileNotFoundError, json.JSONDecodeError) as e:
        print(f"Error loading example set {example_set_name}: {e}")
    return []


# Start with the "General NER" examples loaded.
current_examples = load_example_set("General NER")
def get_model(model_name):
    """Return the model registered as *model_name*, creating it on first use.

    Models are cached in ``loaded_models``; a name that is already cached is
    returned without calling ``GLiNER.from_pretrained`` again. A name that is
    not a key of ``MODELS`` raises ``KeyError``.
    """
    if model_name in loaded_models:
        return loaded_models[model_name]

    instance = GLiNER.from_pretrained(MODELS[model_name])
    loaded_models[model_name] = instance
    return instance
def merge_entities(entities):
    """Merge runs of adjacent entities that share the same type.

    Two consecutive entities are merged when they have the same ``entity``
    value and the second one starts exactly where the first ends, or one
    character after it. The merged entity keeps every field of the first
    entity of the run (including ``score``, if present), takes ``end`` from
    the last one, and concatenates the ``word`` values.

    Args:
        entities: Sequence of dicts with ``entity``, ``word``, ``start`` and
            ``end`` keys, in the order they should be scanned.

    Returns:
        A new list of entity dicts. The input dicts are never modified.
    """
    if not entities:
        return []

    merged = []
    for entity in entities:
        previous = merged[-1] if merged else None
        gap = entity["start"] - previous["end"] if previous is not None else None
        if (
            previous is not None
            and entity["entity"] == previous["entity"]
            and gap in (0, 1)
        ):
            # Only insert a separator when a character actually lies between
            # the two spans; touching spans are joined directly.
            separator = " " if gap == 1 else ""
            previous["word"] += separator + entity["word"]
            previous["end"] = entity["end"]
        else:
            # Work on a copy so that extending a run never alters the
            # caller's dicts.
            merged.append(dict(entity))
    return merged
def ner(
    text: str,
    labels: str,
    model_name: str,
    threshold: float,
    nested_ner: bool,
    merge_entities_toggle: bool
) -> Dict[str, Union[str, List]]:
    """Run named entity recognition with the selected model and parameters.

    Args:
        text: Text to analyse.
        labels: Comma-separated entity labels; blank entries are ignored.
        model_name: Key of ``MODELS`` selecting the model to use.
        threshold: Confidence threshold forwarded to ``predict_entities``.
        nested_ner: When true, ``flat_ner=False`` is passed to the model.
        merge_entities_toggle: When true, the predictions are post-processed
            with ``merge_entities``.

    Returns:
        A dict with the input ``text`` and an ``entities`` list whose items
        have ``entity``, ``word``, ``start``, ``end`` and ``score`` keys.
        When the text is empty/blank or no usable label is given, the list is
        empty and no model is loaded.
    """
    # Drop blank entries so "a, ,b," yields ["a", "b"] instead of passing
    # empty strings to the model as labels.
    label_list = [
        stripped
        for stripped in (label.strip() for label in (labels or "").split(","))
        if stripped
    ]

    # Nothing to predict: skip model loading and inference entirely.
    if not text or not text.strip() or not label_list:
        return {"text": text or "", "entities": []}

    model = get_model(model_name)
    predictions = model.predict_entities(
        text, label_list, flat_ner=not nested_ner, threshold=threshold
    )

    # Rename the model's keys to the ones used by the rest of this file.
    entities = [
        {
            "entity": prediction["label"],
            "word": prediction["text"],
            "start": prediction["start"],
            "end": prediction["end"],
            "score": prediction.get("score", 0),
        }
        for prediction in predictions
    ]

    if merge_entities_toggle:
        entities = merge_entities(entities)

    return {
        "text": text,
        "entities": entities,
    }
def load_example(example_idx):
    """Return the form values for one example of the current example set.

    The result is ``(text, labels, threshold, nested_ner, merge)``, taken
    from the first four items of the example; ``merge`` is always ``False``.
    When no examples are loaded, or the index is past the end of the set,
    the defaults ``("", "", 0.3, False, False)`` are returned.
    """
    available = bool(current_examples) and example_idx < len(current_examples)
    if available:
        chosen = current_examples[example_idx]
        return chosen[0], chosen[1], chosen[2], chosen[3], False
    return "", "", 0.3, False, False
def switch_example_set(example_set_name):
    """Switch to a different example set and update the interface.

    Replaces the global ``current_examples`` with the examples loaded for
    *example_set_name*.

    Returns:
        A 6-tuple: text, labels, threshold, nested-NER flag and merge flag of
        the first example (or the ``load_example`` defaults when the set is
        empty), followed by an update for the example dropdown that lists
        ``"Example 1"`` .. ``"Example N"`` and selects the first entry
        (``None`` when there are no examples).
    """
    global current_examples
    current_examples = load_example_set(example_set_name)

    example_names = [f"Example {i}" for i in range(1, len(current_examples) + 1)]
    # gr.update replaces the per-component gr.Dropdown.update, which newer
    # Gradio releases no longer provide.
    dropdown_update = gr.update(
        choices=example_names,
        value=example_names[0] if example_names else None,
    )

    # load_example already implements "first example or defaults".
    return (*load_example(0), dropdown_update)
# Introductory Markdown shown at the top of the page.
INTRO_MARKDOWN = """
# Unified Zero-shot Named Entity Recognition Interface
This interface allows you to compare different zero-shot Named Entity Recognition models.
## Models Available:
- **GLiNER Medium v2.1**: The original GLiNER medium model
- **NuNER Zero**: A specialized token-based NER model
- **GLiNER Multi PII**: Fine-tuned for detecting personally identifiable information across multiple languages
## Features:
- Select different models
- Switch between example sets for different use cases
- Toggle nested entity recognition
- Toggle entity merging (combining adjacent entities of the same type)
- Select from various examples within each set
"""

# Example set name -> model suggested for it; any other set falls back to
# DEFAULT_RECOMMENDED_MODEL.
RECOMMENDED_MODELS = {
    "PII Detection": "GLiNER Multi PII",
    "NuNER Zero": "NuNER Zero",
}
DEFAULT_RECOMMENDED_MODEL = "GLiNER Medium v2.1"


def recommend_model(example_set_name):
    """Return a model-dropdown update selecting the model suited to the example set."""
    suggestion = RECOMMENDED_MODELS.get(example_set_name, DEFAULT_RECOMMENDED_MODEL)
    # gr.update replaces the per-component gr.Dropdown.update, which newer
    # Gradio releases no longer provide.
    return gr.update(value=suggestion)


def _select_example(choice):
    """Translate an "Example N" dropdown value into the form values of example N.

    The dropdown value is ``None`` when the current example set is empty
    (``switch_example_set`` sets it that way); in that case every output is
    left unchanged instead of failing on ``None.split()``.
    """
    if not choice:
        return tuple(gr.update() for _ in range(5))
    return load_example(int(choice.split()[1]) - 1)


# NOTE(review): the original indentation was lost, so the nesting of components
# inside each gr.Row() below is reconstructed — confirm against the real layout.
with gr.Blocks(title="Unified NER Interface") as demo:
    gr.Markdown(INTRO_MARKDOWN)

    # Initial form values come from the first example of the default set;
    # load_example falls back to ("", "", 0.3, False, False) when it is empty.
    initial_text, initial_labels, initial_threshold, initial_nested, _ = load_example(0)
    example_names = [f"Example {i}" for i in range(1, len(current_examples) + 1)]
    model_names = list(MODELS)

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=model_names,
            value=model_names[0],
            label="Model",
            info="Select the NER model to use",
        )
        example_set_dropdown = gr.Dropdown(
            choices=list(EXAMPLE_SETS),
            value="General NER",
            label="Example Set",
            info="Select a set of example texts",
        )

    with gr.Row():
        example_dropdown = gr.Dropdown(
            choices=example_names,
            # Do not preselect an entry that is not among the choices.
            value=example_names[0] if example_names else None,
            label="Example",
            info="Select a specific example text",
        )

    input_text = gr.Textbox(
        value=initial_text,
        label="Text input",
        placeholder="Enter your text here",
        lines=5,
    )

    with gr.Row():
        labels = gr.Textbox(
            value=initial_labels,
            label="Entity Labels",
            placeholder="Enter your labels here (comma separated)",
            scale=2,
        )
        threshold = gr.Slider(
            0,
            1,
            value=initial_threshold,
            step=0.01,
            label="Confidence Threshold",
            info="Lower the threshold to increase how many entities get predicted.",
            scale=1,
        )

    with gr.Row():
        nested_ner = gr.Checkbox(
            value=initial_nested,
            label="Nested NER",
            info="Allow entities to be contained within other entities",
        )
        merge_entities_toggle = gr.Checkbox(
            value=False,
            label="Merge Adjacent Entities",
            info="Combine adjacent entities of the same type into a single entity",
        )

    output = gr.HighlightedText(label="Predicted Entities")
    submit_btn = gr.Button("Submit")

    # Components that an example fills in, in the order load_example returns them.
    form_fields = [input_text, labels, threshold, nested_ner, merge_entities_toggle]

    # Handling example set selection
    example_set_dropdown.change(
        fn=switch_example_set,
        inputs=[example_set_dropdown],
        outputs=form_fields + [example_dropdown],
    )

    # Handling example selection within a set
    example_dropdown.change(
        fn=_select_example,
        inputs=[example_dropdown],
        outputs=form_fields,
    )

    # Auto-suggest model when changing example set
    example_set_dropdown.change(
        fn=recommend_model,
        inputs=[example_set_dropdown],
        outputs=[model_dropdown],
    )

    # Every one of these events re-runs the prediction with the same wiring.
    ner_inputs = [
        input_text,
        labels,
        model_dropdown,
        threshold,
        nested_ner,
        merge_entities_toggle,
    ]
    ner_triggers = (
        submit_btn.click,
        input_text.submit,
        model_dropdown.change,
        threshold.release,
        nested_ner.change,
        merge_entities_toggle.change,
    )
    for register in ner_triggers:
        register(fn=ner, inputs=ner_inputs, outputs=output)
if __name__ == "__main__":
    # Enable the request queue, then start the app with debug mode on.
    demo.queue()
    demo.launch(debug=True)