"""Gradio demo for comparing zero-shot NER models served through GLiNER.

The app lets the user pick a model, an example set, and an example within
that set, then runs entity prediction and shows the highlighted result.
"""

import json
import os
from typing import Dict, List, Union

import gradio as gr
from gliner import GLiNER

# Display name -> model identifier passed to ``GLiNER.from_pretrained``.
MODELS = {
    "GLiNER Medium v2.1": "urchade/gliner_medium-v2.1",
    "NuNER Zero": "numind/NuZero_token",
    "GLiNER Multi PII": "urchade/gliner_multi_pii-v1",
}

# Example set name -> JSON file holding that set's examples.
EXAMPLE_SETS = {
    "General NER": "examples.json",
    "NuNER Zero": "examples-nuner.json",
    "PII Detection": "examples-pii.json",
}

# Example set name -> model name suggested for it; anything not listed falls
# back to _DEFAULT_MODEL.
_RECOMMENDED_MODELS = {
    "PII Detection": "GLiNER Multi PII",
    "NuNER Zero": "NuNER Zero",
}
_DEFAULT_MODEL = "GLiNER Medium v2.1"

# Values shown when no example is available:
# (text, labels, threshold, nested_ner, merge_entities_toggle).
_EMPTY_EXAMPLE = ("", "", 0.3, False, False)

# Cache of already-loaded models, keyed by display name (filled on demand).
loaded_models = {}

# Examples of the currently selected set.
# NOTE(review): this is module-level state, so if several users share this
# process, switching sets in one session changes what load_example() returns
# in the others. Consider per-session state if that matters.
current_examples = []


def load_example_set(example_set_name):
    """Load a set of examples from the file registered in EXAMPLE_SETS.

    Args:
        example_set_name: Key into EXAMPLE_SETS.

    Returns:
        The parsed JSON content of the file, or an empty list when the name
        is unknown, the file is missing, or the file is not valid JSON (the
        error is printed rather than raised so the UI keeps working).
    """
    try:
        file_path = EXAMPLE_SETS[example_set_name]
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (KeyError, FileNotFoundError, json.JSONDecodeError) as e:
        print(f"Error loading example set {example_set_name}: {e}")
        return []


# Load default example set
current_examples = load_example_set("General NER")


def get_model(model_name):
    """Return the model for *model_name*, loading and caching it on first use.

    Raises:
        KeyError: If *model_name* is not a key of MODELS.
    """
    if model_name not in loaded_models:
        loaded_models[model_name] = GLiNER.from_pretrained(MODELS[model_name])
    return loaded_models[model_name]


def merge_entities(entities, text=None):
    """Merge adjacent entities of the same type.

    Two consecutive entities are merged when they share the same ``entity``
    label and the second one starts exactly at, or one character after, the
    end of the first. The merged entity keeps the remaining fields (such as
    ``score``) of the first entity in the run.

    Args:
        entities: List of dicts with at least ``entity``, ``word``, ``start``
            and ``end`` keys, in the order they should be scanned.
        text: Optional source text the offsets refer to. When given, the
            merged ``word`` is sliced from it so it matches the span exactly;
            otherwise the words are joined, with a space only when there is a
            one-character gap between the spans.

    Returns:
        A new list of new dicts; the input list and its dicts are not
        modified.
    """
    if not entities:
        return []

    merged = []
    # Copy so the merge never mutates the caller's entity dicts.
    current = dict(entities[0])
    for next_entity in entities[1:]:
        gap = next_entity["start"] - current["end"]
        if next_entity["entity"] == current["entity"] and gap in (0, 1):
            if text is not None:
                current["word"] = text[current["start"]:next_entity["end"]]
            else:
                # Contiguous spans (gap == 0) have nothing between them in
                # the source, so no separator is inserted.
                separator = " " if gap == 1 else ""
                current["word"] += separator + next_entity["word"]
            current["end"] = next_entity["end"]
        else:
            merged.append(current)
            current = dict(next_entity)
    merged.append(current)
    return merged


def ner(
    text: str,
    labels: str,
    model_name: str,
    threshold: float,
    nested_ner: bool,
    merge_entities_toggle: bool,
) -> Dict[str, Union[str, List]]:
    """Run named entity recognition with the selected model and parameters.

    Args:
        text: Text to analyse.
        labels: Comma-separated entity labels; blank entries are ignored.
        model_name: Key into MODELS.
        threshold: Confidence threshold forwarded to the model.
        nested_ner: When true, the model is called with ``flat_ner=False``.
        merge_entities_toggle: When true, adjacent same-type entities are
            merged with merge_entities().

    Returns:
        A dict with the input ``text`` and the list of predicted
        ``entities`` (each with ``entity``, ``word``, ``start``, ``end`` and
        ``score``). The list is empty when there is no text or no usable
        label, in which case the model is not loaded or called.
    """
    text = text or ""
    # Drop blank labels such as the one produced by a trailing comma.
    label_list = [
        label.strip() for label in (labels or "").split(",") if label.strip()
    ]
    if not text.strip() or not label_list:
        return {"text": text, "entities": []}

    model = get_model(model_name)
    predictions = model.predict_entities(
        text, label_list, flat_ner=not nested_ner, threshold=threshold
    )
    entities = [
        {
            "entity": entity["label"],
            "word": entity["text"],
            "start": entity["start"],
            "end": entity["end"],
            "score": entity.get("score", 0),
        }
        for entity in predictions
    ]

    if merge_entities_toggle:
        entities = merge_entities(entities, text)

    return {
        "text": text,
        "entities": entities,
    }


def load_example(example_idx):
    """Load a specific example by index from the current example set.

    Args:
        example_idx: Zero-based index into ``current_examples``.

    Returns:
        ``(text, labels, threshold, nested_ner, merge_toggle)``. The merge
        toggle is always reset to False. Default values are returned when the
        index is out of range (including negative) or no examples are loaded.
    """
    if not 0 <= example_idx < len(current_examples):
        return _EMPTY_EXAMPLE
    example = current_examples[example_idx]
    return example[0], example[1], example[2], example[3], False


def select_example(example_name):
    """Load the example named by a dropdown choice such as ``"Example 3"``.

    The dropdown value is None when the current set has no examples, and the
    change event still fires in that case, so anything that cannot be parsed
    falls back to the default values instead of raising.
    """
    try:
        example_idx = int(example_name.split()[1]) - 1
    except (AttributeError, IndexError, ValueError):
        example_idx = -1
    return load_example(example_idx)


def switch_example_set(example_set_name):
    """Switch to a different example set and update the interface.

    Replaces the module-level ``current_examples`` and returns the values for
    the first example of the new set followed by an update for the example
    dropdown: ``(text, labels, threshold, nested_ner, merge_toggle,
    dropdown_update)``. Default values and an empty dropdown are returned
    when the set could not be loaded or is empty.
    """
    global current_examples
    current_examples = load_example_set(example_set_name)

    if not current_examples:
        return (*_EMPTY_EXAMPLE, gr.update(choices=[], value=None))

    example = current_examples[0]
    example_names = [f"Example {i+1}" for i in range(len(current_examples))]
    # gr.update() is used instead of gr.Dropdown.update(), which no longer
    # exists in Gradio 4; gr.update() is available in both 3.x and 4.x.
    return (
        example[0],
        example[1],
        example[2],
        example[3],
        False,
        gr.update(choices=example_names, value="Example 1"),
    )


def recommend_model(example_set_name):
    """Recommend appropriate model based on example set.

    Returns a dropdown update selecting the model mapped to the set in
    _RECOMMENDED_MODELS, or _DEFAULT_MODEL for any other set.
    """
    return gr.update(
        value=_RECOMMENDED_MODELS.get(example_set_name, _DEFAULT_MODEL)
    )


with gr.Blocks(title="Unified NER Interface") as demo:
    gr.Markdown(
        """
        # Unified Zero-shot Named Entity Recognition Interface

        This interface allows you to compare different zero-shot Named Entity Recognition models.

        ## Models Available:
        - **GLiNER Medium v2.1**: The original GLiNER medium model
        - **NuNER Zero**: A specialized token-based NER model
        - **GLiNER Multi PII**: Fine-tuned for detecting personally identifiable information across multiple languages

        ## Features:
        - Select different models
        - Switch between example sets for different use cases
        - Toggle nested entity recognition
        - Toggle entity merging (combining adjacent entities of the same type)
        - Select from various examples within each set
        """
    )

    # Initial widget values come from the first example of the default set,
    # or from the defaults when that set could not be loaded.
    first_example = current_examples[0] if current_examples else _EMPTY_EXAMPLE

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=list(MODELS.keys()),
            value=list(MODELS.keys())[0],
            label="Model",
            info="Select the NER model to use",
        )
        example_set_dropdown = gr.Dropdown(
            choices=list(EXAMPLE_SETS.keys()),
            value="General NER",
            label="Example Set",
            info="Select a set of example texts",
        )

    with gr.Row():
        example_dropdown = gr.Dropdown(
            choices=[f"Example {i+1}" for i in range(len(current_examples))],
            # Only preselect "Example 1" when it is actually one of the choices.
            value="Example 1" if current_examples else None,
            label="Example",
            info="Select a specific example text",
        )

    input_text = gr.Textbox(
        value=first_example[0],
        label="Text input",
        placeholder="Enter your text here",
        lines=5,
    )

    with gr.Row():
        labels = gr.Textbox(
            value=first_example[1],
            label="Entity Labels",
            placeholder="Enter your labels here (comma separated)",
            scale=2,
        )
        threshold = gr.Slider(
            0,
            1,
            value=first_example[2],
            step=0.01,
            label="Confidence Threshold",
            info="Lower the threshold to increase how many entities get predicted.",
            scale=1,
        )

    with gr.Row():
        nested_ner = gr.Checkbox(
            value=first_example[3],
            label="Nested NER",
            info="Allow entities to be contained within other entities",
        )
        merge_entities_toggle = gr.Checkbox(
            value=False,
            label="Merge Adjacent Entities",
            info="Combine adjacent entities of the same type into a single entity",
        )

    output = gr.HighlightedText(label="Predicted Entities")
    submit_btn = gr.Button("Submit")

    # Handling example set selection
    example_set_dropdown.change(
        fn=switch_example_set,
        inputs=[example_set_dropdown],
        outputs=[
            input_text,
            labels,
            threshold,
            nested_ner,
            merge_entities_toggle,
            example_dropdown,
        ],
    )

    # Handling example selection within a set
    example_dropdown.change(
        fn=select_example,
        inputs=[example_dropdown],
        outputs=[input_text, labels, threshold, nested_ner, merge_entities_toggle],
    )

    # Auto-suggest model when changing example set
    example_set_dropdown.change(
        fn=recommend_model,
        inputs=[example_set_dropdown],
        outputs=[model_dropdown],
    )

    # Every event below re-runs prediction with the same inputs and output:
    # explicit submission (button / Enter in the text box) and changes to the
    # model, threshold, nested-NER and merge settings.
    ner_inputs = [
        input_text,
        labels,
        model_dropdown,
        threshold,
        nested_ner,
        merge_entities_toggle,
    ]
    for trigger in (
        submit_btn.click,
        input_text.submit,
        model_dropdown.change,
        threshold.release,
        nested_ner.change,
        merge_entities_toggle.change,
    ):
        trigger(fn=ner, inputs=ner_inputs, outputs=output)


if __name__ == "__main__":
    demo.queue()
    demo.launch(debug=True)