Spaces:

dwb2023
/

gliner_testbed

Running

File size: 9,238 Bytes

ab78124

import json
from typing import Dict, Union, List
from gliner import GLiNER
import gradio as gr
import os

# Models offered in the UI: display name -> identifier handed to
# GLiNER.from_pretrained() by get_model().
MODELS: Dict[str, str] = {
    "GLiNER Medium v2.1": "urchade/gliner_medium-v2.1",
    "NuNER Zero": "numind/NuZero_token",
    "GLiNER Multi PII": "urchade/gliner_multi_pii-v1"
}

# Example sets offered in the UI: display name -> path of the JSON file
# that load_example_set() opens and decodes.
EXAMPLE_SETS: Dict[str, str] = {
    "General NER": "examples.json",
    "NuNER Zero": "examples-nuner.json",
    "PII Detection": "examples-pii.json"
}

# Cache of already-loaded models keyed by display name; filled lazily by
# get_model() so a model is only loaded the first time it is requested.
loaded_models: Dict[str, GLiNER] = {}

# Examples of the currently selected set. Rows are read by index as
# [0]=text, [1]=labels, [2]=threshold, [3]=nested-NER flag.
current_examples: List = []

def load_example_set(example_set_name):
    """Load a set of examples from the JSON file registered for it.

    Args:
        example_set_name: Key into ``EXAMPLE_SETS`` naming the set to load.

    Returns:
        The list decoded from the file, or an empty list when the name is
        unknown, the file cannot be read or decoded, or its top-level JSON
        value is not a list. Failures are reported with ``print`` rather
        than raised, so the app can still start without examples.
    """
    try:
        file_path = EXAMPLE_SETS[example_set_name]
        with open(file_path, "r", encoding="utf-8") as f:
            examples = json.load(f)
    except (KeyError, OSError, ValueError) as e:
        # OSError (not only FileNotFoundError) also covers unreadable files
        # and directories; ValueError covers both json.JSONDecodeError and
        # UnicodeDecodeError.
        print(f"Error loading example set {example_set_name}: {e}")
        return []

    # Callers index the result positionally, so anything but a list would
    # only fail later and more obscurely.
    if not isinstance(examples, list):
        print(
            f"Error loading example set {example_set_name}: "
            f"expected a JSON list, got {type(examples).__name__}"
        )
        return []
    return examples

# Load the default example set at import time so the widgets created further
# down can be pre-filled from it. load_example_set() returns [] on failure,
# in which case those widgets fall back to their empty defaults.
current_examples = load_example_set("General NER")

def get_model(model_name):
    """Return the model registered under ``model_name``, loading it on first use.

    Loaded models are kept in ``loaded_models`` so later requests for the
    same name reuse the existing instance. An unknown name raises
    ``KeyError`` from the ``MODELS`` lookup.
    """
    # Fast path: this model was requested before.
    if model_name in loaded_models:
        return loaded_models[model_name]

    model = GLiNER.from_pretrained(MODELS[model_name])
    loaded_models[model_name] = model
    return loaded_models[model_name]

def merge_entities(entities):
    """Merge adjacent entities of the same type.

    Two consecutive entities are merged when they share the same
    ``'entity'`` label and the second one starts either exactly where the
    first ends or one character later. The merged entity keeps the fields of
    the first one, with ``'word'`` extended and ``'end'`` moved to the end of
    the last merged entity.

    Args:
        entities: Sequence of dicts with at least ``'entity'``, ``'word'``,
            ``'start'`` and ``'end'`` keys, in text order.

    Returns:
        A new list of new dicts; the input list and its dicts are left
        untouched. Empty input yields an empty list.
    """
    if not entities:
        return []

    merged = []
    # Copy before extending so the caller's dicts are never modified.
    current = dict(entities[0])
    for next_entity in entities[1:]:
        gap = next_entity['start'] - current['end']
        if next_entity['entity'] == current['entity'] and gap in (0, 1):
            # A one-character gap is rendered as a space; spans that touch
            # have nothing between them in the text, so nothing is inserted.
            separator = ' ' if gap == 1 else ''
            current['word'] += separator + next_entity['word']
            current['end'] = next_entity['end']
        else:
            merged.append(current)
            current = dict(next_entity)
    merged.append(current)
    return merged

def ner(
    text: str,
    labels: str,
    model_name: str,
    threshold: float,
    nested_ner: bool,
    merge_entities_toggle: bool
) -> Dict[str, Union[str, List]]:
    """Run named entity recognition with the selected model and parameters.

    Args:
        text: Text to analyse.
        labels: Comma-separated entity labels to look for.
        model_name: Name of the model, resolved through ``get_model``.
        threshold: Forwarded to the model's ``predict_entities`` as
            ``threshold``.
        nested_ner: When true the model is called with ``flat_ner=False``.
        merge_entities_toggle: When true the predictions are post-processed
            with ``merge_entities``.

    Returns:
        A dict with the input ``"text"`` and an ``"entities"`` list whose
        items carry ``entity``, ``word``, ``start``, ``end`` and ``score``.
        When the text is blank or no usable label is given, the entity list
        is empty and no model is loaded.
    """
    # Split labels, dropping the blanks left by stray or trailing commas so
    # the model is never asked to look for an empty label.
    label_list = [label.strip() for label in (labels or "").split(",")]
    label_list = [label for label in label_list if label]

    # Nothing to predict: answer immediately instead of loading a model.
    if not text or not text.strip() or not label_list:
        return {
            "text": text or "",
            "entities": [],
        }

    # Get the selected model
    model = get_model(model_name)

    # Predict entities and map them to the keys used by the output widget
    entities = [
        {
            "entity": entity["label"],
            "word": entity["text"],
            "start": entity["start"],
            "end": entity["end"],
            "score": entity.get("score", 0),
        }
        for entity in model.predict_entities(
            text, label_list, flat_ner=not nested_ner, threshold=threshold
        )
    ]

    # Merge entities if enabled
    if merge_entities_toggle:
        entities = merge_entities(entities)

    # Return results
    return {
        "text": text,
        "entities": entities,
    }

def load_example(example_idx):
    """Load a specific example by index from the current example set.

    Args:
        example_idx: Zero-based position in ``current_examples``.

    Returns:
        A 5-tuple ``(text, labels, threshold, nested_ner, merge)`` taken from
        the first four fields of the example; ``merge`` is always ``False``.
        When there are no examples or the index is outside the list
        (including negative values), blank defaults
        ``("", "", 0.3, False, False)`` are returned.
    """
    # Reject negative indices explicitly: Python would otherwise wrap them
    # around to the end of the list and return an unrelated example.
    if not current_examples or not 0 <= example_idx < len(current_examples):
        return "", "", 0.3, False, False

    example = current_examples[example_idx]
    return example[0], example[1], example[2], example[3], False

def switch_example_set(example_set_name):
    """Switch to a different example set and update the interface.

    Reloads the module-level ``current_examples`` from the named set.

    Args:
        example_set_name: Name of the example set, as accepted by
            ``load_example_set``.

    Returns:
        A 6-tuple: text, labels, threshold, nested-NER flag and merge flag
        (always ``False``) of the first example in the new set, followed by
        an update for the example dropdown carrying the new choices. When
        the set is empty, blank defaults and an empty dropdown are returned.
    """
    global current_examples
    current_examples = load_example_set(example_set_name)

    # gr.update() is used instead of gr.Dropdown.update(): the per-component
    # update() classmethods were removed in Gradio 4, while gr.update() is
    # available in both 3.x and 4.x.
    if not current_examples:
        return "", "", 0.3, False, False, gr.update(choices=[], value=None)

    # Show the first example from the new set
    example = current_examples[0]
    example_names = [f"Example {i+1}" for i in range(len(current_examples))]
    return (
        example[0],
        example[1],
        example[2],
        example[3],
        False,
        gr.update(choices=example_names, value="Example 1"),
    )

# Build the UI: header text, model / example pickers, the input widgets
# (pre-filled from the first example of the default set when available),
# the highlighted output, and the event wiring between them.
with gr.Blocks(title="Unified NER Interface") as demo:
    gr.Markdown(
        """
        # Unified Zero-shot Named Entity Recognition Interface
        
        This interface allows you to compare different zero-shot Named Entity Recognition models.
        
        ## Models Available:
        - **GLiNER Medium v2.1**: The original GLiNER medium model
        - **NuNER Zero**: A specialized token-based NER model
        - **GLiNER Multi PII**: Fine-tuned for detecting personally identifiable information across multiple languages
        
        ## Features:
        - Select different models
        - Switch between example sets for different use cases
        - Toggle nested entity recognition
        - Toggle entity merging (combining adjacent entities of the same type)
        - Select from various examples within each set
        """
    )

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=list(MODELS.keys()),
            value=list(MODELS.keys())[0],
            label="Model",
            info="Select the NER model to use"
        )
        example_set_dropdown = gr.Dropdown(
            choices=list(EXAMPLE_SETS.keys()),
            value="General NER",
            label="Example Set",
            info="Select a set of example texts"
        )

    with gr.Row():
        example_dropdown = gr.Dropdown(
            choices=[f"Example {i+1}" for i in range(len(current_examples))],
            # Only preselect an entry when there is one to select.
            value="Example 1" if current_examples else None,
            label="Example",
            info="Select a specific example text"
        )

    input_text = gr.Textbox(
        value=current_examples[0][0] if current_examples else "",
        label="Text input",
        placeholder="Enter your text here",
        lines=5
    )

    with gr.Row():
        labels = gr.Textbox(
            value=current_examples[0][1] if current_examples else "",
            label="Entity Labels",
            placeholder="Enter your labels here (comma separated)",
            scale=2,
        )
        threshold = gr.Slider(
            0,
            1,
            value=current_examples[0][2] if current_examples else 0.3,
            step=0.01,
            label="Confidence Threshold",
            info="Lower the threshold to increase how many entities get predicted.",
            scale=1,
        )

    with gr.Row():
        nested_ner = gr.Checkbox(
            value=current_examples[0][3] if current_examples else False,
            label="Nested NER",
            info="Allow entities to be contained within other entities",
        )
        merge_entities_toggle = gr.Checkbox(
            value=False,
            label="Merge Adjacent Entities",
            info="Combine adjacent entities of the same type into a single entity",
        )

    output = gr.HighlightedText(label="Predicted Entities")
    submit_btn = gr.Button("Submit")

    # Handling example set selection
    example_set_dropdown.change(
        fn=switch_example_set,
        inputs=[example_set_dropdown],
        outputs=[input_text, labels, threshold, nested_ner, merge_entities_toggle, example_dropdown]
    )

    def _select_example(selection):
        """Load the example named by the dropdown value ("Example N")."""
        # The value is None when the active set has no examples (the dropdown
        # is cleared in that case), so there is no index to parse.
        if not selection:
            return "", "", 0.3, False, False
        return load_example(int(selection.split()[1]) - 1)

    # Handling example selection within a set
    example_dropdown.change(
        fn=_select_example,
        inputs=[example_dropdown],
        outputs=[input_text, labels, threshold, nested_ner, merge_entities_toggle]
    )

    # Add a model recommendation for the example set
    def recommend_model(example_set_name):
        """Recommend appropriate model based on example set"""
        recommended = {
            "PII Detection": "GLiNER Multi PII",
            "NuNER Zero": "NuNER Zero",
        }.get(example_set_name, "GLiNER Medium v2.1")
        # gr.update() works on Gradio 3.x and 4.x; gr.Dropdown.update() was
        # removed in Gradio 4.
        return gr.update(value=recommended)

    # Auto-suggest model when changing example set
    example_set_dropdown.change(
        fn=recommend_model,
        inputs=[example_set_dropdown],
        outputs=[model_dropdown]
    )

    # Every interaction below re-runs ner() with the same inputs and output:
    # explicit submission first, then changes to the model or to any of the
    # prediction parameters.
    ner_inputs = [input_text, labels, model_dropdown, threshold, nested_ner, merge_entities_toggle]
    ner_triggers = (
        submit_btn.click,
        input_text.submit,
        model_dropdown.change,
        threshold.release,
        nested_ner.change,
        merge_entities_toggle.change,
    )
    for register_trigger in ner_triggers:
        register_trigger(fn=ner, inputs=ner_inputs, outputs=output)

# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    # Call queue() on the Blocks app before launching it.
    demo.queue()
    # debug=True is passed through to launch().
    demo.launch(debug=True)