Spaces:

dwb2023
/

gliner_testbed

Running

App Files Files Community

gliner_testbed / app.py

dwb2023

Create app.py

ab78124 verified 2 months ago

raw

history blame

9.24 kB

	import json
	from typing import Dict, Union, List
	from gliner import GLiNER
	import gradio as gr
	import os

	# Display name -> Hugging Face model identifier for every selectable model.
	MODELS: Dict[str, str] = {
	    "GLiNER Medium v2.1": "urchade/gliner_medium-v2.1",
	    "NuNER Zero": "numind/NuZero_token",
	    "GLiNER Multi PII": "urchade/gliner_multi_pii-v1",
	}

	# Display name -> JSON file holding the examples for that use case.
	EXAMPLE_SETS: Dict[str, str] = {
	    "General NER": "examples.json",
	    "NuNER Zero": "examples-nuner.json",
	    "PII Detection": "examples-pii.json",
	}

	# Cache of instantiated models, filled lazily the first time each is used.
	loaded_models: Dict[str, "GLiNER"] = {}

	# Examples of the currently selected example set.
	current_examples: List[list] = []

	def load_example_set(example_set_name):
	    """Read the examples that belong to the named example set.

	    The name is looked up in ``EXAMPLE_SETS`` to obtain a JSON file path,
	    and the parsed content of that file is returned.  When the name is
	    unknown, the file is missing, or the file is not valid JSON, the
	    problem is printed and an empty list is returned instead.
	    """
	    try:
	        with open(EXAMPLE_SETS[example_set_name], "r", encoding="utf-8") as handle:
	            return json.load(handle)
	    except (KeyError, FileNotFoundError, json.JSONDecodeError) as err:
	        print(f"Error loading example set {example_set_name}: {err}")
	    # Only reached after a handled failure above.
	    return []

	# Load the default example set at import time; the widgets built further
	# down read `current_examples` for their initial values.  On a load
	# failure `load_example_set` returns an empty list.
	current_examples = load_example_set("General NER")

	def get_model(model_name):
	    """Return the model registered under ``model_name``, loading it once.

	    Already-loaded models are served from the ``loaded_models`` cache.
	    Otherwise the identifier is looked up in ``MODELS`` (raising
	    ``KeyError`` for an unknown name), the model is created with
	    ``GLiNER.from_pretrained`` and stored in the cache before returning.
	    """
	    if model_name in loaded_models:
	        return loaded_models[model_name]

	    model = GLiNER.from_pretrained(MODELS[model_name])
	    loaded_models[model_name] = model
	    return model

	def merge_entities(entities):
	    """Merge adjacent entities of the same type.

	    Two consecutive entities are merged when they share the same
	    ``'entity'`` label and the second one starts either exactly where the
	    first one ends or one character later.  The merged entity keeps every
	    field of the first entity (including any score), extends ``'end'`` to
	    the end of the last merged entity and concatenates the ``'word'``
	    values.

	    Args:
	        entities: Sequence of dicts with at least the keys ``'entity'``,
	            ``'word'``, ``'start'`` and ``'end'``, in text order.

	    Returns:
	        A new list of new dicts; neither the input list nor the dicts it
	        contains are modified.
	    """
	    if not entities:
	        return []

	    merged = []
	    # Copy before extending so the caller's dicts stay untouched.
	    current = dict(entities[0])
	    for candidate in entities[1:]:
	        gap = candidate['start'] - current['end']
	        if candidate['entity'] == current['entity'] and gap in (0, 1):
	            # A one-character gap is rendered as a space; spans that touch
	            # directly are joined without inventing a separator.
	            separator = ' ' if gap == 1 else ''
	            current['word'] += separator + candidate['word']
	            current['end'] = candidate['end']
	        else:
	            merged.append(current)
	            current = dict(candidate)
	    merged.append(current)
	    return merged

	def ner(
	    text: str,
	    labels: str,
	    model_name: str,
	    threshold: float,
	    nested_ner: bool,
	    merge_entities_toggle: bool
	) -> Dict[str, Union[str, List]]:
	    """Run named entity recognition with the selected model and parameters.

	    Args:
	        text: The text to analyse.
	        labels: Comma-separated entity labels; surrounding whitespace is
	            stripped and blank entries (e.g. from a trailing comma) are
	            ignored.
	        model_name: Key of the model to use, resolved through ``get_model``.
	        threshold: Confidence threshold forwarded to the model.
	        nested_ner: When true the model is called with ``flat_ner=False``.
	        merge_entities_toggle: When true the predictions are post-processed
	            with ``merge_entities``.

	    Returns:
	        A dict with the original ``"text"`` and an ``"entities"`` list whose
	        items have the keys ``entity``, ``word``, ``start``, ``end`` and
	        ``score``.  If the text is empty/blank or no usable label is given,
	        the list is empty and no model is loaded.
	    """
	    # Split labels, discarding blanks so the model is never asked for "".
	    label_list = [
	        label.strip() for label in (labels or "").split(",") if label.strip()
	    ]

	    # Nothing to predict: answer right away instead of loading a model and
	    # feeding it degenerate input.
	    if not text or not text.strip() or not label_list:
	        return {
	            "text": text or "",
	            "entities": [],
	        }

	    # Get the selected model (loaded on first use).
	    model = get_model(model_name)

	    # Predict and rename the model's fields to the keys used by the output.
	    predictions = model.predict_entities(
	        text, label_list, flat_ner=not nested_ner, threshold=threshold
	    )
	    entities = [
	        {
	            "entity": entity["label"],
	            "word": entity["text"],
	            "start": entity["start"],
	            "end": entity["end"],
	            "score": entity.get("score", 0),
	        }
	        for entity in predictions
	    ]

	    # Merge entities if enabled.
	    if merge_entities_toggle:
	        entities = merge_entities(entities)

	    return {
	        "text": text,
	        "entities": entities,
	    }

	def load_example(example_idx):
	    """Load a specific example by index from the current example set.

	    Args:
	        example_idx: Zero-based position in ``current_examples``.

	    Returns:
	        A 5-tuple ``(text, labels, threshold, nested_ner, merge)`` built
	        from the first four fields of the example; the merge flag is always
	        ``False``.  When the index is ``None`` or outside
	        ``0 .. len(current_examples) - 1`` (which includes every index while
	        the set is empty) the blank defaults ``("", "", 0.3, False, False)``
	        are returned.
	    """
	    # Reject negative indices explicitly: Python would otherwise wrap them
	    # around and silently return an example from the end of the list.
	    if example_idx is None or not 0 <= example_idx < len(current_examples):
	        return "", "", 0.3, False, False

	    example = current_examples[example_idx]
	    return example[0], example[1], example[2], example[3], False

	def switch_example_set(example_set_name):
	    """Switch to a different example set and update the interface.

	    Replaces the module-level ``current_examples`` with the examples of the
	    named set (an empty list if it cannot be loaded).

	    Args:
	        example_set_name: Name of the example set to load.

	    Returns:
	        A 6-tuple: text, labels, threshold and nested-NER flag of the first
	        example, ``False`` for the merge toggle, and a dropdown update
	        listing ``"Example 1" .. "Example N"`` with the first one selected.
	        For an empty set the blank defaults are returned together with an
	        update that clears the dropdown.
	    """
	    global current_examples
	    current_examples = load_example_set(example_set_name)

	    # `gr.update` is used rather than `gr.Dropdown.update`: the
	    # per-component `update` classmethods no longer exist in Gradio 4+,
	    # while `gr.update` is available in both old and new releases.
	    if not current_examples:
	        return "", "", 0.3, False, False, gr.update(choices=[], value=None)

	    example = current_examples[0]
	    example_names = [f"Example {i+1}" for i in range(len(current_examples))]
	    return (
	        example[0],
	        example[1],
	        example[2],
	        example[3],
	        False,
	        gr.update(choices=example_names, value=example_names[0]),
	    )

	# Build the Gradio UI: model / example-set / example selectors, the text
	# and label inputs, the prediction options, and the highlighted output.
	# All event wiring lives inside the same Blocks context.
	with gr.Blocks(title="Unified NER Interface") as demo:
	    gr.Markdown(
	        """
	        # Unified Zero-shot Named Entity Recognition Interface

	        This interface allows you to compare different zero-shot Named Entity Recognition models.

	        ## Models Available:
	        - GLiNER Medium v2.1: The original GLiNER medium model
	        - NuNER Zero: A specialized token-based NER model
	        - GLiNER Multi PII: Fine-tuned for detecting personally identifiable information across multiple languages

	        ## Features:
	        - Select different models
	        - Switch between example sets for different use cases
	        - Toggle nested entity recognition
	        - Toggle entity merging (combining adjacent entities of the same type)
	        - Select from various examples within each set
	        """
	    )

	    # Initial widget values come from the first example of the default set,
	    # falling back to blank defaults when that set could not be loaded.
	    first_example = current_examples[0] if current_examples else ("", "", 0.3, False)
	    example_names = [f"Example {i+1}" for i in range(len(current_examples))]

	    with gr.Row():
	        model_dropdown = gr.Dropdown(
	            choices=list(MODELS.keys()),
	            value=list(MODELS.keys())[0],
	            label="Model",
	            info="Select the NER model to use"
	        )
	        example_set_dropdown = gr.Dropdown(
	            choices=list(EXAMPLE_SETS.keys()),
	            value="General NER",
	            label="Example Set",
	            info="Select a set of example texts"
	        )

	    with gr.Row():
	        example_dropdown = gr.Dropdown(
	            choices=example_names,
	            # Only preselect an entry that actually exists in `choices`.
	            value=example_names[0] if example_names else None,
	            label="Example",
	            info="Select a specific example text"
	        )

	    input_text = gr.Textbox(
	        value=first_example[0],
	        label="Text input",
	        placeholder="Enter your text here",
	        lines=5
	    )

	    with gr.Row():
	        labels = gr.Textbox(
	            value=first_example[1],
	            label="Entity Labels",
	            placeholder="Enter your labels here (comma separated)",
	            scale=2,
	        )
	        threshold = gr.Slider(
	            0,
	            1,
	            value=first_example[2],
	            step=0.01,
	            label="Confidence Threshold",
	            info="Lower the threshold to increase how many entities get predicted.",
	            scale=1,
	        )

	    with gr.Row():
	        nested_ner = gr.Checkbox(
	            value=first_example[3],
	            label="Nested NER",
	            info="Allow entities to be contained within other entities",
	        )
	        merge_entities_toggle = gr.Checkbox(
	            value=False,
	            label="Merge Adjacent Entities",
	            info="Combine adjacent entities of the same type into a single entity",
	        )

	    output = gr.HighlightedText(label="Predicted Entities")
	    submit_btn = gr.Button("Submit")

	    # Handling example set selection
	    example_set_dropdown.change(
	        fn=switch_example_set,
	        inputs=[example_set_dropdown],
	        outputs=[input_text, labels, threshold, nested_ner, merge_entities_toggle, example_dropdown]
	    )

	    def _select_example(choice):
	        """Translate an "Example N" dropdown value into that example's fields.

	        The dropdown is cleared (value ``None``) when an example set turns
	        out to be empty; in that case every output is left unchanged
	        instead of failing on ``None.split()``.
	        """
	        if not choice:
	            return tuple(gr.update() for _ in range(5))
	        return load_example(int(choice.split()[1]) - 1)

	    # Handling example selection within a set
	    example_dropdown.change(
	        fn=_select_example,
	        inputs=[example_dropdown],
	        outputs=[input_text, labels, threshold, nested_ner, merge_entities_toggle]
	    )

	    # Add a model recommendation for the example set
	    def recommend_model(example_set_name):
	        """Recommend appropriate model based on example set.

	        "PII Detection" selects "GLiNER Multi PII", "NuNER Zero" selects
	        "NuNER Zero", and anything else selects "GLiNER Medium v2.1".
	        Returns a ``gr.update`` (the per-component ``gr.Dropdown.update``
	        no longer exists in Gradio 4+).
	        """
	        recommended = {
	            "PII Detection": "GLiNER Multi PII",
	            "NuNER Zero": "NuNER Zero",
	        }
	        return gr.update(value=recommended.get(example_set_name, "GLiNER Medium v2.1"))

	    # Auto-suggest model when changing example set
	    example_set_dropdown.change(
	        fn=recommend_model,
	        inputs=[example_set_dropdown],
	        outputs=[model_dropdown]
	    )

	    # Every one of these events re-runs the prediction with the current
	    # widget values; they are registered in the listed order.
	    ner_inputs = [input_text, labels, model_dropdown, threshold, nested_ner, merge_entities_toggle]
	    for register_event in (
	        submit_btn.click,              # Submitting via the button
	        input_text.submit,             # Submitting from the text box
	        model_dropdown.change,         # Other interactions
	        threshold.release,
	        nested_ner.change,
	        merge_entities_toggle.change,
	    ):
	        register_event(fn=ner, inputs=ner_inputs, outputs=output)

	# Start the app only when this file is executed directly: enable request
	# queuing on the Blocks app, then launch it in debug mode.
	if __name__ == "__main__":
	    demo.queue().launch(debug=True)