import html
from collections import defaultdict

import gradio as gr
from transformers import pipeline
# Token-classification pipeline, built once at module load and reused by
# highlight_entities for every request.
ner_pipeline = pipeline(
    task="ner",
    model="syubraj/spanish_bert_based_ner",
)
# Background color used for each NER tag. Insertion order is the order in
# which the legend lists the tags.
ENTITY_COLORS = {
    # Persons
    "B-PER": "#FF5733",
    "I-PER": "#FF8D1A",
    # Locations
    "B-LOC": "#33FF57",
    "I-LOC": "#2EB82E",
    # Organizations
    "B-ORG": "#3380FF",
    "I-ORG": "#1A66FF",
    # Outside any entity
    "O": "#E0E0E0",
}
def _colored_span(content, color):
    """Wrap already-escaped *content* in a span with a *color* background."""
    # NOTE(review): the original inline markup was not recoverable from the
    # source; this styling is a reconstruction -- confirm the intended look.
    return (
        f'<span style="background-color: {color}; padding: 2px 4px; '
        f'border-radius: 4px;">{content}</span>'
    )


def highlight_entities(text):
    """Run NER over *text* and return it as HTML with entities highlighted.

    Each entity reported by ``ner_pipeline`` is wrapped in a span whose
    background color is looked up in ``ENTITY_COLORS`` by the entity's label
    (falling back to ``#E0E0E0`` for unknown labels). A legend listing every
    label in ``ENTITY_COLORS`` is placed before the text.

    Args:
        text (str): The input text. ``None`` or an empty/whitespace-only
            string skips the model call.

    Returns:
        str: HTML consisting of the legend followed by the highlighted text.
    """
    text = text or ""
    # Nothing to tag in blank input, so avoid invoking the model at all.
    entities = ner_pipeline(text) if text.strip() else []

    pieces = []
    cursor = 0
    for ent in entities:
        start, end = ent.get("start"), ent.get("end")
        # Skip spans that lack character offsets or would overlap text that
        # was already emitted, so surrounding text is never duplicated.
        if start is None or end is None or start < cursor:
            continue
        color = ENTITY_COLORS.get(ent["entity"], "#E0E0E0")
        # The text is user-supplied, so escape everything placed in the HTML.
        pieces.append(html.escape(text[cursor:start]))
        # Show the original slice rather than ent["word"]: the latter is the
        # tokenizer's piece and may differ from what the user typed.
        pieces.append(_colored_span(html.escape(text[start:end]), color))
        cursor = end
    pieces.append(html.escape(text[cursor:]))

    # Generate legend
    legend_items = " ".join(
        _colored_span(html.escape(label), color)
        for label, color in ENTITY_COLORS.items()
    )
    legend_html = f"<div><b>Legend:</b><br>{legend_items}</div><br>"

    return legend_html + "".join(pieces)
# Sample Spanish sentences offered as one-click examples in the demo UI.
example_sentences = [
    "Elon Musk vive en Estados Unidos y es dueño de SpaceX, Tesla y Starlink.",
    "Lionel Messi juega para el Inter Miami y ha ganado múltiples Balón de Oro.",
    "Amazon es una de las empresas tecnológicas más grandes con sede en Seattle, EE.UU.",
    "Madrid es la capital de España y alberga el famoso museo del Prado.",
    "Shakira nació en Colombia y es una de las artistas más reconocidas a nivel mundial.",
]
def example_selector(example):
    """Return the highlighted HTML for a chosen example sentence.

    Thin wrapper that forwards *example* unchanged to ``highlight_entities``.
    """
    rendered = highlight_entities(example)
    return rendered
# Single-textbox demo: the entered text goes through highlight_entities and
# the returned markup is rendered in an HTML component.
grn = gr.Interface(
    fn=highlight_entities,
    inputs=gr.Textbox(label="Enter Spanish Text"),
    outputs=gr.HTML(label="NER Highlighted Text"),
    title="Spanish Named Entity Recognition",
    description=(
        "This interactive demo performs Named Entity Recognition (NER) on "
        "Spanish text. Recognized entities such as persons, locations, and "
        "organizations are highlighted in distinct colors for better "
        "readability. A legend is provided to help interpret the color coding."
    ),
    examples=example_sentences,
    # NOTE(review): recent Gradio releases appear to rename `allow_flagging`
    # to `flagging_mode` -- confirm against the installed version.
    allow_flagging="never",
)

# Start serving the demo.
grn.launch()