File size: 2,736 Bytes
9d8ab14
 
 
 
 
 
 
 
 
 
 
 
 
 
2bc3666
9d8ab14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2bc3666
9d8ab14
 
 
 
 
 
 
2bc3666
 
 
 
 
 
 
 
 
 
2c1ec44
9d8ab14
 
2bc3666
9d8ab14
 
 
2bc3666
 
9d8ab14
 
2c1ec44
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import html
from collections import defaultdict

import gradio as gr
from transformers import pipeline

# NER pipeline built once at import time from the
# "syubraj/spanish_bert_based_ner" checkpoint; highlight_entities calls it
# for every request instead of reloading the model.
ner_pipeline = pipeline("ner", model="syubraj/spanish_bert_based_ner")

# Background colour used for each entity label when rendering the
# highlighted text and the legend. Labels look like BIO tags (B-* / I-* / O);
# any label missing from this table falls back to grey in highlight_entities.
ENTITY_COLORS = {
    # Persons
    "B-PER": "#FF5733",
    "I-PER": "#FF8D1A",
    # Locations
    "B-LOC": "#33FF57",
    "I-LOC": "#2EB82E",
    # Organisations
    "B-ORG": "#3380FF",
    "I-ORG": "#1A66FF",
    # Outside any entity
    "O": "#E0E0E0",
}

def highlight_entities(text):
    """
    Run Named Entity Recognition (NER) on a Spanish text and return it as HTML.

    Every token the model tags is wrapped in a ``<span>`` whose background
    colour is looked up in ``ENTITY_COLORS`` (grey ``#E0E0E0`` for labels that
    are not in the table). A legend listing each label/colour pair is placed
    in front of the highlighted text.

    All text taken from the input is HTML-escaped, so markup typed by the user
    is displayed literally instead of being injected into the page.

    Parameters:
    text (str): The input Spanish text. ``None`` or an empty string produces
        only the legend and does not invoke the model.

    Returns:
    str: The HTML formatted output with highlighted entities and a legend.
    """
    text = text or ""
    # Nothing to tag for empty input, so skip the model call entirely.
    entities = ner_pipeline(text) if text else []

    pieces = []
    last_idx = 0
    for ent in entities:
        start, end = ent.get('start'), ent.get('end')
        # Without character offsets, or when a span overlaps text that was
        # already emitted, the entity cannot be placed reliably; leave that
        # stretch as plain text rather than duplicating or dropping characters.
        if start is None or end is None or start < last_idx:
            continue

        color = ENTITY_COLORS.get(ent['entity'], "#E0E0E0")
        # Show the original characters (text[start:end]) rather than the
        # tokenizer's own rendering of the piece, which may differ from what
        # the user typed (e.g. sub-word markers).
        surface = html.escape(text[start:end])

        pieces.append(html.escape(text[last_idx:start]))
        pieces.append(
            f'<span style="background-color:{color}; padding:2px; border-radius:5px;">{surface}</span>'
        )
        last_idx = end

    # Trailing text after the last entity (or the whole text if none matched).
    pieces.append(html.escape(text[last_idx:]))

    # Generate legend
    legend_items = "".join(
        f'<span style="background-color:{color}; padding:2px 5px; border-radius:5px; margin-right:5px;">{label}</span>'
        for label, color in ENTITY_COLORS.items()
    )
    legend_html = "<div><b>Legend:</b><br>" + legend_items + "</div><br>"

    return legend_html + "".join(pieces)

# Sample Spanish sentences offered as one-click examples in the UI.
# The accented characters had been corrupted by a wrong-encoding round trip
# (UTF-8 bytes decoded as GBK); they are restored to proper Spanish here.
example_sentences = [
    "Elon Musk vive en Estados Unidos y es dueño de SpaceX, Tesla y Starlink.",
    "Lionel Messi juega para el Inter Miami y ha ganado múltiples Balón de Oro.",
    "Amazon es una de las empresas tecnológicas más grandes con sede en Seattle, EE.UU.",
    "Madrid es la capital de España y alberga el famoso museo del Prado.",
    "Shakira nació en Colombia y es una de las artistas más reconocidas a nivel mundial.",
]

def example_selector(example):
    """
    Return the highlighted HTML for a single example sentence.

    Thin wrapper that forwards ``example`` unchanged to
    ``highlight_entities`` and hands back whatever it returns.

    Parameters:
    example (str): The sentence to run through the highlighter.

    Returns:
    str: The HTML produced by ``highlight_entities`` for ``example``.
    """
    highlighted = highlight_entities(example)
    return highlighted

# Assemble the Gradio UI: a single text box feeds highlight_entities and the
# returned HTML string is shown in an HTML output component. The sentences in
# example_sentences are passed as the interface's examples.
# NOTE(review): allow_flagging="never" presumably turns off Gradio's flagging
# feature; newer Gradio releases may expect a different keyword - confirm
# against the installed version.
grn = gr.Interface(
    fn=highlight_entities,
    inputs=gr.Textbox(label="Enter Spanish Text"),
    outputs=gr.HTML(label="NER Highlighted Text"),
    title="Spanish Named Entity Recognition",
    description="This interactive demo performs Named Entity Recognition (NER) on Spanish text. Recognized entities such as persons, locations, and organizations are highlighted in distinct colors for better readability. A legend is provided to help interpret the color coding.",
    examples=example_sentences,
    allow_flagging="never"
)

# Launch the app; this runs unconditionally whenever the module is executed
# or imported.
grn.launch()