File size: 1,417 Bytes
0d3453c
79446df
43b2e08
0d3453c
43b2e08
 
 
 
 
79446df
43b2e08
 
 
184f4f7
43b2e08
 
 
 
 
 
184f4f7
cdabd86
43b2e08
 
 
 
79446df
 
 
43b2e08
 
 
 
 
 
 
 
184f4f7
43b2e08
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import html
import re

import gradio as gr
from transformers import pipeline

# Module-level token-classification pipeline used by highlight_pii.
# It is constructed once, when this module is imported, rather than on every
# request.
# NOTE(review): presumably this loads (and on first use downloads) the model
# weights at import time — confirm startup cost is acceptable.
pii_detector = pipeline(
    "token-classification",
    model="iiiorg/piiranha-v1-detect-personal-information",
    # highlight_pii reads the 'start', 'end' and 'entity_group' keys of each
    # result, which is the shape produced when aggregation is enabled.
    aggregation_strategy="simple"
)

def highlight_pii(text):
    """Return *text* as an HTML fragment with detected PII highlighted.

    The text is run through the module-level ``pii_detector`` pipeline. Each
    detected entity is wrapped in a ``<span>`` with a light-red background,
    followed by its label in bold red square brackets.

    Every piece of user text, and the entity label, is HTML-escaped before
    being placed in the fragment, because the result is rendered as raw HTML
    and the input comes straight from a user-editable text box.

    Args:
        text: The raw input string to analyse.

    Returns:
        An HTML string. Empty or ``None`` input yields ``""`` without
        invoking the model.
    """
    if not text:
        return ""

    entities = pii_detector(text)

    pieces = []
    cursor = 0  # index in the original text up to which output was emitted
    for entity in sorted(entities, key=lambda item: item["start"]):
        start = entity["start"]
        end = entity["end"]
        if start < cursor:
            # Overlaps a span that was already emitted; wrapping it as well
            # would duplicate text and break the markup, so skip it.
            continue

        # Plain text between the previous entity and this one.
        pieces.append(html.escape(text[cursor:start]))

        label = html.escape(str(entity["entity_group"]))
        matched = html.escape(text[start:end])
        pieces.append(
            '<span style="background-color:#ffcccc; padding:2px 4px; border-radius:4px;">'
            f'{matched} <b style="color:red;">[{label}]</b></span>'
        )
        cursor = end

    # Trailing text after the last entity (or the whole text if none found).
    pieces.append(html.escape(text[cursor:]))
    return "".join(pieces)

def create_personal_info_tab():
    """Build the "Personal Information Identifier" UI inside a column.

    Creates, in order: a Markdown heading, a five-line text input, an HTML
    output area, a "Submit" button whose click sends the input text through
    ``highlight_pii`` into the HTML area, and an examples widget bound to the
    input box. Nothing is returned.

    NOTE(review): presumably must be called inside an active ``gr.Blocks``
    (or tab) context — confirm against the caller.
    """
    sample_inputs = [
        "Hugging Face is a company based in Paris and New York City that acquired Gradio in 2021."
    ]

    with gr.Column():
        gr.Markdown("### Personal Information Identifier")

        # Components are created in display order; do not reorder.
        text_box = gr.Textbox(
            lines=5,
            label="Enter text",
            placeholder="Type your text here...",
        )
        result_view = gr.HTML()
        run_button = gr.Button("Submit")

        run_button.click(
            fn=highlight_pii,
            inputs=text_box,
            outputs=result_view,
        )

        gr.Examples(examples=sample_inputs, inputs=text_box)