# Spaces: Running
import html
import re

import gradio as gr
from transformers import pipeline
# Token-classification pipeline used to find PII spans. It is built once at
# import time so every request reuses the same loaded model. The "simple"
# aggregation strategy is requested so results come back as grouped entities;
# highlight_pii() reads their "entity_group", "start" and "end" fields.
pii_detector = pipeline(
    task="token-classification",
    model="iiiorg/piiranha-v1-detect-personal-information",
    aggregation_strategy="simple",
)
def highlight_pii(text, *, detector=None):
    """Return ``text`` as an HTML fragment with detected PII highlighted.

    Each detected entity is wrapped in a coloured ``<span>`` followed by its
    label in square brackets. All text taken from the input (and the label
    reported by the detector) is HTML-escaped, because the result is rendered
    as raw HTML and the input is typed by the user.

    Args:
        text: The raw text to scan. ``None`` or an empty string yields ``""``
            without invoking the detector.
        detector: Optional callable taking the text and returning an iterable
            of mappings with ``"start"``, ``"end"`` (character offsets into
            ``text``) and ``"entity_group"`` keys. Defaults to the
            module-level ``pii_detector`` pipeline.

    Returns:
        The escaped text with one highlighted ``<span>`` per detected entity.
    """
    if not text:
        return ""
    if detector is None:
        detector = pii_detector

    span_template = (
        '<span style="background-color:#ffcccc; padding:2px 4px; '
        'border-radius:4px;">{snippet} <b style="color:red;">[{label}]</b></span>'
    )

    pieces = []
    cursor = 0  # index in ``text`` up to which output has been emitted
    for entity in sorted(detector(text), key=lambda found: found["start"]):
        start, end = entity["start"], entity["end"]
        if start < cursor:
            # Overlaps a span that was already emitted; wrapping it as well
            # would duplicate text and produce broken markup, so skip it.
            continue
        pieces.append(html.escape(text[cursor:start]))
        pieces.append(
            span_template.format(
                snippet=html.escape(text[start:end]),
                label=html.escape(str(entity["entity_group"])),
            )
        )
        cursor = end
    pieces.append(html.escape(text[cursor:]))
    return "".join(pieces)
def create_personal_info_tab():
    """Build the "Personal Information Identifier" UI section.

    Lays out, in a single column: a heading, a multi-line text input, an HTML
    output area, a submit button wired to ``highlight_pii``, and one example
    input. Returns nothing; the components are created for their side effect
    on the enclosing Gradio layout.

    NOTE(review): presumably meant to be called inside an active
    ``gr.Blocks`` context - confirm against the caller.
    """
    with gr.Column():
        gr.Markdown(value="### Personal Information Identifier")

        text_input = gr.Textbox(
            label="Enter text",
            lines=5,
            placeholder="Type your text here...",
        )
        html_output = gr.HTML()

        # Clicking the button runs the detector on the textbox content and
        # renders the highlighted result in the HTML area.
        run_button = gr.Button(value="Submit")
        run_button.click(
            fn=highlight_pii,
            inputs=text_input,
            outputs=html_output,
        )

        sample_inputs = [
            "Hugging Face is a company based in Paris and New York City that acquired Gradio in 2021."
        ]
        gr.Examples(examples=sample_inputs, inputs=text_input)