Spaces:
Running
Running
File size: 1,417 Bytes
0d3453c 79446df 43b2e08 0d3453c 43b2e08 79446df 43b2e08 184f4f7 43b2e08 184f4f7 cdabd86 43b2e08 79446df 43b2e08 184f4f7 43b2e08 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
import html
import re

import gradio as gr
from transformers import pipeline
# Shared token-classification pipeline that highlight_pii calls to find PII
# spans. It is created at import time, so importing this module builds the
# pipeline for the model named below.
# NOTE(review): aggregation_strategy="simple" is presumably what produces the
# grouped entities ('entity_group' with 'start'/'end' offsets) that
# highlight_pii reads -- confirm against the transformers pipeline docs.
pii_detector = pipeline(
"token-classification",
model="iiiorg/piiranha-v1-detect-personal-information",
aggregation_strategy="simple"
)
def highlight_pii(text):
    """Return *text* as an HTML fragment with detected PII spans highlighted.

    The module-level ``pii_detector`` pipeline is run on ``text``; every
    entity it reports is wrapped in a red-tinted ``<span>`` followed by its
    ``entity_group`` label in brackets. All text taken from the input (and
    the label itself) is HTML-escaped, so characters such as ``<`` or ``&``
    typed by the user are displayed literally instead of being interpreted
    as markup by the HTML component.

    Args:
        text: The raw user-supplied string to analyse.

    Returns:
        An HTML string. Empty or missing input yields ``""``.
    """
    if not text:
        return ""

    pieces = []
    cursor = 0  # index in `text` up to which output has already been emitted
    for entity in sorted(pii_detector(text), key=lambda e: e["start"]):
        start, end = entity["start"], entity["end"]
        if start < cursor:
            # Overlaps a span that was already emitted; skipping it keeps
            # the generated markup well-formed.
            continue
        # Plain text between the previous entity and this one.
        pieces.append(html.escape(text[cursor:start]))
        label = html.escape(str(entity["entity_group"]))
        pieces.append(
            '<span style="background-color:#ffcccc; padding:2px 4px; border-radius:4px;">'
            f'{html.escape(text[start:end])} '
            f'<b style="color:red;">[{label}]</b></span>'
        )
        cursor = end
    # Trailing text after the last entity (or the whole text if none found).
    pieces.append(html.escape(text[cursor:]))
    return "".join(pieces)
def create_personal_info_tab():
    """Build the "Personal Information Identifier" UI section.

    Creates a heading, a multi-line text input, an HTML output area, a
    Submit button wired to ``highlight_pii``, and one clickable example.
    Nothing is returned; the components are created inside a ``gr.Column``.
    """
    # NOTE(review): the scraped source lost its indentation; every statement
    # is assumed to live inside the Column context -- confirm.
    with gr.Column():
        gr.Markdown("### Personal Information Identifier")

        # Components are created in display order: input, output, button.
        text_box = gr.Textbox(
            label="Enter text",
            lines=5,
            placeholder="Type your text here...",
        )
        result_view = gr.HTML()
        run_button = gr.Button("Submit")

        # Clicking the button renders the highlighted text into the HTML area.
        run_button.click(fn=highlight_pii, inputs=text_box, outputs=result_view)

        sample_inputs = [
            "Hugging Face is a company based in Paris and New York City that acquired Gradio in 2021."
        ]
        gr.Examples(examples=sample_inputs, inputs=text_box)