import html
import re

import gradio as gr
from transformers import pipeline

# Token-classification pipeline used to find personal information in text.
# It is built once at import time and shared by every call to highlight_pii.
# The code below reads the `entity_group`, `start` and `end` fields of each
# result it returns.
pii_detector = pipeline(
    "token-classification",
    model="iiiorg/piiranha-v1-detect-personal-information",
    aggregation_strategy="simple",
)


def highlight_pii(text):
    """Return *text* as HTML-safe markup with each detected entity tagged.

    Every span reported by ``pii_detector`` is followed by `` [LABEL]``,
    where ``LABEL`` is the span's ``entity_group``. Because the result is
    rendered by a ``gr.HTML`` component, all text copied from the input is
    HTML-escaped so characters such as ``<`` and ``&`` are displayed
    literally instead of being interpreted as markup.

    Args:
        text: The raw user-supplied text to analyse.

    Returns:
        The escaped text with `` [LABEL]`` appended after each detected
        span, or an empty string when *text* is empty or ``None``.
    """
    if not text:
        # Nothing to analyse; avoid invoking the model on empty input.
        return ""

    pieces = []
    cursor = 0  # index in `text` up to which output has already been emitted
    for entity in sorted(pii_detector(text), key=lambda item: item["start"]):
        start, end = entity["start"], entity["end"]
        if start < cursor:
            # Defensive: a span starting inside one already emitted would
            # duplicate text, so it is skipped.
            continue
        pieces.append(html.escape(text[cursor:start]))
        label = html.escape(str(entity["entity_group"]))
        pieces.append(f"{html.escape(text[start:end])} [{label}]")
        cursor = end
    pieces.append(html.escape(text[cursor:]))
    return "".join(pieces)


def create_personal_info_tab():
    """Build the "Personal Information Identifier" UI inside a column.

    Creates a text input, an HTML output, a submit button that runs
    ``highlight_pii`` on the input, and one clickable example.
    """
    with gr.Column():
        gr.Markdown("### Personal Information Identifier")
        input_text = gr.Textbox(
            label="Enter text",
            lines=5,
            placeholder="Type your text here...",
        )
        output_html = gr.HTML()
        submit_button = gr.Button("Submit")
        submit_button.click(
            fn=highlight_pii,
            inputs=input_text,
            outputs=output_html,
        )
        gr.Examples(
            examples=[
                "Hugging Face is a company based in Paris and New York City that acquired Gradio in 2021."
            ],
            inputs=input_text,
        )