"""Gradio demo: parse a raw email, clean its body, and list spaCy entities."""

import re

import gradio as gr
import spacy
from bs4 import BeautifulSoup
from mailparser import parse_from_string

# Loaded once at import time so every request reuses the same pipeline.
nlp = spacy.load("en_core_web_sm")

# One or more consecutive "=", "_" or "-" characters.
# NOTE(review): because the quantifier is "+", this also matches a single
# hyphen or underscore inside a word or an ISO date ("2023-01-15" becomes
# "20230115"), which may hurt DATE detection -- confirm this is intended.
_SEPARATOR_RUN = re.compile(r'[=_-]+')


def accept_mail(email_content):
    """Parse a raw email string with mail-parser and return the parsed object."""
    return parse_from_string(email_content)


def clean_email(email):
    """Return the email body as plain text with whitespace collapsed.

    The body is parsed as HTML, ``<style>`` and ``<link>`` tags are dropped,
    and the remaining text is joined with single spaces.
    """
    soup = BeautifulSoup(email.body, 'html.parser')
    for tag in soup.find_all(['style', 'link']):
        tag.decompose()
    # split()/join collapses every run of whitespace (incl. newlines) to " ".
    return ' '.join(soup.get_text(separator=' ').split())


def remove_special_characters(text):
    """Delete every run of ``=``, ``_`` or ``-`` characters from *text*."""
    return _SEPARATOR_RUN.sub('', text)


def extract_entities(text, labels):
    """Run the spaCy pipeline on *text* and keep entities whose label is wanted.

    Args:
        text: The text to analyse.
        labels: Iterable of entity label strings to keep; ``None`` or an
            empty iterable yields no entities.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples in document order.
    """
    wanted = set(labels or ())
    doc = nlp(text)
    return [(ent.text, ent.label_) for ent in doc.ents if ent.label_ in wanted]


def _format_addresses(addresses):
    """Render an address header value as a readable, comma-separated string.

    Presumably mail-parser exposes address headers as a list of
    ``(name, address)`` pairs -- verify against the installed version. Any
    other shape falls back to ``str()`` so nothing is silently dropped.
    """
    if addresses is None:
        return ""
    if isinstance(addresses, str):
        return addresses
    try:
        entries = list(addresses)
    except TypeError:
        return str(addresses)

    rendered = []
    for entry in entries:
        if isinstance(entry, (tuple, list)) and len(entry) == 2:
            name, address = entry
            rendered.append(f"{name} <{address}>" if name else str(address))
        else:
            rendered.append(str(entry))
    return ", ".join(rendered)


def present(email_content, labels):
    """Parse an email and return the values shown by the Gradio interface.

    Args:
        email_content: Raw email text pasted by the user.
        labels: Entity labels selected in the checkbox group.

    Returns:
        A list of six values, in the order of the interface outputs:
        subject, from, to, date, cleaned body, extracted entities.
    """
    email = accept_mail(email_content)
    cleaned_text = clean_email(email)
    further_cleaned_text = remove_special_characters(cleaned_text)
    entities = extract_entities(further_cleaned_text, labels)
    entity_info = '\n'.join(f"{text}: {label}" for text, label in entities)

    email_info = {
        "Subject": email.subject,
        # Shown in plain Textboxes, so format the address lists for reading
        # instead of letting the raw Python object be stringified.
        "From": _format_addresses(email.from_),
        "To": _format_addresses(email.to),
        "Date": email.date,
        "Cleaned Body": further_cleaned_text,
        "Extracted Entities": entity_info,
    }
    # Dict insertion order matches the order of the interface outputs.
    return list(email_info.values())


# NOTE(review): the stock "en_core_web_sm" NER presumably only emits a subset
# of these (e.g. PERSON, PRODUCT, DATE); labels such as "DEAL" or "ORDER"
# would need a custom-trained model to ever match -- confirm.
labels = [
    "PERSON",
    "PRODUCT",
    "DEAL",
    "ORDER",
    "ORDER PAYMENT METHOD",
    "STORE",
    "LEGAL ENTITY",
    "MERCHANT",
    "FINANCIAL TRANSACTION",
    "UNCATEGORIZED",
    "DATE",
]

demo = gr.Interface(
    fn=present,
    inputs=[
        gr.components.Textbox(label="Email Content"),
        # `value` (not the legacy `default`) sets the initially checked boxes.
        gr.components.CheckboxGroup(
            label="Labels to Detect",
            choices=labels,
            value=labels,
        ),
    ],
    outputs=[
        gr.components.Textbox(label="Subject"),
        gr.components.Textbox(label="From"),
        gr.components.Textbox(label="To"),
        gr.components.Textbox(label="Date"),
        gr.components.Textbox(label="Cleaned Body"),
        gr.components.Textbox(label="Extracted Entities"),
    ],
    title="Email Info",
    description="Enter the email content below to view its details and detected entities.",
)

if __name__ == "__main__":
    # Only start the web server when run as a script, not on import.
    demo.launch()