import logging
from functools import lru_cache
from typing import Dict, List, Union

import gradio as gr
import torch
from transformers import AutoTokenizer, pipeline

logger = logging.getLogger(__name__)

# Custom models for zero-shot classification requiring trust_remote_code=True
CUSTOM_MODELS = [
    "mjwong/gte-multilingual-base-xnli-anli",
]

# Available models for zero-shot classification
AVAILABLE_MODELS = [
    "mjwong/multilingual-e5-large-instruct-xnli-anli",
    "mjwong/multilingual-e5-base-xnli-anli",
    "mjwong/multilingual-e5-large-xnli-anli",
    "mjwong/drama-base-xnli-anli",
    "mjwong/drama-large-xnli-anli",
    "mjwong/mcontriever-msmarco-xnli",
    "mjwong/mcontriever-xnli",
] + CUSTOM_MODELS


def _parse_labels(labels: str) -> List[str]:
    """Split a comma-separated string into stripped, non-empty, unique labels."""
    stripped = (part.strip() for part in labels.split(","))
    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(part for part in stripped if part))


@lru_cache(maxsize=1)
def _load_classifier(model_name: str):
    """Build the zero-shot pipeline for ``model_name``.

    Only the most recently used pipeline is kept, so repeated requests against
    the same model do not rebuild it, while switching models does not keep
    several models alive at once.
    """
    # Set device: 0 if GPU available, else -1 for CPU
    device = 0 if torch.cuda.is_available() else -1

    if model_name in CUSTOM_MODELS:
        # These models are loaded with trust_remote_code=True and an
        # explicitly created tokenizer.
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return pipeline(
            "zero-shot-classification",
            model=model_name,
            device=device,
            tokenizer=tokenizer,
            trust_remote_code=True,
        )

    return pipeline("zero-shot-classification", model=model_name, device=device)


def classify_text(
    model_name: str,
    text: str,
    labels: str,
    multi_label: bool = False,
) -> Union[Dict[str, float], str]:
    """
    Classifies the input text against the provided labels using a zero-shot
    classification model.

    Args:
        model_name: The name of the Hugging Face model to use.
        text: The input text to classify.
        labels: A comma-separated string of candidate labels. Blank entries
            and duplicates are ignored.
        multi_label: Passed through to the pipeline's ``multi_label`` argument.

    Returns:
        A dictionary mapping each label to its classification score, or an
        error message string (starting with "Error:") when the input is
        invalid or classification fails.
    """
    if not text or not text.strip():
        return "Error: Please enter some text to classify."

    if not labels or not labels.strip():
        return "Error: Please enter some labels to classify the text."

    labels_list = _parse_labels(labels)
    if not labels_list:
        # Input such as "," or " , " contains no usable label.
        return "Error: Please enter some labels to classify the text."

    if not model_name:
        # Without this guard a missing model name would be passed to
        # pipeline(), which then picks its own default model.
        return "Error: Please select a model."

    try:
        classifier = _load_classifier(model_name)
        result = classifier(text, candidate_labels=labels_list, multi_label=multi_label)
        return dict(zip(result["labels"], result["scores"]))
    except Exception:
        # Boundary handler: keep the generic message for the caller but record
        # the real cause so failures can be diagnosed.
        logger.exception("Zero-shot classification failed for model %r", model_name)
        return "Error: An unexpected error occurred. Please try again later."
# Example inputs: each entry is [text, comma-separated candidate labels],
# matching the order of the `inputs` passed to gr.Examples below.
examples = [
    [
        "The government announced a new economic policy today aimed at reducing inflation and stabilizing the currency market.",
        "economy, politics, finance, policy, inflation, government, currency",
    ],
    [
        "中国的科技公司在人工智能领域取得了重大突破,这可能会影响全球市场。",
        "科技, 经济, 创新, 市场, 人工智能, 全球",
    ],
    [
        "นักวิจัยค้นพบวิธีใหม่ในการรักษาโรคมะเร็ง ซึ่งอาจช่วยชีวิตผู้ป่วยหลายล้านคนทั่วโลก",
        "การแพทย์, วิทยาศาสตร์, นวัตกรรม, สุขภาพ, โรคมะเร็ง, การรักษา",
    ],
    [
        "La conférence des Nations Unies sur le climat a abouti à un nouvel accord pour réduire les émissions de carbone d'ici 2030.",
        "environnement, climat, politique, énergie, carbone, écologie, ONU",
    ],
    [
        "सरकार ने आज एक नई आर्थिक नीति की घोषणा की, जिसका उद्देश्य मुद्रास्फीति को कम करना और मुद्रा बाजार को स्थिर करना है।",
        "अर्थव्यवस्था, राजनीति, वित्त, नीति, मुद्रास्फीति, सरकार, मुद्रा",
    ],
]

# Custom CSS for the Gradio interface: hides the footer and the markdown
# output element, and styles the primary and "Classify" buttons.
css = """
footer {display:none !important}
.output-markdown{display:none !important}
.gr-button-primary {
    z-index: 14;
    height: 43px;
    width: 130px;
    left: 0px;
    top: 0px;
    padding: 0px;
    cursor: pointer !important;
    background: none rgb(17, 20, 45) !important;
    border: none !important;
    text-align: center !important;
    font-family: Poppins !important;
    font-size: 14px !important;
    font-weight: 500 !important;
    color: rgb(255, 255, 255) !important;
    line-height: 1 !important;
    border-radius: 12px !important;
    transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
    box-shadow: none !important;
}
.classify-button {
    background: linear-gradient(90deg, yellow, orange) !important;
}
"""

# Initialize Gradio interface
with gr.Blocks(css=css) as iface:
    gr.Markdown("# Zero-Shot Text Classifier")
    gr.Markdown(
        "Select a model, enter text, and a set of labels to classify the text "
        "using a zero-shot classification model."
    )
    gr.Markdown(
        "More than 10 languages are officially supported, including: English, "
        "Arabic, Bulgarian, German, Greek, Spanish, French, Hindi, Russian, "
        "Swahili, Thai, Turkish, Urdu, Vietnamese and Chinese."
    )

    with gr.Row():
        # Dropdown to select a model; pre-select the first entry so a request
        # is never submitted without a model.
        model_dropdown = gr.Dropdown(
            AVAILABLE_MODELS,
            value=AVAILABLE_MODELS[0],
            label="Choose Model",
        )

        # Checkbox for multi-label classification
        multi_label = gr.Checkbox(
            label="Multi-label",
            value=False,
            info="Check for multi-label classification, uncheck for single-label (multi-class).",
        )

    # Input fields for text and labels
    with gr.Row():
        text_input = gr.Textbox(
            label="Enter Text",
            placeholder="Type or paste text here...",
        )
        label_input = gr.Textbox(
            label="Enter Labels (comma-separated)",
            placeholder="e.g., sports, politics, technology",
        )

    # Output display
    output_label = gr.Label(label="Classification Scores")

    # Classification button; the inputs are passed to classify_text in the
    # order of its parameters (model_name, text, labels, multi_label).
    submit_button = gr.Button("Classify", elem_classes=["classify-button"])
    submit_button.click(
        fn=classify_text,
        inputs=[model_dropdown, text_input, label_input, multi_label],
        outputs=output_label,
    )

    # Clickable examples that fill in the text and label inputs
    gr.Examples(examples, inputs=[text_input, label_input])

# Launch the app
if __name__ == "__main__":
    iface.launch()