# Hugging Face Space: mjwong/Zero-Shot-Text-Classification (status: Sleeping)
# File size: 6,183 Bytes

import logging
from functools import lru_cache
from typing import Dict, Union

import gradio as gr
import torch
from transformers import AutoTokenizer, pipeline

# Checkpoints that must be loaded with trust_remote_code=True.
CUSTOM_MODELS = ["mjwong/gte-multilingual-base-xnli-anli"]

# Every checkpoint selectable for zero-shot classification: the regular ones
# first, then the custom ones appended at the end.
AVAILABLE_MODELS = [
    "mjwong/multilingual-e5-large-instruct-xnli-anli",
    "mjwong/multilingual-e5-base-xnli-anli",
    "mjwong/multilingual-e5-large-xnli-anli",
    "mjwong/drama-base-xnli-anli",
    "mjwong/drama-large-xnli-anli",
    "mjwong/mcontriever-msmarco-xnli",
    "mjwong/mcontriever-xnli",
    *CUSTOM_MODELS,
]

logger = logging.getLogger(__name__)


@lru_cache(maxsize=1)
def _load_classifier(model_name: str):
    """Build the zero-shot pipeline for ``model_name``, reusing the last one built.

    Only the most recently requested pipeline is kept (``maxsize=1``), so
    repeated requests for the same model skip re-instantiating it while at
    most one pipeline is retained between calls.
    """
    # Set device: 0 if GPU available, else -1 for CPU
    device = 0 if torch.cuda.is_available() else -1

    if model_name in CUSTOM_MODELS:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return pipeline(
            "zero-shot-classification",
            model=model_name,
            device=device,
            tokenizer=tokenizer,
            trust_remote_code=True,
        )
    return pipeline("zero-shot-classification", model=model_name, device=device)


def classify_text(
        model_name: str,
        text: str,
        labels: str,
        multi_label: bool = False,
    ) -> Union[Dict[str, float], str]:
    """
    Score the input text against the provided labels with a zero-shot classification model.

    Args:
        model_name: The name of the Hugging Face model to use.
        text: The input text to classify.
        labels: A comma-separated string of candidate labels. Surrounding
            whitespace is stripped; empty and repeated entries are ignored.
        multi_label: Forwarded to the pipeline's ``multi_label`` argument.

    Returns:
        A dictionary mapping each label to its classification score on
        success. On invalid input or failure, a human-readable string starting
        with ``"Error:"`` is returned instead; this function does not raise.
    """
    if not text or not text.strip():
        return "Error: Please enter some text to classify."

    # Drop blanks left by stray/trailing commas and collapse duplicates while
    # keeping first-seen order: the result is keyed by label, so a repeated
    # label would otherwise be scored twice and then silently overwritten.
    stripped_labels = (label.strip() for label in (labels or "").split(","))
    labels_list = list(dict.fromkeys(label for label in stripped_labels if label))
    if not labels_list:
        return "Error: Please enter some labels to classify the text."

    # pipeline(model=None) would silently fall back to the library's default
    # model, so reject a missing selection explicitly.
    if not model_name:
        return "Error: Please choose a model."

    try:
        classifier = _load_classifier(model_name)
        result = classifier(text, candidate_labels=labels_list, multi_label=multi_label)
        return dict(zip(result["labels"], result["scores"]))
    except Exception:
        # Keep the user-facing message generic, but record the cause.
        logger.exception("Zero-shot classification failed for model %r", model_name)
        return "Error: An unexpected error occurred. Please try again later."

# Sample inputs for the Examples panel: each entry is [text, comma-separated
# candidate labels]. The samples are in English, Chinese, Thai, French and Hindi.
examples = [
    [sample_text, candidate_labels]
    for sample_text, candidate_labels in (
        (
            "The government announced a new economic policy today aimed at reducing inflation and stabilizing the currency market.",
            "economy, politics, finance, policy, inflation, government, currency",
        ),
        (
            "中国的科技公司在人工智能领域取得了重大突破，这可能会影响全球市场。",
            "科技, 经济, 创新, 市场, 人工智能, 全球",
        ),
        (
            "นักวิจัยค้นพบวิธีใหม่ในการรักษาโรคมะเร็ง ซึ่งอาจช่วยชีวิตผู้ป่วยหลายล้านคนทั่วโลก",
            "การแพทย์, วิทยาศาสตร์, นวัตกรรม, สุขภาพ, โรคมะเร็ง, การรักษา",
        ),
        (
            "La conférence des Nations Unies sur le climat a abouti à un nouvel accord pour réduire les émissions de carbone d'ici 2030.",
            "environnement, climat, politique, énergie, carbone, écologie, ONU",
        ),
        (
            "सरकार ने आज एक नई आर्थिक नीति की घोषणा की, जिसका उद्देश्य मुद्रास्फीति को कम करना और मुद्रा बाजार को स्थिर करना है।",
            "अर्थव्यवस्था, राजनीति, वित्त, नीति, मुद्रास्फीति, सरकार, मुद्रा",
        ),
    )
]

# Custom CSS handed to gr.Blocks(css=...): hides the footer and
# .output-markdown elements, restyles .gr-button-primary, and gives elements
# with the "classify-button" class a yellow-to-orange gradient background.
css = """
footer {display:none !important}
.output-markdown{display:none !important}
.gr-button-primary {
    z-index: 14;
    height: 43px;
    width: 130px;
    left: 0px;
    top: 0px;
    padding: 0px;
    cursor: pointer !important; 
    background: none rgb(17, 20, 45) !important;
    border: none !important;
    text-align: center !important;
    font-family: Poppins !important;
    font-size: 14px !important;
    font-weight: 500 !important;
    color: rgb(255, 255, 255) !important;
    line-height: 1 !important;
    border-radius: 12px !important;
    transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
    box-shadow: none !important;
}
.classify-button {
    background: linear-gradient(90deg, yellow, orange) !important;
}
"""

# Build the Gradio interface: header text, model/multi-label controls, the
# text and label inputs, the score display, the Classify button and examples.
with gr.Blocks(css=css) as iface:
    gr.Markdown("# Zero-Shot Text Classifier")
    gr.Markdown("Select a model, enter text, and a set of labels to classify the text using a zero-shot classification model.")
    gr.Markdown("More than 10 languages are officially supported, including: English, Arabic, Bulgarian, German, Greek, Spanish, French, Hindi, Russian, Swahili, Thai, Turkish, Urdu, Vietnamese and Chinese.")

    with gr.Row():
        # Dropdown to select a model. Preselect the first entry explicitly so
        # the callback never receives an empty selection, regardless of the
        # Gradio version's default for an unset value.
        model_dropdown = gr.Dropdown(AVAILABLE_MODELS, value=AVAILABLE_MODELS[0], label="Choose Model")
        # Checkbox for multi-label classification
        multi_label = gr.Checkbox(label="Multi-label", value=False, info="Check for multi-label classification, uncheck for single-label (multi-class).")

    # Input fields for text and labels
    with gr.Row():
        text_input = gr.Textbox(label="Enter Text", placeholder="Type or paste text here...")
        label_input = gr.Textbox(label="Enter Labels (comma-separated)", placeholder="e.g., sports, politics, technology")

    # Output display
    output_label = gr.Label(label="Classification Scores")

    # Classification button: runs classify_text on the four inputs and shows
    # its return value in the Label component.
    submit_button = gr.Button("Classify", elem_classes=["classify-button"])
    submit_button.click(fn=classify_text, inputs=[model_dropdown, text_input, label_input, multi_label], outputs=output_label)

    # Clickable examples that fill in the text and label boxes
    gr.Examples(examples, inputs=[text_input, label_input])

# Launch the app only when executed as a script
if __name__ == "__main__":
    iface.launch()