|
import gradio as gr |
|
import torch |
|
from transformers import AutoTokenizer, pipeline |
|
from typing import Dict |
|
|
|
|
|
# Models that classify_text loads with an explicitly created tokenizer and
# trust_remote_code=True instead of the plain pipeline call.
CUSTOM_MODELS = ["mjwong/gte-multilingual-base-xnli-anli"]

# Every model offered in the UI dropdown: the standard checkpoints first,
# followed by the custom-loading ones.
AVAILABLE_MODELS = [
    "mjwong/multilingual-e5-large-instruct-xnli-anli",
    "mjwong/multilingual-e5-base-xnli-anli",
    "mjwong/multilingual-e5-large-xnli-anli",
    "mjwong/drama-base-xnli-anli",
    "mjwong/drama-large-xnli-anli",
    "mjwong/mcontriever-msmarco-xnli",
    "mjwong/mcontriever-xnli",
    *CUSTOM_MODELS,
]
|
|
|
def classify_text(
    model_name: str,
    text: str,
    labels: str,
    multi_label: bool = False,
) -> Dict[str, float]:
    """Classify ``text`` against user-supplied labels with a zero-shot model.

    Args:
        model_name: The name of the Hugging Face model to use.
        text: The input text to classify.
        labels: A comma-separated string of candidate labels. Blank entries
            (for example from a trailing comma) and repeated labels are
            dropped before classification.
        multi_label: Forwarded to the pipeline's ``multi_label`` argument.

    Returns:
        On success, a dictionary mapping each label to its classification
        score. If the input is invalid or classification fails, a
        human-readable string starting with ``"Error:"`` is returned
        instead, so the message can be shown directly in the UI.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    if not text or not text.strip():
        return "Error: Please enter some text to classify."

    # Parse before validating: inputs such as " , ," are non-empty strings
    # but contain no usable label. dict.fromkeys drops duplicates while
    # keeping the order the user typed them in.
    labels_list = list(
        dict.fromkeys(
            label.strip() for label in (labels or "").split(",") if label.strip()
        )
    )
    if not labels_list:
        return "Error: Please enter some labels to classify the text."

    # Without this guard an empty selection would be handed to pipeline()
    # as the model argument.
    if not model_name:
        return "Error: Please select a model."

    try:
        device = 0 if torch.cuda.is_available() else -1

        if model_name in CUSTOM_MODELS:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            classifier = pipeline(
                "zero-shot-classification",
                model=model_name,
                device=device,
                tokenizer=tokenizer,
                trust_remote_code=True,
            )
        else:
            classifier = pipeline(
                "zero-shot-classification",
                model=model_name,
                device=device,
            )

        result = classifier(
            text,
            candidate_labels=labels_list,
            multi_label=multi_label,
        )
        return dict(zip(result["labels"], result["scores"]))
    except Exception:
        # UI boundary: keep the generic user-facing message, but record the
        # traceback so the failure can be diagnosed.
        logging.getLogger(__name__).exception(
            "Zero-shot classification failed for model %r", model_name
        )
        return "Error: An unexpected error occurred. Please try again later."
|
|
|
|
|
# Sample inputs for the UI. Each entry is [text, comma-separated labels].
examples = [
    # English
    [
        "The government announced a new economic policy today aimed at reducing inflation and stabilizing the currency market.",
        "economy, politics, finance, policy, inflation, government, currency",
    ],
    # Chinese
    [
        "中国的科技公司在人工智能领域取得了重大突破,这可能会影响全球市场。",
        "科技, 经济, 创新, 市场, 人工智能, 全球",
    ],
    # Thai
    [
        "นักวิจัยค้นพบวิธีใหม่ในการรักษาโรคมะเร็ง ซึ่งอาจช่วยชีวิตผู้ป่วยหลายล้านคนทั่วโลก",
        "การแพทย์, วิทยาศาสตร์, นวัตกรรม, สุขภาพ, โรคมะเร็ง, การรักษา",
    ],
    # French
    [
        "La conférence des Nations Unies sur le climat a abouti à un nouvel accord pour réduire les émissions de carbone d'ici 2030.",
        "environnement, climat, politique, énergie, carbone, écologie, ONU",
    ],
    # Hindi
    [
        "सरकार ने आज एक नई आर्थिक नीति की घोषणा की, जिसका उद्देश्य मुद्रास्फीति को कम करना और मुद्रा बाजार को स्थिर करना है।",
        "अर्थव्यवस्था, राजनीति, वित्त, नीति, मुद्रास्फीति, सरकार, मुद्रा",
    ],
]
|
|
|
|
|
# Custom stylesheet passed to gr.Blocks: hides the footer and the
# .output-markdown element, and styles the primary / classify buttons.
css = """
footer {display:none !important}
.output-markdown{display:none !important}
.gr-button-primary {
    z-index: 14;
    height: 43px;
    width: 130px;
    left: 0px;
    top: 0px;
    padding: 0px;
    cursor: pointer !important;
    background: none rgb(17, 20, 45) !important;
    border: none !important;
    text-align: center !important;
    font-family: Poppins !important;
    font-size: 14px !important;
    font-weight: 500 !important;
    color: rgb(255, 255, 255) !important;
    line-height: 1 !important;
    border-radius: 12px !important;
    transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
    box-shadow: none !important;
}
.classify-button {
    background: linear-gradient(90deg, yellow, orange) !important;
}
"""
|
|
|
|
|
# Build the Gradio UI: model/mode selectors, text and label inputs, the
# score display, the Classify button and clickable examples.
with gr.Blocks(css=css) as iface:
    gr.Markdown("# Zero-Shot Text Classifier")
    gr.Markdown("Select a model, enter text, and a set of labels to classify the text using a zero-shot classification model.")
    gr.Markdown("More than 10 languages are officially supported, including: English, Arabic, Bulgarian, German, Greek, Spanish, French, Hindi, Russian, Swahili, Thai, Turkish, Urdu, Vietnamese and Chinese.")

    with gr.Row():
        # Preselect the first model explicitly so a click on Classify always
        # carries a model name, rather than relying on the Dropdown's
        # default initial value.
        model_dropdown = gr.Dropdown(
            AVAILABLE_MODELS,
            value=AVAILABLE_MODELS[0],
            label="Choose Model",
        )
        multi_label = gr.Checkbox(
            label="Multi-label",
            value=False,
            info="Check for multi-label classification, uncheck for single-label (multi-class).",
        )

    with gr.Row():
        text_input = gr.Textbox(label="Enter Text", placeholder="Type or paste text here...")
        label_input = gr.Textbox(label="Enter Labels (comma-separated)", placeholder="e.g., sports, politics, technology")

    output_label = gr.Label(label="Classification Scores")

    submit_button = gr.Button("Classify", elem_classes=["classify-button"])
    # Input order must match classify_text's positional parameters.
    submit_button.click(
        fn=classify_text,
        inputs=[model_dropdown, text_input, label_input, multi_label],
        outputs=output_label,
    )

    # Examples fill only the text and label boxes; model and mode are left
    # as currently selected.
    gr.Examples(examples, inputs=[text_input, label_input])
|
|
|
|
|
# Launch the app only when this file is executed as a script, so that
# importing the module does not start it.
if __name__ == "__main__":
    iface.launch()
|
|