Spaces:

mjwong
/

Zero-Shot-Text-Classification

Sleeping

App Files Files Community

Zero-Shot-Text-Classification / app.py

mjwong

Update app.py

f74c03b verified about 2 months ago

raw

history blame contribute delete

6.18 kB

	import logging
	from typing import Dict, Union

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, pipeline

	# Zero-shot models that must be loaded with trust_remote_code=True.
	CUSTOM_MODELS = ["mjwong/gte-multilingual-base-xnli-anli"]

	# Every model offered for zero-shot classification: the standard
	# checkpoints first, followed by the custom ones listed above.
	AVAILABLE_MODELS = [
	    "mjwong/multilingual-e5-large-instruct-xnli-anli",
	    "mjwong/multilingual-e5-base-xnli-anli",
	    "mjwong/multilingual-e5-large-xnli-anli",
	    "mjwong/drama-base-xnli-anli",
	    "mjwong/drama-large-xnli-anli",
	    "mjwong/mcontriever-msmarco-xnli",
	    "mjwong/mcontriever-xnli",
	    *CUSTOM_MODELS,
	]

	# Module-level logger so that failures swallowed by classify_text are still
	# recorded instead of disappearing behind the generic error message.
	_logger = logging.getLogger(__name__)


	def classify_text(
	    model_name: str,
	    text: str,
	    labels: str,
	    multi_label: bool = False,
	) -> Union[Dict[str, float], str]:
	    """
	    Classify the input text against the provided labels with a zero-shot model.

	    Args:
	        model_name: The name of the Hugging Face model to use.
	        text: The input text to classify.
	        labels: A comma-separated string of candidate labels. Blank entries
	            (for example from a trailing or doubled comma) are ignored.
	        multi_label: Forwarded unchanged to the pipeline call as ``multi_label``.

	    Returns:
	        On success, a dictionary mapping each label to its classification
	        score. On invalid input or failure, a string starting with "Error:"
	        that describes the problem.
	    """
	    # Validate everything that can be validated before touching the model,
	    # so bad input never triggers a model load.
	    if not text or not text.strip():
	        return "Error: Please enter some text to classify."

	    stripped_labels = (label.strip() for label in (labels or "").split(","))
	    labels_list = [label for label in stripped_labels if label]
	    if not labels_list:
	        return "Error: Please enter some labels to classify the text."

	    if not model_name:
	        return "Error: Please choose a model."

	    try:
	        # Set device: 0 if GPU available, else -1 for CPU
	        device = 0 if torch.cuda.is_available() else -1

	        # NOTE: the pipeline is rebuilt on every call.
	        if model_name in CUSTOM_MODELS:
	            tokenizer = AutoTokenizer.from_pretrained(model_name)
	            classifier = pipeline(
	                "zero-shot-classification",
	                model=model_name,
	                device=device,
	                tokenizer=tokenizer,
	                trust_remote_code=True,
	            )
	        else:
	            classifier = pipeline(
	                "zero-shot-classification",
	                model=model_name,
	                device=device,
	            )

	        result = classifier(text, candidate_labels=labels_list, multi_label=multi_label)
	        return dict(zip(result["labels"], result["scores"]))
	    except Exception:
	        # Boundary handler: the caller only sees a generic message, so log the
	        # full traceback here for diagnosis.
	        _logger.exception("Zero-shot classification failed for model %r", model_name)
	        return "Error: An unexpected error occurred. Please try again later."

	# Sample inputs for the UI: each row is [text, comma-separated candidate labels],
	# with news-style sentences in English, Chinese, Thai, French and Hindi.
	examples = [
	    list(row)
	    for row in (
	        (
	            "The government announced a new economic policy today aimed at reducing inflation and stabilizing the currency market.",
	            "economy, politics, finance, policy, inflation, government, currency",
	        ),
	        (
	            "中国的科技公司在人工智能领域取得了重大突破，这可能会影响全球市场。",
	            "科技, 经济, 创新, 市场, 人工智能, 全球",
	        ),
	        (
	            "นักวิจัยค้นพบวิธีใหม่ในการรักษาโรคมะเร็ง ซึ่งอาจช่วยชีวิตผู้ป่วยหลายล้านคนทั่วโลก",
	            "การแพทย์, วิทยาศาสตร์, นวัตกรรม, สุขภาพ, โรคมะเร็ง, การรักษา",
	        ),
	        (
	            "La conférence des Nations Unies sur le climat a abouti à un nouvel accord pour réduire les émissions de carbone d'ici 2030.",
	            "environnement, climat, politique, énergie, carbone, écologie, ONU",
	        ),
	        (
	            "सरकार ने आज एक नई आर्थिक नीति की घोषणा की, जिसका उद्देश्य मुद्रास्फीति को कम करना और मुद्रा बाजार को स्थिर करना है।",
	            "अर्थव्यवस्था, राजनीति, वित्त, नीति, मुद्रास्फीति, सरकार, मुद्रा",
	        ),
	    )
	]

	# Custom CSS for the Gradio interface (passed to gr.Blocks via its css argument):
	# hides the footer and .output-markdown elements, restyles .gr-button-primary
	# buttons, and gives .classify-button a yellow-to-orange gradient background.
	css = """
	footer {display:none !important}
	.output-markdown{display:none !important}
	.gr-button-primary {
	z-index: 14;
	height: 43px;
	width: 130px;
	left: 0px;
	top: 0px;
	padding: 0px;
	cursor: pointer !important;
	background: none rgb(17, 20, 45) !important;
	border: none !important;
	text-align: center !important;
	font-family: Poppins !important;
	font-size: 14px !important;
	font-weight: 500 !important;
	color: rgb(255, 255, 255) !important;
	line-height: 1 !important;
	border-radius: 12px !important;
	transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
	box-shadow: none !important;
	}
	.classify-button {
	background: linear-gradient(90deg, yellow, orange) !important;
	}
	"""

	# Build the Gradio UI: a model selector and multi-label toggle, text and label
	# inputs, a score display, a button wired to classify_text, and clickable examples.
	with gr.Blocks(css=css) as iface:
	    gr.Markdown("# Zero-Shot Text Classifier")
	    gr.Markdown("Select a model, enter text, and a set of labels to classify the text using a zero-shot classification model.")
	    gr.Markdown("More than 10 languages are officially supported, including: English, Arabic, Bulgarian, German, Greek, Spanish, French, Hindi, Russian, Swahili, Thai, Turkish, Urdu, Vietnamese and Chinese.")

	    with gr.Row():
	        # Dropdown to select a model. The first model is preselected explicitly
	        # so classify_text never receives an empty selection.
	        model_dropdown = gr.Dropdown(
	            AVAILABLE_MODELS,
	            value=AVAILABLE_MODELS[0],
	            label="Choose Model",
	        )
	        # Checkbox for multi-label classification
	        multi_label = gr.Checkbox(label="True", value=False, info="Check for multi-label classification, uncheck for single-label (multi-class).")

	    # Input fields for text and labels
	    with gr.Row():
	        text_input = gr.Textbox(label="Enter Text", placeholder="Type or paste text here...")
	        label_input = gr.Textbox(label="Enter Labels (comma-separated)", placeholder="e.g., sports, politics, technology")

	    # Output display
	    output_label = gr.Label(label="Classification Scores")

	    # Classification button; the input order matches classify_text's parameters.
	    submit_button = gr.Button("Classify", elem_classes=["classify-button"])
	    submit_button.click(
	        fn=classify_text,
	        inputs=[model_dropdown, text_input, label_input, multi_label],
	        outputs=output_label,
	    )

	    # Example rows fill the text and label inputs when clicked.
	    gr.Examples(examples, inputs=[text_input, label_input])

	# Launch the app only when run as a script, not when imported.
	if __name__ == "__main__":
	    iface.launch()