# Hugging Face Spaces page-status residue removed (was: "Spaces: / Sleeping / Sleeping").
# Standard library
import gc
import json
import logging
import os
from typing import Dict, List

# Third-party
import numpy as np
import pandas as pd
import streamlit as st
import torch

# Hugging Face stack
from datasets import Dataset, load_dataset
from evaluate import load
from huggingface_hub import ModelCard, hf_hub_url
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    pipeline,
)
def preprocess_function(examples, tokenizer=None):
    """Tokenize an MRPC-style batch of sentence pairs.

    Args:
        examples: Mapping with ``"sentence1"`` and ``"sentence2"`` columns,
            as supplied by ``Dataset.map(..., batched=True)``.
        tokenizer: Optional tokenizer callable. When ``None`` (the default,
            preserving the original call signature used by ``Dataset.map``),
            falls back to the module-level ``tokenizer`` global.

    Returns:
        The tokenizer's encoding of the sentence pairs, truncated to the
        model's maximum input length.
    """
    if tokenizer is None:
        # Backward-compatible fallback: the original implementation read the
        # module-level `tokenizer` global directly.
        tokenizer = globals()["tokenizer"]
    return tokenizer(examples["sentence1"], examples["sentence2"], truncation=True)
def compute_metrics(eval_pred, eval_metric=None):
    """Turn a Trainer ``EvalPrediction`` into a metrics dict.

    Args:
        eval_pred: ``(logits, labels)`` pair; logits have shape
            ``(batch, num_classes)``.
        eval_metric: Optional metric object exposing ``.compute``. When
            ``None`` (the default, preserving the original one-argument call
            made by ``Trainer``), falls back to the module-level ``metric``
            global.

    Returns:
        The metric's result dict (for GLUE MRPC: accuracy and F1).
    """
    predictions, labels = eval_pred
    # Collapse per-class logits to hard class predictions.
    predictions = np.argmax(predictions, axis=1)
    scorer = eval_metric if eval_metric is not None else metric
    return scorer.compute(predictions=predictions, references=labels)
def compute_model_card_evaluation_results(tokenizer, model_checkpoint, raw_datasets, metric):
    """Evaluate ``model_checkpoint`` on the validation split of ``raw_datasets``.

    Loads a 2-label sequence-classification head from the checkpoint,
    tokenizes the dataset, and runs ``Trainer.evaluate`` — no training
    step is ever executed, the training-related arguments only satisfy
    ``TrainingArguments``' constructor.

    Args:
        tokenizer: Tokenizer passed through to the ``Trainer``.
        model_checkpoint: Hub identifier or local path of the model.
        raw_datasets: ``DatasetDict`` with ``"train"`` and ``"validation"`` splits.
        metric: Unused here directly; the module-level ``compute_metrics``
            reads the module-level ``metric`` global.

    Returns:
        The dict produced by ``Trainer.evaluate`` (loss plus metric values).
    """
    # NOTE(review): preprocess_function reads the module-level tokenizer, not
    # the one passed in; both are the same object in this app — confirm before
    # reusing this function elsewhere.
    tokenized = raw_datasets.map(preprocess_function, batched=True)
    classifier = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)

    per_device_batch = 16
    training_args = TrainingArguments(
        "test-glue",
        report_to="none",
        seed=42,
        eval_strategy="epoch",
        learning_rate=5e-5,
        lr_scheduler_type="linear",
        weight_decay=0.01,
        num_train_epochs=3,
        per_device_train_batch_size=per_device_batch,
        per_device_eval_batch_size=per_device_batch,
        load_best_model_at_end=False,
        # Only meaningful with load_best_model_at_end=True; kept for parity.
        metric_for_best_model="accuracy",
    )

    evaluator = Trainer(
        classifier,
        training_args,
        train_dataset=tokenized["train"],
        eval_dataset=tokenized["validation"],
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )
    return evaluator.evaluate()
if __name__ == "__main__":
    st.title("Hugging Face Model Evaluation Demo")
    with st.form("my_st_form"):
        # Input text boxes for the two hub identifiers.
        dataset_name = st.text_input("Enter dataset identifier", "")
        model_checkpoint = st.text_input("Enter model identifier", "")
        # Every form must have a submit button.
        submitted = st.form_submit_button("Submit")
        if submitted:
            if not dataset_name or not model_checkpoint:
                # Guard: the defaults are empty strings; without both
                # identifiers the hub calls below would raise.
                st.error("Please enter both a dataset identifier and a model identifier.")
            else:
                print(dataset_name, model_checkpoint)
                # Example inputs:
                #   model_checkpoint = "sgugger/glue-mrpc"
                #   dataset_name = "nyu-mll/glue"
                # NOTE: metric and dataset config are hard-coded to GLUE/MRPC.
                metric = load("glue", "mrpc")
                # Load the tokenizer once (the original loaded it twice).
                tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
                raw_datasets = load_dataset(dataset_name, "mrpc")
                output = compute_model_card_evaluation_results(tokenizer, model_checkpoint, raw_datasets, metric)
                print(json.dumps(output))
                st.header("Self-generated Evaluation Results:")
                st.json(output, expanded=True)
                # Compare against the evaluation results claimed in the model card.
                card = ModelCard.load(model_checkpoint)
                st.header("Model Card Evaluation Results:")
                st.json(card.data.eval_results, expanded=True)