import json

import numpy as np
import streamlit as st

# Hugging Face libraries
from datasets import load_dataset
from evaluate import load
from huggingface_hub import ModelCard
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

def preprocess_function(examples):
    # Tokenize the MRPC sentence pairs; relies on the module-level `tokenizer`
    # defined in the Streamlit block below.
    return tokenizer(examples["sentence1"], examples["sentence2"], truncation=True)
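
# A rough illustration of the encoding this produces (the exact key set depends
# on the checkpoint; BERT-style tokenizers also emit token_type_ids):
#
#   preprocess_function({"sentence1": ["a b"], "sentence2": ["c d"]})
#   -> {"input_ids": [[...]], "token_type_ids": [[...]], "attention_mask": [[...]]}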
def compute_metrics(eval_pred):
    # Turn logits into class predictions and score them with the module-level
    # GLUE/MRPC `metric`.
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predictions, references=labels)
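
# For GLUE/MRPC the metric reports both accuracy and F1; a hand-worked example
# (not output captured from this app):
#
#   metric.compute(predictions=[1, 0], references=[1, 1])
#   -> {"accuracy": 0.5, "f1": 0.667} (approximately)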
def compute_model_card_evaluation_results(tokenizer, model_checkpoint, raw_datasets, metric):
    """Evaluate `model_checkpoint` on the validation split and return its metrics.

    `metric` is not used here directly; `compute_metrics` reads the
    module-level one.
    """
    tokenized_datasets = raw_datasets.map(preprocess_function, batched=True)
    model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)
    batch_size = 16
    args = TrainingArguments(
        "test-glue",
        evaluation_strategy="epoch",
        learning_rate=5e-5,
        seed=42,
        lr_scheduler_type="linear",
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=3,
        weight_decay=0.01,
        load_best_model_at_end=False,
        metric_for_best_model="accuracy",
        report_to="none",
    )
    trainer = Trainer(
        model,
        args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["validation"],
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )
    # Only evaluation runs here; the training hyperparameters above are unused.
    return trainer.evaluate()
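
# A minimal sketch of calling the helper directly, outside Streamlit. The
# identifiers are the examples hard-coded below, not requirements:
#
#   raw_datasets = load_dataset("nyu-mll/glue", "mrpc")
#   tokenizer = AutoTokenizer.from_pretrained("sgugger/glue-mrpc")
#   metric = load("glue", "mrpc")
#   results = compute_model_card_evaluation_results(
#       tokenizer, "sgugger/glue-mrpc", raw_datasets, metric)
#   # -> {"eval_loss": ..., "eval_accuracy": ..., "eval_f1": ..., ...}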
if __name__ == "__main__":
    st.title("Hugging Face Model Evaluation Demo")

    with st.form("my_st_form"):
        # Create the input text boxes.
        dataset_name = st.text_input("Enter dataset identifier", "")
        model_checkpoint = st.text_input("Enter model identifier", "")
        # Every form must have a submit button.
        submitted = st.form_submit_button("Submit")

        if submitted:
            print(dataset_name, model_checkpoint)
            # Example inputs: dataset "nyu-mll/glue", model "sgugger/glue-mrpc".
            # The MRPC configuration and metric are currently hard-coded.
            raw_datasets = load_dataset(dataset_name, "mrpc")
            metric = load("glue", "mrpc")
            tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

            output = compute_model_card_evaluation_results(
                tokenizer, model_checkpoint, raw_datasets, metric
            )
            print(json.dumps(output))
            st.json(output, expanded=True)

            card = ModelCard.load(model_checkpoint)
            st.text_area(label="Model Card Data:", value=card.data.eval_results)
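            # Note: `card.data.eval_results` is parsed from the card's
            # `model-index` YAML metadata and is None when the card declares no
            # evaluation results, so this text area may simply show "None".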