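"""Streamlit demo that evaluates a Hugging Face sequence-classification
checkpoint on GLUE/MRPC and compares the freshly computed metrics with the
evaluation results recorded in the model's card."""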
import streamlit as st
import numpy as np
import json
# Huggingface stuff
from datasets import load_dataset
from huggingface_hub import ModelCard
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from evaluate import load
def preprocess_function(examples):
    # Tokenize MRPC sentence pairs; uses the module-level `tokenizer`
    # defined in the main block below.
    return tokenizer(examples["sentence1"], examples["sentence2"], truncation=True)
def compute_metrics(eval_pred):
    # Turn logits into class predictions and score them with the
    # module-level GLUE `metric` defined in the main block below.
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predictions, references=labels)
def compute_model_card_evaluation_results(tokenizer, model_checkpoint, raw_datasets, metric):
    tokenized_datasets = raw_datasets.map(preprocess_function, batched=True)
    model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)
    batch_size = 16
    args = TrainingArguments(
        "test-glue",
        eval_strategy="epoch",
        learning_rate=5e-5,
        seed=42,
        lr_scheduler_type="linear",
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=3,
        weight_decay=0.01,
        load_best_model_at_end=False,
        metric_for_best_model="accuracy",  # inert while load_best_model_at_end=False
        report_to="none",
    )
    trainer = Trainer(
        model,
        args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["validation"],
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )
    # No fine-tuning is performed: the training arguments only configure the
    # Trainer, and we run a single evaluation pass over the validation split.
    result = trainer.evaluate()
    return result
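# Note: Trainer.evaluate() returns a dict of "eval_"-prefixed metrics, e.g.
# eval_loss plus whatever compute_metrics adds (the GLUE MRPC metric reports
# both accuracy and F1), along with runtime statistics. Exact keys vary with
# the transformers version.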
if __name__ == "__main__":
    st.title("Hugging Face Model Evaluation Demo")
    with st.form("my_st_form"):
        # Input text boxes for the dataset and model identifiers
        dataset_name = st.text_input("Enter dataset identifier", "")
        model_checkpoint = st.text_input("Enter model identifier", "")
        # Every form must have a submit button.
        submitted = st.form_submit_button("Submit")
        if submitted:
            print(dataset_name, model_checkpoint)
            # Example inputs:
            #model_checkpoint = "sgugger/glue-mrpc"
            #dataset_name = "nyu-mll/glue"
            metric = load("glue", "mrpc")
            tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
            raw_datasets = load_dataset(dataset_name, "mrpc")
            output = compute_model_card_evaluation_results(tokenizer, model_checkpoint, raw_datasets, metric)
            print(json.dumps(output))
            st.header("Self-generated Evaluation Results:")
            st.json(output, expanded=True)
            # Pull the evaluation results recorded in the model card for comparison
            card = ModelCard.load(model_checkpoint)
            st.header("Model Card Evaluation Results:")
            st.json(card.data.eval_results, expanded=True)
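# Usage sketch (the app.py filename is an assumption, not part of the source):
#   streamlit run app.py
# Then submit e.g. dataset identifier "nyu-mll/glue" and model identifier
# "sgugger/glue-mrpc", matching the commented example inputs above.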