Ethical_Judgment_Generator / commonsense_judgments.py
jeffhaines's picture
Create new file
cf47ee6
raw
history blame
1.53 kB
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
import torch
import pandas as pd
import numpy as np
import streamlit as st
from transformers import pipeline
from transformers_interpret import SequenceClassificationExplainer
from transformers import AutoModelForSequenceClassification, AutoTokenizer
st.title('Ethics Classifier')
st.write('This app uses a pre-trained Distilbert model fine-tuned on the Commonsense Ethics dataset from the Aligning AI With Shared Human Values project (https://github.com/hendrycks/ethics). It judges whether a given action of scenario is wrong or not wrong and shows how the words in the scenario affected the judgment.')
loaded_model = DistilBertForSequenceClassification.from_pretrained('commonsense_ethics')
model_name = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
cls_explainer = SequenceClassificationExplainer(loaded_model, tokenizer)
clf = pipeline("text-classification", model = loaded_model, tokenizer = tokenizer)
text = st.text_input('Enter a scenario or action.')
if text:
answer = clf(text)
label = 'wrong' if answer[0]['label'] == 'LABEL_1' else 'not wrong'
st.write(f'This action is {label} (confidence level {answer[0]["score"]*100:.2f}%).')
attributions = cls_explainer(text)
df = pd.DataFrame(attributions[1:-1])
df.rename(columns = {0: 'Token', 1: 'Contribution'}, inplace = True)
st.write(df.style.hide(axis = 'index'))
st.write(cls_explainer.visualize())