import pandas as pd
import streamlit as st
from transformers import DistilBertForSequenceClassification, DistilBertTokenizerFast, pipeline
from transformers_interpret import SequenceClassificationExplainer

st.title('Ethics Classifier')
st.write('This app uses a pre-trained DistilBERT model fine-tuned on the Commonsense Ethics dataset from the Aligning AI With Shared Human Values project (https://github.com/hendrycks/ethics). It judges whether a given action or scenario is wrong or not wrong and shows how the words in the scenario affected the judgment.')

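# Load the fine-tuned classifier weights from the local 'commonsense_ethics' checkpoint
# and the matching base DistilBERT tokenizer.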
loaded_model = DistilBertForSequenceClassification.from_pretrained('commonsense_ethics')
model_name = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
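# Explainer that attributes the model's prediction to individual input tokens.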
cls_explainer = SequenceClassificationExplainer(loaded_model, tokenizer)

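# Text-classification pipeline wrapping the fine-tuned model and tokenizer.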
clf = pipeline("text-classification", model=loaded_model, tokenizer=tokenizer)

text = st.text_input('Enter a scenario or action.')

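# Once the user submits a scenario, classify it and display the verdict,
# the model's confidence, and per-token attributions.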
if text:
    answer = clf(text)
    label = 'wrong' if answer[0]['label'] == 'LABEL_1' else 'not wrong'
    st.write(f'This action is {label} (confidence level {answer[0]["score"]*100:.2f}%).')
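    # Per-token attribution scores; drop the leading [CLS] and trailing [SEP] tokens before display.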
    attributions = cls_explainer(text)
    df = pd.DataFrame(attributions[1:-1])
    df.rename(columns={0: 'Token', 1: 'Contribution'}, inplace=True)
    st.write(df.style.hide(axis='index'))
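    # Render the explainer's built-in attribution visualization.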
    st.write(cls_explainer.visualize())