import streamlit as st
import pandas as pd
from transformers import (
    AutoTokenizer,
    TFAutoModelForSequenceClassification,
    pipeline,
)

st.title("Toxic Tweet Classifier")

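# Default example shown in the input box; users can replace it with any text.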
demo = """Your words are like poison. They seep into my mind and make me feel worthless."""
text = st.text_area("Input text", demo, height=275)

submit = False
model_name = ""

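# Model selector and submit button; additional checkpoints can be added to the
# selectbox tuple.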
with st.container():
    model_name = st.selectbox(
        "Select Model",
        ("RobCaamano/toxicity",),
    )
    submit = st.button("Submit", type="primary")

# Load the tokenizer and TensorFlow model for the selected checkpoint, then
# wrap them in a classification pipeline. return_all_scores=True makes the
# pipeline return a score for every label (newer transformers releases prefer
# top_k=None for the same behavior).
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSequenceClassification.from_pretrained(model_name)
clf = pipeline(
    "text-classification", model=model, tokenizer=tokenizer, return_all_scores=True
)

if submit:
    # Map each predicted label to its score, then split off the fine-grained
    # toxicity classes from the overall "toxic" label.
    results = {d["label"]: d["score"] for d in clf(text)[0]}
    classes = {k: v for k, v in results.items() if k != "toxic"}

    max_class = max(classes, key=classes.get)
    probability = classes[max_class]

    # The text is flagged as toxic when the overall "toxic" score is >= 0.5.
    if results["toxic"] >= 0.5:
        result_df = pd.DataFrame({
            "Toxic": ["Yes"],
            "Toxicity Class": [max_class],
            "Probability": [probability],
        })
    else:
        result_df = pd.DataFrame({
            "Toxic": ["No"],
            "Toxicity Class": ["This text is not toxic"],
        })

    st.table(result_df)

    # Expose the full per-label scores for inspection.
    expander = st.expander("View Raw Output")
    expander.write(results)