Spaces:
Sleeping
Sleeping
File size: 2,700 Bytes
d3830cc 056dc2d d3830cc e542c59 98186a7 e542c59 aea35b3 e542c59 faf7c96 e542c59 98186a7 e542c59 19a55cd e542c59 e91e4da e542c59 e91e4da f82e9bd e91e4da bb7d89b e91e4da e542c59 19a55cd e542c59 813b46e 19a55cd e542c59 19a55cd e542c59 19a55cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
"""
python interactive.py
"""
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import TextClassificationPipeline
import gradio as gr
# global var
MODEL_NAME = 'momo/KcBERT-base_Hate_speech_Privacy_Detection'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(
MODEL_NAME,
num_labels= 15,
problem_type="multi_label_classification"
)
MODEL_BUF = {
"name": MODEL_NAME,
"tokenizer": tokenizer,
"model": model,
}
def change_model_name(name):
MODEL_BUF["name"] = name
MODEL_BUF["tokenizer"] = AutoTokenizer.from_pretrained(name)
MODEL_BUF["model"] = AutoModelForSequenceClassification.from_pretrained(name)
def predict(model_name, text):
if model_name != MODEL_BUF["name"]:
change_model_name(model_name)
tokenizer = MODEL_BUF["tokenizer"]
model = MODEL_BUF["model"]
unsmile_labels = ["์ฌ์ฑ/๊ฐ์กฑ","๋จ์ฑ","์ฑ์์์","์ธ์ข
/๊ตญ์ ","์ฐ๋ น","์ง์ญ","์ข
๊ต","๊ธฐํ ํ์ค","์
ํ/์์ค", "clean", '์ด๋ฆ', '์ ํ๋ฒํธ', '์ฃผ์', '๊ณ์ข๋ฒํธ', '์ฃผ๋ฏผ๋ฒํธ']
num_labels = len(unsmile_labels)
model.config.id2label = {i: label for i, label in zip(range(num_labels), unsmile_labels)}
model.config.label2id = {label: i for i, label in zip(range(num_labels), unsmile_labels)}
pipe = TextClassificationPipeline(
model = model,
tokenizer = tokenizer,
return_all_scores=False,
function_to_apply='sigmoid'
)
for result in pipe(text)[0]:
print(result)
return result
if __name__ == '__main__':
exam1 = '๊ฒฝ๊ธฐ๋ ์ฑ๋จ์ ์์ ๊ตฌ ํํ3๋์ ์ฐ๋ฆฌ ๋๋ค์ผ!'
exam2 = '๋ด ํธ๋ํฐ ๋ฒํธ๋ 010-3930-8237 ์ด์ผ!'
exam3 = '์ ์ ์ฅ ๋๋ฌด ์ง์ฆ๋๋ค'
model_name_list = [
'momo/KcELECTRA-base_Hate_speech_Privacy_Detection',
"momo/KcBERT-base_Hate_speech_Privacy_Detection",
]
#Create a gradio app with a button that calls predict()
app = gr.Interface(
fn=predict,
inputs=[gr.inputs.Dropdown(model_name_list, label="Model Name"), 'text'],
outputs='text',
examples = [
[MODEL_BUF["name"], exam1],
[MODEL_BUF["name"], exam2],
[MODEL_BUF["name"], exam3]
],
title="ํ๊ตญ์ด ํ์คํํ, ๊ฐ์ธ์ ๋ณด ํ๋ณ๊ธฐ (Korean Hate Speech and Privacy Detection)",
description="Korean Hate Speech and Privacy Detection. \t 15๊ฐ label Detection: ์ฌ์ฑ/๊ฐ์กฑ, ๋จ์ฑ, ์ฑ์์์, ์ธ์ข
/๊ตญ์ , ์ฐ๋ น, ์ง์ญ, ์ข
๊ต, ๊ธฐํ ํ์ค, ์
ํ/์์ค, clean, name, number, address, bank, person"
)
app.launch() |