# Spaces: Running
import gradio as gr | |
import os | |
import torch | |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer | |
## Model
# Pretrained checkpoint identifier shared by the tokenizer and the model.
_MODEL_ID = "PhongLT/ViLexNorm-bartpho-syllable-base-10e-nopre"

# Access token taken from the "accesstoken" environment variable
# (None when the variable is not set).
TOKEN = os.getenv("accesstoken")

tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, token=TOKEN)
model = AutoModelForSeq2SeqLM.from_pretrained(_MODEL_ID, token=TOKEN)
def normalize(input: str) -> str:
    """Normalize one sentence with the module-level seq2seq model.

    The text is tokenized (truncated to at most 128 tokens), passed to
    ``model.generate`` and the first generated sequence is decoded back
    into a string with special tokens removed.

    Args:
        input: Raw sentence to normalize.  The name shadows the ``input``
            builtin; it is kept so existing keyword callers keep working.

    Returns:
        The decoded model output, or ``""`` when ``input`` is empty,
        ``None`` or whitespace-only (the model is not invoked then).
    """
    # Nothing to normalize: skip tokenization and generation entirely.
    if not input or not input.strip():
        return ""

    max_tokens = 128  # cap for both the encoder input and the generated output

    # A single sentence needs no padding; truncation alone enforces the cap.
    encoded = tokenizer(
        input,
        return_tensors="pt",
        max_length=max_tokens,
        truncation=True,
    )

    # Hand the attention mask over explicitly rather than dropping it and
    # leaving generate() to work out which positions are real tokens.
    output_ids = model.generate(
        input_ids=encoded.input_ids,
        attention_mask=encoded.attention_mask,
        max_length=max_tokens,
    )

    # decode() defines no max_length option, so that stray argument is gone.
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
# Texts displayed by the demo: heading, short description and sample inputs.
title = "Vietnamese Lexical Normalization"
description = "Nhập 1 câu cần chuẩn hoá"
example_list = [
    "mình thấy hát cũm bth, nhìn hơi x ấ u nữa",
    "bạn ơi cho mk hỏi chút đc ko",
]

# Plain-text input and plain-text output, both routed through normalize().
demo = gr.Interface(
    fn=normalize,
    inputs="text",
    outputs="text",
    title=title,
    description=description,
    examples=example_list,
)

# Launch the demo as soon as this file runs.
demo.launch()