import gradio as gr
import os
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

## Model
# Checkpoint identifier shared by the tokenizer and the seq2seq model.
MODEL_ID = "PhongLT/ViLexNorm-bartpho-syllable-base-10e-nopre"

# Access token taken from the environment; None when the variable is unset.
TOKEN = os.environ.get("accesstoken")

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=TOKEN)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID, token=TOKEN)


# Upper bound, in tokens, used both to truncate the input and to cap the
# length of the generated output.
_MAX_LENGTH = 128


def normalize(input: str) -> str:
    """Return the model's normalized rewrite of a single sentence.

    The text is tokenized (truncated to ``_MAX_LENGTH`` tokens), passed through
    ``model.generate``, and the first generated sequence is decoded back to
    text with special tokens removed.

    Args:
        input: Sentence to normalize. The name shadows the builtin ``input``
            but is kept so existing keyword callers continue to work.

    Returns:
        The decoded model output, or an empty string when ``input`` is empty
        or contains only whitespace.
    """
    # Nothing to normalize: skip the model call entirely.
    if not input or not input.strip():
        return ""

    # Only one sequence is encoded per call, so no padding is requested.
    encoded = tokenizer(
        input,
        return_tensors="pt",
        max_length=_MAX_LENGTH,
        truncation=True,
    )

    # Pass the attention mask explicitly instead of leaving generate() to
    # infer it from the token ids.
    output_ids = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=_MAX_LENGTH,
    )

    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Title, description and example inputs passed to the interface below.
title = "Vietnamese Lexical Normalization"
description = "Nhập 1 câu cần chuẩn hoá"
example_list = [
    "mình thấy hát cũm bth, nhìn hơi x ấ u nữa",
    "bạn ơi cho mk hỏi chút đc ko",
]

# Text-in / text-out interface around `normalize`.
demo = gr.Interface(
    fn=normalize,
    inputs="text",
    outputs="text",
    title=title,
    description=description,
    examples=example_list,
)

# Launch the demo.
demo.launch()