Spaces:

PhongLT
/

lexnorm-demo

Running

lexnorm-demo / app.py

Update app.py

c91fd6b verified 4 months ago

1.39 kB

	import gradio as gr
	import os
	import torch
	from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

	## Model
	# Hub access token, read from the "accesstoken" environment variable
	# (None when the variable is not set).
	TOKEN = os.environ.get("accesstoken")

	# Single checkpoint identifier shared by the tokenizer and the model so
	# the two can never drift apart.
	MODEL_ID = "PhongLT/ViLexNorm-bartpho-syllable-base-10e-nopre"

	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=TOKEN)
	model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID, token=TOKEN)


	def normalize(input):
	    """Return the lexically normalized form of a sentence.

	    The text is tokenized (truncated to 128 tokens), passed through the
	    module-level seq2seq ``model`` and the first generated sequence is
	    decoded back to a string with special tokens removed.

	    Args:
	        input: The raw sentence to normalize. The name shadows the
	            builtin but is kept for interface compatibility.

	    Returns:
	        The decoded model output, or an empty string when ``input`` is
	        empty or whitespace-only.
	    """
	    # Nothing to normalize: skip the model call instead of generating
	    # text from an empty prompt.
	    if not input or not input.strip():
	        return ""

	    # A single sequence needs no padding; truncation still caps the
	    # encoder input at 128 tokens.
	    encoded = tokenizer(
	        input,
	        return_tensors="pt",
	        max_length=128,
	        truncation=True,
	    )

	    # Pass the attention mask explicitly rather than relying on
	    # generate() to infer it from the input ids.
	    output_ids = model.generate(
	        encoded.input_ids,
	        attention_mask=encoded.attention_mask,
	        max_length=128,
	    )

	    # decode() has no max_length parameter; the generated length is
	    # already bounded by generate(max_length=128) above.
	    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

	# Static texts and sample inputs shown in the web UI.
	title = "Vietnamese Lexical Normalization"
	description = "Nhập 1 câu cần chuẩn hoá"
	example_list = [
	    "mình thấy hát cũm bth, nhìn hơi x ấ u nữa",
	    "bạn ơi cho mk hỏi chút đc ko",
	]

	# Text-in / text-out interface wired to the normalize() function.
	demo = gr.Interface(
	    fn=normalize,
	    inputs="text",
	    outputs="text",
	    examples=example_list,
	    title=title,
	    description=description,
	)

	# Start serving the demo.
	demo.launch()