# Web-page header captured together with this file (not part of the program):
# haotieu's picture
# Update app.py
# dda7919
# raw
# history blame contribute delete
# 1.54 kB
from newspaper import Article
from newspaper import Config
import gradio as gr
from transformers import MBartForConditionalGeneration
from transformers import AutoTokenizer
# Checkpoint identifier handed to both from_pretrained calls below, so the
# tokenizer and the model always come from the same checkpoint.
model_name = "haotieu/vietnamese-summarization"
# Tokenizer and model are created once at module level; news_summarizer reads
# these module-level names on every call instead of reloading them.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = MBartForConditionalGeneration.from_pretrained(model_name)
def extract_article_text(url):
    """Download the page at ``url`` and return its extracted article text.

    Args:
        url: Address of the news article. Leading and trailing whitespace
            (common when a link is pasted into a text box) is ignored.

    Returns:
        The ``text`` attribute of the parsed newspaper ``Article``.

    Raises:
        ValueError: If ``url`` is not a string or is empty/whitespace-only.
            Errors raised by newspaper while downloading or parsing
            propagate unchanged.
    """
    # Fail fast with a clear message instead of attempting a network request
    # for input that cannot possibly be a URL.
    if not isinstance(url, str) or not url.strip():
        raise ValueError('A non-empty article URL is required.')
    url = url.strip()

    # Send a regular desktop-browser user agent with the request
    # (presumably because some news sites reject the library default).
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'
    config = Config()
    config.browser_user_agent = user_agent
    config.request_timeout = 20  # request timeout, in seconds

    article = Article(url, config=config)
    article.download()
    article.parse()
    return article.text
def news_summarizer(url):
    """Fetch the news article at ``url`` and return a generated summary.

    The article text is tokenized (truncated to at most 512 tokens) and fed
    to the module-level ``model``; generation is constrained to between 64
    and 128 tokens.

    Args:
        url: Address of the article to summarize.

    Returns:
        The decoded summary string, with special tokens removed.

    Raises:
        ValueError: If no text could be extracted from the page, since
            generating from empty input would only yield a meaningless
            summary. Errors from article extraction propagate unchanged.
    """
    text = extract_article_text(url)
    if not text or not text.strip():
        raise ValueError('No article text could be extracted from the given URL.')

    # Calling the tokenizer directly (rather than the deprecated encode_plus)
    # and keeping the whole encoding lets us hand the attention mask to
    # generate() explicitly instead of relying on it being inferred.
    encoded = tokenizer(
        text,
        return_tensors='pt',
        max_length=512,
        truncation=True,
        padding=True,
    )
    summary_ids = model.generate(
        encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        max_length=128,
        min_length=64,
    )

    # A single article is encoded, so only the first generated sequence is
    # returned; decode it directly rather than building a list first.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
# Example article URL listed in the interface's examples.
sample_url = 'https://vnexpress.net/them-hai-nuoc-rut-nhan-vien-su-quan-tai-ukraine-4420581.html'
# Description text passed to the interface.
desc = 'This app uses BARTpho model by VinAI to summarize the text of a news article.'

# One text input (the article URL) mapped through news_summarizer to one
# text output (the generated summary).
summarizer_interface = gr.Interface(
    fn=news_summarizer,
    inputs="text",
    outputs="text",
    title="vietnamese news summarizer",
    description=desc,
    examples=[sample_url],
    theme='huggingface',
)

# Start serving the interface; inline=False asks Gradio not to render it
# inline (NOTE(review): presumably only relevant in notebook environments).
summarizer_interface.launch(inline=False)