# Author: nadhiffh
# Commit: Fix tokenizer issue (b1dcba7)
import streamlit as st
from newspaper import Article
from transformers import pipeline
from transformers import T5Tokenizer, T5Model, T5ForConditionalGeneration
# Load model from Hugging Face
@st.cache_resource
def load_summarizer():
    """Build the Indonesian summarization pipeline.

    The tokenizer and the model are loaded explicitly through the T5 classes
    and passed to ``pipeline`` as objects. The function is wrapped in
    ``st.cache_resource``, so the returned pipeline is cached by Streamlit.

    Returns:
        A ``transformers`` pipeline for the "summarization" task.
    """
    checkpoint = "cahya/t5-base-indonesian-summarization-cased"
    # Keyword arguments are evaluated left to right: tokenizer first, then
    # the model weights.
    return pipeline(
        "summarization",
        tokenizer=T5Tokenizer.from_pretrained(checkpoint),
        model=T5ForConditionalGeneration.from_pretrained(checkpoint),
    )
# Obtain the summarization pipeline used by the "Summarize" button below.
summarizer = load_summarizer()

# Page header and the URL input box. The title previously contained the
# newspaper emoji as mis-decoded bytes; it is restored to the real character.
st.title("📰 Indonesian News Summarizer")
st.write("Enter a URL from an Indonesian news website (e.g. Detik.com)")
url = st.text_input("Paste the article URL here:")
# "Show Article Text": download and parse the article behind the URL, display
# it, and remember its text in session state so the "Summarize" button can
# read it later.
if st.button("Show Article Text"):
    # Strip surrounding whitespace so a blank or padded paste is not treated
    # as a URL and sent to the downloader.
    cleaned_url = (url or "").strip()
    if not cleaned_url:
        st.warning("Please input a valid URL.")
    else:
        try:
            # Only the network fetch and the parsing step are guarded.
            article = Article(cleaned_url, language='id')
            article.download()
            article.parse()
        except Exception as e:  # UI boundary: report any fetch/parse failure
            st.error(f"Failed to fetch article: {e}")
        else:
            if article.text.strip():
                st.subheader("Full Article:")
                st.write(article.text)
                st.session_state.article_text = article.text
            else:
                # Nothing was extracted; storing an empty text would make
                # the summarizer run on the bare prompt prefix.
                st.warning("No text could be extracted from this article.")
# "Summarize": run the summarization pipeline on the article text stored in
# session state and show the resulting summary.
if st.button("Summarize"):
    # A missing key and an empty stored text are treated the same way: there
    # is nothing to summarize.
    article_text = st.session_state.get("article_text")
    if article_text:
        with st.spinner("Summarizing..."):
            # The text is prefixed with "ringkasan: " before being passed to
            # the pipeline.
            input_text = "ringkasan: " + article_text
            try:
                summary = summarizer(
                    input_text,
                    max_length=150,
                    min_length=40,
                    do_sample=False,
                    # Full news articles can be longer than the tokenizer's
                    # maximum input length; truncate instead of feeding an
                    # over-long sequence to the model.
                    truncation=True,
                )
            except Exception as e:  # UI boundary: match the fetch step
                st.error(f"Failed to summarize article: {e}")
            else:
                st.subheader("Summary:")
                st.success(summary[0]['summary_text'])
    else:
        st.warning("No article text found. Please load the article first.")