|
|
|
|
|
from __future__ import absolute_import |
|
from __future__ import division, print_function, unicode_literals |
|
|
|
import psutil |
|
from sumy.parsers.html import HtmlParser |
|
from sumy.parsers.plaintext import PlaintextParser |
|
from sumy.nlp.tokenizers import Tokenizer |
|
from sumy.summarizers.lsa import LsaSummarizer as Summarizer |
|
from sumy.nlp.stemmers import Stemmer |
|
from sumy.utils import get_stop_words |
|
import nltk |
|
import streamlit as st |
|
|
|
|
|
class SumySummarizer:
    """Extractive text summarizer backed by sumy's LSA summarizer.

    The language used for tokenizing, stemming and stop-word lookup, and
    the number of sentences requested from the summarizer, are
    class-level settings.
    """

    # Language passed to the sumy tokenizer, stemmer and stop-word list.
    LANGUAGE = "german"

    # Number of sentences requested from the summarizer.
    SENTENCES_COUNT = 10

    def __init__(self):
        """Make sure the NLTK ``punkt_tab`` tokenizer data is available.

        The data is looked up locally first and downloaded only when it
        is missing, so constructing the summarizer repeatedly does not
        invoke the NLTK downloader every time.
        """
        try:
            nltk.data.find("tokenizers/punkt_tab")
        except LookupError:
            nltk.download('punkt_tab')

    def summarize(self, text: str):
        """Summarize *text* and return the selected sentences.

        Args:
            text: Plain text to summarize.

        Returns:
            A list of strings, one per sentence returned by the
            summarizer when asked for ``SENTENCES_COUNT`` sentences.
        """
        parser = PlaintextParser.from_string(text, Tokenizer(self.LANGUAGE))
        stemmer = Stemmer(self.LANGUAGE)

        summarizer = Summarizer(stemmer)
        st.info("Using Textsummarization Model Sumy Summarizer")
        summarizer.stop_words = get_stop_words(self.LANGUAGE)

        sentences = summarizer(parser.document, self.SENTENCES_COUNT)
        # Convert the summarizer's sentence objects to plain strings.
        return [str(sentence) for sentence in sentences]