In [106]:
import subprocess
import spacy
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer

In [94]:
import sys

# Download the model with the kernel's own interpreter (sys.executable) so the
# package is installed into the environment this notebook actually runs in; a
# bare "python" may resolve to a different interpreter on PATH.
# check=True raises CalledProcessError when the download fails, instead of
# letting the following spacy.load() cell fail with a less obvious error.
subprocess.run(
    [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
    check=True,
)

Collecting en-core-web-sm==3.5.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl (12.8 MB)
[2K     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.8/12.8 MB 3.9 MB/s eta 0:00:00
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


CompletedProcess(args=['python', '-m', 'spacy', 'download', 'en_core_web_sm'], returncode=0)

In [95]:
# Load the English pipeline that the download cell installed.
nlp = spacy.load("en_core_web_sm")

# Run the pipeline over one sample sentence; `doc` is reused by the cells below.
doc = nlp("This is an example text in Singapore by Sunil Singh on 6th August 2023")
doc

This is an example text in Singapore by Sunil Singh on 6th August 2023

In [96]:
# displacy is not brought into scope by any other import in this notebook, so
# import it here; without this the cell raises NameError on a fresh kernel.
from spacy import displacy

# jupyter=False returns the markup as a string rather than displaying it, and
# page=True wraps it in a complete HTML document (see the output below).
ner_html = displacy.render(docs=doc, style="ent", jupyter=False, page=True)
ner_html

'<!DOCTYPE html>\n<html lang="en">\n    <head>\n        <title>displaCy</title>\n    </head>\n\n    <body style="font-size: 16px; font-family: -apple-system, BlinkMacSystemFont, \'Segoe UI\', Helvetica, Arial, sans-serif, \'Apple Color Emoji\', \'Segoe UI Emoji\', \'Segoe UI Symbol\'; padding: 4rem 2rem; direction: ltr">\n<figure style="margin-bottom: 6rem">\n<div class="entities" style="line-height: 2.5; direction: ltr">This is an example text in \n<mark class="entity" style="background: #feca74; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">\n    Singapore\n    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">GPE</span>\n</mark>\n by \n<mark class="entity" style="background: #feca74; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">\n    Sunil Singh\n    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 

In [97]:
import pandas as pd

# Collect each distinct entity label together with its explanation. Keeping the
# (label, description) pair in one mapping guarantees the two columns stay
# aligned; de-duplicating them through two independent set() calls could
# reorder them differently or produce lists of different lengths.
label_descriptions = {ent.label_: spacy.explain(ent.label_) for ent in doc.ents}

# Sort by label so the table order is deterministic across runs.
label, desc = [], []
for entity_code, entity_description in sorted(label_descriptions.items()):
    label.append(entity_code)
    desc.append(entity_description)

df = pd.DataFrame(data={"Entity Code": label, "Entity Description": desc})
df

Unnamed: 0,Entity Code,Entity Description
0,DATE,Absolute or relative dates or periods
1,GPE,"Countries, cities, states"


In [98]:
# Display the entity spans found in `doc`.
doc.ents

(Singapore, Sunil Singh, 6th August 2023)

In [99]:
# One line per token: its text, its part-of-speech tag, and the tag's explanation.
for token in doc:
    print(f"{token.text} {token.pos_} {spacy.explain(token.pos_)}")

This PRON pronoun
is AUX auxiliary
an DET determiner
example NOUN noun
text NOUN noun
in ADP adposition
Singapore PROPN proper noun
by ADP adposition
Sunil PROPN proper noun
Singh PROPN proper noun
on ADP adposition
6th ADJ adjective
August PROPN proper noun
2023 NUM numeral


In [100]:
# Keep only the tokens that are neither stop words nor punctuation.
content_tokens = [token for token in doc if not (token.is_stop or token.is_punct)]

# One column per list: token text, tag, part of speech, and the tag's explanation.
word = [str(token) for token in content_tokens]
tag = [str(token.tag_) for token in content_tokens]
pos = [token.pos_ for token in content_tokens]
desc = [spacy.explain(token.tag_) for token in content_tokens]

pd.DataFrame(data=dict(Token=word, Tag=tag, Pos=pos, Description=desc))


Unnamed: 0,Token,Tag,Pos,Description
0,example,NN,NOUN,"noun, singular or mass"
1,text,NN,NOUN,"noun, singular or mass"
2,Singapore,NNP,PROPN,"noun, proper singular"
3,Sunil,NNP,PROPN,"noun, proper singular"
4,Singh,NNP,PROPN,"noun, proper singular"
5,6th,JJ,ADJ,"adjective (English), other noun-modifier (Chin..."
6,August,NNP,PROPN,"noun, proper singular"
7,2023,CD,NUM,cardinal number


In [101]:
import sys
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.text_rank import TextRankSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer
from sumy.summarizers.lsa import LsaSummarizer
from dataclasses import dataclass, field
@dataclass
class __AppConfig:
    """App-wide configurations.

    Attributes:
        summarizers: maps a summarizer class name to the dotted path of the
            module that defines it.
    """
    # Annotated and built through default_factory so that @dataclass registers
    # it as a real field and every instance gets its own dict. An unannotated
    # assignment is ignored by @dataclass and leaves one mutable dict shared
    # by the class and all of its instances.
    summarizers: dict = field(
        default_factory=lambda: dict(
            TextRankSummarizer="sumy.summarizers.text_rank",
            LexRankSummarizer="sumy.summarizers.lex_rank",
            LsaSummarizer="sumy.summarizers.lsa",
        )
    )
### make configs available to any module that imports this module
app_config = __AppConfig()

In [102]:
def class_from_name(module, class_name):
    """Return the attribute of ``module`` whose name is ``class_name``.

    Args:
        module: the object to look the attribute up on.
        class_name: name of the attribute to fetch.

    Raises:
        AttributeError: if ``module`` has no attribute called ``class_name``.
    """
    resolved = getattr(module, class_name)
    return resolved

In [103]:
method="TextRankSummarizer"
def get_summarizer(method):
    """Return the summarizer class registered under ``method``.

    Args:
        method: a key of ``app_config.summarizers``; it is also the name of
            the class to fetch from the configured module.

    Returns:
        The attribute named ``method`` of the configured module.

    Raises:
        KeyError: if ``method`` is not a key of ``app_config.summarizers``.
    """
    import importlib

    module_path = app_config.summarizers.get(method)
    if module_path is None:
        # Fail with a readable message instead of the bare "KeyError: None"
        # that a lookup with a missing key would produce.
        raise KeyError(
            f"Unknown summarizer {method!r}; "
            f"expected one of {sorted(app_config.summarizers)}"
        )
    # import_module returns the already-loaded module when there is one and
    # imports it otherwise, so this no longer depends on an earlier cell
    # having imported every configured module.
    module = importlib.import_module(module_path)
    return class_from_name(module, method)

In [108]:
# Sample input for the summarization cells below: two paragraphs of prose.
text = """Interstellar is a 2014 epic science fiction film co-written, directed, and produced by Christopher Nolan. It stars Matthew McConaughey, Anne Hathaway, Jessica Chastain, Bill Irwin, Ellen Burstyn, Matt Damon, and Michael Caine. Set in a dystopian future where humanity is embroiled in a catastrophic blight and famine, the film follows a group of astronauts who travel through a wormhole near Saturn in search of a new home for humankind.
Brothers Christopher and Jonathan Nolan wrote the screenplay, which had its origins in a script Jonathan developed in 2007 and was originally set to be directed by Steven Spielberg. Kip Thorne, a Caltech theoretical physicist and 2017 Nobel laureate in Physics,[4] was an executive producer, acted as a scientific consultant, and wrote a tie-in book, The Science of Interstellar. Cinematographer Hoyte van Hoytema shot it on 35 mm movie film in the Panavision anamorphic format and IMAX 70 mm. Principal photography began in late 2013 and took place in Alberta, Iceland, and Los Angeles. Interstellar uses extensive practical and miniature effects, and the company Double Negative created additional digital effects."""

In [109]:
# Build the tokenizer first, then parse the sample text with it.
english_tokenizer = Tokenizer("english")
parser = PlaintextParser.from_string(text, english_tokenizer)
parser

<sumy.parsers.plaintext.PlaintextParser at 0x7fa774f4a510>

In [117]:
# Show how many sentences the parser found. The recorded output of this cell
# is a count, so the expression must be wrapped in len() to reproduce it.
len(parser.document.sentences)

8