File size: 380 Bytes
a2682b3
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
import spacy
import nltk

class NLPModel:
    """Bundle a spaCy pipeline with NLTK sentence splitting.

    The spaCy pipeline loaded is ``pt_core_news_md`` and is stored on
    ``self.nlp``; sentence splitting is delegated to
    ``nltk.sent_tokenize``.
    """

    def __init__(self):
        """Load the spaCy pipeline and ensure the Punkt data is installed.

        Raises whatever ``spacy.load`` raises when the ``pt_core_news_md``
        package is not installed.
        """
        self.nlp = spacy.load("pt_core_news_md")
        # nltk.download() is called only when the resource is missing, so
        # creating further instances does not hit the network again.
        # NOTE(review): some newer NLTK releases appear to load
        # 'punkt_tab' instead of 'punkt' -- confirm against the pinned
        # NLTK version.
        try:
            nltk.data.find("tokenizers/punkt")
        except LookupError:
            nltk.download('punkt')

    def extract_entities(self, text: str):
        """Return the named entities spaCy finds in *text*.

        Args:
            text: Raw text to run through the pipeline.

        Returns:
            A list of ``(entity_text, label)`` tuples in the order given
            by ``doc.ents``; the entity text is lower-cased, the label is
            returned unchanged.
        """
        doc = self.nlp(text)
        return [(ent.text.lower(), ent.label_) for ent in doc.ents]

    def tokenize_sentences(self, text: str, language: str = "portuguese"):
        """Split *text* into sentences with NLTK's Punkt tokenizer.

        Args:
            text: Raw text to split.
            language: Name of the Punkt model to use. Defaults to
                ``"portuguese"`` so the splitter matches the Portuguese
                spaCy pipeline loaded in ``__init__`` (NLTK's own default
                is English).

        Returns:
            The value returned by ``nltk.sent_tokenize``.
        """
        return nltk.sent_tokenize(text, language=language)