import nltk
import spacy
class NLPModel:
    """Bundle a Portuguese spaCy pipeline with NLTK sentence splitting.

    The spaCy pipeline ("pt_core_news_md") is loaded once per instance and
    reused by :meth:`extract_entities`; sentence splitting is delegated to
    NLTK's Punkt tokenizer.
    """

    def __init__(self):
        """Load the spaCy pipeline and ensure the Punkt data is installed.

        Raises whatever ``spacy.load`` raises when the "pt_core_news_md"
        package is not installed.
        """
        self.nlp = spacy.load("pt_core_news_md")
        # Only contact the NLTK download server when the tokenizer data is
        # actually missing, so instances can be created offline and without
        # a network round-trip on every construction.
        # NOTE(review): recent NLTK releases appear to load "punkt_tab"
        # instead of "punkt" -- confirm against the pinned NLTK version.
        try:
            nltk.data.find("tokenizers/punkt")
        except LookupError:
            nltk.download('punkt')

    def extract_entities(self, text: str) -> list:
        """Return the named entities found in *text*.

        Args:
            text: Raw text to run through the spaCy pipeline.

        Returns:
            A list of ``(entity_text, label)`` tuples in document order,
            where ``entity_text`` is lower-cased and ``label`` is the
            entity's ``label_`` string.
        """
        doc = self.nlp(text)
        return [(ent.text.lower(), ent.label_) for ent in doc.ents]

    def tokenize_sentences(self, text: str, language: str = "portuguese") -> list:
        """Split *text* into sentences with NLTK's Punkt tokenizer.

        Args:
            text: Raw text to split.
            language: Name of the Punkt model to use. Defaults to
                "portuguese" so sentence splitting matches the language of
                the spaCy pipeline loaded in ``__init__``; NLTK's own
                default is English.

        Returns:
            The list of sentence strings produced by ``nltk.sent_tokenize``.
        """
        return nltk.sent_tokenize(text, language=language)