from typing import List, Tuple, Union

import nltk
import spacy
class NLPModel:
    """Thin wrapper around a spaCy pipeline and NLTK sentence splitting.

    The spaCy pipeline loaded is ``pt_core_news_md`` (Portuguese), so
    sentence splitting defaults to NLTK's Portuguese Punkt model as well.
    Instances are callable: ``model(text)`` is shorthand for
    ``model.extract_entities(text)``.
    """

    def __init__(self):
        """Load the spaCy pipeline and make sure the Punkt data is available."""
        self.nlp = spacy.load("pt_core_news_md")
        self._ensure_punkt()

    @staticmethod
    def _ensure_punkt() -> None:
        """Download the NLTK ``punkt`` resource only if it is not installed.

        Calling ``nltk.download`` unconditionally contacts the NLTK index on
        every instantiation; looking the resource up first keeps construction
        fast and lets it work offline once the data is present.
        """
        # NOTE(review): recent NLTK releases load 'punkt_tab' rather than
        # 'punkt' -- confirm against the NLTK version this project pins.
        try:
            nltk.data.find("tokenizers/punkt")
        except LookupError:
            nltk.download('punkt')

    @staticmethod
    def _as_text(text: Union[str, List[str], Tuple[str, ...]]) -> str:
        """Return *text* as one string, space-joining a list/tuple of strings."""
        if isinstance(text, (list, tuple)):
            return " ".join(text)
        return text

    def __call__(
        self, text: Union[str, List[str], Tuple[str, ...]]
    ) -> List[Tuple[str, str]]:
        """Make the model callable like ``model(text)``; delegates to
        :meth:`extract_entities`."""
        return self.extract_entities(text)

    def extract_entities(
        self, text: Union[str, List[str], Tuple[str, ...]]
    ) -> List[Tuple[str, str]]:
        """Run the spaCy pipeline on *text* and return its named entities.

        Args:
            text: The text to analyse. A list or tuple of strings passed by
                mistake is joined with single spaces into one string first.

        Returns:
            One ``(entity_text_lowercased, entity_label)`` tuple per entity in
            ``doc.ents``, in the order the pipeline reports them.
        """
        doc = self.nlp(self._as_text(text))
        return [(ent.text.lower(), ent.label_) for ent in doc.ents]

    def tokenize_sentences(
        self,
        text: Union[str, List[str], Tuple[str, ...]],
        language: str = "portuguese",
    ) -> List[str]:
        """Split *text* into sentences with ``nltk.sent_tokenize``.

        Args:
            text: The text to split. A list or tuple of strings is joined
                with single spaces first, as in :meth:`extract_entities`.
            language: Name of the Punkt model to use. Defaults to
                ``"portuguese"`` to match the spaCy pipeline loaded in
                ``__init__``; pass ``"english"`` for NLTK's own default.

        Returns:
            The list of sentence strings produced by NLTK.
        """
        return nltk.sent_tokenize(self._as_text(text), language=language)