from typing import List, Tuple, Union

import nltk
import spacy


class NLPModel:
    def __init__(self):
        # Load the Portuguese spaCy pipeline and fetch the NLTK Punkt tokenizer data.
        self.nlp = spacy.load("pt_core_news_md")
        nltk.download("punkt")

    def __call__(self, text: Union[str, List[str]]) -> List[Tuple[str, str]]:
        """Makes the model callable like model(text)."""
        return self.extract_entities(text)  # entity extraction is the default behaviour

    def extract_entities(self, text: Union[str, List[str]]) -> List[Tuple[str, str]]:
        if isinstance(text, list):  # input is a list of sentences
            entities = []
            for sentence in text:
                doc = self.nlp(sentence)
                entities.extend([(ent.text.lower(), ent.label_) for ent in doc.ents])
            return entities
        # input is a single string
        doc = self.nlp(text)
        return [(ent.text.lower(), ent.label_) for ent in doc.ents]

    def tokenize_sentences(self, text: str) -> List[str]:
        # Note: nltk.sent_tokenize defaults to the English Punkt model;
        # pass language="portuguese" if the input text is Portuguese.
        return nltk.sent_tokenize(text)
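

# Usage sketch (illustrative only; the sample sentence is an assumption and the
# entity labels returned depend on the pt_core_news_md pipeline):
if __name__ == "__main__":
    model = NLPModel()
    sentences = model.tokenize_sentences("Maria mora em Lisboa. Ela trabalha na universidade.")
    print(sentences)         # sentences split by NLTK
    print(model(sentences))  # __call__ delegates to extract_entities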