test / models /nlp.py
christopher
changed nlp and query processors to fix issues with lists
c8d57fb
raw
history blame
759 Bytes
import spacy
import nltk
class NLPModel:
    """Wrapper around a spaCy pipeline (entities) and NLTK (sentence splitting).

    Instances are callable: ``model(text)`` is shorthand for
    ``model.extract_entities(text)``.
    """

    def __init__(self, model_name: str = "pt_core_news_md"):
        """Load the spaCy pipeline and fetch the NLTK 'punkt' resource.

        Args:
            model_name: Name of the spaCy pipeline passed to ``spacy.load``.
                Defaults to the pipeline this class has always loaded.
        """
        self.nlp = spacy.load(model_name)
        # NOTE(review): recent NLTK releases may look up 'punkt_tab' instead
        # of 'punkt' for sent_tokenize -- confirm against the pinned version.
        nltk.download('punkt')

    @staticmethod
    def _as_text(text):
        """Return *text* as one string, joining list/tuple items with spaces."""
        if isinstance(text, (list, tuple)):  # caller accidentally passed a sequence
            return " ".join(text)
        return text

    def __call__(self, text: str):
        """Make the model callable like ``model(text)``; extracts entities."""
        return self.extract_entities(text)

    def extract_entities(self, text: str):
        """Run the spaCy pipeline on *text* and return its named entities.

        Args:
            text: Input string. A list or tuple of strings is tolerated and is
                joined with single spaces before processing.

        Returns:
            A list of ``(entity_text_lowercased, entity_label)`` tuples, in the
            order the pipeline reports them in ``doc.ents``.
        """
        doc = self.nlp(self._as_text(text))
        return [(ent.text.lower(), ent.label_) for ent in doc.ents]

    def tokenize_sentences(self, text: str, language: str = "english"):
        """Split *text* into sentences with ``nltk.sent_tokenize``.

        Args:
            text: Input string. A list or tuple of strings is joined with
                single spaces first, matching ``extract_entities``.
            language: Punkt model name forwarded to NLTK. The default keeps
                the previous behavior (NLTK's own default); pass
                ``"portuguese"`` to match the default spaCy pipeline.

        Returns:
            Whatever ``nltk.sent_tokenize`` returns for the joined text.
        """
        return nltk.sent_tokenize(self._as_text(text), language=language)