"""Thin wrapper around a spaCy pipeline for entity and sentence extraction."""

import logging
from typing import List, Tuple, Union

import spacy

logger = logging.getLogger(__name__)


class NLPModel:
    """Load a spaCy pipeline once and expose best-effort NLP helpers.

    Both helpers are best-effort: if the pipeline raises, the failure is
    logged (with traceback) and a fallback value is returned instead of
    propagating the exception.
    """

    def __init__(self, model_name: str = "pt_core_news_md"):
        """Load the spaCy pipeline named *model_name*.

        Args:
            model_name: Name passed to ``spacy.load``. Defaults to
                ``"pt_core_news_md"``, the model this class always used.

        Raises:
            Exception: Whatever ``spacy.load`` raises is logged and then
                re-raised unchanged, so a broken install fails loudly.
        """
        try:
            self.nlp = spacy.load(model_name)
            logger.info("spaCy model initialized successfully")
        except Exception as e:
            # logger.exception keeps the traceback that logger.error dropped.
            logger.exception("Failed to initialize spaCy model: %s", e)
            raise

    @staticmethod
    def _as_text(text: Union[str, List[str]]) -> str:
        """Return *text* as one string, joining list items with a space."""
        if isinstance(text, list):
            return " ".join(text)
        return text

    def extract_entities(
        self, text: Union[str, List[str]]
    ) -> List[Tuple[str, str]]:
        """Extract named entities from *text* using the loaded pipeline.

        Args:
            text: A string, or a list of strings that is joined with single
                spaces before processing.

        Returns:
            One ``(lowercased entity text, entity label)`` tuple per entity
            in ``doc.ents``; an empty list if processing fails.
        """
        try:
            # Normalisation stays inside the try so malformed input (e.g. a
            # list holding non-strings) also takes the logged fallback path.
            doc = self.nlp(self._as_text(text))
            return [(ent.text.lower(), ent.label_) for ent in doc.ents]
        except Exception as e:
            logger.exception("Entity extraction failed: %s", e)
            return []

    def tokenize_sentences(self, text: Union[str, List[str]]) -> List[str]:
        """Split *text* into sentences using the loaded pipeline.

        Args:
            text: A string, or a list of strings that is joined with single
                spaces before processing (same contract as
                ``extract_entities``).

        Returns:
            The text of each span in ``doc.sents``. If processing fails, the
            input is handed back unsplit: ``[text]`` for a string, or a
            shallow copy of the list for list input, so the result is never
            a nested list.
        """
        try:
            doc = self.nlp(self._as_text(text))
            return [sent.text for sent in doc.sents]
        except Exception as e:
            logger.exception("Sentence tokenization failed: %s", e)
            # Fallback to returning the whole input, kept flat for lists.
            return list(text) if isinstance(text, list) else [text]