"""Singleton service that loads a sentence-transformer model, the standard
subject dictionary, and the anchor-name sentence embeddings exactly once."""

import os
import pickle
import warnings

# Silence the NumPy deprecation warnings about the `copy` keyword being
# passed to `__array__` by third-party array implementations.
warnings.filterwarnings(
    "ignore",
    message=".*array.*implementation doesn't accept a copy keyword.*",
    category=DeprecationWarning,
)
# Or alternatively, target the exact warning message:
warnings.filterwarnings(
    "ignore",
    message=".*NumPy will pass `copy` to the `__array__` special method.*",
    category=DeprecationWarning,
)

from config import (
    MODEL_NAME,
    MODEL_TYPE,
    DEVICE_TYPE,
    SENTENCE_EMBEDDING_FILE,
    STANDARD_NAME_MAP_DATA_FILE,
    SUBJECT_DATA_FILE,
    DATA_DIR,
)
from sentence_transformer_lib.sentence_transformer_helper import SentenceTransformerHelper
from data_lib.subject_data import SubjectData
from data_lib.standard_name_map_data import StandardNameMapData


class SentenceTransformerService:
    """Holds the model and all derived data; intended to be used via the
    module-level singleton ``sentence_transformer_service``."""

    def __init__(self):
        # SentenceTransformerHelper instance; None until load_model_data() runs.
        self.sentenceTransformerHelper = None
        # Dict mapping subjects to their standard form (built from SUBJECT_DATA_FILE).
        self.dic_standard_subject = None
        # Embedding matrix for the anchor-name sentences (loaded or computed).
        self.anchor_name_sentence_embeddings = None
        # StandardNameMapData instance (was assigned but never declared here).
        self.standardNameMapData = None
        self.sampleData = None

    def load_model_data(self):
        """Load model and data only once at startup.

        Idempotent: a second call is a no-op once the model is present.
        """
        if self.sentenceTransformerHelper is not None:
            print("Model already loaded. Skipping reload.")
            return  # do not reload if the model already exists

        print("Loading models and data...")

        # Load sentence transformer model
        self.sentenceTransformerHelper = SentenceTransformerHelper(
            model_name=MODEL_NAME, model_type=MODEL_TYPE
        )
        print(f"Loading model {MODEL_NAME} with type {MODEL_TYPE}")

        # Load standard subject dictionary
        self.dic_standard_subject = SubjectData.create_standard_subject_dic_from_file(
            SUBJECT_DATA_FILE
        )

        # BUG FIX: load any pre-computed embeddings BEFORE constructing
        # StandardNameMapData. The original built StandardNameMapData first,
        # so its constructor always received None even when a pickle file
        # with embeddings was configured.
        # NOTE(review): pickle.load is only safe on trusted local files —
        # SENTENCE_EMBEDDING_FILE is assumed to be project-controlled.
        if SENTENCE_EMBEDDING_FILE is not None:
            with open(SENTENCE_EMBEDDING_FILE, "rb") as f:
                self.anchor_name_sentence_embeddings = pickle.load(f)
            print(
                f"Loaded anchor name sentence embeddings shape: "
                f"{self.anchor_name_sentence_embeddings.shape}"
            )

        # Load and process sample data. Receives the embeddings when they were
        # loaded from file above; otherwise None (they are computed below).
        self.standardNameMapData = StandardNameMapData(self.anchor_name_sentence_embeddings)
        self.standardNameMapData.load_data_from_csv(STANDARD_NAME_MAP_DATA_FILE)
        self.standardNameMapData.process_data()

        if SENTENCE_EMBEDDING_FILE is None:
            # No pre-computed file: embed the anchor-name sentences now and
            # cache the result under DATA_DIR for future runs.
            list_anchor_name_sentence = self.standardNameMapData.processed_data[
                "anchor_name_sentences"
            ]
            self.anchor_name_sentence_embeddings = (
                self.sentenceTransformerHelper.create_embeddings(list_anchor_name_sentence)
            )
            sentence_embedding_file_path = os.path.join(
                DATA_DIR,
                "anchor_name_sentence_embeddings_"
                + MODEL_NAME.rsplit("/", maxsplit=1)[-1]
                + ".pkl",
            )
            with open(sentence_embedding_file_path, "wb") as f:
                pickle.dump(self.anchor_name_sentence_embeddings, f)
            print(
                f"Saved anchor name sentence embeddings to {sentence_embedding_file_path}"
            )

        print("Models and data loaded successfully")


# Global instance (singleton)
sentence_transformer_service = SentenceTransformerService()