Spaces:
Sleeping
Sleeping
import pickle | |
import warnings | |
# Silence the NumPy DeprecationWarnings about the `copy` keyword of
# `__array__`. Each entry is a regular expression that `warnings` matches
# against the start of the warning text; the first is a loose match on the
# "implementation doesn't accept a copy keyword" wording, the second targets
# the alternative "NumPy will pass `copy`" wording.
for _copy_warning_pattern in (
    ".*array.*implementation doesn't accept a copy keyword.*",
    ".*NumPy will pass `copy` to the `__array__` special method.*",
):
    warnings.filterwarnings(
        "ignore",
        message=_copy_warning_pattern,
        category=DeprecationWarning,
    )
del _copy_warning_pattern  # keep the loop variable out of the module namespace
from config import ( | |
MODEL_NAME, MODEL_TYPE, DEVICE_TYPE, | |
SENTENCE_EMBEDDING_FILE, | |
STANDARD_NAME_MAP_DATA_FILE, SUBJECT_DATA_FILE, DATA_DIR | |
) | |
from sentence_transformer_lib.sentence_transformer_helper import SentenceTransformerHelper | |
from data_lib.subject_data import SubjectData | |
from data_lib.standard_name_map_data import StandardNameMapData | |
import os | |
class SentenceTransformerService:
    """Load and hold the sentence-transformer model and its reference data.

    Every attribute is ``None`` until ``load_model_data()`` has completed
    successfully.
    """

    def __init__(self):
        # SentenceTransformerHelper for the configured model; a non-None value
        # is also the "already loaded" marker checked by load_model_data().
        self.sentenceTransformerHelper = None
        # Result of SubjectData.create_standard_subject_dic_from_file().
        self.dic_standard_subject = None
        # Anchor-name sentence embeddings (unpickled or freshly computed).
        self.anchor_name_sentence_embeddings = None
        # Never assigned inside this class; kept in case external code reads it.
        self.sampleData = None
        # StandardNameMapData built from STANDARD_NAME_MAP_DATA_FILE. Declared
        # here so reading it before loading yields None, not AttributeError.
        self.standardNameMapData = None

    def load_model_data(self):
        """Load the model and all reference data once at startup.

        Returns immediately when a previous call already completed. Results
        are staged in local variables and stored on the instance only after
        every step has succeeded, so a failed load leaves the service in its
        unloaded state and the call can simply be retried.

        Raises:
            Any exception raised by the underlying loaders, e.g. ``OSError``
            when ``SENTENCE_EMBEDDING_FILE`` cannot be opened.
        """
        if self.sentenceTransformerHelper is not None:
            print("Model already loaded. Skipping reload.")
            return  # Do not reload when a model is already present.

        print("Loading models and data...")

        # Load sentence transformer model (announce before the slow step).
        print(f"Loading model {MODEL_NAME} with type {MODEL_TYPE}")
        helper = SentenceTransformerHelper(model_name=MODEL_NAME, model_type=MODEL_TYPE)

        # Load standard subject dictionary
        subject_dic = SubjectData.create_standard_subject_dic_from_file(SUBJECT_DATA_FILE)

        # Load and process the standard-name map data.
        # NOTE(review): the embeddings are only loaded further down, so this
        # passes the attribute's current value (None on a fresh instance).
        # Confirm StandardNameMapData does not need the real embeddings here.
        name_map_data = StandardNameMapData(self.anchor_name_sentence_embeddings)
        name_map_data.load_data_from_csv(STANDARD_NAME_MAP_DATA_FILE)
        name_map_data.process_data()

        # Load pre-computed embeddings, or compute and cache them.
        if SENTENCE_EMBEDDING_FILE is not None:
            # NOTE(security): pickle.load can execute arbitrary code; the
            # configured file must come from a trusted source.
            with open(SENTENCE_EMBEDDING_FILE, "rb") as f:
                embeddings = pickle.load(f)
            print(f"Loaded anchor name sentence embeddings shape: {embeddings.shape}")
        else:
            anchor_sentences = name_map_data.processed_data["anchor_name_sentences"]
            embeddings = helper.create_embeddings(anchor_sentences)
            # The cache file is named after the last path segment of MODEL_NAME.
            sentence_embedding_file_path = os.path.join(
                DATA_DIR,
                "anchor_name_sentence_embeddings_" + MODEL_NAME.rsplit("/", maxsplit=1)[-1] + ".pkl",
            )
            with open(sentence_embedding_file_path, "wb") as f:
                pickle.dump(embeddings, f)
            print(f"Saved anchor name sentence embeddings to {sentence_embedding_file_path}")

        # Commit everything together; the helper goes last because it is the
        # marker that makes later calls skip loading.
        self.dic_standard_subject = subject_dic
        self.standardNameMapData = name_map_data
        self.anchor_name_sentence_embeddings = embeddings
        self.sentenceTransformerHelper = helper
        print("Models and data loaded successfully")
# Module-level shared instance, used as a singleton. Constructing it only sets
# the attributes to None; the model and data are loaded later by an explicit
# call to load_model_data().
sentence_transformer_service = SentenceTransformerService()