# meisaicheck-api / services / sentence_transformer_service.py
# (uploaded by vumichien, "upload new model", commit 632ec54)
import pickle
import warnings
# Filter NumPy array implementation warnings specifically
warnings.filterwarnings(
"ignore",
message=".*array.*implementation doesn't accept a copy keyword.*",
category=DeprecationWarning
)
# Or alternatively, target the exact warning message:
warnings.filterwarnings(
"ignore",
message=".*NumPy will pass `copy` to the `__array__` special method.*",
category=DeprecationWarning
)
from config import (
MODEL_NAME, MODEL_TYPE, DEVICE_TYPE,
SENTENCE_EMBEDDING_FILE,
STANDARD_NAME_MAP_DATA_FILE, SUBJECT_DATA_FILE, DATA_DIR
)
from sentence_transformer_lib.sentence_transformer_helper import SentenceTransformerHelper
from data_lib.subject_data import SubjectData
from data_lib.standard_name_map_data import StandardNameMapData
import os
class SentenceTransformerService:
    """Owns the sentence-transformer model and its precomputed embedding data.

    Intended to be used through the module-level singleton; call
    ``load_model_data()`` once at startup. Repeated calls are no-ops.
    """

    def __init__(self):
        # Model wrapper; also acts as the "already loaded" sentinel.
        self.sentenceTransformerHelper = None
        # Mapping of standard subjects, loaded from SUBJECT_DATA_FILE.
        self.dic_standard_subject = None
        # Embeddings for the anchor-name sentences (loaded or computed).
        self.anchor_name_sentence_embeddings = None
        # Fix: declare up-front instead of creating it only in load_model_data().
        self.standardNameMapData = None
        # NOTE(review): sampleData is initialized but never used in this file;
        # kept for backward compatibility with possible external readers.
        self.sampleData = None

    def load_model_data(self):
        """Load model and data only once at startup."""
        if self.sentenceTransformerHelper is not None:
            print("Model already loaded. Skipping reload.")
            return  # do not reload if the model is already in memory

        print("Loading models and data...")
        # Load sentence transformer model
        self.sentenceTransformerHelper = SentenceTransformerHelper(model_name=MODEL_NAME, model_type=MODEL_TYPE)
        print(f"Loading model {MODEL_NAME} with type {MODEL_TYPE}")

        # Load standard subject dictionary
        self.dic_standard_subject = SubjectData.create_standard_subject_dic_from_file(SUBJECT_DATA_FILE)

        # BUG FIX: load pre-computed embeddings BEFORE constructing
        # StandardNameMapData. Previously the constructor always received
        # None because the pickle was only loaded afterwards.
        if SENTENCE_EMBEDDING_FILE is not None:
            with open(SENTENCE_EMBEDDING_FILE, "rb") as f:
                self.anchor_name_sentence_embeddings = pickle.load(f)
            print(f"Loaded anchor name sentence embeddings shape: {self.anchor_name_sentence_embeddings.shape}")

        # Load and process sample data (receives the embeddings when the
        # pre-computed file exists, None otherwise — as the constructor
        # signature suggests; TODO confirm against StandardNameMapData).
        self.standardNameMapData = StandardNameMapData(self.anchor_name_sentence_embeddings)
        self.standardNameMapData.load_data_from_csv(STANDARD_NAME_MAP_DATA_FILE)
        self.standardNameMapData.process_data()

        # No pre-computed file: compute embeddings now and cache them to disk
        # so subsequent runs can point SENTENCE_EMBEDDING_FILE at the pickle.
        if SENTENCE_EMBEDDING_FILE is None:
            list_anchor_name_sentence = self.standardNameMapData.processed_data["anchor_name_sentences"]
            self.anchor_name_sentence_embeddings = self.sentenceTransformerHelper.create_embeddings(
                list_anchor_name_sentence
            )
            sentence_embedding_file_path = os.path.join(
                DATA_DIR,
                "anchor_name_sentence_embeddings_" + MODEL_NAME.rsplit("/", maxsplit=1)[-1] + ".pkl",
            )
            with open(sentence_embedding_file_path, "wb") as f:
                pickle.dump(self.anchor_name_sentence_embeddings, f)
            print(f"Saved anchor name sentence embeddings to {sentence_embedding_file_path}")

        print("Models and data loaded successfully")
# Global instance (singleton): import this object and call
# load_model_data() once at application startup; the guard inside that
# method makes repeated calls no-ops.
sentence_transformer_service = SentenceTransformerService()