import os
import pickle

from config import (
    MODEL_NAME, MODEL_TYPE, DEVICE_TYPE,
    SENTENCE_EMBEDDING_FILE,
    STANDARD_NAME_MAP_DATA_FILE, SUBJECT_DATA_FILE, DATA_DIR, QINT8
)
from sentence_transformer_lib.sentence_transformer_helper import SentenceTransformerHelper
from data_lib.subject_data import SubjectData
from data_lib.standard_name_map_data import StandardNameMapData


class SentenceTransformerService:
    def __init__(self):
        self.sentenceTransformerHelper = None
        self.dic_standard_subject = None
        self.anchor_name_sentence_embeddings = None
        self.standardNameMapData = None
        self.sampleData = None

    def load_model_data(self):
        """Load model and data only once at startup."""
        if self.sentenceTransformerHelper is not None:
            print("Model already loaded. Skipping reload.")
            return  # Do not reload if the model is already in memory
print("Loading models and data...")
# Load sentence transformer model
print(f"Loading model {MODEL_NAME} with type {MODEL_TYPE} and qint8={QINT8}")
self.sentenceTransformerHelper = SentenceTransformerHelper(
model_name=MODEL_NAME,
model_type=MODEL_TYPE,
qint8=QINT8
)
# Load standard subject dictionary
self.dic_standard_subject = SubjectData.create_standard_subject_dic_from_file(SUBJECT_DATA_FILE)
# Initialize StandardNameMapData without embeddings first
self.standardNameMapData = StandardNameMapData(None)
self.standardNameMapData.load_data_from_csv(STANDARD_NAME_MAP_DATA_FILE)
self.standardNameMapData.process_data()
# Load or create embeddings
if os.path.exists(SENTENCE_EMBEDDING_FILE):
with open(SENTENCE_EMBEDDING_FILE, "rb") as f:
self.anchor_name_sentence_embeddings = pickle.load(f)
print(f"Loaded anchor name sentence embeddings shape: {self.anchor_name_sentence_embeddings.shape}")
else:
list_anchor_name_sentence = self.standardNameMapData.processed_data["anchor_name_sentences"]
self.anchor_name_sentence_embeddings = (
self.sentenceTransformerHelper.create_embeddings(
list_anchor_name_sentence
)
)
with open(SENTENCE_EMBEDDING_FILE, "wb") as f:
pickle.dump(self.anchor_name_sentence_embeddings, f)
print(f"Saved anchor name sentence embeddings to {SENTENCE_EMBEDDING_FILE}")
# Update embeddings in StandardNameMapData
self.standardNameMapData.update_embeddings(self.anchor_name_sentence_embeddings)
print("Models and data loaded successfully")
# Global instance (singleton)
sentence_transformer_service = SentenceTransformerService()
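

if __name__ == "__main__":
    # Illustrative smoke test, assuming the config paths above point at the
    # project's data files. Calling load_model_data() twice demonstrates the
    # singleton guard: the first call loads the model, data, and embeddings;
    # the second call is skipped because the model is already in memory.
    sentence_transformer_service.load_model_data()
    sentence_transformer_service.load_model_data()  # prints "Model already loaded. Skipping reload."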