from sentence_transformers import SentenceTransformer
import torch
import numpy as np
import os
import logging


class EmbeddingModel:
    """Thin wrapper around a SentenceTransformer model for text embedding.

    The model is loaded once in the constructor and moved to CUDA when
    ``torch.cuda.is_available()`` reports a GPU, otherwise it stays on CPU.
    """

    # Instruction prepended to queries by ``encode_queries`` unless the
    # caller supplies a different prefix.
    DEFAULT_QUERY_PREFIX = "Represent this sentence for searching relevant passages: "

    def __init__(self, model_name="BAAI/bge-m3"):
        """Load the embedding model and place it on the selected device.

        Args:
            model_name: Hugging Face model ID (or any identifier accepted by
                ``SentenceTransformer``).

        Raises:
            Exception: Any error raised while loading or moving the model is
                logged and then re-raised unchanged.
        """
        try:
            # Use the Hugging Face model ID.
            self.model = SentenceTransformer(model_name)
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
            self.model.to(self.device)
            logging.info(f"成功加载嵌入模型 {model_name} 到 {self.device} 设备")
        except Exception as e:
            logging.error(f"加载模型失败: {str(e)}")
            raise

    def encode(self, texts, batch_size=32):
        """Convert texts into vector representations.

        Args:
            texts: A string or a list of strings to embed.
            batch_size: Batch size forwarded to the underlying model.

        Returns:
            The value returned by ``SentenceTransformer.encode``, called with
            ``normalize_embeddings=True`` and the progress bar enabled.
        """
        return self.model.encode(
            texts,
            batch_size=batch_size,
            show_progress_bar=True,
            normalize_embeddings=True,
        )

    def encode_queries(self, queries, batch_size=32, prefix=None):
        """Prepend a retrieval instruction to each query and encode it.

        BGE models recommend prefixing queries with
        "Represent this sentence for searching relevant passages: ".

        Args:
            queries: A single query string or a list of query strings. A
                single string is wrapped in a one-element list, so the
                result always corresponds to a list input.
            batch_size: Batch size forwarded to ``encode``.
            prefix: Instruction to prepend to every query. ``None`` (the
                default) uses ``DEFAULT_QUERY_PREFIX``; pass ``""`` to
                encode the queries without any instruction.

        Returns:
            The embeddings produced by ``encode`` for the prefixed queries.
        """
        # NOTE(review): the BGE documentation appears to list no query
        # instruction for bge-m3 (the default model here) -- confirm whether
        # the default prefix should stay enabled for that model.
        if prefix is None:
            prefix = self.DEFAULT_QUERY_PREFIX
        if isinstance(queries, str):
            queries = [queries]
        prefixed_queries = [prefix + query for query in queries]
        return self.encode(prefixed_queries, batch_size=batch_size)