File size: 2,543 Bytes
5889992 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
from pathlib import Path
from typing import List, Optional
import logging
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from src.llm.utils.logging import TheryBotLogger
class FAISSVectorSearch:
    """Similarity search over a LangChain FAISS vector store kept on disk.

    On construction the store is loaded from ``db_path`` when that path
    exists; otherwise a fresh store is created in memory and is only written
    to disk by :meth:`save` (which :meth:`add_texts` calls automatically).

    Attributes:
        embedding_model: Embedding model used for queries and for added texts.
        db_path: Directory the FAISS store is loaded from and saved to.
        k: Default number of results returned by :meth:`search`.
        logger: Logger used to report search failures.
        vectorstore: The underlying LangChain ``FAISS`` store.
    """

    def __init__(
        self,
        embedding_model: Optional[HuggingFaceEmbeddings] = None,
        db_path: Path = Path("vector_embedding/mental_health_vector_db"),
        k: int = 5,
        logger: Optional[TheryBotLogger] = None,
    ):
        """Create the searcher and load (or create) its vector store.

        Args:
            embedding_model: Embedding model to use. When omitted, the model
                built by :meth:`_get_default_embedding_model` is used.
            db_path: Location of the persisted store. A plain string is
                accepted as well and converted to a ``Path``.
            k: Default number of results for :meth:`search`.
            logger: Logger to use. When omitted, a new ``TheryBotLogger`` is
                created.
        """
        if embedding_model is None:
            embedding_model = self._get_default_embedding_model()
        self.embedding_model = embedding_model
        # Coerce so callers passing a str do not crash on .exists()/.parent.
        self.db_path = Path(db_path)
        self.k = k
        self.logger = logger if logger is not None else TheryBotLogger()
        self._initialize_store()

    def _get_default_embedding_model(self) -> HuggingFaceEmbeddings:
        """Build the default CPU MiniLM sentence-embedding model."""
        # NOTE(review): encode_kwargs are handed to the embedding backend;
        # confirm the installed sentence-transformers version accepts
        # "padding" / "max_length" / "truncation" here.
        return HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={
                "padding": "max_length",
                "max_length": 512,
                "truncation": True,
                "normalize_embeddings": True,
            },
        )

    def _initialize_store(self) -> None:
        """Load the store from ``db_path`` or create a new in-memory one."""
        if self.db_path.exists():
            # SECURITY: allow_dangerous_deserialization=True permits loading
            # pickled data from db_path. Only point db_path at directories
            # this application wrote itself, never at untrusted input.
            self.vectorstore = FAISS.load_local(
                str(self.db_path),
                self.embedding_model,
                allow_dangerous_deserialization=True,
            )
        else:
            # The store is seeded with a single empty text because it is
            # built via from_texts. search() filters that blank entry out
            # so it never reaches callers.
            self.vectorstore = FAISS.from_texts([""], self.embedding_model)

    def search(self, query: str, k: Optional[int] = None) -> List[str]:
        """Return the text of the documents most similar to ``query``.

        Args:
            query: Free-text query to embed and look up.
            k: Maximum number of results. ``None`` means use ``self.k``.
                A non-positive value yields an empty list.

        Returns:
            Up to ``k`` non-empty document texts, in the order returned by
            the vector store. An empty list is returned (and the error is
            logged) if the lookup fails.
        """
        try:
            # Explicit None check: `k or self.k` would silently turn an
            # explicit k=0 into the default.
            limit = self.k if k is None else k
            if limit <= 0:
                return []
            # Ask for one extra hit so that discarding the blank seed
            # document still leaves up to `limit` real results.
            hits = self.vectorstore.similarity_search(query, k=limit + 1)
            texts = [doc.page_content for doc in hits if doc.page_content]
            return texts[:limit]
        except Exception as e:
            # Deliberate best-effort: log the failure and return no results
            # rather than propagating the error to the caller.
            self.logger.log_interaction(
                interaction_type="vector_search_error",
                data={"error": str(e)},
                level=logging.ERROR,
            )
            return []

    def add_texts(self, texts: List[str]) -> None:
        """Add new texts to the vector store and persist it.

        Args:
            texts: Texts to embed and index. An empty collection is a no-op:
                nothing is embedded and nothing is written to disk.
        """
        texts = list(texts)
        if not texts:
            return
        self.vectorstore.add_texts(texts)
        # Persist immediately so added texts survive a restart.
        self.save()

    def save(self) -> None:
        """Save the vector store to ``db_path``, creating parent directories."""
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self.vectorstore.save_local(str(self.db_path))