File size: 2,543 Bytes
5889992
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from pathlib import Path
from typing import List, Optional
import logging
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from src.llm.utils.logging import TheryBotLogger

class FAISSVectorSearch:
    def __init__(
        self,
        embedding_model: Optional[HuggingFaceEmbeddings] = None,
        db_path: Path = Path("vector_embedding/mental_health_vector_db"),
        k: int = 5,
        logger: Optional[TheryBotLogger] = None
    ):
        self.embedding_model = embedding_model or self._get_default_embedding_model()
        self.db_path = db_path
        self.k = k
        self.logger = logger or TheryBotLogger()
        self._initialize_store()
    
    def _get_default_embedding_model(self) -> HuggingFaceEmbeddings:
        return HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={
                "padding": "max_length",
                "max_length": 512,
                "truncation": True,
                "normalize_embeddings": True
            }
        )
    
    def _initialize_store(self) -> None:
        if self.db_path.exists():
            self.vectorstore = FAISS.load_local(
                str(self.db_path),
                self.embedding_model,
                allow_dangerous_deserialization=True
            )
        else:
            # Initialize with empty store
            self.vectorstore = FAISS.from_texts(
                [""], self.embedding_model
            )
    
    def search(self, query: str, k: Optional[int] = None) -> List[str]:
        try:
            results = self.vectorstore.similarity_search(
                query,
                k=(k or self.k)
            )
            return [res.page_content for res in results]
        except Exception as e:
            # Log error and return empty results
            self.logger.log_interaction(
                interaction_type="vector_search_error",
                data={"error": str(e)},
                level=logging.ERROR
            )
            return []
    
    def add_texts(self, texts: List[str]) -> None:
        """Add new texts to the vector store"""
        self.vectorstore.add_texts(texts)
        # Optionally save after adding
        self.save()
    
    def save(self) -> None:
        """Save the vector store to disk"""
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self.vectorstore.save_local(str(self.db_path))