Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -123,17 +123,21 @@ def check_environment():
 
 class SentenceTransformerRetriever:
     def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2", cache_dir: str = "embeddings_cache"):
-        self.device = torch.device("cpu")
-        self.model_name = model_name
-        self.cache_dir = cache_dir
-        self.cache_file = "embeddings.pkl"
-        self.doc_embeddings = None
-        os.makedirs(cache_dir, exist_ok=True)
-        # Initialize model using cached method
-        self.model = self._load_model(model_name)
+        try:
+            self.device = torch.device("cpu")
+            self.model_name = model_name
+            self.cache_dir = cache_dir
+            self.cache_file = "embeddings.pkl"
+            self.doc_embeddings = None
+            os.makedirs(cache_dir, exist_ok=True)
+            # Initialize model using cached method
+            self.model = self._load_model(model_name)
+        except Exception as e:
+            logging.error(f"Error initializing SentenceTransformerRetriever: {str(e)}")
+            raise
 
     @st.cache_resource(show_spinner=False)
-    def _load_model(_self, _model_name: str):
+    def _load_model(_self, _model_name: str):
         """Load and cache the sentence transformer model"""
         try:
             with warnings.catch_warnings():
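A note on the underscore-prefixed parameters above: Streamlit excludes any cached-function argument whose name starts with an underscore from the cache key, which is how `_load_model` avoids an UnhashableParamError on `self`. A minimal standalone sketch of the pattern (not this app's exact code):

    import streamlit as st
    from sentence_transformers import SentenceTransformer

    class Retriever:
        @st.cache_resource(show_spinner=False)
        def _load_model(_self, model_name: str):
            # _self is skipped when Streamlit builds the cache key, so the
            # unhashable instance does not break caching; model_name, a plain
            # string, still participates in the key.
            return SentenceTransformer(model_name)

In the diff both parameters are underscored (`_self`, `_model_name`), so the model name is also left out of the cache key and every call shares the one cached model.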
@@ -144,11 +148,17 @@ class SentenceTransformerRetriever:
             if not isinstance(test_embedding, torch.Tensor):
                 raise ValueError("Model initialization failed")
             return model
+        except Exception as e:
+            logging.error(f"Error loading model: {str(e)}")
+            raise
+
     def get_cache_path(self, data_folder: str = None) -> str:
+        """Get the path for cache file"""
         return os.path.join(self.cache_dir, self.cache_file)
 
     @log_function
     def save_cache(self, data_folder: str, cache_data: dict):
+        """Save embeddings to cache"""
         try:
             cache_path = self.get_cache_path()
             if os.path.exists(cache_path):
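The bodies of `save_cache` and `load_cache` are cut off by the hunk context. A hedged sketch of the pickle-based pattern the surrounding lines suggest, reusing the `embeddings.pkl` file name set in `__init__` (the dict layout is an assumption):

    import os
    import pickle
    from typing import Optional

    def save_cache(cache_dir: str, cache_data: dict) -> None:
        # Assumed layout: {"embeddings": torch.Tensor, "documents": list[str]}
        os.makedirs(cache_dir, exist_ok=True)
        with open(os.path.join(cache_dir, "embeddings.pkl"), "wb") as f:
            pickle.dump(cache_data, f)

    def load_cache(cache_dir: str) -> Optional[dict]:
        path = os.path.join(cache_dir, "embeddings.pkl")
        if not os.path.exists(path):
            return None
        with open(path, "rb") as f:
            return pickle.load(f)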
@@ -162,7 +172,8 @@ class SentenceTransformerRetriever:
 
     @log_function
     @st.cache_data
-    def load_cache(_self, _data_folder: str = None) -> Optional[Dict]:
+    def load_cache(_self, _data_folder: str = None) -> Optional[Dict]:
+        """Load embeddings from cache"""
         try:
             cache_path = _self.get_cache_path()
             if os.path.exists(cache_path):
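`load_cache` is cached with `@st.cache_data`, which suits picklable values and hands each rerun a copy, whereas the model above uses `@st.cache_resource`, which returns one shared object; the underscore convention applies to both decorators. A compact illustration of the distinction:

    import streamlit as st

    @st.cache_data
    def load_rows() -> list[int]:
        # Serializable result: Streamlit stores it and returns a fresh copy
        # on every rerun, so callers can mutate it safely.
        return [1, 2, 3]

    @st.cache_resource
    def load_handle() -> object:
        # Shared resource: every rerun receives the very same object.
        return object()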
@@ -179,6 +190,7 @@ class SentenceTransformerRetriever:
 
     @log_function
     def encode(self, texts: List[str], batch_size: int = 32) -> torch.Tensor:
+        """Encode texts into embeddings"""
         try:
             embeddings = self.model.encode(texts, batch_size=batch_size, convert_to_tensor=True, show_progress_bar=True)
             return F.normalize(embeddings, p=2, dim=1)
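Because `encode` L2-normalizes its output (`F.normalize(embeddings, p=2, dim=1)`), cosine similarity against these embeddings reduces to a dot product. A small self-contained check (384 matches all-MiniLM-L6-v2's embedding width, but any size works):

    import torch
    import torch.nn.functional as F

    docs = F.normalize(torch.randn(4, 384), p=2, dim=1)   # unit-norm rows
    query = F.normalize(torch.randn(1, 384), p=2, dim=1)

    cos = F.cosine_similarity(query, docs)  # shape (4,): query broadcasts over rows
    dot = docs @ query.squeeze(0)           # identical for unit vectors
    assert torch.allclose(cos, dot, atol=1e-6)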
@@ -188,23 +200,29 @@ class SentenceTransformerRetriever:
 
     @log_function
     def store_embeddings(self, embeddings: torch.Tensor):
+        """Store embeddings in memory"""
         self.doc_embeddings = embeddings
 
     @log_function
     def search(self, query_embedding: torch.Tensor, k: int, documents: List[str]):
-        if self.doc_embeddings is None:
-            raise ValueError("No document embeddings stored!")
-
-        similarities = F.cosine_similarity(query_embedding, self.doc_embeddings)
-        k = min(k, len(documents))
-        scores, indices = torch.topk(similarities, k=k)
-
-        logging.info(f"\nSimilarity Stats:")
-        logging.info(f"Max similarity: {similarities.max().item():.4f}")
-        logging.info(f"Mean similarity: {similarities.mean().item():.4f}")
-        logging.info(f"Selected similarities: {scores.tolist()}")
-
-        return indices.cpu(), scores.cpu()
+        """Search for similar documents"""
+        try:
+            if self.doc_embeddings is None:
+                raise ValueError("No document embeddings stored!")
+
+            similarities = F.cosine_similarity(query_embedding, self.doc_embeddings)
+            k = min(k, len(documents))
+            scores, indices = torch.topk(similarities, k=k)
+
+            logging.info(f"\nSimilarity Stats:")
+            logging.info(f"Max similarity: {similarities.max().item():.4f}")
+            logging.info(f"Mean similarity: {similarities.mean().item():.4f}")
+            logging.info(f"Selected similarities: {scores.tolist()}")
+
+            return indices.cpu(), scores.cpu()
+        except Exception as e:
+            logging.error(f"Error in search: {str(e)}")
+            raise
 
 class RAGPipeline:
     def __init__(self, data_folder: str, k: int = 5):