Update app.py
app.py CHANGED
@@ -447,62 +447,155 @@ class RAGPipeline:
         placeholder.warning(message)
         return message
 
-    def query_model(self, prompt: str) -> str:
-        """Query the local Llama model"""
-        try:
-            if self.llm is None:
-                raise RuntimeError("Model not initialized")
-
-            response = self.llm(
-                prompt,
-                max_tokens=512,
-                temperature=0.4,
-                top_p=0.95,
-                echo=False,
-                stop=["Question:", "Context:", "Guidelines:"],  # Removed \n\n from stop tokens to allow paragraphs
-                repeat_penalty=1.1  # Added to encourage more diverse text
-            )
-
-            if response and 'choices' in response and len(response['choices']) > 0:
-                text = response['choices'][0].get('text', '').strip()
-                return text
-            else:
-                raise ValueError("No valid response generated")
-
-        except Exception as e:
-            logging.error(f"Error in query_model: {str(e)}")
-            raise
-
-@st.cache_resource(show_spinner=False)
-def initialize_rag_pipeline():
-    """Initialize the RAG pipeline once"""
-    try:
-        # Create necessary directories
-        os.makedirs("ESPN_data", exist_ok=True)
-
-        # Load embeddings from Drive
-        drive_file_id = "1MuV63AE9o6zR9aBvdSDQOUextp71r2NN"
-        with st.spinner("Loading embeddings from Google Drive..."):
-            cache_data = load_from_drive(drive_file_id)
-            if cache_data is None:
-                st.error("Failed to load embeddings from Google Drive")
-                st.stop()
-
-        # Initialize pipeline
-        data_folder = "ESPN_data"
-        rag = RAGPipeline(data_folder)
-
-        # Store embeddings
-        rag.documents = cache_data['documents']
-        rag.retriever.store_embeddings(cache_data['embeddings'])
-
-        return rag
-
-    except Exception as e:
-        logging.error(f"Pipeline initialization error: {str(e)}")
-        st.error(f"Failed to initialize the system: {str(e)}")
-        raise
-
+    # def query_model(self, prompt: str) -> str:
+    #     """Query the local Llama model"""
+    #     try:
+    #         if self.llm is None:
+    #             raise RuntimeError("Model not initialized")
+
+    #         response = self.llm(
+    #             prompt,
+    #             max_tokens=512,
+    #             temperature=0.4,
+    #             top_p=0.95,
+    #             echo=False,
+    #             stop=["Question:", "Context:", "Guidelines:"],  # Removed \n\n from stop tokens to allow paragraphs
+    #             repeat_penalty=1.1  # Added to encourage more diverse text
+    #         )
+
+    #         if response and 'choices' in response and len(response['choices']) > 0:
+    #             text = response['choices'][0].get('text', '').strip()
+    #             return text
+    #         else:
+    #             raise ValueError("No valid response generated")
+
+    #     except Exception as e:
+    #         logging.error(f"Error in query_model: {str(e)}")
+    #         raise
+
+    def query_model(self, prompt: str) -> str:
+        """Query the local Llama model"""
+        try:
+            if self.llm is None:
+                raise RuntimeError("Model not initialized")
+
+            # Log the prompt for debugging
+            logging.info(f"Sending prompt to model...")
+
+            # Generate response with more explicit parameters
+            response = self.llm(
+                prompt,
+                max_tokens=512,  # Maximum length of the response
+                temperature=0.7,  # Slightly increased for more dynamic responses
+                top_p=0.95,  # Nucleus sampling parameter
+                top_k=50,  # Top-k sampling parameter
+                echo=False,  # Don't include prompt in response
+                stop=["Question:", "Context:", "Guidelines:"],  # Stop tokens
+                repeat_penalty=1.1,  # Penalize repetition
+                presence_penalty=0.5,  # Encourage topic diversity
+                frequency_penalty=0.5  # Discourage word repetition
+            )
+
+            # Log the raw response for debugging
+            logging.info(f"Raw model response: {response}")
+
+            if response and isinstance(response, dict) and 'choices' in response and response['choices']:
+                generated_text = response['choices'][0].get('text', '').strip()
+                if generated_text:
+                    logging.info(f"Generated text: {generated_text[:100]}...")  # Log first 100 chars
+                    return generated_text
+                else:
+                    logging.warning("Model returned empty response")
+                    raise ValueError("Empty response from model")
+            else:
+                logging.warning(f"Unexpected response format: {response}")
+                raise ValueError("Invalid response format from model")
+
+        except Exception as e:
+            logging.error(f"Error in query_model: {str(e)}")
+            logging.error("Full error details: ", exc_info=True)
+            raise
+
+    def initialize_model(self):
+        """Initialize the model with proper error handling and verification"""
+        try:
+            if not os.path.exists(self.model_path):
+                st.info("Downloading model... This may take a while.")
+                direct_url = "https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF/resolve/main/mistral-7b-v0.1.Q4_K_M.gguf"
+                download_file_with_progress(direct_url, self.model_path)
+
+            # Verify file exists and has content
+            if not os.path.exists(self.model_path):
+                raise FileNotFoundError(f"Model file {self.model_path} not found after download attempts")
+
+            if os.path.getsize(self.model_path) < 1000000:  # Less than 1MB
+                os.remove(self.model_path)
+                raise ValueError("Downloaded model file is too small, likely corrupted")
+
+            # Updated model configuration
+            llm_config = {
+                "model_path": self.model_path,
+                "n_ctx": 4096,  # Increased context window
+                "n_threads": 4,
+                "n_batch": 512,
+                "n_gpu_layers": 0,
+                "verbose": True,  # Enable verbose mode for debugging
+                "use_mlock": False,  # Disable memory locking
+                "last_n_tokens_size": 64,  # Token window size for repeat penalty
+                "seed": -1  # Random seed for reproducibility
+            }
+
+            logging.info("Initializing Llama model...")
+            self.llm = Llama(**llm_config)
+
+            # Test the model
+            test_response = self.llm(
+                "Test response",
+                max_tokens=10,
+                temperature=0.7,
+                echo=False
+            )
+
+            if not test_response or 'choices' not in test_response:
+                raise RuntimeError("Model initialization test failed")
+
+            logging.info("Model initialized and tested successfully")
+            return self.llm
+
+        except Exception as e:
+            logging.error(f"Error initializing model: {str(e)}")
+            raise
+
+# @st.cache_resource(show_spinner=False)
+# def initialize_rag_pipeline():
+#     """Initialize the RAG pipeline once"""
+#     try:
+#         # Create necessary directories
+#         os.makedirs("ESPN_data", exist_ok=True)
+
+#         # Load embeddings from Drive
+#         drive_file_id = "1MuV63AE9o6zR9aBvdSDQOUextp71r2NN"
+#         with st.spinner("Loading embeddings from Google Drive..."):
+#             cache_data = load_from_drive(drive_file_id)
+#             if cache_data is None:
+#                 st.error("Failed to load embeddings from Google Drive")
+#                 st.stop()
+
+#         # Initialize pipeline
+#         data_folder = "ESPN_data"
+#         rag = RAGPipeline(data_folder)
+
+#         # Store embeddings
+#         rag.documents = cache_data['documents']
+#         rag.retriever.store_embeddings(cache_data['embeddings'])
+
+#         return rag
+
+#     except Exception as e:
+#         logging.error(f"Pipeline initialization error: {str(e)}")
+#         st.error(f"Failed to initialize the system: {str(e)}")
+#         raise
+
 # def main():
 #     try:
 #         # Environment check
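
For context, the sketch below is not part of the commit; it only illustrates the llama-cpp-python call pattern that the new query_model() and initialize_model() rely on, using the same generation parameters this change introduces. The model path and prompt are placeholders, and a GGUF file is assumed to already be available locally.

# Standalone sketch, assuming llama-cpp-python is installed and a GGUF model
# file exists locally; the path and prompt below are placeholders, not the
# ones the Space downloads or builds.
import logging

from llama_cpp import Llama

logging.basicConfig(level=logging.INFO)

llm = Llama(
    model_path="models/example.Q4_K_M.gguf",  # placeholder path
    n_ctx=4096,       # context window, matching the commit's llm_config
    n_threads=4,
    n_batch=512,
    n_gpu_layers=0,   # CPU-only, as configured in the Space
)

response = llm(
    "Question: Who won the match?\nAnswer:",  # placeholder prompt
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
    top_k=50,
    echo=False,
    stop=["Question:", "Context:", "Guidelines:"],
    repeat_penalty=1.1,
    presence_penalty=0.5,
    frequency_penalty=0.5,
)

# llama-cpp-python returns an OpenAI-style completion dict, which is why the
# new query_model() validates response['choices'][0]['text'] before using it.
if response and isinstance(response, dict) and response.get("choices"):
    print(response["choices"][0].get("text", "").strip())
else:
    logging.warning("Unexpected response format: %s", response)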