Update app.py

app.py CHANGED
@@ -1,4 +1,3 @@
-from huggingface_hub import HfApi, snapshot_download
 from helper import download_hugging_face_embeddings
 from url import md_files_url
 from get_data import extract_repo_details, fetch_md_file_via_api, data_loader, chunk_text
@@ -24,29 +23,49 @@ from datetime import datetime
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-
 base = {}
 last_messages = 4
 documents = []
+HF_ORG_NAME = 'HumbleBeeAI'
+DATASET_NAME = 'faiss_index'
+repo_id = f"{HF_ORG_NAME}/{DATASET_NAME}"
 
 load_dotenv()
 AUTH_TOKEN_KEY = os.environ.get('AUTH_TOKEN_KEY')
-
+
 os.environ['AUTH_TOKEN_KEY'] = AUTH_TOKEN_KEY
-os.environ['BASE_URL'] = BASE_URL
 HF_TOKEN = os.environ.get("HF_TOKEN")
-os.environ['HF_TOKEN'] = HF_TOKEN
-HF_USERNAME = "HumbleBeeAI" # Replace with your HF username
-DATASET_NAME = "faiss_index"
-index_path = "faiss_index"
 
-
-
-
+if not HF_TOKEN:
+    raise ValueError("🚨 HF_TOKEN is not set! Ensure you have the right permissions.")
+
+from huggingface_hub import HfApi
+
+api = HfApi()
+
+# 🔹 Check if the dataset exists in the organization
+try:
+    api.repo_info(repo_id, repo_type="dataset", token=HF_TOKEN)
+    print(f"✅ Dataset '{repo_id}' already exists in the organization.")
+except Exception:
+    print(f"🔴 Dataset '{repo_id}' not found. Creating it now...")
+
+    # Create repo inside the organization
+    api.create_repo(
+        repo_id=repo_id,
+        repo_type="dataset",
+        private=True, # Ensure it's private
+        token=HF_TOKEN,
+        organization=HF_ORG_NAME # Specify organization
+    )
+    print(f"✅ Dataset '{repo_id}' created successfully in the organization.")
+
+db_path = "chatbot.db"
 
-# 🔹 Ensure
+# 🔹 Ensure `chatbot.db` exists before uploading
 if not os.path.exists(db_path):
     print("🔴 chatbot.db does not exist! Creating it now...")
+    import sqlite3
     conn = sqlite3.connect(db_path)
     cursor = conn.cursor()
     cursor.execute('''CREATE TABLE IF NOT EXISTS users (
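
Note: the repo_info try/except above works, but recent huggingface_hub releases take the namespace from the repo_id prefix and may no longer accept a separate organization keyword on create_repo; passing exist_ok=True also collapses the probe-then-create sequence into one idempotent call. A minimal sketch of that variant, reusing repo_id and HF_TOKEN from the diff:

    from huggingface_hub import HfApi

    api = HfApi(token=HF_TOKEN)

    # exist_ok=True makes this a no-op when the dataset already exists,
    # so no repo_info() probe is needed; the org lives in the repo_id prefix.
    api.create_repo(repo_id=repo_id, repo_type="dataset", private=True, exist_ok=True)
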
@@ -58,40 +77,33 @@ if not os.path.exists(db_path):
     conn.close()
     print("✅ chatbot.db created successfully!")
 
-# 🔹 Confirm file existence
-if os.path.exists(db_path):
-    print(f"✅ File chatbot.db found at {db_path}")
-else:
-    raise FileNotFoundError("🚨 chatbot.db was not found!")
-api = HfApi()
-
 # 🔹 Upload chatbot.db as a private dataset
 api.upload_file(
-    path_or_fileobj=db_path,
-    path_in_repo="chatbot.db",
-    repo_id=
+    path_or_fileobj=db_path,
+    path_in_repo="chatbot.db",
+    repo_id=repo_id,
     repo_type="dataset",
     token=HF_TOKEN
 )
 
-print("✅ chatbot.db successfully uploaded to
+print("✅ chatbot.db successfully uploaded to the organization's private dataset.")
+
+from huggingface_hub import snapshot_download
+import os
 
 # 🔹 Download chatbot.db securely
 db_folder = snapshot_download(
-    repo_id=
-    allow_patterns=["chatbot.db"], # Only download
+    repo_id=repo_id,
+    allow_patterns=["chatbot.db"], # Only download chatbot.db
     use_auth_token=HF_TOKEN
 )
 
-# 🔹 Define the database path
 DB_PATH = os.path.join(db_folder, "chatbot.db")
 
-# 🔹 Confirm database was downloaded
 if os.path.exists(DB_PATH):
     print(f"✅ Database downloaded at {DB_PATH}")
 else:
-    raise FileNotFoundError("🚨 Failed to download chatbot.db from
-
+    raise FileNotFoundError("🚨 Failed to download chatbot.db from the organization's dataset.")
 
 # ---- Database part ----- #
 # Database Connection
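
Note: snapshot_download defaults to the model namespace, so without repo_type="dataset" the call above will look for a model repo named HumbleBeeAI/faiss_index and fail; use_auth_token is also deprecated in favor of token. For a single file, hf_hub_download is the narrower tool. A sketch under the same repo_id/HF_TOKEN assumptions:

    from huggingface_hub import hf_hub_download

    # Fetch only chatbot.db from the private dataset; returns the cached local path.
    DB_PATH = hf_hub_download(
        repo_id=repo_id,
        filename="chatbot.db",
        repo_type="dataset",  # required: the default namespace is "model"
        token=HF_TOKEN,       # newer releases prefer token= over use_auth_token=
    )
    print(f"✅ Database available at {DB_PATH}")
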
@@ -181,63 +193,80 @@ for url in md_files_url:
         logging.error(f"Error processing URL {url}: {ve}")
 print(f"Fetched {len(documents)} documents.")
 
-
-
-
-# 🔹 Ensure FAISS index exists before uploading
-if not os.path.exists(faiss_index_path):
-    print("🔴 FAISS index not found! Creating a new FAISS index...")
+text_chunk = chunk_text(documents)
+# Define paths
+FAISS_LOCAL_PATH = "/tmp/faiss_index"
 
-
-
-    index = faiss.IndexFlatL2(d) # Create an empty FAISS index
-    faiss.write_index(index, os.path.join(faiss_index_path, "index.faiss"))
+# 🔹 Ensure FAISS directory exists
+os.makedirs(FAISS_LOCAL_PATH, exist_ok=True)
 
-
-
-
-
-
-
-
-
-
+try:
+    # 🔹 Create FAISS index
+    faiss_index = FAISS.from_documents(text_chunk, download_hugging_face_embeddings())
+
+    # 🔹 Save FAISS locally
+    faiss_index.save_local(FAISS_LOCAL_PATH)
+    print(f"✅ FAISS index successfully saved to {FAISS_LOCAL_PATH}")
+
+except Exception as e:
+    logging.error(f"🚨 Error creating or saving FAISS index: {e}")
+
+try:
+    api.repo_info(repo_id, repo_type="dataset", token=HF_TOKEN)
+    print(f"✅ Dataset '{repo_id}' already exists in the organization.")
+except Exception:
+    print(f"🔴 Dataset '{repo_id}' not found. Creating it now...")
+
+    # Create dataset in the organization
+    api.create_repo(
+        repo_id=repo_id,
+        repo_type="dataset",
+        private=True,
+        token=HF_TOKEN,
+        organization=HF_ORG_NAME
+    )
+    print(f"✅ Dataset '{repo_id}' created successfully in the organization.")
 
-# 🔹 Upload FAISS
+# 🔹 Upload FAISS to the organization dataset
 api.upload_folder(
-    folder_path=
-    repo_id=
+    folder_path=FAISS_LOCAL_PATH,
+    repo_id=repo_id,
     repo_type="dataset",
     token=HF_TOKEN
 )
 
-print("✅ FAISS index successfully uploaded to
+print("✅ FAISS index successfully uploaded to the organization's private dataset.")
 
-# 🔹 Download FAISS index
-
-    repo_id=
-    allow_patterns=["faiss_index/*"],
+# 🔹 Download FAISS index from the private organization dataset
+faiss_download_folder = snapshot_download(
+    repo_id=repo_id,
+    allow_patterns=["faiss_index/*"],
     use_auth_token=HF_TOKEN
 )
 
-
-faiss_file_path = os.path.join(faiss_folder, "index.faiss")
+FAISS_PATH = os.path.join(faiss_download_folder, "faiss_index")
 
-
-
-    print(f"✅ FAISS index downloaded at {faiss_file_path}")
+if os.path.exists(FAISS_PATH):
+    print(f"✅ FAISS index found at {FAISS_PATH}, loading it now...")
 else:
-    raise FileNotFoundError("🚨
+    raise FileNotFoundError("🚨 FAISS index not found in the organization's dataset.")
+try:
+    # 🔹 Load FAISS index with LangChain
+    docsearch = FAISS.load_local(
+        FAISS_PATH,
+        download_hugging_face_embeddings(),
+        allow_dangerous_deserialization=True
+    )
 
-
-index = faiss.read_index(faiss_file_path)
+    print("✅ FAISS index successfully loaded for retrieval.")
 
-
-
-
+except Exception as e:
+    logging.error(f"🚨 Error loading FAISS index: {e}")
+
+
 retriever = docsearch.as_retriever(search_type='similarity', search_kwargs={'k':2})
 
-llm = Ollama(model='llama3.2'
+llm = Ollama(model='llama3.2')
 
 prompt = ChatPromptTemplate.from_messages(
     [
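
Note: upload_folder pushes the contents of folder_path to the repo root unless path_in_repo is set, so the download side's allow_patterns=["faiss_index/*"] would match nothing as written, and repo_type="dataset" is again missing from snapshot_download. A sketch of a consistent round trip, assuming the api, repo_id, HF_TOKEN, FAISS_LOCAL_PATH, FAISS, snapshot_download, and download_hugging_face_embeddings names defined in the diff:

    # Upload: pin the files under faiss_index/ so the download patterns match.
    api.upload_folder(
        folder_path=FAISS_LOCAL_PATH,
        path_in_repo="faiss_index",
        repo_id=repo_id,
        repo_type="dataset",
        token=HF_TOKEN,
    )

    # Download: same prefix, explicit dataset repo type.
    folder = snapshot_download(
        repo_id=repo_id,
        repo_type="dataset",
        allow_patterns=["faiss_index/*"],
        token=HF_TOKEN,
    )

    # Load the LangChain FAISS store that save_local() wrote above.
    docsearch = FAISS.load_local(
        os.path.join(folder, "faiss_index"),
        download_hugging_face_embeddings(),
        allow_dangerous_deserialization=True,
    )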