Runtime error
Update app.py
app.py CHANGED
@@ -40,24 +40,59 @@ HF_USERNAME = "HumbleBeeAI" # Replace with your HF username
 DATASET_NAME = "faiss_index"
 index_path = "faiss_index"
 
-api = HfApi()
 from pathlib import Path
-
+# 🔹 Use /tmp directory in Hugging Face Spaces (to avoid filesystem restrictions)
+db_path = "/tmp/chatbot.db"
+
+# 🔹 Ensure the database file exists
+if not os.path.exists(db_path):
+    print("🔴 chatbot.db does not exist! Creating it now...")
+    conn = sqlite3.connect(db_path)
+    cursor = conn.cursor()
+    cursor.execute('''CREATE TABLE IF NOT EXISTS users (
+                        id INTEGER PRIMARY KEY AUTOINCREMENT,
+                        username TEXT UNIQUE NOT NULL,
+                        password_hash TEXT NOT NULL
+                    )''')
+    conn.commit()
+    conn.close()
+    print("✅ chatbot.db created successfully!")
+
+# 🔹 Confirm file existence
+if os.path.exists(db_path):
+    print(f"✅ File chatbot.db found at {db_path}")
+else:
+    raise FileNotFoundError("🚨 chatbot.db was not found!")
+api = HfApi()
 
+# 🔹 Upload chatbot.db as a private dataset
 api.upload_file(
-    path_or_fileobj=db_path, # Use
-    path_in_repo="chatbot.db",
-    repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
+    path_or_fileobj=db_path, # Use the /tmp path
+    path_in_repo="chatbot.db", # How it will appear in the dataset
+    repo_id=f"{HF_USERNAME}/{DATASET_NAME}", # Your private dataset repo
     repo_type="dataset",
     token=HF_TOKEN
 )
 
+print("✅ chatbot.db successfully uploaded to Hugging Face Dataset.")
+
+# 🔹 Download chatbot.db securely
 db_folder = snapshot_download(
     repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
     allow_patterns=["chatbot.db"], # Only download the database
     use_auth_token=HF_TOKEN
 )
+
+# 🔹 Define the database path
 DB_PATH = os.path.join(db_folder, "chatbot.db")
+
+# 🔹 Confirm database was downloaded
+if os.path.exists(DB_PATH):
+    print(f"✅ Database downloaded at {DB_PATH}")
+else:
+    raise FileNotFoundError("🚨 Failed to download chatbot.db from Hugging Face.")
+
+
 # ---- Database part ----- #
 # Database Connection
 def connect_db():
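Note: this hunk creates chatbot.db locally, uploads it, and immediately re-downloads it at import time. Two caveats, offered as suggestions rather than as code from this repo: the new lines need `import os` and `import sqlite3` to be present higher up, and recent `huggingface_hub` releases deprecate `use_auth_token` in favor of `token`. A single file can also be fetched with `hf_hub_download` instead of snapshotting the whole dataset. A minimal sketch of the same round trip under those assumptions, reusing the HF_USERNAME, DATASET_NAME, and HF_TOKEN values defined earlier in app.py:

import os
import sqlite3
from huggingface_hub import HfApi, hf_hub_download

db_path = "/tmp/chatbot.db"

# Create the database locally if it is missing (same users schema as the diff)
if not os.path.exists(db_path):
    conn = sqlite3.connect(db_path)
    conn.execute('''CREATE TABLE IF NOT EXISTS users (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        username TEXT UNIQUE NOT NULL,
                        password_hash TEXT NOT NULL
                    )''')
    conn.commit()
    conn.close()

api = HfApi()
api.upload_file(
    path_or_fileobj=db_path,
    path_in_repo="chatbot.db",
    repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
    repo_type="dataset",
    token=HF_TOKEN,
)

# hf_hub_download returns the cached path of the single file it fetched
DB_PATH = hf_hub_download(
    repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
    filename="chatbot.db",
    repo_type="dataset",
    token=HF_TOKEN,  # replaces the deprecated use_auth_token
)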
@@ -146,33 +181,60 @@ for url in md_files_url:
         logging.error(f"Error processing URL {url}: {ve}")
 print(f"Fetched {len(documents)} documents.")
 
-
-
-    faiss_index = FAISS.from_documents(text_chunk, embedding_function)
-    faiss_index.save_local(index_path)
-    api = HfApi()
-    api.upload_folder(folder_path=index_path, repo_id=f"{HF_USERNAME}/{DATASET_NAME}", repo_type="dataset", token=HF_TOKEN)
-    return True
-
-# 🔹 Download & Load FAISS Index Privately
-def load_faiss():
-    index_path = snapshot_download(
-        repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
-        allow_patterns=["faiss_index/*"],
-        use_auth_token=HF_TOKEN
-    )
-    return FAISS.load_local(
-        index_path,
-        download_hugging_face_embeddings(),
-        allow_dangerous_deserialization=True
-    )
+# 🔹 Use /tmp directory in Spaces
+faiss_index_path = "/tmp/faiss_index"
 
-
+# 🔹 Ensure FAISS index exists before uploading
+if not os.path.exists(faiss_index_path):
+    print("🔴 FAISS index not found! Creating a new FAISS index...")
 
-
-
+    # Create a dummy FAISS index (you should replace this with real embeddings)
+    d = 768 # Embedding dimension
+    index = faiss.IndexFlatL2(d) # Create an empty FAISS index
+    faiss.write_index(index, os.path.join(faiss_index_path, "index.faiss"))
+
+    print("✅ FAISS index created successfully!")
+
+# 🔹 Confirm FAISS index exists
+faiss_file = os.path.join(faiss_index_path, "index.faiss")
+if os.path.exists(faiss_file):
+    print(f"✅ FAISS index found at {faiss_file}")
 else:
-
+    raise FileNotFoundError("🚨 FAISS index was not found!")
+api = HfApi()
+
+# 🔹 Upload FAISS index as a private dataset
+api.upload_folder(
+    folder_path=faiss_index_path, # Upload the FAISS folder
+    repo_id=f"{HF_USERNAME}/{DATASET_NAME}", # Your private dataset repo
+    repo_type="dataset",
+    token=HF_TOKEN
+)
+
+print("✅ FAISS index successfully uploaded to Hugging Face Dataset.")
+
+# 🔹 Download FAISS index securely
+faiss_folder = snapshot_download(
+    repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
+    allow_patterns=["faiss_index/*"], # Only download FAISS index
+    use_auth_token=HF_TOKEN
+)
+
+# 🔹 Define FAISS file path
+faiss_file_path = os.path.join(faiss_folder, "index.faiss")
+
+# 🔹 Ensure the FAISS index was downloaded
+if os.path.exists(faiss_file_path):
+    print(f"✅ FAISS index downloaded at {faiss_file_path}")
+else:
+    raise FileNotFoundError("🚨 Failed to download FAISS index from Hugging Face.")
+
+# 🔹 Load FAISS Index
+index = faiss.read_index(faiss_file_path)
+
+# 🔹 Integrate FAISS with LangChain
+embedding_function = download_hugging_face_embeddings() # Your embedding function
+docsearch = FAISS(index, embedding_function)
 retriever = docsearch.as_retriever(search_type='similarity', search_kwargs={'k':2})
 
 llm = Ollama(model='llama3.2', base_url=BASE_URL)
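Note: two lines in this hunk are plausible causes of the Space's "Runtime error" status. `faiss.write_index` does not create parent directories, so the write into /tmp/faiss_index fails unless the folder is created first, and LangChain's FAISS wrapper cannot be built as `FAISS(index, embedding_function)`: its initializer also expects a docstore and an index-to-docstore mapping, which is why the `FAISS.load_local` call removed by this commit is the usual loader. A hedged sketch of a version that should run, assuming the app's `download_hugging_face_embeddings` helper and a `langchain_community`-style import layout:

import os
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore

faiss_index_path = "/tmp/faiss_index"
os.makedirs(faiss_index_path, exist_ok=True)  # write_index needs the folder to exist

faiss_file = os.path.join(faiss_index_path, "index.faiss")
if not os.path.exists(faiss_file):
    d = 768  # embedding dimension; must match the embedding model's output size
    faiss.write_index(faiss.IndexFlatL2(d), faiss_file)

# The wrapper needs the embedding function, index, docstore, and id map,
# not just the raw index
embedding_function = download_hugging_face_embeddings()
docsearch = FAISS(
    embedding_function=embedding_function,
    index=faiss.read_index(faiss_file),
    docstore=InMemoryDocstore({}),
    index_to_docstore_id={},
)
retriever = docsearch.as_retriever(search_type='similarity', search_kwargs={'k': 2})

An empty IndexFlatL2 retrieves nothing, so in practice the index should still be built with FAISS.from_documents(text_chunk, embedding_function) and persisted with save_local/load_local, as the load_faiss helper removed above did.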