joe4ai committed
Commit e30aa81 · verified · 1 parent: d5a2912

Update app.py

Files changed (1):
  1. app.py +91 -29
app.py CHANGED
@@ -40,24 +40,59 @@ HF_USERNAME = "HumbleBeeAI"  # Replace with your HF username
 DATASET_NAME = "faiss_index"
 index_path = "faiss_index"
 
-api = HfApi()
 from pathlib import Path
-db_path = str(Path("chatbot.db").resolve())
+# 🔹 Use /tmp directory in Hugging Face Spaces (to avoid filesystem restrictions)
+db_path = "/tmp/chatbot.db"
+
+# 🔹 Ensure the database file exists
+if not os.path.exists(db_path):
+    print("🔴 chatbot.db does not exist! Creating it now...")
+    conn = sqlite3.connect(db_path)
+    cursor = conn.cursor()
+    cursor.execute('''CREATE TABLE IF NOT EXISTS users (
+                          id INTEGER PRIMARY KEY AUTOINCREMENT,
+                          username TEXT UNIQUE NOT NULL,
+                          password_hash TEXT NOT NULL
+                      )''')
+    conn.commit()
+    conn.close()
+    print("✅ chatbot.db created successfully!")
+
+# 🔹 Confirm file existence
+if os.path.exists(db_path):
+    print(f"✅ File chatbot.db found at {db_path}")
+else:
+    raise FileNotFoundError("🚨 chatbot.db was not found!")
+api = HfApi()
 
+# 🔹 Upload chatbot.db as a private dataset
 api.upload_file(
-    path_or_fileobj=db_path,  # Use absolute path
-    path_in_repo="chatbot.db",
-    repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
+    path_or_fileobj=db_path,                  # Use the /tmp path
+    path_in_repo="chatbot.db",                # How it will appear in the dataset
+    repo_id=f"{HF_USERNAME}/{DATASET_NAME}",  # Your private dataset repo
     repo_type="dataset",
     token=HF_TOKEN
 )
 
+print("✅ chatbot.db successfully uploaded to Hugging Face Dataset.")
+
+# 🔹 Download chatbot.db securely
 db_folder = snapshot_download(
     repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
     allow_patterns=["chatbot.db"],  # Only download the database
     use_auth_token=HF_TOKEN
 )
+
+# 🔹 Define the database path
 DB_PATH = os.path.join(db_folder, "chatbot.db")
+
+# 🔹 Confirm database was downloaded
+if os.path.exists(DB_PATH):
+    print(f"✅ Database downloaded at {DB_PATH}")
+else:
+    raise FileNotFoundError("🚨 Failed to download chatbot.db from Hugging Face.")
+
+
 # ---- Database part ----- #
 # Database Connection
 def connect_db():
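Three caveats on the chatbot.db round trip in this hunk. `snapshot_download` defaults to `repo_type="model"`, so pointing it at a dataset repo without `repo_type="dataset"` will generally 404; the path it returns lives in the read-only Hub cache, so opening `DB_PATH` for writes will fail or silently diverge from the Hub; and recent `huggingface_hub` releases deprecate `use_auth_token=` in favor of `token=`. A minimal sketch of a safer round trip follows; the helper names are hypothetical, and `HF_USERNAME`, `DATASET_NAME`, and `HF_TOKEN` mirror the values defined earlier in app.py.

```python
import os
import shutil
import sqlite3

from huggingface_hub import HfApi, snapshot_download

HF_USERNAME = "HumbleBeeAI"        # mirrors app.py
DATASET_NAME = "faiss_index"       # mirrors app.py
HF_TOKEN = os.environ["HF_TOKEN"]  # assumption: token comes from a Space secret

def fetch_writable_db(local_path: str = "/tmp/chatbot.db") -> str:
    """Hypothetical helper: download chatbot.db and copy it out of the read-only cache."""
    cache_dir = snapshot_download(
        repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
        repo_type="dataset",             # the default is "model"
        allow_patterns=["chatbot.db"],
        token=HF_TOKEN,                  # token= supersedes the deprecated use_auth_token=
    )
    # The snapshot is cached read-only; SQLite needs a writable copy.
    shutil.copy(os.path.join(cache_dir, "chatbot.db"), local_path)
    return local_path

def push_db(local_path: str = "/tmp/chatbot.db") -> None:
    """Hypothetical helper: persist local writes back to the Hub; nothing syncs automatically."""
    HfApi().upload_file(
        path_or_fileobj=local_path,
        path_in_repo="chatbot.db",
        repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
        repo_type="dataset",
        token=HF_TOKEN,
    )

# Usage: always read and write through the /tmp copy, then push explicitly.
conn = sqlite3.connect(fetch_writable_db())
conn.close()
push_db()
```

Funneling all reads and writes through one /tmp copy also avoids the state the hunk above leaves unresolved, where the freshly created /tmp database and the just-downloaded cached copy can disagree.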
@@ -146,33 +181,60 @@ for url in md_files_url:
         logging.error(f"Error processing URL {url}: {ve}")
 print(f"Fetched {len(documents)} documents.")
 
-def create_faiss_index(text_chunk):
-    embedding_function = download_hugging_face_embeddings()
-    faiss_index = FAISS.from_documents(text_chunk, embedding_function)
-    faiss_index.save_local(index_path)
-    api = HfApi()
-    api.upload_folder(folder_path=index_path, repo_id=f"{HF_USERNAME}/{DATASET_NAME}", repo_type="dataset", token=HF_TOKEN)
-    return True
-
-# 🔹 Download & Load FAISS Index Privately
-def load_faiss():
-    index_path = snapshot_download(
-        repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
-        allow_patterns=["faiss_index/*"],
-        use_auth_token=HF_TOKEN
-    )
-    return FAISS.load_local(
-        index_path,
-        download_hugging_face_embeddings(),
-        allow_dangerous_deserialization=True
-    )
+# 🔹 Use /tmp directory in Spaces
+faiss_index_path = "/tmp/faiss_index"
 
-text_chunk = chunk_text(documents)
+# 🔹 Ensure FAISS index exists before uploading
+if not os.path.exists(faiss_index_path):
+    print("🔴 FAISS index not found! Creating a new FAISS index...")
 
-if create_faiss_index(text_chunk):
-    docsearch = load_faiss()
+    # Create a dummy FAISS index (you should replace this with real embeddings)
+    d = 768  # Embedding dimension
+    index = faiss.IndexFlatL2(d)  # Create an empty FAISS index
+    faiss.write_index(index, os.path.join(faiss_index_path, "index.faiss"))
+
+    print("✅ FAISS index created successfully!")
+
+# 🔹 Confirm FAISS index exists
+faiss_file = os.path.join(faiss_index_path, "index.faiss")
+if os.path.exists(faiss_file):
+    print(f"✅ FAISS index found at {faiss_file}")
 else:
-    logging.error("❌ FAISS index creation failed.")
+    raise FileNotFoundError("🚨 FAISS index was not found!")
+api = HfApi()
+
+# 🔹 Upload FAISS index as a private dataset
+api.upload_folder(
+    folder_path=faiss_index_path,             # Upload the FAISS folder
+    repo_id=f"{HF_USERNAME}/{DATASET_NAME}",  # Your private dataset repo
+    repo_type="dataset",
+    token=HF_TOKEN
+)
+
+print("✅ FAISS index successfully uploaded to Hugging Face Dataset.")
+
+# 🔹 Download FAISS index securely
+faiss_folder = snapshot_download(
+    repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
+    allow_patterns=["faiss_index/*"],  # Only download FAISS index
+    use_auth_token=HF_TOKEN
+)
+
+# 🔹 Define FAISS file path
+faiss_file_path = os.path.join(faiss_folder, "index.faiss")
+
+# 🔹 Ensure the FAISS index was downloaded
+if os.path.exists(faiss_file_path):
+    print(f"✅ FAISS index downloaded at {faiss_file_path}")
+else:
+    raise FileNotFoundError("🚨 Failed to download FAISS index from Hugging Face.")
+
+# 🔹 Load FAISS Index
+index = faiss.read_index(faiss_file_path)
+
+# 🔹 Integrate FAISS with LangChain
+embedding_function = download_hugging_face_embeddings()  # Your embedding function
+docsearch = FAISS(index, embedding_function)
 retriever = docsearch.as_retriever(search_type='similarity', search_kwargs={'k':2})
 
 llm = Ollama(model='llama3.2', base_url=BASE_URL)
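One likely failure in this second hunk: `/tmp/faiss_index` is never created before `faiss.write_index` writes into it, so on a fresh container the call should fail with a "could not open ... for writing"-style error, and the guard checks the directory rather than `index.faiss` itself. A small sketch of the guarded version, keeping the diff's names; note that an empty `IndexFlatL2` contains no vectors, so retrieval against it returns nothing until real embeddings are added, and `d = 768` is only correct if it matches the model behind `download_hugging_face_embeddings()` (MiniLM-style models are 384-dimensional, for instance).

```python
import os

import faiss  # pip install faiss-cpu

faiss_index_path = "/tmp/faiss_index"
faiss_file = os.path.join(faiss_index_path, "index.faiss")

# Guard on the index *file*, not the folder, and create the folder first:
# faiss.write_index() does not create missing parent directories.
if not os.path.exists(faiss_file):
    os.makedirs(faiss_index_path, exist_ok=True)
    d = 768  # must match the embedding model's output dimension
    faiss.write_index(faiss.IndexFlatL2(d), faiss_file)
```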
 
 
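The last two added lines are the most fragile part of the commit. In LangChain, the `FAISS` constructor takes `(embedding_function, index, docstore, index_to_docstore_id)`, so `FAISS(index, embedding_function)` passes the arguments in the wrong order and omits two required ones; and a bare `index.faiss` carries no docstore, so even a correct call could only return vector IDs with no documents behind them. The removed `load_faiss()` had the right shape. A sketch that keeps the docstore intact, assuming the `langchain_community` import path and reusing the app's existing `text_chunk`, `faiss_index_path`, and `download_hugging_face_embeddings()`:

```python
from langchain_community.vectorstores import FAISS

# Persist the LangChain wrapper, not the raw faiss index, so the docstore
# (index.pkl) travels alongside index.faiss.
faiss_index = FAISS.from_documents(text_chunk, download_hugging_face_embeddings())
faiss_index.save_local(faiss_index_path)  # writes index.faiss AND index.pkl

docsearch = FAISS.load_local(
    faiss_index_path,
    download_hugging_face_embeddings(),
    allow_dangerous_deserialization=True,  # index.pkl is a pickle
)
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 2})
```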
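Finally, the upload and download halves of the FAISS round trip disagree about repo layout: `upload_folder` without `path_in_repo` pushes the folder's contents to the repo root, so `allow_patterns=["faiss_index/*"]` should match nothing and the commit's own `FileNotFoundError` would fire on the download side. A sketch that keeps the two sides consistent; the `faiss_index/` prefix is an assumed layout choice, and the other names mirror the diff:

```python
api.upload_folder(
    folder_path=faiss_index_path,
    path_in_repo="faiss_index",              # without this, files land at the repo root
    repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
    repo_type="dataset",
    token=HF_TOKEN,
)

faiss_folder = snapshot_download(
    repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
    repo_type="dataset",                     # snapshot_download defaults to model repos
    allow_patterns=["faiss_index/*"],        # now matches the uploaded files
    token=HF_TOKEN,
)
faiss_file_path = os.path.join(faiss_folder, "faiss_index", "index.faiss")
```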