joe4ai committed on
Commit
78b7958
·
verified ·
1 Parent(s): 03e04be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -30
app.py CHANGED
@@ -194,23 +194,31 @@ for url in md_files_url:
194
  print(f"Fetched {len(documents)} documents.")
195
 
196
  text_chunk = chunk_text(documents)
 
197
  # Define paths
198
  FAISS_LOCAL_PATH = "/tmp/faiss_index"
 
199
 
200
- # 🔹 Ensure FAISS directory exists
201
- os.makedirs(FAISS_LOCAL_PATH, exist_ok=True)
202
 
203
- try:
204
- # 🔹 Create FAISS index
205
- faiss_index = FAISS.from_documents(text_chunk, download_hugging_face_embeddings())
206
-
207
- # 🔹 Save FAISS locally
208
- faiss_index.save_local(FAISS_LOCAL_PATH)
209
- print(f"βœ… FAISS index successfully saved to {FAISS_LOCAL_PATH}")
210
 
211
- except Exception as e:
212
- logging.error(f"🚨 Error creating or saving FAISS index: {e}")
 
 
 
 
 
213
 
 
 
 
 
 
 
214
  try:
215
  api.repo_info(repo_id, repo_type="dataset", token=HF_TOKEN)
216
  print(f"βœ… Dataset '{repo_id}' already exists in the organization.")
@@ -228,33 +236,47 @@ except Exception:
228
  print(f"βœ… Dataset '{repo_id}' created successfully in the organization.")
229
 
230
  # 🔹 Upload FAISS to the organization dataset
231
- api.upload_folder(
232
- folder_path=FAISS_LOCAL_PATH,
233
- repo_id=repo_id,
234
- repo_type="dataset",
235
- token=HF_TOKEN
236
- )
 
 
 
237
 
238
- print("βœ… FAISS index successfully uploaded to the organization's private dataset.")
 
239
 
240
- # 🔹 Download FAISS index from the private organization dataset
241
- faiss_download_folder = snapshot_download(
242
- repo_id=repo_id,
243
- allow_patterns=["faiss_index/*"],
244
- use_auth_token=HF_TOKEN
245
- )
 
 
 
 
 
 
 
 
 
246
 
247
- FAISS_PATH = os.path.join(faiss_download_folder, "faiss_index")
 
248
 
249
- if os.path.exists(FAISS_PATH):
250
- print(f"βœ… FAISS index found at {FAISS_PATH}, loading it now...")
251
  else:
252
- raise FileNotFoundError("🚨 FAISS index not found in the organization's dataset.")
 
253
  try:
254
  # 🔹 Load FAISS index with LangChain
 
255
  docsearch = FAISS.load_local(
256
- FAISS_PATH,
257
- download_hugging_face_embeddings(),
258
  allow_dangerous_deserialization=True
259
  )
260
 
 
194
  print(f"Fetched {len(documents)} documents.")
195
 
196
  text_chunk = chunk_text(documents)
197
+
198
  # Define paths
199
  FAISS_LOCAL_PATH = "/tmp/faiss_index"
200
+ os.makedirs(FAISS_LOCAL_PATH, exist_ok=True) # Ensure directory exists
201
 
202
+ FAISS_INDEX_FILE = os.path.join(FAISS_LOCAL_PATH, "index.faiss") # FAISS index file
 
203
 
204
+ # 🔹 Check if FAISS index exists before creating a new one
205
+ if not os.path.exists(FAISS_INDEX_FILE):
206
+ print("πŸ”΄ FAISS index not found! Creating a new FAISS index...")
 
 
 
 
207
 
208
+ try:
209
+ # 🔹 Create FAISS index
210
+ faiss_index = FAISS.from_documents(text_chunk, download_hugging_face_embeddings())
211
+
212
+ # 🔹 Save FAISS locally
213
+ faiss_index.save_local(FAISS_LOCAL_PATH)
214
+ print(f"βœ… FAISS index successfully saved to {FAISS_LOCAL_PATH}")
215
 
216
+ except Exception as e:
217
+ logging.error(f"🚨 Error creating or saving FAISS index: {e}")
218
+ else:
219
+ print(f"🟒 FAISS index already exists at {FAISS_LOCAL_PATH}, skipping creation.")
220
+
221
+ # 🔹 Check if dataset exists before uploading FAISS
222
  try:
223
  api.repo_info(repo_id, repo_type="dataset", token=HF_TOKEN)
224
  print(f"βœ… Dataset '{repo_id}' already exists in the organization.")
 
236
  print(f"βœ… Dataset '{repo_id}' created successfully in the organization.")
237
 
238
  # 🔹 Upload FAISS to the organization dataset
239
+ try:
240
+ print("πŸ“€ Uploading FAISS index to Hugging Face...")
241
+ api.upload_folder(
242
+ folder_path=FAISS_LOCAL_PATH,
243
+ repo_id=repo_id,
244
+ repo_type="dataset",
245
+ token=HF_TOKEN
246
+ )
247
+ print("βœ… FAISS index successfully uploaded to the organization's private dataset.")
248
 
249
+ except Exception as e:
250
+ logging.error(f"🚨 Error uploading FAISS index: {e}")
251
 
252
+ FAISS_DOWNLOAD_PATH = "/tmp/faiss_index_download"
253
+
254
+ # 🔹 Check if FAISS exists before downloading
255
+ if not os.path.exists(FAISS_DOWNLOAD_PATH):
256
+ print("πŸ“₯ Downloading FAISS index from Hugging Face...")
257
+
258
+ try:
259
+ faiss_download_folder = snapshot_download(
260
+ repo_id=repo_id,
261
+ repo_type="dataset", # βœ… Ensure it's a dataset repo
262
+ allow_patterns=["faiss_index/*"],
263
+ use_auth_token=HF_TOKEN
264
+ )
265
+
266
+ print(f"βœ… FAISS index downloaded at {faiss_download_folder}")
267
 
268
+ except Exception as e:
269
+ raise FileNotFoundError(f"🚨 Failed to download FAISS index: {e}")
270
 
 
 
271
  else:
272
+ print("🟒 FAISS index already exists locally, skipping download.")
273
+
274
  try:
275
  # 🔹 Load FAISS index with LangChain
276
+ embedding_function = download_hugging_face_embeddings()
277
  docsearch = FAISS.load_local(
278
+ FAISS_DOWNLOAD_PATH,
279
+ embedding_function,
280
  allow_dangerous_deserialization=True
281
  )
282