Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -194,23 +194,31 @@ for url in md_files_url:
|
|
194 |
print(f"Fetched {len(documents)} documents.")
|
195 |
|
196 |
text_chunk = chunk_text(documents)
|
|
|
197 |
# Define paths
|
198 |
FAISS_LOCAL_PATH = "/tmp/faiss_index"
|
|
|
199 |
|
200 |
-
|
201 |
-
os.makedirs(FAISS_LOCAL_PATH, exist_ok=True)
|
202 |
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
# 🔹 Save FAISS locally
|
208 |
-
faiss_index.save_local(FAISS_LOCAL_PATH)
|
209 |
-
print(f"✅ FAISS index successfully saved to {FAISS_LOCAL_PATH}")
|
210 |
|
211 |
-
|
212 |
-
|
|
|
|
|
|
|
|
|
|
|
213 |
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
try:
|
215 |
api.repo_info(repo_id, repo_type="dataset", token=HF_TOKEN)
|
216 |
print(f"✅ Dataset '{repo_id}' already exists in the organization.")
|
@@ -228,33 +236,47 @@ except Exception:
|
|
228 |
print(f"✅ Dataset '{repo_id}' created successfully in the organization.")
|
229 |
|
230 |
# 🔹 Upload FAISS to the organization dataset
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
|
|
|
|
|
|
237 |
|
238 |
-
|
|
|
239 |
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
|
247 |
-
|
|
|
248 |
|
249 |
-
if os.path.exists(FAISS_PATH):
|
250 |
-
print(f"✅ FAISS index found at {FAISS_PATH}, loading it now...")
|
251 |
else:
|
252 |
-
|
|
|
253 |
try:
|
254 |
# 🔹 Load FAISS index with LangChain
|
|
|
255 |
docsearch = FAISS.load_local(
|
256 |
-
|
257 |
-
|
258 |
allow_dangerous_deserialization=True
|
259 |
)
|
260 |
|
|
|
194 |
print(f"Fetched {len(documents)} documents.")
|
195 |
|
196 |
text_chunk = chunk_text(documents)
|
197 |
+
|
198 |
# Define paths
|
199 |
FAISS_LOCAL_PATH = "/tmp/faiss_index"
|
200 |
+
os.makedirs(FAISS_LOCAL_PATH, exist_ok=True) # Ensure directory exists
|
201 |
|
202 |
+
FAISS_INDEX_FILE = os.path.join(FAISS_LOCAL_PATH, "index.faiss") # FAISS index file
|
|
|
203 |
|
204 |
+
# 🔹 Check if FAISS index exists before creating a new one
|
205 |
+
if not os.path.exists(FAISS_INDEX_FILE):
|
206 |
+
print("🔴 FAISS index not found! Creating a new FAISS index...")
|
|
|
|
|
|
|
|
|
207 |
|
208 |
+
try:
|
209 |
+
# 🔹 Create FAISS index
|
210 |
+
faiss_index = FAISS.from_documents(text_chunk, download_hugging_face_embeddings())
|
211 |
+
|
212 |
+
# 🔹 Save FAISS locally
|
213 |
+
faiss_index.save_local(FAISS_LOCAL_PATH)
|
214 |
+
print(f"✅ FAISS index successfully saved to {FAISS_LOCAL_PATH}")
|
215 |
|
216 |
+
except Exception as e:
|
217 |
+
logging.error(f"🚨 Error creating or saving FAISS index: {e}")
|
218 |
+
else:
|
219 |
+
print(f"🟢 FAISS index already exists at {FAISS_LOCAL_PATH}, skipping creation.")
|
220 |
+
|
221 |
+
# 🔹 Check if dataset exists before uploading FAISS
|
222 |
try:
|
223 |
api.repo_info(repo_id, repo_type="dataset", token=HF_TOKEN)
|
224 |
print(f"✅ Dataset '{repo_id}' already exists in the organization.")
|
|
|
236 |
print(f"✅ Dataset '{repo_id}' created successfully in the organization.")
|
237 |
|
238 |
# 🔹 Upload FAISS to the organization dataset
|
239 |
+
try:
|
240 |
+
print("📤 Uploading FAISS index to Hugging Face...")
|
241 |
+
api.upload_folder(
|
242 |
+
folder_path=FAISS_LOCAL_PATH,
|
243 |
+
repo_id=repo_id,
|
244 |
+
repo_type="dataset",
|
245 |
+
token=HF_TOKEN
|
246 |
+
)
|
247 |
+
print("✅ FAISS index successfully uploaded to the organization's private dataset.")
|
248 |
|
249 |
+
except Exception as e:
|
250 |
+
logging.error(f"🚨 Error uploading FAISS index: {e}")
|
251 |
|
252 |
+
FAISS_DOWNLOAD_PATH = "/tmp/faiss_index_download"
|
253 |
+
|
254 |
+
# 🔹 Check if FAISS exists before downloading
|
255 |
+
if not os.path.exists(FAISS_DOWNLOAD_PATH):
|
256 |
+
print("📥 Downloading FAISS index from Hugging Face...")
|
257 |
+
|
258 |
+
try:
|
259 |
+
faiss_download_folder = snapshot_download(
|
260 |
+
repo_id=repo_id,
|
261 |
+
repo_type="dataset", # ✅ Ensure it's a dataset repo
|
262 |
+
allow_patterns=["faiss_index/*"],
|
263 |
+
use_auth_token=HF_TOKEN
|
264 |
+
)
|
265 |
+
|
266 |
+
print(f"✅ FAISS index downloaded at {faiss_download_folder}")
|
267 |
|
268 |
+
except Exception as e:
|
269 |
+
raise FileNotFoundError(f"🚨 Failed to download FAISS index: {e}")
|
270 |
|
|
|
|
|
271 |
else:
|
272 |
+
print("🟢 FAISS index already exists locally, skipping download.")
|
273 |
+
|
274 |
try:
|
275 |
# 🔹 Load FAISS index with LangChain
|
276 |
+
embedding_function = download_hugging_face_embeddings()
|
277 |
docsearch = FAISS.load_local(
|
278 |
+
FAISS_DOWNLOAD_PATH,
|
279 |
+
embedding_function,
|
280 |
allow_dangerous_deserialization=True
|
281 |
)
|
282 |
|