joe4ai committed
Commit 7dbcbdd · verified · 1 Parent(s): 78b7958

Update app.py

Files changed (1)
  1. app.py +13 -88
app.py CHANGED
@@ -8,7 +8,7 @@ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain.chains import create_history_aware_retriever
 from langchain.memory import ChatMessageHistory
 from langchain_core.runnables.history import RunnableWithMessageHistory
-from langchain.vectorstores import FAISS
+from langchain_pinecone import PineconeVectorStore
 from langchain.schema import Document
 from dotenv import load_dotenv
 from prompt import system_prompt, retriever_prompt
@@ -195,96 +195,21 @@ print(f"Fetched {len(documents)} documents.")
 
 text_chunk = chunk_text(documents)
 
-# Define paths
-FAISS_LOCAL_PATH = "/tmp/faiss_index"
-os.makedirs(FAISS_LOCAL_PATH, exist_ok=True)  # Ensure directory exists
-
-FAISS_INDEX_FILE = os.path.join(FAISS_LOCAL_PATH, "index.faiss")  # FAISS index file
-
-# 🔹 Check if FAISS index exists before creating a new one
-if not os.path.exists(FAISS_INDEX_FILE):
-    print("🔴 FAISS index not found! Creating a new FAISS index...")
-
-    try:
-        # 🔹 Create FAISS index
-        faiss_index = FAISS.from_documents(text_chunk, download_hugging_face_embeddings())
-
-        # 🔹 Save FAISS locally
-        faiss_index.save_local(FAISS_LOCAL_PATH)
-        print(f"✅ FAISS index successfully saved to {FAISS_LOCAL_PATH}")
-
-    except Exception as e:
-        logging.error(f"🚨 Error creating or saving FAISS index: {e}")
-else:
-    print(f"🟢 FAISS index already exists at {FAISS_LOCAL_PATH}, skipping creation.")
-
-# 🔹 Check if dataset exists before uploading FAISS
-try:
-    api.repo_info(repo_id, repo_type="dataset", token=HF_TOKEN)
-    print(f"✅ Dataset '{repo_id}' already exists in the organization.")
-except Exception:
-    print(f"🔴 Dataset '{repo_id}' not found. Creating it now...")
-
-    # Create dataset in the organization
-    api.create_repo(
-        repo_id=repo_id,
-        repo_type="dataset",
-        private=True,
-        token=HF_TOKEN,
-        organization=HF_ORG_NAME
-    )
-    print(f"✅ Dataset '{repo_id}' created successfully in the organization.")
-
-# 🔹 Upload FAISS to the organization dataset
-try:
-    print("📤 Uploading FAISS index to Hugging Face...")
-    api.upload_folder(
-        folder_path=FAISS_LOCAL_PATH,
-        repo_id=repo_id,
-        repo_type="dataset",
-        token=HF_TOKEN
-    )
-    print("✅ FAISS index successfully uploaded to the organization's private dataset.")
-
-except Exception as e:
-    logging.error(f"🚨 Error uploading FAISS index: {e}")
-
-FAISS_DOWNLOAD_PATH = "/tmp/faiss_index_download"
-
-# 🔹 Check if FAISS exists before downloading
-if not os.path.exists(FAISS_DOWNLOAD_PATH):
-    print("📥 Downloading FAISS index from Hugging Face...")
-
-    try:
-        faiss_download_folder = snapshot_download(
-            repo_id=repo_id,
-            repo_type="dataset",  # ✅ Ensure it's a dataset repo
-            allow_patterns=["faiss_index/*"],
-            use_auth_token=HF_TOKEN
-        )
-
-        print(f"✅ FAISS index downloaded at {faiss_download_folder}")
-
-    except Exception as e:
-        raise FileNotFoundError(f"🚨 Failed to download FAISS index: {e}")
-
-else:
-    print("🟢 FAISS index already exists locally, skipping download.")
-
-try:
-    # 🔹 Load FAISS index with LangChain
-    embedding_function = download_hugging_face_embeddings()
-    docsearch = FAISS.load_local(
-        FAISS_DOWNLOAD_PATH,
-        embedding_function,
-        allow_dangerous_deserialization=True
-    )
-
-    print("✅ FAISS index successfully loaded for retrieval.")
-
-except Exception as e:
-    logging.error(f"🚨 Error loading FAISS index: {e}")
-
+PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
+os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY
+
+index_name = 'humblebeeai'
+
+docsearch = PineconeVectorStore.from_documents(
+    documents=text_chunk,
+    index_name=index_name,
+    embedding=download_hugging_face_embeddings()
+)
+
+docsearch = PineconeVectorStore.from_existing_index(
+    index_name=index_name,
+    embedding=download_hugging_face_embeddings()
+)
 
 retriever = docsearch.as_retriever(search_type='similarity', search_kwargs={'k':2})
 
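Note: PineconeVectorStore.from_documents upserts into an index that must already exist in Pinecone; it does not create one. A minimal sketch of provisioning the 'humblebeeai' index up front with the pinecone client might look like the following. The dimension of 384 (typical for sentence-transformers/all-MiniLM-L6-v2) and the serverless cloud/region are assumptions, since the diff does not show which model download_hugging_face_embeddings() returns.

import os
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])

# Create the index once if it does not already exist.
# dimension=384 is an assumption and must match the embedding model's output size.
if 'humblebeeai' not in pc.list_indexes().names():
    pc.create_index(
        name='humblebeeai',
        dimension=384,
        metric='cosine',
        spec=ServerlessSpec(cloud='aws', region='us-east-1'),
    )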
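The second call, from_existing_index, reconnects to the same index for retrieval without re-upserting the chunks; the retriever then serves top-k similarity search over whatever is stored there. A quick usage sketch (the query string is purely illustrative):

# Illustrative only: fetch the two most similar chunks (k=2 above) for a query.
docs = retriever.invoke("What services does HumblebeeAI offer?")
for doc in docs:
    print(doc.page_content)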