Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -8,7 +8,7 @@ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain.chains import create_history_aware_retriever
 from langchain.memory import ChatMessageHistory
 from langchain_core.runnables.history import RunnableWithMessageHistory
-from
+from langchain_pinecone import PineconeVectorStore
 from langchain.schema import Document
 from dotenv import load_dotenv
 from prompt import system_prompt, retriever_prompt
@@ -195,96 +195,21 @@ print(f"Fetched {len(documents)} documents.")
 
 text_chunk = chunk_text(documents)
 
-        os.makedirs(FAISS_LOCAL_PATH, exist_ok=True)  # Ensure directory exists
-
-        # 🔹 Create FAISS index
-        faiss_index = FAISS.from_documents(text_chunk, download_hugging_face_embeddings())
-
-        # 🔹 Save FAISS locally
-        faiss_index.save_local(FAISS_LOCAL_PATH)
-        print(f"✅ FAISS index successfully saved to {FAISS_LOCAL_PATH}")
-
-    except Exception as e:
-        logging.error(f"🚨 Error creating or saving FAISS index: {e}")
-else:
-    print(f"🟢 FAISS index already exists at {FAISS_LOCAL_PATH}, skipping creation.")
-
-# 🔹 Check if dataset exists before uploading FAISS
-try:
-    api.repo_info(repo_id, repo_type="dataset", token=HF_TOKEN)
-    print(f"✅ Dataset '{repo_id}' already exists in the organization.")
-except Exception:
-    print(f"🔴 Dataset '{repo_id}' not found. Creating it now...")
-
-    # Create dataset in the organization
-    api.create_repo(
-        repo_id=repo_id,
-        repo_type="dataset",
-        private=True,
-        token=HF_TOKEN,
-        organization=HF_ORG_NAME
-    )
-    print(f"✅ Dataset '{repo_id}' created successfully in the organization.")
-
-# 🔹 Upload FAISS to the organization dataset
-try:
-    print("📤 Uploading FAISS index to Hugging Face...")
-    api.upload_folder(
-        folder_path=FAISS_LOCAL_PATH,
-        repo_id=repo_id,
-        repo_type="dataset",
-        token=HF_TOKEN
-    )
-    print("✅ FAISS index successfully uploaded to the organization's private dataset.")
-
-except Exception as e:
-    logging.error(f"🚨 Error uploading FAISS index: {e}")
-
-FAISS_DOWNLOAD_PATH = "/tmp/faiss_index_download"
-
-# 🔹 Check if FAISS exists before downloading
-if not os.path.exists(FAISS_DOWNLOAD_PATH):
-    print("📥 Downloading FAISS index from Hugging Face...")
-
-    try:
-        faiss_download_folder = snapshot_download(
-            repo_id=repo_id,
-            repo_type="dataset",  # ✅ Ensure it's a dataset repo
-            allow_patterns=["faiss_index/*"],
-            use_auth_token=HF_TOKEN
-        )
-
-        print(f"✅ FAISS index downloaded at {faiss_download_folder}")
-
-    except Exception as e:
-        raise FileNotFoundError(f"🚨 Failed to download FAISS index: {e}")
-
-else:
-    print("🟢 FAISS index already exists locally, skipping download.")
-
-try:
-    # 🔹 Load FAISS index with LangChain
-    embedding_function = download_hugging_face_embeddings()
-    docsearch = FAISS.load_local(
-        FAISS_DOWNLOAD_PATH,
-        embedding_function,
-        allow_dangerous_deserialization=True
-    )
-
-    print("✅ FAISS index successfully loaded for retrieval.")
-
-except Exception as e:
-    logging.error(f"🚨 Error loading FAISS index: {e}")
+PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
+os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY
+
+index_name = 'humblebeeai'
+
+docsearch = PineconeVectorStore.from_documents(
+    documents=text_chunks,
+    index_name=index_name,
+    embedding=download_hugging_face_embeddings()
+)
+
+docsearch = PineconeVectorStore.from_existing_index(
+    index_name=index_name,
+    embedding=download_hugging_face_embeddings()
+)
 
 retriever = docsearch.as_retriever(search_type='similarity', search_kwargs={'k':2})
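Two things stand out in the added block. First, from_documents passes documents=text_chunks, while the unchanged context line above defines text_chunk (singular); as committed, this raises a NameError at startup, which would explain the Space's "Runtime error" badge. Second, from_documents re-embeds and re-upserts every chunk on each restart, and docsearch is then immediately rebound by from_existing_index anyway. A minimal sketch of a fix, assuming the same app.py helpers (chunk_text, download_hugging_face_embeddings) and index name; the emptiness check via describe_index_stats is an illustrative addition, not part of the commit:

import os
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore

index_name = "humblebeeai"
embedding = download_hugging_face_embeddings()  # helper defined earlier in app.py

# Only embed and upsert when the index is still empty; otherwise just attach to it.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
stats = pc.Index(index_name).describe_index_stats()

if stats.total_vector_count == 0:
    docsearch = PineconeVectorStore.from_documents(
        documents=text_chunk,  # note: text_chunk, matching the variable defined above
        index_name=index_name,
        embedding=embedding,
    )
else:
    docsearch = PineconeVectorStore.from_existing_index(
        index_name=index_name,
        embedding=embedding,
    )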
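For reference, a sketch of how the unchanged retriever line is typically wired downstream, given the create_history_aware_retriever, ChatPromptTemplate, and retriever_prompt imports visible in the first hunk. The llm variable and the exact prompt wiring are assumptions; this diff does not show that part of app.py:

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever

# k=2 keeps retrieval cheap; raise k if answers seem to lack context.
retriever = docsearch.as_retriever(search_type='similarity', search_kwargs={'k': 2})

# Rephrase the latest user question against chat history before retrieving.
contextualize_prompt = ChatPromptTemplate.from_messages([
    ('system', retriever_prompt),  # imported from prompt.py in the first hunk
    MessagesPlaceholder('chat_history'),
    ('human', '{input}'),
])

history_aware_retriever = create_history_aware_retriever(
    llm,  # assumed: the chat model instantiated elsewhere in app.py
    retriever,
    contextualize_prompt,
)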