joe4ai committed
Commit 7dbcbdd · verified · 1 Parent(s): 78b7958

Update app.py

Files changed (1)
  1. app.py +13 -88
app.py CHANGED
@@ -8,7 +8,7 @@ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain.chains import create_history_aware_retriever
 from langchain.memory import ChatMessageHistory
 from langchain_core.runnables.history import RunnableWithMessageHistory
-from langchain.vectorstores import FAISS
+from langchain_pinecone import PineconeVectorStore
 from langchain.schema import Document
 from dotenv import load_dotenv
 from prompt import system_prompt, retriever_prompt
@@ -195,96 +195,21 @@ print(f"Fetched {len(documents)} documents.")
 
 text_chunk = chunk_text(documents)
 
-# Define paths
-FAISS_LOCAL_PATH = "/tmp/faiss_index"
-os.makedirs(FAISS_LOCAL_PATH, exist_ok=True)  # Ensure directory exists
-
-FAISS_INDEX_FILE = os.path.join(FAISS_LOCAL_PATH, "index.faiss")  # FAISS index file
-
-# 🔹 Check if FAISS index exists before creating a new one
-if not os.path.exists(FAISS_INDEX_FILE):
-    print("🔴 FAISS index not found! Creating a new FAISS index...")
-
-    try:
-        # 🔹 Create FAISS index
-        faiss_index = FAISS.from_documents(text_chunk, download_hugging_face_embeddings())
-
-        # 🔹 Save FAISS locally
-        faiss_index.save_local(FAISS_LOCAL_PATH)
-        print(f"✅ FAISS index successfully saved to {FAISS_LOCAL_PATH}")
-
-    except Exception as e:
-        logging.error(f"🚨 Error creating or saving FAISS index: {e}")
-else:
-    print(f"🟢 FAISS index already exists at {FAISS_LOCAL_PATH}, skipping creation.")
-
-# 🔹 Check if dataset exists before uploading FAISS
-try:
-    api.repo_info(repo_id, repo_type="dataset", token=HF_TOKEN)
-    print(f"✅ Dataset '{repo_id}' already exists in the organization.")
-except Exception:
-    print(f"🔴 Dataset '{repo_id}' not found. Creating it now...")
-
-    # Create dataset in the organization
-    api.create_repo(
-        repo_id=repo_id,
-        repo_type="dataset",
-        private=True,
-        token=HF_TOKEN,
-        organization=HF_ORG_NAME
-    )
-    print(f"✅ Dataset '{repo_id}' created successfully in the organization.")
-
-# 🔹 Upload FAISS to the organization dataset
-try:
-    print("📤 Uploading FAISS index to Hugging Face...")
-    api.upload_folder(
-        folder_path=FAISS_LOCAL_PATH,
-        repo_id=repo_id,
-        repo_type="dataset",
-        token=HF_TOKEN
-    )
-    print("✅ FAISS index successfully uploaded to the organization's private dataset.")
-
-except Exception as e:
-    logging.error(f"🚨 Error uploading FAISS index: {e}")
-
-FAISS_DOWNLOAD_PATH = "/tmp/faiss_index_download"
-
-# 🔹 Check if FAISS exists before downloading
-if not os.path.exists(FAISS_DOWNLOAD_PATH):
-    print("📥 Downloading FAISS index from Hugging Face...")
-
-    try:
-        faiss_download_folder = snapshot_download(
-            repo_id=repo_id,
-            repo_type="dataset",  # ✅ Ensure it's a dataset repo
-            allow_patterns=["faiss_index/*"],
-            use_auth_token=HF_TOKEN
-        )
-
-        print(f"✅ FAISS index downloaded at {faiss_download_folder}")
-
-    except Exception as e:
-        raise FileNotFoundError(f"🚨 Failed to download FAISS index: {e}")
-
-else:
-    print("🟢 FAISS index already exists locally, skipping download.")
-
-try:
-    # 🔹 Load FAISS index with LangChain
-    embedding_function = download_hugging_face_embeddings()
-    docsearch = FAISS.load_local(
-        FAISS_DOWNLOAD_PATH,
-        embedding_function,
-        allow_dangerous_deserialization=True
-    )
-
-    print("✅ FAISS index successfully loaded for retrieval.")
-
-except Exception as e:
-    logging.error(f"🚨 Error loading FAISS index: {e}")
-
+PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
+os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY
+
+index_name = 'humblebeeai'
+
+docsearch = PineconeVectorStore.from_documents(
+    documents=text_chunk,
+    index_name=index_name,
+    embedding=download_hugging_face_embeddings()
+)
+
+docsearch = PineconeVectorStore.from_existing_index(
+    index_name=index_name,
+    embedding=download_hugging_face_embeddings()
+)
 
 retriever = docsearch.as_retriever(search_type='similarity', search_kwargs={'k':2})
 
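Note: PineconeVectorStore.from_documents upserts into an index that must already exist in Pinecone; it does not create one. A minimal sketch of provisioning the 'humblebeeai' index up front with the pinecone client might look like the following. The dimension of 384 (typical for sentence-transformers/all-MiniLM-L6-v2) and the serverless cloud/region are assumptions, since the diff does not show which model download_hugging_face_embeddings() returns.

import os
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])

# Create the index once if it does not already exist.
# dimension=384 is an assumption and must match the embedding model's output size.
if 'humblebeeai' not in pc.list_indexes().names():
    pc.create_index(
        name='humblebeeai',
        dimension=384,
        metric='cosine',
        spec=ServerlessSpec(cloud='aws', region='us-east-1'),
    )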
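The second call, from_existing_index, reconnects to the same index for retrieval without re-upserting the chunks; the retriever then serves top-k similarity search over whatever is stored there. A quick usage sketch (the query string is purely illustrative):

# Illustrative only: fetch the two most similar chunks (k=2 above) for a query.
docs = retriever.invoke("What services does HumblebeeAI offer?")
for doc in docs:
    print(doc.page_content)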