Runtime error
Update app.py
app.py CHANGED
@@ -40,24 +40,59 @@ HF_USERNAME = "HumbleBeeAI" # Replace with your HF username
 DATASET_NAME = "faiss_index"
 index_path = "faiss_index"
 
-api = HfApi()
 from pathlib import Path
-
+# 🔹 Use /tmp directory in Hugging Face Spaces (to avoid filesystem restrictions)
+db_path = "/tmp/chatbot.db"
+
+# 🔹 Ensure the database file exists
+if not os.path.exists(db_path):
+    print("🔴 chatbot.db does not exist! Creating it now...")
+    conn = sqlite3.connect(db_path)
+    cursor = conn.cursor()
+    cursor.execute('''CREATE TABLE IF NOT EXISTS users (
+                        id INTEGER PRIMARY KEY AUTOINCREMENT,
+                        username TEXT UNIQUE NOT NULL,
+                        password_hash TEXT NOT NULL
+                    )''')
+    conn.commit()
+    conn.close()
+    print("✅ chatbot.db created successfully!")
+
+# 🔹 Confirm file existence
+if os.path.exists(db_path):
+    print(f"✅ File chatbot.db found at {db_path}")
+else:
+    raise FileNotFoundError("🚨 chatbot.db was not found!")
+api = HfApi()
 
+# 🔹 Upload chatbot.db as a private dataset
 api.upload_file(
-    path_or_fileobj=db_path, # Use
-    path_in_repo="chatbot.db",
-    repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
+    path_or_fileobj=db_path, # Use the /tmp path
+    path_in_repo="chatbot.db", # How it will appear in the dataset
+    repo_id=f"{HF_USERNAME}/{DATASET_NAME}", # Your private dataset repo
     repo_type="dataset",
     token=HF_TOKEN
 )
 
+print("✅ chatbot.db successfully uploaded to Hugging Face Dataset.")
+
+# 🔹 Download chatbot.db securely
 db_folder = snapshot_download(
     repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
     allow_patterns=["chatbot.db"], # Only download the database
     use_auth_token=HF_TOKEN
 )
+
+# 🔹 Define the database path
 DB_PATH = os.path.join(db_folder, "chatbot.db")
+
+# 🔹 Confirm database was downloaded
+if os.path.exists(DB_PATH):
+    print(f"✅ Database downloaded at {DB_PATH}")
+else:
+    raise FileNotFoundError("🚨 Failed to download chatbot.db from Hugging Face.")
+
+
 # ---- Database part ----- #
 # Database Connection
 def connect_db():
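Note: this hunk creates chatbot.db locally, uploads it, and immediately re-downloads it at import time. Two caveats, offered as suggestions rather than as code from this repo: the new lines need `import os` and `import sqlite3` to be present higher up, and recent `huggingface_hub` releases deprecate `use_auth_token` in favor of `token`. A single file can also be fetched with `hf_hub_download` instead of snapshotting the whole dataset. A minimal sketch of the same round trip under those assumptions, reusing the HF_USERNAME, DATASET_NAME, and HF_TOKEN values defined earlier in app.py:

import os
import sqlite3
from huggingface_hub import HfApi, hf_hub_download

db_path = "/tmp/chatbot.db"

# Create the database locally if it is missing (same users schema as the diff)
if not os.path.exists(db_path):
    conn = sqlite3.connect(db_path)
    conn.execute('''CREATE TABLE IF NOT EXISTS users (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        username TEXT UNIQUE NOT NULL,
                        password_hash TEXT NOT NULL
                    )''')
    conn.commit()
    conn.close()

api = HfApi()
api.upload_file(
    path_or_fileobj=db_path,
    path_in_repo="chatbot.db",
    repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
    repo_type="dataset",
    token=HF_TOKEN,
)

# hf_hub_download returns the cached path of the single file it fetched
DB_PATH = hf_hub_download(
    repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
    filename="chatbot.db",
    repo_type="dataset",
    token=HF_TOKEN,  # replaces the deprecated use_auth_token
)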
@@ -146,33 +181,60 @@ for url in md_files_url:
         logging.error(f"Error processing URL {url}: {ve}")
 print(f"Fetched {len(documents)} documents.")
 
-
-
-    faiss_index = FAISS.from_documents(text_chunk, embedding_function)
-    faiss_index.save_local(index_path)
-    api = HfApi()
-    api.upload_folder(folder_path=index_path, repo_id=f"{HF_USERNAME}/{DATASET_NAME}", repo_type="dataset", token=HF_TOKEN)
-    return True
-
-# 🔹 Download & Load FAISS Index Privately
-def load_faiss():
-    index_path = snapshot_download(
-        repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
-        allow_patterns=["faiss_index/*"],
-        use_auth_token=HF_TOKEN
-    )
-    return FAISS.load_local(
-        index_path,
-        download_hugging_face_embeddings(),
-        allow_dangerous_deserialization=True
-    )
+# 🔹 Use /tmp directory in Spaces
+faiss_index_path = "/tmp/faiss_index"
 
-
+# 🔹 Ensure FAISS index exists before uploading
+if not os.path.exists(faiss_index_path):
+    print("🔴 FAISS index not found! Creating a new FAISS index...")
 
-
-
+    # Create a dummy FAISS index (you should replace this with real embeddings)
+    d = 768 # Embedding dimension
+    index = faiss.IndexFlatL2(d) # Create an empty FAISS index
+    faiss.write_index(index, os.path.join(faiss_index_path, "index.faiss"))
+
+    print("✅ FAISS index created successfully!")
+
+# 🔹 Confirm FAISS index exists
+faiss_file = os.path.join(faiss_index_path, "index.faiss")
+if os.path.exists(faiss_file):
+    print(f"✅ FAISS index found at {faiss_file}")
 else:
-
+    raise FileNotFoundError("🚨 FAISS index was not found!")
+api = HfApi()
+
+# 🔹 Upload FAISS index as a private dataset
+api.upload_folder(
+    folder_path=faiss_index_path, # Upload the FAISS folder
+    repo_id=f"{HF_USERNAME}/{DATASET_NAME}", # Your private dataset repo
+    repo_type="dataset",
+    token=HF_TOKEN
+)
+
+print("✅ FAISS index successfully uploaded to Hugging Face Dataset.")
+
+# 🔹 Download FAISS index securely
+faiss_folder = snapshot_download(
+    repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
+    allow_patterns=["faiss_index/*"], # Only download FAISS index
+    use_auth_token=HF_TOKEN
+)
+
+# 🔹 Define FAISS file path
+faiss_file_path = os.path.join(faiss_folder, "index.faiss")
+
+# 🔹 Ensure the FAISS index was downloaded
+if os.path.exists(faiss_file_path):
+    print(f"✅ FAISS index downloaded at {faiss_file_path}")
+else:
+    raise FileNotFoundError("🚨 Failed to download FAISS index from Hugging Face.")
+
+# 🔹 Load FAISS Index
+index = faiss.read_index(faiss_file_path)
+
+# 🔹 Integrate FAISS with LangChain
+embedding_function = download_hugging_face_embeddings() # Your embedding function
+docsearch = FAISS(index, embedding_function)
 retriever = docsearch.as_retriever(search_type='similarity', search_kwargs={'k':2})
 
 llm = Ollama(model='llama3.2', base_url=BASE_URL)
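Note: two lines in this hunk are plausible causes of the Space's "Runtime error" status. `faiss.write_index` does not create parent directories, so the write into /tmp/faiss_index fails unless the folder is created first, and LangChain's FAISS wrapper cannot be built as `FAISS(index, embedding_function)`: its initializer also expects a docstore and an index-to-docstore mapping, which is why the `FAISS.load_local` call removed by this commit is the usual loader. A hedged sketch of a version that should run, assuming the app's `download_hugging_face_embeddings` helper and a `langchain_community`-style import layout:

import os
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore

faiss_index_path = "/tmp/faiss_index"
os.makedirs(faiss_index_path, exist_ok=True)  # write_index needs the folder to exist

faiss_file = os.path.join(faiss_index_path, "index.faiss")
if not os.path.exists(faiss_file):
    d = 768  # embedding dimension; must match the embedding model's output size
    faiss.write_index(faiss.IndexFlatL2(d), faiss_file)

# The wrapper needs the embedding function, index, docstore, and id map,
# not just the raw index
embedding_function = download_hugging_face_embeddings()
docsearch = FAISS(
    embedding_function=embedding_function,
    index=faiss.read_index(faiss_file),
    docstore=InMemoryDocstore({}),
    index_to_docstore_id={},
)
retriever = docsearch.as_retriever(search_type='similarity', search_kwargs={'k': 2})

An empty IndexFlatL2 retrieves nothing, so in practice the index should still be built with FAISS.from_documents(text_chunk, embedding_function) and persisted with save_local/load_local, as the load_faiss helper removed above did.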