joe4ai committed
Commit 03e04be · verified · 1 Parent(s): e30aa81

Update app.py

Files changed (1): app.py (+97, -68)
app.py CHANGED
@@ -1,4 +1,3 @@
-from huggingface_hub import HfApi, snapshot_download
 from helper import download_hugging_face_embeddings
 from url import md_files_url
 from get_data import extract_repo_details, fetch_md_file_via_api, data_loader, chunk_text
@@ -24,29 +23,49 @@ from datetime import datetime
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-
 base = {}
 last_messages = 4
 documents = []
+HF_ORG_NAME = 'HumbleBeeAI'
+DATASET_NAME = 'faiss_index'
+repo_id = f"{HF_ORG_NAME}/{DATASET_NAME}"
 
 load_dotenv()
 AUTH_TOKEN_KEY = os.environ.get('AUTH_TOKEN_KEY')
-BASE_URL = os.environ.get('BASE_URL')
+
 os.environ['AUTH_TOKEN_KEY'] = AUTH_TOKEN_KEY
-os.environ['BASE_URL'] = BASE_URL
 HF_TOKEN = os.environ.get("HF_TOKEN")
-os.environ['HF_TOKEN'] = HF_TOKEN
-HF_USERNAME = "HumbleBeeAI" # Replace with your HF username
-DATASET_NAME = "faiss_index"
-index_path = "faiss_index"
 
-from pathlib import Path
-# 🔹 Use /tmp directory in Hugging Face Spaces (to avoid filesystem restrictions)
-db_path = "/tmp/chatbot.db"
+if not HF_TOKEN:
+    raise ValueError("🚨 HF_TOKEN is not set! Ensure you have the right permissions.")
+
+from huggingface_hub import HfApi
+
+api = HfApi()
+
+# 🔹 Check if the dataset exists in the organization
+try:
+    api.repo_info(repo_id, repo_type="dataset", token=HF_TOKEN)
+    print(f"✅ Dataset '{repo_id}' already exists in the organization.")
+except Exception:
+    print(f"🔴 Dataset '{repo_id}' not found. Creating it now...")
+
+    # Create repo inside the organization
+    api.create_repo(
+        repo_id=repo_id,
+        repo_type="dataset",
+        private=True, # Ensure it's private
+        token=HF_TOKEN,
+        organization=HF_ORG_NAME # Specify organization
+    )
+    print(f"✅ Dataset '{repo_id}' created successfully in the organization.")
+
+db_path = "chatbot.db"
 
-# 🔹 Ensure the database file exists
+# 🔹 Ensure `chatbot.db` exists before uploading
 if not os.path.exists(db_path):
     print("🔴 chatbot.db does not exist! Creating it now...")
+    import sqlite3
     conn = sqlite3.connect(db_path)
     cursor = conn.cursor()
     cursor.execute('''CREATE TABLE IF NOT EXISTS users (
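
Note: the repo_info try/except probe above (and the duplicate one later in this diff) can likely be collapsed. Recent huggingface_hub releases make create_repo idempotent via exist_ok=True, and the standalone organization= argument is redundant once the org is already part of repo_id (it is deprecated in newer versions). A minimal sketch, assuming a reasonably current huggingface_hub:

from huggingface_hub import HfApi

api = HfApi(token=HF_TOKEN)  # HF_TOKEN as read from the environment above

# exist_ok=True turns the call into a no-op when the dataset already
# exists, replacing the repo_info probe; the org is encoded in repo_id.
api.create_repo(
    repo_id="HumbleBeeAI/faiss_index",
    repo_type="dataset",
    private=True,
    exist_ok=True,
)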
@@ -58,40 +77,33 @@ if not os.path.exists(db_path):
     conn.close()
     print("✅ chatbot.db created successfully!")
 
-# 🔹 Confirm file existence
-if os.path.exists(db_path):
-    print(f"✅ File chatbot.db found at {db_path}")
-else:
-    raise FileNotFoundError("🚨 chatbot.db was not found!")
-api = HfApi()
-
 # 🔹 Upload chatbot.db as a private dataset
 api.upload_file(
-    path_or_fileobj=db_path, # Use the /tmp path
-    path_in_repo="chatbot.db", # How it will appear in the dataset
-    repo_id=f"{HF_USERNAME}/{DATASET_NAME}", # Your private dataset repo
+    path_or_fileobj=db_path,
+    path_in_repo="chatbot.db",
+    repo_id=repo_id,
     repo_type="dataset",
     token=HF_TOKEN
 )
 
-print("✅ chatbot.db successfully uploaded to Hugging Face Dataset.")
+print("✅ chatbot.db successfully uploaded to the organization's private dataset.")
+
+from huggingface_hub import snapshot_download
+import os
 
 # 🔹 Download chatbot.db securely
 db_folder = snapshot_download(
-    repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
-    allow_patterns=["chatbot.db"], # Only download the database
+    repo_id=repo_id,
+    allow_patterns=["chatbot.db"], # Only download chatbot.db
    use_auth_token=HF_TOKEN
 )
 
-# 🔹 Define the database path
 DB_PATH = os.path.join(db_folder, "chatbot.db")
 
-# 🔹 Confirm database was downloaded
 if os.path.exists(DB_PATH):
     print(f"✅ Database downloaded at {DB_PATH}")
 else:
-    raise FileNotFoundError("🚨 Failed to download chatbot.db from Hugging Face.")
-
+    raise FileNotFoundError("🚨 Failed to download chatbot.db from the organization's dataset.")
 
 # ---- Database part ----- #
 # Database Connection
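
Note: snapshot_download still uses the deprecated use_auth_token= argument and materializes a repo snapshot just to read one file. For a single file, hf_hub_download is the narrower call; a sketch assuming a huggingface_hub version that accepts token=:

from huggingface_hub import hf_hub_download

# Fetches only chatbot.db from the private dataset and returns the local
# cache path, so no allow_patterns filtering is needed.
DB_PATH = hf_hub_download(
    repo_id="HumbleBeeAI/faiss_index",
    filename="chatbot.db",
    repo_type="dataset",
    token=HF_TOKEN,
)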
@@ -181,63 +193,80 @@ for url in md_files_url:
         logging.error(f"Error processing URL {url}: {ve}")
 print(f"Fetched {len(documents)} documents.")
 
-# 🔹 Use /tmp directory in Spaces
-faiss_index_path = "/tmp/faiss_index"
-
-# 🔹 Ensure FAISS index exists before uploading
-if not os.path.exists(faiss_index_path):
-    print("🔴 FAISS index not found! Creating a new FAISS index...")
+text_chunk = chunk_text(documents)
+# Define paths
+FAISS_LOCAL_PATH = "/tmp/faiss_index"
 
-    # Create a dummy FAISS index (you should replace this with real embeddings)
-    d = 768 # Embedding dimension
-    index = faiss.IndexFlatL2(d) # Create an empty FAISS index
-    faiss.write_index(index, os.path.join(faiss_index_path, "index.faiss"))
+# 🔹 Ensure FAISS directory exists
+os.makedirs(FAISS_LOCAL_PATH, exist_ok=True)
 
-    print("✅ FAISS index created successfully!")
-
-# 🔹 Confirm FAISS index exists
-faiss_file = os.path.join(faiss_index_path, "index.faiss")
-if os.path.exists(faiss_file):
-    print(f"✅ FAISS index found at {faiss_file}")
-else:
-    raise FileNotFoundError("🚨 FAISS index was not found!")
-api = HfApi()
+try:
+    # 🔹 Create FAISS index
+    faiss_index = FAISS.from_documents(text_chunk, download_hugging_face_embeddings())
+
+    # 🔹 Save FAISS locally
+    faiss_index.save_local(FAISS_LOCAL_PATH)
+    print(f"✅ FAISS index successfully saved to {FAISS_LOCAL_PATH}")
+
+except Exception as e:
+    logging.error(f"🚨 Error creating or saving FAISS index: {e}")
+
+try:
+    api.repo_info(repo_id, repo_type="dataset", token=HF_TOKEN)
+    print(f"✅ Dataset '{repo_id}' already exists in the organization.")
+except Exception:
+    print(f"🔴 Dataset '{repo_id}' not found. Creating it now...")
+
+    # Create dataset in the organization
+    api.create_repo(
+        repo_id=repo_id,
+        repo_type="dataset",
+        private=True,
+        token=HF_TOKEN,
+        organization=HF_ORG_NAME
+    )
+    print(f"✅ Dataset '{repo_id}' created successfully in the organization.")
 
-# 🔹 Upload FAISS index as a private dataset
+# 🔹 Upload FAISS to the organization dataset
 api.upload_folder(
-    folder_path=faiss_index_path, # Upload the FAISS folder
-    repo_id=f"{HF_USERNAME}/{DATASET_NAME}", # Your private dataset repo
+    folder_path=FAISS_LOCAL_PATH,
+    repo_id=repo_id,
     repo_type="dataset",
     token=HF_TOKEN
 )
 
-print("✅ FAISS index successfully uploaded to Hugging Face Dataset.")
+print("✅ FAISS index successfully uploaded to the organization's private dataset.")
 
-# 🔹 Download FAISS index securely
-faiss_folder = snapshot_download(
-    repo_id=f"{HF_USERNAME}/{DATASET_NAME}",
-    allow_patterns=["faiss_index/*"], # Only download FAISS index
+# 🔹 Download FAISS index from the private organization dataset
+faiss_download_folder = snapshot_download(
+    repo_id=repo_id,
+    allow_patterns=["faiss_index/*"],
     use_auth_token=HF_TOKEN
 )
 
-# 🔹 Define FAISS file path
-faiss_file_path = os.path.join(faiss_folder, "index.faiss")
+FAISS_PATH = os.path.join(faiss_download_folder, "faiss_index")
 
-# 🔹 Ensure the FAISS index was downloaded
-if os.path.exists(faiss_file_path):
-    print(f"✅ FAISS index downloaded at {faiss_file_path}")
+if os.path.exists(FAISS_PATH):
+    print(f"✅ FAISS index found at {FAISS_PATH}, loading it now...")
 else:
-    raise FileNotFoundError("🚨 Failed to download FAISS index from Hugging Face.")
+    raise FileNotFoundError("🚨 FAISS index not found in the organization's dataset.")
+try:
+    # 🔹 Load FAISS index with LangChain
+    docsearch = FAISS.load_local(
+        FAISS_PATH,
+        download_hugging_face_embeddings(),
+        allow_dangerous_deserialization=True
+    )
 
-# 🔹 Load FAISS Index
-index = faiss.read_index(faiss_file_path)
+    print("✅ FAISS index successfully loaded for retrieval.")
 
-# 🔹 Integrate FAISS with LangChain
-embedding_function = download_hugging_face_embeddings() # Your embedding function
-docsearch = FAISS(index, embedding_function)
+except Exception as e:
+    logging.error(f"🚨 Error loading FAISS index: {e}")
+
+
 retriever = docsearch.as_retriever(search_type='similarity', search_kwargs={'k':2})
 
-llm = Ollama(model='llama3.2', base_url=BASE_URL)
+llm = Ollama(model='llama3.2')
 
 prompt = ChatPromptTemplate.from_messages(
     [
 
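Note: the except branch around FAISS.load_local only logs the error, so a failed load leaves docsearch undefined and the very next statement, docsearch.as_retriever(...), dies with a NameError that hides the real cause. A sketch that fails fast instead, using the same names the diff defines:

try:
    docsearch = FAISS.load_local(
        FAISS_PATH,
        download_hugging_face_embeddings(),
        allow_dangerous_deserialization=True,
    )
except Exception as e:
    logging.error(f"🚨 Error loading FAISS index: {e}")
    raise  # surface the failure instead of continuing with docsearch unset

retriever = docsearch.as_retriever(search_type='similarity', search_kwargs={'k': 2})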