localsavageai committed on
Commit
79b023d
·
verified ·
1 Parent(s): 5c262f5

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -57
app.py CHANGED
@@ -13,7 +13,6 @@ import requests
13
  from tqdm import tqdm
14
 
15
  # Configuration
16
- DATABASE_DIR = "semantic_memory" # Persistent directory for FAISS databases
17
  QWEN_API_URL = "Qwen/Qwen2.5-Max-Demo" # Gradio API for Qwen2.5 chat
18
  CHUNK_SIZE = 800
19
  TOP_K_RESULTS = 150
@@ -84,49 +83,30 @@ def split_text_into_chunks(text: str) -> List[str]:
84
 
85
  return chunks
86
 
87
- def initialize_vector_store(embeddings: Embeddings, db_name: str) -> FAISS:
88
- """Initialize or load a FAISS vector store"""
89
- db_path = os.path.join(DATABASE_DIR, db_name)
90
- if os.path.exists(db_path):
91
- try:
92
- logging.info(f"Loading existing database: {db_name}")
93
- return FAISS.load_local(
94
- db_path,
95
- embeddings,
96
- allow_dangerous_deserialization=True
97
- )
98
- except Exception as e:
99
- logging.error(f"FAISS load error: {str(e)}")
100
- raise
101
-
102
- logging.info(f"Creating new vector database: {db_name}")
103
- os.makedirs(db_path, exist_ok=True)
104
- return None
105
-
106
- def create_new_database(file_content: str, db_name: str, password: str, progress=gr.Progress()) -> Tuple[str, List[str]]:
107
  """Create a new FAISS database from uploaded file"""
108
  if password != PASSWORD_HASH:
109
- return "Incorrect password. Database creation failed.", []
110
 
111
  if not file_content.strip():
112
- return "Uploaded file is empty. Database creation failed.", []
113
 
114
  if not db_name.isalnum():
115
- return "Database name must be alphanumeric. Database creation failed.", []
116
 
117
  try:
118
- db_path = os.path.join(DATABASE_DIR, db_name)
119
- if os.path.exists(db_path):
120
- return f"Database '{db_name}' already exists.", []
121
-
122
- # Create the database directory
123
- os.makedirs(db_path, exist_ok=True)
124
- logging.info(f"Created database directory: {db_path}")
125
 
126
  # Initialize embeddings and split text
127
  chunks = split_text_into_chunks(file_content)
128
  if not chunks:
129
- return "No valid chunks generated. Database creation failed.", []
130
 
131
  logging.info(f"Creating {len(chunks)} chunks...")
132
  progress(0, desc="Starting embedding process...")
@@ -142,21 +122,24 @@ def create_new_database(file_content: str, db_name: str, password: str, progress
142
  text_embeddings=list(zip(chunks, embeddings_list)),
143
  embedding=embeddings
144
  )
145
- vector_store.save_local(db_path)
146
- logging.info(f"FAISS database saved to: {db_path}")
147
 
148
- # Verify files were created
149
- if not os.listdir(db_path):
150
- return f"Failed to save FAISS database files in: {db_path}", []
151
- logging.info(f"FAISS database files: {os.listdir(db_path)}")
152
 
153
- # Update the list of available databases
154
- db_list = [name for name in os.listdir(DATABASE_DIR) if os.path.isdir(os.path.join(DATABASE_DIR, name))]
155
- return f"Database '{db_name}' created successfully.", db_list
 
 
 
 
 
156
 
 
157
  except Exception as e:
158
  logging.error(f"Database creation failed: {str(e)}")
159
- return f"Error creating database: {str(e)}", []
160
 
161
  def generate_response(user_input: str, db_name: str) -> Optional[str]:
162
  """Generate response using Qwen2.5 MAX"""
@@ -164,15 +147,15 @@ def generate_response(user_input: str, db_name: str) -> Optional[str]:
164
  if not db_name:
165
  return "Please select a database to chat with."
166
 
167
- db_path = os.path.join(DATABASE_DIR, db_name)
168
- if not os.path.exists(db_path):
 
 
 
169
  return f"Database '{db_name}' does not exist."
170
 
171
- vector_store = FAISS.load_local(
172
- db_path,
173
- embeddings,
174
- allow_dangerous_deserialization=True
175
- )
176
 
177
  # Contextual search
178
  docs_scores = vector_store.similarity_search_with_score(
@@ -233,9 +216,11 @@ with gr.Blocks() as app:
233
 
234
  def update_db_list():
235
  """Update the list of available databases"""
236
- if not os.path.exists(DATABASE_DIR):
237
- os.makedirs(DATABASE_DIR, exist_ok=True)
238
- return [name for name in os.listdir(DATABASE_DIR) if os.path.isdir(os.path.join(DATABASE_DIR, name))]
 
 
239
 
240
  with gr.Tab("Create Database"):
241
  gr.Markdown("## Create a New FAISS Database")
@@ -247,7 +232,7 @@ with gr.Blocks() as app:
247
 
248
  def handle_create(file, db_name, password, progress=gr.Progress()):
249
  if not file or not db_name or not password:
250
- return "Please provide all required inputs.", []
251
 
252
  # Check if the file is valid
253
  if isinstance(file, str): # Gradio provides the file path as a string
@@ -255,12 +240,15 @@ with gr.Blocks() as app:
255
  with open(file, "r", encoding="utf-8") as f:
256
  file_content = f.read()
257
  except Exception as e:
258
- return f"Error reading file: {str(e)}", []
259
  else:
260
- return "Invalid file format. Please upload a .txt file.", []
261
 
262
- result, db_list = create_new_database(file_content, db_name, password, progress)
263
- return result, db_list
 
 
 
264
 
265
  create_button.click(
266
  handle_create,
@@ -299,4 +287,10 @@ with gr.Blocks() as app:
299
  )
300
 
301
  if __name__ == "__main__":
 
 
 
 
 
 
302
  app.launch(server_name="0.0.0.0", server_port=7860)
 
13
  from tqdm import tqdm
14
 
15
  # Configuration
 
16
  QWEN_API_URL = "Qwen/Qwen2.5-Max-Demo" # Gradio API for Qwen2.5 chat
17
  CHUNK_SIZE = 800
18
  TOP_K_RESULTS = 150
 
83
 
84
  return chunks
85
 
86
+ def create_new_database(file_content: str, db_name: str, password: str, progress=gr.Progress()) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  """Create a new FAISS database from uploaded file"""
88
  if password != PASSWORD_HASH:
89
+ return "Incorrect password. Database creation failed."
90
 
91
  if not file_content.strip():
92
+ return "Uploaded file is empty. Database creation failed."
93
 
94
  if not db_name.isalnum():
95
+ return "Database name must be alphanumeric. Database creation failed."
96
 
97
  try:
98
+ # Define file names for the FAISS database
99
+ faiss_file = f"{db_name}-index.faiss"
100
+ pkl_file = f"{db_name}-index.pkl"
101
+
102
+ # Check if the database already exists
103
+ if os.path.exists(faiss_file) or os.path.exists(pkl_file):
104
+ return f"Database '{db_name}' already exists."
105
 
106
  # Initialize embeddings and split text
107
  chunks = split_text_into_chunks(file_content)
108
  if not chunks:
109
+ return "No valid chunks generated. Database creation failed."
110
 
111
  logging.info(f"Creating {len(chunks)} chunks...")
112
  progress(0, desc="Starting embedding process...")
 
122
  text_embeddings=list(zip(chunks, embeddings_list)),
123
  embedding=embeddings
124
  )
 
 
125
 
126
+ # Save the FAISS database to the root directory
127
+ vector_store.save_local(".")
128
+ logging.info(f"FAISS database saved to: {faiss_file} and {pkl_file}")
 
129
 
130
+ # Rename the default FAISS files to match the desired naming convention
131
+ os.rename("index.faiss", faiss_file)
132
+ os.rename("index.pkl", pkl_file)
133
+
134
+ # Verify files were created
135
+ if not os.path.exists(faiss_file) or not os.path.exists(pkl_file):
136
+ return f"Failed to save FAISS database files: {faiss_file} or {pkl_file}"
137
+ logging.info(f"FAISS database files: {faiss_file}, {pkl_file}")
138
 
139
+ return f"Database '{db_name}' created successfully."
140
  except Exception as e:
141
  logging.error(f"Database creation failed: {str(e)}")
142
+ return f"Error creating database: {str(e)}"
143
 
144
  def generate_response(user_input: str, db_name: str) -> Optional[str]:
145
  """Generate response using Qwen2.5 MAX"""
 
147
  if not db_name:
148
  return "Please select a database to chat with."
149
 
150
+ # Define file names for the FAISS database
151
+ faiss_file = f"{db_name}-index.faiss"
152
+ pkl_file = f"{db_name}-index.pkl"
153
+
154
+ if not os.path.exists(faiss_file) or not os.path.exists(pkl_file):
155
  return f"Database '{db_name}' does not exist."
156
 
157
+ # Load the FAISS database
158
+ vector_store = FAISS.load_local(".", embeddings, allow_dangerous_deserialization=True)
 
 
 
159
 
160
  # Contextual search
161
  docs_scores = vector_store.similarity_search_with_score(
 
216
 
217
  def update_db_list():
218
  """Update the list of available databases"""
219
+ return [
220
+ name.replace("-index.faiss", "") # Remove "-index.faiss" suffix for display
221
+ for name in os.listdir(".")
222
+ if name.endswith("-index.faiss")
223
+ ]
224
 
225
  with gr.Tab("Create Database"):
226
  gr.Markdown("## Create a New FAISS Database")
 
232
 
233
  def handle_create(file, db_name, password, progress=gr.Progress()):
234
  if not file or not db_name or not password:
235
+ return "Please provide all required inputs."
236
 
237
  # Check if the file is valid
238
  if isinstance(file, str): # Gradio provides the file path as a string
 
240
  with open(file, "r", encoding="utf-8") as f:
241
  file_content = f.read()
242
  except Exception as e:
243
+ return f"Error reading file: {str(e)}"
244
  else:
245
+ return "Invalid file format. Please upload a .txt file."
246
 
247
+ result = create_new_database(file_content, db_name, password, progress)
248
+ if "created successfully" in result:
249
+ # Update the database list
250
+ return result, update_db_list()
251
+ return result, None
252
 
253
  create_button.click(
254
  handle_create,
 
287
  )
288
 
289
  if __name__ == "__main__":
290
+ # Log existing databases at startup
291
+ logging.info("Existing databases:")
292
+ for name in os.listdir("."):
293
+ if name.endswith("-index.faiss"):
294
+ logging.info(f"- {name}")
295
+
296
  app.launch(server_name="0.0.0.0", server_port=7860)