logasanjeev committed on
Commit
c4200bd
·
verified ·
1 Parent(s): b0deff1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -11
app.py CHANGED
@@ -12,6 +12,8 @@ from pptx import Presentation
12
  from io import BytesIO
13
  import shutil
14
  import logging
 
 
15
 
16
  # Set up logging
17
  logging.basicConfig(level=logging.INFO)
@@ -40,7 +42,7 @@ vector_store = None
40
  qa_chain = None
41
  chat_history = []
42
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
43
- PERSIST_DIRECTORY = "./chroma_db"
44
 
45
  # Custom PPTX loader
46
  class PPTXLoader:
@@ -94,7 +96,7 @@ def process_documents(files, chunk_size, chunk_overlap, embedding_model):
94
  if not files:
95
  return "Please upload at least one document.", None
96
 
97
- # Clear existing vector store to avoid dimensionality mismatch
98
  if os.path.exists(PERSIST_DIRECTORY):
99
  try:
100
  shutil.rmtree(PERSIST_DIRECTORY)
@@ -102,6 +104,7 @@ def process_documents(files, chunk_size, chunk_overlap, embedding_model):
102
  except Exception as e:
103
  logger.error(f"Error clearing ChromaDB directory: {str(e)}")
104
  return f"Error clearing vector store: {str(e)}", None
 
105
 
106
  # Load documents
107
  documents = load_documents(files)
@@ -130,7 +133,15 @@ def process_documents(files, chunk_size, chunk_overlap, embedding_model):
130
 
131
  # Create vector store
132
  try:
133
- vector_store = Chroma.from_documents(doc_splits, embeddings, persist_directory=PERSIST_DIRECTORY)
 
 
 
 
 
 
 
 
134
  return f"Processed {len(documents)} documents into {len(doc_splits)} chunks.", None
135
  except Exception as e:
136
  logger.error(f"Error creating vector store: {str(e)}")
@@ -173,8 +184,8 @@ def answer_question(question, llm_model, embedding_model, temperature, chunk_siz
173
 
174
  try:
175
  response = qa_chain({"question": question})["answer"]
176
- chat_history.append(("User", question))
177
- chat_history.append(("Bot", response))
178
  logger.info(f"Answered question: {question}")
179
  return response, chat_history
180
  except Exception as e:
@@ -189,8 +200,10 @@ def export_chat():
189
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
190
  filename = f"chat_history_{timestamp}.txt"
191
  with open(filename, "w") as f:
192
- for role, message in chat_history:
193
- f.write(f"{role}: {message}\n\n")
 
 
194
  logger.info(f"Exported chat history to {filename}.")
195
  return f"Chat history exported to {filename}.", filename
196
  except Exception as e:
@@ -207,6 +220,7 @@ def reset_app():
207
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
208
  if os.path.exists(PERSIST_DIRECTORY):
209
  shutil.rmtree(PERSIST_DIRECTORY)
 
210
  logger.info("Cleared ChromaDB directory on reset.")
211
  logger.info("App reset successfully.")
212
  return "App reset successfully.", None
@@ -238,20 +252,22 @@ with gr.Blocks(theme=gr.themes.Soft(), title="DocTalk: Document Q&A Chatbot") as
238
  gr.Markdown("## Chat Interface")
239
  question = gr.Textbox(label="Ask a Question", placeholder="Type your question here...")
240
  answer = gr.Textbox(label="Answer", interactive=False)
241
- chat_display = gr.Chatbot(label="Chat History")
242
  export_button = gr.Button("Export Chat History")
243
  export_file = gr.File(label="Exported Chat File")
244
 
245
- # Event handlers
246
  process_button.click(
247
  fn=process_documents,
248
  inputs=[file_upload, chunk_size, chunk_overlap, embedding_model],
249
- outputs=[status, chat_display]
 
250
  )
251
  init_button.click(
252
  fn=initialize_qa_chain,
253
  inputs=[llm_model, temperature],
254
- outputs=[status, chat_display]
 
255
  )
256
  question.submit(
257
  fn=answer_question,
 
12
  from io import BytesIO
13
  import shutil
14
  import logging
15
+ import chromadb
16
+ import tempfile
17
 
18
  # Set up logging
19
  logging.basicConfig(level=logging.INFO)
 
42
  qa_chain = None
43
  chat_history = []
44
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
45
+ PERSIST_DIRECTORY = tempfile.mkdtemp() # Use temporary directory for ChromaDB
46
 
47
  # Custom PPTX loader
48
  class PPTXLoader:
 
96
  if not files:
97
  return "Please upload at least one document.", None
98
 
99
+ # Clear existing vector store
100
  if os.path.exists(PERSIST_DIRECTORY):
101
  try:
102
  shutil.rmtree(PERSIST_DIRECTORY)
 
104
  except Exception as e:
105
  logger.error(f"Error clearing ChromaDB directory: {str(e)}")
106
  return f"Error clearing vector store: {str(e)}", None
107
+ os.makedirs(PERSIST_DIRECTORY, exist_ok=True)
108
 
109
  # Load documents
110
  documents = load_documents(files)
 
133
 
134
  # Create vector store
135
  try:
136
+ # Reset ChromaDB client to avoid tenant issues
137
+ client = chromadb.PersistentClient(path=PERSIST_DIRECTORY)
138
+ client.reset()
139
+ vector_store = Chroma.from_documents(
140
+ documents=doc_splits,
141
+ embedding=embeddings,
142
+ persist_directory=PERSIST_DIRECTORY,
143
+ collection_name="doctalk_collection"
144
+ )
145
  return f"Processed {len(documents)} documents into {len(doc_splits)} chunks.", None
146
  except Exception as e:
147
  logger.error(f"Error creating vector store: {str(e)}")
 
184
 
185
  try:
186
  response = qa_chain({"question": question})["answer"]
187
+ chat_history.append({"role": "user", "content": question})
188
+ chat_history.append({"role": "assistant", "content": response})
189
  logger.info(f"Answered question: {question}")
190
  return response, chat_history
191
  except Exception as e:
 
200
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
201
  filename = f"chat_history_{timestamp}.txt"
202
  with open(filename, "w") as f:
203
+ for message in chat_history:
204
+ role = message["role"].capitalize()
205
+ content = message["content"]
206
+ f.write(f"{role}: {content}\n\n")
207
  logger.info(f"Exported chat history to {filename}.")
208
  return f"Chat history exported to {filename}.", filename
209
  except Exception as e:
 
220
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
221
  if os.path.exists(PERSIST_DIRECTORY):
222
  shutil.rmtree(PERSIST_DIRECTORY)
223
+ os.makedirs(PERSIST_DIRECTORY, exist_ok=True)
224
  logger.info("Cleared ChromaDB directory on reset.")
225
  logger.info("App reset successfully.")
226
  return "App reset successfully.", None
 
252
  gr.Markdown("## Chat Interface")
253
  question = gr.Textbox(label="Ask a Question", placeholder="Type your question here...")
254
  answer = gr.Textbox(label="Answer", interactive=False)
255
+ chat_display = gr.Chatbot(label="Chat History", type="messages")
256
  export_button = gr.Button("Export Chat History")
257
  export_file = gr.File(label="Exported Chat File")
258
 
259
+ # Event handlers with loading state
260
  process_button.click(
261
  fn=process_documents,
262
  inputs=[file_upload, chunk_size, chunk_overlap, embedding_model],
263
+ outputs=[status, chat_display],
264
+ _js="() => { return { 'Process Documents': 'Processing...' } }"
265
  )
266
  init_button.click(
267
  fn=initialize_qa_chain,
268
  inputs=[llm_model, temperature],
269
+ outputs=[status, chat_display],
270
+ _js="() => { return { 'Initialize QA Chain': 'Initializing...' } }"
271
  )
272
  question.submit(
273
  fn=answer_question,