akash015 committed on
Commit
43aad56
·
verified ·
1 Parent(s): 7deaf1b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -32
app.py CHANGED
@@ -180,7 +180,7 @@
180
 
181
 
182
  # v2
183
- import re
184
  import PyPDF2
185
  from langchain_community.embeddings import OllamaEmbeddings
186
  from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -195,14 +195,13 @@ import logging
195
  import pypandoc
196
  import pdfkit
197
  from paddleocr import PaddleOCR
198
- import fitz
199
  import asyncio
200
  from langchain_nomic.embeddings import NomicEmbeddings
201
- import os
202
 
203
  llm_groq = ChatGroq(
204
- model_name='llama3-70b-8192'
205
- )
206
 
207
  # Initialize anonymizer
208
  anonymizer = PresidioReversibleAnonymizer(analyzed_fields=['PERSON', 'EMAIL_ADDRESS', 'PHONE_NUMBER', 'IBAN_CODE', 'CREDIT_CARD', 'CRYPTO', 'IP_ADDRESS', 'LOCATION', 'DATE_TIME', 'NRP', 'MEDICAL_LICENSE', 'URL'], faker_seed=18)
@@ -276,11 +275,21 @@ async def extract_text_from_mixed_pdf(file_path):
276
  pdf_text += text
277
  return pdf_text
278
 
 
 
 
 
279
  @cl.on_chat_start
280
  async def on_chat_start():
281
 
282
  files = None # Initialize variable to store uploaded files
283
 
 
 
 
 
 
 
284
  # Wait for the user to upload a file
285
  while files is None:
286
  files = await cl.AskFileMessage(
@@ -308,14 +317,7 @@ async def on_chat_start():
308
  )
309
 
310
  embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
311
-
312
- # Clear the existing Chroma vector store
313
- docsearch = await cl.make_async(Chroma.from_texts)(
314
- [], embeddings, metadatas=[]
315
- )
316
- docsearch.delete()
317
 
318
- # Create a new Chroma vector store
319
  docsearch = await cl.make_async(Chroma.from_texts)(
320
  [anonymized_text], embeddings, metadatas=[{"source": "0-pl"}]
321
  )
@@ -345,8 +347,6 @@ async def on_chat_start():
345
  await msg.update()
346
  # Store the chain in user session
347
  cl.user_session.set("chain", chain)
348
- cl.user_session.set("docsearch", docsearch) # Store the docsearch in session
349
- cl.user_session.set("file_path", file.path) # Store the file path in session
350
 
351
 
352
  @cl.on_message
@@ -366,21 +366,3 @@ async def main(message: cl.Message):
366
 
367
  # Return results
368
  await cl.Message(content=answer, elements=text_elements).send()
369
-
370
- @cl.on_chat_end
371
- async def on_chat_end():
372
- docsearch = cl.user_session.get("docsearch")
373
- file_path = cl.user_session.get("file_path")
374
-
375
- if docsearch:
376
- # Clear the vector store
377
- docsearch.delete()
378
-
379
- if file_path and os.path.exists(file_path):
380
- # Remove the uploaded file
381
- os.remove(file_path)
382
-
383
- # Clear the user session data
384
- cl.user_session.clear()
385
-
386
- logging.info("User session ended, data cleared.")
 
180
 
181
 
182
  # v2
183
+ import re
184
  import PyPDF2
185
  from langchain_community.embeddings import OllamaEmbeddings
186
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
195
  import pypandoc
196
  import pdfkit
197
  from paddleocr import PaddleOCR
198
+ import fitz
199
  import asyncio
200
  from langchain_nomic.embeddings import NomicEmbeddings
 
201
 
202
  llm_groq = ChatGroq(
203
+ model_name='llama3-70b-8192'
204
+ )
205
 
206
  # Initialize anonymizer
207
  anonymizer = PresidioReversibleAnonymizer(analyzed_fields=['PERSON', 'EMAIL_ADDRESS', 'PHONE_NUMBER', 'IBAN_CODE', 'CREDIT_CARD', 'CRYPTO', 'IP_ADDRESS', 'LOCATION', 'DATE_TIME', 'NRP', 'MEDICAL_LICENSE', 'URL'], faker_seed=18)
 
275
  pdf_text += text
276
  return pdf_text
277
 
278
+ # Function to clear the ChromaDB
279
+ async def clear_chroma_db(chroma_instance):
280
+ await chroma_instance.delete()
281
+
282
  @cl.on_chat_start
283
  async def on_chat_start():
284
 
285
  files = None # Initialize variable to store uploaded files
286
 
287
+ # Initialize ChromaDB
288
+ chroma_instance = await cl.make_async(Chroma)()
289
+
290
+ # Clear any existing data in ChromaDB
291
+ await clear_chroma_db(chroma_instance)
292
+
293
  # Wait for the user to upload a file
294
  while files is None:
295
  files = await cl.AskFileMessage(
 
317
  )
318
 
319
  embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
 
 
 
 
 
 
320
 
 
321
  docsearch = await cl.make_async(Chroma.from_texts)(
322
  [anonymized_text], embeddings, metadatas=[{"source": "0-pl"}]
323
  )
 
347
  await msg.update()
348
  # Store the chain in user session
349
  cl.user_session.set("chain", chain)
 
 
350
 
351
 
352
  @cl.on_message
 
366
 
367
  # Return results
368
  await cl.Message(content=answer, elements=text_elements).send()