Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -180,7 +180,7 @@
|
|
180 |
|
181 |
|
182 |
# v2
|
183 |
-
import re
|
184 |
import PyPDF2
|
185 |
from langchain_community.embeddings import OllamaEmbeddings
|
186 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
@@ -195,14 +195,13 @@ import logging
|
|
195 |
import pypandoc
|
196 |
import pdfkit
|
197 |
from paddleocr import PaddleOCR
|
198 |
-
import fitz
|
199 |
import asyncio
|
200 |
from langchain_nomic.embeddings import NomicEmbeddings
|
201 |
-
import os
|
202 |
|
203 |
llm_groq = ChatGroq(
|
204 |
-
|
205 |
-
)
|
206 |
|
207 |
# Initialize anonymizer
|
208 |
anonymizer = PresidioReversibleAnonymizer(analyzed_fields=['PERSON', 'EMAIL_ADDRESS', 'PHONE_NUMBER', 'IBAN_CODE', 'CREDIT_CARD', 'CRYPTO', 'IP_ADDRESS', 'LOCATION', 'DATE_TIME', 'NRP', 'MEDICAL_LICENSE', 'URL'], faker_seed=18)
|
@@ -276,11 +275,21 @@ async def extract_text_from_mixed_pdf(file_path):
|
|
276 |
pdf_text += text
|
277 |
return pdf_text
|
278 |
|
|
|
|
|
|
|
|
|
279 |
@cl.on_chat_start
|
280 |
async def on_chat_start():
|
281 |
|
282 |
files = None # Initialize variable to store uploaded files
|
283 |
|
|
|
|
|
|
|
|
|
|
|
|
|
284 |
# Wait for the user to upload a file
|
285 |
while files is None:
|
286 |
files = await cl.AskFileMessage(
|
@@ -308,14 +317,7 @@ async def on_chat_start():
|
|
308 |
)
|
309 |
|
310 |
embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
|
311 |
-
|
312 |
-
# Clear the existing Chroma vector store
|
313 |
-
docsearch = await cl.make_async(Chroma.from_texts)(
|
314 |
-
[], embeddings, metadatas=[]
|
315 |
-
)
|
316 |
-
docsearch.delete()
|
317 |
|
318 |
-
# Create a new Chroma vector store
|
319 |
docsearch = await cl.make_async(Chroma.from_texts)(
|
320 |
[anonymized_text], embeddings, metadatas=[{"source": "0-pl"}]
|
321 |
)
|
@@ -345,8 +347,6 @@ async def on_chat_start():
|
|
345 |
await msg.update()
|
346 |
# Store the chain in user session
|
347 |
cl.user_session.set("chain", chain)
|
348 |
-
cl.user_session.set("docsearch", docsearch) # Store the docsearch in session
|
349 |
-
cl.user_session.set("file_path", file.path) # Store the file path in session
|
350 |
|
351 |
|
352 |
@cl.on_message
|
@@ -366,21 +366,3 @@ async def main(message: cl.Message):
|
|
366 |
|
367 |
# Return results
|
368 |
await cl.Message(content=answer, elements=text_elements).send()
|
369 |
-
|
370 |
-
@cl.on_chat_end
|
371 |
-
async def on_chat_end():
|
372 |
-
docsearch = cl.user_session.get("docsearch")
|
373 |
-
file_path = cl.user_session.get("file_path")
|
374 |
-
|
375 |
-
if docsearch:
|
376 |
-
# Clear the vector store
|
377 |
-
docsearch.delete()
|
378 |
-
|
379 |
-
if file_path and os.path.exists(file_path):
|
380 |
-
# Remove the uploaded file
|
381 |
-
os.remove(file_path)
|
382 |
-
|
383 |
-
# Clear the user session data
|
384 |
-
cl.user_session.clear()
|
385 |
-
|
386 |
-
logging.info("User session ended, data cleared.")
|
|
|
180 |
|
181 |
|
182 |
# v2
|
183 |
+
import re
|
184 |
import PyPDF2
|
185 |
from langchain_community.embeddings import OllamaEmbeddings
|
186 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
195 |
import pypandoc
|
196 |
import pdfkit
|
197 |
from paddleocr import PaddleOCR
|
198 |
+
import fitz
|
199 |
import asyncio
|
200 |
from langchain_nomic.embeddings import NomicEmbeddings
|
|
|
201 |
|
202 |
llm_groq = ChatGroq(
|
203 |
+
model_name='llama3-70b-8192'
|
204 |
+
)
|
205 |
|
206 |
# Initialize anonymizer
|
207 |
anonymizer = PresidioReversibleAnonymizer(analyzed_fields=['PERSON', 'EMAIL_ADDRESS', 'PHONE_NUMBER', 'IBAN_CODE', 'CREDIT_CARD', 'CRYPTO', 'IP_ADDRESS', 'LOCATION', 'DATE_TIME', 'NRP', 'MEDICAL_LICENSE', 'URL'], faker_seed=18)
|
|
|
275 |
pdf_text += text
|
276 |
return pdf_text
|
277 |
|
278 |
+
# Function to clear the ChromaDB
|
279 |
+
async def clear_chroma_db(chroma_instance):
    """Remove all data held by *chroma_instance* by dropping its collection.

    The vector-store call is synchronous, so it is dispatched to the event
    loop's default executor and that future is awaited. The previous code
    awaited the return value of a synchronous method directly, which raises
    ``TypeError`` because ``None`` is not awaitable.

    Args:
        chroma_instance: Vector store exposing a synchronous, zero-argument
            ``delete_collection()`` method. Presumably LangChain's ``Chroma``
            wrapper -- TODO confirm against the pinned package version.

    Returns:
        None.
    """
    loop = asyncio.get_running_loop()
    # NOTE(review): the original called ``delete()`` with no ids. That is a
    # "delete these ids" API rather than a "wipe the store" API, so the
    # whole collection is dropped instead -- verify this matches the intent.
    await loop.run_in_executor(None, chroma_instance.delete_collection)
|
281 |
+
|
282 |
@cl.on_chat_start
|
283 |
async def on_chat_start():
|
284 |
|
285 |
files = None # Initialize variable to store uploaded files
|
286 |
|
287 |
+
# Initialize ChromaDB
|
288 |
+
chroma_instance = await cl.make_async(Chroma)()
|
289 |
+
|
290 |
+
# Clear any existing data in ChromaDB
|
291 |
+
await clear_chroma_db(chroma_instance)
|
292 |
+
|
293 |
# Wait for the user to upload a file
|
294 |
while files is None:
|
295 |
files = await cl.AskFileMessage(
|
|
|
317 |
)
|
318 |
|
319 |
embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
|
|
|
|
|
|
|
|
|
|
|
|
|
320 |
|
|
|
321 |
docsearch = await cl.make_async(Chroma.from_texts)(
|
322 |
[anonymized_text], embeddings, metadatas=[{"source": "0-pl"}]
|
323 |
)
|
|
|
347 |
await msg.update()
|
348 |
# Store the chain in user session
|
349 |
cl.user_session.set("chain", chain)
|
|
|
|
|
350 |
|
351 |
|
352 |
@cl.on_message
|
|
|
366 |
|
367 |
# Return results
|
368 |
await cl.Message(content=answer, elements=text_elements).send()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|