Shreyas094 committed on
Commit
2093ce4
·
verified ·
1 Parent(s): 0a69d83

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -11
app.py CHANGED
@@ -8,6 +8,7 @@ from typing import List
8
  from pydantic import BaseModel, Field
9
  from tempfile import NamedTemporaryFile
10
  from langchain_community.vectorstores import FAISS
 
11
  from langchain_community.document_loaders import PyPDFLoader
12
  from langchain_community.embeddings import HuggingFaceEmbeddings
13
  from llama_parse import LlamaParse
@@ -460,25 +461,29 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
460
  yield "No documents available. Please upload PDF documents to answer questions."
461
  return
462
 
463
- retriever = database.as_retriever(search_kwargs={"k": 20})
464
- logging.info(f"Retrieving relevant documents for query: {query}")
465
- relevant_docs = retriever.get_relevant_documents(query)
466
- logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
467
-
468
- # Filter relevant_docs based on selected documents
469
- filtered_docs = [doc for doc in relevant_docs if doc.metadata["source"] in selected_docs]
470
- logging.info(f"Number of filtered documents: {len(filtered_docs)}")
471
 
 
 
472
  if not filtered_docs:
473
- logging.warning(f"No relevant information found in the selected documents: {selected_docs}")
474
  yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
475
  return
476
 
477
- for doc in filtered_docs:
 
 
 
 
 
 
 
 
478
  logging.info(f"Document source: {doc.metadata['source']}")
479
  logging.info(f"Document content preview: {doc.page_content[:100]}...") # Log first 100 characters of each document
480
 
481
- context_str = "\n".join([doc.page_content for doc in filtered_docs])
482
  logging.info(f"Total context length: {len(context_str)}")
483
 
484
  if model == "@cf/meta/llama-3.1-8b-instruct":
 
8
  from pydantic import BaseModel, Field
9
  from tempfile import NamedTemporaryFile
10
  from langchain_community.vectorstores import FAISS
11
+ from langchain_core.vectorstores import VectorStore
12
  from langchain_community.document_loaders import PyPDFLoader
13
  from langchain_community.embeddings import HuggingFaceEmbeddings
14
  from llama_parse import LlamaParse
 
461
  yield "No documents available. Please upload PDF documents to answer questions."
462
  return
463
 
464
+ # Pre-filter the documents
465
+ filtered_docs = [doc for doc in database.docstore.values() if doc.metadata["source"] in selected_docs]
 
 
 
 
 
 
466
 
467
+ logging.info(f"Number of documents after pre-filtering: {len(filtered_docs)}")
468
+
469
  if not filtered_docs:
470
+ logging.warning(f"No documents found for the selected sources: {selected_docs}")
471
  yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
472
  return
473
 
474
+ # Create a new FAISS index with only the selected documents
475
+ filtered_db = FAISS.from_documents(filtered_docs, embed)
476
+
477
+ retriever = filtered_db.as_retriever(search_kwargs={"k": 10})
478
+ logging.info(f"Retrieving relevant documents for query: {query}")
479
+ relevant_docs = retriever.get_relevant_documents(query)
480
+ logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
481
+
482
+ for doc in relevant_docs:
483
  logging.info(f"Document source: {doc.metadata['source']}")
484
  logging.info(f"Document content preview: {doc.page_content[:100]}...") # Log first 100 characters of each document
485
 
486
+ context_str = "\n".join([doc.page_content for doc in relevant_docs])
487
  logging.info(f"Total context length: {len(context_str)}")
488
 
489
  if model == "@cf/meta/llama-3.1-8b-instruct":