Update app.py
app.py CHANGED
@@ -571,31 +571,46 @@ def qa_llm():
     return qa

 def process_answer(user_question, full_text):
-    """Generate an answer to the user’s question
+    """Generate an answer to the user’s question based on the extracted text from the PDF."""
     try:
         logging.info("Processing user question")
-
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-        ""
-
-
-
-
+
+        # Set up the retriever with the PDF content (this could be your embedded database or a direct retrieval from full_text)
+        embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+
+        # Use Chroma for document storage and retrieval if you’re storing documents in a vector store
+        db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
+        retriever = db.as_retriever()  # Set up the retriever to use Chroma database
+
+        # Here we're just adding the full_text as a document for simplicity
+        db.add_documents([full_text])
+
+        # Set up the language model pipeline (assuming you already have a pipeline set up)
+        llm = llm_pipeline()
+
+        # Construct the retrieval chain using the retriever and LLM
+        qa_chain = RetrievalQA.from_chain_type(
+            llm=llm,
+            chain_type="stuff",
+            retriever=retriever,
+            return_source_documents=True
+        )
+
+        # Create a tailored prompt for the question (providing context to the chatbot)
+        tailored_prompt = f"""
+        You are a helpful RAG-based chatbot designed to assist with answering questions from any uploaded document.
+        You should answer the question using relevant information from the provided PDF text.
+        Please provide a clear, informative answer based on the document content.
+        User question: {user_question}
+        """
+
+        # Generate the answer using the retrieval-augmented generation model
+        generated_text = qa_chain({"query": tailored_prompt})
+
+        # Extract the generated answer
         answer = generated_text['result']

-        # If the answer
+        # If the answer is empty or not very informative, provide a fallback message
         if "not provide" in answer or "no information" in answer:
             return "The document does not provide sufficient information to answer your question."

@@ -604,7 +619,7 @@ def process_answer(user_question, full_text):

     except Exception as e:
         logging.error(f"Error during answer generation: {str(e)}")
-        return "
+        return "Sorry, I encountered an issue while processing your question."


 # Streamlit UI Setup
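A note on db.add_documents([full_text]) in the added code: in the LangChain Chroma wrapper, add_documents expects Document objects, so passing a raw string inside a list will typically fail; plain strings normally go through add_texts, or get wrapped in Document first. Below is a minimal sketch of how the extracted PDF text could be chunked, wrapped, and indexed before process_answer runs. It assumes the classic langchain import paths and the same sentence-transformers model used in the diff; the helper name index_pdf_text and the chunk sizes are illustrative choices, not values taken from app.py.

from langchain.docstore.document import Document
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

def index_pdf_text(full_text: str, persist_directory: str) -> Chroma:
    """Split the extracted PDF text into chunks and store them in a persistent Chroma collection."""
    # Overlapping chunks keep sentences that straddle a boundary retrievable from either side.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_text(full_text)

    # add_documents expects Document objects; raw strings would instead go through add_texts.
    docs = [Document(page_content=chunk) for chunk in chunks]

    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
    db.add_documents(docs)
    return db

Indexing chunks rather than one monolithic document also keeps the "stuff" chain within the model's context window, since the retriever only hands the top-scoring chunks to the LLM.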