pradeepsengarr committed
Commit 39d36c9 · verified · 1 Parent(s): daa5ddb

Update app.py

Files changed (1):
  1. app.py +88 -27

app.py CHANGED
@@ -483,50 +483,103 @@ def setup_qa(db):
     return RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)
 
 # --- Process Answer ---
-def process_answer(question, full_text):
-    # STEP 1: Chunk the PDF text
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
-    docs = text_splitter.create_documents([full_text])
-
-    # STEP 2: Create embeddings
-    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
-    db = Chroma.from_documents(docs, embeddings)
-
-    # STEP 3: Retrieve relevant chunks using the question
-    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 5})
-    relevant_docs = retriever.get_relevant_documents(question)
-
-    # STEP 4: Format the context
-    context = "\n\n".join([doc.page_content for doc in relevant_docs])
-
-    # STEP 5: Prompting
-    prompt_template = """
-    You are a helpful assistant that answers questions based on the context below.
-
-    Context:
-    {context}
-
-    Question: {question}
-
-    Answer:
-    """.strip()
-
-    prompt = prompt_template.format(context=context, question=question)
-
-    # STEP 6: Load the model and generate response
-    llm = HuggingFacePipeline.from_model_id(
-        model_id="MBZUAI/LaMini-T5-738M",
-        task="text2text-generation",
-        model_kwargs={"temperature": 0.3, "max_length": 256},
-    )
-
-    return llm.invoke(prompt)
+# def process_answer(question, full_text):
+#     # STEP 1: Chunk the PDF text
+#     text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
+#     docs = text_splitter.create_documents([full_text])
+
+#     # STEP 2: Create embeddings
+#     embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+#     db = Chroma.from_documents(docs, embeddings)
+
+#     # STEP 3: Retrieve relevant chunks using the question
+#     retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 5})
+#     relevant_docs = retriever.get_relevant_documents(question)
+
+#     # STEP 4: Format the context
+#     context = "\n\n".join([doc.page_content for doc in relevant_docs])
+
+#     # STEP 5: Prompting
+#     prompt_template = """
+#     You are a helpful assistant that answers questions based on the context below.
+
+#     Context:
+#     {context}
+
+#     Question: {question}
+
+#     Answer:
+#     """.strip()
+
+#     prompt = prompt_template.format(context=context, question=question)
+
+#     # STEP 6: Load the model and generate response
+#     llm = HuggingFacePipeline.from_model_id(
+#         model_id="MBZUAI/LaMini-T5-738M",
+#         task="text2text-generation",
+#         model_kwargs={"temperature": 0.3, "max_length": 256},
+#     )
+
+#     return llm.invoke(prompt)
+
+def process_answer(question, full_text):
+    from langchain_community.document_loaders import TextLoader
+    from langchain.text_splitter import RecursiveCharacterTextSplitter
+    from langchain.vectorstores import Chroma
+    from langchain_community.embeddings import SentenceTransformerEmbeddings
+    from langchain.chains import RetrievalQA
+    from langchain import HuggingFacePipeline
+    from transformers import pipeline
+    import os
+    import shutil
+
+    # Save to a temp file and load it as a document
+    with open("temp_text.txt", "w") as f:
+        f.write(full_text)
+
+    loader = TextLoader("temp_text.txt")
+    docs = loader.load()
+
+    # Chunk the docs
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
+    splits = text_splitter.split_documents(docs)
+
+    # Embeddings
+    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+
+    # Clean up the old DB if it exists
+    if os.path.exists("chroma_db"):
+        shutil.rmtree("chroma_db")
+
+    db = Chroma.from_documents(splits, embeddings, persist_directory="chroma_db")
+    retriever = db.as_retriever()
+
+    # Model pipeline
+    pipe = pipeline("text2text-generation", model="MBZUAI/LaMini-T5-738M", max_length=512)
+    llm = HuggingFacePipeline(pipeline=pipe)
+
+    # Retrieval QA chain
+    qa_chain = RetrievalQA.from_chain_type(
+        llm=llm,
+        retriever=retriever,
+        return_source_documents=False
+    )
+
+    # Check whether the question asks for a summary
+    if "summarize" in question.lower() or "summary" in question.lower() or "tl;dr" in question.lower():
+        prompt = f"Summarize the following document:\n\n{full_text[:3000]}"  # trim to 3K chars for the model
+        summary = llm(prompt)
+        return summary
+    else:
+        answer = qa_chain.run(question)
+        return answer
 
 # --- UI Layout ---
 with st.sidebar:
     st.header("📄 Upload PDF")
     uploaded_file = st.file_uploader("Choose a PDF", type=["pdf"])
+
 
 # --- Main Interface ---
 if uploaded_file:
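
Note on the rewritten process_answer: llm(prompt) and qa_chain.run(question) use LangChain's legacy call styles, which langchain 0.1+ deprecates in favor of the Runnable .invoke() interface. A minimal sketch of the equivalent calls, assuming a recent langchain release (the pinned version is not visible in this diff):

    # Invoke-style equivalents for the objects built in process_answer above.
    summary = llm.invoke(prompt)                   # replaces llm(prompt)
    result = qa_chain.invoke({"query": question})  # replaces qa_chain.run(question); "query" is RetrievalQA's input key
    answer = result["result"]                      # RetrievalQA returns a dict with a "result" key

Separately, open("temp_text.txt", "w") inherits the platform's default encoding; passing encoding="utf-8" would avoid UnicodeEncodeError on non-UTF-8 locales.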
 
@@ -551,6 +604,14 @@ if uploaded_file:
     st.markdown("---")
     st.markdown("**💡 Suggestions:**")
     st.caption("Try: \"Summarize this document\" or \"What is the key idea?\"")
+    with st.expander("💡 Suggestions", expanded=True):
+        st.markdown("""
+        - "Summarize this document"
+        - "Give a quick summary"
+        - "What are the main points?"
+        - "Explain this document in short"
+        """)
+
 
 else:
     st.error("⚠️ No text could be extracted from the PDF. Try another file.")
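
For context, the code that extracts the PDF text and collects the question lives outside these hunks. A minimal sketch of the presumed wiring, assuming pypdf and a hypothetical extract_text_from_pdf helper (neither is confirmed by this diff):

    import streamlit as st
    from pypdf import PdfReader

    def extract_text_from_pdf(file) -> str:
        # Hypothetical helper; the app's real extraction code is not shown in this diff.
        reader = PdfReader(file)
        return "\n".join(page.extract_text() or "" for page in reader.pages)

    if uploaded_file:
        full_text = extract_text_from_pdf(uploaded_file)
        if full_text.strip():
            question = st.text_input("Ask a question about the PDF")
            if question:
                st.write(process_answer(question, full_text))
        else:
            st.error("⚠️ No text could be extracted from the PDF. Try another file.")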