pradeepsengarr committed (verified)
Commit 4977945 · 1 Parent(s): 39d36c9

Update app.py

Files changed (1):
  app.py +18 -20
app.py CHANGED
@@ -522,6 +522,9 @@ def setup_qa(db):
 
     # return llm.invoke(prompt)
 
+import tempfile
+import os
+
 def process_answer(question, full_text):
     from langchain_community.document_loaders import TextLoader
     from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -530,49 +533,44 @@ def process_answer(question, full_text):
     from langchain.chains import RetrievalQA
     from langchain import HuggingFacePipeline
     from transformers import pipeline
-    import os
-    import shutil
 
-    # Save to temp file and load it as document
+    # Save the full_text to a temporary file
     with open("temp_text.txt", "w") as f:
         f.write(full_text)
 
     loader = TextLoader("temp_text.txt")
     docs = loader.load()
 
-    # Chunking the docs
+    # Chunk the documents
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
     splits = text_splitter.split_documents(docs)
 
-    # Embeddings
+    # Load embeddings
     embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
 
-    # Clean up old DB if exists
-    if os.path.exists("chroma_db"):
-        shutil.rmtree("chroma_db")
+    # Create a temporary directory for ChromaDB
+    chroma_dir = os.path.join(tempfile.gettempdir(), "chroma_db")
+    if os.path.exists(chroma_dir):
+        import shutil
+        shutil.rmtree(chroma_dir)
 
-    db = Chroma.from_documents(splits, embeddings, persist_directory="chroma_db")
+    db = Chroma.from_documents(splits, embeddings, persist_directory=chroma_dir)
     retriever = db.as_retriever()
 
-    # Model pipeline
+    # Set up the model
     pipe = pipeline("text2text-generation", model="MBZUAI/LaMini-T5-738M", max_length=512)
     llm = HuggingFacePipeline(pipeline=pipe)
 
-    # Retrieval QA chain
-    qa_chain = RetrievalQA.from_chain_type(
-        llm=llm,
-        retriever=retriever,
-        return_source_documents=False
-    )
+    # RAG-style retrieval QA
+    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
 
-    # Check if question is about summarization
+    # Smart prompting
     if "summarize" in question.lower() or "summary" in question.lower() or "tl;dr" in question.lower():
-        prompt = f"Summarize the following document:\n\n{full_text[:3000]}"  # trimming to 3K chars for model
+        prompt = f"Summarize the following document:\n\n{full_text[:3000]}"
         summary = llm(prompt)
         return summary
     else:
-        answer = qa_chain.run(question)
-        return answer
 
 
+        return qa_chain.run(question)
+
 
 # --- UI Layout ---
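
The substantive change in this commit is where the Chroma index lives: instead of a chroma_db folder in the working directory (often read-only on hosted runtimes such as Hugging Face Spaces), the new code builds it under tempfile.gettempdir(). One caveat of a single shared temp path is that overlapping requests can still rmtree() each other's index. A minimal sketch of a per-call variant, assuming the same langchain_community classes used elsewhere in app.py; build_retriever and the prefix value are illustrative names, not part of the commit:

# Sketch (not from the commit): a unique Chroma directory per call via
# tempfile.mkdtemp(), so concurrent requests never delete each other's index.
import tempfile

from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma

def build_retriever(splits):
    """Hypothetical helper: index pre-split documents in a fresh temp directory."""
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    chroma_dir = tempfile.mkdtemp(prefix="chroma_db_")  # unique, writable path
    db = Chroma.from_documents(splits, embeddings, persist_directory=chroma_dir)
    # The caller owns cleanup: shutil.rmtree(chroma_dir) once answering is done.
    return db.as_retriever(), chroma_dir

The trade-off: mkdtemp() leaks directories unless the caller cleans up, while the committed gettempdir()-plus-rmtree approach reuses one path but is not safe under concurrent requests.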
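
For reference, a hypothetical call into the updated process_answer (the file name and sample questions are illustrative only; the routing logic is from the diff):

# The question text decides the branch: "summarize"/"summary"/"tl;dr" routes to
# direct LLM summarization of the first 3000 chars; anything else to RetrievalQA.
full_text = open("my_document.txt").read()  # hypothetical input document
print(process_answer("tl;dr of this document?", full_text))         # summarization branch
print(process_answer("Which model answers questions?", full_text))  # RetrievalQA branch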