Update app.py
app.py CHANGED

@@ -522,6 +522,9 @@ def setup_qa(db):
 
     # return llm.invoke(prompt)
 
+import tempfile
+import os
+
 def process_answer(question, full_text):
     from langchain_community.document_loaders import TextLoader
     from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -530,49 +533,44 @@ def process_answer(question, full_text):
     from langchain.chains import RetrievalQA
     from langchain import HuggingFacePipeline
     from transformers import pipeline
-    import os
-    import shutil
 
-    # Save
+    # Save the full_text to a temporary file
     with open("temp_text.txt", "w") as f:
         f.write(full_text)
 
     loader = TextLoader("temp_text.txt")
     docs = loader.load()
 
-    #
+    # Chunk the documents
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
     splits = text_splitter.split_documents(docs)
 
-    #
+    # Load embeddings
     embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
 
-    #
-
-
+    # Create a temporary directory for ChromaDB
+    chroma_dir = os.path.join(tempfile.gettempdir(), "chroma_db")
+    if os.path.exists(chroma_dir):
+        import shutil
+        shutil.rmtree(chroma_dir)
 
-    db = Chroma.from_documents(splits, embeddings, persist_directory=
+    db = Chroma.from_documents(splits, embeddings, persist_directory=chroma_dir)
     retriever = db.as_retriever()
 
-    #
+    # Set up the model
     pipe = pipeline("text2text-generation", model="MBZUAI/LaMini-T5-738M", max_length=512)
     llm = HuggingFacePipeline(pipeline=pipe)
 
-    #
-    qa_chain = RetrievalQA.from_chain_type(
-        llm=llm,
-        retriever=retriever,
-        return_source_documents=False
-    )
+    # RAG-style retrieval QA
+    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
 
-    #
+    # Smart prompting
     if "summarize" in question.lower() or "summary" in question.lower() or "tl;dr" in question.lower():
-        prompt = f"Summarize the following document:\n\n{full_text[:3000]}"
+        prompt = f"Summarize the following document:\n\n{full_text[:3000]}"
         summary = llm(prompt)
         return summary
     else:
-
-        return answer
+        return qa_chain.run(question)
 
 
 # --- UI Layout ---
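
For reference, here is a consolidated sketch of process_answer as it reads after this change. The Chroma and SentenceTransformerEmbeddings imports are not visible in these hunks (they come from elsewhere in app.py), so the langchain_community import paths below are assumptions; the rest mirrors the new side of the diff, with descriptive comments added.

import tempfile
import os
import shutil

def process_answer(question, full_text):
    from langchain_community.document_loaders import TextLoader
    from langchain_community.embeddings import SentenceTransformerEmbeddings  # assumed import path
    from langchain_community.vectorstores import Chroma  # assumed import path
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.chains import RetrievalQA
    from langchain import HuggingFacePipeline
    from transformers import pipeline

    # Save the full_text to a temporary file so TextLoader can read it back
    with open("temp_text.txt", "w") as f:
        f.write(full_text)
    loader = TextLoader("temp_text.txt")
    docs = loader.load()

    # Chunk the document into overlapping pieces for retrieval
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    splits = text_splitter.split_documents(docs)

    # Load embeddings
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

    # Rebuild the Chroma index in the system temp directory on every call
    chroma_dir = os.path.join(tempfile.gettempdir(), "chroma_db")
    if os.path.exists(chroma_dir):
        shutil.rmtree(chroma_dir)
    db = Chroma.from_documents(splits, embeddings, persist_directory=chroma_dir)
    retriever = db.as_retriever()

    # Wrap the local seq2seq model as a LangChain LLM
    pipe = pipeline("text2text-generation", model="MBZUAI/LaMini-T5-738M", max_length=512)
    llm = HuggingFacePipeline(pipeline=pipe)

    # RAG-style retrieval QA over the indexed chunks
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

    # Summary-style questions bypass retrieval and prompt the model directly
    if "summarize" in question.lower() or "summary" in question.lower() or "tl;dr" in question.lower():
        prompt = f"Summarize the following document:\n\n{full_text[:3000]}"
        summary = llm(prompt)
        return summary
    else:
        return qa_chain.run(question)

Deleting and recreating the Chroma directory keeps each call scoped to the current document, at the cost of re-embedding the text every time process_answer runs.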