Spaces:

pradeepsengarr
/

Bot_RAG

Sleeping

pradeepsengarr committed on Apr 16

Commit

c1bd4a1

verified ·

1 Parent(s): 6e1c9c8

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -169,13 +169,23 @@ Answer:
 )
 # Load PDF and split into chunks
 def load_and_split_pdf(uploaded_file):
-    loader = PyPDFLoader(uploaded_file.name)
     documents = loader.load()
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
     chunks = text_splitter.split_documents(documents)
     return chunks
 # Build vectorstore from document chunks
 def build_vectorstore(chunks):
     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

 )
 # Load PDF and split into chunks
+from langchain_community.document_loaders import PyPDFLoader
+import tempfile
 def load_and_split_pdf(uploaded_file):
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
+        tmp_file.write(uploaded_file.read())
+        tmp_file_path = tmp_file.name
+    loader = PyPDFLoader(tmp_file_path)
     documents = loader.load()
+    # Then your text splitting logic follows
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
     chunks = text_splitter.split_documents(documents)
     return chunks
 # Build vectorstore from document chunks
 def build_vectorstore(chunks):
     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")