Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -169,13 +169,23 @@ Answer:
|
|
169 |
)
|
170 |
|
171 |
# Load PDF and split into chunks
|
|
|
|
|
|
|
|
|
172 |
def load_and_split_pdf(uploaded_file):
|
173 |
-
|
|
|
|
|
|
|
|
|
174 |
documents = loader.load()
|
175 |
-
|
|
|
|
|
176 |
chunks = text_splitter.split_documents(documents)
|
177 |
return chunks
|
178 |
-
|
179 |
# Build vectorstore from document chunks
|
180 |
def build_vectorstore(chunks):
|
181 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
|
|
169 |
)
|
170 |
|
171 |
# Load PDF and split into chunks
|
172 |
+
|
173 |
+
from langchain_community.document_loaders import PyPDFLoader
|
174 |
+
import tempfile
|
175 |
+
|
176 |
def load_and_split_pdf(uploaded_file):
    """Load an uploaded PDF and split its contents into text chunks.

    The upload is written to a temporary ``.pdf`` file because
    ``PyPDFLoader`` is constructed from a filesystem path. The temporary
    file is removed again once loading has finished (or failed), so
    repeated uploads do not pile up in the temp directory.

    Args:
        uploaded_file: A binary file-like object exposing ``read()`` that
            returns the PDF bytes (presumably a Streamlit upload -- confirm
            against the caller).

    Returns:
        The chunks produced by
        ``RecursiveCharacterTextSplitter.split_documents`` with
        ``chunk_size=500`` and ``chunk_overlap=50``.
    """
    import os  # local import: only needed for temp-file cleanup

    # delete=False so the file can be closed (and flushed) before the loader
    # reopens it by name; cleanup is done explicitly in the ``finally`` below.
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    tmp_file_path = tmp_file.name
    try:
        with tmp_file:
            tmp_file.write(uploaded_file.read())

        loader = PyPDFLoader(tmp_file_path)
        documents = loader.load()
    finally:
        # Best-effort removal: a failed delete must not mask a loader error.
        try:
            os.unlink(tmp_file_path)
        except OSError:
            pass

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_documents(documents)
    return chunks
|
188 |
+
|
189 |
# Build vectorstore from document chunks
|
190 |
def build_vectorstore(chunks):
|
191 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|