Spaces:
Build error
Build error
Create utils.py
Browse files
utils.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from langchain_community.document_loaders import PyMuPDFLoader
|
3 |
+
from langchain.schema import Document
|
4 |
+
|
5 |
+
# Function to format document content
|
6 |
+
def format_doc(doc: Document) -> str:
    """Render a document as a three-line text block.

    The first two lines show the ``title`` and ``page`` entries of
    ``doc.metadata`` (``Unknown`` is shown when a key is absent); the
    third line carries ``doc.page_content``.
    """
    meta = doc.metadata
    title = meta.get('title', 'Unknown')
    page = meta.get('page', 'Unknown')
    pieces = (
        f"Document_Title: {title}",
        f"Page: {page}",
        f"Content: {doc.page_content}",
    )
    return "\n".join(pieces)
|
8 |
+
|
9 |
+
# Function to load and process document
|
10 |
+
def load_document(uploaded_file):
    """Save an uploaded file to a private temp location and parse it.

    Args:
        uploaded_file: Upload object exposing ``name`` and ``getbuffer()``
            (presumably a Streamlit ``UploadedFile`` -- confirm against
            the caller).

    Returns:
        The result of ``PyMuPDFLoader(path).load()`` for the saved copy.
        The temporary copy is deleted before returning, so any path
        recorded in the returned metadata no longer exists on disk.
    """
    import os
    import tempfile

    # The upload name is client-controlled: keep only its last path
    # component so it cannot escape the temp directory (e.g. "../../x").
    safe_name = os.path.basename(str(uploaded_file.name).replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        safe_name = "upload.pdf"

    # A unique directory per call prevents same-named uploads from
    # overwriting each other and guarantees cleanup of the copy.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file_path = os.path.join(tmp_dir, safe_name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        # Parse while the file still exists; the directory is removed on exit.
        loader = PyMuPDFLoader(file_path)
        docs = loader.load()
    return docs
|