DrishtiSharma committed on
Commit
07de8c7
·
verified ·
1 Parent(s): 3ac097b

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +17 -0
utils.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain_community.document_loaders import PyMuPDFLoader
3
+ from langchain.schema import Document
4
+
5
+ # Function to format document content
6
def format_doc(doc: Document) -> str:
    """Render a document as a three-line, labelled text block.

    The fields are joined by newlines in this order:

    * ``Document_Title`` -- ``doc.metadata['title']``, or ``'Unknown'``
      when the key is absent.
    * ``Page`` -- ``doc.metadata['page']``, or ``'Unknown'`` when the key
      is absent.
    * ``Content`` -- ``doc.page_content`` verbatim.
    """
    meta = doc.metadata
    title = meta.get('title', 'Unknown')
    page = meta.get('page', 'Unknown')
    fields = (
        f"Document_Title: {title}",
        f"Page: {page}",
        f"Content: {doc.page_content}",
    )
    return "\n".join(fields)
8
+
9
+ # Function to load and process document
10
def load_document(uploaded_file):
    """Persist an uploaded file to a private temp directory and parse it.

    Args:
        uploaded_file: An object exposing ``.name`` (the client-supplied
            file name) and ``.getbuffer()`` (the raw bytes) -- presumably
            a Streamlit ``UploadedFile``; confirm against the caller.

    Returns:
        Whatever ``PyMuPDFLoader(file_path).load()`` returns for the
        written file (the parsed documents).

    Notes:
        The file is written into a fresh temporary directory that is
        removed before returning, so any path recorded by the loader in
        the documents' metadata no longer exists on disk afterwards.
    """
    # Local imports keep this block self-contained without touching the
    # file-level import section.
    import os
    import tempfile

    # The name comes from the client and must not be trusted: strip any
    # directory components (POSIX or Windows style) so the write cannot
    # escape the temporary directory.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        safe_name = "upload.pdf"

    # A per-call directory avoids collisions between concurrent uploads
    # that share a file name, and guarantees cleanup even if parsing fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file_path = os.path.join(tmp_dir, safe_name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        # load() must finish inside the ``with`` block, while the file
        # still exists.
        loader = PyMuPDFLoader(file_path)
        docs = loader.load()
    return docs