import contextlib
import os
import tempfile

from chainlit import AskFileMessage
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter


def split_file(file: AskFileMessage) -> list:
    """Split an uploaded PDF into overlapping text chunks.

    The upload's bytes are written to a temporary ``.pdf`` file, loaded with
    ``PyMuPDFLoader``, and split with a ``RecursiveCharacterTextSplitter``
    (``chunk_size=500``, ``chunk_overlap=100``).  Each resulting chunk gets
    its ``metadata["source"]`` set to ``"source_<index>"``, where ``<index>``
    is the chunk's position in the returned list.

    Args:
        file: The uploaded file. Only its ``content`` attribute is read; it
            is written in binary mode, so it is expected to be bytes-like.

    Returns:
        The list of chunk documents produced by the splitter, with their
        ``source`` metadata rewritten as described above.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
    )

    # delete=False so the file survives being closed: the loader reopens it by
    # path, which is not possible on every platform while this handle is open.
    with tempfile.NamedTemporaryFile(
        mode="wb", suffix=".pdf", delete=False
    ) as tmp:
        tmp.write(file.content)
        tmp_path = tmp.name

    try:
        documents = PyMuPDFLoader(tmp_path).load()
    finally:
        # Best-effort cleanup: a failure to remove the temp file must not
        # mask a loader error or discard successfully loaded documents.
        with contextlib.suppress(OSError):
            os.unlink(tmp_path)

    docs = text_splitter.split_documents(documents)
    for i, doc in enumerate(docs):
        doc.metadata["source"] = f"source_{i}"
    return docs