"""Helpers that load evaluation inputs: chunked PDF documents and sample questions."""

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

from eval_config import CHUNK_OVERLAP, CHUNK_SIZE


def load_training_documents(file_path):
    """Load the PDF at *file_path* and return it split into chunks.

    The file is read with ``PyPDFLoader`` and the loaded documents are passed
    to a ``RecursiveCharacterTextSplitter`` configured from the
    ``CHUNK_SIZE`` and ``CHUNK_OVERLAP`` values in ``eval_config``.

    Args:
        file_path: Location of the PDF, forwarded unchanged to ``PyPDFLoader``.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents`` on
        the loaded documents.
    """
    # Loader is constructed (and the PDF read) before the splitter is built,
    # so loader errors surface ahead of any splitter configuration errors.
    loaded_pages = PyPDFLoader(file_path).load()

    # Chunk length is measured with the built-in ``len``.
    splitter_options = {
        "chunk_size": CHUNK_SIZE,
        "chunk_overlap": CHUNK_OVERLAP,
        "length_function": len,
    }
    chunker = RecursiveCharacterTextSplitter(**splitter_options)

    return chunker.split_documents(loaded_pages)


def load_sample_questions(questions):
    """Return *questions* exactly as given; no copying or validation is done."""
    return questions