sbaiiinfo / ragas_finetune_eval /eval_data_loader.py
Sujal Bhat
deliverables
0d0eac6
raw
history blame contribute delete
552 Bytes
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from eval_config import CHUNK_SIZE, CHUNK_OVERLAP
def load_training_documents(file_path):
    """Load a PDF and split its contents into chunks.

    The file at ``file_path`` is read with ``PyPDFLoader``. The loaded
    documents are then handed to a ``RecursiveCharacterTextSplitter``
    configured with ``CHUNK_SIZE`` and ``CHUNK_OVERLAP`` (both imported
    from ``eval_config``) and ``length_function=len``.

    Args:
        file_path: Location of the PDF; passed unchanged to ``PyPDFLoader``.

    Returns:
        The result of ``split_documents`` applied to the loaded documents.
    """
    # Load first, then build the splitter, so any failure surfaces in the
    # same order as a straightforward load-then-split sequence.
    loaded_pages = PyPDFLoader(file_path).load()
    chunker = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
        length_function=len,
    )
    chunks = chunker.split_documents(loaded_pages)
    return chunks
def load_sample_questions(questions):
    """Return the supplied sample questions unchanged.

    This is a pass-through: no copy, validation, or transformation is
    performed, so the caller gets back the very same object it passed in.

    Args:
        questions: The sample questions to hand back.

    Returns:
        The ``questions`` argument itself.
    """
    return questions