mcqt / pdf_to_quizz.py
Nikhil2904's picture
Upload 15 files
600c297 verified
raw
history blame contribute delete
769 Bytes
from langchain.document_loaders import PyPDFLoader
from quizz_generator import generate_quizz
from langchain.text_splitter import NLTKTextSplitter
import nltk
from typing import List
# Runs at import time: fetches NLTK's 'punkt' resource via the NLTK downloader.
# NOTE(review): presumably needed by NLTKTextSplitter's sentence tokenization — confirm.
nltk.download('punkt')
def pdf_to_quizz(pdf_file_name, max_paragraphs=10):
    """Generate a quiz from the leading text chunks of a PDF file.

    The PDF is loaded with ``PyPDFLoader`` and split with an
    ``NLTKTextSplitter`` (chunk size 700, no overlap). Newlines inside each
    chunk are replaced by spaces and surrounding whitespace is stripped.
    Only the first ``max_paragraphs`` chunks are handed to
    ``generate_quizz``.

    Args:
        pdf_file_name: Path of the PDF file, passed straight to
            ``PyPDFLoader``.
        max_paragraphs: Upper bound on the number of chunks forwarded to
            ``generate_quizz``. Defaults to 10, the limit this function has
            always applied.

    Returns:
        Whatever ``generate_quizz`` returns for the selected chunks.

    Raises:
        ValueError: If ``max_paragraphs`` is negative.
    """
    if max_paragraphs < 0:
        raise ValueError("max_paragraphs must be non-negative")

    loader = PyPDFLoader(pdf_file_name)
    docs = loader.load_and_split(NLTKTextSplitter(chunk_size=700, chunk_overlap=0))

    # Slice before cleaning so chunks beyond the limit are never processed.
    batch_paragraph: List[str] = [
        doc.page_content.replace("\n", " ").strip()
        for doc in docs[:max_paragraphs]
    ]
    return generate_quizz(batch_paragraph)
# def process_paragraph(paragraph):
# return generate_quizz(paragraph)