File size: 769 Bytes
600c297
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from langchain.document_loaders import PyPDFLoader
from quizz_generator import generate_quizz
from langchain.text_splitter import NLTKTextSplitter
import nltk
from typing import List

# Module-level side effect: runs on import and asks NLTK to download its
# 'punkt' resource.
# NOTE(review): presumably needed by NLTKTextSplitter's sentence tokenizer
# used below — confirm before moving or removing this call.
nltk.download('punkt')

def pdf_to_quizz(pdf_file_name, *, chunk_size=700, max_paragraphs=10):
    """Generate a quiz from the leading text chunks of a PDF file.

    The PDF is loaded with ``PyPDFLoader`` and split with an
    ``NLTKTextSplitter`` (no overlap between chunks). The first
    ``max_paragraphs`` chunks are flattened to single-line strings
    (newlines replaced by spaces, surrounding whitespace stripped) and
    handed to ``generate_quizz`` as one list.

    Args:
        pdf_file_name: Location of the PDF, passed unchanged to
            ``PyPDFLoader``.
        chunk_size: ``chunk_size`` forwarded to ``NLTKTextSplitter``.
            Defaults to 700, the value previously hard-coded here.
        max_paragraphs: Upper bound on how many leading chunks are sent
            to ``generate_quizz``. Defaults to 10, the value previously
            hard-coded here. ``None`` forwards every chunk.

    Returns:
        Whatever ``generate_quizz`` returns for the selected paragraphs.
    """
    splitter = NLTKTextSplitter(chunk_size=chunk_size, chunk_overlap=0)
    docs = PyPDFLoader(pdf_file_name).load_and_split(splitter)

    # Slice before cleaning so only the chunks that are actually used get
    # normalised; the result is the same as cleaning everything first.
    batch_paragraph: List[str] = [
        doc.page_content.replace("\n", " ").strip()
        for doc in docs[:max_paragraphs]
    ]

    return generate_quizz(batch_paragraph)
  
# def process_paragraph(paragraph):
#     return  generate_quizz(paragraph)