Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import io
|
2 |
+
import os
|
3 |
+
import streamlit as st
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
from PyPDF2 import PdfReader
|
6 |
+
from langchain_community.embeddings import OpenAIEmbeddings
|
7 |
+
from langchain_community.vectorstores import FAISS
|
8 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
9 |
+
from langchain_community.llms import OpenAI
|
10 |
+
import openai
|
11 |
+
import random
|
12 |
+
|
13 |
+
# Load environment variables
load_dotenv()
# NOTE(review): may be None if no .env / env var is set — OpenAI calls will
# then fail at request time rather than here; confirm deployment provides it.
openai_api_key = os.getenv('OPENAI_API_KEY')
openai.api_key = openai_api_key

# Initialize Streamlit session states
# vectorDB caches the FAISS index across Streamlit reruns of this script.
if 'vectorDB' not in st.session_state:
    st.session_state.vectorDB = None
|
21 |
+
|
22 |
+
# Function to extract text from a PDF file
def get_pdf_text(pdf):
    """Extract and concatenate the text of every page of a PDF.

    Args:
        pdf: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader`` (e.g. a Streamlit ``UploadedFile``).

    Returns:
        str: Text of all pages concatenated; pages without an extractable
        text layer contribute the empty string.
    """
    pdf_reader = PdfReader(pdf)
    # extract_text() can return None for image-only pages; coalesce to ""
    # so the concatenation never raises TypeError. join() also avoids the
    # quadratic behavior of repeated string +=.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
|
29 |
+
|
30 |
+
def get_text_chunks(text: str):
    """Split *text* into overlapping chunks suitable for embedding.

    Args:
        text: The full document text.

    Returns:
        list[str]: Chunks of at most 1000 characters with 100 characters
        of overlap between consecutive chunks.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
        length_function=len,
        is_separator_regex=False,
    )
    return splitter.split_text(text)
|
40 |
+
|
41 |
+
def get_vectorstore(text_chunks):
    """Build a FAISS vector store from *text_chunks*.

    Embeds every chunk with OpenAI embeddings and stores the vectors in an
    in-memory FAISS index.

    Args:
        text_chunks: Iterable of text chunks to embed.

    Returns:
        The populated FAISS vector store.
    """
    return FAISS.from_texts(texts=text_chunks, embedding=OpenAIEmbeddings())
|
46 |
+
|
47 |
+
def search_vectorstore(vectorDB, query):
    """Search the vector database for content matching *query*.

    Args:
        vectorDB: A vector store exposing ``search(query, search_type=...)``.
        query: Free-text query string.

    Returns:
        The store's similarity-search results for *query*.
    """
    # Plain similarity search (no MMR / score-threshold variants).
    return vectorDB.search(query, search_type='similarity')
|
53 |
+
|
54 |
+
def generate_quiz_questions(search_results, num_questions):
    """Generate and display multiple-choice quiz questions with Streamlit.

    For each question, asks GPT-3.5-turbo-16k to write a multiple-choice
    question from a randomly chosen search-result chunk, builds four
    shuffled options, then renders the whole quiz.

    Args:
        search_results: Non-empty sequence of text chunks to base questions on.
        num_questions: Number of questions to generate.

    NOTE(review): reads the module-level globals ``quiz_name`` and
    ``quiz_topic`` assigned in the ``__main__`` section — confirm they are
    set before calling this from anywhere else.
    """
    st.header(f"Quiz Generator: {quiz_name}")
    st.subheader(f"Topic: {quiz_topic}")

    # Accumulates (question, options, correct_answer) tuples.
    quiz_questions = []

    for i in range(num_questions):
        prompt = f"Generate a multiple-choice question and correct answer for the same related to:\n- {random.choice(search_results)}"

        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ]

        # Request exactly ONE completion per loop iteration. The original
        # passed n=num_questions here while also looping num_questions
        # times, producing num_questions**2 completions but only ever
        # reading choices[0] — wasted tokens and cost.
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo-16k",
            messages=messages,
            max_tokens=200,
            n=1,
            stop=None,
            temperature=0.7,
        )

        # Extract the generated question from the response.
        question = response['choices'][0]['message']['content'].strip()
        # NOTE(review): the "correct answer" is an independently sampled
        # chunk — it is not derived from the generated question. Flagging,
        # not changing, since fixing it needs a different prompt contract.
        correct_answer = f"Correct Answer: {random.choice(search_results)}"

        # Build four options (3 incorrect + 1 correct) and shuffle so the
        # correct one isn't always last.
        correct_option = f"Correct: {random.choice(search_results)}"
        incorrect_options = [f"Incorrect: {random.choice(search_results)}" for _ in range(3)]
        options = incorrect_options + [correct_option]
        random.shuffle(options)

        quiz_questions.append((question, options, correct_answer))

    # Display quiz questions with their options.
    st.write('Quiz Questions:')
    for i, (question, options, correct_answer) in enumerate(quiz_questions, 1):
        st.write(f"{i}. {question}")
        # The original computed the correct option here but never rendered
        # any options at all, leaving the quiz unanswerable; show them.
        for option in options:
            st.write(f"   - {option}")
|
103 |
+
|
104 |
+
if __name__ == '__main__':
    st.set_page_config(page_title="CB Quiz Generator", page_icon="📝")
    st.title('🤖CB Quiz Generator🧠')

    # User inputs
    quiz_name = st.text_input('Enter Quiz Name:')
    quiz_topic = st.text_input('Enter Quiz Topic:')
    num_questions = st.number_input('Enter Number of Questions:', min_value=1, value=1, step=1)
    pdf_content = st.file_uploader("Upload PDF Content for Questions:", type='pdf')

    # Generate the quiz only once every input is provided.
    if quiz_name and quiz_topic and num_questions and pdf_content:
        # Process PDF and create vector database.
        vectorDB = get_vectorstore(get_text_chunks(get_pdf_text(pdf_content)))

        # Search the vector database for the topic and build the quiz.
        # The original re-checked `if quiz_topic:` here with an `else`
        # error message, but the enclosing condition already guarantees
        # quiz_topic is truthy, so that error branch was unreachable.
        search_results = search_vectorstore(vectorDB, quiz_topic)
        generate_quiz_questions(search_results, num_questions)
    elif pdf_content and not quiz_topic:
        # Surface the previously-unreachable validation message in a state
        # where it can actually fire: a PDF is uploaded but no topic given.
        st.error('Please provide a topic to search for!')
|