Spaces:

Rupesx007
/

quiz_generator

Sleeping

App Files Files Community

quiz_generator / app.py

Rupesx007

updated app.py

b69329e verified about 2 months ago

raw

history blame contribute delete

17.2 kB

	import os
	import re
	import PyPDF2
	import docx
	import googleapiclient.discovery
	import nltk
	from nltk.tokenize import sent_tokenize
	from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
	from youtube_transcript_api import YouTubeTranscriptApi
	import streamlit as st
	import pandas as pd
	import random
	from io import StringIO
	import logging

	# Configure root logging once and expose a module-level logger for this file.
	logging.basicConfig(
	    level=logging.INFO,
	    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
	)
	logger = logging.getLogger(__name__)

	# Fetch the NLTK data packages this module relies on for its tokenizer and
	# POS-tagger calls.
	for _nltk_resource in ('punkt', 'averaged_perceptron_tagger'):
	    nltk.download(_nltk_resource)

	class QuizGenerator:
	    """Turn raw text (or text pulled from documents/videos) into quiz items.

	    The constructor loads three Hugging Face models: a summarizer, a seq2seq
	    question generator and an extractive question-answering pipeline.
	    The ``extract_text_from_*`` and ``get_youtube_transcript`` helpers are
	    best-effort: they log the failure and return ``""`` instead of raising.
	    """

	    # Stock options, used only to pad a question whose context sentence does
	    # not contain enough usable nouns.
	    GENERIC_DISTRACTORS = ("None of the above", "Cannot be determined", "All of the above")

	    # POS tags (as returned by nltk.pos_tag) accepted as distractor candidates.
	    NOUN_TAGS = frozenset({'NN', 'NNS', 'NNP', 'NNPS'})

	    # Words inserted in front of a random word to falsify a statement.
	    NEGATION_WORDS = ("not", "never", "rarely", "incorrectly")

	    def __init__(self):
	        """Load the summarization, question-generation and QA models."""
	        # Initialize the summarizer and question generator models
	        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

	        # Load question generation model
	        self.qg_model = AutoModelForSeq2SeqLM.from_pretrained("valhalla/t5-base-qg-hl")
	        self.qg_tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-base-qg-hl")

	        # Extractive QA model used to find an answer for each generated question
	        self.qa_model = pipeline('question-answering', model='distilbert-base-cased-distilled-squad')

	    def extract_text_from_pdf(self, pdf_file):
	        """Extract text from a PDF file.

	        Args:
	            pdf_file: Path or binary file-like object accepted by
	                ``PyPDF2.PdfReader``.

	        Returns:
	            The text of every page, each followed by a newline, or ``""`` if
	            the file cannot be read.
	        """
	        try:
	            pdf_reader = PyPDF2.PdfReader(pdf_file)
	            # A page without a text layer may yield None/""; treat it as
	            # empty instead of letting one page discard the whole document.
	            return "".join((page.extract_text() or "") + "\n" for page in pdf_reader.pages)
	        except Exception as e:
	            logger.error("Error extracting text from PDF: %s", e)
	            return ""

	    def extract_text_from_docx(self, docx_file):
	        """Extract text from a DOCX file.

	        Args:
	            docx_file: Path or file-like object accepted by ``docx.Document``.

	        Returns:
	            The text of every paragraph, each followed by a newline, or ``""``
	            if the file cannot be read.
	        """
	        try:
	            doc = docx.Document(docx_file)
	            return "".join(para.text + "\n" for para in doc.paragraphs)
	        except Exception as e:
	            logger.error("Error extracting text from DOCX: %s", e)
	            return ""

	    def extract_text_from_txt(self, txt_file):
	        """Extract text from a TXT file.

	        Args:
	            txt_file: File-like object whose ``read()`` returns UTF-8 bytes
	                (already-decoded ``str`` content is passed through).

	        Returns:
	            The decoded text without a leading byte-order mark, or ``""`` if
	            reading or decoding fails.
	        """
	        try:
	            raw = txt_file.read()
	            if isinstance(raw, bytes):
	                # 'utf-8-sig' decodes plain UTF-8 too and drops a leading BOM.
	                return raw.decode('utf-8-sig')
	            return raw
	        except Exception as e:
	            logger.error("Error extracting text from TXT: %s", e)
	            return ""

	    def get_youtube_transcript(self, video_id):
	        """Extract transcript from a YouTube video.

	        Args:
	            video_id: The YouTube video identifier.

	        Returns:
	            All transcript snippets joined by single spaces, or ``""`` if the
	            transcript cannot be fetched.
	        """
	        try:
	            # NOTE(review): newer youtube-transcript-api releases appear to
	            # replace this static call with an instance-level fetch API;
	            # confirm against the pinned version.
	            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
	            return ' '.join(item['text'] for item in transcript_list)
	        except Exception as e:
	            logger.error("Error getting YouTube transcript: %s", e)
	            return ""

	    def summarize_text(self, text, max_length=1000):
	        """Summarize long text to make processing more efficient.

	        Args:
	            text: Input text.
	            max_length: Texts of at most this many characters are returned
	                unchanged.

	        Returns:
	            ``text`` itself when it is short enough, otherwise the summaries
	            of its ~3000-character chunks joined by spaces.
	        """
	        if len(text) <= max_length:
	            return text

	        summaries = []
	        for chunk in self._split_text_into_chunks(text, max_length=3000):
	            if len(chunk) < 100:  # Skip chunks that are too small
	                continue

	            # truncation=True keeps a single over-long sentence from
	            # exceeding the model's input limit.
	            summary = self.summarizer(
	                chunk, max_length=300, min_length=100, do_sample=False, truncation=True
	            )
	            summaries.append(summary[0]['summary_text'])

	        return " ".join(summaries)

	    def _split_text_into_chunks(self, text, max_length=3000):
	        """Split text into sentence-aligned chunks of about max_length characters."""
	        return self._pack_sentences(sent_tokenize(text), max_length)

	    @staticmethod
	    def _pack_sentences(sentences, max_length):
	        """Greedily pack sentences into space-joined chunks.

	        A chunk never exceeds ``max_length`` characters unless a single
	        sentence is itself longer, in which case that sentence forms its own
	        chunk. Empty chunks are never produced.
	        """
	        chunks = []
	        current = ""
	        for sentence in sentences:
	            sentence = sentence.strip()
	            if not sentence:
	                continue
	            candidate = f"{current} {sentence}" if current else sentence
	            if current and len(candidate) > max_length:
	                chunks.append(current)
	                current = sentence
	            else:
	                current = candidate

	        if current:
	            chunks.append(current)
	        return chunks

	    def generate_questions(self, text, num_questions=5):
	        """Generate questions based on the input text.

	        Args:
	            text: Source material.
	            num_questions: Maximum number of questions to return.

	        Returns:
	            A list of dicts with keys ``'question'``, ``'answer'`` and
	            ``'context'`` (the sentence the question was generated from).
	            Returns ``[]`` if anything fails.
	        """
	        try:
	            # Summarize text if it's too long
	            processed_text = self.summarize_text(text)

	            sentences = sent_tokenize(processed_text)
	            random.shuffle(sentences)  # Randomize to get different questions each time

	            questions = []
	            seen_questions = set()

	            # Examine up to three times as many sentences as requested, since
	            # some are skipped or rejected below.
	            for sentence in sentences[:num_questions * 3]:
	                if len(questions) >= num_questions:
	                    break
	                if len(sentence.split()) < 5:  # Skip short sentences
	                    continue

	                # NOTE(review): the "-qg-hl" checkpoint name suggests the model
	                # expects the answer span wrapped in <hl> tokens; confirm that
	                # un-highlighted sentences give acceptable questions.
	                input_text = f"generate question: {sentence}"
	                inputs = self.qg_tokenizer.encode(
	                    input_text, return_tensors="pt", max_length=512, truncation=True
	                )
	                outputs = self.qg_model.generate(
	                    inputs, max_length=64, num_beams=4, early_stopping=True
	                )
	                question = self.qg_tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

	                # Skip empty generations and questions already asked.
	                question_key = question.lower()
	                if not question or question_key in seen_questions:
	                    continue
	                seen_questions.add(question_key)

	                # Use QA model to get the answer from the whole processed text
	                answer = self.qa_model({'question': question, 'context': processed_text})

	                if answer['score'] > 0.1:  # Only keep questions with reasonable confidence
	                    questions.append({
	                        'question': question,
	                        'answer': answer['answer'],
	                        'context': sentence,
	                    })

	            return questions

	        except Exception as e:
	            logger.error("Error generating questions: %s", e)
	            return []

	    def generate_mcq(self, questions, num_options=4):
	        """Convert open-ended questions to multiple-choice questions.

	        Args:
	            questions: Items as returned by ``generate_questions``.
	            num_options: Total options per question, including the correct one.

	        Returns:
	            A list of dicts with keys ``'question'``, ``'options'``,
	            ``'correct_answer'`` and ``'correct_index'`` (position of the
	            correct answer inside ``'options'``).
	        """
	        mcqs = []

	        for q in questions:
	            correct_answer = q['answer']
	            distractors = self._generate_distractors(q['context'], correct_answer, num_options - 1)

	            options = distractors + [correct_answer]
	            random.shuffle(options)

	            mcqs.append({
	                'question': q['question'],
	                'options': options,
	                'correct_answer': correct_answer,
	                'correct_index': options.index(correct_answer),
	            })

	        return mcqs

	    def _generate_distractors(self, context, correct_answer, num_distractors=3):
	        """Generate plausible but incorrect answers from the nouns in context."""
	        pos_tags = nltk.pos_tag(nltk.word_tokenize(context))
	        nouns = [word for word, pos in pos_tags if pos in self.NOUN_TAGS]
	        return self._select_distractors(nouns, correct_answer, num_distractors)

	    @classmethod
	    def _select_distractors(cls, candidates, correct_answer, num_distractors):
	        """Pick up to num_distractors wrong options from candidate words.

	        Candidates that are shorter than three characters, equal to the
	        correct answer, or one of the words making up the correct answer are
	        dropped (case-insensitively), as are duplicates. Generic options are
	        appended only when too few candidates remain.
	        """
	        if num_distractors <= 0:
	            return []

	        answer_lower = correct_answer.lower()
	        answer_words = set(re.findall(r"\w+", answer_lower))

	        seen = set()
	        pool = []
	        for word in candidates:
	            key = word.lower()
	            if len(word) <= 2 or key in seen or key == answer_lower or key in answer_words:
	                continue
	            seen.add(key)
	            pool.append(word)

	        random.shuffle(pool)
	        chosen = pool[:num_distractors]

	        missing = num_distractors - len(chosen)
	        if missing > 0:
	            fillers = [g for g in cls.GENERIC_DISTRACTORS if g.lower() != answer_lower]
	            random.shuffle(fillers)
	            chosen.extend(fillers[:missing])

	        return chosen

	    @classmethod
	    def _negate_statement(cls, statement):
	        """Return a falsified copy of statement, or None if it is too short.

	        A randomly chosen negation word is inserted in front of a randomly
	        chosen word. Statements of four words or fewer are not modified.
	        """
	        words = statement.split()
	        if len(words) <= 4:
	            return None
	        change_idx = random.randint(0, len(words) - 1)
	        words[change_idx] = random.choice(cls.NEGATION_WORDS) + " " + words[change_idx]
	        return " ".join(words)

	    def generate_true_false(self, text, num_questions=5):
	        """Generate true/false questions from text.

	        Args:
	            text: Source material.
	            num_questions: Maximum number of statements to return.

	        Returns:
	            A shuffled list of dicts with keys ``'statement'`` and
	            ``'is_true'``; ``[]`` if anything fails.
	        """
	        try:
	            # Generate factual statements first
	            questions = self.generate_questions(text, num_questions)
	            true_false = []

	            for q in questions:
	                statement = q['context']
	                # Original statement is true
	                true_false.append({'statement': statement, 'is_true': True})

	                # Add a false variant only when one could actually be built,
	                # so a short context never reuses a previous false statement.
	                falsified = self._negate_statement(statement)
	                if falsified is not None:
	                    true_false.append({'statement': falsified, 'is_true': False})

	            # Shuffle and return required number
	            random.shuffle(true_false)
	            return true_false[:num_questions]

	        except Exception as e:
	            logger.error("Error generating true/false questions: %s", e)
	            return []

	def extract_youtube_video_id(url):
	    """Return the 11-character video ID found in a YouTube URL, or None.

	    The ID is taken from the first ``v=`` parameter or path segment that is
	    followed by 11 ID characters (letters, digits, ``_`` or ``-``).
	    """
	    # The alternation bar must be unescaped: "\|" would match a literal pipe
	    # and reject every real URL.
	    match = re.search(r'(?:v=|/)([0-9A-Za-z_-]{11})', url or "")
	    return match.group(1) if match else None


	def _load_quiz_generator():
	    """Build the QuizGenerator; its constructor loads all models."""
	    return QuizGenerator()


	def _preview_text(label, text, limit=500):
	    """Show the first `limit` characters of text inside a collapsed expander."""
	    with st.expander(label):
	        st.text(text[:limit] + "..." if len(text) > limit else text)


	def _read_uploaded_document(quiz_gen, uploaded_file):
	    """Extract text from an uploaded PDF/DOCX/TXT file based on its extension."""
	    # Compare case-insensitively so "REPORT.PDF" is handled like "report.pdf".
	    filename = uploaded_file.name.lower()
	    if filename.endswith('.pdf'):
	        return quiz_gen.extract_text_from_pdf(uploaded_file)
	    if filename.endswith('.docx'):
	        return quiz_gen.extract_text_from_docx(uploaded_file)
	    if filename.endswith('.txt'):
	        return quiz_gen.extract_text_from_txt(uploaded_file)
	    return ""


	def _render_mcqs(mcqs):
	    """Display multiple-choice questions with a hidden answer for each."""
	    st.success(f"Generated {len(mcqs)} multiple choice questions!")

	    for i, q in enumerate(mcqs, 1):
	        st.subheader(f"Question {i}: {q['question']}")

	        # Derive the letters from the option count instead of assuming four.
	        option_letters = [chr(ord('A') + k) for k in range(len(q['options']))]
	        for letter, option in zip(option_letters, q['options']):
	            st.write(f"{letter}. {option}")

	        with st.expander("Reveal Answer"):
	            st.write(f"Correct Answer: {option_letters[q['correct_index']]}. {q['correct_answer']}")

	        st.divider()


	def _render_true_false(tf_questions):
	    """Display true/false statements with a hidden answer for each."""
	    st.success(f"Generated {len(tf_questions)} true/false questions!")

	    for i, q in enumerate(tf_questions, 1):
	        st.subheader(f"Question {i}: True or False?")
	        st.write(q['statement'])

	        with st.expander("Reveal Answer"):
	            st.write(f"Answer: {'True' if q['is_true'] else 'False'}")

	        st.divider()


	def _render_open_ended(questions):
	    """Display open-ended questions with a hidden suggested answer and context."""
	    st.success(f"Generated {len(questions)} open-ended questions!")

	    for i, q in enumerate(questions, 1):
	        st.subheader(f"Question {i}: {q['question']}")

	        with st.expander("Reveal Answer"):
	            st.write(f"Suggested Answer: {q['answer']}")
	            st.write(f"Context: {q['context']}")

	        st.divider()


	def create_streamlit_app():
	    """Render the QuizWhiz Streamlit page.

	    Collects content from one of four sources (pasted text, uploaded
	    document, YouTube transcript, or a placeholder paragraph about a topic)
	    and, on request, generates and displays a quiz of the selected type.
	    """
	    st.set_page_config(page_title="QuizWhiz", page_icon="📚", layout="wide")

	    st.title("QuizWhiz - Comprehensive Quiz Generator")
	    st.subheader("Generate quizzes from various sources: text, documents, and YouTube videos")

	    # Streamlit re-runs this function on every interaction; cache the
	    # generator so the models are loaded once instead of on every rerun.
	    quiz_gen = st.cache_resource(show_spinner="Loading models...")(_load_quiz_generator)()

	    # Sidebar for options
	    st.sidebar.header("Quiz Options")
	    quiz_type = st.sidebar.selectbox(
	        "Question Type",
	        ["Multiple Choice", "True/False", "Open-Ended"]
	    )
	    num_questions = st.sidebar.slider("Number of Questions", 3, 20, 5)

	    # Source selection
	    st.header("Select Your Content Source")
	    source_type = st.radio(
	        "Content Source",
	        ["Text Input", "Document Upload", "YouTube Video", "Topic/Subject"]
	    )

	    text_content = ""

	    # Handle different source types
	    if source_type == "Text Input":
	        text_content = st.text_area("Enter your text content here:", height=250)

	    elif source_type == "Document Upload":
	        uploaded_file = st.file_uploader("Upload your document", type=['pdf', 'docx', 'txt'])

	        if uploaded_file is not None:
	            st.success(f"File '{uploaded_file.name}' uploaded successfully!")
	            text_content = _read_uploaded_document(quiz_gen, uploaded_file)

	            # Whitespace-only output (e.g. a PDF with no text layer) counts
	            # as a failed extraction.
	            if text_content.strip():
	                _preview_text("Preview Extracted Text", text_content)
	            else:
	                st.error("Failed to extract text from the document.")

	    elif source_type == "YouTube Video":
	        youtube_url = st.text_input("Enter YouTube Video URL:")

	        if youtube_url:
	            video_id = extract_youtube_video_id(youtube_url)

	            if video_id:
	                # Show video embed
	                st.video(youtube_url)

	                with st.spinner("Extracting video transcript..."):
	                    text_content = quiz_gen.get_youtube_transcript(video_id)

	                if text_content:
	                    _preview_text("Preview Transcript", text_content)
	                else:
	                    st.error("Failed to extract transcript. This video might not have captions.")
	            else:
	                st.error("Invalid YouTube URL. Please enter a valid URL.")

	    elif source_type == "Topic/Subject":
	        topic = st.text_input("Enter a topic or subject:")

	        if topic:
	            # For this demo, we'll use a predefined text about the topic
	            # In a real app, you might use an API to fetch content about the topic
	            st.info(f"Generating quiz about: {topic}")
	            text_content = f"The topic of {topic} is a fascinating subject to explore. " \
	                           f"There are many important concepts and facts related to {topic} " \
	                           f"that make it an essential area of study. Understanding {topic} " \
	                           f"requires careful consideration of its key principles."

	            # Placeholder for a real implementation that would gather information about the topic
	            st.warning("In a complete implementation, this would gather information about the topic from reliable sources.")

	    # Generate Quiz Button
	    if text_content and text_content.strip():
	        if st.button("Generate Quiz"):
	            with st.spinner("Generating quiz questions..."):
	                if quiz_type == "Multiple Choice":
	                    # Generate questions first, then convert them to MCQs
	                    questions = quiz_gen.generate_questions(text_content, num_questions)
	                    mcqs = quiz_gen.generate_mcq(questions)

	                    if mcqs:
	                        _render_mcqs(mcqs)
	                    else:
	                        st.error("Failed to generate questions. Try with different content.")

	                elif quiz_type == "True/False":
	                    tf_questions = quiz_gen.generate_true_false(text_content, num_questions)

	                    if tf_questions:
	                        _render_true_false(tf_questions)
	                    else:
	                        st.error("Failed to generate true/false questions. Try with different content.")

	                elif quiz_type == "Open-Ended":
	                    questions = quiz_gen.generate_questions(text_content, num_questions)

	                    if questions:
	                        _render_open_ended(questions)
	                    else:
	                        st.error("Failed to generate questions. Try with different content.")
	    else:
	        st.info("Please provide content or select a source to generate a quiz.")

	    # Footer
	    st.sidebar.divider()
	    st.sidebar.caption("QuizWhiz - Powered by AI")
	    st.sidebar.caption("© 2025 QuizWhiz Technologies")


	# Build the page when this file is executed as a script.
	if __name__ == "__main__":
	    create_streamlit_app()