# Page header captured along with this file ("Spaces: Sleeping"); not part of the program.
import streamlit as st | |
import pandas as pd | |
import os | |
from src.FisrtModule.module1 import MisconceptionModel | |
from src.SecondModule.module2 import SimilarQuestionGenerator | |
from src.ThirdModule.module3 import AnswerVerifier | |
import logging | |
from typing import Optional, Tuple | |
from pylatexenc.latex2text import LatexNodes2Text | |
import re | |
logging.basicConfig(level=logging.DEBUG) | |
# Initialize Misconception Model | |
def load_misconception_model():
    """Construct the misconception model from the bundled data files.

    The mapping parquet and the embeddings file are both looked up under the
    module-level ``data_path``, which is read when this function is called.

    Returns:
        The ``MisconceptionModel`` instance built from those files.
    """
    mapping_file = os.path.join(data_path, 'misconception_mapping.parquet')
    embedding_files = [os.path.join(data_path, 'embs_misconception-9-9.npy')]
    return MisconceptionModel(
        model_name="minsuas/Misconceptions__1",
        misconception_mapping_path=mapping_file,
        misconception_embs_paths=embedding_files,
    )
# Basic Streamlit page configuration (title, wide layout, sidebar open).
st.set_page_config(page_title="MisconcepTutor", layout="wide", initial_sidebar_state="expanded")
def load_answer_verifier():
    """Load the answer-verification model.

    Returns:
        A freshly constructed ``AnswerVerifier``.
    """
    # Imported at call time, exactly as the original implementation did.
    from src.ThirdModule.module3 import AnswerVerifier as verifier_cls
    verifier = verifier_cls()
    return verifier
# Path settings
# Data files live in a "Data" directory next to this script.
base_path = os.path.dirname(os.path.abspath(__file__))
data_path = os.path.join(base_path, 'Data')
misconception_csv_path = os.path.join(base_path, 'Data', 'misconception_mapping.csv')

# Logging setup.
# NOTE(review): logging.basicConfig is also called near the imports with
# level=DEBUG; basicConfig does nothing once the root logger has handlers,
# so this INFO level is presumably not the effective one - confirm intent.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Session state initialization - placed near the top so that it runs first
if 'initialized' not in st.session_state:
    # First run of this browser session: seed every key the app reads later.
    st.session_state['initialized'] = True
    st.session_state['wrong_questions'] = []
    st.session_state['misconceptions'] = []
    st.session_state['current_question_index'] = 0
    st.session_state['generated_questions'] = []
    st.session_state['current_step'] = 'initial'
    st.session_state['selected_wrong_answer'] = None
    st.session_state['questions'] = []
    logger.info("Session state initialized")
# Question generator initialization
def load_question_generator():
    """Load the similar-question generator.

    Returns:
        A ``SimilarQuestionGenerator`` built on the misconception mapping CSV.

    Raises:
        FileNotFoundError: if the mapping CSV is missing; the same message is
            shown in the UI before raising.
    """
    csv_is_missing = not os.path.exists(misconception_csv_path)
    if csv_is_missing:
        message = f"CSV νμΌμ΄ μ‘΄μ¬νμ§ μμ΅λλ€: {misconception_csv_path}"
        st.error(message)
        raise FileNotFoundError(message)
    generator = SimilarQuestionGenerator(misconception_csv_path=misconception_csv_path)
    return generator
# CSV data loading function
#def load_data(data_file='/train.csv'): | |
def load_data(data_file='/processed_mathqa2.csv', selected_indexes=None):
    """Load the question CSV from the data directory.

    Args:
        data_file: File name relative to ``data_path``; leading slashes are
            stripped before joining.
        selected_indexes: Optional iterable of row index labels to keep. Rows
            are returned in the order given. Labels that do not exist in the
            file are skipped with a warning instead of raising ``KeyError``.

    Returns:
        The loaded (and optionally filtered) ``DataFrame``, or ``None`` when
        the file does not exist. The filtered frame may be empty when none of
        the requested labels exist.
    """
    file_path = os.path.join(data_path, data_file.lstrip('/'))
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        st.error(f"νμΌμ μ°Ύμ μ μμ΅λλ€: {data_file}")
        logger.error(f"File not found: {data_file}")
        return None
    logger.info(f"Data loaded successfully from {file_path}")

    if selected_indexes is not None:
        requested = list(selected_indexes)
        known_labels = set(df.index)
        missing = [label for label in requested if label not in known_labels]
        if missing:
            # .loc with a list raises KeyError on any unknown label, which
            # would otherwise escape this function uncaught.
            logger.warning(f"Selected indexes not present in data: {missing}")
        present = [label for label in requested if label in known_labels]
        df = df.loc[present]  # Filter rows by index, keeping requested order
        logger.info(f"Data filtered to selected indexes: {selected_indexes}")
    return df
def start_quiz():
    """Start a quiz: load the curated questions and reset per-quiz state."""
    # Curated row labels. An earlier ordering was:
    # [3404, 12977, 3896, 3852, 3589, 12038, 7602, 3473, 1878, 9752, 3946, 2519, 1302, 9]
    # Notes carried over from the original comments (translation approximate):
    #   3887 - answer is wrong; 9699 - expression looks off; 3589 - unclear.
    selected_indexes = [2519, 3946, 3404, 3896, 7602, 3852, 12977, 1878, 3473, 9752, 1302, 9, 12038]

    df = load_data(selected_indexes=selected_indexes)
    nothing_loaded = df is None or df.empty
    if nothing_loaded:
        st.error("λ°μ΄ν°λ₯Ό λΆλ¬μ¬ μ μμ΅λλ€. λ°μ΄ν°μ μ νμΈν΄μ£ΌμΈμ.")
        return

    # The first ten rows are used as-is (no random sampling).
    quiz_rows = df.iloc[:10]
    st.session_state['questions'] = quiz_rows
    st.session_state['current_step'] = 'quiz'
    st.session_state['current_question_index'] = 0
    st.session_state['wrong_questions'] = []
    st.session_state['misconceptions'] = []
    st.session_state['generated_questions'] = []
    logger.info("Quiz started")
def generate_similar_question(wrong_q, misconception_id, generator):
    """Generate a similar question for a wrongly answered one.

    Args:
        wrong_q: Dict form of the question row that was answered incorrectly.
        misconception_id: Misconception id tied to the chosen wrong answer;
            may be NaN, in which case a fallback id is used.
        generator: Object exposing ``generate_similar_question_with_text``.

    Returns:
        A dict with ``question``, ``choices``, ``correct`` and ``explanation``
        keys, or ``None`` when the input is not a dict, nothing was generated,
        or any exception occurred (the error is logged and shown in the UI).
        The chosen correct answer is also stored in
        ``st.session_state['current_similar_question_answer']``.
    """
    logger.info(f"Generating similar question for misconception_id: {misconception_id}")

    # Validate the input data first.
    if not isinstance(wrong_q, dict):
        logger.error(f"Invalid wrong_q type: {type(wrong_q)}")
        st.error("μ μ¬ λ¬Έμ μμ±μ νμν λ°μ΄ν° νμμ΄ μλͺ»λμμ΅λλ€.")
        return None

    try:
        # When the id is missing/NaN, fall back to another misconception.
        if pd.isna(misconception_id):
            logger.info("Original misconception_id is NaN, trying to find alternative")
            # Candidates are the misconceptions recorded so far in this session.
            usable_ids = [m for m in st.session_state.misconceptions if not pd.isna(m)]
            if usable_ids:
                # Take the most recently recorded one.
                misconception_id = usable_ids[-1]
                logger.info(f"Using alternative misconception_id: {misconception_id}")
            else:
                # Hard-coded default id; the original comment notes that this
                # value still needs to be replaced with a suitable default.
                misconception_id = 2001
                logger.info(f"Using default misconception_id: {misconception_id}")

        # Prepare the payload; everything is coerced to str/int up front.
        correct_key = f'Answer{wrong_q["CorrectAnswer"]}Text'
        chosen_key = f'Answer{st.session_state.selected_wrong_answer}Text'
        input_data = {
            'construct_name': str(wrong_q.get('ConstructName', '')),
            'subject_name': str(wrong_q.get('SubjectName', '')),
            'question_text': str(wrong_q.get('QuestionText', '')),
            'correct_answer_text': str(wrong_q.get(correct_key, '')),
            'wrong_answer_text': str(wrong_q.get(chosen_key, '')),
            'misconception_id': int(misconception_id),
        }
        logger.info(f"Prepared input data: {input_data}")

        with st.spinner("π μ μ¬ λ¬Έμ λ₯Ό μμ±νκ³ μμ΅λλ€..."):
            # The payload keys match the generator's keyword names one-to-one.
            generated_q, _ = generator.generate_similar_question_with_text(**input_data)

        if not generated_q:
            return None

        verifier = load_answer_verifier()
        with st.status("π€ AIκ° λ¬Έμ λ₯Ό κ²ν νκ³ μμ΅λλ€..."):
            st.write("λ΅μμ μ νμ±μ κ²μ¦νκ³ μμ΅λλ€...")
            verified_answer = verifier.verify_answer(
                question=generated_q.question,
                choices=generated_q.choices,
            )
            if verified_answer:
                logger.info(f"Answer verified: {verified_answer}")
                st.write("β κ²μ¦ μλ£!")
                final_answer = verified_answer
            else:
                # Verification yielded nothing: keep the generator's own answer.
                logger.warning("Answer verification failed, using original answer")
                st.write("β οΈ κ²μ¦μ μ€ν¨νμ΅λλ€. μλ³Έ λ΅μμ μ¬μ©ν©λλ€.")
                final_answer = generated_q.correct_answer

            result = {
                'question': generated_q.question,
                'choices': generated_q.choices,
                'correct': final_answer,
                'explanation': generated_q.explanation,
            }
            st.session_state['current_similar_question_answer'] = final_answer
            return result
    except Exception as e:
        error_text = str(e)
        logger.error(f"Error in generate_similar_question: {error_text}")
        st.error(f"λ¬Έμ μμ± μ€ μ€λ₯κ° λ°μνμ΅λλ€: {error_text}")
        return None
def handle_answer(answer, current_q):
    """Process a submitted answer and advance the quiz.

    Args:
        answer: The option label the learner picked.
        current_q: The current question row; it is read by key and must
            support ``to_dict()`` and ``get()``.

    A wrong answer is recorded together with the misconception id attached to
    the chosen option. The question index is then advanced, and the step
    switches to ``'review'`` once the last question has been answered.
    """
    is_wrong = answer != current_q['CorrectAnswer']
    if is_wrong:
        st.session_state.wrong_questions.append(current_q.to_dict())
        st.session_state.selected_wrong_answer = answer
        # Each option has its own misconception column, e.g. "MisconceptionAId".
        chosen_misconception = current_q.get(f'Misconception{answer}Id')
        st.session_state.misconceptions.append(chosen_misconception)

    st.session_state.current_question_index += 1
    quiz_finished = st.session_state.current_question_index >= len(st.session_state.questions)
    st.session_state.current_step = 'review' if quiz_finished else 'quiz'
def display_math_content(content):
    """Display mathematical content inside the styled ``math-container`` box.

    The LaTeX markup is converted to plain text and then HTML-escaped before
    being placed in the raw-HTML wrapper, so characters such as ``<``, ``>``
    and ``&`` in the text are shown literally instead of being parsed as
    markup.

    Args:
        content: The math content to display. Non-string values are converted
            with ``str()`` before processing.
    """
    import html
    from pylatexenc.latex2text import LatexNodes2Text

    source_text = content if isinstance(content, str) else str(content)
    # Convert LaTeX to plain text for display.
    plain_text = LatexNodes2Text().latex_to_text(source_text)
    # The wrapper is rendered with unsafe_allow_html, so the payload must be
    # escaped or "a<b and c>d" would be interpreted as a tag.
    safe_text = html.escape(plain_text)
    st.markdown(f'<div class="math-container">{safe_text}</div>', unsafe_allow_html=True)
def add_custom_css():
    """Inject the app-wide CSS (problem header, math container, buttons)."""
    stylesheet = """
<style>
.problem-header {
color: #FF6B6B;
font-size: 24px;
font-weight: bold;
margin-bottom: 20px;
}
.math-container {
background-color: #f0f8ff;
padding: 15px 20px;
border-radius: 5px;
margin: 5px 0;
}
button {
color: #0066ff;
font-weight: 500;
}
</style>
"""
    st.markdown(stylesheet, unsafe_allow_html=True)
def display_question(question, answers):
    """Render a quiz question with its four selectable options.

    Args:
        question: Question text, passed on to ``display_math_content``.
        answers: Mapping from option label ('A'..'D') to the option text.

    Clicking an option button records the answer for the current question
    (taken from session state) and triggers a rerun.
    """
    st.markdown('<div class="problem-header">Problem:</div>', unsafe_allow_html=True)
    display_math_content(question)

    # Extra CSS used for the option rows.
    option_css = """
<style>
.option-container {
background-color: #f0f8ff;
padding: 10px 20px;
margin: 5px 0;
border-radius: 5px;
cursor: pointer;
display: flex;
align-items: center;
gap: 20px;
}
.option-text {
color: #0066ff;
font-weight: 500;
width: 30px;
}
</style>
"""
    st.markdown(option_css, unsafe_allow_html=True)

    # One row per option: a narrow button column and a wide text column.
    for label in ('A', 'B', 'C', 'D'):
        with st.container():
            button_col, text_col = st.columns([1, 11])
            with button_col:
                clicked = st.button(f"{label}.", key=f"btn_{label}", help="Click to select")
                if clicked:
                    position = st.session_state.current_question_index
                    handle_answer(label, st.session_state.questions.iloc[position])
                    st.rerun()
            with text_col:
                display_option_content(answers[label])
def display_option_content(option_text):
    """Display one answer option inside the styled ``math-container`` box.

    The LaTeX markup is converted to plain text and then HTML-escaped before
    being placed in the raw-HTML wrapper, so ``<``, ``>`` and ``&`` in the
    option text are shown literally instead of being parsed as markup.

    Args:
        option_text: The option text to display. Non-string values are
            converted with ``str()`` before processing.
    """
    import html
    from pylatexenc.latex2text import LatexNodes2Text

    source_text = option_text if isinstance(option_text, str) else str(option_text)
    plain_text = LatexNodes2Text().latex_to_text(source_text)
    # Rendered with unsafe_allow_html, so the payload must be escaped.
    safe_text = html.escape(plain_text)
    st.markdown(f'<div class="math-container">{safe_text}</div>', unsafe_allow_html=True)
def update_similar_question_display(new_question, i, answered=False):
    """Display a similar question and its options, recording the learner's pick.

    Args:
        new_question: Dict with ``question`` and ``choices`` keys, and normally
            a ``correct`` key holding the correct option label.
        i: Index of the wrong question this similar question belongs to; used
            to namespace widget keys and session-state keys.
        answered: When true, option clicks are ignored.

    On a click (and when not ``answered``) the selection, the answered flag
    and the correctness flag are written to session state under keys suffixed
    with ``i``, followed by a rerun.
    """
    display_math_content(new_question['question'])

    # Grade against the question actually being shown. The shared session key
    # is only a fallback, since it holds whichever answer was stored last.
    correct_answer = new_question.get('correct')
    if correct_answer is None:
        correct_answer = st.session_state.get('current_similar_question_answer')

    # Display options
    for opt in ('A', 'B', 'C', 'D'):
        with st.container():
            col1, col2 = st.columns([1, 11])
            with col1:
                if st.button(f"{opt}.", key=f"sim_btn_{opt}_{i}", help="Click to select"):
                    if not answered:
                        # Store the chosen option and whether it was correct.
                        st.session_state[f"similar_question_answered_{i}"] = True
                        st.session_state[f"selected_answer_{i}"] = opt
                        st.session_state[f"is_correct_{i}"] = (opt == correct_answer)
                        st.rerun()
            with col2:
                display_option_content(new_question['choices'][opt])
def main():
    """Main application logic: route to the start, quiz or review screen.

    The screen is chosen from ``st.session_state.current_step``
    (``'initial'``, ``'quiz'`` or ``'review'``).
    """
    st.title("MisconcepTutor")

    # NOTE(review): the misconception model is loaded on every rerun although
    # nothing in this function uses the result; the call is kept in case
    # loading is relied upon elsewhere - confirm and drop if not.
    load_misconception_model()
    generator = load_question_generator()
    add_custom_css()

    step = st.session_state.current_step
    if step == 'initial':
        _render_start_screen()
    elif step == 'quiz':
        _render_quiz_screen()
    elif step == 'review':
        _render_review_screen(generator)


def _render_start_screen():
    """Show the landing text and the button that starts a quiz."""
    st.write("#### νμ΅μ μμνκ² μ΅λλ€. 10κ°μ λ¬Έμ λ₯Ό νμ΄λ³ΌκΉμ?")
    if st.button("νμ΅ μμ", key="start_quiz"):
        start_quiz()
        st.rerun()


def _render_quiz_screen():
    """Show progress and the current question with its options."""
    questions = st.session_state.questions
    total = len(questions)
    index = st.session_state.current_question_index
    current_q = questions.iloc[index]

    # Progress is derived from the real number of questions rather than a
    # fixed 10, so shorter question sets still display correctly.
    st.progress(index / total if total else 0.0)
    st.write(f"### λ¬Έμ {index + 1}/{total}")

    st.markdown("---")
    answers = {
        'A': current_q['AnswerAText'],
        'B': current_q['AnswerBText'],
        'C': current_q['AnswerCText'],
        'D': current_q['AnswerDText'],
    }
    # Pass the raw text: display_question converts LaTeX itself, and running
    # the conversion twice can corrupt the text (e.g. a literal "%" produced
    # by the first pass is read as a LaTeX comment by the second).
    display_question(current_q['QuestionText'], answers)


def _render_review_screen(generator):
    """Show the score summary, navigation buttons and wrong-answer analysis."""
    st.write("### νμ΅ κ²°κ³Ό")

    wrong_questions = st.session_state.wrong_questions
    total = len(st.session_state.questions)
    wrong_count = len(wrong_questions)

    # Result statistics.
    col1, col2, col3 = st.columns(3)
    col1.metric("μ΄ λ¬Έμ μ", total)
    col2.metric("λ§μ λ¬Έμ ", total - wrong_count)
    col3.metric("νλ¦° λ¬Έμ ", wrong_count)

    # Message depending on the result.
    if wrong_count == 0:
        st.balloons()
        st.success("π μΆνν©λλ€! λͺ¨λ λ¬Έμ λ₯Ό λ§μΆμ ¨μ΄μ!")
        st.markdown("""
### π μνμμ΄μλλ€!
μλ²½ν μ μλ₯Ό λ°μΌμ ¨λ€μ! μνμ κ°λ μ μ ννκ² μ΄ν΄νκ³ κ³μ κ² κ°μ΅λλ€.
""")
    elif wrong_count <= 3:
        st.success("μ νμ ¨μ΄μ! μ‘°κΈλ§ λ μ°μ΅νλ©΄ μλ²½ν κ±°μμ!")
    else:
        st.info("μ²μ²ν κ°λ μ 볡μ΅ν΄λ³΄μμ. μ°μ΅νλ€ λ³΄λ©΄ λμ΄λ κ±°μμ!")

    # Navigation buttons.
    col1, col2 = st.columns(2)
    with col1:
        if st.button("π μλ‘μ΄ λ¬Έμ μΈνΈ μμνκΈ°", use_container_width=True):
            start_quiz()
            st.rerun()
    with col2:
        if st.button("π μ²μμΌλ‘ λμκ°κΈ°", use_container_width=True):
            st.session_state.clear()
            st.rerun()

    # Wrong-question analysis, one tab per wrong question.
    if wrong_questions:
        st.write("### βοΈ νλ¦° λ¬Έμ λΆμ")
        tabs = st.tabs([f"π νλ¦° λ¬Έμ #{n + 1}" for n in range(wrong_count)])
        paired = zip(wrong_questions, st.session_state.misconceptions)
        for i, (tab, (wrong_q, misconception_id)) in enumerate(zip(tabs, paired)):
            with tab:
                _render_wrong_question_tab(i, wrong_q, misconception_id, generator)


def _render_wrong_question_tab(i, wrong_q, misconception_id, generator):
    """Show one wrong question, its correct answer, misconception and retry UI."""
    st.write("**π λ¬Έμ :**")
    display_math_content(wrong_q['QuestionText'])
    st.write("**β μ λ΅:**")
    display_option_content(wrong_q[f'Answer{wrong_q["CorrectAnswer"]}Text'])
    st.write("---")

    st.write("**π κ΄λ ¨λ Misconception:**")
    if misconception_id and not pd.isna(misconception_id):
        misconception_text = generator.get_misconception_text(misconception_id)
        st.info(f"Misconception ID: {int(misconception_id)}\n\n{misconception_text}")
    else:
        st.info("Misconception μ λ³΄κ° μμ΅λλ€.")

    if st.button("π μ μ¬ λ¬Έμ νκΈ°", key=f"retry_{i}"):
        st.session_state[f"show_similar_question_{i}"] = True
        st.session_state[f"similar_question_answered_{i}"] = False
        # An explicit request always produces a fresh question.
        _forget_similar_question(i)
        st.rerun()

    if st.session_state.get(f"show_similar_question_{i}", False):
        _render_similar_question(i, wrong_q, misconception_id, generator)


def _render_similar_question(i, wrong_q, misconception_id, generator):
    """Show the similar question for tab ``i``, generating it at most once."""
    st.divider()

    # Streamlit reruns the whole script on every click. Without this cache a
    # new question would be generated on each rerun, so the learner's click
    # would be graded against a different question than the one displayed.
    cache_key = f"similar_question_{i}"
    new_question = st.session_state.get(cache_key)
    if new_question is None:
        new_question = generate_similar_question(wrong_q, misconception_id, generator)
        if new_question:
            st.session_state[cache_key] = new_question

    if not new_question:
        st.error("μ μ¬ λ¬Έμ λ₯Ό μμ±ν μ μμ΅λλ€.")
        if st.button("β λ«κΈ°", key=f"close_error_{i}"):
            st.session_state[f"show_similar_question_{i}"] = False
            st.rerun()
        _add_bottom_padding()
        return

    # Keep the shared key in step with the question about to be displayed,
    # because the option buttons may read it when grading.
    st.session_state['current_similar_question_answer'] = new_question.get('correct')

    st.write("### π― μ μ¬ λ¬Έμ ")
    answered = st.session_state.get(f"similar_question_answered_{i}", False)
    update_similar_question_display(new_question, i)

    # Once answered, show the verdict and the explanation.
    if answered:
        is_correct = st.session_state.get(f"is_correct_{i}", False)
        correct_answer = new_question.get('correct')
        if is_correct:
            st.success("β μ λ΅μ λλ€!")
        else:
            st.error(f"β νλ Έμ΅λλ€. μ λ΅μ {correct_answer}μ λλ€.")
        st.write("---")
        st.write("**π ν΄μ€:**", new_question['explanation'])
        # "Try again" clears only the answer state; the question is kept.
        if st.button("π λ€μ νκΈ°", key=f"reset_{i}"):
            _reset_similar_answer(i)
            st.rerun()

    if st.button("β λ¬Έμ λ«κΈ°", key=f"close_{i}"):
        st.session_state[f"show_similar_question_{i}"] = False
        _reset_similar_answer(i)
        _forget_similar_question(i)
        st.rerun()

    _add_bottom_padding()


def _reset_similar_answer(i):
    """Clear the answered/selected/correct flags of similar question ``i``."""
    st.session_state[f"similar_question_answered_{i}"] = False
    st.session_state[f"selected_answer_{i}"] = None
    st.session_state[f"is_correct_{i}"] = None


def _forget_similar_question(i):
    """Drop the cached similar question of tab ``i`` so a new one is generated."""
    cache_key = f"similar_question_{i}"
    if cache_key in st.session_state:
        del st.session_state[cache_key]


def _add_bottom_padding():
    """Add blank space at the bottom of the screen."""
    st.markdown("<br>" * 5, unsafe_allow_html=True)
    st.markdown("""
<div style="height: 100px;">
</div>
""", unsafe_allow_html=True)
# Run the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
# Correct answers when sampling with random_state 42:
# D C A A C | |
# A B B B B |