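# NOTE: the next lines are residue from the Hugging Face file viewer, kept as comments
# so that the module parses:
#   Jintonic92's picture
#   Update app.py
#   7a87ca3 verified
#   raw / history blame
#   22.2 kB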
import streamlit as st
import pandas as pd
import os
from src.FisrtModule.module1 import MisconceptionModel
from src.SecondModule.module2 import SimilarQuestionGenerator
from src.ThirdModule.module3 import AnswerVerifier
import logging
from typing import Optional, Tuple
from pylatexenc.latex2text import LatexNodes2Text
import re
# Configure the root logger at DEBUG as soon as the module is imported.
# NOTE(review): logging.basicConfig is called again further down with level=INFO.
# Per the logging docs a second call without force=True does nothing once the root
# logger has handlers, so DEBUG presumably stays in effect - confirm which level is intended.
logging.basicConfig(level=logging.DEBUG)
# Cached factory for the misconception model
@st.cache_resource
def load_misconception_model():
    """Build the MisconceptionModel used by the app.

    The function is wrapped in ``st.cache_resource``. The mapping and
    embedding file paths are joined onto the module-level ``data_path``,
    which is looked up when the function is called.

    Returns:
        A ``MisconceptionModel`` constructed with the fixed model name and
        the two data paths.
    """
    mapping_file = os.path.join(data_path, 'misconception_mapping.parquet')
    embedding_files = [os.path.join(data_path, 'embs_misconception-9-9.npy')]
    return MisconceptionModel(
        model_name="minsuas/Misconceptions__1",
        misconception_mapping_path=mapping_file,
        misconception_embs_paths=embedding_files,
    )
# Basic Streamlit page configuration (title, wide layout, sidebar open)
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="MisconcepTutor",
)
@st.cache_resource
def load_answer_verifier():
    """Load the answer-verification model.

    The function is wrapped in ``st.cache_resource``.

    Returns:
        A new ``AnswerVerifier`` built with no arguments.
    """
    # Imported locally, exactly as the original block did.
    from src.ThirdModule.module3 import AnswerVerifier

    verifier = AnswerVerifier()
    return verifier
# Path settings: everything is resolved relative to the directory containing this file.
base_path = os.path.dirname(os.path.abspath(__file__))
# Data files live in the "Data" sub-directory next to this file.
data_path = os.path.join(base_path, 'Data')
# CSV passed to SimilarQuestionGenerator in load_question_generator().
misconception_csv_path = os.path.join(data_path, 'misconception_mapping.csv')
# Logging settings.
# NOTE(review): basicConfig was already called with level=DEBUG near the imports;
# without force=True this second call is presumably a no-op - confirm the intended level.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)
# Session-state initialisation - placed near the top so that it runs first.
# Only executes when the 'initialized' marker is absent from the session.
if 'initialized' not in st.session_state:
    _initial_state = {
        'initialized': True,
        'wrong_questions': [],
        'misconceptions': [],
        'current_question_index': 0,
        'generated_questions': [],
        'current_step': 'initial',
        'selected_wrong_answer': None,
        'questions': [],
    }
    # Assign in the same order as the original attribute-by-attribute setup.
    for _state_key, _state_value in _initial_state.items():
        setattr(st.session_state, _state_key, _state_value)
    logger.info("Session state initialized")
# Question generator factory
@st.cache_resource
def load_question_generator():
    """Load the similar-question generation model.

    The function is wrapped in ``st.cache_resource``.

    Returns:
        A ``SimilarQuestionGenerator`` built from ``misconception_csv_path``.

    Raises:
        FileNotFoundError: if ``misconception_csv_path`` does not exist. The
            same message is also shown through ``st.error`` before raising.
    """
    if os.path.exists(misconception_csv_path):
        return SimilarQuestionGenerator(misconception_csv_path=misconception_csv_path)

    missing_message = f"CSV 파일이 μ‘΄μž¬ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€: {misconception_csv_path}"
    st.error(missing_message)
    raise FileNotFoundError(missing_message)
# CSV data loader
# NOTE: an earlier revision of this function defaulted to data_file='/train.csv'.
@st.cache_data
def load_data(data_file='/processed_mathqa2.csv', selected_indexes=None):
    """Read a CSV from the data directory, optionally keeping selected rows.

    The function is wrapped in ``st.cache_data``.

    Args:
        data_file: File name relative to ``data_path``; leading slashes are
            stripped before joining.
        selected_indexes: Optional iterable of index labels. Rows are returned
            in the order given here. Labels absent from the file are skipped
            with a warning instead of raising ``KeyError``.

    Returns:
        The (possibly filtered) ``DataFrame``, or ``None`` when the file does
        not exist.
    """
    file_path = os.path.join(data_path, data_file.lstrip('/'))

    # Keep the try body to the one call that can raise FileNotFoundError.
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        st.error(f"νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€: {data_file}")
        logger.error(f"File not found: {data_file}")
        return None
    logger.info(f"Data loaded successfully from {file_path}")

    if selected_indexes is not None:
        # .loc with a list raises KeyError if any label is missing, so split the
        # request into present and missing labels first. The caller's order is
        # preserved because it decides which rows the quiz shows.
        present, missing = [], []
        for label in selected_indexes:
            (present if label in df.index else missing).append(label)
        if missing:
            logger.warning(f"Selected indexes not found in {file_path}: {missing}")
        df = df.loc[present]  # Filter rows by index
        logger.info(f"Data filtered to selected indexes: {present}")
    return df
def start_quiz():
    """Start (or restart) the quiz and reset the per-quiz session state.

    Loads the hand-picked rows through ``load_data``. If nothing could be
    loaded, an error is shown and the session state is left untouched.
    Otherwise the first ten rows become the quiz questions, the step is set to
    ``'quiz'``, the index is reset to 0 and the result lists are emptied.
    """
    # Hand-picked rows. Notes carried over from the original comments:
    #   3887 - answer is wrong
    #   9699 - formula looks odd
    #   3589 - ??
    # An earlier revision used a different ordering of a similar list, and
    # df.sample(n=10, random_state=42) instead of the first ten rows.
    selected_indexes = [2519, 3946, 3404, 3896, 7602, 3852, 12977, 1878, 3473, 9752, 1302, 9, 12038]
    df = load_data(selected_indexes=selected_indexes)

    nothing_loaded = df is None or df.empty
    if nothing_loaded:
        st.error("데이터λ₯Ό 뢈러올 수 μ—†μŠ΅λ‹ˆλ‹€. 데이터셋을 ν™•μΈν•΄μ£Όμ„Έμš”.")
        return

    state = st.session_state
    state.questions = df.iloc[:10]
    state.current_step = 'quiz'
    state.current_question_index = 0
    # Fresh list objects for every result collection, in the original order.
    for list_name in ('wrong_questions', 'misconceptions', 'generated_questions'):
        setattr(state, list_name, [])
    logger.info("Quiz started")
def _resolve_misconception_id(misconception_id):
    """Return a usable misconception id, substituting a fallback for NaN/None."""
    if not pd.isna(misconception_id):
        return misconception_id

    logger.info("Original misconception_id is NaN, trying to find alternative")
    # Choose among the misconceptions recorded so far in this session.
    available_misconceptions = [m for m in st.session_state.misconceptions if not pd.isna(m)]
    if available_misconceptions:
        # Use the most recently recorded one.
        fallback_id = available_misconceptions[-1]
        logger.info(f"Using alternative misconception_id: {fallback_id}")
    else:
        # TODO: 2001 is a placeholder default; the original comment says it
        # still needs to be replaced with an appropriate value.
        fallback_id = 2001
        logger.info(f"Using default misconception_id: {fallback_id}")
    return fallback_id


def generate_similar_question(wrong_q, misconception_id, generator):
    """Generate a similar question for a wrongly answered one.

    Args:
        wrong_q: Dict form of the wrongly answered question row. Reads
            ``ConstructName``, ``SubjectName``, ``QuestionText``,
            ``CorrectAnswer`` and the ``Answer<X>Text`` entries.
        misconception_id: Id of the misconception behind the wrong answer.
            NaN/None is replaced by the latest valid id seen in the session,
            or by a placeholder default.
        generator: Object exposing ``generate_similar_question_with_text``.

    Returns:
        A dict with ``question``, ``choices``, ``correct`` and ``explanation``.
        Returns ``None`` when the input is not a dict, nothing was generated,
        or any exception occurred (which is logged with its traceback and
        shown via ``st.error``).

    Side effects:
        Stores the chosen correct answer in
        ``st.session_state['current_similar_question_answer']``.
    """
    logger.info(f"Generating similar question for misconception_id: {misconception_id}")

    # Validate the input data.
    if not isinstance(wrong_q, dict):
        logger.error(f"Invalid wrong_q type: {type(wrong_q)}")
        st.error("μœ μ‚¬ 문제 생성에 ν•„μš”ν•œ 데이터 ν˜•μ‹μ΄ 잘λͺ»λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
        return None

    try:
        misconception_id = _resolve_misconception_id(misconception_id)

        # Prepare the keyword arguments; everything is coerced to plain str/int.
        # NOTE(review): wrong_answer_text uses the session-wide
        # selected_wrong_answer, which holds only the LAST wrong choice of the
        # quiz - presumably not the choice made for this particular question.
        # Confirm against handle_answer().
        input_data = {
            'construct_name': str(wrong_q.get('ConstructName', '')),
            'subject_name': str(wrong_q.get('SubjectName', '')),
            'question_text': str(wrong_q.get('QuestionText', '')),
            'correct_answer_text': str(wrong_q.get(f'Answer{wrong_q["CorrectAnswer"]}Text', '')),
            'wrong_answer_text': str(wrong_q.get(f'Answer{st.session_state.selected_wrong_answer}Text', '')),
            'misconception_id': int(misconception_id)
        }
        logger.info(f"Prepared input data: {input_data}")

        with st.spinner("πŸ“ μœ μ‚¬ 문제λ₯Ό μƒμ„±ν•˜κ³  μžˆμŠ΅λ‹ˆλ‹€..."):
            # The dict keys match the generator's keyword names one-to-one.
            generated_q, _ = generator.generate_similar_question_with_text(**input_data)

        if not generated_q:
            return None

        verifier = load_answer_verifier()
        with st.status("πŸ€” AIκ°€ 문제λ₯Ό κ²€ν† ν•˜κ³  μžˆμŠ΅λ‹ˆλ‹€..."):
            st.write("λ‹΅μ•ˆμ˜ 정확성을 κ²€μ¦ν•˜κ³  μžˆμŠ΅λ‹ˆλ‹€...")
            verified_answer = verifier.verify_answer(
                question=generated_q.question,
                choices=generated_q.choices
            )
            if verified_answer:
                logger.info(f"Answer verified: {verified_answer}")
                st.write("βœ… 검증 μ™„λ£Œ!")
                correct = verified_answer
            else:
                # Verification failed: fall back to the generator's own answer.
                logger.warning("Answer verification failed, using original answer")
                st.write("⚠️ 검증에 μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€. 원본 λ‹΅μ•ˆμ„ μ‚¬μš©ν•©λ‹ˆλ‹€.")
                correct = generated_q.correct_answer

        # Build the result first, then publish the answer, as the original did.
        result = {
            'question': generated_q.question,
            'choices': generated_q.choices,
            'correct': correct,
            'explanation': generated_q.explanation
        }
        st.session_state['current_similar_question_answer'] = correct
        return result
    except Exception as e:
        # UI boundary: keep the app alive, but log the full traceback.
        logger.exception(f"Error in generate_similar_question: {str(e)}")
        st.error(f"문제 생성 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}")
        return None
def handle_answer(answer, current_q):
    """Record the user's answer and advance the quiz.

    Args:
        answer: The option the user picked (compared with ``CorrectAnswer``).
        current_q: The current question row; must support ``[]``, ``.get`` and
            ``.to_dict()``.

    When the answer is wrong, the question (as a dict), the chosen option and
    the matching ``Misconception<answer>Id`` value are stored in the session
    state. The question index is always advanced. The step becomes
    ``'review'`` after the last question and ``'quiz'`` otherwise.
    """
    state = st.session_state

    answered_wrong = answer != current_q['CorrectAnswer']
    if answered_wrong:
        state.wrong_questions.append(current_q.to_dict())
        state.selected_wrong_answer = answer
        # Misconception id tied to the chosen option; may be missing/NaN.
        state.misconceptions.append(current_q.get(f'Misconception{answer}Id'))

    state.current_question_index += 1
    quiz_finished = state.current_question_index >= len(state.questions)
    state.current_step = 'review' if quiz_finished else 'quiz'
def display_math_content(content):
    """Display mathematical content inside the styled ``math-container`` box.

    The LaTeX source is converted to plain text, HTML-escaped, and rendered
    through ``st.markdown`` with raw HTML enabled.

    Args:
        content (str): The math content (LaTeX source) to display.
    """
    import html
    from pylatexenc.latex2text import LatexNodes2Text

    # Convert LaTeX to plain text for display.
    formatted_content = LatexNodes2Text().latex_to_text(content)
    # The text goes into raw HTML. Without escaping, characters such as '<'
    # and '&' (common in inequalities) are parsed as markup and can swallow or
    # garble the rest of the content.
    safe_content = html.escape(formatted_content, quote=False)
    st.markdown(f'<div class="math-container">{safe_content}</div>', unsafe_allow_html=True)
def add_custom_css():
    """Inject the app-wide stylesheet.

    Styles the ``problem-header`` and ``math-container`` classes and every
    ``button`` element, via ``st.markdown`` with raw HTML enabled.
    """
    # Stylesheet text is kept exactly as it appears in the file.
    stylesheet = """
<style>
.problem-header {
color: #FF6B6B;
font-size: 24px;
font-weight: bold;
margin-bottom: 20px;
}
.math-container {
background-color: #f0f8ff;
padding: 15px 20px;
border-radius: 5px;
margin: 5px 0;
}
button {
color: #0066ff;
font-weight: 500;
}
</style>
"""
    st.markdown(stylesheet, unsafe_allow_html=True)
def display_question(question, answers):
    """Show the current quiz question and its four answer options.

    Args:
        question: Question text, rendered through ``display_math_content``.
        answers: Mapping with the keys ``'A'``..``'D'`` holding option texts.

    Each option gets a button. Clicking one passes the option and the current
    question row to ``handle_answer`` and then reruns the script.
    """
    st.markdown('<div class="problem-header">Problem:</div>', unsafe_allow_html=True)
    display_math_content(question)

    # Extra CSS for the option rows.
    option_css = """
<style>
.option-container {
background-color: #f0f8ff;
padding: 10px 20px;
margin: 5px 0;
border-radius: 5px;
cursor: pointer;
display: flex;
align-items: center;
gap: 20px;
}
.option-text {
color: #0066ff;
font-weight: 500;
width: 30px;
}
</style>
"""
    st.markdown(option_css, unsafe_allow_html=True)

    # One row per option: a narrow button column and a wide text column.
    for opt in 'ABCD':
        with st.container():
            button_col, text_col = st.columns([1, 11])
            with button_col:
                clicked = st.button(f"{opt}.", key=f"btn_{opt}", help="Click to select")
                if clicked:
                    active_row = st.session_state.questions.iloc[st.session_state.current_question_index]
                    handle_answer(opt, active_row)
                    st.rerun()
            with text_col:
                display_option_content(answers[opt])
def display_option_content(option_text):
    """Display one answer option inside the styled ``math-container`` box.

    The LaTeX source is converted to plain text, HTML-escaped, and rendered
    through ``st.markdown`` with raw HTML enabled.

    Args:
        option_text (str): The option's text (LaTeX source).
    """
    import html
    from pylatexenc.latex2text import LatexNodes2Text

    formatted_content = LatexNodes2Text().latex_to_text(option_text)
    # Escape before embedding in raw HTML so that '<' and '&' in an option
    # (e.g. an inequality) are shown literally instead of parsed as markup.
    safe_content = html.escape(formatted_content, quote=False)
    st.markdown(f'<div class="math-container">{safe_content}</div>', unsafe_allow_html=True)
def update_similar_question_display(new_question, i, answered=False):
    """Display a similar question and its options, recording the user's pick.

    Args:
        new_question: Dict with ``question``, ``choices`` (keys ``'A'``..``'D'``)
            and, normally, ``correct``.
        i: Index of the review tab; used to namespace widget and state keys.
        answered: When true, button clicks are ignored.

    On a click (while not ``answered``) the function stores
    ``similar_question_answered_<i>``, ``selected_answer_<i>`` and
    ``is_correct_<i>`` in the session state and reruns the script.
    """
    display_math_content(new_question['question'])

    # Grade against the answer carried by the question being rendered. The
    # shared 'current_similar_question_answer' key is only a fallback, because
    # it is overwritten whenever any other similar question is generated.
    correct_answer = new_question.get('correct')
    if correct_answer is None:
        correct_answer = st.session_state.get('current_similar_question_answer')

    # Display options
    for opt in ['A', 'B', 'C', 'D']:
        with st.container():
            col1, col2 = st.columns([1, 11])
            with col1:
                clicked = st.button(f"{opt}.", key=f"sim_btn_{opt}_{i}", help="Click to select")
                if clicked and not answered:
                    # Remember the selected option and whether it was correct.
                    st.session_state[f"similar_question_answered_{i}"] = True
                    st.session_state[f"selected_answer_{i}"] = opt
                    st.session_state[f"is_correct_{i}"] = (opt == correct_answer)
                    st.rerun()
            with col2:
                display_option_content(new_question['choices'][opt])
# Prefixes of the per-tab session keys used by the review screen.
_SIMILAR_STATE_PREFIXES = (
    "show_similar_question_",
    "cached_similar_question_",
    "similar_question_answered_",
    "selected_answer_",
    "is_correct_",
)


def _clear_similar_question_state():
    """Remove every per-tab similar-question key from the session state."""
    for key in list(st.session_state.keys()):
        if isinstance(key, str) and key.startswith(_SIMILAR_STATE_PREFIXES):
            del st.session_state[key]


def _add_bottom_padding():
    """Add blank space at the bottom of the screen."""
    st.markdown("<br>" * 5, unsafe_allow_html=True)
    st.markdown("""
<div style="height: 100px;">
</div>
""", unsafe_allow_html=True)


def _close_similar_question(i):
    """Hide tab ``i``'s similar question and forget its cached content."""
    st.session_state[f"show_similar_question_{i}"] = False
    st.session_state[f"similar_question_answered_{i}"] = False
    st.session_state[f"selected_answer_{i}"] = None
    st.session_state[f"is_correct_{i}"] = None
    st.session_state.pop(f"cached_similar_question_{i}", None)


def _render_wrong_question_tab(i, wrong_q, misconception_id, generator):
    """Render one review tab: the question, its answer, the misconception and
    (on request) a generated similar question."""
    st.write("**πŸ“‹ 문제:**")
    display_math_content(wrong_q['QuestionText'])
    st.write("**βœ… μ •λ‹΅:**")
    display_option_content(wrong_q[f'Answer{wrong_q["CorrectAnswer"]}Text'])
    st.write("---")
    st.write("**πŸ” κ΄€λ ¨λœ Misconception:**")
    # pd.isna covers both None and NaN. A plain truthiness test would also
    # reject the integer id 0 (assumed to be a legitimate id - TODO confirm).
    if not pd.isna(misconception_id):
        misconception_text = generator.get_misconception_text(misconception_id)
        st.info(f"Misconception ID: {int(misconception_id)}\n\n{misconception_text}")
    else:
        st.info("Misconception 정보가 μ—†μŠ΅λ‹ˆλ‹€.")

    cache_key = f"cached_similar_question_{i}"
    if st.button(f"πŸ“š μœ μ‚¬ 문제 ν’€κΈ°", key=f"retry_{i}"):
        st.session_state[f"show_similar_question_{i}"] = True
        st.session_state[f"similar_question_answered_{i}"] = False
        # An explicit request always produces a freshly generated question.
        st.session_state.pop(cache_key, None)
        st.rerun()

    if not st.session_state.get(f"show_similar_question_{i}", False):
        return

    st.divider()
    # Generate once and keep the result (even a failed None) in the session.
    # Streamlit reruns the whole script on every click, so regenerating here
    # would swap the question after the user has already answered it.
    if cache_key not in st.session_state:
        st.session_state[cache_key] = generate_similar_question(wrong_q, misconception_id, generator)
    new_question = st.session_state[cache_key]

    if new_question:
        st.write("### 🎯 μœ μ‚¬ 문제")
        # Answer status
        answered = st.session_state.get(f"similar_question_answered_{i}", False)
        correct_answer = new_question['correct']
        # update_similar_question_display grades against this shared key, so
        # point it at the question of THIS tab before rendering the buttons.
        st.session_state['current_similar_question_answer'] = correct_answer
        # `answered` is deliberately not forwarded, matching the original call.
        update_similar_question_display(new_question, i)

        # NOTE(review): indentation was lost in the source; the explanation and
        # the retry button are assumed to belong to the "answered" branch.
        if answered:
            if st.session_state.get(f"is_correct_{i}", False):
                st.success("βœ… μ •λ‹΅μž…λ‹ˆλ‹€!")
            else:
                st.error(f"❌ ν‹€λ ΈμŠ΅λ‹ˆλ‹€. 정닡은 {correct_answer}μž…λ‹ˆλ‹€.")
            # Explanation
            st.write("---")
            st.write("**πŸ“ ν•΄μ„€:**", new_question['explanation'])
            # Retry button: same question, answer state cleared.
            if st.button("πŸ”„ λ‹€μ‹œ ν’€κΈ°", key=f"reset_{i}"):
                st.session_state[f"similar_question_answered_{i}"] = False
                st.session_state[f"selected_answer_{i}"] = None
                st.session_state[f"is_correct_{i}"] = None
                st.rerun()

        # Close button
        if st.button("❌ 문제 λ‹«κΈ°", key=f"close_{i}"):
            _close_similar_question(i)
            st.rerun()
        _add_bottom_padding()
    else:
        st.error("μœ μ‚¬ 문제λ₯Ό 생성할 수 μ—†μŠ΅λ‹ˆλ‹€.")
        if st.button("❌ λ‹«κΈ°", key=f"close_error_{i}"):
            _close_similar_question(i)
            st.rerun()
        # NOTE(review): the nesting level of this second padding block is
        # ambiguous in the source; it is assumed to mirror the success branch.
        _add_bottom_padding()


def main():
    """Main application logic: start screen, quiz screen and review screen.

    The screen is chosen from ``st.session_state.current_step``
    (``'initial'``, ``'quiz'`` or ``'review'``).
    """
    st.title("MisconcepTutor")
    # Load the misconception model. The result is not used below; the call is
    # kept so the cached resource is still created as before.
    load_misconception_model()
    # Question generator
    generator = load_question_generator()
    add_custom_css()

    # Start screen
    if st.session_state.current_step == 'initial':
        st.write("#### ν•™μŠ΅μ„ μ‹œμž‘ν•˜κ² μŠ΅λ‹ˆλ‹€. 10개의 문제λ₯Ό ν’€μ–΄λ³ΌκΉŒμš”?")
        if st.button("ν•™μŠ΅ μ‹œμž‘", key="start_quiz"):
            start_quiz()
            st.rerun()

    # Quiz screen
    elif st.session_state.current_step == 'quiz':
        questions = st.session_state.questions
        index = st.session_state.current_question_index
        # Use the real number of questions instead of a hard-coded 10.
        total = len(questions)
        current_q = questions.iloc[index]

        # Progress
        st.progress(index / total if total else 0.0)
        st.write(f"### 문제 {index + 1}/{total}")

        # Question
        st.markdown("---")
        answers = {
            'A': current_q['AnswerAText'],
            'B': current_q['AnswerBText'],
            'C': current_q['AnswerCText'],
            'D': current_q['AnswerDText']
        }
        # Pass the raw text: display_question already converts LaTeX once.
        # Converting twice presumably corrupts text whose first pass yields
        # LaTeX-significant characters such as '%' or '$'.
        display_question(current_q['QuestionText'], answers)

    # Review screen
    elif st.session_state.current_step == 'review':
        st.write("### ν•™μŠ΅ κ²°κ³Ό")
        wrong_questions = st.session_state.wrong_questions
        wrong_count = len(wrong_questions)
        total = len(st.session_state.questions)

        # Result statistics
        col1, col2, col3 = st.columns(3)
        col1.metric("총 문제 수", total)
        col2.metric("λ§žμ€ 문제", total - wrong_count)
        col3.metric("ν‹€λ¦° 문제", wrong_count)

        # Message depending on the result
        if wrong_count == 0:
            st.balloons()  # celebration effect
            st.success("πŸŽ‰ μΆ•ν•˜ν•©λ‹ˆλ‹€! λͺ¨λ“  문제λ₯Ό λ§žμΆ”μ…¨μ–΄μš”!")
            st.markdown("""
### πŸ† μˆ˜ν•™μ™•μ΄μ‹­λ‹ˆλ‹€!
μ™„λ²½ν•œ 점수λ₯Ό λ°›μœΌμ…¨λ„€μš”! μˆ˜ν•™μ  κ°œλ…μ„ μ •ν™•ν•˜κ²Œ μ΄ν•΄ν•˜κ³  계신 것 κ°™μŠ΅λ‹ˆλ‹€.
""")
        elif wrong_count <= 3:
            st.success("잘 ν•˜μ…¨μ–΄μš”! 쑰금만 더 μ—°μŠ΅ν•˜λ©΄ μ™„λ²½ν•  κ±°μ˜ˆμš”!")
        else:
            st.info("천천히 κ°œλ…μ„ λ³΅μŠ΅ν•΄λ³΄μ•„μš”. μ—°μŠ΅ν•˜λ‹€ 보면 λŠ˜μ–΄λ‚  κ±°μ˜ˆμš”!")

        # Navigation buttons
        col1, col2 = st.columns(2)
        with col1:
            if st.button("πŸ”„ μƒˆλ‘œμš΄ 문제 μ„ΈνŠΈ μ‹œμž‘ν•˜κΈ°", use_container_width=True):
                # Per-tab flags and cached questions belong to the finished
                # round; drop them so they cannot leak into the next review.
                _clear_similar_question_state()
                start_quiz()
                st.rerun()
        with col2:
            if st.button("🏠 처음으둜 λŒμ•„κ°€κΈ°", use_container_width=True):
                st.session_state.clear()
                st.rerun()

        # Wrong-question analysis: one tab per wrong question
        if wrong_questions:
            st.write("### ✍️ ν‹€λ¦° 문제 뢄석")
            tabs = st.tabs([f"πŸ“ ν‹€λ¦° 문제 #{n + 1}" for n in range(wrong_count)])
            review_items = zip(tabs, wrong_questions, st.session_state.misconceptions)
            for i, (tab, wrong_q, misconception_id) in enumerate(review_items):
                with tab:
                    _render_wrong_question_tab(i, wrong_q, misconception_id, generator)
# # ν‹€λ¦° 문제 뢄석
# if st.session_state.wrong_questions:
# st.write("### ✍️ ν‹€λ¦° 문제 뢄석")
# tabs = st.tabs([f"πŸ“ ν‹€λ¦° 문제 #{i + 1}" for i in range(len(st.session_state.wrong_questions))])
# for i, (tab, (wrong_q, misconception_id)) in enumerate(zip(
# tabs,
# zip(st.session_state.wrong_questions, st.session_state.misconceptions)
# )):
# with tab:
# st.write("**πŸ“‹ 문제:**")
# st.write(wrong_q['QuestionText'])
# st.write("**βœ… μ •λ‹΅:**", wrong_q['CorrectAnswer'])
# st.write("---")
# st.write("**πŸ” κ΄€λ ¨λœ Misconception:**")
# if misconception_id and not pd.isna(misconception_id):
# misconception_text = misconception_model.misconception_names.get(misconception_id, "정보 μ—†μŒ")
# st.info(f"Misconception ID: {int(misconception_id)}\n\n{misconception_text}")
# else:
# st.info("Misconception 정보가 μ—†μŠ΅λ‹ˆλ‹€.")
# Script entry point: run the app when this file is executed as the main module.
if __name__ == "__main__":
    main()
# Correct answers when the questions are sampled with random_state=42:
# D C A A C
# A B B B B