Jintonic92 committed on
Commit
2d02136
·
verified ·
1 Parent(s): 4d43c6c

Update src/SecondModule/module2.py

Browse files
Files changed (1) hide show
  1. src/SecondModule/module2.py +323 -49
src/SecondModule/module2.py CHANGED
@@ -5,6 +5,8 @@ from dataclasses import dataclass
5
  import logging
6
  from dotenv import load_dotenv
7
  import os
 
 
8
 
9
  # Set up logging
10
  logging.basicConfig(level=logging.INFO)
@@ -32,6 +34,169 @@ class GeneratedQuestion:
32
  correct_answer: str
33
  explanation: str
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  class SimilarQuestionGenerator:
36
  def __init__(self, misconception_csv_path: str = 'misconception_mapping.csv'):
37
  """
@@ -129,68 +294,177 @@ class SimilarQuestionGenerator:
129
  except Exception as e:
130
  logger.error(f"Unexpected error in call_model_api: {e}")
131
  raise
 
 
132
  def parse_model_output(self, output: str) -> GeneratedQuestion:
 
 
133
  if not isinstance(output, str):
134
  logger.error(f"Invalid output format: {type(output)}. Expected string.")
135
- raise ValueError("Model output is not a string.")
136
-
137
- logger.info(f"Parsing output: {output}")
138
- output_lines = output.strip().splitlines()
139
- logger.debug(f"Split output into lines: {output_lines}")
140
-
141
- question, choices, correct_answer, explanation = "", {}, "", ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
- for line in output_lines:
144
  if line.lower().startswith("question:"):
145
- question = line.split(":", 1)[1].strip()
146
- elif line.startswith("A)"):
147
- choices["A"] = line[2:].strip()
148
- elif line.startswith("B)"):
149
- choices["B"] = line[2:].strip()
150
- elif line.startswith("C)"):
151
- choices["C"] = line[2:].strip()
152
- elif line.startswith("D)"):
153
- choices["D"] = line[2:].strip()
 
 
 
 
 
 
 
 
 
 
 
 
154
  elif line.lower().startswith("correct answer:"):
155
- correct_answer = line.split(":", 1)[1].strip()
 
 
 
 
156
  elif line.lower().startswith("explanation:"):
157
- explanation = line.split(":", 1)[1].strip()
158
-
159
- if not question or len(choices) < 4 or not correct_answer or not explanation:
160
- logger.warning("Incomplete generated question.")
 
 
 
 
 
 
 
 
 
161
  return GeneratedQuestion(question, choices, correct_answer, explanation)
162
 
163
- def generate_similar_question_with_text(self, construct_name: str, subject_name: str, question_text: str, correct_answer_text: str, wrong_answer_text: str, misconception_id: float) -> Tuple[Optional[GeneratedQuestion], Optional[str]]:
164
- logger.info("generate_similar_question_with_text initiated")
165
 
166
- # ์˜ˆ์™ธ ์ฒ˜๋ฆฌ ์ถ”๊ฐ€
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  try:
168
  misconception_text = self.get_misconception_text(misconception_id)
169
  logger.info(f"Misconception text retrieved: {misconception_text}")
 
 
 
170
  except Exception as e:
171
  logger.error(f"Error retrieving misconception text: {e}")
172
  return None, None
173
-
174
- if not misconception_text:
175
- logger.info("Skipping question generation due to lack of misconception.")
176
- return None, None
177
-
178
- prompt = self.generate_prompt(construct_name, subject_name, question_text, correct_answer_text, wrong_answer_text, misconception_text)
179
- logger.info(f"Generated prompt: {prompt}")
180
-
181
- generated_text = None # ๊ธฐ๋ณธ๊ฐ’์œผ๋กœ ์ดˆ๊ธฐํ™”
182
- try:
183
- logger.info("Calling call_model_api...")
184
- generated_text = self.call_model_api(prompt)
185
- logger.info(f"Generated text from API: {generated_text}")
186
-
187
- # ํŒŒ์‹ฑ
188
- generated_question = self.parse_model_output(generated_text)
189
- logger.info(f"Generated question object: {generated_question}")
190
- return generated_question, generated_text
191
-
192
- except Exception as e:
193
- logger.error(f"Failed to generate question: {e}")
194
- logger.debug(f"API output for debugging: {generated_text}")
195
- return None, generated_text
196
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import logging
6
  from dotenv import load_dotenv
7
  import os
8
+ import time
9
+ import re
10
 
11
  # Set up logging
12
  logging.basicConfig(level=logging.INFO)
 
34
  correct_answer: str
35
  explanation: str
36
 
37
+ # class SimilarQuestionGenerator:
38
+ # def __init__(self, misconception_csv_path: str = 'misconception_mapping.csv'):
39
+ # """
40
+ # Initialize the generator by loading the misconception mapping and the language model.
41
+ # """
42
+ # self._load_data(misconception_csv_path)
43
+
44
+ # def _load_data(self, misconception_csv_path: str):
45
+ # logger.info("Loading misconception mapping...")
46
+ # self.misconception_df = pd.read_csv(misconception_csv_path)
47
+
48
+ # def get_misconception_text(self, misconception_id: float) -> Optional[str]:
49
+ # # MisconceptionId๋ฅผ ๋ฐ›์•„ ํ•ด๋‹น ID์— ๋งค์นญ๋˜๋Š” ์˜ค๊ฐœ๋… ์„ค๋ช… ํ…์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค
50
+ # """Retrieve the misconception text based on the misconception ID."""
51
+ # if pd.isna(misconception_id): # NaN ์ฒดํฌ
52
+ # logger.warning("Received NaN for misconception_id.")
53
+ # return "No misconception provided."
54
+
55
+ # try:
56
+ # row = self.misconception_df[self.misconception_df['MisconceptionId'] == int(misconception_id)]
57
+ # if not row.empty:
58
+ # return row.iloc[0]['MisconceptionName']
59
+ # except ValueError as e:
60
+ # logger.error(f"Error processing misconception_id: {e}")
61
+
62
+ # logger.warning(f"No misconception found for ID: {misconception_id}")
63
+ # return "Misconception not found."
64
+
65
+ # def generate_prompt(self, construct_name: str, subject_name: str, question_text: str, correct_answer_text: str, wrong_answer_text: str, misconception_text: str) -> str:
66
+ # """Create a prompt for the language model."""
67
+ # #๋ฌธ์ œ ์ƒ์„ฑ์„ ์œ„ํ•œ ํ”„๋กฌํ”„ํŠธ ํ…์ŠคํŠธ๋ฅผ ์ƒ์„ฑ
68
+ # logger.info("Generating prompt...")
69
+ # misconception_clause = (f"that targets the following misconception: \"{misconception_text}\"." if misconception_text != "There is no misconception" else "")
70
+ # prompt = f"""
71
+ # <|begin_of_text|>
72
+ # <|start_header_id|>system<|end_header_id|>
73
+ # You are an educational assistant designed to generate multiple-choice questions {misconception_clause}
74
+ # <|eot_id|>
75
+ # <|start_header_id|>user<|end_header_id|>
76
+ # You need to create a similar multiple-choice question based on the following details:
77
+
78
+ # Construct Name: {construct_name}
79
+ # Subject Name: {subject_name}
80
+ # Question Text: {question_text}
81
+ # Correct Answer: {correct_answer_text}
82
+ # Wrong Answer: {wrong_answer_text}
83
+
84
+ # Please follow this output format:
85
+ # ---
86
+ # Question: <Your Question Text>
87
+ # A) <Choice A>
88
+ # B) <Choice B>
89
+ # C) <Choice C>
90
+ # D) <Choice D>
91
+ # Correct Answer: <Correct Choice (e.g., A)>
92
+ # Explanation: <Brief explanation for the correct answer>
93
+ # ---
94
+ # Ensure that the question is conceptually similar but not identical to the original. Ensure clarity and educational value.
95
+ # <|eot_id|>
96
+ # <|start_header_id|>assistant<|end_header_id|>
97
+ # """.strip()
98
+ # logger.debug(f"Generated prompt: {prompt}")
99
+ # return prompt
100
+
101
+ # def call_model_api(self, prompt: str) -> str:
102
+ # """Hugging Face API ํ˜ธ์ถœ"""
103
+ # logger.info("Calling Hugging Face API...")
104
+ # headers = {"Authorization": f"Bearer {API_KEY}"}
105
+
106
+ # try:
107
+ # response = requests.post(API_URL, headers=headers, json={"inputs": prompt})
108
+ # response.raise_for_status()
109
+
110
+ # response_data = response.json()
111
+ # logger.debug(f"Raw API response: {response_data}")
112
+
113
+ # # API ์‘๋‹ต์ด ๋ฆฌ์ŠคํŠธ์ธ ๊ฒฝ์šฐ ์ฒ˜๋ฆฌ
114
+ # if isinstance(response_data, list):
115
+ # if response_data and isinstance(response_data[0], dict):
116
+ # generated_text = response_data[0].get('generated_text', '')
117
+ # else:
118
+ # generated_text = response_data[0] if response_data else ''
119
+ # # API ์‘๋‹ต์ด ๋”•์…”๋„ˆ๋ฆฌ์ธ ๊ฒฝ์šฐ ์ฒ˜๋ฆฌ
120
+ # elif isinstance(response_data, dict):
121
+ # generated_text = response_data.get('generated_text', '')
122
+ # else:
123
+ # generated_text = str(response_data)
124
+
125
+ # logger.info(f"Generated text: {generated_text}")
126
+ # return generated_text
127
+
128
+ # except requests.exceptions.RequestException as e:
129
+ # logger.error(f"API request failed: {e}")
130
+ # raise
131
+ # except Exception as e:
132
+ # logger.error(f"Unexpected error in call_model_api: {e}")
133
+ # raise
134
+ # def parse_model_output(self, output: str) -> GeneratedQuestion:
135
+ # if not isinstance(output, str):
136
+ # logger.error(f"Invalid output format: {type(output)}. Expected string.")
137
+ # raise ValueError("Model output is not a string.")
138
+
139
+ # logger.info(f"Parsing output: {output}")
140
+ # output_lines = output.strip().splitlines()
141
+ # logger.debug(f"Split output into lines: {output_lines}")
142
+
143
+ # question, choices, correct_answer, explanation = "", {}, "", ""
144
+
145
+ # for line in output_lines:
146
+ # if line.lower().startswith("question:"):
147
+ # question = line.split(":", 1)[1].strip()
148
+ # elif line.startswith("A)"):
149
+ # choices["A"] = line[2:].strip()
150
+ # elif line.startswith("B)"):
151
+ # choices["B"] = line[2:].strip()
152
+ # elif line.startswith("C)"):
153
+ # choices["C"] = line[2:].strip()
154
+ # elif line.startswith("D)"):
155
+ # choices["D"] = line[2:].strip()
156
+ # elif line.lower().startswith("correct answer:"):
157
+ # correct_answer = line.split(":", 1)[1].strip()
158
+ # elif line.lower().startswith("explanation:"):
159
+ # explanation = line.split(":", 1)[1].strip()
160
+
161
+ # if not question or len(choices) < 4 or not correct_answer or not explanation:
162
+ # logger.warning("Incomplete generated question.")
163
+ # return GeneratedQuestion(question, choices, correct_answer, explanation)
164
+
165
+ # def generate_similar_question_with_text(self, construct_name: str, subject_name: str, question_text: str, correct_answer_text: str, wrong_answer_text: str, misconception_id: float) -> Tuple[Optional[GeneratedQuestion], Optional[str]]:
166
+ # logger.info("generate_similar_question_with_text initiated")
167
+
168
+ # # ์˜ˆ์™ธ ์ฒ˜๋ฆฌ ์ถ”๊ฐ€
169
+ # try:
170
+ # misconception_text = self.get_misconception_text(misconception_id)
171
+ # logger.info(f"Misconception text retrieved: {misconception_text}")
172
+ # except Exception as e:
173
+ # logger.error(f"Error retrieving misconception text: {e}")
174
+ # return None, None
175
+
176
+ # if not misconception_text:
177
+ # logger.info("Skipping question generation due to lack of misconception.")
178
+ # return None, None
179
+
180
+ # prompt = self.generate_prompt(construct_name, subject_name, question_text, correct_answer_text, wrong_answer_text, misconception_text)
181
+ # logger.info(f"Generated prompt: {prompt}")
182
+
183
+ # generated_text = None # ๊ธฐ๋ณธ๊ฐ’์œผ๋กœ ์ดˆ๊ธฐํ™”
184
+ # try:
185
+ # logger.info("Calling call_model_api...")
186
+ # generated_text = self.call_model_api(prompt)
187
+ # logger.info(f"Generated text from API: {generated_text}")
188
+
189
+ # # ํŒŒ์‹ฑ
190
+ # generated_question = self.parse_model_output(generated_text)
191
+ # logger.info(f"Generated question object: {generated_question}")
192
+ # return generated_question, generated_text
193
+
194
+ # except Exception as e:
195
+ # logger.error(f"Failed to generate question: {e}")
196
+ # logger.debug(f"API output for debugging: {generated_text}")
197
+ # return None, generated_text
198
+
199
+
200
  class SimilarQuestionGenerator:
201
  def __init__(self, misconception_csv_path: str = 'misconception_mapping.csv'):
202
  """
 
294
  except Exception as e:
295
  logger.error(f"Unexpected error in call_model_api: {e}")
296
  raise
297
+
298
+ # --- module2.py ์ค‘ ์ผ๋ถ€ ---
299
  def parse_model_output(self, output: str) -> GeneratedQuestion:
300
+ """Parse the model output with improved extraction of the question components."""
301
+
302
  if not isinstance(output, str):
303
  logger.error(f"Invalid output format: {type(output)}. Expected string.")
304
+ raise ValueError("Model output is not a string")
305
+
306
+ logger.info("Parsing model output...")
307
+
308
+ # 1) ์ „์ฒด ํ…์ŠคํŠธ๋ฅผ ์ค„ ๋‹จ์œ„๋กœ ๋‚˜๋ˆ”
309
+ lines = output.splitlines()
310
+
311
+ # 2) ๋งˆ์ง€๋ง‰์œผ๋กœ ๋“ฑ์žฅํ•˜๋Š” Question~Explanation ๋ธ”๋ก์„ ์ฐพ๊ธฐ ์œ„ํ•œ ์ž„์‹œ ๋ณ€์ˆ˜
312
+ question = ""
313
+ choices = {}
314
+ correct_answer = ""
315
+ explanation = ""
316
+
317
+ # ์ด ๋ธ”๋ก์„ ์—ฌ๋Ÿฌ ๋ฒˆ ๋งŒ๋‚  ์ˆ˜ ์žˆ์œผ๋‹ˆ, ์ผ๋‹จ ๋ฐœ๊ฒฌํ•  ๋•Œ๋งˆ๋‹ค ์ €์žฅํ•ด๋‘๊ณ  ๋ฎ์–ด์”Œ์šฐ๋Š” ๋ฐฉ์‹.
318
+ # ์ตœ์ข…์ ์œผ๋กœ "๋งˆ์ง€๋ง‰์— ๋ฐœ๊ฒฌ๋œ" Question ๋ธ”๋ก์ด ์•„๋ž˜ ๋ณ€์ˆ˜๋ฅผ ๋ฎ์–ด์“ฐ๊ฒŒ ๋จ
319
+ temp_question = ""
320
+ temp_choices = {}
321
+ temp_correct = ""
322
+ temp_explanation = ""
323
+
324
+ for line in lines:
325
+ line = line.strip()
326
+ if not line:
327
+ continue
328
 
329
+ # Question:
330
  if line.lower().startswith("question:"):
331
+ # ์ง€๊ธˆ๊นŒ์ง€ ์ €์žฅํ•ด๋‘” ์ด์ „ ๋ธ”๋ก๋“ค์„ ์ตœ์ข… ์ €์žฅ ์˜์—ญ์— ๋ฎ์–ด์”Œ์šด๋‹ค
332
+ if temp_question:
333
+ question = temp_question
334
+ choices = temp_choices
335
+ correct_answer = temp_correct
336
+ explanation = temp_explanation
337
+
338
+ # ์ƒˆ ๋ธ”๋ก์„ ์‹œ์ž‘
339
+ temp_question = line.split(":", 1)[1].strip()
340
+ temp_choices = {}
341
+ temp_correct = ""
342
+ temp_explanation = ""
343
+
344
+ # A) / B) / C) / D)
345
+ elif re.match(r"^[ABCD]\)", line):
346
+ # "A) ์„ ํƒ์ง€ ๋‚ด์šฉ"
347
+ letter = line[0] # A, B, C, D
348
+ choice_text = line[2:].strip()
349
+ temp_choices[letter] = choice_text
350
+
351
+ # Correct Answer:
352
  elif line.lower().startswith("correct answer:"):
353
+ # "Correct Answer: A)" ํ˜•ํƒœ์—์„œ A๋งŒ ์ถ”์ถœ
354
+ ans_part = line.split(":", 1)[1].strip()
355
+ temp_correct = ans_part[0].upper() if ans_part else ""
356
+
357
+ # Explanation:
358
  elif line.lower().startswith("explanation:"):
359
+ temp_explanation = line.split(":", 1)[1].strip()
360
+
361
+ # ๋ฃจํ”„๊ฐ€ ๋๋‚œ ๋’ค, ํ•œ ๋ฒˆ ๋” ์ตœ์‹  ๋ธ”๋ก์„ ์ตœ์ข… ๋ณ€์ˆ˜์— ๋ฐ˜์˜
362
+ if temp_question:
363
+ question = temp_question
364
+ choices = temp_choices
365
+ correct_answer = temp_correct
366
+ explanation = temp_explanation
367
+
368
+ # ์ด์ œ question, choices, correct_answer, explanation์ด ์ตœ์ข… ํŒŒ์‹ฑ ๊ฒฐ๊ณผ
369
+ logger.debug(f"Parsed components - Question: {question}, Choices: {choices}, "
370
+ f"Correct Answer: {correct_answer}, Explanation: {explanation}")
371
+
372
  return GeneratedQuestion(question, choices, correct_answer, explanation)
373
 
 
 
374
 
375
+
376
+
377
+
378
+ def validate_generated_question(self, question: GeneratedQuestion) -> bool:
379
+ """Validate if all components of the generated question are present and valid."""
380
+ logger.info("Validating generated question...")
381
+
382
+ try:
383
+ # Check if question text exists and is not too short
384
+ if not question.question or len(question.question.strip()) < 10:
385
+ logger.warning("Question text is missing or too short")
386
+ return False
387
+
388
+ # Check if all four choices exist and are not empty
389
+ required_choices = set(['A', 'B', 'C', 'D'])
390
+ if set(question.choices.keys()) != required_choices:
391
+ logger.warning(f"Missing choices. Found: {set(question.choices.keys())}")
392
+ return False
393
+
394
+ if not all(choice.strip() for choice in question.choices.values()):
395
+ logger.warning("Empty choice text found")
396
+ return False
397
+
398
+ # Check if correct answer is valid (should be just A, B, C, or D)
399
+ if not question.correct_answer or question.correct_answer not in required_choices:
400
+ logger.warning(f"Invalid correct answer: {question.correct_answer}")
401
+ return False
402
+
403
+ # Check if explanation exists and is not too short
404
+ if not question.explanation or len(question.explanation.strip()) < 20:
405
+ logger.warning("Explanation is missing or too short")
406
+ return False
407
+
408
+ logger.info("Question validation passed")
409
+ return True
410
+
411
+ except Exception as e:
412
+ logger.error(f"Error during validation: {e}")
413
+ return False
414
+
415
+ def generate_similar_question_with_text(self, construct_name: str, subject_name: str,
416
+ question_text: str, correct_answer_text: str,
417
+ wrong_answer_text: str, misconception_id: float,
418
+ max_retries: int = 3) -> Tuple[Optional[GeneratedQuestion], Optional[str]]:
419
+ """Generate a similar question with validation and retry mechanism."""
420
+ logger.info("generate_similar_question_with_text initiated")
421
+
422
+ # Get misconception text
423
  try:
424
  misconception_text = self.get_misconception_text(misconception_id)
425
  logger.info(f"Misconception text retrieved: {misconception_text}")
426
+ if not misconception_text:
427
+ logger.info("Skipping question generation due to lack of misconception.")
428
+ return None, None
429
  except Exception as e:
430
  logger.error(f"Error retrieving misconception text: {e}")
431
  return None, None
432
+
433
+ # Generate prompt once since it doesn't change between retries
434
+ prompt = self.generate_prompt(construct_name, subject_name, question_text,
435
+ correct_answer_text, wrong_answer_text, misconception_text)
436
+
437
+ # Attempt generation with retries
438
+ for attempt in range(max_retries):
439
+ try:
440
+ logger.info(f"Attempt {attempt + 1} of {max_retries}")
441
+
442
+ # Call API
443
+ generated_text = self.call_model_api(prompt)
444
+ logger.info(f"Generated text from API: {generated_text}")
445
+
446
+ # Parse output
447
+ generated_question = self.parse_model_output(generated_text)
448
+
449
+ # Validate the generated question
450
+ if self.validate_generated_question(generated_question):
451
+ logger.info("Successfully generated valid question")
452
+ return generated_question, generated_text
453
+ else:
454
+ logger.warning(f"Generated question failed validation on attempt {attempt + 1}")
455
+
456
+ # If this was the last attempt, return None
457
+ if attempt == max_retries - 1:
458
+ logger.error("Max retries reached without generating valid question")
459
+ return None, generated_text
460
+
461
+ # Add delay between retries to avoid rate limiting
462
+ time.sleep(2) # 2 second delay between retries
463
+
464
+ except Exception as e:
465
+ logger.error(f"Error during question generation attempt {attempt + 1}: {e}")
466
+ if attempt == max_retries - 1:
467
+ return None, None
468
+ time.sleep(2) # Add delay before retry
469
+
470
+ return None, None