Jintonic92 committed on
Commit
2d02136
·
verified ·
1 Parent(s): 4d43c6c

Update src/SecondModule/module2.py

Browse files
Files changed (1) hide show
  1. src/SecondModule/module2.py +323 -49
src/SecondModule/module2.py CHANGED
@@ -5,6 +5,8 @@ from dataclasses import dataclass
5
  import logging
6
  from dotenv import load_dotenv
7
  import os
 
 
8
 
9
  # Set up logging
10
  logging.basicConfig(level=logging.INFO)
@@ -32,6 +34,169 @@ class GeneratedQuestion:
32
  correct_answer: str
33
  explanation: str
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  class SimilarQuestionGenerator:
36
  def __init__(self, misconception_csv_path: str = 'misconception_mapping.csv'):
37
  """
@@ -129,68 +294,177 @@ class SimilarQuestionGenerator:
129
  except Exception as e:
130
  logger.error(f"Unexpected error in call_model_api: {e}")
131
  raise
 
 
132
  def parse_model_output(self, output: str) -> GeneratedQuestion:
 
 
133
  if not isinstance(output, str):
134
  logger.error(f"Invalid output format: {type(output)}. Expected string.")
135
- raise ValueError("Model output is not a string.")
136
-
137
- logger.info(f"Parsing output: {output}")
138
- output_lines = output.strip().splitlines()
139
- logger.debug(f"Split output into lines: {output_lines}")
140
-
141
- question, choices, correct_answer, explanation = "", {}, "", ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
- for line in output_lines:
144
  if line.lower().startswith("question:"):
145
- question = line.split(":", 1)[1].strip()
146
- elif line.startswith("A)"):
147
- choices["A"] = line[2:].strip()
148
- elif line.startswith("B)"):
149
- choices["B"] = line[2:].strip()
150
- elif line.startswith("C)"):
151
- choices["C"] = line[2:].strip()
152
- elif line.startswith("D)"):
153
- choices["D"] = line[2:].strip()
 
 
 
 
 
 
 
 
 
 
 
 
154
  elif line.lower().startswith("correct answer:"):
155
- correct_answer = line.split(":", 1)[1].strip()
 
 
 
 
156
  elif line.lower().startswith("explanation:"):
157
- explanation = line.split(":", 1)[1].strip()
158
-
159
- if not question or len(choices) < 4 or not correct_answer or not explanation:
160
- logger.warning("Incomplete generated question.")
 
 
 
 
 
 
 
 
 
161
  return GeneratedQuestion(question, choices, correct_answer, explanation)
162
 
163
- def generate_similar_question_with_text(self, construct_name: str, subject_name: str, question_text: str, correct_answer_text: str, wrong_answer_text: str, misconception_id: float) -> Tuple[Optional[GeneratedQuestion], Optional[str]]:
164
- logger.info("generate_similar_question_with_text initiated")
165
 
166
- # ์˜ˆ์™ธ ์ฒ˜๋ฆฌ ์ถ”๊ฐ€
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  try:
168
  misconception_text = self.get_misconception_text(misconception_id)
169
  logger.info(f"Misconception text retrieved: {misconception_text}")
 
 
 
170
  except Exception as e:
171
  logger.error(f"Error retrieving misconception text: {e}")
172
  return None, None
173
-
174
- if not misconception_text:
175
- logger.info("Skipping question generation due to lack of misconception.")
176
- return None, None
177
-
178
- prompt = self.generate_prompt(construct_name, subject_name, question_text, correct_answer_text, wrong_answer_text, misconception_text)
179
- logger.info(f"Generated prompt: {prompt}")
180
-
181
- generated_text = None # ๊ธฐ๋ณธ๊ฐ’์œผ๋กœ ์ดˆ๊ธฐํ™”
182
- try:
183
- logger.info("Calling call_model_api...")
184
- generated_text = self.call_model_api(prompt)
185
- logger.info(f"Generated text from API: {generated_text}")
186
-
187
- # ํŒŒ์‹ฑ
188
- generated_question = self.parse_model_output(generated_text)
189
- logger.info(f"Generated question object: {generated_question}")
190
- return generated_question, generated_text
191
-
192
- except Exception as e:
193
- logger.error(f"Failed to generate question: {e}")
194
- logger.debug(f"API output for debugging: {generated_text}")
195
- return None, generated_text
196
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import logging
6
  from dotenv import load_dotenv
7
  import os
8
+ import time
9
+ import re
10
 
11
  # Set up logging
12
  logging.basicConfig(level=logging.INFO)
 
34
  correct_answer: str
35
  explanation: str
36
 
37
+ # class SimilarQuestionGenerator:
38
+ # def __init__(self, misconception_csv_path: str = 'misconception_mapping.csv'):
39
+ # """
40
+ # Initialize the generator by loading the misconception mapping and the language model.
41
+ # """
42
+ # self._load_data(misconception_csv_path)
43
+
44
+ # def _load_data(self, misconception_csv_path: str):
45
+ # logger.info("Loading misconception mapping...")
46
+ # self.misconception_df = pd.read_csv(misconception_csv_path)
47
+
48
+ # def get_misconception_text(self, misconception_id: float) -> Optional[str]:
49
+ # # MisconceptionId๋ฅผ ๋ฐ›์•„ ํ•ด๋‹น ID์— ๋งค์นญ๋˜๋Š” ์˜ค๊ฐœ๋… ์„ค๋ช… ํ…์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค
50
+ # """Retrieve the misconception text based on the misconception ID."""
51
+ # if pd.isna(misconception_id): # NaN ์ฒดํฌ
52
+ # logger.warning("Received NaN for misconception_id.")
53
+ # return "No misconception provided."
54
+
55
+ # try:
56
+ # row = self.misconception_df[self.misconception_df['MisconceptionId'] == int(misconception_id)]
57
+ # if not row.empty:
58
+ # return row.iloc[0]['MisconceptionName']
59
+ # except ValueError as e:
60
+ # logger.error(f"Error processing misconception_id: {e}")
61
+
62
+ # logger.warning(f"No misconception found for ID: {misconception_id}")
63
+ # return "Misconception not found."
64
+
65
+ # def generate_prompt(self, construct_name: str, subject_name: str, question_text: str, correct_answer_text: str, wrong_answer_text: str, misconception_text: str) -> str:
66
+ # """Create a prompt for the language model."""
67
+ # #๋ฌธ์ œ ์ƒ์„ฑ์„ ์œ„ํ•œ ํ”„๋กฌํ”„ํŠธ ํ…์ŠคํŠธ๋ฅผ ์ƒ์„ฑ
68
+ # logger.info("Generating prompt...")
69
+ # misconception_clause = (f"that targets the following misconception: \"{misconception_text}\"." if misconception_text != "There is no misconception" else "")
70
+ # prompt = f"""
71
+ # <|begin_of_text|>
72
+ # <|start_header_id|>system<|end_header_id|>
73
+ # You are an educational assistant designed to generate multiple-choice questions {misconception_clause}
74
+ # <|eot_id|>
75
+ # <|start_header_id|>user<|end_header_id|>
76
+ # You need to create a similar multiple-choice question based on the following details:
77
+
78
+ # Construct Name: {construct_name}
79
+ # Subject Name: {subject_name}
80
+ # Question Text: {question_text}
81
+ # Correct Answer: {correct_answer_text}
82
+ # Wrong Answer: {wrong_answer_text}
83
+
84
+ # Please follow this output format:
85
+ # ---
86
+ # Question: <Your Question Text>
87
+ # A) <Choice A>
88
+ # B) <Choice B>
89
+ # C) <Choice C>
90
+ # D) <Choice D>
91
+ # Correct Answer: <Correct Choice (e.g., A)>
92
+ # Explanation: <Brief explanation for the correct answer>
93
+ # ---
94
+ # Ensure that the question is conceptually similar but not identical to the original. Ensure clarity and educational value.
95
+ # <|eot_id|>
96
+ # <|start_header_id|>assistant<|end_header_id|>
97
+ # """.strip()
98
+ # logger.debug(f"Generated prompt: {prompt}")
99
+ # return prompt
100
+
101
+ # def call_model_api(self, prompt: str) -> str:
102
+ # """Hugging Face API ํ˜ธ์ถœ"""
103
+ # logger.info("Calling Hugging Face API...")
104
+ # headers = {"Authorization": f"Bearer {API_KEY}"}
105
+
106
+ # try:
107
+ # response = requests.post(API_URL, headers=headers, json={"inputs": prompt})
108
+ # response.raise_for_status()
109
+
110
+ # response_data = response.json()
111
+ # logger.debug(f"Raw API response: {response_data}")
112
+
113
+ # # API ์‘๋‹ต์ด ๋ฆฌ์ŠคํŠธ์ธ ๊ฒฝ์šฐ ์ฒ˜๋ฆฌ
114
+ # if isinstance(response_data, list):
115
+ # if response_data and isinstance(response_data[0], dict):
116
+ # generated_text = response_data[0].get('generated_text', '')
117
+ # else:
118
+ # generated_text = response_data[0] if response_data else ''
119
+ # # API ์‘๋‹ต์ด ๋”•์…”๋„ˆ๋ฆฌ์ธ ๊ฒฝ์šฐ ์ฒ˜๋ฆฌ
120
+ # elif isinstance(response_data, dict):
121
+ # generated_text = response_data.get('generated_text', '')
122
+ # else:
123
+ # generated_text = str(response_data)
124
+
125
+ # logger.info(f"Generated text: {generated_text}")
126
+ # return generated_text
127
+
128
+ # except requests.exceptions.RequestException as e:
129
+ # logger.error(f"API request failed: {e}")
130
+ # raise
131
+ # except Exception as e:
132
+ # logger.error(f"Unexpected error in call_model_api: {e}")
133
+ # raise
134
+ # def parse_model_output(self, output: str) -> GeneratedQuestion:
135
+ # if not isinstance(output, str):
136
+ # logger.error(f"Invalid output format: {type(output)}. Expected string.")
137
+ # raise ValueError("Model output is not a string.")
138
+
139
+ # logger.info(f"Parsing output: {output}")
140
+ # output_lines = output.strip().splitlines()
141
+ # logger.debug(f"Split output into lines: {output_lines}")
142
+
143
+ # question, choices, correct_answer, explanation = "", {}, "", ""
144
+
145
+ # for line in output_lines:
146
+ # if line.lower().startswith("question:"):
147
+ # question = line.split(":", 1)[1].strip()
148
+ # elif line.startswith("A)"):
149
+ # choices["A"] = line[2:].strip()
150
+ # elif line.startswith("B)"):
151
+ # choices["B"] = line[2:].strip()
152
+ # elif line.startswith("C)"):
153
+ # choices["C"] = line[2:].strip()
154
+ # elif line.startswith("D)"):
155
+ # choices["D"] = line[2:].strip()
156
+ # elif line.lower().startswith("correct answer:"):
157
+ # correct_answer = line.split(":", 1)[1].strip()
158
+ # elif line.lower().startswith("explanation:"):
159
+ # explanation = line.split(":", 1)[1].strip()
160
+
161
+ # if not question or len(choices) < 4 or not correct_answer or not explanation:
162
+ # logger.warning("Incomplete generated question.")
163
+ # return GeneratedQuestion(question, choices, correct_answer, explanation)
164
+
165
+ # def generate_similar_question_with_text(self, construct_name: str, subject_name: str, question_text: str, correct_answer_text: str, wrong_answer_text: str, misconception_id: float) -> Tuple[Optional[GeneratedQuestion], Optional[str]]:
166
+ # logger.info("generate_similar_question_with_text initiated")
167
+
168
+ # # ์˜ˆ์™ธ ์ฒ˜๋ฆฌ ์ถ”๊ฐ€
169
+ # try:
170
+ # misconception_text = self.get_misconception_text(misconception_id)
171
+ # logger.info(f"Misconception text retrieved: {misconception_text}")
172
+ # except Exception as e:
173
+ # logger.error(f"Error retrieving misconception text: {e}")
174
+ # return None, None
175
+
176
+ # if not misconception_text:
177
+ # logger.info("Skipping question generation due to lack of misconception.")
178
+ # return None, None
179
+
180
+ # prompt = self.generate_prompt(construct_name, subject_name, question_text, correct_answer_text, wrong_answer_text, misconception_text)
181
+ # logger.info(f"Generated prompt: {prompt}")
182
+
183
+ # generated_text = None # ๊ธฐ๋ณธ๊ฐ’์œผ๋กœ ์ดˆ๊ธฐํ™”
184
+ # try:
185
+ # logger.info("Calling call_model_api...")
186
+ # generated_text = self.call_model_api(prompt)
187
+ # logger.info(f"Generated text from API: {generated_text}")
188
+
189
+ # # ํŒŒ์‹ฑ
190
+ # generated_question = self.parse_model_output(generated_text)
191
+ # logger.info(f"Generated question object: {generated_question}")
192
+ # return generated_question, generated_text
193
+
194
+ # except Exception as e:
195
+ # logger.error(f"Failed to generate question: {e}")
196
+ # logger.debug(f"API output for debugging: {generated_text}")
197
+ # return None, generated_text
198
+
199
+
200
  class SimilarQuestionGenerator:
201
  def __init__(self, misconception_csv_path: str = 'misconception_mapping.csv'):
202
  """
 
294
  except Exception as e:
295
  logger.error(f"Unexpected error in call_model_api: {e}")
296
  raise
297
+
298
+ # --- module2.py ์ค‘ ์ผ๋ถ€ ---
299
  def parse_model_output(self, output: str) -> GeneratedQuestion:
300
+ """Parse the model output with improved extraction of the question components."""
301
+
302
  if not isinstance(output, str):
303
  logger.error(f"Invalid output format: {type(output)}. Expected string.")
304
+ raise ValueError("Model output is not a string")
305
+
306
+ logger.info("Parsing model output...")
307
+
308
+ # 1) ์ „์ฒด ํ…์ŠคํŠธ๋ฅผ ์ค„ ๋‹จ์œ„๋กœ ๋‚˜๋ˆ”
309
+ lines = output.splitlines()
310
+
311
+ # 2) ๋งˆ์ง€๋ง‰์œผ๋กœ ๋“ฑ์žฅํ•˜๋Š” Question~Explanation ๋ธ”๋ก์„ ์ฐพ๊ธฐ ์œ„ํ•œ ์ž„์‹œ ๋ณ€์ˆ˜
312
+ question = ""
313
+ choices = {}
314
+ correct_answer = ""
315
+ explanation = ""
316
+
317
+ # ์ด ๋ธ”๋ก์„ ์—ฌ๋Ÿฌ ๋ฒˆ ๋งŒ๋‚  ์ˆ˜ ์žˆ์œผ๋‹ˆ, ์ผ๋‹จ ๋ฐœ๊ฒฌํ•  ๋•Œ๋งˆ๋‹ค ์ €์žฅํ•ด๋‘๊ณ  ๋ฎ์–ด์”Œ์šฐ๋Š” ๋ฐฉ์‹.
318
+ # ์ตœ์ข…์ ์œผ๋กœ "๋งˆ์ง€๋ง‰์— ๋ฐœ๊ฒฌ๋œ" Question ๋ธ”๋ก์ด ์•„๋ž˜ ๋ณ€์ˆ˜๋ฅผ ๋ฎ์–ด์“ฐ๊ฒŒ ๋จ
319
+ temp_question = ""
320
+ temp_choices = {}
321
+ temp_correct = ""
322
+ temp_explanation = ""
323
+
324
+ for line in lines:
325
+ line = line.strip()
326
+ if not line:
327
+ continue
328
 
329
+ # Question:
330
  if line.lower().startswith("question:"):
331
+ # ์ง€๊ธˆ๊นŒ์ง€ ์ €์žฅํ•ด๋‘” ์ด์ „ ๋ธ”๋ก๋“ค์„ ์ตœ์ข… ์ €์žฅ ์˜์—ญ์— ๋ฎ์–ด์”Œ์šด๋‹ค
332
+ if temp_question:
333
+ question = temp_question
334
+ choices = temp_choices
335
+ correct_answer = temp_correct
336
+ explanation = temp_explanation
337
+
338
+ # ์ƒˆ ๋ธ”๋ก์„ ์‹œ์ž‘
339
+ temp_question = line.split(":", 1)[1].strip()
340
+ temp_choices = {}
341
+ temp_correct = ""
342
+ temp_explanation = ""
343
+
344
+ # A) / B) / C) / D)
345
+ elif re.match(r"^[ABCD]\)", line):
346
+ # "A) ์„ ํƒ์ง€ ๋‚ด์šฉ"
347
+ letter = line[0] # A, B, C, D
348
+ choice_text = line[2:].strip()
349
+ temp_choices[letter] = choice_text
350
+
351
+ # Correct Answer:
352
  elif line.lower().startswith("correct answer:"):
353
+ # "Correct Answer: A)" ํ˜•ํƒœ์—์„œ A๋งŒ ์ถ”์ถœ
354
+ ans_part = line.split(":", 1)[1].strip()
355
+ temp_correct = ans_part[0].upper() if ans_part else ""
356
+
357
+ # Explanation:
358
  elif line.lower().startswith("explanation:"):
359
+ temp_explanation = line.split(":", 1)[1].strip()
360
+
361
+ # ๋ฃจํ”„๊ฐ€ ๋๋‚œ ๋’ค, ํ•œ ๋ฒˆ ๋” ์ตœ์‹  ๋ธ”๋ก์„ ์ตœ์ข… ๋ณ€์ˆ˜์— ๋ฐ˜์˜
362
+ if temp_question:
363
+ question = temp_question
364
+ choices = temp_choices
365
+ correct_answer = temp_correct
366
+ explanation = temp_explanation
367
+
368
+ # ์ด์ œ question, choices, correct_answer, explanation์ด ์ตœ์ข… ํŒŒ์‹ฑ ๊ฒฐ๊ณผ
369
+ logger.debug(f"Parsed components - Question: {question}, Choices: {choices}, "
370
+ f"Correct Answer: {correct_answer}, Explanation: {explanation}")
371
+
372
  return GeneratedQuestion(question, choices, correct_answer, explanation)
373
 
 
 
374
 
375
+
376
+
377
+
378
+ def validate_generated_question(self, question: GeneratedQuestion) -> bool:
379
+ """Validate if all components of the generated question are present and valid."""
380
+ logger.info("Validating generated question...")
381
+
382
+ try:
383
+ # Check if question text exists and is not too short
384
+ if not question.question or len(question.question.strip()) < 10:
385
+ logger.warning("Question text is missing or too short")
386
+ return False
387
+
388
+ # Check if all four choices exist and are not empty
389
+ required_choices = set(['A', 'B', 'C', 'D'])
390
+ if set(question.choices.keys()) != required_choices:
391
+ logger.warning(f"Missing choices. Found: {set(question.choices.keys())}")
392
+ return False
393
+
394
+ if not all(choice.strip() for choice in question.choices.values()):
395
+ logger.warning("Empty choice text found")
396
+ return False
397
+
398
+ # Check if correct answer is valid (should be just A, B, C, or D)
399
+ if not question.correct_answer or question.correct_answer not in required_choices:
400
+ logger.warning(f"Invalid correct answer: {question.correct_answer}")
401
+ return False
402
+
403
+ # Check if explanation exists and is not too short
404
+ if not question.explanation or len(question.explanation.strip()) < 20:
405
+ logger.warning("Explanation is missing or too short")
406
+ return False
407
+
408
+ logger.info("Question validation passed")
409
+ return True
410
+
411
+ except Exception as e:
412
+ logger.error(f"Error during validation: {e}")
413
+ return False
414
+
415
+ def generate_similar_question_with_text(self, construct_name: str, subject_name: str,
416
+ question_text: str, correct_answer_text: str,
417
+ wrong_answer_text: str, misconception_id: float,
418
+ max_retries: int = 3) -> Tuple[Optional[GeneratedQuestion], Optional[str]]:
419
+ """Generate a similar question with validation and retry mechanism."""
420
+ logger.info("generate_similar_question_with_text initiated")
421
+
422
+ # Get misconception text
423
  try:
424
  misconception_text = self.get_misconception_text(misconception_id)
425
  logger.info(f"Misconception text retrieved: {misconception_text}")
426
+ if not misconception_text:
427
+ logger.info("Skipping question generation due to lack of misconception.")
428
+ return None, None
429
  except Exception as e:
430
  logger.error(f"Error retrieving misconception text: {e}")
431
  return None, None
432
+
433
+ # Generate prompt once since it doesn't change between retries
434
+ prompt = self.generate_prompt(construct_name, subject_name, question_text,
435
+ correct_answer_text, wrong_answer_text, misconception_text)
436
+
437
+ # Attempt generation with retries
438
+ for attempt in range(max_retries):
439
+ try:
440
+ logger.info(f"Attempt {attempt + 1} of {max_retries}")
441
+
442
+ # Call API
443
+ generated_text = self.call_model_api(prompt)
444
+ logger.info(f"Generated text from API: {generated_text}")
445
+
446
+ # Parse output
447
+ generated_question = self.parse_model_output(generated_text)
448
+
449
+ # Validate the generated question
450
+ if self.validate_generated_question(generated_question):
451
+ logger.info("Successfully generated valid question")
452
+ return generated_question, generated_text
453
+ else:
454
+ logger.warning(f"Generated question failed validation on attempt {attempt + 1}")
455
+
456
+ # If this was the last attempt, return None
457
+ if attempt == max_retries - 1:
458
+ logger.error("Max retries reached without generating valid question")
459
+ return None, generated_text
460
+
461
+ # Add delay between retries to avoid rate limiting
462
+ time.sleep(2) # 2 second delay between retries
463
+
464
+ except Exception as e:
465
+ logger.error(f"Error during question generation attempt {attempt + 1}: {e}")
466
+ if attempt == max_retries - 1:
467
+ return None, None
468
+ time.sleep(2) # Add delay before retry
469
+
470
+ return None, None