# module3.py
import logging
import os
import re
from collections import Counter
from typing import Optional, Tuple

import requests
from dotenv import load_dotenv

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load .env file
load_dotenv()

# Hugging Face API information
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
API_KEY = os.getenv("HUGGINGFACE_API_KEY")

if not API_KEY:
    raise ValueError("API_KEY가 설정되지 않았습니다. .env 파일을 확인하세요.")

# Upper bound, in seconds, for a single inference request. Without a timeout
# `requests` waits indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 60


class AnswerVerifier:
    """Verify multiple-choice answers by querying the model repeatedly and voting."""

    # "ANSWER: X" / "CORRECT ANSWER: X". The trailing \b keeps words that merely
    # start with A-D (e.g. "ANSWER: BECAUSE ...") from being read as a choice.
    _ANSWER_RE = re.compile(r'(?:ANSWER:|CORRECT ANSWER:)\s*([ABCD])\b')
    # Fallback: any stand-alone A/B/C/D token.
    _STANDALONE_RE = re.compile(r'\b([ABCD])\b')

    def verify_answer(self, question: str, choices: dict, num_checks: int = 5) -> Optional[str]:
        """Verify an answer with a self-consistency (repeat-and-vote) approach.

        The same prompt is sent ``num_checks`` times; the answer extracted from
        each response is collected and a majority vote picks the final answer.

        Args:
            question: The question text.
            choices: Mapping with the keys ``'A'``, ``'B'``, ``'C'`` and ``'D'``.
            num_checks: Number of times the question is sent to the model.

        Returns:
            The winning choice letter, or ``None`` when no answer could be
            extracted, the vote was tied or below the confidence threshold, or
            a request/processing error occurred. The reason is logged; it is
            not returned.
        """
        # NOTE(review): every trial sends an identical payload with no sampling
        # parameters. Confirm the endpoint neither caches nor decodes
        # deterministically, otherwise all votes will be the same.
        try:
            # The prompt and headers do not change between trials.
            prompt = self._create_prompt(question, choices)
            headers = {"Authorization": f"Bearer {API_KEY}"}

            answers = []
            for trial in range(1, num_checks + 1):
                response = requests.post(
                    API_URL,
                    headers=headers,
                    json={"inputs": prompt},
                    timeout=REQUEST_TIMEOUT_SECONDS,
                )
                response.raise_for_status()
                response_data = response.json()
                logger.debug("Raw API response: %s", response_data)

                # Reduce the raw payload to the assistant's text.
                generated_text = self._process_response(response_data)
                logger.debug("Trial %d:", trial)
                logger.debug("Generated text: %s", generated_text)

                answer = self._extract_answer(generated_text)
                logger.debug("Extracted answer: %s", answer)
                if answer:
                    answers.append(answer)

            if not answers:
                logger.warning("No valid answers extracted")
                return None

            # Only the final answer is returned; the explanation is logged.
            final_answer, explanation = self._get_majority_vote(answers)
            logger.info("Final verified answer: %s (%s)", final_answer, explanation)
            return final_answer
        except Exception as e:
            # Boundary handler: any failure (network, HTTP status, JSON, missing
            # choice key) is reported as "no verified answer" instead of raising.
            logger.error("Error in verify_answer: %s", e)
            return None

    def _create_prompt(self, question: str, choices: dict) -> str:
        """Build the Llama-3 chat prompt that demands a strict "Answer: X" reply.

        Raises:
            KeyError: If ``choices`` lacks any of the keys 'A', 'B', 'C', 'D'.
        """
        lines = [
            "<|begin_of_text|>",
            "<|start_header_id|>system<|end_header_id|>",
            "You are an expert mathematics teacher evaluating multiple-choice answers.",
            "Analyze the question and options carefully to select the correct answer.",
            'IMPORTANT: You must respond ONLY with "Answer: X" where X is A, B, C, or D.',
            "Do not include any explanation or additional text.",
            "<|eot_id|>",
            "<|start_header_id|>user<|end_header_id|>",
            f"Question: {question}",
            "Options:",
            f"A) {choices['A']}",
            f"B) {choices['B']}",
            f"C) {choices['C']}",
            f"D) {choices['D']}",
            'Provide your answer in the format: "Answer: X" (where X is A, B, C, or D)',
            "<|eot_id|>",
            "<|start_header_id|>assistant<|end_header_id|>",
        ]
        return "\n".join(lines).strip()

    def _process_response(self, response_data) -> str:
        """Extract the assistant's text from a decoded API response.

        Accepts a list (first element is a dict with ``generated_text`` or a
        plain value), a dict with ``generated_text``, or any other object
        (stringified). If the text still contains the assistant header, only
        the part after its last occurrence is returned.
        """
        if isinstance(response_data, list):
            first = response_data[0] if response_data else ''
            if isinstance(first, dict):
                generated_text = first.get('generated_text', '')
            else:
                generated_text = first
        elif isinstance(response_data, dict):
            generated_text = response_data.get('generated_text', '')
        else:
            generated_text = str(response_data)

        # The payload may carry a non-string here (e.g. None); normalise it so
        # the string operations below cannot fail.
        if generated_text is None:
            generated_text = ''
        elif not isinstance(generated_text, str):
            generated_text = str(generated_text)

        # Keep only the assistant's part. rpartition yields the whole string as
        # the tail when the marker is absent.
        marker = '<|start_header_id|>assistant<|end_header_id|>'
        return generated_text.rpartition(marker)[2].strip()

    def _extract_answer(self, response: str) -> Optional[str]:
        """Return the choice letter (A-D) found in ``response``, or ``None``.

        Matching is case-insensitive (the text is upper-cased first). An
        explicit "Answer: X" wins; otherwise the last stand-alone A/B/C/D
        token is used, since a final answer usually comes last.
        """
        if not response:
            return None
        text = response.strip().upper()

        # 1. Explicit "ANSWER: X" form.
        explicit = self._ANSWER_RE.search(text)
        if explicit:
            return explicit.group(1)

        # 2. Last stand-alone letter. Known limitation: after upper-casing, the
        #    English article "a" is indistinguishable from choice A.
        letters = self._STANDALONE_RE.findall(text)
        return letters[-1] if letters else None

    def _get_majority_vote(self, answers: list, min_confidence: float = 60.0) -> Tuple[Optional[str], str]:
        """Pick the majority answer and describe the outcome.

        Args:
            answers: Extracted choice letters, one per successful trial.
            min_confidence: Minimum share of votes (percent) the winner needs.

        Returns:
            ``(answer, explanation)``. ``answer`` is ``None`` when ``answers``
            is empty, when the top count is shared by several answers, or when
            the winner's share is below ``min_confidence``.
        """
        if not answers:
            return None, "No valid answers extracted"

        counter = Counter(answers)

        # A shared top count is a tie: refuse to pick.
        max_count = max(counter.values())
        top_answers = [ans for ans, count in counter.items() if count == max_count]
        if len(top_answers) > 1:
            return None, f"Tie between answers: {top_answers}"

        final_answer = top_answers[0]
        total_votes = len(answers)
        confidence = (max_count / total_votes) * 100

        # Reject winners that did not reach the confidence threshold.
        if confidence < min_confidence:
            return None, f"Low confidence ({confidence:.1f}%) for answer {final_answer}"

        explanation = (f"Answer '{final_answer}' selected with {confidence:.1f}% confidence "
                       f"({max_count}/{total_votes} votes). "
                       f"Distribution: {dict(counter)}")
        return final_answer, explanation