# mcqt/parser_test.py
from langchain.output_parsers.regex import RegexParser
def transform(input_list):
    """Regroup the flat dict produced by the combined RegexParser into a list of
    per-question dicts keyed by question/A/B/C/D/reponse."""
    new_list = []
    for key in input_list:
        if 'question1' in key or 'question2' in key:
            question_dict = {}
            question_num = key[-1]
            question_dict['question'] = input_list[key]
            question_dict['A'] = input_list[f'A_{question_num}']
            question_dict['B'] = input_list[f'B_{question_num}']
            question_dict['C'] = input_list[f'C_{question_num}']
            question_dict['D'] = input_list[f'D_{question_num}']
            question_dict['reponse'] = input_list[f'reponse{question_num}']
            new_list.append(question_dict)
    return new_list
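# Minimal illustration (hypothetical data, not part of the original test): transform()
# expects the flat dict produced by the combined RegexParser defined below, with keys
# such as "question1", "A_1", ..., "reponse1", and regroups them per question.
_example_flat = {
    "question1": "What is 2 + 2?",
    "A_1": "3",
    "B_1": "4",
    "C_1": "5",
    "D_1": "22",
    "reponse1": "B",
}
assert transform(_example_flat) == [
    {"question": "What is 2 + 2?", "A": "3", "B": "4", "C": "5", "D": "22", "reponse": "B"}
]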
# Define input string to parse
#input_string = "Question 1: What is the conclusion of the study regarding the use of pretrained weights on 2D-Slice models with ResNet encoders initialized with ImageNet-1K pretrained weights for 3D Deep Neuroimaging?\nCHOIX_A: Pretrained weights consistently underperforms random initialization\nCHOIX_B: Pretrained weights consistently outperforms random initialization\nCHOIX_C: Pretrained weights have no effect on the performance of the models\nCHOIX_D: The study did not test the use of pretrained weights on 2D-Slice models\n\nRéponse: B\n\nQuestion 2: What is the main hypothesis that the study validates?\nCHOIX_A: Models trained on natural images (2D) cannot be helpful for neuroimaging tasks\nCHOIX_B: Models trained on natural images (2D) can be helpful for neuroimaging tasks\nCHOIX_C: 2D-Slice-CNNs cannot be used for neuroimaging tasks\nCHOIX_D: 2D-Slice-CNNs are the only models that can be used for neuroimaging tasks\n\nRéponse: B"
# doc = '''question : What was the reason for not asking for the LLM-based condition to show its work in the preliminary work on the paper?
# CHOICE_A: The author thought it would increase the likelihood of transcribing the wrong answer.
# CHOICE_B: The author wanted to avoid confusing the participant with a lot of numbers.
# CHOICE_C: The author believed that precise probabilities had nothing to do with the problem.
# CHOICE_D:The author wanted to use a meta-prompt that didn't require determining precise probabilities.
# reponse: B
# '''
doc = 'question: What is the purpose of the get_parsed_value function in the given document?\r\n CHOICE_A: To parse the value based on the given parser and document.\r\n CHOICE_B: To merge the parsed values into the quizz dictionary.\r\n CHOICE_C: To create a new dictionary called parsers.\r\n CHOICE_D: To define a new function called update method.\r\nreponse: A\r\n\r\r'
# One RegexParser per field; each regex captures a single group matching its output key.
parsers = {
    "question": RegexParser(
        # regex=r"question\s+:\s+\n?(.*?)(?:\n)+",
        regex=r"question:\s*(.*?)\s+(?:\n)+",
        output_keys=["question"]
    ),
    "A": RegexParser(
        regex=r"(?:\n)+\s*CHOICE_A:(.*?)\n+",
        output_keys=["A"]
    ),
    "B": RegexParser(
        regex=r"(?:\n)+\s*CHOICE_B:(.*?)\n+",
        output_keys=["B"]
    ),
    "C": RegexParser(
        regex=r"(?:\n)+\s*CHOICE_C:(.*?)\n+",
        output_keys=["C"]
    ),
    "D": RegexParser(
        regex=r"(?:\n)+\s*CHOICE_D:(.*?)\n+",
        output_keys=["D"]
    ),
    "reponse": RegexParser(
        regex=r"(?:\n)+reponse:\s?(.*)",
        output_keys=["reponse"]
    )
}
def get_parsed_value(parser, key, doc):
    # Run a single parser against the document and return {key: stripped value}.
    result = parser.parse(doc)
    value = result.get(key).strip()
    return {key: value}
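# Illustration (added sketch, assuming the regexes above match the sample `doc`):
# get_parsed_value wraps a single RegexParser call and returns a one-entry dict
# that can be merged straight into the quizz dict with dict.update(), e.g.
# get_parsed_value(parsers["reponse"], "reponse", doc) -> {"reponse": "A"}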
quizz = {}
for key, parser in parsers.items():
    quizz.update(get_parsed_value(parser, key, doc))
quizz_list = [quizz]
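# quizz_list now wraps the single parsed question in a one-element list, roughly:
# [{"question": "...", "A": "...", "B": "...", "C": "...", "D": "...", "reponse": "A"}]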
output_parser = RegexParser(
    regex=r"question\s?\d?:\s+\n?(.*?)\n\s*CHOICE_A(.*?)\n\s*CHOICE_B(.*?)\n\s*CHOICE_C(.*?)\n\s*CHOICE_D(.*?)(?:\n)+reponse:\s?(.*)",
    output_keys=["question1", "A_1", "B_1", "C_1", "D_1", "reponse1"]
)
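# The combined parser captures all six fields in a single regex search; its
# output_keys ("question1", "A_1", ...) follow the naming that transform() expects.
# Note that the option groups start right after "CHOICE_X", so the leading ":" is
# kept in the captured values (unlike the per-field parsers above, which exclude it).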
# Use the RegexParser to parse the input string
output_dict = transform(output_parser.parse(doc))
# Print the parsed output
print(output_dict)
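# Expected shape of the printed result (values are not stripped here, so they may
# keep a leading ":" and a trailing "\r" from the raw document), roughly:
# [{'question': 'What is the purpose of the get_parsed_value function ...',
#   'A': ': To parse the value ...', ..., 'reponse': 'A'}]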