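# Spaces: Configuration error
# (The line above appears to be status text captured together with the
# source listing; it is not part of the script.)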
import re

from langchain.output_parsers.regex import RegexParser
def transform(input_list):
    """Regroup flat, numbered quiz fields into one dict per question.

    Despite its name, ``input_list`` is a mapping. Every key of the form
    ``question<N>`` (``N`` being one or more ASCII digits) starts a question;
    its companion fields are read from ``A_<N>``, ``B_<N>``, ``C_<N>``,
    ``D_<N>`` and ``reponse<N>``. All other keys are ignored.

    Args:
        input_list: Mapping holding the flat keys described above.

    Returns:
        A list with one dict per question, in the mapping's iteration order.
        Each dict has the keys ``question``, ``A``, ``B``, ``C``, ``D`` and
        ``reponse``.

    Raises:
        KeyError: If a ``question<N>`` key is present but one of its
            companion keys is missing.
    """
    new_list = []
    for key in input_list:
        # Match the whole key and keep the complete numeric suffix: looking
        # only at the last character would turn "question10" into number "0".
        match = re.fullmatch(r"question([0-9]+)", key)
        if match is None:
            continue
        question_num = match.group(1)
        new_list.append(
            {
                "question": input_list[key],
                "A": input_list[f"A_{question_num}"],
                "B": input_list[f"B_{question_num}"],
                "C": input_list[f"C_{question_num}"],
                "D": input_list[f"D_{question_num}"],
                "reponse": input_list[f"reponse{question_num}"],
            }
        )
    return new_list
# Sample input strings to parse (earlier samples are kept commented out; the last one is active)
#input_string = "Question 1: What is the conclusion of the study regarding the use of pretrained weights on 2D-Slice models with ResNet encoders initialized with ImageNet-1K pretrained weights for 3D Deep Neuroimaging?\nCHOIX_A: Pretrained weights consistently underperforms random initialization\nCHOIX_B: Pretrained weights consistently outperforms random initialization\nCHOIX_C: Pretrained weights have no effect on the performance of the models\nCHOIX_D: The study did not test the use of pretrained weights on 2D-Slice models\n\nRéponse: B\n\nQuestion 2: What is the main hypothesis that the study validates?\nCHOIX_A: Models trained on natural images (2D) cannot be helpful for neuroimaging tasks\nCHOIX_B: Models trained on natural images (2D) can be helpful for neuroimaging tasks\nCHOIX_C: 2D-Slice-CNNs cannot be used for neuroimaging tasks\nCHOIX_D: 2D-Slice-CNNs are the only models that can be used for neuroimaging tasks\n\nRéponse: B" | |
# doc = '''question : What was the reason for not asking for the LLM-based condition to show its work in the preliminary work on the paper? | |
# CHOICE_A: The author thought it would increase the likelihood of transcribing the wrong answer. | |
# CHOICE_B: The author wanted to avoid confusing the participant with a lot of numbers. | |
# CHOICE_C: The author believed that precise probabilities had nothing to do with the problem. | |
# CHOICE_D:The author wanted to use a meta-prompt that didn't require determining precise probabilities. | |
# reponse: B | |
# ''' | |
# Active sample document: one question, four indented choices and the answer
# line, separated by CRLF line endings.
doc = (
    'question: What is the purpose of the get_parsed_value function in the given document?\r\n'
    ' CHOICE_A: To parse the value based on the given parser and document.\r\n'
    ' CHOICE_B: To merge the parsed values into the quizz dictionary.\r\n'
    ' CHOICE_C: To create a new dictionary called parsers.\r\n'
    ' CHOICE_D: To define a new function called update method.\r\n'
    'reponse: A\r\n'
    '\r\r'
)

# One single-field RegexParser per quiz field, keyed by the field's output
# name. Insertion order is: question, A, B, C, D, reponse.
#
# NOTE(review): the "question" pattern ends in `\s+(?:\n)+`, so it needs a
# whitespace character directly before a newline. The `\r` of the CRLF
# endings in `doc` provides it; confirm the behaviour wanted for LF-only text.
# An earlier variant for the "question :" spelling was
#     r"question\s+:\s+\n?(.*?)(?:\n)+"
parsers = {
    "question": RegexParser(
        regex=r"question:\s*(.*?)\s+(?:\n)+",
        output_keys=["question"],
    ),
    # The four choice parsers differ only in their letter.
    **{
        letter: RegexParser(
            regex=rf"(?:\n)+\s*CHOICE_{letter}:(.*?)\n+",
            output_keys=[letter],
        )
        for letter in "ABCD"
    },
    "reponse": RegexParser(
        regex=r"(?:\n)+reponse:\s?(.*)",
        output_keys=["reponse"],
    ),
}
def get_parsed_value(parser, key, doc):
    """Parse ``doc`` with ``parser`` and return the stripped value of ``key``.

    Args:
        parser: Object whose ``parse(doc)`` returns a mapping from output key
            to captured string.
        key: Name of the entry to extract from the parse result.
        doc: Text handed to ``parser.parse``.

    Returns:
        A single-entry dict ``{key: value}`` where ``value`` has leading and
        trailing whitespace removed.

    Raises:
        KeyError: If the parse result has no entry for ``key``.
    """
    result = parser.parse(doc)
    try:
        value = result[key]
    except KeyError:
        # Fail with the offending key rather than an AttributeError on None.
        raise KeyError(f"parser produced no value for key {key!r}") from None
    return {key: value.strip()}
# Field-by-field pass: run every single-field parser over `doc` and merge the
# stripped values into one quiz entry.
quizz = {}
for key, parser in parsers.items():
    quizz.update(get_parsed_value(parser, key, doc))
quizz_list = [quizz]

# One-pass alternative: a single pattern captures all six fields of the
# question at once, under numbered keys that `transform` regroups.
# The optional ":" after each CHOICE_X keeps the separator out of the capture.
output_parser = RegexParser(
    regex=(
        r"question\s?\d?:\s+\n?(.*?)"
        r"\n\s*CHOICE_A:?(.*?)"
        r"\n\s*CHOICE_B:?(.*?)"
        r"\n\s*CHOICE_C:?(.*?)"
        r"\n\s*CHOICE_D:?(.*?)"
        r"(?:\n)+reponse:\s?(.*)"
    ),
    output_keys=["question1", "A_1", "B_1", "C_1", "D_1", "reponse1"],
)

# Use the RegexParser to parse the input string. The captures still carry
# surrounding whitespace (for example the "\r" of a CRLF ending), so strip
# them to match what the field-by-field pass produces.
parsed_fields = {
    name: text.strip() for name, text in output_parser.parse(doc).items()
}
# Despite its name, `output_dict` is the list returned by `transform`.
output_dict = transform(parsed_fields)

# Print the parsed output
print(output_dict)