from langchain.output_parsers.regex import RegexParser

def transform(input_list):
    """Regroup flat, numbered quiz fields into one dict per question.

    Despite its name, ``input_list`` is a mapping.  For every key of the
    form ``question<N>`` (``N`` being one or more digits) the companion
    keys ``A_<N>``, ``B_<N>``, ``C_<N>``, ``D_<N>`` and ``reponse<N>`` are
    looked up and gathered into a dict with the un-numbered keys
    ``question``, ``A``, ``B``, ``C``, ``D`` and ``reponse``.

    Args:
        input_list: Mapping of numbered field names to their text.

    Returns:
        A list with one dict per ``question<N>`` key, in the iteration
        order of ``input_list``.  Keys that are not ``question<N>`` do not
        produce an entry of their own.

    Raises:
        KeyError: If a ``question<N>`` key is present but one of its
            companion keys is missing.
    """
    prefix = "question"
    new_list = []
    for key in input_list:
        # Everything after the prefix is the question number; taking the
        # whole suffix (not just the last character) keeps multi-digit
        # numbers such as "question12" intact.
        question_num = key[len(prefix):]
        if not (key.startswith(prefix) and question_num.isdigit()):
            continue
        new_list.append({
            "question": input_list[key],
            "A": input_list[f"A_{question_num}"],
            "B": input_list[f"B_{question_num}"],
            "C": input_list[f"C_{question_num}"],
            "D": input_list[f"D_{question_num}"],
            "reponse": input_list[f"reponse{question_num}"],
        })
    return new_list

# Sample input strings to parse (the commented-out ones are earlier test inputs kept for reference)
#input_string = "Question 1: What is the conclusion of the study regarding the use of pretrained weights on 2D-Slice models with ResNet encoders initialized with ImageNet-1K pretrained weights for 3D Deep Neuroimaging?\nCHOIX_A: Pretrained weights consistently underperforms random initialization\nCHOIX_B: Pretrained weights consistently outperforms random initialization\nCHOIX_C: Pretrained weights have no effect on the performance of the models\nCHOIX_D: The study did not test the use of pretrained weights on 2D-Slice models\n\nRéponse: B\n\nQuestion 2: What is the main hypothesis that the study validates?\nCHOIX_A: Models trained on natural images (2D) cannot be helpful for neuroimaging tasks\nCHOIX_B: Models trained on natural images (2D) can be helpful for neuroimaging tasks\nCHOIX_C: 2D-Slice-CNNs cannot be used for neuroimaging tasks\nCHOIX_D: 2D-Slice-CNNs are the only models that can be used for neuroimaging tasks\n\nRéponse: B"
# doc = '''question :      What was the reason for not asking for the LLM-based condition to show its work in the preliminary work on the paper?


#  CHOICE_A:     The author thought it would increase the likelihood of transcribing the wrong answer.
#  CHOICE_B:    The author wanted to avoid confusing the participant with a lot of numbers.
#  CHOICE_C:    The author believed that precise probabilities had nothing to do with the problem.
#  CHOICE_D:The author wanted to use a meta-prompt that didn't require determining precise probabilities.


# reponse: B


# '''

# Sample quiz text fed to the parsers: one question, four choices and the
# answer, separated by CRLF line endings with trailing carriage returns.
doc = (
    'question: What is the purpose of the get_parsed_value function in the given document?\r\n'
    ' CHOICE_A: To parse the value based on the given parser and document.\r\n'
    ' CHOICE_B: To merge the parsed values into the quizz dictionary.\r\n'
    ' CHOICE_C: To create a new dictionary called parsers.\r\n'
    ' CHOICE_D: To define a new function called update method.\r\n'
    'reponse: A\r\n'
    '\r\r'
)

# One RegexParser per quiz field, keyed by the field name it extracts.
# The four CHOICE_x parsers share a single pattern template that differs
# only in the choice letter.
# Earlier variant of the question pattern, kept for reference:
#   r"question\s+:\s+\n?(.*?)(?:\n)+"
parsers = {
    "question": RegexParser(
        regex=r"question:\s*(.*?)\s+(?:\n)+",
        output_keys=["question"],
    ),
    **{
        letter: RegexParser(
            regex=rf"(?:\n)+\s*CHOICE_{letter}:(.*?)\n+",
            output_keys=[letter],
        )
        for letter in "ABCD"
    },
    "reponse": RegexParser(
        regex=r"(?:\n)+reponse:\s?(.*)",
        output_keys=["reponse"],
    ),
}

def get_parsed_value(parser, key, doc):
    """Parse ``doc`` with ``parser`` and return the stripped value for ``key``.

    Args:
        parser: Object whose ``parse(doc)`` returns a mapping of field
            names to text.
        key: Name of the field to pull out of the parse result.
        doc: Raw text handed to ``parser.parse``.

    Returns:
        A single-entry dict ``{key: value}`` where ``value`` has leading
        and trailing whitespace removed.
    """
    fields = parser.parse(doc)
    raw_text = fields.get(key)
    return {key: raw_text.strip()}

# Run every field parser over the document and fold the single-field
# results into one quiz record.
quizz = {}
for key, parser in parsers.items():
    parsed_field = get_parsed_value(parser, key, doc)
    quizz.update(parsed_field)

# Wrap the record in a list.
quizz_list = [quizz]

# Single-pass alternative to the per-field parsers: one pattern captures
# the question, the four choices and the answer in order.  The optional
# ":" after each CHOICE_x label is matched outside the capture group so it
# does not end up at the start of the captured choice text.
output_parser = RegexParser(
    regex=(
        r"question\s?\d?:\s+\n?(.*?)"
        r"\n\s*CHOICE_A:?(.*?)"
        r"\n\s*CHOICE_B:?(.*?)"
        r"\n\s*CHOICE_C:?(.*?)"
        r"\n\s*CHOICE_D:?(.*?)"
        r"(?:\n)+reponse:\s?(.*)"
    ),
    output_keys=["question1", "A_1", "B_1", "C_1", "D_1", "reponse1"]
)

# Use the RegexParser to parse the input string.  "." matches "\r", so
# captures from CRLF text carry a trailing carriage return; strip every
# value (as get_parsed_value does for the per-field parsers) before
# regrouping the fields per question.
parsed_fields = {
    name: text.strip() for name, text in output_parser.parse(doc).items()
}
output_dict = transform(parsed_fields)

# Print the parsed output
print(output_dict)