# mcqt/parser_test.py
from langchain.output_parsers.regex import RegexParser
def transform(input_list):
    """Regroup the flat dict produced by the combined RegexParser into a list of
    per-question dicts keyed by question/A/B/C/D/reponse."""
    new_list = []
    for key in input_list:
        if 'question1' in key or 'question2' in key:
            question_dict = {}
            question_num = key[-1]
            question_dict['question'] = input_list[key]
            question_dict['A'] = input_list[f'A_{question_num}']
            question_dict['B'] = input_list[f'B_{question_num}']
            question_dict['C'] = input_list[f'C_{question_num}']
            question_dict['D'] = input_list[f'D_{question_num}']
            question_dict['reponse'] = input_list[f'reponse{question_num}']
            new_list.append(question_dict)
    return new_list
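# Minimal illustration (hypothetical data, not part of the original test): transform()
# expects the flat dict produced by the combined RegexParser defined below, with keys
# such as "question1", "A_1", ..., "reponse1", and regroups them per question.
_example_flat = {
    "question1": "What is 2 + 2?",
    "A_1": "3",
    "B_1": "4",
    "C_1": "5",
    "D_1": "22",
    "reponse1": "B",
}
assert transform(_example_flat) == [
    {"question": "What is 2 + 2?", "A": "3", "B": "4", "C": "5", "D": "22", "reponse": "B"}
]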
# Define input string to parse
#input_string = "Question 1: What is the conclusion of the study regarding the use of pretrained weights on 2D-Slice models with ResNet encoders initialized with ImageNet-1K pretrained weights for 3D Deep Neuroimaging?\nCHOIX_A: Pretrained weights consistently underperforms random initialization\nCHOIX_B: Pretrained weights consistently outperforms random initialization\nCHOIX_C: Pretrained weights have no effect on the performance of the models\nCHOIX_D: The study did not test the use of pretrained weights on 2D-Slice models\n\nRéponse: B\n\nQuestion 2: What is the main hypothesis that the study validates?\nCHOIX_A: Models trained on natural images (2D) cannot be helpful for neuroimaging tasks\nCHOIX_B: Models trained on natural images (2D) can be helpful for neuroimaging tasks\nCHOIX_C: 2D-Slice-CNNs cannot be used for neuroimaging tasks\nCHOIX_D: 2D-Slice-CNNs are the only models that can be used for neuroimaging tasks\n\nRéponse: B"
# doc = '''question : What was the reason for not asking for the LLM-based condition to show its work in the preliminary work on the paper?
# CHOICE_A: The author thought it would increase the likelihood of transcribing the wrong answer.
# CHOICE_B: The author wanted to avoid confusing the participant with a lot of numbers.
# CHOICE_C: The author believed that precise probabilities had nothing to do with the problem.
# CHOICE_D:The author wanted to use a meta-prompt that didn't require determining precise probabilities.
# reponse: B
# '''
doc = 'question: What is the purpose of the get_parsed_value function in the given document?\r\n CHOICE_A: To parse the value based on the given parser and document.\r\n CHOICE_B: To merge the parsed values into the quizz dictionary.\r\n CHOICE_C: To create a new dictionary called parsers.\r\n CHOICE_D: To define a new function called update method.\r\nreponse: A\r\n\r\r'
# One RegexParser per field; each regex captures a single group matching its output key.
parsers = {
    "question": RegexParser(
        # regex=r"question\s+:\s+\n?(.*?)(?:\n)+",
        regex=r"question:\s*(.*?)\s+(?:\n)+",
        output_keys=["question"]
    ),
    "A": RegexParser(
        regex=r"(?:\n)+\s*CHOICE_A:(.*?)\n+",
        output_keys=["A"]
    ),
    "B": RegexParser(
        regex=r"(?:\n)+\s*CHOICE_B:(.*?)\n+",
        output_keys=["B"]
    ),
    "C": RegexParser(
        regex=r"(?:\n)+\s*CHOICE_C:(.*?)\n+",
        output_keys=["C"]
    ),
    "D": RegexParser(
        regex=r"(?:\n)+\s*CHOICE_D:(.*?)\n+",
        output_keys=["D"]
    ),
    "reponse": RegexParser(
        regex=r"(?:\n)+reponse:\s?(.*)",
        output_keys=["reponse"]
    )
}
def get_parsed_value(parser, key, doc):
    # Run a single parser against the document and return {key: stripped value}.
    result = parser.parse(doc)
    value = result.get(key).strip()
    return {key: value}
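# Illustration (added sketch, assuming the regexes above match the sample `doc`):
# get_parsed_value wraps a single RegexParser call and returns a one-entry dict
# that can be merged straight into the quizz dict with dict.update(), e.g.
# get_parsed_value(parsers["reponse"], "reponse", doc) -> {"reponse": "A"}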
quizz = {}
for key, parser in parsers.items():
    quizz.update(get_parsed_value(parser, key, doc))
quizz_list = [quizz]
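# quizz_list now wraps the single parsed question in a one-element list, roughly:
# [{"question": "...", "A": "...", "B": "...", "C": "...", "D": "...", "reponse": "A"}]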
output_parser = RegexParser(
    regex=r"question\s?\d?:\s+\n?(.*?)\n\s*CHOICE_A(.*?)\n\s*CHOICE_B(.*?)\n\s*CHOICE_C(.*?)\n\s*CHOICE_D(.*?)(?:\n)+reponse:\s?(.*)",
    output_keys=["question1", "A_1", "B_1", "C_1", "D_1", "reponse1"]
)
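# The combined parser captures all six fields in a single regex search; its
# output_keys ("question1", "A_1", ...) follow the naming that transform() expects.
# Note that the option groups start right after "CHOICE_X", so the leading ":" is
# kept in the captured values (unlike the per-field parsers above, which exclude it).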
# Use the RegexParser to parse the input string
output_dict = transform(output_parser.parse(doc))
# Print the parsed output
print(output_dict)
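# Expected shape of the printed result (values are not stripped here, so they may
# keep a leading ":" and a trailing "\r" from the raw document), roughly:
# [{'question': 'What is the purpose of the get_parsed_value function ...',
#   'A': ': To parse the value ...', ..., 'reponse': 'A'}]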