TwT-6
/

api-demo

Model card Files Files and versions Community

api-demo / opencompass-my-api /opencompass /datasets /IFEval /evaluation_main.py

TwT-6

Upload 2667 files

256a159 verified about 1 year ago

raw

history blame contribute delete

4.62 kB

	# flake8: noqa
	# yapf: disable

	# Copyright 2023 The Google Research Authors.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import dataclasses
	from typing import Dict, List, Optional, Union

	from absl import flags

	import opencompass.datasets.IFEval.instructions_registry as instructions_registry

	_INPUT_DATA = flags.DEFINE_string('input_data',
	None,
	'path to input data',
	required=True)

	_INPUT_RESPONSE_DATA = flags.DEFINE_string('input_response_data',
	None,
	'path to input response data',
	required=False)

	_OUTPUT_DIR = flags.DEFINE_string(
	'output_dir',
	None,
	'Output directory for inference and eval results.',
	required=True,
	)


	@dataclasses.dataclass
	class InputExample:
	key: int
	instruction_id_list: List[str]
	prompt: str
	kwargs: List[Dict[str, Optional[Union[str, int]]]]


	@dataclasses.dataclass
	class OutputExample:
	instruction_id_list: List[str]
	prompt: str
	response: str
	follow_all_instructions: bool
	follow_instruction_list: List[bool]


	def test_instruction_following_strict(
	inp,
	response,
	):
	"""Tests response to see if instrutions are followed."""
	instruction_list = inp.instruction_id_list
	is_following_list = []

	for index, instruction_id in enumerate(instruction_list):
	instruction_cls = instructions_registry.INSTRUCTION_DICT[
	instruction_id]
	instruction = instruction_cls(instruction_id)
	instruction.build_description(**inp.kwargs[index])
	args = instruction.get_instruction_args()
	if args and 'prompt' in args:
	instruction.build_description(prompt=inp.prompt)

	if response.strip() and instruction.check_following(response):
	is_following_list.append(True)
	else:
	is_following_list.append(False)

	return OutputExample(
	instruction_id_list=inp.instruction_id_list,
	prompt=inp.prompt,
	response=response,
	follow_all_instructions=all(is_following_list),
	follow_instruction_list=is_following_list,
	)


	def test_instruction_following_loose(
	inp,
	response,
	):
	"""Tests response for an upper bound for following instructions."""
	r = response.split('\n')
	response_remove_first = '\n'.join(r[1:]).strip()
	response_remove_last = '\n'.join(r[:-1]).strip()
	response_remove_both = '\n'.join(r[1:-1]).strip()
	revised_response = response.replace('*', '')
	revised_response_remove_first = response_remove_first.replace('*', '')
	revised_response_remove_last = response_remove_last.replace('*', '')
	revised_response_remove_both = response_remove_both.replace('*', '')
	all_responses = [
	response,
	revised_response,
	response_remove_first,
	response_remove_last,
	response_remove_both,
	revised_response_remove_first,
	revised_response_remove_last,
	revised_response_remove_both,
	]
	instruction_list = inp.instruction_id_list
	is_following_list = []

	for index, instruction_id in enumerate(instruction_list):
	instruction_cls = instructions_registry.INSTRUCTION_DICT[
	instruction_id]
	instruction = instruction_cls(instruction_id)

	instruction.build_description(**inp.kwargs[index])
	args = instruction.get_instruction_args()
	if args and 'prompt' in args:
	instruction.build_description(prompt=inp.prompt)

	is_following = False
	for r in all_responses:
	if r.strip() and instruction.check_following(r):
	is_following = True
	break

	is_following_list.append(is_following)

	return OutputExample(
	instruction_id_list=inp.instruction_id_list,
	prompt=inp.prompt,
	response=response,
	follow_all_instructions=all(is_following_list),
	follow_instruction_list=is_following_list,
	)