TwT-6
/

api-demo

Model card Files Files and versions Community

api-demo / opencompass-my-api /opencompass /datasets /lawbench /utils /function_utils.py

TwT-6

Upload 2667 files

256a159 verified about 1 year ago

raw

history blame contribute delete

1.94 kB

	from rouge_chinese import Rouge
	import jieba
	from nltk.translate.gleu_score import corpus_gleu

	def compute_f1_two_sets(pred_set, gt_set):
	precision = len(pred_set.intersection(gt_set)) / len(pred_set) if len(pred_set) > 0 else 0
	recall = len(pred_set.intersection(gt_set)) / len(gt_set) if len(gt_set) > 0 else 0
	f1 = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0
	return f1

	def multi_choice_judge(prediction, option_list, answer_token):
	# a dict, key: letters in the option list, value: count of the letter in the prediction
	count_dict, abstention, accuracy = {}, 0, 0
	for option in option_list:
	option_count = prediction.count(option)
	count_dict[option] = 1 if option_count > 0 else 0 # multiple occurrence of the same letter is counted as 1

	if sum(count_dict.values()) == 0:
	abstention = 1
	# if the answer token is the only predicted token, the prediction is correct
	elif count_dict[answer_token] == 1 and sum(count_dict.values()) == 1:
	accuracy = 1
	return {"score": accuracy, "abstention": abstention}

	"""
	compute the rouge score.
	hyps and refs are lists of hyposisis and reference strings
	empty predictions are replaces with 无内容
	"""


	def compute_rouge(hyps, refs):
	assert(len(hyps) == len(refs))
	hyps = [' '.join(jieba.cut(h)) for h in hyps]
	hyps = [h if h.strip() != "" else "无内容" for h in hyps]
	refs = [' '.join(jieba.cut(r)) for r in refs]
	return Rouge().get_scores(hyps, refs)

	"""
	compute the gleu score.
	hyps and refs are lists of hyposisis and reference strings
	empty predictions are replaces with 无内容
	"""
	def compute_gleu(hyps, refs):
	assert(len(hyps) == len(refs))
	hyps = [' '.join(jieba.cut(h)) for h in hyps]
	hyps = [h if h.strip() != "" else "无内容" for h in hyps]
	refs = [[' '.join(jieba.cut(r))] for r in refs]
	return corpus_gleu(refs, hyps)