|
import datasets |
|
import json |
|
|
|
def get_float_qid(qid: str) -> float:
    """Turn a question id into a float that can be used as a sort key.

    The first character of ``qid`` is dropped without being inspected,
    every ``-`` in the remainder is replaced by ``.``, and the result is
    parsed with ``float``. For example ``"q12-3"`` gives ``12.3`` and
    ``"q7"`` gives ``7.0``.

    Args:
        qid: Question id string, e.g. ``"q12-3"``.

    Returns:
        The numeric value encoded in ``qid``.

    Raises:
        ValueError: If the text left after dropping the first character and
            replacing dashes is not a valid float literal (for instance when
            it contains two dashes, or is empty).

    Note:
        The part after the dash is read as decimal digits, not as an
        integer, so ``"q1-10"`` and ``"q1-1"`` both give ``1.1``.
        NOTE(review): confirm that suffixes never reach 10 in the data this
        is used on; otherwise the resulting order is not the natural one.
    """
    numeric_text = qid[1:].replace("-", ".")
    return float(numeric_text)
|
|
|
# Load the dataset previously saved in the local "humaneval_v_test_hf" directory.
humaneval_v_data = datasets.load_from_disk("humaneval_v_test_hf")

# qid -> row position in the order the dataset is stored on disk.
qid_to_idx_mapping = {
    row["qid"]: stored_pos for stored_pos, row in enumerate(humaneval_v_data)
}

# The same rows ordered by the numeric value of their qid. `sorted` is
# stable, so rows whose qids convert to the same float keep their stored
# relative order.
reranked_data = sorted(
    humaneval_v_data, key=lambda row: get_float_qid(row["qid"])
)

# qid -> row position after sorting by qid.
# (The spelling of this name is kept as-is in case it is referenced elsewhere.)
id_idx_mappping = {
    row["qid"]: sorted_pos for sorted_pos, row in enumerate(reranked_data)
}

# Position in the qid-sorted order -> position in the stored order.
# Built from the mapping computed above rather than by iterating the dataset
# a third time; a dict iterates in insertion order, i.e. stored order, so the
# entries come out in the same sequence as a pass over the dataset would give.
# NOTE(review): which side counts as "old" and which as "new" depends on how
# the on-disk order was produced, which is not visible here.
old_to_new_id = {
    id_idx_mappping[qid]: stored_pos
    for qid, stored_pos in qid_to_idx_mapping.items()
}

# json.dumps writes the integer keys as JSON strings.
print(json.dumps(old_to_new_id, indent=4))