"""Equal-width binning of samples by token count and cyclomatic complexity.

Reads a list of records from ``sub_mbpp.json``, tags each record with the
index of the equal-width interval its ``token`` and ``cc`` values fall into
(stored under ``token_diff`` and ``CC_diff``), prints how many records landed
in each ``cc`` interval, and writes the annotated records to ``EI.json``.
"""
import json

INPUT_PATH = "sub_mbpp.json"
OUTPUT_PATH = 'EI.json'

# Number of equal-width intervals each feature range is split into.
NUM_INTERVALS = 3


def _equal_width_bin(value, lowest, width, num_intervals):
    """Return the 0-based equal-width interval index of *value*."""
    if width == 0:
        # Every record shares the same value, so there is a single
        # degenerate interval; dividing by the zero width would raise.
        return 0
    # The maximum value sits exactly on the upper edge and would compute to
    # ``num_intervals``; clamp it into the last interval.
    return min(int((value - lowest) // width), num_intervals - 1)


def assign_equal_width_bins(data, num_intervals=NUM_INTERVALS):
    """Annotate each record in *data* with its equal-width interval indices.

    Each record must provide numeric ``token`` and ``cc`` entries. The records
    are modified in place: ``token_diff`` and ``CC_diff`` receive the 0-based
    interval index of the corresponding feature.

    Args:
        data: List of dict records to annotate.
        num_intervals: Number of equal-width intervals per feature.

    Returns:
        A list of length *num_intervals* where element ``i`` is the number of
        records whose ``CC_diff`` equals ``i``.

    Raises:
        ValueError: If *num_intervals* is smaller than 1.
    """
    if num_intervals < 1:
        raise ValueError("num_intervals must be at least 1")

    cc_counts = [0] * num_intervals
    if not data:
        # min()/max() raise on an empty sequence; nothing to annotate.
        return cc_counts

    # Value range and interval width of each feature.
    token_min = min(item['token'] for item in data)
    token_max = max(item['token'] for item in data)
    token_interval_size = (token_max - token_min) / num_intervals

    cc_min = min(item['cc'] for item in data)
    cc_max = max(item['cc'] for item in data)
    cc_interval_size = (cc_max - cc_min) / num_intervals

    for item in data:
        item['token_diff'] = _equal_width_bin(
            item['token'], token_min, token_interval_size, num_intervals
        )
        item['CC_diff'] = _equal_width_bin(
            item['cc'], cc_min, cc_interval_size, num_intervals
        )
        cc_counts[item['CC_diff']] += 1

    return cc_counts


def main():
    """Load the records, bin them, report the cc distribution, and save."""
    # Load the data.
    with open(INPUT_PATH, "r", encoding="utf-8") as f:
        data = json.load(f)

    cc_counts = assign_equal_width_bins(data, NUM_INTERVALS)

    # Restore the original ordering by record id.
    data.sort(key=lambda x: x['id'])

    # Number of records in each cc interval, lowest interval first.
    print(*cc_counts)

    # Write the annotated records to the output JSON file.
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    main()