lmy0802 committed
Commit 23fc8a9 · verified · 1 Parent(s): 6694215

Upload 7 files

dividing_into_different_subsets/4/EI/CC_EI.csv ADDED
@@ -0,0 +1,12 @@
+ Model,CC_subset_1,CC_subset_2,CC_subset_3,CC_subset_4
+ CodeFuse-DeepSeek-33b,80.16,65.62,50.0,75.0
+ Nxcode-CQ-7B,87.02,90.31,55.0,87.5
+ codegemma-2b,31.31,15.47,0.0,2.5
+ codegemma-7b,45.71,21.56,0.0,18.75
+ codegemma-7b-it,56.79,44.06,32.5,21.25
+ deepseek-coder-1.3b-base,37.34,18.59,0.0,0.0
+ deepseek-coder-6.7b-base,51.98,28.75,0.0,5.0
+ deepseek_coder-6.7b-instruct,73.69,71.09,17.5,36.25
+ deepseek_coder_33b-base,57.02,42.03,15.0,22.5
+ deepseek_coder_33b-instruct,68.97,60.16,32.5,41.25
+ codeqwen1.5-7b,54.84,40.16,0.0,57.5
dividing_into_different_subsets/4/EI/EI.json ADDED
The diff for this file is too large to render. See raw diff
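EI.json is produced by even.py later in this commit; from that script, each record keeps the task's original features and gains three bin indices. A hypothetical record, sketched as a Python literal (values are made up; only the field names are inferred from even.py, and the real file may carry additional fields):

    # Hypothetical EI.json record (values illustrative; fields inferred from even.py):
    {
        "id": 0,                      # task id, used by even.py to restore the original order
        "line": 7,                    # line-count feature
        "token": 55,                  # token-count feature
        "cyclomatic_complexity": 3,   # cyclomatic-complexity feature
        "line_diff": 1,               # bin index 0..3 added by even.py
        "token_diff": 0,
        "CC_diff": 2,
    }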
 
dividing_into_different_subsets/4/EI/calculate_humaneval_result.py ADDED
@@ -0,0 +1,139 @@
+ import json
+ import os
+ import csv
+ 
+ # Directory containing the per-model evaluation result files
+ # (raw string so the backslashes are not treated as escape sequences)
+ input_dir = r'E:\python-testn\pythonProject3\hh_1\evaluate_result'
+ 
+ # Get every file in the directory
+ files = os.listdir(input_dir)
+ 
+ # with open("token_counts_EI.csv", "w", newline='') as csvfile:
+ #     writer = csv.writer(csvfile)
+ #     writer.writerow(["Model", "token_subset_1", "token_subset_2", "token_subset_3", "token_subset_4"])
+ 
+ # with open("line_counts_EI.csv", "w", newline='') as csvfile:
+ #     writer = csv.writer(csvfile)
+ #     writer.writerow(["Model", "line_subset_1", "line_subset_2", "line_subset_3", "line_subset_4"])
+ 
+ with open("CC_EI.csv", "w", newline='') as csvfile:
+     writer = csv.writer(csvfile)
+     writer.writerow(["Model", "CC_subset_1", "CC_subset_2", "CC_subset_3", "CC_subset_4"])
+ 
+ for file_name in files:
+     # Build the full file path
+     input_file_path = os.path.join(input_dir, file_name)
+     first_underscore_index = file_name.find('_')
+ 
+     # Find the position of the last '-'
+     last_dash_index = file_name.rfind('-')
+     model_name = file_name[first_underscore_index + 1:last_dash_index]
+     print(model_name)
+     with open(input_file_path, "r", encoding="utf-8") as file:
+         data1 = json.load(file)
+ 
+     with open("EI.json", "r", encoding="utf-8") as file:
+         data2 = json.load(file)
+     sum0 = 0
+     count0 = 0
+     sum1 = 0
+     count1 = 0
+     sum2 = 0
+     count2 = 0
+     sum3 = 0
+     count3 = 0
+ 
+     for (item1, item2) in zip(data1["humaneval"]["pass@1"], data2):
+         # Evaluation results after splitting by token count
+         # if item2["token_diff"] == 0:
+         #     index, value = item1
+         #     print(item2["token_diff"], index, value)
+         #     sum0 = sum0 + value
+         #     count0 = count0 + 1
+         # if item2["token_diff"] == 1:
+         #     index, value = item1
+         #     print(item2["token_diff"], index, value)
+         #     sum1 = sum1 + value
+         #     count1 = count1 + 1
+         # if item2["token_diff"] == 2:
+         #     index, value = item1
+         #     print(item2["token_diff"], index, value)
+         #     sum2 = sum2 + value
+         #     count2 = count2 + 1
+         # if item2["token_diff"] == 3:
+         #     index, value = item1
+         #     print(item2["token_diff"], index, value)
+         #     sum3 = sum3 + value
+         #     count3 = count3 + 1
+ 
+         # Evaluation results after splitting by line count
+         # if item2["line_diff"] == 0:
+         #     index, value = item1
+         #     print(item2["line_diff"], index, value)
+         #     sum0 = sum0 + value
+         #     count0 = count0 + 1
+         # if item2["line_diff"] == 1:
+         #     index, value = item1
+         #     print(item2["line_diff"], index, value)
+         #     sum1 = sum1 + value
+         #     count1 = count1 + 1
+         # if item2["line_diff"] == 2:
+         #     index, value = item1
+         #     print(item2["line_diff"], index, value)
+         #     sum2 = sum2 + value
+         #     count2 = count2 + 1
+         # if item2["line_diff"] == 3:
+         #     index, value = item1
+         #     print(item2["line_diff"], index, value)
+         #     sum3 = sum3 + value
+         #     count3 = count3 + 1
+ 
+         # Evaluation results after splitting by cyclomatic complexity
+         if item2["CC_diff"] == 0:
+             index, value = item1
+             print(item2["CC_diff"], index, value)
+             sum0 = sum0 + value
+             count0 = count0 + 1
+         if item2["CC_diff"] == 1:
+             index, value = item1
+             print(item2["CC_diff"], index, value)
+             sum1 = sum1 + value
+             count1 = count1 + 1
+         if item2["CC_diff"] == 2:
+             index, value = item1
+             print(item2["CC_diff"], index, value)
+             sum2 = sum2 + value
+             count2 = count2 + 1
+         if item2["CC_diff"] == 3:
+             index, value = item1
+             print(item2["CC_diff"], index, value)
+             sum3 = sum3 + value
+             count3 = count3 + 1
+ 
+     # Per-subset mean pass@1, as a percentage
+     mean0 = round(sum0 / count0 * 100, 2)
+     mean1 = round(sum1 / count1 * 100, 2)
+     mean2 = round(sum2 / count2 * 100, 2)
+     mean3 = round(sum3 / count3 * 100, 2)
+     print("count_result!!")
+     print(count0, count1, count2, count3)
+     print(mean0, mean1, mean2, mean3)
+     # with open("token_counts_EI.csv", mode='a', newline='', encoding='utf-8') as file:
+     #     writer = csv.writer(file)
+     #     writer.writerow([model_name, mean0, mean1, mean2, mean3])
+ 
+     # with open("line_counts_EI.csv", mode='a', newline='', encoding='utf-8') as file:
+     #     writer = csv.writer(file)
+     #     writer.writerow([model_name, mean0, mean1, mean2, mean3])
+ 
+     with open("CC_EI.csv", mode='a', newline='', encoding='utf-8') as file:
+         writer = csv.writer(file)
+         writer.writerow([model_name, mean0, mean1, mean2, mean3])
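A note on the input shape this script assumes: `index, value = item1` unpacks each entry of `data1["humaneval"]["pass@1"]` into a pair, so every file under `evaluate_result` presumably looks like the sketch below (file name and numbers are hypothetical; only the `humaneval` → `pass@1` nesting and the pair structure are implied by the code):

    # Hypothetical per-model result file, as a Python literal (values made up):
    {
        "humaneval": {
            "pass@1": [
                [0, 1.0],   # (task index, pass@1 score) pair unpacked by `index, value = item1`
                [1, 0.0],
                [2, 1.0],
            ]
        }
    }

The script also derives the model name from everything between the first '_' and the last '-' of the file name; e.g. a hypothetical file `humaneval_codegemma-2b-result.json` would yield `codegemma-2b`.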
dividing_into_different_subsets/4/EI/even.py ADDED
@@ -0,0 +1,57 @@
+ import json
+ 
+ # Read the data
+ with open("humaneval_new.json", "r", encoding="utf-8") as f:
+     data = json.load(f)
+ 
+ # Number of intervals to split into
+ num_intervals = 4
+ 
+ # Compute the value range of each feature
+ line_min = min(item['line'] for item in data)
+ line_max = max(item['line'] for item in data)
+ line_interval_size = (line_max - line_min) / num_intervals
+ 
+ token_min = min(item['token'] for item in data)
+ token_max = max(item['token'] for item in data)
+ token_interval_size = (token_max - token_min) / num_intervals
+ 
+ cyclomatic_complexity_min = min(item['cyclomatic_complexity'] for item in data)
+ cyclomatic_complexity_max = max(item['cyclomatic_complexity'] for item in data)
+ cyclomatic_complexity_interval_size = (cyclomatic_complexity_max - cyclomatic_complexity_min) / num_intervals
+ count1 = 0
+ count2 = 0
+ count3 = 0
+ count4 = 0
+ 
+ # Assign each item to equal-width intervals
+ for item in data:
+     # Compute the interval for the 'line' feature
+     line_diff = int((item['line'] - line_min) // line_interval_size)
+     item['line_diff'] = min(line_diff, num_intervals - 1)  # keep the interval index in range
+ 
+     # Compute the interval for the 'token' feature
+     token_diff = int((item['token'] - token_min) // token_interval_size)
+     item['token_diff'] = min(token_diff, num_intervals - 1)
+     if item['token_diff'] == 0:
+         count1 = count1 + 1
+     if item['token_diff'] == 1:
+         count2 = count2 + 1
+     if item['token_diff'] == 2:
+         count3 = count3 + 1
+     if item['token_diff'] == 3:
+         count4 = count4 + 1
+ 
+     # Compute the interval for the 'cyclomatic_complexity' feature
+     CC_diff = int((item['cyclomatic_complexity'] - cyclomatic_complexity_min) // cyclomatic_complexity_interval_size)
+     item['CC_diff'] = min(CC_diff, num_intervals - 1)  # keep the interval index in range
+ 
+ # Restore the original order
+ data.sort(key=lambda x: x['id'])
+ print(count1, count2, count3, count4)
+ 
+ # Write the updated data back to a JSON file
+ with open('EI.json', 'w', encoding='utf-8') as file:
+     json.dump(data, file, ensure_ascii=False, indent=4)
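The binning in even.py is plain equal-width bucketing: a value x maps to bin min(int((x - min) // width), num_intervals - 1), where width = (max - min) / num_intervals and the clamp keeps x == max inside the last bin. A minimal standalone sketch with made-up values:

    # Equal-width binning as in even.py (sample values are made up):
    values = [1, 2, 3, 5, 9]            # e.g. cyclomatic complexities
    num_intervals = 4
    lo, hi = min(values), max(values)   # 1 and 9
    width = (hi - lo) / num_intervals   # 2.0
    bins = [min(int((v - lo) // width), num_intervals - 1) for v in values]
    print(bins)  # [0, 0, 1, 2, 3] -- the max value lands in the last bin via the clamp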
dividing_into_different_subsets/4/EI/humaneval_new.json ADDED
The diff for this file is too large to render. See raw diff
 
dividing_into_different_subsets/4/EI/line_counts_EI.csv ADDED
@@ -0,0 +1,12 @@
+ Model,line_subset_1,line_subset_2,line_subset_3,line_subset_4
+ CodeFuse-DeepSeek-33b,79.38,73.08,72.73,75.0
+ Nxcode-CQ-7B,90.05,86.35,70.45,78.75
+ codegemma-2b,33.04,22.79,4.09,3.75
+ codegemma-7b,46.29,34.62,14.09,20.0
+ codegemma-7b-it,59.23,49.33,20.91,43.75
+ deepseek-coder-1.3b-base,38.2,27.21,4.09,33.75
+ deepseek-coder-6.7b-base,53.92,40.38,8.64,16.25
+ deepseek_coder-6.7b-instruct,76.8,71.35,37.27,42.5
+ deepseek_coder_33b-base,60.72,46.35,22.27,26.25
+ deepseek_coder_33b-instruct,73.81,60.77,41.36,17.5
+ codeqwen1.5-7b,56.65,49.62,18.18,37.5
dividing_into_different_subsets/4/EI/token_counts_EI.csv ADDED
@@ -0,0 +1,12 @@
+ Model,token_subset_1,token_subset_2,token_subset_3,token_subset_4
+ CodeFuse-DeepSeek-33b,80.61,72.0,66.67,100.0
+ Nxcode-CQ-7B,89.59,85.9,81.33,20.0
+ codegemma-2b,37.19,15.8,1.0,0.0
+ codegemma-7b,49.39,27.7,20.0,0.0
+ codegemma-7b-it,62.65,39.1,41.33,0.0
+ deepseek-coder-1.3b-base,44.29,16.5,9.0,0.0
+ deepseek-coder-6.7b-base,57.09,33.4,15.0,0.0
+ deepseek_coder-6.7b-instruct,76.17,71.6,46.33,0.0
+ deepseek_coder_33b-base,63.32,40.3,28.67,0.0
+ deepseek_coder_33b-instruct,75.1,56.4,44.33,0.0
+ codeqwen1.5-7b,59.03,43.9,29.67,0.0