lmy0802 committed on
Commit b2ea093 · verified · 1 Parent(s): c923f73

Upload 7 files

dividing_into_different_subsets/7/EI/CC_EI.csv ADDED
@@ -0,0 +1,12 @@
+ Model,CC_subset_1,CC_subset_2,CC_subset_3,CC_subset_4,CC_subset_5,CC_subset_6,CC_subset_7
+ CodeFuse-DeepSeek-33b,80.39,80.0,70.37,33.33,100.0,100.0,66.67
+ Nxcode-CQ-7B,87.25,86.87,91.67,84.17,20.0,55.0,98.33
+ codegemma-2b,38.63,26.33,17.22,5.0,0.0,0.0,3.33
+ codegemma-7b,51.08,42.07,22.22,15.0,0.0,40.0,11.67
+ codegemma-7b-it,58.14,55.87,46.11,38.33,0.0,5.0,26.67
+ deepseek-coder-1.3b-base,47.06,30.73,20.0,9.17,0.0,0.0,0.0
+ deepseek-coder-6.7b-base,56.96,48.6,30.0,18.33,0.0,0.0,6.67
+ deepseek_coder-6.7b-instruct,73.24,74.0,77.22,37.5,0.0,0.0,48.33
+ deepseek_coder_33b-base,58.33,56.13,42.78,36.67,0.0,80.0,3.33
+ deepseek_coder_33b-instruct,68.04,69.6,61.11,56.67,0.0,10.0,51.67
+ codeqwen1.5-7b,59.02,52.0,43.89,16.67,0.0,95.0,45.0
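
Note: each row above is one model's mean pass@1 (in percent) on the seven cyclomatic-complexity subsets. A minimal sketch for loading such a table with the standard library (it assumes CC_EI.csv sits in the working directory; the column handling is illustrative):

import csv

# Load CC_EI.csv and report each model's weakest complexity subset
with open("CC_EI.csv", newline='', encoding="utf-8") as f:
    for row in csv.DictReader(f):
        scores = {col: float(v) for col, v in row.items() if col != "Model"}
        weakest = min(scores, key=scores.get)
        print(row["Model"], "->", weakest, scores[weakest])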
dividing_into_different_subsets/7/EI/EI.json ADDED
The diff for this file is too large to render. See raw diff
 
dividing_into_different_subsets/7/EI/calculate_humaneval_result.py ADDED
@@ -0,0 +1,62 @@
+ import json
+ import os
+ import csv
+
+ # Directory that holds the per-model evaluation result files
+ input_dir = 'E:/python-testn/pythonProject3/hh_1/evaluate_result'
+
+ # Collect every result file in the directory
+ files = os.listdir(input_dir)
+
+ # Number of equal-width difficulty subsets
+ num_subsets = 7
+
+ # Difficulty dimension to aggregate by: "line_diff", "token_diff", or "CC_diff",
+ # written to line_counts_EI.csv, token_counts_EI.csv, or CC_EI.csv respectively.
+ diff_key = "line_diff"
+ column_prefix = "line"
+ output_csv = "line_counts_EI.csv"
+
+ # Write the CSV header once
+ with open(output_csv, "w", newline='') as csvfile:
+     writer = csv.writer(csvfile)
+     writer.writerow(["Model"] + [f"{column_prefix}_subset_{i + 1}" for i in range(num_subsets)])
+
+ for file_name in files:
+     # Build the full path to the result file
+     input_file_path = os.path.join(input_dir, file_name)
+
+     # The model name sits between the first '_' and the last '-' of the file name
+     first_underscore_index = file_name.find('_')
+     last_dash_index = file_name.rfind('-')
+     model_name = file_name[first_underscore_index + 1:last_dash_index]
+     print(model_name)
+
+     with open(input_file_path, "r", encoding="utf-8") as file:
+         data1 = json.load(file)
+
+     with open("EI.json", "r", encoding="utf-8") as file:
+         data2 = json.load(file)
+
+     sums = [0.0] * num_subsets
+     counts = [0] * num_subsets
+
+     # Accumulate pass@1 per subset along the chosen difficulty dimension
+     for item1, item2 in zip(data1["humaneval"]["pass@1"], data2):
+         index, value = item1
+         subset = item2[diff_key]
+         print(subset, index, value)
+         sums[subset] += value
+         counts[subset] += 1
+
+     # Mean pass@1 as a percentage per subset; an empty subset is recorded as 0
+     means = [round(s / c * 100, 2) if c else 0 for s, c in zip(sums, counts)]
+
+     print("count_result!!")
+     print(*counts)
+     print(*means)
+
+     # Append this model's row to the output CSV
+     with open(output_csv, mode='a', newline='', encoding='utf-8') as file:
+         writer = csv.writer(file)
+         writer.writerow([model_name] + means)
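
Note: the script zips data1["humaneval"]["pass@1"] with the entries of EI.json, so it assumes the pass@1 list is a sequence of (task_id, score) pairs aligned one-to-one, in the same order, with the EI.json records. A minimal sketch of that contract on synthetic data (the task ids and values here are illustrative stand-ins, not taken from the repo):

# Hypothetical stand-ins for the two inputs the script pairs up
eval_result = {"humaneval": {"pass@1": [["HumanEval/0", 1.0], ["HumanEval/1", 0.0]]}}
ei_records = [{"id": 0, "line_diff": 0, "token_diff": 1, "CC_diff": 0},
              {"id": 1, "line_diff": 2, "token_diff": 1, "CC_diff": 1}]

for (task_id, score), meta in zip(eval_result["humaneval"]["pass@1"], ei_records):
    print(task_id, "-> line subset", meta["line_diff"], "score", score)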
dividing_into_different_subsets/7/EI/even.py ADDED
@@ -0,0 +1,47 @@
+ import json
+
+ # Load the per-problem feature data
+ with open("humaneval_new.json", "r", encoding="utf-8") as f:
+     data = json.load(f)
+
+ # Number of equal-width intervals per feature
+ num_intervals = 7
+
+ # Value range of each feature
+ line_min = min(item['line'] for item in data)
+ line_max = max(item['line'] for item in data)
+ line_interval_size = (line_max - line_min) / num_intervals
+
+ token_min = min(item['token'] for item in data)
+ token_max = max(item['token'] for item in data)
+ token_interval_size = (token_max - token_min) / num_intervals
+
+ cyclomatic_complexity_min = min(item['cyclomatic_complexity'] for item in data)
+ cyclomatic_complexity_max = max(item['cyclomatic_complexity'] for item in data)
+ cyclomatic_complexity_interval_size = (cyclomatic_complexity_max - cyclomatic_complexity_min) / num_intervals
+
+ # How many problems land in each token interval
+ token_counts = [0] * num_intervals
+
+ # Assign each problem to an equal-width interval per feature
+ for item in data:
+     # Interval index for the 'line' feature
+     line_diff = int((item['line'] - line_min) // line_interval_size)
+     item['line_diff'] = min(line_diff, num_intervals - 1)  # clamp the index into range
+
+     # Interval index for the 'token' feature
+     token_diff = int((item['token'] - token_min) // token_interval_size)
+     item['token_diff'] = min(token_diff, num_intervals - 1)
+     token_counts[item['token_diff']] += 1
+
+     # Interval index for the 'cyclomatic_complexity' feature
+     CC_diff = int((item['cyclomatic_complexity'] - cyclomatic_complexity_min) // cyclomatic_complexity_interval_size)
+     item['CC_diff'] = min(CC_diff, num_intervals - 1)  # clamp the index into range
+
+ # Restore the original problem order
+ data.sort(key=lambda x: x['id'])
+ print(*token_counts)
+
+ # Write the updated data back to a JSON file
+ with open('EI.json', 'w', encoding='utf-8') as file:
+     json.dump(data, file, ensure_ascii=False, indent=4)
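
Note: even.py performs equal-width binning: a value v maps to interval floor((v - min) / width) with width = (max - min) / num_intervals, clamped to num_intervals - 1 so that v == max lands in the last interval rather than one past it. A quick self-contained check of that formula (the toy range 1..29 is made up for illustration):

def equal_width_bin(v, lo, hi, k=7):
    # width of each of the k equal intervals
    width = (hi - lo) / k
    # clamp so v == hi falls in interval k - 1 instead of k
    return min(int((v - lo) // width), k - 1)

# Toy check: values in the range 1..29 split into 7 bins of width 4
for v in (1, 5, 15, 29):
    print(v, "->", equal_width_bin(v, 1, 29))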
dividing_into_different_subsets/7/EI/humaneval_new.json ADDED
The diff for this file is too large to render. See raw diff
 
dividing_into_different_subsets/7/EI/line_counts_EI.csv ADDED
@@ -0,0 +1,12 @@
+ Model,line_subset_1,line_subset_2,line_subset_3,line_subset_4,line_subset_5,line_subset_6,line_subset_7
+ CodeFuse-DeepSeek-33b,77.27,77.61,70.97,76.92,100.0,66.67,100.0
+ Nxcode-CQ-7B,90.11,90.45,83.87,83.08,50.0,71.67,97.5
+ codegemma-2b,47.16,20.3,22.1,21.54,8.75,3.33,2.5
+ codegemma-7b,56.14,34.85,35.0,36.92,18.75,3.33,35.0
+ codegemma-7b-it,69.43,48.96,50.0,40.38,28.75,35.0,42.5
+ deepseek-coder-1.3b-base,49.43,27.76,25.32,24.62,6.25,15.0,45.0
+ deepseek-coder-6.7b-base,64.89,42.31,38.39,41.92,0.0,10.0,17.5
+ deepseek_coder-6.7b-instruct,82.39,71.19,69.84,68.46,27.5,36.67,35.0
+ deepseek_coder_33b-base,72.5,48.51,43.23,54.23,11.25,5.0,52.5
+ deepseek_coder_33b-instruct,82.95,64.63,56.13,75.0,0.0,28.33,32.5
+ codeqwen1.5-7b,60.91,49.48,52.42,44.62,18.75,20.0,45.0
dividing_into_different_subsets/7/EI/token_counts_EI.csv ADDED
@@ -0,0 +1,12 @@
+ Model,token_subset_1,token_subset_2,token_subset_3,token_subset_4,token_subset_5,token_subset_6,token_subset_7
+ CodeFuse-DeepSeek-33b,76.19,84.51,68.18,57.89,77.78,0,100.0
+ Nxcode-CQ-7B,94.05,86.48,89.77,76.58,86.11,0,20.0
+ codegemma-2b,42.98,29.3,23.86,1.32,1.67,0,0.0
+ codegemma-7b,54.52,41.76,30.23,22.63,19.44,0,0.0
+ codegemma-7b-it,69.76,53.8,41.36,38.68,35.56,0,0.0
+ deepseek-coder-1.3b-base,48.33,36.34,17.27,9.21,15.0,0,0.0
+ deepseek-coder-6.7b-base,67.86,47.46,31.36,22.37,17.22,0,0.0
+ deepseek_coder-6.7b-instruct,86.31,70.28,76.14,54.74,45.56,0,0.0
+ deepseek_coder_33b-base,68.21,57.32,41.36,27.89,30.56,0,0.0
+ deepseek_coder_33b-instruct,82.74,67.61,64.55,35.79,52.22,0,0.0
+ codeqwen1.5-7b,65.6,53.73,37.95,40.26,28.33,0,0.0
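
Note: the flat 0 running down token_subset_6 appears to mark an interval that contains no problems (the aggregation script records 0 for an empty subset), whereas the 0.0 entries elsewhere are genuine mean pass@1 scores of zero.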