lmy0802 committed on
Commit
98721a2
·
verified ·
1 Parent(s): b2ea093

Upload 7 files

Browse files
dividing_into_different_subsets/8/EI/CC_EI.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,CC_subset_1,CC_subset_2,CC_subset_3,CC_subset_4,CC_subset_5,CC_subset_6,CC_subset_7,CC_subset_8
2
+ CodeFuse-DeepSeek-33b,80.39,80.0,70.37,40.0,0.0,100.0,50.0,100.0
3
+ Nxcode-CQ-7B,87.25,86.87,91.67,83.0,90.0,20.0,75.0,100.0
4
+ codegemma-2b,38.63,26.33,17.22,6.0,0.0,0.0,5.0,0.0
5
+ codegemma-7b,51.08,42.07,22.22,18.0,0.0,0.0,30.0,7.5
6
+ codegemma-7b-it,58.14,55.87,46.11,33.0,65.0,0.0,5.0,37.5
7
+ deepseek-coder-1.3b-base,47.06,30.73,20.0,11.0,0.0,0.0,0.0,0.0
8
+ deepseek-coder-6.7b-base,56.96,48.6,30.0,22.0,0.0,0.0,7.5,2.5
9
+ deepseek_coder-6.7b-instruct,73.24,74.0,77.22,38.0,35.0,0.0,22.5,50.0
10
+ deepseek_coder_33b-base,58.33,56.13,42.78,38.0,30.0,0.0,40.0,5.0
11
+ deepseek_coder_33b-instruct,68.04,69.6,61.11,55.0,65.0,0.0,32.5,50.0
12
+ codeqwen1.5-7b,59.02,52.0,43.89,20.0,0.0,0.0,90.0,25.0
dividing_into_different_subsets/8/EI/EI.json ADDED
The diff for this file is too large to render. See raw diff
 
dividing_into_different_subsets/8/EI/calculate_humaneval_result.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os
import csv

# Directory holding one JSON evaluation-result file per model.
INPUT_DIR = 'E:/python-testn/pythonProject3/hh_1/evaluate_result'

# Number of equal-width difficulty subsets (EI.json is produced by even.py
# with the same subset count).
NUM_SUBSETS = 8


def model_name_from_filename(file_name):
    """Return the model name embedded in *file_name*.

    The expected pattern is '<prefix>_<model-name>-<suffix>': the name is the
    text between the first '_' and the last '-'.
    """
    first_underscore_index = file_name.find('_')
    last_dash_index = file_name.rfind('-')
    return file_name[first_underscore_index + 1:last_dash_index]


def subset_means(pass_at_1, subset_items, diff_key="CC_diff",
                 num_subsets=NUM_SUBSETS):
    """Average pass@1 (as a percentage, 2 decimals) per difficulty subset.

    pass_at_1    -- iterable of (index, value) pairs, one per problem
    subset_items -- parallel iterable of dicts carrying the subset label
                    under *diff_key* (e.g. "CC_diff", "token_diff", "line_diff")
    Returns (means, counts). An empty subset yields a mean of 0 — the
    original code only guarded subset 6 and could divide by zero elsewhere.
    """
    sums = [0.0] * num_subsets
    counts = [0] * num_subsets
    for item1, item2 in zip(pass_at_1, subset_items):
        bucket = item2[diff_key]
        index, value = item1
        print(item2[diff_key], index, value)
        sums[bucket] += value
        counts[bucket] += 1
    means = [round(s / c * 100, 2) if c else 0
             for s, c in zip(sums, counts)]
    return means, counts


def main():
    """Write per-model, per-CC-subset pass@1 averages to CC_EI.csv."""
    files = os.listdir(INPUT_DIR)

    with open("CC_EI.csv", "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(
            ["Model"] + ["CC_subset_%d" % (i + 1) for i in range(NUM_SUBSETS)])

    # EI.json never changes during the run; load it once instead of per model.
    with open("EI.json", "r", encoding="utf-8") as file:
        subset_items = json.load(file)

    for file_name in files:
        input_file_path = os.path.join(INPUT_DIR, file_name)
        model_name = model_name_from_filename(file_name)
        print(model_name)
        with open(input_file_path, "r", encoding="utf-8") as file:
            data1 = json.load(file)

        means, counts = subset_means(data1["humaneval"]["pass@1"],
                                     subset_items)

        print("count_result!!")
        print(*counts)
        # BUG FIX: the original printed count4 in place of mean4 here.
        print(*means)

        with open("CC_EI.csv", mode='a', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow([model_name] + means)


if __name__ == "__main__":
    main()
dividing_into_different_subsets/8/EI/even.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ # 读取数据
4
+ with open("humaneval_new.json", "r", encoding="utf-8") as f:
5
+ data = json.load(f)
6
+
7
+ # 定义划分区间数
8
+ num_intervals = 8
9
+
10
+ # 计算每个特征的值范围
11
+ line_min = min(item['line'] for item in data)
12
+ line_max = max(item['line'] for item in data)
13
+ line_interval_size = (line_max - line_min) / num_intervals
14
+
15
+ token_min = min(item['token'] for item in data)
16
+ token_max = max(item['token'] for item in data)
17
+ token_interval_size = (token_max - token_min) / num_intervals
18
+
19
+ cyclomatic_complexity_min = min(item['cyclomatic_complexity'] for item in data)
20
+ cyclomatic_complexity_max = max(item['cyclomatic_complexity'] for item in data)
21
+ cyclomatic_complexity_interval_size = (cyclomatic_complexity_max - cyclomatic_complexity_min) / num_intervals
22
+ count1=0
23
+ count2=0
24
+ count3=0
25
+ count4=0
26
+ count5=0
27
+ count6=0
28
+ count7=0
29
+ count8=0
30
+
31
+ # 根据等距划分数据
32
+ for item in data:
33
+ # 计算 line 特征的区间
34
+ line_diff = int((item['line'] - line_min) // line_interval_size)
35
+ item['line_diff'] = min(line_diff,num_intervals-1) # 确保区间索引在范围内
36
+
37
+
38
+ # 计算 token 特征的区间
39
+ token_diff = int((item['token'] - token_min) // token_interval_size)
40
+ item['token_diff'] = min(token_diff,num_intervals-1)
41
+ if item['token_diff'] == 0:
42
+ count1 = count1 + 1
43
+ if item['token_diff'] == 1:
44
+ count2 = count2 + 1
45
+ if item['token_diff'] == 2:
46
+ count3 = count3 + 1
47
+ if item['token_diff'] == 3:
48
+ count4 = count4 + 1
49
+ if item['token_diff'] == 4:
50
+ count5 = count5 + 1
51
+ if item['token_diff'] == 5:
52
+ count6 = count6 + 1
53
+ if item['token_diff'] == 6:
54
+ count7 = count7 + 1
55
+ if item['token_diff'] == 7:
56
+ count8 = count8 + 1
57
+ # 计算 cyclomatic_complexity 特征的区间
58
+ CC_diff = int((item['cyclomatic_complexity'] - cyclomatic_complexity_min) // cyclomatic_complexity_interval_size)
59
+ item['CC_diff'] = min(CC_diff,num_intervals-1) # 确保区间索引在范围内
60
+
61
+ # 恢复原始顺序
62
+ data.sort(key=lambda x: x['id'])
63
+ print(count1,count2,count3,count4,count5,count6,count7,count8)
64
+
65
+ # 将更新后的数据写回JSON文件
66
+ with open('EI.json', 'w', encoding='utf-8') as file:
67
+ json.dump(data, file, ensure_ascii=False, indent=4)
dividing_into_different_subsets/8/EI/humaneval_new.json ADDED
The diff for this file is too large to render. See raw diff
 
dividing_into_different_subsets/8/EI/line_counts_EI.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,line_subset_1,line_subset_2,line_subset_3,line_subset_4,line_subset_5,line_subset_6,line_subset_7,line_subset_8
2
+ CodeFuse-DeepSeek-33b,75.86,80.88,69.23,84.62,57.14,100.0,50.0,100.0
3
+ Nxcode-CQ-7B,92.93,88.82,86.92,84.62,82.86,48.75,60.0,97.5
4
+ codegemma-2b,51.03,25.37,20.13,30.77,6.43,0.0,5.0,2.5
5
+ codegemma-7b,60.17,40.37,31.41,44.23,20.0,3.75,5.0,35.0
6
+ codegemma-7b-it,72.24,53.68,46.79,56.92,27.14,10.0,45.0,42.5
7
+ deepseek-coder-1.3b-base,53.1,31.84,24.87,34.23,6.43,0.0,22.5,45.0
8
+ deepseek-coder-6.7b-base,73.28,45.66,37.44,49.23,13.57,0.0,15.0,17.5
9
+ deepseek_coder-6.7b-instruct,79.31,75.74,72.05,69.23,52.86,10.0,50.0,35.0
10
+ deepseek_coder_33b-base,75.0,54.63,43.08,56.15,32.86,3.75,0.0,52.5
11
+ deepseek_coder_33b-instruct,83.28,69.78,57.05,71.92,53.57,20.0,2.5,32.5
12
+ codeqwen1.5-7b,63.97,53.53,45.51,61.92,28.57,0.0,30.0,45.0
dividing_into_different_subsets/8/EI/token_counts_EI.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,token_subset_1,token_subset_2,token_subset_3,token_subset_4,token_subset_5,token_subset_6,token_subset_7,token_subset_8
2
+ CodeFuse-DeepSeek-33b,75.0,83.87,78.12,61.11,66.67,66.67,0,100.0
3
+ Nxcode-CQ-7B,93.47,87.34,87.03,83.89,80.56,82.5,0,20.0
4
+ codegemma-2b,45.97,32.1,19.84,8.61,0.0,2.5,0,0.0
5
+ codegemma-7b,59.31,43.63,26.87,29.17,18.89,21.67,0,0.0
6
+ codegemma-7b-it,73.33,56.45,37.19,42.5,40.0,43.33,0,0.0
7
+ deepseek-coder-1.3b-base,54.58,38.31,19.06,11.94,0.0,22.5,0,0.0
8
+ deepseek-coder-6.7b-base,72.22,48.31,37.81,25.56,14.44,15.83,0,0.0
9
+ deepseek_coder-6.7b-instruct,85.28,70.89,80.16,56.39,43.33,50.83,0,0.0
10
+ deepseek_coder_33b-base,73.06,57.66,45.78,30.56,28.33,29.17,0,0.0
11
+ deepseek_coder_33b-instruct,87.78,67.74,66.25,38.89,28.33,68.33,0,0.0
12
+ codeqwen1.5-7b,68.19,53.71,43.75,44.17,24.44,37.5,0,0.0