ggcristian committed
Commit 8e9d8db · Parent: 297e281

RTL-Repo belongs to a new task: Line Completion
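After this change the leaderboard exposes three tasks, each with its own benchmark list. A minimal sketch of the resulting mapping, read off the lists defined in the diff below:

# Task → benchmark mapping after this commit (inferred from app.py below).
task_benchmarks = {
    "Spec-to-RTL":     ["VerilogEval S2R", "RTLLM"],
    "Code Completion": ["VerilogEval MC", "VeriGen"],  # RTL-Repo removed here
    "Line Completion": ["RTL-Repo"],                   # new task, single benchmark
}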

Files changed (1): app.py (+16 -7)
--- a/app.py
+++ b/app.py
@@ -24,6 +24,10 @@ def filter_leaderboard(task, benchmark, model_type, search_query, max_params):
         valid_benchmarks = cc_benchs
         if benchmark == 'All':
             subset = subset[subset['Benchmark'].isin(valid_benchmarks)]
+    elif task == "Line Completion":
+        valid_benchmarks = lc_benchs
+        if benchmark == 'All':
+            subset = subset[subset['Benchmark'].isin(valid_benchmarks)]
 
     # Apply benchmark filter if not "All"
     if benchmark != 'All':
@@ -43,6 +47,8 @@ def filter_leaderboard(task, benchmark, model_type, search_query, max_params):
         return filter_bench_all(subset, df_agg, agg_column='Agg S2R')
     elif task == 'Code Completion':
         return filter_bench_all(subset, df_agg, agg_column='Agg MC')
+    elif task == 'Line Completion':
+        return filter_RTLRepo(subset)
     elif benchmark == 'RTL-Repo':
         return filter_RTLRepo(subset)
     else:
@@ -64,6 +70,8 @@ def update_benchmarks_by_task(task):
         return gr.update(choices=["All"] + s2r_benchs, value="All")
     elif task == "Code Completion":
         return gr.update(choices=["All"] + cc_benchs, value="All")
+    elif task == "Line Completion":
+        return gr.update(choices=lc_benchs, value="RTL-Repo")
     else:
         return gr.update(choices=["All"] + benchmarks, value="All")
 
@@ -138,9 +146,10 @@ function refresh() {
 with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=colors.emerald)) as app:
     df, benchmarks, metrics, default_metric = read_data()
     df_agg = parse_agg("./aggregated_scores.csv")
-    tasks = ["Spec-to-RTL", "Code Completion"]
+    tasks = ["Spec-to-RTL", "Code Completion", "Line Completion"]
     s2r_benchs = ["VerilogEval S2R", "RTLLM"]
-    cc_benchs = ["VerilogEval MC", "VeriGen", "RTL-Repo"]
+    cc_benchs = ["VerilogEval MC", "VeriGen"]
+    lc_benchs = ["RTL-Repo"]
     rtl_metrics = ["Exact Matching (EM)"]
     non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area", "Aggregated ⬆️"]
     model_types = ['All', 'General', 'Coding', 'RTL-Specific']
@@ -180,21 +189,21 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
     with gr.Tabs():
         with gr.Tab("Leaderboard"):
             with gr.Row(equal_height=True):
-                with gr.Column(scale=1):
+                with gr.Column():
                     task_radio = gr.Radio(choices=tasks, label="Select Task", value='Spec-to-RTL')
-                with gr.Column(scale=1.75):
+                with gr.Column():
                     benchmark_radio = gr.Radio(choices=["All"] + s2r_benchs, label="Select Benchmark", value='All')
 
             with gr.Row(equal_height=True):
-                with gr.Column(scale=1.91):
+                with gr.Column():
                     search_box = gr.Textbox(label="Search Model", placeholder="Type model name...")
-                with gr.Column(scale=1):
+                with gr.Column():
                     model_type_dropdown = gr.Dropdown(
                         choices=model_types,
                         label="Select Model Type",
                         value='All'
                     )
-                with gr.Column(scale=2):
+                with gr.Column():
                     params_slider = gr.Slider(
                         minimum=df['Params'].min(),
                         maximum=700,
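Not shown in this diff: the two radios are presumably linked with a change event, so picking a task refreshes the benchmark choices. A hedged sketch of that wiring; task_radio, benchmark_radio, and update_benchmarks_by_task come from the diff above, but the .change hookup itself is an assumption about the rest of app.py:

# Assumed event wiring (not part of this diff): selecting "Line Completion"
# calls update_benchmarks_by_task, which pins the benchmark radio to
# choices=["RTL-Repo"] with no "All" option, since the task has one benchmark.
task_radio.change(
    fn=update_benchmarks_by_task,
    inputs=task_radio,
    outputs=benchmark_radio,
)

Note the asymmetry in update_benchmarks_by_task: Spec-to-RTL and Code Completion keep an "All" choice, while Line Completion offers only lc_benchs with the value preset to "RTL-Repo", because the task has exactly one benchmark.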