Spaces:
Running
Running
Commit
·
8e9d8db
1
Parent(s):
297e281
RTL-Repo belongs to a new task: Line Completion
Browse files
app.py
CHANGED
@@ -24,6 +24,10 @@ def filter_leaderboard(task, benchmark, model_type, search_query, max_params):
|
|
24 |
valid_benchmarks = cc_benchs
|
25 |
if benchmark == 'All':
|
26 |
subset = subset[subset['Benchmark'].isin(valid_benchmarks)]
|
|
|
|
|
|
|
|
|
27 |
|
28 |
# Apply benchmark filter if not "All"
|
29 |
if benchmark != 'All':
|
@@ -43,6 +47,8 @@ def filter_leaderboard(task, benchmark, model_type, search_query, max_params):
|
|
43 |
return filter_bench_all(subset, df_agg, agg_column='Agg S2R')
|
44 |
elif task == 'Code Completion':
|
45 |
return filter_bench_all(subset, df_agg, agg_column='Agg MC')
|
|
|
|
|
46 |
elif benchmark == 'RTL-Repo':
|
47 |
return filter_RTLRepo(subset)
|
48 |
else:
|
@@ -64,6 +70,8 @@ def update_benchmarks_by_task(task):
|
|
64 |
return gr.update(choices=["All"] + s2r_benchs, value="All")
|
65 |
elif task == "Code Completion":
|
66 |
return gr.update(choices=["All"] + cc_benchs, value="All")
|
|
|
|
|
67 |
else:
|
68 |
return gr.update(choices=["All"] + benchmarks, value="All")
|
69 |
|
@@ -138,9 +146,10 @@ function refresh() {
|
|
138 |
with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=colors.emerald)) as app:
|
139 |
df, benchmarks, metrics, default_metric = read_data()
|
140 |
df_agg = parse_agg("./aggregated_scores.csv")
|
141 |
-
tasks = ["Spec-to-RTL", "Code Completion"]
|
142 |
s2r_benchs = ["VerilogEval S2R", "RTLLM"]
|
143 |
-
cc_benchs = ["VerilogEval MC", "VeriGen", "RTL-Repo"]
|
|
|
144 |
rtl_metrics = ["Exact Matching (EM)"]
|
145 |
non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area", "Aggregated ⬆️"]
|
146 |
model_types = ['All', 'General', 'Coding', 'RTL-Specific']
|
@@ -180,21 +189,21 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
|
|
180 |
with gr.Tabs():
|
181 |
with gr.Tab("Leaderboard"):
|
182 |
with gr.Row(equal_height=True):
|
183 |
-
with gr.Column(
|
184 |
task_radio = gr.Radio(choices=tasks, label="Select Task", value='Spec-to-RTL')
|
185 |
-
with gr.Column(
|
186 |
benchmark_radio = gr.Radio(choices=["All"] + s2r_benchs, label="Select Benchmark", value='All')
|
187 |
|
188 |
with gr.Row(equal_height=True):
|
189 |
-
with gr.Column(
|
190 |
search_box = gr.Textbox(label="Search Model", placeholder="Type model name...")
|
191 |
-
with gr.Column(
|
192 |
model_type_dropdown = gr.Dropdown(
|
193 |
choices=model_types,
|
194 |
label="Select Model Type",
|
195 |
value='All'
|
196 |
)
|
197 |
-
with gr.Column(
|
198 |
params_slider = gr.Slider(
|
199 |
minimum=df['Params'].min(),
|
200 |
maximum=700,
|
|
|
24 |
valid_benchmarks = cc_benchs
|
25 |
if benchmark == 'All':
|
26 |
subset = subset[subset['Benchmark'].isin(valid_benchmarks)]
|
27 |
+
elif task == "Line Completion":
|
28 |
+
valid_benchmarks = lc_benchs
|
29 |
+
if benchmark == 'All':
|
30 |
+
subset = subset[subset['Benchmark'].isin(valid_benchmarks)]
|
31 |
|
32 |
# Apply benchmark filter if not "All"
|
33 |
if benchmark != 'All':
|
|
|
47 |
return filter_bench_all(subset, df_agg, agg_column='Agg S2R')
|
48 |
elif task == 'Code Completion':
|
49 |
return filter_bench_all(subset, df_agg, agg_column='Agg MC')
|
50 |
+
elif task == 'Line Completion':
|
51 |
+
return filter_RTLRepo(subset)
|
52 |
elif benchmark == 'RTL-Repo':
|
53 |
return filter_RTLRepo(subset)
|
54 |
else:
|
|
|
70 |
return gr.update(choices=["All"] + s2r_benchs, value="All")
|
71 |
elif task == "Code Completion":
|
72 |
return gr.update(choices=["All"] + cc_benchs, value="All")
|
73 |
+
elif task == "Line Completion":
|
74 |
+
return gr.update(choices=lc_benchs, value="RTL-Repo")
|
75 |
else:
|
76 |
return gr.update(choices=["All"] + benchmarks, value="All")
|
77 |
|
|
|
146 |
with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=colors.emerald)) as app:
|
147 |
df, benchmarks, metrics, default_metric = read_data()
|
148 |
df_agg = parse_agg("./aggregated_scores.csv")
|
149 |
+
tasks = ["Spec-to-RTL", "Code Completion", "Line Completion"]
|
150 |
s2r_benchs = ["VerilogEval S2R", "RTLLM"]
|
151 |
+
cc_benchs = ["VerilogEval MC", "VeriGen"]
|
152 |
+
lc_benchs = ["RTL-Repo"]
|
153 |
rtl_metrics = ["Exact Matching (EM)"]
|
154 |
non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area", "Aggregated ⬆️"]
|
155 |
model_types = ['All', 'General', 'Coding', 'RTL-Specific']
|
|
|
189 |
with gr.Tabs():
|
190 |
with gr.Tab("Leaderboard"):
|
191 |
with gr.Row(equal_height=True):
|
192 |
+
with gr.Column():
|
193 |
task_radio = gr.Radio(choices=tasks, label="Select Task", value='Spec-to-RTL')
|
194 |
+
with gr.Column():
|
195 |
benchmark_radio = gr.Radio(choices=["All"] + s2r_benchs, label="Select Benchmark", value='All')
|
196 |
|
197 |
with gr.Row(equal_height=True):
|
198 |
+
with gr.Column():
|
199 |
search_box = gr.Textbox(label="Search Model", placeholder="Type model name...")
|
200 |
+
with gr.Column():
|
201 |
model_type_dropdown = gr.Dropdown(
|
202 |
choices=model_types,
|
203 |
label="Select Model Type",
|
204 |
value='All'
|
205 |
)
|
206 |
+
with gr.Column():
|
207 |
params_slider = gr.Slider(
|
208 |
minimum=df['Params'].min(),
|
209 |
maximum=700,
|