ggcristian committed
Commit 8e9d8db · Parent: 297e281

RTL-Repo belongs to a new task: Line Completion
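After this change the leaderboard exposes three tasks, each with its own benchmark list. A minimal sketch of the resulting mapping, read off the lists defined in the diff below:

# Task → benchmark mapping after this commit (inferred from app.py below).
task_benchmarks = {
    "Spec-to-RTL":     ["VerilogEval S2R", "RTLLM"],
    "Code Completion": ["VerilogEval MC", "VeriGen"],  # RTL-Repo removed here
    "Line Completion": ["RTL-Repo"],                   # new task, single benchmark
}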

Files changed (1): app.py (+16 -7)
--- a/app.py
+++ b/app.py
@@ -24,6 +24,10 @@ def filter_leaderboard(task, benchmark, model_type, search_query, max_params):
         valid_benchmarks = cc_benchs
         if benchmark == 'All':
             subset = subset[subset['Benchmark'].isin(valid_benchmarks)]
+    elif task == "Line Completion":
+        valid_benchmarks = lc_benchs
+        if benchmark == 'All':
+            subset = subset[subset['Benchmark'].isin(valid_benchmarks)]
 
     # Apply benchmark filter if not "All"
     if benchmark != 'All':
@@ -43,6 +47,8 @@ def filter_leaderboard(task, benchmark, model_type, search_query, max_params):
         return filter_bench_all(subset, df_agg, agg_column='Agg S2R')
     elif task == 'Code Completion':
         return filter_bench_all(subset, df_agg, agg_column='Agg MC')
+    elif task == 'Line Completion':
+        return filter_RTLRepo(subset)
     elif benchmark == 'RTL-Repo':
         return filter_RTLRepo(subset)
     else:
@@ -64,6 +70,8 @@ def update_benchmarks_by_task(task):
         return gr.update(choices=["All"] + s2r_benchs, value="All")
     elif task == "Code Completion":
         return gr.update(choices=["All"] + cc_benchs, value="All")
+    elif task == "Line Completion":
+        return gr.update(choices=lc_benchs, value="RTL-Repo")
     else:
         return gr.update(choices=["All"] + benchmarks, value="All")
 
@@ -138,9 +146,10 @@ function refresh() {
 with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=colors.emerald)) as app:
     df, benchmarks, metrics, default_metric = read_data()
     df_agg = parse_agg("./aggregated_scores.csv")
-    tasks = ["Spec-to-RTL", "Code Completion"]
+    tasks = ["Spec-to-RTL", "Code Completion", "Line Completion"]
     s2r_benchs = ["VerilogEval S2R", "RTLLM"]
-    cc_benchs = ["VerilogEval MC", "VeriGen", "RTL-Repo"]
+    cc_benchs = ["VerilogEval MC", "VeriGen"]
+    lc_benchs = ["RTL-Repo"]
     rtl_metrics = ["Exact Matching (EM)"]
     non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area", "Aggregated ⬆️"]
     model_types = ['All', 'General', 'Coding', 'RTL-Specific']
@@ -180,21 +189,21 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
     with gr.Tabs():
         with gr.Tab("Leaderboard"):
             with gr.Row(equal_height=True):
-                with gr.Column(scale=1):
+                with gr.Column():
                     task_radio = gr.Radio(choices=tasks, label="Select Task", value='Spec-to-RTL')
-                with gr.Column(scale=1.75):
+                with gr.Column():
                     benchmark_radio = gr.Radio(choices=["All"] + s2r_benchs, label="Select Benchmark", value='All')
 
             with gr.Row(equal_height=True):
-                with gr.Column(scale=1.91):
+                with gr.Column():
                     search_box = gr.Textbox(label="Search Model", placeholder="Type model name...")
-                with gr.Column(scale=1):
+                with gr.Column():
                     model_type_dropdown = gr.Dropdown(
                         choices=model_types,
                         label="Select Model Type",
                         value='All'
                     )
-                with gr.Column(scale=2):
+                with gr.Column():
                     params_slider = gr.Slider(
                         minimum=df['Params'].min(),
                         maximum=700,
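Not shown in this diff: the two radios are presumably linked with a change event, so picking a task refreshes the benchmark choices. A hedged sketch of that wiring; task_radio, benchmark_radio, and update_benchmarks_by_task come from the diff above, but the .change hookup itself is an assumption about the rest of app.py:

# Assumed event wiring (not part of this diff): selecting "Line Completion"
# calls update_benchmarks_by_task, which pins the benchmark radio to
# choices=["RTL-Repo"] with no "All" option, since the task has one benchmark.
task_radio.change(
    fn=update_benchmarks_by_task,
    inputs=task_radio,
    outputs=benchmark_radio,
)

Note the asymmetry in update_benchmarks_by_task: Spec-to-RTL and Code Completion keep an "All" choice, while Line Completion offers only lc_benchs with the value preset to "RTL-Repo", because the task has exactly one benchmark.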