File size: 974 Bytes
102b503
 
7c08c44
5441bc5
 
7c08c44
 
 
 
43cf100
 
 
fe9a63b
 
 
 
 
 
 
 
 
 
 
 
 
 
7c08c44
43cf100
 
5441bc5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import gradio as gr
import pandas as pd

# Read the benchmark results once, at import time. The path is relative, so it
# is resolved against the process's current working directory.
df = pd.read_csv(filepath_or_buffer="sorted_results.csv")

# Accessor for the benchmark table loaded at module import.
def display_table() -> pd.DataFrame:
    """Return the module-level benchmark DataFrame.

    The same ``df`` object is returned on every call (no copy is made), so
    callers that mutate the result also mutate the shared table.
    """
    return df

# Gradio interface: an explanatory Markdown header followed by a read-only
# view of the benchmark table.
with gr.Blocks() as demo:
    # Legend for the table columns and the per-response tags (a / w / h / r).
    gr.Markdown("""
    # Benchmark Results
    
    This table contains benchmark data for various models. The columns represent:
    
    - **Model**: The name of the model.
    - **tag%**: The rate of each tag. The tags are:
        - **a**: LLM complies and directly answers question, no warning.
        - **w**: LLM answers but gives a warning.
        - **h**: LLM refuses to answer, but provides other harmless info.
        - **r**: LLM is unwilling/unable to answer question.
    
    You can explore the results of different models below.
    """)
    # interactive=False keeps the table display-only, so visitors cannot edit
    # the benchmark values in the browser.
    gr.DataFrame(value=df, label="Benchmark Table", interactive=False)

# Start the Gradio server for the app defined above.
demo.launch()