File size: 5,369 Bytes
0869b01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import gradio as gr
import pandas as pd

# Tool request data, loaded once at import time from a path relative to the
# current working directory. The code below reads its `tool`, `error`,
# `prompt_response` and `request_month_year_week` columns and counts rows
# as requests.
tools = pd.read_csv("./data/tools.csv")
# Kept for reference: loading of the trades profitability data is disabled.
# all_trades = pd.read_csv('./data/all_trades_profitability.csv')

# Top-level Gradio container; the UI is declared inside `with demo:` below.
demo = gr.Blocks()

# Names of the tools whose requests are included in the error statistics and
# offered as choices in the tool dropdown.
INC_TOOLS = [
    # base prediction tools
    "prediction-online",
    "prediction-offline",
    "claude-prediction-online",
    "claude-prediction-offline",
    # SME variants
    "prediction-offline-sme",
    "prediction-online-sme",
    # RAG / reasoning request tools
    "prediction-request-rag",
    "prediction-request-reasoning",
    "prediction-url-cot-claude",
    "prediction-request-rag-claude",
    "prediction-request-reasoning-claude",
]

def set_error(row):
    """Return whether the request in *row* should be counted as an error.

    An explicit boolean in ``row.error`` wins. When the flag is anything
    else (for example the NaN that ``read_csv`` produces for an empty cell),
    the request counts as failed exactly when it has no usable
    ``prompt_response``.

    Args:
        row: Object exposing ``error`` and ``prompt_response`` attributes,
            such as a row passed by ``DataFrame.apply(..., axis=1)``.

    Returns:
        bool: ``True`` if the request is an error, ``False`` otherwise.
    """
    flag = row.error
    if flag in [True, False]:
        # Normalise numpy booleans / 0-1 values to a plain bool so that
        # downstream grouping always sees the keys True and False.
        return bool(flag)

    response = row.prompt_response
    # A missing response arrives as NaN (or None), and NaN is truthy, so a
    # bare truthiness test would wrongly report such a request as a success.
    if pd.isna(response):
        return True
    return not response

def get_error_data():
    """Count failed and successful requests per tool and week.

    Keeps only the rows of the module-level ``tools`` frame whose ``tool``
    is listed in ``INC_TOOLS``, classifies every row with ``set_error`` and
    pivots the counts so each outcome gets its own column.

    Returns:
        pandas.DataFrame: One row per (``tool``, ``request_month_year_week``)
        pair with the columns ``tool``, ``request_month_year_week``,
        ``False`` (non-error count), ``True`` (error count), ``error_perc``
        (errors as a percentage of all requests) and ``total_requests``.
    """
    # Work on a copy: the boolean-mask selection is a slice of `tools`, and
    # adding a column to a slice triggers pandas' SettingWithCopyWarning.
    tools_inc = tools[tools['tool'].isin(INC_TOOLS)].copy()
    # Force a real bool column so the pivoted column labels are True/False.
    tools_inc['error'] = tools_inc.apply(set_error, axis=1).astype(bool)

    counts = (
        tools_inc.groupby(['tool', 'request_month_year_week', 'error'])
        .size()
        .unstack()
        .fillna(0)
        # Guarantee both outcome columns: data that contains only errors or
        # only successes would otherwise lack one and raise KeyError below.
        .reindex(columns=[False, True], fill_value=0)
        .reset_index()
    )
    counts['error_perc'] = (counts[True] / (counts[False] + counts[True])) * 100
    counts['total_requests'] = counts[False] + counts[True]

    return counts

def get_error_data_all(error):
    """Collapse the per-tool error counts into one row per week.

    Args:
        error: Frame with a ``request_month_year_week`` column, a
            ``total_requests`` column and count columns labelled ``False``
            and ``True``.

    Returns:
        pandas.DataFrame: One row per week holding the summed
        ``total_requests``, ``False`` and ``True`` counts plus
        ``error_perc`` rounded to 4 decimal places. All column labels are
        strings, so the count columns are named ``'False'`` and ``'True'``.
    """
    week_col = 'request_month_year_week'
    summed_columns = {'total_requests': 'sum', False: 'sum', True: 'sum'}
    weekly = error.groupby(week_col).agg(summed_columns).reset_index()

    # Share of failed requests per week, as a percentage with 4 decimals.
    failed_share = (weekly[True] / weekly['total_requests']) * 100
    weekly['error_perc'] = failed_share.map(lambda value: round(value, 4))

    # Turn the boolean column labels into their string form.
    weekly.columns = weekly.columns.astype(str)
    return weekly

# Both tables are built once at import time; the UI code below reads them as
# module-level globals.
error = get_error_data()  # per-tool, per-week error counts and percentages
error_all = get_error_data_all(error)  # weekly totals summed across tools
# Debug output: first rows of the weekly totals, printed at startup.
print(error_all.head())

# Dashboard layout: an overall weekly error plot, a per-tool plot driven by a
# tool dropdown, and a per-week plot driven by a week dropdown.
with demo:
    gr.HTML("<h1>Olas Predict Actual Performance</h1>")
    gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")

    with gr.Tabs():
        with gr.TabItem("🔥 Error Dashboard"):
            with gr.Row():
                gr.Markdown("This plot shows the percentage of requests that resulted in an error.")
            with gr.Row():
                # Weekly error percentage summed over all included tools.
                with gr.Column():
                    gr.LinePlot(
                        value=error_all,
                        x="request_month_year_week",
                        y="error_perc",
                        title="Error Percentage",
                        x_title="Week",
                        y_title="Error Percentage",
                        height=400,
                        show_label=True
                    )
            gr.Markdown("This plot shows the percentage of requests that resulted in an error.")

            # Dropdown for selecting the tool
            sel_tool = gr.Dropdown(
                value="prediction-online",
                choices=INC_TOOLS,
                label="Select a tool"
            )
            # Created unrendered so it can be placed in its own row further
            # down. `x`/`y` name the DataFrame columns to draw; the callback
            # only has to supply the data.
            plot_tool_error = gr.LinePlot(
                x="request_month_year_week",
                y="error_perc",
                title="Error Percentage",
                x_title="Week",
                y_title="Error Percentage",
                render=False
            )

            # Dropdown for selecting the week
            sel_week = gr.Dropdown(
                value=error['request_month_year_week'].iloc[-1],
                choices=error['request_month_year_week'].unique().tolist(),
                label="Select a week"
            )
            plot_week_error = gr.BarPlot(
                x="tool",
                y="error_perc",
                title="Error Percentage",
                x_title="Tool",
                y_title="Error Percentage",
                render=False
            )

            def _plot_frame(row_mask):
                """Return the rows of `error` selected by *row_mask*, ready to plot.

                The result is an independent copy (the module-level `error`
                table is never modified) with string column labels and
                `error_perc` rounded to 4 decimal places.
                """
                frame = error[row_mask].copy()
                frame.columns = frame.columns.astype(str)
                frame['error_perc'] = frame['error_perc'].round(4)
                return frame

            def update_tool_plot(selected_tool):
                """Return the weekly error data of *selected_tool* as a DataFrame."""
                return _plot_frame(error['tool'] == selected_tool)

            def update_week_plot(selected_week):
                """Return the per-tool error data of *selected_week* as a DataFrame."""
                return _plot_frame(error['request_month_year_week'] == selected_week)

            # The plot components take a DataFrame as their value, so the
            # callbacks return frames and the plots pick the columns through
            # their `x`/`y` settings.
            sel_tool.change(fn=update_tool_plot, inputs=sel_tool, outputs=plot_tool_error)
            sel_week.change(fn=update_week_plot, inputs=sel_week, outputs=plot_week_error)
            # Draw both plots for the default selections when the page loads;
            # `change` alone leaves them empty until a dropdown is touched.
            demo.load(fn=update_tool_plot, inputs=sel_tool, outputs=plot_tool_error)
            demo.load(fn=update_week_plot, inputs=sel_week, outputs=plot_week_error)

            with gr.Row():
                plot_tool_error.render()
            with gr.Row():
                plot_week_error.render()

        with gr.TabItem("ℹ️ About"):
            with gr.Accordion("About the Benchmark", open=False):
                gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")

demo.queue(default_concurrency_limit=40).launch()