update
app.py
CHANGED
@@ -146,29 +146,16 @@ def calculate_order_by_first_substring(selected_models):
     query_ids_df = first_columns[first_columns["Model Type"] == "Text Only"]
     query_ids_df = query_ids_df[query_ids_df["Model Name"].isin(selected_models)]
 
-    print(len(query_ids_df))
-
     query_ids_df = query_ids_df.groupby("query_id").filter(
         lambda x: x["parsed_judge_response"].eq(1).all()
     )
 
-    print(len(query_ids_df))
-
     query_ids = query_ids_df.query_id.unique()
     fsm_ids = query_ids_df.fsm_id.unique()
-    print(
-        "fsm_ids",
-        len(fsm_ids),
-        "Total of 25 FSM is solvable by everything on the first substring",
-    )
 
     text_only = all_data[all_data["Model Type"] == "Text Only"]
     text_only_filtered = text_only[text_only["fsm_id"].isin(fsm_ids)]
 
-    print(
-        f"Number of query_ids from text_only_filtered: {len(text_only_filtered.query_id.unique())}"
-    )
-
     text_only_filtered = (
         text_only_filtered.groupby(["Model Name"])["parsed_judge_response"]
         .mean()
@@ -182,7 +169,10 @@ def calculate_order_by_first_substring(selected_models):
     )
     text_only_filtered.sort_values("Accuracy", ascending=False, inplace=True)
 
-
+    number_of_queries = len(query_ids)
+    number_of_fsms = len(fsm_ids)
+
+    return text_only_filtered, number_of_queries, number_of_fsms
 
 
 with gr.Blocks() as demo:
@@ -233,13 +223,20 @@ with gr.Blocks() as demo:
         label="Models to include",
         choices=all_text_only_model_names,
         value=all_text_only_model_names,
+        interactive=True,
     )
+    with gr.Row():
+        number_of_queries = gr.Textbox(label="Number of queries to include")
+
+        number_of_fsms = gr.Textbox(label="Number of FSMs to include")
+
     constrained_leader_board_text = gr.Dataframe()
 
-    included_models.
+    included_models.select(
         fn=calculate_order_by_first_substring,
         inputs=[included_models],
-        outputs=[constrained_leader_board_text],
+        outputs=[constrained_leader_board_text, number_of_queries, number_of_fsms],
+        queue=True,
     )
 
 demo.launch()
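For orientation, here is a minimal, self-contained sketch of the pattern this commit uses: the CheckboxGroup fires a .select event, the handler returns the leaderboard DataFrame plus the two counts, and Gradio maps those three return values onto the three output components in order. The toy DataFrame, model names, and sample values below are invented stand-ins; only the column names and the component wiring are taken from the diff, and the real Space loads its own data.

import gradio as gr
import pandas as pd

def calculate_order_by_first_substring(selected_models):
    # Invented sample of judge results; a stand-in for the Space's real data.
    df = pd.DataFrame(
        {
            "Model Name": ["A", "A", "B", "B"],
            "query_id": [1, 2, 1, 2],
            "fsm_id": [10, 20, 10, 20],
            "parsed_judge_response": [1, 1, 1, 0],
        }
    )
    df = df[df["Model Name"].isin(selected_models)]

    # Keep only queries that every selected model answered correctly.
    solved = df.groupby("query_id").filter(
        lambda x: x["parsed_judge_response"].eq(1).all()
    )
    query_ids = solved["query_id"].unique()
    fsm_ids = solved["fsm_id"].unique()

    # Per-model accuracy restricted to the FSMs behind those queries.
    leaderboard = (
        df[df["fsm_id"].isin(fsm_ids)]
        .groupby("Model Name")["parsed_judge_response"]
        .mean()
        .reset_index(name="Accuracy")
        .sort_values("Accuracy", ascending=False)
    )
    return leaderboard, len(query_ids), len(fsm_ids)

with gr.Blocks() as demo:
    included_models = gr.CheckboxGroup(
        label="Models to include",
        choices=["A", "B"],
        value=["A", "B"],
        interactive=True,
    )
    with gr.Row():
        number_of_queries = gr.Textbox(label="Number of queries to include")
        number_of_fsms = gr.Textbox(label="Number of FSMs to include")
    constrained_leader_board_text = gr.Dataframe()

    # .select fires when the user selects or deselects an item; the three
    # returned values fill the three outputs positionally.
    included_models.select(
        fn=calculate_order_by_first_substring,
        inputs=[included_models],
        outputs=[constrained_leader_board_text, number_of_queries, number_of_fsms],
        queue=True,
    )

demo.launch()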