Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Clémentine
committed on
Commit
·
8ac5b07
1
Parent(s):
f176095
update oauth to follow the session
Browse files
- yourbench_space/app.py +2 -2
- yourbench_space/utils.py +5 -5
yourbench_space/app.py
CHANGED
@@ -108,7 +108,7 @@ def run_evaluation_pipeline(oauth_token: gr.OAuthToken | None, org_name, eval_na
|
|
108 |
eval_ds_name = f"{org_name}/{eval_name}"
|
109 |
# Test dataset existence
|
110 |
try:
|
111 |
-
load_dataset(eval_ds_name, streaming=True)
|
112 |
except Exception as e:
|
113 |
print(f"Error while loading the dataset: {e}")
|
114 |
return
|
@@ -232,7 +232,7 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
|
|
232 |
answers_df = gr.DataFrame()
|
233 |
|
234 |
stages_table.change(
|
235 |
-
update_dataset, inputs=[stages_table, hf_org_dropdown, hf_dataset_name], outputs=[ingestion_df, summarization_df, single_hop, answers_df]
|
236 |
)
|
237 |
|
238 |
log_timer = gr.Timer(1.0, active=True)
|
|
|
108 |
eval_ds_name = f"{org_name}/{eval_name}"
|
109 |
# Test dataset existence
|
110 |
try:
|
111 |
+
load_dataset(eval_ds_name, streaming=True, token=oauth_token.token)
|
112 |
except Exception as e:
|
113 |
print(f"Error while loading the dataset: {e}")
|
114 |
return
|
|
|
232 |
answers_df = gr.DataFrame()
|
233 |
|
234 |
stages_table.change(
|
235 |
+
update_dataset, inputs=[stages_table, hf_org_dropdown, hf_dataset_name, login_btn], outputs=[ingestion_df, summarization_df, single_hop, answers_df]
|
236 |
)
|
237 |
|
238 |
log_timer = gr.Timer(1.0, active=True)
|
yourbench_space/utils.py
CHANGED
@@ -52,7 +52,7 @@ def save_files(uuid: str, files: List[pathlib.Path]) -> str:
|
|
52 |
else "No files were saved"
|
53 |
)
|
54 |
|
55 |
-
def update_dataset(stages, hf_org, hf_prefix):
|
56 |
"""
|
57 |
Updates the dataset based on the provided stages and dataset configuration.
|
58 |
"""
|
@@ -66,16 +66,16 @@ def update_dataset(stages, hf_org, hf_prefix):
|
|
66 |
|
67 |
if "ingestion" in stages:
|
68 |
# TODO: why is the key "ingested" and not "ingestion"? (does not match the other splits)
|
69 |
-
ingestion_ds = load_dataset(dataset_name, name="ingested", split="train").select_columns("document_text")
|
70 |
ingestion_df = pd.DataFrame(ingestion_ds[0]) # only one row
|
71 |
if "summarization" in stages:
|
72 |
-
summarization_ds = load_dataset(dataset_name, name="summarization", split="train", streaming=True).select_columns(['raw_document_summary', 'document_summary', 'summarization_model'])
|
73 |
summarization_df = pd.DataFrame([next(iter(summarization_ds)) for _ in range(5)])
|
74 |
if "single_shot_question_generation" in stages:
|
75 |
-
single_hop_ds = load_dataset(dataset_name, name="single_shot_question_generation", split="train", streaming=True)
|
76 |
single_hop_df = pd.DataFrame([next(iter(single_hop_ds)) for _ in range(5)])
|
77 |
if "answer_generation" in stages:
|
78 |
-
answers_ds = load_dataset(dataset_name, name="answer_generation", split="train", streaming=True)
|
79 |
answers_df = pd.DataFrame([next(iter(answers_ds)) for _ in range(5)])
|
80 |
|
81 |
return (ingestion_df, summarization_df, single_hop_df, answers_df)
|
|
|
52 |
else "No files were saved"
|
53 |
)
|
54 |
|
55 |
+
def update_dataset(stages, hf_org, hf_prefix, oauth_token: gr.OAuthToken):
|
56 |
"""
|
57 |
Updates the dataset based on the provided stages and dataset configuration.
|
58 |
"""
|
|
|
66 |
|
67 |
if "ingestion" in stages:
|
68 |
# TODO: why is the key "ingested" and not "ingestion"? (does not match the other splits)
|
69 |
+
ingestion_ds = load_dataset(dataset_name, name="ingested", split="train", token=oauth_token.token).select_columns("document_text")
|
70 |
ingestion_df = pd.DataFrame(ingestion_ds[0]) # only one row
|
71 |
if "summarization" in stages:
|
72 |
+
summarization_ds = load_dataset(dataset_name, name="summarization", split="train", streaming=True, token=oauth_token.token).select_columns(['raw_document_summary', 'document_summary', 'summarization_model'])
|
73 |
summarization_df = pd.DataFrame([next(iter(summarization_ds)) for _ in range(5)])
|
74 |
if "single_shot_question_generation" in stages:
|
75 |
+
single_hop_ds = load_dataset(dataset_name, name="single_shot_question_generation", split="train", streaming=True, token=oauth_token.token)
|
76 |
single_hop_df = pd.DataFrame([next(iter(single_hop_ds)) for _ in range(5)])
|
77 |
if "answer_generation" in stages:
|
78 |
+
answers_ds = load_dataset(dataset_name, name="answer_generation", split="train", streaming=True, token=oauth_token.token)
|
79 |
answers_df = pd.DataFrame([next(iter(answers_ds)) for _ in range(5)])
|
80 |
|
81 |
return (ingestion_df, summarization_df, single_hop_df, answers_df)
|