Update with `trust_remote_code` and new wording
#4
by
ZeroCommand
- opened
- app_leaderboard.py +1 -1
- fetch_utils.py +4 -5
- text_classification.py +2 -2
- text_classification_ui_helpers.py +4 -8
- wordings.py +7 -1
app_leaderboard.py
CHANGED
@@ -21,7 +21,7 @@ def get_records_from_dataset_repo(dataset_id):
|
|
21 |
logger.info(f"Dataset {dataset_id} has splits {dataset_split}")
|
22 |
|
23 |
try:
|
24 |
-
ds = datasets.load_dataset(dataset_id, dataset_config[0])
|
25 |
df = ds.to_pandas()
|
26 |
return df
|
27 |
except Exception as e:
|
|
|
21 |
logger.info(f"Dataset {dataset_id} has splits {dataset_split}")
|
22 |
|
23 |
try:
|
24 |
+
ds = datasets.load_dataset(dataset_id, dataset_config[0], split=dataset_split[0])
|
25 |
df = ds.to_pandas()
|
26 |
return df
|
27 |
except Exception as e:
|
fetch_utils.py
CHANGED
@@ -14,19 +14,18 @@ def check_dataset_and_get_config(dataset_id):
|
|
14 |
|
15 |
def check_dataset_and_get_split(dataset_id, dataset_config):
|
16 |
try:
|
17 |
-
|
18 |
except Exception as e:
|
19 |
# Dataset may not exist
|
20 |
logging.warning(
|
21 |
f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
|
22 |
)
|
23 |
return None
|
24 |
-
|
25 |
-
splits = list(ds.keys())
|
26 |
return splits
|
27 |
-
|
28 |
# Dataset has no splits
|
29 |
logging.warning(
|
30 |
-
f"Dataset {dataset_id} with config {dataset_config} has no splits"
|
31 |
)
|
32 |
return None
|
|
|
14 |
|
15 |
def check_dataset_and_get_split(dataset_id, dataset_config):
|
16 |
try:
|
17 |
+
splits = datasets.get_dataset_split_names(dataset_id, dataset_config, trust_remote_code=True)
|
18 |
except Exception as e:
|
19 |
# Dataset may not exist
|
20 |
logging.warning(
|
21 |
f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
|
22 |
)
|
23 |
return None
|
24 |
+
if isinstance(splits, list):
|
|
|
25 |
return splits
|
26 |
+
else:
|
27 |
# Dataset has no splits
|
28 |
logging.warning(
|
29 |
+
f"Dataset {dataset_id} with config {dataset_config} has no splits"
|
30 |
)
|
31 |
return None
|
text_classification.py
CHANGED
@@ -254,7 +254,7 @@ def infer_output_label_column(
|
|
254 |
|
255 |
def check_dataset_features_validity(d_id, config, split):
|
256 |
# We assume dataset is ok here
|
257 |
-
ds = datasets.load_dataset(d_id, config)
|
258 |
try:
|
259 |
dataset_features = ds.features
|
260 |
except AttributeError:
|
@@ -278,7 +278,7 @@ def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split):
|
|
278 |
prediction_result = None
|
279 |
try:
|
280 |
# Use the first item to test prediction
|
281 |
-
ds = datasets.load_dataset(dataset_id, dataset_config)
|
282 |
if "text" not in ds.features.keys():
|
283 |
# Dataset does not have text column
|
284 |
prediction_input = ds[0][select_the_first_string_column(ds)]
|
|
|
254 |
|
255 |
def check_dataset_features_validity(d_id, config, split):
|
256 |
# We assume dataset is ok here
|
257 |
+
ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
|
258 |
try:
|
259 |
dataset_features = ds.features
|
260 |
except AttributeError:
|
|
|
278 |
prediction_result = None
|
279 |
try:
|
280 |
# Use the first item to test prediction
|
281 |
+
ds = datasets.load_dataset(dataset_id, dataset_config, split=dataset_split, trust_remote_code=True)
|
282 |
if "text" not in ds.features.keys():
|
283 |
# Dataset does not have text column
|
284 |
prediction_input = ds[0][select_the_first_string_column(ds)]
|
text_classification_ui_helpers.py
CHANGED
@@ -59,11 +59,7 @@ def check_dataset(dataset_id):
|
|
59 |
gr.update(),
|
60 |
""
|
61 |
)
|
62 |
-
splits = list(
|
63 |
-
datasets.load_dataset(
|
64 |
-
dataset_id, configs[0]
|
65 |
-
).keys()
|
66 |
-
)
|
67 |
return (
|
68 |
gr.update(choices=configs, value=configs[0], visible=True),
|
69 |
gr.update(choices=splits, value=splits[0], visible=True),
|
@@ -176,7 +172,7 @@ def precheck_model_ds_enable_example_btn(
|
|
176 |
return (gr.update(), gr.update(), "")
|
177 |
|
178 |
try:
|
179 |
-
ds = datasets.load_dataset(dataset_id, dataset_config)
|
180 |
df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
|
181 |
ds_labels, ds_features = get_labels_and_features_from_dataset(ds[dataset_split])
|
182 |
|
@@ -243,7 +239,7 @@ def align_columns_and_show_prediction(
|
|
243 |
|
244 |
model_labels = list(prediction_response.keys())
|
245 |
|
246 |
-
ds = datasets.load_dataset(dataset_id, dataset_config)
|
247 |
ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
|
248 |
|
249 |
# when dataset does not have labels or features
|
@@ -334,7 +330,7 @@ def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
|
|
334 |
check_column_mapping_keys_validity(all_mappings)
|
335 |
|
336 |
# get ds labels and features again for alignment
|
337 |
-
ds = datasets.load_dataset(d_id, config)
|
338 |
ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
|
339 |
label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features)
|
340 |
|
|
|
59 |
gr.update(),
|
60 |
""
|
61 |
)
|
62 |
+
splits = datasets.get_dataset_split_names(dataset_id, configs[0], trust_remote_code=True)
|
|
|
|
|
|
|
|
|
63 |
return (
|
64 |
gr.update(choices=configs, value=configs[0], visible=True),
|
65 |
gr.update(choices=splits, value=splits[0], visible=True),
|
|
|
172 |
return (gr.update(), gr.update(), "")
|
173 |
|
174 |
try:
|
175 |
+
ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
|
176 |
df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
|
177 |
ds_labels, ds_features = get_labels_and_features_from_dataset(ds[dataset_split])
|
178 |
|
|
|
239 |
|
240 |
model_labels = list(prediction_response.keys())
|
241 |
|
242 |
+
ds = datasets.load_dataset(dataset_id, dataset_config, split=dataset_split, trust_remote_code=True)
|
243 |
ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
|
244 |
|
245 |
# when dataset does not have labels or features
|
|
|
330 |
check_column_mapping_keys_validity(all_mappings)
|
331 |
|
332 |
# get ds labels and features again for alignment
|
333 |
+
ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
|
334 |
ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
|
335 |
label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features)
|
336 |
|
wordings.py
CHANGED
@@ -41,11 +41,17 @@ PREDICTION_SAMPLE_MD = """
|
|
41 |
"""
|
42 |
|
43 |
MAPPING_STYLED_ERROR_WARNING = """
|
44 |
-
<h3 style="text-align: center;color: #fa5f5f; background-color: #fbe2e2; border-radius: 8px; padding: 10px; ">
|
45 |
We cannot auto-align the labels/features of your dataset and model. Please double check the info below and select correct mapping before submission.
|
46 |
</h3>
|
47 |
"""
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
NOT_TEXT_CLASSIFICATION_MODEL_RAW = """
|
50 |
Your model does not fall under the category of text classification. This page is specifically designated for the evaluation of text classification models.
|
51 |
"""
|
|
|
41 |
"""
|
42 |
|
43 |
MAPPING_STYLED_ERROR_WARNING = """
|
44 |
+
<h3 style="text-align: center;color: orange; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
|
45 |
We cannot auto-align the labels/features of your dataset and model. Please double check the info below and select correct mapping before submission.
|
46 |
</h3>
|
47 |
"""
|
48 |
|
49 |
+
UNMATCHED_MODEL_DATASET_STYLED_ERROR = """
|
50 |
+
<h3 style="text-align: center;color: #fa5f5f; background-color: #fbe2e2; border-radius: 8px; padding: 10px; ">
|
51 |
+
Your model and dataset have different numbers of labels. Please double check your model and dataset.
|
52 |
+
</h3>
|
53 |
+
"""
|
54 |
+
|
55 |
NOT_TEXT_CLASSIFICATION_MODEL_RAW = """
|
56 |
Your model does not fall under the category of text classification. This page is specifically designated for the evaluation of text classification models.
|
57 |
"""
|