Update with `trust_remote_code` and new wording
#4
by
ZeroCommand
- opened
- app_leaderboard.py +1 -1
- fetch_utils.py +4 -5
- text_classification.py +2 -2
- text_classification_ui_helpers.py +4 -8
- wordings.py +7 -1
app_leaderboard.py
CHANGED
@@ -21,7 +21,7 @@ def get_records_from_dataset_repo(dataset_id):
|
|
21 |
logger.info(f"Dataset {dataset_id} has splits {dataset_split}")
|
22 |
|
23 |
try:
|
24 |
-
ds = datasets.load_dataset(dataset_id, dataset_config[0])
|
25 |
df = ds.to_pandas()
|
26 |
return df
|
27 |
except Exception as e:
|
|
|
21 |
logger.info(f"Dataset {dataset_id} has splits {dataset_split}")
|
22 |
|
23 |
try:
|
24 |
+
ds = datasets.load_dataset(dataset_id, dataset_config[0], split=dataset_split[0])
|
25 |
df = ds.to_pandas()
|
26 |
return df
|
27 |
except Exception as e:
|
fetch_utils.py
CHANGED
@@ -14,19 +14,18 @@ def check_dataset_and_get_config(dataset_id):
|
|
14 |
|
15 |
def check_dataset_and_get_split(dataset_id, dataset_config):
|
16 |
try:
|
17 |
-
|
18 |
except Exception as e:
|
19 |
# Dataset may not exist
|
20 |
logging.warning(
|
21 |
f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
|
22 |
)
|
23 |
return None
|
24 |
-
|
25 |
-
splits = list(ds.keys())
|
26 |
return splits
|
27 |
-
|
28 |
# Dataset has no splits
|
29 |
logging.warning(
|
30 |
-
f"Dataset {dataset_id} with config {dataset_config} has no splits"
|
31 |
)
|
32 |
return None
|
|
|
14 |
|
15 |
def check_dataset_and_get_split(dataset_id, dataset_config):
|
16 |
try:
|
17 |
+
splits = datasets.get_dataset_split_names(dataset_id, dataset_config, trust_remote_code=True)
|
18 |
except Exception as e:
|
19 |
# Dataset may not exist
|
20 |
logging.warning(
|
21 |
f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
|
22 |
)
|
23 |
return None
|
24 |
+
if isinstance(splits, list):
|
|
|
25 |
return splits
|
26 |
+
else:
|
27 |
# Dataset has no splits
|
28 |
logging.warning(
|
29 |
+
f"Dataset {dataset_id} with config {dataset_config} has no splits"
|
30 |
)
|
31 |
return None
|
text_classification.py
CHANGED
@@ -254,7 +254,7 @@ def infer_output_label_column(
|
|
254 |
|
255 |
def check_dataset_features_validity(d_id, config, split):
|
256 |
# We assume dataset is ok here
|
257 |
-
ds = datasets.load_dataset(d_id, config)
|
258 |
try:
|
259 |
dataset_features = ds.features
|
260 |
except AttributeError:
|
@@ -278,7 +278,7 @@ def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split):
|
|
278 |
prediction_result = None
|
279 |
try:
|
280 |
# Use the first item to test prediction
|
281 |
-
ds = datasets.load_dataset(dataset_id, dataset_config)
|
282 |
if "text" not in ds.features.keys():
|
283 |
# Dataset does not have text column
|
284 |
prediction_input = ds[0][select_the_first_string_column(ds)]
|
|
|
254 |
|
255 |
def check_dataset_features_validity(d_id, config, split):
|
256 |
# We assume dataset is ok here
|
257 |
+
ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
|
258 |
try:
|
259 |
dataset_features = ds.features
|
260 |
except AttributeError:
|
|
|
278 |
prediction_result = None
|
279 |
try:
|
280 |
# Use the first item to test prediction
|
281 |
+
ds = datasets.load_dataset(dataset_id, dataset_config, split=dataset_split, trust_remote_code=True)
|
282 |
if "text" not in ds.features.keys():
|
283 |
# Dataset does not have text column
|
284 |
prediction_input = ds[0][select_the_first_string_column(ds)]
|
text_classification_ui_helpers.py
CHANGED
@@ -59,11 +59,7 @@ def check_dataset(dataset_id):
|
|
59 |
gr.update(),
|
60 |
""
|
61 |
)
|
62 |
-
splits = list(
|
63 |
-
datasets.load_dataset(
|
64 |
-
dataset_id, configs[0]
|
65 |
-
).keys()
|
66 |
-
)
|
67 |
return (
|
68 |
gr.update(choices=configs, value=configs[0], visible=True),
|
69 |
gr.update(choices=splits, value=splits[0], visible=True),
|
@@ -176,7 +172,7 @@ def precheck_model_ds_enable_example_btn(
|
|
176 |
return (gr.update(), gr.update(), "")
|
177 |
|
178 |
try:
|
179 |
-
ds = datasets.load_dataset(dataset_id, dataset_config)
|
180 |
df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
|
181 |
ds_labels, ds_features = get_labels_and_features_from_dataset(ds[dataset_split])
|
182 |
|
@@ -243,7 +239,7 @@ def align_columns_and_show_prediction(
|
|
243 |
|
244 |
model_labels = list(prediction_response.keys())
|
245 |
|
246 |
-
ds = datasets.load_dataset(dataset_id, dataset_config)
|
247 |
ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
|
248 |
|
249 |
# when dataset does not have labels or features
|
@@ -334,7 +330,7 @@ def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
|
|
334 |
check_column_mapping_keys_validity(all_mappings)
|
335 |
|
336 |
# get ds labels and features again for alignment
|
337 |
-
ds = datasets.load_dataset(d_id, config)
|
338 |
ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
|
339 |
label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features)
|
340 |
|
|
|
59 |
gr.update(),
|
60 |
""
|
61 |
)
|
62 |
+
splits = datasets.get_dataset_split_names(dataset_id, configs[0], trust_remote_code=True)
|
|
|
|
|
|
|
|
|
63 |
return (
|
64 |
gr.update(choices=configs, value=configs[0], visible=True),
|
65 |
gr.update(choices=splits, value=splits[0], visible=True),
|
|
|
172 |
return (gr.update(), gr.update(), "")
|
173 |
|
174 |
try:
|
175 |
+
ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
|
176 |
df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
|
177 |
ds_labels, ds_features = get_labels_and_features_from_dataset(ds[dataset_split])
|
178 |
|
|
|
239 |
|
240 |
model_labels = list(prediction_response.keys())
|
241 |
|
242 |
+
ds = datasets.load_dataset(dataset_id, dataset_config, split=dataset_split, trust_remote_code=True)
|
243 |
ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
|
244 |
|
245 |
# when dataset does not have labels or features
|
|
|
330 |
check_column_mapping_keys_validity(all_mappings)
|
331 |
|
332 |
# get ds labels and features again for alignment
|
333 |
+
ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
|
334 |
ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
|
335 |
label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features)
|
336 |
|
wordings.py
CHANGED
@@ -41,11 +41,17 @@ PREDICTION_SAMPLE_MD = """
|
|
41 |
"""
|
42 |
|
43 |
MAPPING_STYLED_ERROR_WARNING = """
|
44 |
-
<h3 style="text-align: center;color: #fa5f5f; background-color: #fbe2e2; border-radius: 8px; padding: 10px; ">
|
45 |
We cannot auto-align the labels/features of your dataset and model. Please double check the info below and select correct mapping before submission.
|
46 |
</h3>
|
47 |
"""
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
NOT_TEXT_CLASSIFICATION_MODEL_RAW = """
|
50 |
Your model does not fall under the category of text classification. This page is specifically designated for the evaluation of text classification models.
|
51 |
"""
|
|
|
41 |
"""
|
42 |
|
43 |
MAPPING_STYLED_ERROR_WARNING = """
|
44 |
+
<h3 style="text-align: center;color: orange; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
|
45 |
We cannot auto-align the labels/features of your dataset and model. Please double check the info below and select correct mapping before submission.
|
46 |
</h3>
|
47 |
"""
|
48 |
|
49 |
+
UNMATCHED_MODEL_DATASET_STYLED_ERROR = """
|
50 |
+
<h3 style="text-align: center;color: #fa5f5f; background-color: #fbe2e2; border-radius: 8px; padding: 10px; ">
|
51 |
+
Your model and dataset have different numbers of labels. Please double check your model and dataset.
|
52 |
+
</h3>
|
53 |
+
"""
|
54 |
+
|
55 |
NOT_TEXT_CLASSIFICATION_MODEL_RAW = """
|
56 |
Your model does not fall under the category of text classification. This page is specifically designated for the evaluation of text classification models.
|
57 |
"""
|