hpprc committed 23 days ago

Commit

73b2b2e

verified ·

1 Parent(s): 6a30c5a

Upload 17 files

Browse files

Files changed (17) hide show

results/Classification/scores_amazon_counterfactual_classification.json +23 -0
results/Classification/scores_amazon_review_classification.json +23 -0
results/Classification/scores_massive_intent_classification.json +23 -0
results/Classification/scores_massive_scenario_classification.json +23 -0
results/Clustering/scores_livedoor_news.json +36 -0
results/Clustering/scores_mewsc16.json +36 -0
results/PairClassification/scores_paws_x_ja.json +41 -0
results/Reranking/scores_esci.json +31 -0
results/Retrieval/scores_jagovfaqs_22k.json +43 -0
results/Retrieval/scores_jaqket.json +43 -0
results/Retrieval/scores_mrtydi.json +43 -0
results/Retrieval/scores_nlp_journal_abs_intro.json +43 -0
results/Retrieval/scores_nlp_journal_title_abs.json +43 -0
results/Retrieval/scores_nlp_journal_title_intro.json +43 -0
results/STS/scores_jsick.json +31 -0
results/STS/scores_jsts.json +31 -0
results/summary.json +62 -0

results/Classification/scores_amazon_counterfactual_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.8212903958100242,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.9055793991416309,
+                "macro_f1": 0.6510076252723311
+            },
+            "logreg": {
+                "accuracy": 0.9184549356223176,
+                "macro_f1": 0.756611138600253
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.936830835117773,
+                "macro_f1": 0.8212903958100242
+            }
+        }
+    }
+}

results/Classification/scores_amazon_review_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.6106205588763483,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.456,
+                "macro_f1": 0.44947743361535253
+            },
+            "logreg": {
+                "accuracy": 0.6058,
+                "macro_f1": 0.602223447104313
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.6132,
+                "macro_f1": 0.6106205588763483
+            }
+        }
+    }
+}

results/Classification/scores_massive_intent_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.821659588084153,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.7830791933103788,
+                "macro_f1": 0.7540349043356395
+            },
+            "logreg": {
+                "accuracy": 0.8558780127889818,
+                "macro_f1": 0.8412198611245395
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.8493611297915266,
+                "macro_f1": 0.821659588084153
+            }
+        }
+    }
+}

results/Classification/scores_massive_scenario_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.8927605477819499,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.8612887358583374,
+                "macro_f1": 0.8475812013823129
+            },
+            "logreg": {
+                "accuracy": 0.8957206099360551,
+                "macro_f1": 0.889349188280443
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.8927370544720915,
+                "macro_f1": 0.8927605477819499
+            }
+        }
+    }
+}

results/Clustering/scores_livedoor_news.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+    "metric_name": "v_measure_score",
+    "metric_value": 0.6198770016343527,
+    "details": {
+        "optimal_clustering_model_name": "MiniBatchKMeans",
+        "val_scores": {
+            "MiniBatchKMeans": {
+                "v_measure_score": 0.6130011361971373,
+                "homogeneity_score": 0.6020130359528088,
+                "completeness_score": 0.6243978092094987
+            },
+            "AgglomerativeClustering": {
+                "v_measure_score": 0.5962671368925023,
+                "homogeneity_score": 0.5811463016386679,
+                "completeness_score": 0.6121958497856022
+            },
+            "BisectingKMeans": {
+                "v_measure_score": 0.574682592952658,
+                "homogeneity_score": 0.5686551081511829,
+                "completeness_score": 0.5808392238234845
+            },
+            "Birch": {
+                "v_measure_score": 0.5978998508456345,
+                "homogeneity_score": 0.582382606601413,
+                "completeness_score": 0.6142666261599271
+            }
+        },
+        "test_scores": {
+            "MiniBatchKMeans": {
+                "v_measure_score": 0.6198770016343527,
+                "homogeneity_score": 0.6102816569132538,
+                "completeness_score": 0.6297788978398051
+            }
+        }
+    }
+}

results/Clustering/scores_mewsc16.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+    "metric_name": "v_measure_score",
+    "metric_value": 0.4938330752654621,
+    "details": {
+        "optimal_clustering_model_name": "Birch",
+        "val_scores": {
+            "MiniBatchKMeans": {
+                "v_measure_score": 0.5156167461651439,
+                "homogeneity_score": 0.564258229752961,
+                "completeness_score": 0.47469591858066335
+            },
+            "AgglomerativeClustering": {
+                "v_measure_score": 0.5567865030669769,
+                "homogeneity_score": 0.5991745068307929,
+                "completeness_score": 0.5199996459830565
+            },
+            "BisectingKMeans": {
+                "v_measure_score": 0.437046545249617,
+                "homogeneity_score": 0.4798220986983772,
+                "completeness_score": 0.40127350306484766
+            },
+            "Birch": {
+                "v_measure_score": 0.5639601753239182,
+                "homogeneity_score": 0.6063086319538272,
+                "completeness_score": 0.527141271397583
+            }
+        },
+        "test_scores": {
+            "Birch": {
+                "v_measure_score": 0.4938330752654621,
+                "homogeneity_score": 0.5270581166850505,
+                "completeness_score": 0.4645485534255365
+            }
+        }
+    }
+}

results/PairClassification/scores_paws_x_ja.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+    "metric_name": "binary_f1",
+    "metric_value": 0.6259758694109298,
+    "details": {
+        "optimal_distance_metric": "dot_similarities",
+        "val_scores": {
+            "cosine_distances": {
+                "accuracy": 0.5725,
+                "accuracy_threshold": 0.7325241565704346,
+                "binary_f1": 0.5979670522257273,
+                "binary_f1_threshold": 1.0
+            },
+            "manhatten_distances": {
+                "accuracy": 0.6105,
+                "accuracy_threshold": 63.98142623901367,
+                "binary_f1": 0.6014825273561596,
+                "binary_f1_threshold": 395.8541259765625
+            },
+            "euclidean_distances": {
+                "accuracy": 0.6115,
+                "accuracy_threshold": 2.9641363620758057,
+                "binary_f1": 0.6016949152542372,
+                "binary_f1_threshold": 18.21161460876465
+            },
+            "dot_similarities": {
+                "accuracy": 0.583,
+                "accuracy_threshold": 872.2387084960938,
+                "binary_f1": 0.6023329798515377,
+                "binary_f1_threshold": 735.9739990234375
+            }
+        },
+        "test_scores": {
+            "dot_similarities": {
+                "accuracy": 0.581,
+                "accuracy_threshold": 872.2387084960938,
+                "binary_f1": 0.6259758694109298,
+                "binary_f1_threshold": 735.9739990234375
+            }
+        }
+    }
+}

results/Reranking/scores_esci.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.9343186677190654,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "ndcg@10": 0.9470348664613528,
+                "ndcg@20": 0.9578682740907674,
+                "ndcg@40": 0.9654194921086661
+            },
+            "dot_score": {
+                "ndcg@10": 0.9392104044460415,
+                "ndcg@20": 0.9517801007430513,
+                "ndcg@40": 0.9601312974205363
+            },
+            "euclidean_distance": {
+                "ndcg@10": 0.9464007864974945,
+                "ndcg@20": 0.9573505616979566,
+                "ndcg@40": 0.9649519050597076
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "ndcg@10": 0.9343186677190654,
+                "ndcg@20": 0.9503468163615926,
+                "ndcg@40": 0.9590645882216304
+            }
+        }
+    }
+}

results/Retrieval/scores_jagovfaqs_22k.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.7787754194132134,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.6548698449839134,
+                "accuracy@3": 0.8101784147411524,
+                "accuracy@5": 0.8531734425270547,
+                "accuracy@10": 0.89587598713074,
+                "ndcg@10": 0.7791387074676754,
+                "mrr@10": 0.7413217222152583
+            },
+            "dot_score": {
+                "accuracy@1": 0.4910792629423808,
+                "accuracy@3": 0.6806083650190115,
+                "accuracy@5": 0.741152383737935,
+                "accuracy@10": 0.804913717461246,
+                "ndcg@10": 0.6486117073018491,
+                "mrr@10": 0.5984882565681033
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.6516525299795262,
+                "accuracy@3": 0.8110558642878034,
+                "accuracy@5": 0.8520035097981866,
+                "accuracy@10": 0.8964609534951741,
+                "ndcg@10": 0.7781881943647594,
+                "mrr@10": 0.739879269442007
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.6494152046783626,
+                "accuracy@3": 0.8096491228070175,
+                "accuracy@5": 0.8552631578947368,
+                "accuracy@10": 0.9002923976608187,
+                "ndcg@10": 0.7787754194132134,
+                "mrr@10": 0.7394212382808872
+            }
+        }
+    }
+}

results/Retrieval/scores_jaqket.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.7038903636852758,
+    "details": {
+        "optimal_distance_metric": "euclidean_distance",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.5417085427135678,
+                "accuracy@3": 0.7608040201005025,
+                "accuracy@5": 0.821105527638191,
+                "accuracy@10": 0.857286432160804,
+                "ndcg@10": 0.7077597877351898,
+                "mrr@10": 0.6586655499720832
+            },
+            "dot_score": {
+                "accuracy@1": 0.3608040201005025,
+                "accuracy@3": 0.49547738693467336,
+                "accuracy@5": 0.5547738693467337,
+                "accuracy@10": 0.6201005025125628,
+                "ndcg@10": 0.48596673144333313,
+                "mrr@10": 0.44361489989630637
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.5417085427135678,
+                "accuracy@3": 0.7577889447236181,
+                "accuracy@5": 0.8231155778894472,
+                "accuracy@10": 0.8582914572864322,
+                "ndcg@10": 0.7079566000561661,
+                "mrr@10": 0.6586160963547902
+            }
+        },
+        "test_scores": {
+            "euclidean_distance": {
+                "accuracy@1": 0.526579739217653,
+                "accuracy@3": 0.7552657973921765,
+                "accuracy@5": 0.8144433299899699,
+                "accuracy@10": 0.8706118355065195,
+                "ndcg@10": 0.7038903636852758,
+                "mrr@10": 0.6498861664358158
+            }
+        }
+    }
+}

results/Retrieval/scores_mrtydi.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.4906924953691417,
+    "details": {
+        "optimal_distance_metric": "euclidean_distance",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.37823275862068967,
+                "accuracy@3": 0.5668103448275862,
+                "accuracy@5": 0.6368534482758621,
+                "accuracy@10": 0.7101293103448276,
+                "ndcg@10": 0.5413311560443561,
+                "mrr@10": 0.4875230911330045
+            },
+            "dot_score": {
+                "accuracy@1": 0.21336206896551724,
+                "accuracy@3": 0.36745689655172414,
+                "accuracy@5": 0.4353448275862069,
+                "accuracy@10": 0.53125,
+                "ndcg@10": 0.36305057475619895,
+                "mrr@10": 0.3104350540503558
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.38362068965517243,
+                "accuracy@3": 0.5657327586206896,
+                "accuracy@5": 0.6379310344827587,
+                "accuracy@10": 0.7101293103448276,
+                "ndcg@10": 0.5443451625184184,
+                "mrr@10": 0.49149048987411037
+            }
+        },
+        "test_scores": {
+            "euclidean_distance": {
+                "accuracy@1": 0.3736111111111111,
+                "accuracy@3": 0.5347222222222222,
+                "accuracy@5": 0.6083333333333333,
+                "accuracy@10": 0.6916666666666667,
+                "ndcg@10": 0.4906924953691417,
+                "mrr@10": 0.47281470458553737
+            }
+        }
+    }
+}

results/Retrieval/scores_nlp_journal_abs_intro.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.9935436297129833,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 1.0,
+                "accuracy@3": 1.0,
+                "accuracy@5": 1.0,
+                "accuracy@10": 1.0,
+                "ndcg@10": 1.0,
+                "mrr@10": 1.0
+            },
+            "dot_score": {
+                "accuracy@1": 0.8688524590163934,
+                "accuracy@3": 0.9754098360655737,
+                "accuracy@5": 1.0,
+                "accuracy@10": 1.0,
+                "ndcg@10": 0.9405886901730665,
+                "mrr@10": 0.9206284153005463
+            },
+            "euclidean_distance": {
+                "accuracy@1": 1.0,
+                "accuracy@3": 1.0,
+                "accuracy@5": 1.0,
+                "accuracy@10": 1.0,
+                "ndcg@10": 1.0,
+                "mrr@10": 1.0
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.9878048780487805,
+                "accuracy@3": 0.9959349593495935,
+                "accuracy@5": 0.9979674796747967,
+                "accuracy@10": 0.9979674796747967,
+                "ndcg@10": 0.9935436297129833,
+                "mrr@10": 0.9920392953929539
+            }
+        }
+    }
+}

results/Retrieval/scores_nlp_journal_title_abs.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.9800724651571329,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.9590163934426229,
+                "accuracy@3": 0.9754098360655737,
+                "accuracy@5": 0.9836065573770492,
+                "accuracy@10": 1.0,
+                "ndcg@10": 0.9787291019625602,
+                "mrr@10": 0.9719945355191256
+            },
+            "dot_score": {
+                "accuracy@1": 0.8770491803278688,
+                "accuracy@3": 0.9754098360655737,
+                "accuracy@5": 0.9918032786885246,
+                "accuracy@10": 1.0,
+                "ndcg@10": 0.9465822466240205,
+                "mrr@10": 0.9285519125683058
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.9590163934426229,
+                "accuracy@3": 0.9754098360655737,
+                "accuracy@5": 0.9836065573770492,
+                "accuracy@10": 1.0,
+                "ndcg@10": 0.9782768299015435,
+                "mrr@10": 0.9715391621129326
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.959349593495935,
+                "accuracy@3": 0.9898373983739838,
+                "accuracy@5": 0.991869918699187,
+                "accuracy@10": 0.9939024390243902,
+                "ndcg@10": 0.9800724651571329,
+                "mrr@10": 0.9753274616079494
+            }
+        }
+    }
+}

results/Retrieval/scores_nlp_journal_title_intro.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.9662644169750596,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.9180327868852459,
+                "accuracy@3": 0.9918032786885246,
+                "accuracy@5": 0.9918032786885246,
+                "accuracy@10": 0.9918032786885246,
+                "ndcg@10": 0.9624303956967228,
+                "mrr@10": 0.9521857923497267
+            },
+            "dot_score": {
+                "accuracy@1": 0.8524590163934426,
+                "accuracy@3": 0.9426229508196722,
+                "accuracy@5": 0.9754098360655737,
+                "accuracy@10": 0.9836065573770492,
+                "ndcg@10": 0.9211840621538752,
+                "mrr@10": 0.9006830601092896
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.9180327868852459,
+                "accuracy@3": 0.9836065573770492,
+                "accuracy@5": 0.9836065573770492,
+                "accuracy@10": 0.9918032786885246,
+                "ndcg@10": 0.9612517660828541,
+                "mrr@10": 0.9508196721311475
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.9308943089430894,
+                "accuracy@3": 0.983739837398374,
+                "accuracy@5": 0.9898373983739838,
+                "accuracy@10": 0.9939024390243902,
+                "ndcg@10": 0.9662644169750596,
+                "mrr@10": 0.956963801780875
+            }
+        }
+    }
+}

results/STS/scores_jsick.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "metric_name": "spearman",
+    "metric_value": 0.7812165639898206,
+    "details": {
+        "optimal_similarity_metric": "manhatten_distance",
+        "val_scores": {
+            "cosine_similarity": {
+                "pearson": 0.8081424308280184,
+                "spearman": 0.7858960474676613
+            },
+            "manhatten_distance": {
+                "pearson": 0.8149453883286166,
+                "spearman": 0.7866688378992329
+            },
+            "euclidean_distance": {
+                "pearson": 0.8149453883286166,
+                "spearman": 0.7866688378992329
+            },
+            "dot_score": {
+                "pearson": 0.7289190000591521,
+                "spearman": 0.6947631991077574
+            }
+        },
+        "test_scores": {
+            "manhatten_distance": {
+                "pearson": 0.8101410164828766,
+                "spearman": 0.7812165639898206
+            }
+        }
+    }
+}

results/STS/scores_jsts.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "metric_name": "spearman",
+    "metric_value": 0.8432397778118456,
+    "details": {
+        "optimal_similarity_metric": "manhatten_distance",
+        "val_scores": {
+            "cosine_similarity": {
+                "pearson": 0.8535555729076842,
+                "spearman": 0.819115099484669
+            },
+            "manhatten_distance": {
+                "pearson": 0.8633943133209987,
+                "spearman": 0.8261879574678256
+            },
+            "euclidean_distance": {
+                "pearson": 0.8633943133209987,
+                "spearman": 0.8261879574678256
+            },
+            "dot_score": {
+                "pearson": 0.713304993887736,
+                "spearman": 0.6363102927036544
+            }
+        },
+        "test_scores": {
+            "manhatten_distance": {
+                "pearson": 0.8819766195104106,
+                "spearman": 0.8432397778118456
+            }
+        }
+    }
+}

results/summary.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+    "Classification": {
+        "amazon_counterfactual_classification": {
+            "macro_f1": 0.8212903958100242
+        },
+        "amazon_review_classification": {
+            "macro_f1": 0.6106205588763483
+        },
+        "massive_intent_classification": {
+            "macro_f1": 0.821659588084153
+        },
+        "massive_scenario_classification": {
+            "macro_f1": 0.8927605477819499
+        }
+    },
+    "Reranking": {
+        "esci": {
+            "ndcg@10": 0.9343186677190654
+        }
+    },
+    "Retrieval": {
+        "jagovfaqs_22k": {
+            "ndcg@10": 0.7787754194132134
+        },
+        "jaqket": {
+            "ndcg@10": 0.7038903636852758
+        },
+        "mrtydi": {
+            "ndcg@10": 0.4906924953691417
+        },
+        "nlp_journal_abs_intro": {
+            "ndcg@10": 0.9935436297129833
+        },
+        "nlp_journal_title_abs": {
+            "ndcg@10": 0.9800724651571329
+        },
+        "nlp_journal_title_intro": {
+            "ndcg@10": 0.9662644169750596
+        }
+    },
+    "STS": {
+        "jsick": {
+            "spearman": 0.7812165639898206
+        },
+        "jsts": {
+            "spearman": 0.8432397778118456
+        }
+    },
+    "Clustering": {
+        "livedoor_news": {
+            "v_measure_score": 0.6198770016343527
+        },
+        "mewsc16": {
+            "v_measure_score": 0.4938330752654621
+        }
+    },
+    "PairClassification": {
+        "paws_x_ja": {
+            "binary_f1": 0.6259758694109298
+        }
+    }
+}