tattrongvu's picture
Upload folder using huggingface_hub
5925d69 verified
{
"metadata": {
"timestamp": "2025-03-09T16:34:55.435464",
"vidore_benchmark_version": "0.1.dev293+g9358cf6"
},
"metrics": {
"vidore/arxivqa_test_subsampled": {
"ndcg_at_1": 0.896,
"ndcg_at_3": 0.9285,
"ndcg_at_5": 0.93409,
"ndcg_at_10": 0.93887,
"ndcg_at_20": 0.94191,
"ndcg_at_50": 0.94277,
"ndcg_at_100": 0.94346,
"map_at_1": 0.896,
"map_at_3": 0.921,
"map_at_5": 0.924,
"map_at_10": 0.92612,
"map_at_20": 0.92696,
"map_at_50": 0.92712,
"map_at_100": 0.92719,
"recall_at_1": 0.896,
"recall_at_3": 0.95,
"recall_at_5": 0.964,
"recall_at_10": 0.978,
"recall_at_20": 0.99,
"recall_at_50": 0.994,
"recall_at_100": 0.998,
"precision_at_1": 0.896,
"precision_at_3": 0.31667,
"precision_at_5": 0.1928,
"precision_at_10": 0.0978,
"precision_at_20": 0.0495,
"precision_at_50": 0.01988,
"precision_at_100": 0.00998,
"mrr_at_1": 0.896,
"mrr_at_3": 0.9213333333333332,
"mrr_at_5": 0.9240333333333333,
"mrr_at_10": 0.9264857142857141,
"mrr_at_20": 0.9274158230658229,
"mrr_at_50": 0.9274847885830643,
"mrr_at_100": 0.9275602871585629,
"naucs_at_1_max": 0.8456651988738907,
"naucs_at_1_std": -0.12317729011766297,
"naucs_at_1_diff1": 0.9409423951490659,
"naucs_at_3_max": 0.8706816059757188,
"naucs_at_3_std": 0.006965452847808854,
"naucs_at_3_diff1": 0.9522875816993475,
"naucs_at_5_max": 0.9080558149185568,
"naucs_at_5_std": -0.04549227098247105,
"naucs_at_5_diff1": 0.9555192447349298,
"naucs_at_10_max": 0.8866819455054739,
"naucs_at_10_std": -0.08407605466429298,
"naucs_at_10_diff1": 0.9762329174093952,
"naucs_at_20_max": 0.9477124183006524,
"naucs_at_20_std": 0.25144724556488873,
"naucs_at_20_diff1": 1.0,
"naucs_at_50_max": 0.9564270152505304,
"naucs_at_50_std": 0.4352629940865033,
"naucs_at_50_diff1": 1.0,
"naucs_at_100_max": 1.0,
"naucs_at_100_std": 1.0,
"naucs_at_100_diff1": 1.0
},
"vidore/docvqa_test_subsampled": {
"ndcg_at_1": 0.54324,
"ndcg_at_3": 0.60994,
"ndcg_at_5": 0.6398,
"ndcg_at_10": 0.66199,
"ndcg_at_20": 0.67267,
"ndcg_at_50": 0.68225,
"ndcg_at_100": 0.68945,
"map_at_1": 0.54324,
"map_at_3": 0.59387,
"map_at_5": 0.61027,
"map_at_10": 0.61899,
"map_at_20": 0.62193,
"map_at_50": 0.62343,
"map_at_100": 0.62407,
"recall_at_1": 0.54324,
"recall_at_3": 0.65632,
"recall_at_5": 0.72949,
"recall_at_10": 0.80044,
"recall_at_20": 0.84257,
"recall_at_50": 0.89135,
"recall_at_100": 0.9357,
"precision_at_1": 0.54324,
"precision_at_3": 0.21877,
"precision_at_5": 0.1459,
"precision_at_10": 0.08004,
"precision_at_20": 0.04213,
"precision_at_50": 0.01783,
"precision_at_100": 0.00936,
"mrr_at_1": 0.5432372505543237,
"mrr_at_3": 0.5946045824094608,
"mrr_at_5": 0.6103473762010347,
"mrr_at_10": 0.6190493788054763,
"mrr_at_20": 0.6220782499013536,
"mrr_at_50": 0.623690385474884,
"mrr_at_100": 0.624317545713404,
"naucs_at_1_max": 0.5195120835206619,
"naucs_at_1_std": -0.31481029514111575,
"naucs_at_1_diff1": 0.8886195631998732,
"naucs_at_3_max": 0.4894265436758743,
"naucs_at_3_std": -0.35348286092835673,
"naucs_at_3_diff1": 0.825360351363726,
"naucs_at_5_max": 0.49365870598559275,
"naucs_at_5_std": -0.3667256163548182,
"naucs_at_5_diff1": 0.8190874936930554,
"naucs_at_10_max": 0.49415281281253026,
"naucs_at_10_std": -0.1787227124233066,
"naucs_at_10_diff1": 0.760124698410118,
"naucs_at_20_max": 0.4624420442016527,
"naucs_at_20_std": -0.14955421166412503,
"naucs_at_20_diff1": 0.7582711922759812,
"naucs_at_50_max": 0.46372165402671495,
"naucs_at_50_std": -0.009043079292315782,
"naucs_at_50_diff1": 0.7459081960812681,
"naucs_at_100_max": 0.41099442703800165,
"naucs_at_100_std": 0.4785954026968201,
"naucs_at_100_diff1": 0.7044383203056372
},
"vidore/infovqa_test_subsampled": {
"ndcg_at_1": 0.89676,
"ndcg_at_3": 0.92534,
"ndcg_at_5": 0.93031,
"ndcg_at_10": 0.935,
"ndcg_at_20": 0.93663,
"ndcg_at_50": 0.93904,
"ndcg_at_100": 0.94001,
"map_at_1": 0.89676,
"map_at_3": 0.91903,
"map_at_5": 0.92176,
"map_at_10": 0.92376,
"map_at_20": 0.92426,
"map_at_50": 0.92465,
"map_at_100": 0.92473,
"recall_at_1": 0.89676,
"recall_at_3": 0.94332,
"recall_at_5": 0.95547,
"recall_at_10": 0.96964,
"recall_at_20": 0.97571,
"recall_at_50": 0.98785,
"recall_at_100": 0.99393,
"precision_at_1": 0.89676,
"precision_at_3": 0.31444,
"precision_at_5": 0.19109,
"precision_at_10": 0.09696,
"precision_at_20": 0.04879,
"precision_at_50": 0.01976,
"precision_at_100": 0.00994,
"mrr_at_1": 0.8967611336032388,
"mrr_at_3": 0.9183535762483129,
"mrr_at_5": 0.9206815114709849,
"mrr_at_10": 0.9232191054559475,
"mrr_at_20": 0.9235380847222951,
"mrr_at_50": 0.9239302146072342,
"mrr_at_100": 0.9240125185515768,
"naucs_at_1_max": 0.7062394427119798,
"naucs_at_1_std": -0.2812879238976883,
"naucs_at_1_diff1": 0.9410064894465787,
"naucs_at_3_max": 0.7564908149226892,
"naucs_at_3_std": -0.3059756682732854,
"naucs_at_3_diff1": 0.9422566807977654,
"naucs_at_5_max": 0.84758972902799,
"naucs_at_5_std": -0.0929285302133143,
"naucs_at_5_diff1": 0.9450692388254637,
"naucs_at_10_max": 0.8860131032188592,
"naucs_at_10_std": 0.24420167069765694,
"naucs_at_10_diff1": 0.9368484108193146,
"naucs_at_20_max": 0.8806609405034168,
"naucs_at_20_std": 0.28312028971462133,
"naucs_at_20_diff1": 0.9428274225349419,
"naucs_at_50_max": 0.78308879001761,
"naucs_at_50_std": 0.08727770914673263,
"naucs_at_50_diff1": 0.9537108770403242,
"naucs_at_100_max": 0.6097113980568468,
"naucs_at_100_std": -0.06311386037266957,
"naucs_at_100_diff1": 0.9074217540806789
},
"vidore/tabfquad_test_subsampled": {
"ndcg_at_1": 0.90714,
"ndcg_at_3": 0.94809,
"ndcg_at_5": 0.95085,
"ndcg_at_10": 0.95556,
"ndcg_at_20": 0.95556,
"ndcg_at_50": 0.95633,
"ndcg_at_100": 0.95633,
"map_at_1": 0.90714,
"map_at_3": 0.93869,
"map_at_5": 0.94012,
"map_at_10": 0.94212,
"map_at_20": 0.94212,
"map_at_50": 0.94227,
"map_at_100": 0.94227,
"recall_at_1": 0.90714,
"recall_at_3": 0.975,
"recall_at_5": 0.98214,
"recall_at_10": 0.99643,
"recall_at_20": 0.99643,
"recall_at_50": 1.0,
"recall_at_100": 1.0,
"precision_at_1": 0.90714,
"precision_at_3": 0.325,
"precision_at_5": 0.19643,
"precision_at_10": 0.09964,
"precision_at_20": 0.04982,
"precision_at_50": 0.02,
"precision_at_100": 0.01,
"mrr_at_1": 0.9071428571428571,
"mrr_at_3": 0.9386904761904761,
"mrr_at_5": 0.9401190476190476,
"mrr_at_10": 0.9421173469387754,
"mrr_at_20": 0.9421173469387754,
"mrr_at_50": 0.9422602040816326,
"mrr_at_100": 0.9422602040816326,
"naucs_at_1_max": 0.47509516627163817,
"naucs_at_1_std": 0.09654887596064192,
"naucs_at_1_diff1": 0.9256625727213957,
"naucs_at_3_max": 0.7953181272509063,
"naucs_at_3_std": 0.6569294384420459,
"naucs_at_3_diff1": 0.9626517273576122,
"naucs_at_5_max": 0.9477124183006519,
"naucs_at_5_std": 0.8585434173669502,
"naucs_at_5_diff1": 0.9477124183006519,
"naucs_at_10_max": 1.0,
"naucs_at_10_std": 1.0,
"naucs_at_10_diff1": 0.8692810457516478,
"naucs_at_20_max": 1.0,
"naucs_at_20_std": 1.0,
"naucs_at_20_diff1": 0.8692810457516478,
"naucs_at_50_max": 1.0,
"naucs_at_50_std": 1.0,
"naucs_at_50_diff1": 1.0,
"naucs_at_100_max": 1.0,
"naucs_at_100_std": 1.0,
"naucs_at_100_diff1": 1.0
},
"vidore/tatdqa_test": {
"ndcg_at_1": 0.68165,
"ndcg_at_3": 0.78589,
"ndcg_at_5": 0.80738,
"ndcg_at_10": 0.81887,
"ndcg_at_20": 0.82338,
"ndcg_at_50": 0.82868,
"ndcg_at_100": 0.82966,
"map_at_1": 0.68165,
"map_at_3": 0.76094,
"map_at_5": 0.77284,
"map_at_10": 0.77775,
"map_at_20": 0.77902,
"map_at_50": 0.77992,
"map_at_100": 0.78,
"recall_at_1": 0.68165,
"recall_at_3": 0.85784,
"recall_at_5": 0.91009,
"recall_at_10": 0.94471,
"recall_at_20": 0.96233,
"recall_at_50": 0.98846,
"recall_at_100": 0.99453,
"precision_at_1": 0.68165,
"precision_at_3": 0.28595,
"precision_at_5": 0.18202,
"precision_at_10": 0.09447,
"precision_at_20": 0.04812,
"precision_at_50": 0.01977,
"precision_at_100": 0.00995,
"mrr_at_1": 0.6822600243013366,
"mrr_at_3": 0.7609356014580796,
"mrr_at_5": 0.7725394896719318,
"mrr_at_10": 0.7775212636695014,
"mrr_at_20": 0.7787535437042965,
"mrr_at_50": 0.7796543784238207,
"mrr_at_100": 0.7797401545799822,
"naucs_at_1_max": 0.16062026715795888,
"naucs_at_1_std": -0.3264360293606425,
"naucs_at_1_diff1": 0.8234210928574244,
"naucs_at_3_max": 0.2735402374223285,
"naucs_at_3_std": -0.2518265754222236,
"naucs_at_3_diff1": 0.7126512468213168,
"naucs_at_5_max": 0.3207976518601587,
"naucs_at_5_std": -0.09980902320534159,
"naucs_at_5_diff1": 0.654620096904173,
"naucs_at_10_max": 0.4248907897527349,
"naucs_at_10_std": 0.05751189029665147,
"naucs_at_10_diff1": 0.6072701680557849,
"naucs_at_20_max": 0.5189102534669143,
"naucs_at_20_std": 0.17852289719761627,
"naucs_at_20_diff1": 0.5826792796971454,
"naucs_at_50_max": 0.6766878761446519,
"naucs_at_50_std": 0.4444418209829138,
"naucs_at_50_diff1": 0.5707904926948768,
"naucs_at_100_max": 0.5171029913955458,
"naucs_at_100_std": 0.4414823767216705,
"naucs_at_100_diff1": 0.48651595702377487
},
"vidore/shiftproject_test": {
"ndcg_at_1": 0.79,
"ndcg_at_3": 0.88309,
"ndcg_at_5": 0.88309,
"ndcg_at_10": 0.88666,
"ndcg_at_20": 0.89172,
"ndcg_at_50": 0.89372,
"ndcg_at_100": 0.89546,
"map_at_1": 0.79,
"map_at_3": 0.86,
"map_at_5": 0.86,
"map_at_10": 0.86167,
"map_at_20": 0.86308,
"map_at_50": 0.8634,
"map_at_100": 0.86359,
"recall_at_1": 0.79,
"recall_at_3": 0.95,
"recall_at_5": 0.95,
"recall_at_10": 0.96,
"recall_at_20": 0.98,
"recall_at_50": 0.99,
"recall_at_100": 1.0,
"precision_at_1": 0.79,
"precision_at_3": 0.31667,
"precision_at_5": 0.19,
"precision_at_10": 0.096,
"precision_at_20": 0.049,
"precision_at_50": 0.0198,
"precision_at_100": 0.01,
"mrr_at_1": 0.79,
"mrr_at_3": 0.8633333333333333,
"mrr_at_5": 0.8633333333333333,
"mrr_at_10": 0.865,
"mrr_at_20": 0.8664354066985647,
"mrr_at_50": 0.8667579873437259,
"mrr_at_100": 0.8669466665890089,
"naucs_at_1_max": 0.039218986702575266,
"naucs_at_1_std": -0.30031019308918766,
"naucs_at_1_diff1": 0.8456248346839152,
"naucs_at_3_max": 0.5220354808590087,
"naucs_at_3_std": -0.057609710550887504,
"naucs_at_3_diff1": 0.8160597572362241,
"naucs_at_5_max": 0.5220354808590145,
"naucs_at_5_std": -0.05760971055088009,
"naucs_at_5_diff1": 0.8160597572362281,
"naucs_at_10_max": 0.4352240896358576,
"naucs_at_10_std": -0.10270774976656832,
"naucs_at_10_diff1": 0.9305555555555542,
"naucs_at_20_max": 0.1914098972922579,
"naucs_at_20_std": -0.3968253968253954,
"naucs_at_20_diff1": 0.861111111111116,
"naucs_at_50_max": 0.554154995331464,
"naucs_at_50_std": 0.35807656395892007,
"naucs_at_50_diff1": 1.0,
"naucs_at_100_max": null,
"naucs_at_100_std": null,
"naucs_at_100_diff1": null
},
"vidore/syntheticDocQA_artificial_intelligence_test": {
"ndcg_at_1": 1.0,
"ndcg_at_3": 1.0,
"ndcg_at_5": 1.0,
"ndcg_at_10": 1.0,
"ndcg_at_20": 1.0,
"ndcg_at_50": 1.0,
"ndcg_at_100": 1.0,
"map_at_1": 1.0,
"map_at_3": 1.0,
"map_at_5": 1.0,
"map_at_10": 1.0,
"map_at_20": 1.0,
"map_at_50": 1.0,
"map_at_100": 1.0,
"recall_at_1": 1.0,
"recall_at_3": 1.0,
"recall_at_5": 1.0,
"recall_at_10": 1.0,
"recall_at_20": 1.0,
"recall_at_50": 1.0,
"recall_at_100": 1.0,
"precision_at_1": 1.0,
"precision_at_3": 0.33333,
"precision_at_5": 0.2,
"precision_at_10": 0.1,
"precision_at_20": 0.05,
"precision_at_50": 0.02,
"precision_at_100": 0.01,
"mrr_at_1": 1.0,
"mrr_at_3": 1.0,
"mrr_at_5": 1.0,
"mrr_at_10": 1.0,
"mrr_at_20": 1.0,
"mrr_at_50": 1.0,
"mrr_at_100": 1.0,
"naucs_at_1_max": null,
"naucs_at_1_std": null,
"naucs_at_1_diff1": null,
"naucs_at_3_max": 1.0,
"naucs_at_3_std": 1.0,
"naucs_at_3_diff1": 1.0,
"naucs_at_5_max": 1.0,
"naucs_at_5_std": 1.0,
"naucs_at_5_diff1": 1.0,
"naucs_at_10_max": 1.0,
"naucs_at_10_std": 1.0,
"naucs_at_10_diff1": 1.0,
"naucs_at_20_max": 1.0,
"naucs_at_20_std": 1.0,
"naucs_at_20_diff1": 1.0,
"naucs_at_50_max": null,
"naucs_at_50_std": null,
"naucs_at_50_diff1": null,
"naucs_at_100_max": null,
"naucs_at_100_std": null,
"naucs_at_100_diff1": null
},
"vidore/syntheticDocQA_energy_test": {
"ndcg_at_1": 0.94,
"ndcg_at_3": 0.95893,
"ndcg_at_5": 0.95893,
"ndcg_at_10": 0.96208,
"ndcg_at_20": 0.96718,
"ndcg_at_50": 0.96718,
"ndcg_at_100": 0.96718,
"map_at_1": 0.94,
"map_at_3": 0.955,
"map_at_5": 0.955,
"map_at_10": 0.95625,
"map_at_20": 0.95767,
"map_at_50": 0.95767,
"map_at_100": 0.95767,
"recall_at_1": 0.94,
"recall_at_3": 0.97,
"recall_at_5": 0.97,
"recall_at_10": 0.98,
"recall_at_20": 1.0,
"recall_at_50": 1.0,
"recall_at_100": 1.0,
"precision_at_1": 0.94,
"precision_at_3": 0.32333,
"precision_at_5": 0.194,
"precision_at_10": 0.098,
"precision_at_20": 0.05,
"precision_at_50": 0.02,
"precision_at_100": 0.01,
"mrr_at_1": 0.94,
"mrr_at_3": 0.955,
"mrr_at_5": 0.955,
"mrr_at_10": 0.9566666666666667,
"mrr_at_20": 0.9582142857142857,
"mrr_at_50": 0.9582142857142857,
"mrr_at_100": 0.9582142857142857,
"naucs_at_1_max": 0.49004046062869583,
"naucs_at_1_std": -0.6582633053221264,
"naucs_at_1_diff1": 1.0,
"naucs_at_3_max": 0.7424525365701778,
"naucs_at_3_std": -1.040616246498596,
"naucs_at_3_diff1": 1.0,
"naucs_at_5_max": 0.742452536570183,
"naucs_at_5_std": -1.040616246498598,
"naucs_at_5_diff1": 1.0,
"naucs_at_10_max": 0.9346405228758136,
"naucs_at_10_std": -0.690943043884218,
"naucs_at_10_diff1": 1.0,
"naucs_at_20_max": 1.0,
"naucs_at_20_std": 1.0,
"naucs_at_20_diff1": 1.0,
"naucs_at_50_max": null,
"naucs_at_50_std": null,
"naucs_at_50_diff1": null,
"naucs_at_100_max": null,
"naucs_at_100_std": null,
"naucs_at_100_diff1": null
},
"vidore/syntheticDocQA_government_reports_test": {
"ndcg_at_1": 0.92,
"ndcg_at_3": 0.95655,
"ndcg_at_5": 0.96085,
"ndcg_at_10": 0.96419,
"ndcg_at_20": 0.96419,
"ndcg_at_50": 0.96419,
"ndcg_at_100": 0.96419,
"map_at_1": 0.92,
"map_at_3": 0.94833,
"map_at_5": 0.95083,
"map_at_10": 0.95226,
"map_at_20": 0.95226,
"map_at_50": 0.95226,
"map_at_100": 0.95226,
"recall_at_1": 0.92,
"recall_at_3": 0.98,
"recall_at_5": 0.99,
"recall_at_10": 1.0,
"recall_at_20": 1.0,
"recall_at_50": 1.0,
"recall_at_100": 1.0,
"precision_at_1": 0.92,
"precision_at_3": 0.32667,
"precision_at_5": 0.198,
"precision_at_10": 0.1,
"precision_at_20": 0.05,
"precision_at_50": 0.02,
"precision_at_100": 0.01,
"mrr_at_1": 0.93,
"mrr_at_3": 0.9533333333333333,
"mrr_at_5": 0.9558333333333333,
"mrr_at_10": 0.9574999999999999,
"mrr_at_20": 0.9574999999999999,
"mrr_at_50": 0.9574999999999999,
"mrr_at_100": 0.9574999999999999,
"naucs_at_1_max": 0.6289682539682547,
"naucs_at_1_std": 0.44461951447245585,
"naucs_at_1_diff1": 0.9509803921568633,
"naucs_at_3_max": 1.0,
"naucs_at_3_std": 0.3384687208216551,
"naucs_at_3_diff1": 0.9346405228758099,
"naucs_at_5_max": 1.0,
"naucs_at_5_std": 0.5541549953314738,
"naucs_at_5_diff1": 0.8692810457516413,
"naucs_at_10_max": 1.0,
"naucs_at_10_std": 1.0,
"naucs_at_10_diff1": 1.0,
"naucs_at_20_max": 1.0,
"naucs_at_20_std": 1.0,
"naucs_at_20_diff1": 1.0,
"naucs_at_50_max": null,
"naucs_at_50_std": null,
"naucs_at_50_diff1": null,
"naucs_at_100_max": null,
"naucs_at_100_std": null,
"naucs_at_100_diff1": null
},
"vidore/syntheticDocQA_healthcare_industry_test": {
"ndcg_at_1": 0.94,
"ndcg_at_3": 0.97655,
"ndcg_at_5": 0.97655,
"ndcg_at_10": 0.97655,
"ndcg_at_20": 0.97655,
"ndcg_at_50": 0.97655,
"ndcg_at_100": 0.97655,
"map_at_1": 0.94,
"map_at_3": 0.96833,
"map_at_5": 0.96833,
"map_at_10": 0.96833,
"map_at_20": 0.96833,
"map_at_50": 0.96833,
"map_at_100": 0.96833,
"recall_at_1": 0.94,
"recall_at_3": 1.0,
"recall_at_5": 1.0,
"recall_at_10": 1.0,
"recall_at_20": 1.0,
"recall_at_50": 1.0,
"recall_at_100": 1.0,
"precision_at_1": 0.94,
"precision_at_3": 0.33333,
"precision_at_5": 0.2,
"precision_at_10": 0.1,
"precision_at_20": 0.05,
"precision_at_50": 0.02,
"precision_at_100": 0.01,
"mrr_at_1": 0.95,
"mrr_at_3": 0.9733333333333333,
"mrr_at_5": 0.9733333333333333,
"mrr_at_10": 0.9733333333333333,
"mrr_at_20": 0.9733333333333333,
"mrr_at_50": 0.9733333333333333,
"mrr_at_100": 0.9733333333333333,
"naucs_at_1_max": 0.5662931839402436,
"naucs_at_1_std": 0.18837535014005719,
"naucs_at_1_diff1": 0.9782135076252712,
"naucs_at_3_max": 1.0,
"naucs_at_3_std": 1.0,
"naucs_at_3_diff1": 1.0,
"naucs_at_5_max": 1.0,
"naucs_at_5_std": 1.0,
"naucs_at_5_diff1": 1.0,
"naucs_at_10_max": 1.0,
"naucs_at_10_std": 1.0,
"naucs_at_10_diff1": 1.0,
"naucs_at_20_max": 1.0,
"naucs_at_20_std": 1.0,
"naucs_at_20_diff1": 1.0,
"naucs_at_50_max": null,
"naucs_at_50_std": null,
"naucs_at_50_diff1": null,
"naucs_at_100_max": null,
"naucs_at_100_std": null,
"naucs_at_100_diff1": null
}
}
}