{ "metadata": { "timestamp": "2025-03-09T16:34:55.435464", "vidore_benchmark_version": "0.1.dev293+g9358cf6" }, "metrics": { "vidore/arxivqa_test_subsampled": { "ndcg_at_1": 0.896, "ndcg_at_3": 0.9285, "ndcg_at_5": 0.93409, "ndcg_at_10": 0.93887, "ndcg_at_20": 0.94191, "ndcg_at_50": 0.94277, "ndcg_at_100": 0.94346, "map_at_1": 0.896, "map_at_3": 0.921, "map_at_5": 0.924, "map_at_10": 0.92612, "map_at_20": 0.92696, "map_at_50": 0.92712, "map_at_100": 0.92719, "recall_at_1": 0.896, "recall_at_3": 0.95, "recall_at_5": 0.964, "recall_at_10": 0.978, "recall_at_20": 0.99, "recall_at_50": 0.994, "recall_at_100": 0.998, "precision_at_1": 0.896, "precision_at_3": 0.31667, "precision_at_5": 0.1928, "precision_at_10": 0.0978, "precision_at_20": 0.0495, "precision_at_50": 0.01988, "precision_at_100": 0.00998, "mrr_at_1": 0.896, "mrr_at_3": 0.9213333333333332, "mrr_at_5": 0.9240333333333333, "mrr_at_10": 0.9264857142857141, "mrr_at_20": 0.9274158230658229, "mrr_at_50": 0.9274847885830643, "mrr_at_100": 0.9275602871585629, "naucs_at_1_max": 0.8456651988738907, "naucs_at_1_std": -0.12317729011766297, "naucs_at_1_diff1": 0.9409423951490659, "naucs_at_3_max": 0.8706816059757188, "naucs_at_3_std": 0.006965452847808854, "naucs_at_3_diff1": 0.9522875816993475, "naucs_at_5_max": 0.9080558149185568, "naucs_at_5_std": -0.04549227098247105, "naucs_at_5_diff1": 0.9555192447349298, "naucs_at_10_max": 0.8866819455054739, "naucs_at_10_std": -0.08407605466429298, "naucs_at_10_diff1": 0.9762329174093952, "naucs_at_20_max": 0.9477124183006524, "naucs_at_20_std": 0.25144724556488873, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": 0.9564270152505304, "naucs_at_50_std": 0.4352629940865033, "naucs_at_50_diff1": 1.0, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0 }, "vidore/docvqa_test_subsampled": { "ndcg_at_1": 0.54324, "ndcg_at_3": 0.60994, "ndcg_at_5": 0.6398, "ndcg_at_10": 0.66199, "ndcg_at_20": 0.67267, "ndcg_at_50": 0.68225, "ndcg_at_100": 0.68945, "map_at_1": 0.54324, "map_at_3": 0.59387, "map_at_5": 0.61027, "map_at_10": 0.61899, "map_at_20": 0.62193, "map_at_50": 0.62343, "map_at_100": 0.62407, "recall_at_1": 0.54324, "recall_at_3": 0.65632, "recall_at_5": 0.72949, "recall_at_10": 0.80044, "recall_at_20": 0.84257, "recall_at_50": 0.89135, "recall_at_100": 0.9357, "precision_at_1": 0.54324, "precision_at_3": 0.21877, "precision_at_5": 0.1459, "precision_at_10": 0.08004, "precision_at_20": 0.04213, "precision_at_50": 0.01783, "precision_at_100": 0.00936, "mrr_at_1": 0.5432372505543237, "mrr_at_3": 0.5946045824094608, "mrr_at_5": 0.6103473762010347, "mrr_at_10": 0.6190493788054763, "mrr_at_20": 0.6220782499013536, "mrr_at_50": 0.623690385474884, "mrr_at_100": 0.624317545713404, "naucs_at_1_max": 0.5195120835206619, "naucs_at_1_std": -0.31481029514111575, "naucs_at_1_diff1": 0.8886195631998732, "naucs_at_3_max": 0.4894265436758743, "naucs_at_3_std": -0.35348286092835673, "naucs_at_3_diff1": 0.825360351363726, "naucs_at_5_max": 0.49365870598559275, "naucs_at_5_std": -0.3667256163548182, "naucs_at_5_diff1": 0.8190874936930554, "naucs_at_10_max": 0.49415281281253026, "naucs_at_10_std": -0.1787227124233066, "naucs_at_10_diff1": 0.760124698410118, "naucs_at_20_max": 0.4624420442016527, "naucs_at_20_std": -0.14955421166412503, "naucs_at_20_diff1": 0.7582711922759812, "naucs_at_50_max": 0.46372165402671495, "naucs_at_50_std": -0.009043079292315782, "naucs_at_50_diff1": 0.7459081960812681, "naucs_at_100_max": 0.41099442703800165, "naucs_at_100_std": 0.4785954026968201, "naucs_at_100_diff1": 0.7044383203056372 }, "vidore/infovqa_test_subsampled": { "ndcg_at_1": 0.89676, "ndcg_at_3": 0.92534, "ndcg_at_5": 0.93031, "ndcg_at_10": 0.935, "ndcg_at_20": 0.93663, "ndcg_at_50": 0.93904, "ndcg_at_100": 0.94001, "map_at_1": 0.89676, "map_at_3": 0.91903, "map_at_5": 0.92176, "map_at_10": 0.92376, "map_at_20": 0.92426, "map_at_50": 0.92465, "map_at_100": 0.92473, "recall_at_1": 0.89676, "recall_at_3": 0.94332, "recall_at_5": 0.95547, "recall_at_10": 0.96964, "recall_at_20": 0.97571, "recall_at_50": 0.98785, "recall_at_100": 0.99393, "precision_at_1": 0.89676, "precision_at_3": 0.31444, "precision_at_5": 0.19109, "precision_at_10": 0.09696, "precision_at_20": 0.04879, "precision_at_50": 0.01976, "precision_at_100": 0.00994, "mrr_at_1": 0.8967611336032388, "mrr_at_3": 0.9183535762483129, "mrr_at_5": 0.9206815114709849, "mrr_at_10": 0.9232191054559475, "mrr_at_20": 0.9235380847222951, "mrr_at_50": 0.9239302146072342, "mrr_at_100": 0.9240125185515768, "naucs_at_1_max": 0.7062394427119798, "naucs_at_1_std": -0.2812879238976883, "naucs_at_1_diff1": 0.9410064894465787, "naucs_at_3_max": 0.7564908149226892, "naucs_at_3_std": -0.3059756682732854, "naucs_at_3_diff1": 0.9422566807977654, "naucs_at_5_max": 0.84758972902799, "naucs_at_5_std": -0.0929285302133143, "naucs_at_5_diff1": 0.9450692388254637, "naucs_at_10_max": 0.8860131032188592, "naucs_at_10_std": 0.24420167069765694, "naucs_at_10_diff1": 0.9368484108193146, "naucs_at_20_max": 0.8806609405034168, "naucs_at_20_std": 0.28312028971462133, "naucs_at_20_diff1": 0.9428274225349419, "naucs_at_50_max": 0.78308879001761, "naucs_at_50_std": 0.08727770914673263, "naucs_at_50_diff1": 0.9537108770403242, "naucs_at_100_max": 0.6097113980568468, "naucs_at_100_std": -0.06311386037266957, "naucs_at_100_diff1": 0.9074217540806789 }, "vidore/tabfquad_test_subsampled": { "ndcg_at_1": 0.90714, "ndcg_at_3": 0.94809, "ndcg_at_5": 0.95085, "ndcg_at_10": 0.95556, "ndcg_at_20": 0.95556, "ndcg_at_50": 0.95633, "ndcg_at_100": 0.95633, "map_at_1": 0.90714, "map_at_3": 0.93869, "map_at_5": 0.94012, "map_at_10": 0.94212, "map_at_20": 0.94212, "map_at_50": 0.94227, "map_at_100": 0.94227, "recall_at_1": 0.90714, "recall_at_3": 0.975, "recall_at_5": 0.98214, "recall_at_10": 0.99643, "recall_at_20": 0.99643, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.90714, "precision_at_3": 0.325, "precision_at_5": 0.19643, "precision_at_10": 0.09964, "precision_at_20": 0.04982, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.9071428571428571, "mrr_at_3": 0.9386904761904761, "mrr_at_5": 0.9401190476190476, "mrr_at_10": 0.9421173469387754, "mrr_at_20": 0.9421173469387754, "mrr_at_50": 0.9422602040816326, "mrr_at_100": 0.9422602040816326, "naucs_at_1_max": 0.47509516627163817, "naucs_at_1_std": 0.09654887596064192, "naucs_at_1_diff1": 0.9256625727213957, "naucs_at_3_max": 0.7953181272509063, "naucs_at_3_std": 0.6569294384420459, "naucs_at_3_diff1": 0.9626517273576122, "naucs_at_5_max": 0.9477124183006519, "naucs_at_5_std": 0.8585434173669502, "naucs_at_5_diff1": 0.9477124183006519, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 0.8692810457516478, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 0.8692810457516478, "naucs_at_50_max": 1.0, "naucs_at_50_std": 1.0, "naucs_at_50_diff1": 1.0, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0 }, "vidore/tatdqa_test": { "ndcg_at_1": 0.68165, "ndcg_at_3": 0.78589, "ndcg_at_5": 0.80738, "ndcg_at_10": 0.81887, "ndcg_at_20": 0.82338, "ndcg_at_50": 0.82868, "ndcg_at_100": 0.82966, "map_at_1": 0.68165, "map_at_3": 0.76094, "map_at_5": 0.77284, "map_at_10": 0.77775, "map_at_20": 0.77902, "map_at_50": 0.77992, "map_at_100": 0.78, "recall_at_1": 0.68165, "recall_at_3": 0.85784, "recall_at_5": 0.91009, "recall_at_10": 0.94471, "recall_at_20": 0.96233, "recall_at_50": 0.98846, "recall_at_100": 0.99453, "precision_at_1": 0.68165, "precision_at_3": 0.28595, "precision_at_5": 0.18202, "precision_at_10": 0.09447, "precision_at_20": 0.04812, "precision_at_50": 0.01977, "precision_at_100": 0.00995, "mrr_at_1": 0.6822600243013366, "mrr_at_3": 0.7609356014580796, "mrr_at_5": 0.7725394896719318, "mrr_at_10": 0.7775212636695014, "mrr_at_20": 0.7787535437042965, "mrr_at_50": 0.7796543784238207, "mrr_at_100": 0.7797401545799822, "naucs_at_1_max": 0.16062026715795888, "naucs_at_1_std": -0.3264360293606425, "naucs_at_1_diff1": 0.8234210928574244, "naucs_at_3_max": 0.2735402374223285, "naucs_at_3_std": -0.2518265754222236, "naucs_at_3_diff1": 0.7126512468213168, "naucs_at_5_max": 0.3207976518601587, "naucs_at_5_std": -0.09980902320534159, "naucs_at_5_diff1": 0.654620096904173, "naucs_at_10_max": 0.4248907897527349, "naucs_at_10_std": 0.05751189029665147, "naucs_at_10_diff1": 0.6072701680557849, "naucs_at_20_max": 0.5189102534669143, "naucs_at_20_std": 0.17852289719761627, "naucs_at_20_diff1": 0.5826792796971454, "naucs_at_50_max": 0.6766878761446519, "naucs_at_50_std": 0.4444418209829138, "naucs_at_50_diff1": 0.5707904926948768, "naucs_at_100_max": 0.5171029913955458, "naucs_at_100_std": 0.4414823767216705, "naucs_at_100_diff1": 0.48651595702377487 }, "vidore/shiftproject_test": { "ndcg_at_1": 0.79, "ndcg_at_3": 0.88309, "ndcg_at_5": 0.88309, "ndcg_at_10": 0.88666, "ndcg_at_20": 0.89172, "ndcg_at_50": 0.89372, "ndcg_at_100": 0.89546, "map_at_1": 0.79, "map_at_3": 0.86, "map_at_5": 0.86, "map_at_10": 0.86167, "map_at_20": 0.86308, "map_at_50": 0.8634, "map_at_100": 0.86359, "recall_at_1": 0.79, "recall_at_3": 0.95, "recall_at_5": 0.95, "recall_at_10": 0.96, "recall_at_20": 0.98, "recall_at_50": 0.99, "recall_at_100": 1.0, "precision_at_1": 0.79, "precision_at_3": 0.31667, "precision_at_5": 0.19, "precision_at_10": 0.096, "precision_at_20": 0.049, "precision_at_50": 0.0198, "precision_at_100": 0.01, "mrr_at_1": 0.79, "mrr_at_3": 0.8633333333333333, "mrr_at_5": 0.8633333333333333, "mrr_at_10": 0.865, "mrr_at_20": 0.8664354066985647, "mrr_at_50": 0.8667579873437259, "mrr_at_100": 0.8669466665890089, "naucs_at_1_max": 0.039218986702575266, "naucs_at_1_std": -0.30031019308918766, "naucs_at_1_diff1": 0.8456248346839152, "naucs_at_3_max": 0.5220354808590087, "naucs_at_3_std": -0.057609710550887504, "naucs_at_3_diff1": 0.8160597572362241, "naucs_at_5_max": 0.5220354808590145, "naucs_at_5_std": -0.05760971055088009, "naucs_at_5_diff1": 0.8160597572362281, "naucs_at_10_max": 0.4352240896358576, "naucs_at_10_std": -0.10270774976656832, "naucs_at_10_diff1": 0.9305555555555542, "naucs_at_20_max": 0.1914098972922579, "naucs_at_20_std": -0.3968253968253954, "naucs_at_20_diff1": 0.861111111111116, "naucs_at_50_max": 0.554154995331464, "naucs_at_50_std": 0.35807656395892007, "naucs_at_50_diff1": 1.0, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "vidore/syntheticDocQA_artificial_intelligence_test": { "ndcg_at_1": 1.0, "ndcg_at_3": 1.0, "ndcg_at_5": 1.0, "ndcg_at_10": 1.0, "ndcg_at_20": 1.0, "ndcg_at_50": 1.0, "ndcg_at_100": 1.0, "map_at_1": 1.0, "map_at_3": 1.0, "map_at_5": 1.0, "map_at_10": 1.0, "map_at_20": 1.0, "map_at_50": 1.0, "map_at_100": 1.0, "recall_at_1": 1.0, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 1.0, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 1.0, "mrr_at_3": 1.0, "mrr_at_5": 1.0, "mrr_at_10": 1.0, "mrr_at_20": 1.0, "mrr_at_50": 1.0, "mrr_at_100": 1.0, "naucs_at_1_max": null, "naucs_at_1_std": null, "naucs_at_1_diff1": null, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "vidore/syntheticDocQA_energy_test": { "ndcg_at_1": 0.94, "ndcg_at_3": 0.95893, "ndcg_at_5": 0.95893, "ndcg_at_10": 0.96208, "ndcg_at_20": 0.96718, "ndcg_at_50": 0.96718, "ndcg_at_100": 0.96718, "map_at_1": 0.94, "map_at_3": 0.955, "map_at_5": 0.955, "map_at_10": 0.95625, "map_at_20": 0.95767, "map_at_50": 0.95767, "map_at_100": 0.95767, "recall_at_1": 0.94, "recall_at_3": 0.97, "recall_at_5": 0.97, "recall_at_10": 0.98, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.94, "precision_at_3": 0.32333, "precision_at_5": 0.194, "precision_at_10": 0.098, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.94, "mrr_at_3": 0.955, "mrr_at_5": 0.955, "mrr_at_10": 0.9566666666666667, "mrr_at_20": 0.9582142857142857, "mrr_at_50": 0.9582142857142857, "mrr_at_100": 0.9582142857142857, "naucs_at_1_max": 0.49004046062869583, "naucs_at_1_std": -0.6582633053221264, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 0.7424525365701778, "naucs_at_3_std": -1.040616246498596, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 0.742452536570183, "naucs_at_5_std": -1.040616246498598, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 0.9346405228758136, "naucs_at_10_std": -0.690943043884218, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "vidore/syntheticDocQA_government_reports_test": { "ndcg_at_1": 0.92, "ndcg_at_3": 0.95655, "ndcg_at_5": 0.96085, "ndcg_at_10": 0.96419, "ndcg_at_20": 0.96419, "ndcg_at_50": 0.96419, "ndcg_at_100": 0.96419, "map_at_1": 0.92, "map_at_3": 0.94833, "map_at_5": 0.95083, "map_at_10": 0.95226, "map_at_20": 0.95226, "map_at_50": 0.95226, "map_at_100": 0.95226, "recall_at_1": 0.92, "recall_at_3": 0.98, "recall_at_5": 0.99, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.92, "precision_at_3": 0.32667, "precision_at_5": 0.198, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.93, "mrr_at_3": 0.9533333333333333, "mrr_at_5": 0.9558333333333333, "mrr_at_10": 0.9574999999999999, "mrr_at_20": 0.9574999999999999, "mrr_at_50": 0.9574999999999999, "mrr_at_100": 0.9574999999999999, "naucs_at_1_max": 0.6289682539682547, "naucs_at_1_std": 0.44461951447245585, "naucs_at_1_diff1": 0.9509803921568633, "naucs_at_3_max": 1.0, "naucs_at_3_std": 0.3384687208216551, "naucs_at_3_diff1": 0.9346405228758099, "naucs_at_5_max": 1.0, "naucs_at_5_std": 0.5541549953314738, "naucs_at_5_diff1": 0.8692810457516413, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "vidore/syntheticDocQA_healthcare_industry_test": { "ndcg_at_1": 0.94, "ndcg_at_3": 0.97655, "ndcg_at_5": 0.97655, "ndcg_at_10": 0.97655, "ndcg_at_20": 0.97655, "ndcg_at_50": 0.97655, "ndcg_at_100": 0.97655, "map_at_1": 0.94, "map_at_3": 0.96833, "map_at_5": 0.96833, "map_at_10": 0.96833, "map_at_20": 0.96833, "map_at_50": 0.96833, "map_at_100": 0.96833, "recall_at_1": 0.94, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.94, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.95, "mrr_at_3": 0.9733333333333333, "mrr_at_5": 0.9733333333333333, "mrr_at_10": 0.9733333333333333, "mrr_at_20": 0.9733333333333333, "mrr_at_50": 0.9733333333333333, "mrr_at_100": 0.9733333333333333, "naucs_at_1_max": 0.5662931839402436, "naucs_at_1_std": 0.18837535014005719, "naucs_at_1_diff1": 0.9782135076252712, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null } } }