Last active
May 13, 2026 20:25
-
-
Save Helw150/a06d335d41cbe7121ded948c89dd3f36 to your computer and use it in GitHub Desktop.
Grug MoE data-mixture comparison (v0/v2/v3/v4) across compute scales — full lm-eval results across 17 tasks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| [ | |
| { | |
| "mix": "v0", | |
| "hidden_dim": 512, | |
| "budget": 2.19e+17, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "bpb,none": 0.844007877090332, | |
| "bpb_stderr,none": 0.004281199742552705, | |
| "acc_norm,none": 0.27332288847742486, | |
| "acc_norm_stderr,none": 0.003753850490600821, | |
| "choice_logprob,none": -3.043445214997677, | |
| "choice_logprob_stderr,none": 0.026555210423050656, | |
| "choice_prob_norm,none": 0.25426655551896116, | |
| "choice_prob_norm_stderr,none": 0.00045542617738300654, | |
| "choice_logprob_norm,none": -1.3982650663285163, | |
| "choice_logprob_norm_stderr,none": 0.001983432015974189, | |
| "acc,none": 0.25751317476143, | |
| "acc_stderr,none": 0.0036830955102596413, | |
| "logprob,none": -12.398654076514974, | |
| "logprob_stderr,none": 0.04022890901630262 | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "logprob,none": -9.150586650434162, | |
| "logprob_stderr,none": 0.0437860436375247, | |
| "choice_prob_norm,none": 0.2533328393394025, | |
| "choice_prob_norm_stderr,none": 0.00038756810709663183, | |
| "choice_logprob,none": -3.0488093627343145, | |
| "choice_logprob_stderr,none": 0.027576481710546295, | |
| "acc_norm,none": 0.27795185870958555, | |
| "acc_norm_stderr,none": 0.003768336194386366, | |
| "bpb,none": 0.548681596735387, | |
| "bpb_stderr,none": 0.0027853622786252513, | |
| "choice_logprob_norm,none": -1.3931666061972847, | |
| "choice_logprob_norm_stderr,none": 0.0016667230544346093, | |
| "acc,none": 0.25801167924797036, | |
| "acc_stderr,none": 0.0036777670843517758 | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.18344709897610922, | |
| "acc_stderr,none": 0.011310170179554538, | |
| "acc_norm,none": 0.2235494880546075, | |
| "acc_norm_stderr,none": 0.012174896631202612, | |
| "bpb,none": 1.4939646127332267, | |
| "bpb_stderr,none": 0.026052531422912324, | |
| "logprob,none": -25.201515757183166, | |
| "logprob_stderr,none": 0.40587633676351703, | |
| "choice_logprob,none": -6.179149515974148, | |
| "choice_logprob_stderr,none": 0.19074901798103436, | |
| "choice_prob_norm,none": 0.24421896079372202, | |
| "choice_prob_norm_stderr,none": 0.0020540497334768444, | |
| "choice_logprob_norm,none": -1.4612485921546, | |
| "choice_logprob_norm_stderr,none": 0.010459092921839218 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.39941077441077444, | |
| "acc_stderr,none": 0.010050018228742123, | |
| "acc_norm,none": 0.38215488215488214, | |
| "acc_norm_stderr,none": 0.009970747281292443, | |
| "bpb,none": 1.3642281627798005, | |
| "bpb_stderr,none": 0.013579442555016155, | |
| "logprob,none": -18.200637711820377, | |
| "logprob_stderr,none": 0.24737452991476383, | |
| "choice_logprob,none": -3.2206430668231834, | |
| "choice_logprob_stderr,none": 0.09855560033006468, | |
| "choice_prob_norm,none": 0.27701670445333143, | |
| "choice_prob_norm_stderr,none": 0.0017801352285700502, | |
| "choice_logprob_norm,none": -1.3367244872650283, | |
| "choice_logprob_norm_stderr,none": 0.007127254716244113 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.535474006116208, | |
| "acc_stderr,none": 0.00872301756568373, | |
| "bpb,none": 0.6421441648309578, | |
| "bpb_stderr,none": 0.007033838483279408, | |
| "logprob,none": -1.078191109170243, | |
| "logprob_stderr,none": 0.009527333827094765, | |
| "choice_logprob,none": -0.7567635277470782, | |
| "choice_logprob_stderr,none": 0.008415952419342955, | |
| "choice_prob_norm,none": 0.5259153499375072, | |
| "choice_prob_norm_stderr,none": 0.002922074311745619, | |
| "choice_logprob_norm,none": -0.7032777721755389, | |
| "choice_logprob_norm_stderr,none": 0.006455724758507388 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.6073394495412844, | |
| "acc_stderr,none": 0.008541161248702911, | |
| "acc_norm,none": 0.6100917431192661, | |
| "acc_norm_stderr,none": 0.00853043797286262, | |
| "bpb,none": 0.781249138328381, | |
| "bpb_stderr,none": 0.01792698664529871, | |
| "logprob,none": -1.1282020455106683, | |
| "logprob_stderr,none": 0.02461400496836941, | |
| "choice_logprob,none": -1.1095931076643404, | |
| "choice_logprob_stderr,none": 0.024625304958620156, | |
| "choice_prob_norm,none": 0.5761914163699757, | |
| "choice_prob_norm_stderr,none": 0.005965626139627796, | |
| "choice_logprob_norm,none": -0.8994806084977474, | |
| "choice_logprob_norm_stderr,none": 0.017654638360913667 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.54, | |
| "acc_stderr,none": 0.05009082659620333, | |
| "bpb,none": 1.932529378886802, | |
| "bpb_stderr,none": 0.050196515588808964, | |
| "logprob,none": -36.46297584533691, | |
| "logprob_stderr,none": 0.6275435679451141, | |
| "choice_logprob,none": -1.5684399778005398, | |
| "choice_logprob_stderr,none": 0.25810873920107935, | |
| "choice_prob_norm,none": 0.5008998882565759, | |
| "choice_prob_norm_stderr,none": 0.008403353777918801, | |
| "choice_logprob_norm,none": -0.7073998440685311, | |
| "choice_logprob_norm_stderr,none": 0.018757463416034615 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.21867321867321868, | |
| "acc_stderr,none": 0.011834072858346475, | |
| "bpb,none": 3.3944986407019795, | |
| "bpb_stderr,none": 0.07036943089405387, | |
| "logprob,none": -2.352887162217149, | |
| "logprob_stderr,none": 0.048776372621821336, | |
| "choice_logprob,none": -2.2934845489053415, | |
| "choice_logprob_stderr,none": 0.04875833998131373, | |
| "choice_prob_norm,none": 0.21005525824056398, | |
| "choice_prob_norm_stderr,none": 0.00573610313594625, | |
| "choice_logprob_norm,none": -2.7836692402555645, | |
| "choice_logprob_norm_stderr,none": 0.07056143439586685 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.21785421785421785, | |
| "acc_stderr,none": 0.011818079981132525, | |
| "acc_norm,none": 0.2497952497952498, | |
| "acc_norm_stderr,none": 0.012393731327126517, | |
| "bpb,none": 1.0283069448483697, | |
| "bpb_stderr,none": 0.011691902814805668, | |
| "logprob,none": -8.32615207786154, | |
| "logprob_stderr,none": 0.06789450079528266, | |
| "choice_logprob,none": -2.9300014365243117, | |
| "choice_logprob_stderr,none": 0.06343665511527985, | |
| "choice_prob_norm,none": 0.21264917554927573, | |
| "choice_prob_norm_stderr,none": 0.001895513866004499, | |
| "choice_logprob_norm,none": -1.604318958879433, | |
| "choice_logprob_norm_stderr,none": 0.010183666364888024 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.2755427205735909, | |
| "acc_stderr,none": 0.004458742356237827, | |
| "acc_norm,none": 0.2842063333997212, | |
| "acc_norm_stderr,none": 0.00450113789523078, | |
| "bpb,none": 1.059523145316047, | |
| "bpb_stderr,none": 0.0022444255991642737, | |
| "logprob,none": -97.3619536872025, | |
| "logprob_stderr,none": 0.48363229578906214, | |
| "choice_logprob,none": -27.102028974702925, | |
| "choice_logprob_stderr,none": 0.31415916304164965, | |
| "choice_prob_norm,none": 0.2561454675384467, | |
| "choice_prob_norm_stderr,none": 0.0004366764395449189, | |
| "choice_logprob_norm,none": -1.3767884583825902, | |
| "choice_logprob_norm_stderr,none": 0.0017318776455852686 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.2731527584146584, | |
| "acc_stderr,none": 0.004446680081493803, | |
| "acc_norm,none": 0.2832105158334993, | |
| "acc_norm_stderr,none": 0.0044963697421320625, | |
| "bpb,none": 1.0621285463880195, | |
| "bpb_stderr,none": 0.0022168107518051694, | |
| "logprob,none": -97.87679240248208, | |
| "logprob_stderr,none": 0.48800701290965154, | |
| "choice_logprob,none": -27.41035264155543, | |
| "choice_logprob_stderr,none": 0.3170651738314153, | |
| "choice_prob_norm,none": 0.255918238323607, | |
| "choice_prob_norm_stderr,none": 0.00043183433091282055, | |
| "choice_logprob_norm,none": -1.3773365329893177, | |
| "choice_logprob_norm_stderr,none": 0.0017110710547015664 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.156, | |
| "acc_stderr,none": 0.01624363602839109, | |
| "acc_norm,none": 0.276, | |
| "acc_norm_stderr,none": 0.02001121929807354, | |
| "bpb,none": 2.0457006929799872, | |
| "bpb_stderr,none": 0.05234425573331846, | |
| "logprob,none": -21.423335208892823, | |
| "logprob_stderr,none": 0.5723682593644139, | |
| "choice_logprob,none": -6.946740517766322, | |
| "choice_logprob_stderr,none": 0.3398069620244233, | |
| "choice_prob_norm,none": 0.2559159542838489, | |
| "choice_prob_norm_stderr,none": 0.005456659411254479, | |
| "choice_logprob_norm,none": -1.509851229757715, | |
| "choice_logprob_norm_stderr,none": 0.03220580301083027 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.6186071817192601, | |
| "acc_stderr,none": 0.01133285040652868, | |
| "acc_norm,none": 0.6006528835690969, | |
| "acc_norm_stderr,none": 0.011427006685027255, | |
| "bpb,none": 1.3454683630407622, | |
| "bpb_stderr,none": 0.010830648783079756, | |
| "logprob,none": -80.70319272696129, | |
| "logprob_stderr,none": 1.6881639018523786, | |
| "choice_logprob,none": -3.7250006152484225, | |
| "choice_logprob_stderr,none": 0.24830992680518993, | |
| "choice_prob_norm,none": 0.5057906922894322, | |
| "choice_prob_norm_stderr,none": 0.00141780632702078, | |
| "choice_logprob_norm,none": -0.689898526159878, | |
| "choice_logprob_norm_stderr,none": 0.0031762510346056622 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.5082872928176796, | |
| "acc_stderr,none": 0.014050555322824194, | |
| "bpb,none": 0.48343625036531535, | |
| "bpb_stderr,none": 0.026419434099997325, | |
| "logprob,none": -21.73852891101551, | |
| "logprob_stderr,none": 0.31737259551999136, | |
| "choice_logprob,none": -0.7682591176586984, | |
| "choice_logprob_stderr,none": 0.013940856014081878, | |
| "choice_prob_norm,none": 0.5008961398172945, | |
| "choice_prob_norm_stderr,none": 0.0007634802237262012, | |
| "choice_logprob_norm,none": -0.6934601876237907, | |
| "choice_logprob_norm_stderr,none": 0.002288066806855179 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.5238095238095238, | |
| "acc_stderr,none": 0.03028256065887908, | |
| "bpb,none": 0.7321154207777446, | |
| "bpb_stderr,none": 0.02406740256267047, | |
| "logprob,none": -30.54474130916945, | |
| "logprob_stderr,none": 0.5608394280435071, | |
| "choice_logprob,none": -0.8329238946298883, | |
| "choice_logprob_stderr,none": 0.04411749135415132, | |
| "choice_prob_norm,none": 0.5007932504499706, | |
| "choice_prob_norm_stderr,none": 0.0012283331704642, | |
| "choice_logprob_norm,none": -0.692377059436893, | |
| "choice_logprob_norm_stderr,none": 0.0024497244389824267 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.2577097776715276, | |
| "acc_stderr,none": 0.00676332572657815, | |
| "acc_norm,none": 0.2577097776715276, | |
| "acc_norm_stderr,none": 0.00676332572657815, | |
| "logprob,none": -1.6676009283264146, | |
| "logprob_stderr,none": 0.01211629223566799 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.21778627779105905, | |
| "acc_stderr,none": 0.006382428483871769, | |
| "acc_norm,none": 0.25077695433899116, | |
| "acc_norm_stderr,none": 0.006702816982619845, | |
| "bpb,none": 0.5672725270755324, | |
| "bpb_stderr,none": 0.00544400700742468, | |
| "logprob,none": -7.468935673868944, | |
| "logprob_stderr,none": 0.04929996698112747, | |
| "choice_logprob,none": -3.156554204167492, | |
| "choice_logprob_stderr,none": 0.04064688133216061, | |
| "choice_prob_norm,none": 0.24998755186001656, | |
| "choice_prob_norm_stderr,none": 0.0008006475335414557, | |
| "choice_logprob_norm,none": -1.4105696716118796, | |
| "choice_logprob_norm_stderr,none": 0.0036161209895869753 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.23745410036719705, | |
| "acc_stderr,none": 0.014896277441041843, | |
| "logprob,none": -27.668901143488423, | |
| "logprob_stderr,none": 0.5155153998573488 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 249.00128224150893, | |
| "nll_stderr,none": 2.7224087755745114, | |
| "bpb,none": 1.2938561600269785, | |
| "bpb_stderr,none": 0.008077388733314489 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 95.12898236367761, | |
| "nll_stderr,none": 4.950599241344104, | |
| "bpb,none": 0.8647876570287482, | |
| "bpb_stderr,none": 0.027305095746882137 | |
| } | |
| } | |
| }, | |
| { | |
| "mix": "v2", | |
| "hidden_dim": 512, | |
| "budget": 2.19e+17, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "choice_logprob_norm,none": -1.395105808725625, | |
| "choice_logprob_norm_stderr,none": 0.0018874264609554715, | |
| "bpb,none": 0.7086432691399365, | |
| "bpb_stderr,none": 0.0037642409695114263, | |
| "acc_norm,none": 0.2804443811422874, | |
| "acc_norm_stderr,none": 0.0037814795663111355, | |
| "choice_prob_norm,none": 0.25430199441097195, | |
| "choice_prob_norm_stderr,none": 0.0004345215762368066, | |
| "acc,none": 0.2664150405925082, | |
| "acc_stderr,none": 0.003720863676213897, | |
| "logprob,none": -10.879439385268903, | |
| "logprob_stderr,none": 0.04589588515139443, | |
| "choice_logprob,none": -3.26679195925276, | |
| "choice_logprob_stderr,none": 0.030529247948099983 | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "acc_norm,none": 0.2745335422304515, | |
| "acc_norm_stderr,none": 0.003753912793560197, | |
| "choice_prob_norm,none": 0.25438074961280066, | |
| "choice_prob_norm_stderr,none": 0.0004152774493322435, | |
| "bpb,none": 0.555535466797542, | |
| "bpb_stderr,none": 0.0031185170868660004, | |
| "choice_logprob,none": -3.5104746791472374, | |
| "choice_logprob_stderr,none": 0.03519841089032272, | |
| "choice_logprob_norm,none": -1.3945937913065722, | |
| "choice_logprob_norm_stderr,none": 0.001874030058258482, | |
| "acc,none": 0.26370887337986043, | |
| "acc_stderr,none": 0.003707121434082123, | |
| "logprob,none": -9.198550460823393, | |
| "logprob_stderr,none": 0.05378404592279555 | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.19112627986348124, | |
| "acc_stderr,none": 0.011490055292778599, | |
| "acc_norm,none": 0.22098976109215018, | |
| "acc_norm_stderr,none": 0.012124929206818258, | |
| "bpb,none": 1.489832873492701, | |
| "bpb_stderr,none": 0.02717879408473882, | |
| "logprob,none": -24.8621712874227, | |
| "logprob_stderr,none": 0.39980296901624135, | |
| "choice_logprob,none": -6.160979878220389, | |
| "choice_logprob_stderr,none": 0.1906458394945994, | |
| "choice_prob_norm,none": 0.24613135192540953, | |
| "choice_prob_norm_stderr,none": 0.002183901046334659, | |
| "choice_logprob_norm,none": -1.4617512614955517, | |
| "choice_logprob_norm_stderr,none": 0.011962603981041372 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.42297979797979796, | |
| "acc_stderr,none": 0.010137328382209088, | |
| "acc_norm,none": 0.406986531986532, | |
| "acc_norm_stderr,none": 0.010080695355466598, | |
| "bpb,none": 1.387534733610468, | |
| "bpb_stderr,none": 0.01537529127210736, | |
| "logprob,none": -18.011766900317838, | |
| "logprob_stderr,none": 0.2406318292539621, | |
| "choice_logprob,none": -3.110095427498679, | |
| "choice_logprob_stderr,none": 0.10149254975214446, | |
| "choice_prob_norm,none": 0.28301377885231027, | |
| "choice_prob_norm_stderr,none": 0.0019324969155231323, | |
| "choice_logprob_norm,none": -1.3236154616516096, | |
| "choice_logprob_norm_stderr,none": 0.007749568127906316 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.5666666666666667, | |
| "acc_stderr,none": 0.008666972565214514, | |
| "bpb,none": 0.8127147770859842, | |
| "bpb_stderr,none": 0.01077108724649005, | |
| "logprob,none": -1.3609937103516465, | |
| "logprob_stderr,none": 0.01643642626382858, | |
| "choice_logprob,none": -0.7703883397780966, | |
| "choice_logprob_stderr,none": 0.011417959600312522, | |
| "choice_prob_norm,none": 0.5482880534919546, | |
| "choice_prob_norm_stderr,none": 0.003682284394153467, | |
| "choice_logprob_norm,none": -0.6999610769423285, | |
| "choice_logprob_norm_stderr,none": 0.008559105338723531 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.5327217125382263, | |
| "acc_stderr,none": 0.008726308038444403, | |
| "acc_norm,none": 0.5434250764525994, | |
| "acc_norm_stderr,none": 0.008712010793695301, | |
| "bpb,none": 0.601618129293495, | |
| "bpb_stderr,none": 0.01109298411838269, | |
| "logprob,none": -0.9971367144803388, | |
| "logprob_stderr,none": 0.01710603442884549, | |
| "choice_logprob,none": -0.966932997884124, | |
| "choice_logprob_stderr,none": 0.016961873963066938, | |
| "choice_prob_norm,none": 0.5295111556301993, | |
| "choice_prob_norm_stderr,none": 0.004306882748278512, | |
| "choice_logprob_norm,none": -0.7885458839494792, | |
| "choice_logprob_norm_stderr,none": 0.010834385112369352 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.6, | |
| "acc_stderr,none": 0.04923659639173309, | |
| "bpb,none": 1.9200667700736944, | |
| "bpb_stderr,none": 0.05129172565603094, | |
| "logprob,none": -36.307588214874265, | |
| "logprob_stderr,none": 0.7025852589241688, | |
| "choice_logprob,none": -1.6349282082433967, | |
| "choice_logprob_stderr,none": 0.2563397538643638, | |
| "choice_prob_norm,none": 0.5045399053052483, | |
| "choice_prob_norm_stderr,none": 0.009019915694046634, | |
| "choice_logprob_norm,none": -0.7030432609585621, | |
| "choice_logprob_norm_stderr,none": 0.02061245573401139 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.19656019656019655, | |
| "acc_stderr,none": 0.011377439773964, | |
| "bpb,none": 3.700175443457485, | |
| "bpb_stderr,none": 0.09103222386766453, | |
| "logprob,none": -2.564766176209696, | |
| "logprob_stderr,none": 0.06309872931397345, | |
| "choice_logprob,none": -2.5384716222597494, | |
| "choice_logprob_stderr,none": 0.06310537788715918, | |
| "choice_prob_norm,none": 0.2011090417277458, | |
| "choice_prob_norm_stderr,none": 0.004951753153655882, | |
| "choice_logprob_norm,none": -3.1040275132391195, | |
| "choice_logprob_norm_stderr,none": 0.09112662467956259 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.21621621621621623, | |
| "acc_stderr,none": 0.01178588917548664, | |
| "acc_norm,none": 0.23587223587223588, | |
| "acc_norm_stderr,none": 0.012154622820781504, | |
| "bpb,none": 1.0214474073668882, | |
| "bpb_stderr,none": 0.012401567172941753, | |
| "logprob,none": -8.22377738944826, | |
| "logprob_stderr,none": 0.07575049549544435, | |
| "choice_logprob,none": -3.0526434544044316, | |
| "choice_logprob_stderr,none": 0.06789605144455163, | |
| "choice_prob_norm,none": 0.21230096957013608, | |
| "choice_prob_norm_stderr,none": 0.0020234177041320695, | |
| "choice_logprob_norm,none": -1.6128310731104252, | |
| "choice_logprob_norm_stderr,none": 0.010728376678642517 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.2818163712407887, | |
| "acc_stderr,none": 0.004489648865080841, | |
| "acc_norm,none": 0.29575781716789484, | |
| "acc_norm_stderr,none": 0.004554499409290671, | |
| "bpb,none": 1.0604522626751571, | |
| "bpb_stderr,none": 0.002347232941483141, | |
| "logprob,none": -97.45293985764549, | |
| "logprob_stderr,none": 0.4877105995743941, | |
| "choice_logprob,none": -27.044610263316372, | |
| "choice_logprob_stderr,none": 0.3163294608459877, | |
| "choice_prob_norm,none": 0.25792808333865613, | |
| "choice_prob_norm_stderr,none": 0.0004617017000080293, | |
| "choice_logprob_norm,none": -1.3713845923943457, | |
| "choice_logprob_norm_stderr,none": 0.0018215144098603715 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.27972515435172274, | |
| "acc_stderr,none": 0.004479467619464835, | |
| "acc_norm,none": 0.2927703644692292, | |
| "acc_norm_stderr,none": 0.004541039698729829, | |
| "bpb,none": 1.052438515268161, | |
| "bpb_stderr,none": 0.0022855055454450374, | |
| "logprob,none": -96.76464004902174, | |
| "logprob_stderr,none": 0.48497949747468927, | |
| "choice_logprob,none": -27.207802913947848, | |
| "choice_logprob_stderr,none": 0.3170761167400707, | |
| "choice_prob_norm,none": 0.2572310812638424, | |
| "choice_prob_norm_stderr,none": 0.00044265170655079846, | |
| "choice_logprob_norm,none": -1.3728559573518737, | |
| "choice_logprob_norm_stderr,none": 0.0017515763916904391 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.166, | |
| "acc_stderr,none": 0.01665661687653113, | |
| "acc_norm,none": 0.268, | |
| "acc_norm_stderr,none": 0.019827714859587592, | |
| "bpb,none": 2.0430624431901023, | |
| "bpb_stderr,none": 0.05348166345275381, | |
| "logprob,none": -21.434500136375426, | |
| "logprob_stderr,none": 0.5811476660213469, | |
| "choice_logprob,none": -7.1119697689300665, | |
| "choice_logprob_stderr,none": 0.3474521931093143, | |
| "choice_prob_norm,none": 0.2596508704253059, | |
| "choice_prob_norm_stderr,none": 0.005753092038542383, | |
| "choice_logprob_norm,none": -1.4996703821450863, | |
| "choice_logprob_norm_stderr,none": 0.03237955248457045 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.6126224156692056, | |
| "acc_stderr,none": 0.01136603808343591, | |
| "acc_norm,none": 0.5984766050054406, | |
| "acc_norm_stderr,none": 0.011437324373397844, | |
| "bpb,none": 1.346865107049732, | |
| "bpb_stderr,none": 0.011462353078616861, | |
| "logprob,none": -80.50279071048243, | |
| "logprob_stderr,none": 1.6973949430845747, | |
| "choice_logprob,none": -3.8368748542399627, | |
| "choice_logprob_stderr,none": 0.2499631337880358, | |
| "choice_prob_norm,none": 0.5057863173560802, | |
| "choice_prob_norm_stderr,none": 0.0014744863002791257, | |
| "choice_logprob_norm,none": -0.6904674005767536, | |
| "choice_logprob_norm_stderr,none": 0.0032460385380844265 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.5146014206787688, | |
| "acc_stderr,none": 0.014046492383275842, | |
| "bpb,none": 0.4855374598470294, | |
| "bpb_stderr,none": 0.025742442598531194, | |
| "logprob,none": -21.923497182700046, | |
| "logprob_stderr,none": 0.3158033457422442, | |
| "choice_logprob,none": -0.8000682439938214, | |
| "choice_logprob_stderr,none": 0.016809219923152076, | |
| "choice_prob_norm,none": 0.5009294975632409, | |
| "choice_prob_norm_stderr,none": 0.0007856596659935949, | |
| "choice_logprob_norm,none": -0.693629638764659, | |
| "choice_logprob_norm_stderr,none": 0.0024759192573889106 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.5091575091575091, | |
| "acc_stderr,none": 0.030311867945261854, | |
| "bpb,none": 0.7423261934631866, | |
| "bpb_stderr,none": 0.02451552761399825, | |
| "logprob,none": -30.840766480554155, | |
| "logprob_stderr,none": 0.5852038618254907, | |
| "choice_logprob,none": -0.9987486646253337, | |
| "choice_logprob_stderr,none": 0.06145714611343547, | |
| "choice_prob_norm,none": 0.5002619838156802, | |
| "choice_prob_norm_stderr,none": 0.001284676402507507, | |
| "choice_logprob_norm,none": -0.6935330445997154, | |
| "choice_logprob_norm_stderr,none": 0.002600351355440756 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.2763566818073153, | |
| "acc_stderr,none": 0.006915208017087954, | |
| "acc_norm,none": 0.2763566818073153, | |
| "acc_norm_stderr,none": 0.006915208017087954, | |
| "logprob,none": -1.4470207297343145, | |
| "logprob_stderr,none": 0.007964545144399966 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.22687066698541716, | |
| "acc_stderr,none": 0.006476244480762607, | |
| "acc_norm,none": 0.2534066459478843, | |
| "acc_norm_stderr,none": 0.00672603380780031, | |
| "bpb,none": 0.4870434199938374, | |
| "bpb_stderr,none": 0.005712604826753254, | |
| "logprob,none": -6.335990024017655, | |
| "logprob_stderr,none": 0.05467469291586012, | |
| "choice_logprob,none": -3.1816474104731958, | |
| "choice_logprob_stderr,none": 0.045359823837981435, | |
| "choice_prob_norm,none": 0.25134192350728396, | |
| "choice_prob_norm_stderr,none": 0.0008110848923755798, | |
| "choice_logprob_norm,none": -1.4059551486234891, | |
| "choice_logprob_norm_stderr,none": 0.0036537496170158316 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.22643818849449204, | |
| "acc_stderr,none": 0.014651337324602592, | |
| "logprob,none": -26.759819291066947, | |
| "logprob_stderr,none": 0.5077560946554257 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 254.650383417854, | |
| "nll_stderr,none": 2.865119070723374, | |
| "bpb,none": 1.3215150142333845, | |
| "bpb_stderr,none": 0.008789955135988231 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 107.93575578201109, | |
| "nll_stderr,none": 5.431641005740183, | |
| "bpb,none": 0.9908253818862274, | |
| "bpb_stderr,none": 0.03155326583552453 | |
| } | |
| } | |
| }, | |
| { | |
| "mix": "v3", | |
| "hidden_dim": 512, | |
| "budget": 2.19e+17, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "acc,none": 0.26684233015239994, | |
| "acc_stderr,none": 0.0037260168667051403, | |
| "acc_norm,none": 0.2812989602620709, | |
| "acc_norm_stderr,none": 0.0037901415430174196, | |
| "bpb,none": 0.6269147047636995, | |
| "bpb_stderr,none": 0.0032322057653284024, | |
| "logprob,none": -9.742547235739172, | |
| "logprob_stderr,none": 0.039023499834109354, | |
| "choice_logprob,none": -2.8547423022152967, | |
| "choice_logprob_stderr,none": 0.025801330341823882, | |
| "choice_prob_norm,none": 0.2541127371908383, | |
| "choice_prob_norm_stderr,none": 0.00041266809020224345, | |
| "choice_logprob_norm,none": -1.3928761929921796, | |
| "choice_logprob_norm_stderr,none": 0.0017974926944226569, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "acc,none": 0.2634240136732659, | |
| "acc_stderr,none": 0.003714353925489619, | |
| "acc_norm,none": 0.27994587665574705, | |
| "acc_norm_stderr,none": 0.003786695065744675, | |
| "bpb,none": 0.3349757325611144, | |
| "bpb_stderr,none": 0.0018413122245872007, | |
| "logprob,none": -6.265405864100078, | |
| "logprob_stderr,none": 0.040860611694026196, | |
| "choice_logprob,none": -2.696346618180892, | |
| "choice_logprob_stderr,none": 0.026354033252580806, | |
| "choice_prob_norm,none": 0.2525867221345686, | |
| "choice_prob_norm_stderr,none": 0.00028995850438336346, | |
| "choice_logprob_norm,none": -1.3874403115669558, | |
| "choice_logprob_norm_stderr,none": 0.0012565002854746177, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.18003412969283278, | |
| "acc_stderr,none": 0.011227856729050042, | |
| "acc_norm,none": 0.2090443686006826, | |
| "acc_norm_stderr,none": 0.011882746987406448, | |
| "bpb,none": 1.5056673895489818, | |
| "bpb_stderr,none": 0.026601531332319416, | |
| "logprob,none": -25.544686375217633, | |
| "logprob_stderr,none": 0.41765748907984457, | |
| "choice_logprob,none": -6.292516564525333, | |
| "choice_logprob_stderr,none": 0.18918113367678352, | |
| "choice_prob_norm,none": 0.24443493452430692, | |
| "choice_prob_norm_stderr,none": 0.002000694071827269, | |
| "choice_logprob_norm,none": -1.4584900106904373, | |
| "choice_logprob_norm_stderr,none": 0.010473236534207186 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.41203703703703703, | |
| "acc_stderr,none": 0.010099765857562759, | |
| "acc_norm,none": 0.3939393939393939, | |
| "acc_norm_stderr,none": 0.010026305355981802, | |
| "bpb,none": 1.366618370072764, | |
| "bpb_stderr,none": 0.013590783840504918, | |
| "logprob,none": -18.46069774623672, | |
| "logprob_stderr,none": 0.257249295863158, | |
| "choice_logprob,none": -3.3198551903359, | |
| "choice_logprob_stderr,none": 0.10477930078908752, | |
| "choice_prob_norm,none": 0.28026239543277925, | |
| "choice_prob_norm_stderr,none": 0.001861688811683428, | |
| "choice_logprob_norm,none": -1.3284425372742612, | |
| "choice_logprob_norm_stderr,none": 0.007358125735955402 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.5296636085626911, | |
| "acc_stderr,none": 0.008729651343606142, | |
| "bpb,none": 0.7080951594868731, | |
| "bpb_stderr,none": 0.007534277740155469, | |
| "logprob,none": -1.22105823525595, | |
| "logprob_stderr,none": 0.01234856754062047, | |
| "choice_logprob,none": -0.7337582696269975, | |
| "choice_logprob_stderr,none": 0.006584067693744113, | |
| "choice_prob_norm,none": 0.5249368286929101, | |
| "choice_prob_norm_stderr,none": 0.00239055660626695, | |
| "choice_logprob_norm,none": -0.6828626460422125, | |
| "choice_logprob_norm_stderr,none": 0.005038215177149247 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.5446483180428134, | |
| "acc_stderr,none": 0.008710119143608424, | |
| "acc_norm,none": 0.5602446483180428, | |
| "acc_norm_stderr,none": 0.008681343983423958, | |
| "bpb,none": 0.5584264300582101, | |
| "bpb_stderr,none": 0.010294208995737797, | |
| "logprob,none": -0.8867521860548481, | |
| "logprob_stderr,none": 0.01445560999137545, | |
| "choice_logprob,none": -0.8766496772391723, | |
| "choice_logprob_stderr,none": 0.014457699509682795, | |
| "choice_prob_norm,none": 0.5335762395809978, | |
| "choice_prob_norm_stderr,none": 0.004063768694464024, | |
| "choice_logprob_norm,none": -0.7611293301293726, | |
| "choice_logprob_norm_stderr,none": 0.010069680463397658 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.55, | |
| "acc_stderr,none": 0.05, | |
| "bpb,none": 1.9412281989491103, | |
| "bpb_stderr,none": 0.053503958123955586, | |
| "logprob,none": -36.64149988174439, | |
| "logprob_stderr,none": 0.7121939038766281, | |
| "choice_logprob,none": -1.8378197154960296, | |
| "choice_logprob_stderr,none": 0.30618391834273084, | |
| "choice_prob_norm,none": 0.4962514280310157, | |
| "choice_prob_norm_stderr,none": 0.008901716274512847, | |
| "choice_logprob_norm,none": -0.7193142676418873, | |
| "choice_logprob_norm_stderr,none": 0.020288767729933287 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.20638820638820637, | |
| "acc_stderr,none": 0.011586881879177835, | |
| "bpb,none": 3.3802579735168834, | |
| "bpb_stderr,none": 0.07427730102694352, | |
| "logprob,none": -2.3430162839085034, | |
| "logprob_stderr,none": 0.05148510178642825, | |
| "choice_logprob,none": -2.3370383041004765, | |
| "choice_logprob_stderr,none": 0.05148720286997365, | |
| "choice_prob_norm,none": 0.20358528776009918, | |
| "choice_prob_norm_stderr,none": 0.0047617388299911545, | |
| "choice_logprob_norm,none": -2.81402997057312, | |
| "choice_logprob_norm_stderr,none": 0.07434291950311715 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.24078624078624078, | |
| "acc_stderr,none": 0.012241029737913613, | |
| "acc_norm,none": 0.2596232596232596, | |
| "acc_norm_stderr,none": 0.012552154236305981, | |
| "bpb,none": 0.6344186175567482, | |
| "bpb_stderr,none": 0.010779995969509551, | |
| "logprob,none": -5.138235038269943, | |
| "logprob_stderr,none": 0.07413193757799576, | |
| "choice_logprob,none": -3.060806680465147, | |
| "choice_logprob_stderr,none": 0.07139531567057507, | |
| "choice_prob_norm,none": 0.21106069650439113, | |
| "choice_prob_norm_stderr,none": 0.0018090940948582233, | |
| "choice_logprob_norm,none": -1.610079916779593, | |
| "choice_logprob_norm_stderr,none": 0.010101732954993387 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.27434773949412466, | |
| "acc_stderr,none": 0.004452731272840582, | |
| "acc_norm,none": 0.2811192989444334, | |
| "acc_norm_stderr,none": 0.004486268470666261, | |
| "bpb,none": 1.0900296120083375, | |
| "bpb_stderr,none": 0.0023101684356920922, | |
| "logprob,none": -100.17899374039709, | |
| "logprob_stderr,none": 0.49890266118413285, | |
| "choice_logprob,none": -28.179910831945875, | |
| "choice_logprob_stderr,none": 0.325081453192061, | |
| "choice_prob_norm,none": 0.25565736368719405, | |
| "choice_prob_norm_stderr,none": 0.0004470412811279136, | |
| "choice_logprob_norm,none": -1.3794781373392433, | |
| "choice_logprob_norm_stderr,none": 0.0017778156873739898 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.2704640509858594, | |
| "acc_stderr,none": 0.004432917403755067, | |
| "acc_norm,none": 0.28121888070105555, | |
| "acc_norm_stderr,none": 0.004486752200430311, | |
| "bpb,none": 1.0971178635470091, | |
| "bpb_stderr,none": 0.0022757577536049486, | |
| "logprob,none": -100.99335159730256, | |
| "logprob_stderr,none": 0.5037734921466325, | |
| "choice_logprob,none": -28.430880582977448, | |
| "choice_logprob_stderr,none": 0.3272217991027028, | |
| "choice_prob_norm,none": 0.25518029494158306, | |
| "choice_prob_norm_stderr,none": 0.00043631586852072534, | |
| "choice_logprob_norm,none": -1.3806908926720352, | |
| "choice_logprob_norm_stderr,none": 0.0017406953068043025 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.148, | |
| "acc_stderr,none": 0.015896458561251246, | |
| "acc_norm,none": 0.246, | |
| "acc_norm_stderr,none": 0.019279819056352555, | |
| "bpb,none": 2.071987712869857, | |
| "bpb_stderr,none": 0.05143296580703173, | |
| "logprob,none": -21.7383246717453, | |
| "logprob_stderr,none": 0.5877514820689698, | |
| "choice_logprob,none": -7.230179626940098, | |
| "choice_logprob_stderr,none": 0.3540939204939283, | |
| "choice_prob_norm,none": 0.2544992299744627, | |
| "choice_prob_norm_stderr,none": 0.005523972794841713, | |
| "choice_logprob_norm,none": -1.5166920137611337, | |
| "choice_logprob_norm_stderr,none": 0.030278205322549843 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.6137105549510338, | |
| "acc_stderr,none": 0.011360138833823683, | |
| "acc_norm,none": 0.6164309031556039, | |
| "acc_norm_stderr,none": 0.011345128734116278, | |
| "bpb,none": 1.3502708496117517, | |
| "bpb_stderr,none": 0.011122066203128382, | |
| "logprob,none": -81.08814221964307, | |
| "logprob_stderr,none": 1.7058425758276525, | |
| "choice_logprob,none": -3.843236403699351, | |
| "choice_logprob_stderr,none": 0.2579128186990966, | |
| "choice_prob_norm,none": 0.5057770048799564, | |
| "choice_prob_norm_stderr,none": 0.0014346697565567714, | |
| "choice_logprob_norm,none": -0.6897870863103704, | |
| "choice_logprob_norm_stderr,none": 0.0030871781803235997 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.4909234411996843, | |
| "acc_stderr,none": 0.0140501700944977, | |
| "bpb,none": 0.4946571271106702, | |
| "bpb_stderr,none": 0.02708889395838287, | |
| "logprob,none": -22.255206053522315, | |
| "logprob_stderr,none": 0.3224974639590515, | |
| "choice_logprob,none": -0.7829697673753819, | |
| "choice_logprob_stderr,none": 0.014830317845139945, | |
| "choice_prob_norm,none": 0.5007496740254456, | |
| "choice_prob_norm_stderr,none": 0.0007808958933147997, | |
| "choice_logprob_norm,none": -0.6940473917226799, | |
| "choice_logprob_norm_stderr,none": 0.0025529763168751843 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.5128205128205128, | |
| "acc_stderr,none": 0.030306985365626097, | |
| "bpb,none": 0.7274675068738532, | |
| "bpb_stderr,none": 0.02522797824744406, | |
| "logprob,none": -30.15777778276157, | |
| "logprob_stderr,none": 0.5934826212390054, | |
| "choice_logprob,none": -0.9212171118300828, | |
| "choice_logprob_stderr,none": 0.05261533627178825, | |
| "choice_prob_norm,none": 0.5004322248767205, | |
| "choice_prob_norm_stderr,none": 0.001356269558879158, | |
| "choice_logprob_norm,none": -0.6932931798321386, | |
| "choice_logprob_norm_stderr,none": 0.002740215457699106 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.2749223045661009, | |
| "acc_stderr,none": 0.006904070961661391, | |
| "acc_norm,none": 0.2749223045661009, | |
| "acc_norm_stderr,none": 0.006904070961661391, | |
| "logprob,none": -1.525614637009641, | |
| "logprob_stderr,none": 0.009778845593370562 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.24695194836241932, | |
| "acc_stderr,none": 0.006668460128724211, | |
| "acc_norm,none": 0.265598852498207, | |
| "acc_norm_stderr,none": 0.006829481867963804, | |
| "bpb,none": 0.3154387533014714, | |
| "bpb_stderr,none": 0.003722807714106041, | |
| "logprob,none": -4.373024704067039, | |
| "logprob_stderr,none": 0.04474834589222865, | |
| "choice_logprob,none": -2.523247571724084, | |
| "choice_logprob_stderr,none": 0.034800071950167516, | |
| "choice_prob_norm,none": 0.2513992180444905, | |
| "choice_prob_norm_stderr,none": 0.0005814317939933852, | |
| "choice_logprob_norm,none": -1.3936370368957913, | |
| "choice_logprob_norm_stderr,none": 0.0026294029779488307 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.2386780905752754, | |
| "acc_stderr,none": 0.014922629695456416, | |
| "logprob,none": -27.703576085617083, | |
| "logprob_stderr,none": 0.5364291512585702 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 236.30206623612435, | |
| "nll_stderr,none": 2.81112604378555, | |
| "bpb,none": 1.2134874700959222, | |
| "bpb_stderr,none": 0.008180005640304246 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 79.22695929829668, | |
| "nll_stderr,none": 4.277470061189917, | |
| "bpb,none": 0.7462923271430614, | |
| "bpb_stderr,none": 0.02737976564104958 | |
| } | |
| } | |
| }, | |
| { | |
| "mix": "v4", | |
| "hidden_dim": 512, | |
| "budget": 2.19e+17, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "acc,none": 0.2553055120353226, | |
| "acc_stderr,none": 0.00367838581462502, | |
| "acc_norm,none": 0.27125765560461473, | |
| "acc_norm_stderr,none": 0.0037488085614330816, | |
| "bpb,none": 0.6390266847406983, | |
| "bpb_stderr,none": 0.003566320582268053, | |
| "logprob,none": -9.013411016877344, | |
| "logprob_stderr,none": 0.03638521570634543, | |
| "choice_logprob,none": -2.6363229740059633, | |
| "choice_logprob_stderr,none": 0.022939767755543034, | |
| "choice_prob_norm,none": 0.2524952111006773, | |
| "choice_prob_norm_stderr,none": 0.00037153168615355745, | |
| "choice_logprob_norm,none": -1.3953207683721074, | |
| "choice_logprob_norm_stderr,none": 0.0016240531970126697, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "acc,none": 0.26114513602050987, | |
| "acc_stderr,none": 0.0036996431729517452, | |
| "acc_norm,none": 0.2709015809713716, | |
| "acc_norm_stderr,none": 0.0037445679826478536, | |
| "bpb,none": 0.34944660629584556, | |
| "bpb_stderr,none": 0.0018626168249877765, | |
| "logprob,none": -6.3097955451112515, | |
| "logprob_stderr,none": 0.04402366277082722, | |
| "choice_logprob,none": -2.7861677576228936, | |
| "choice_logprob_stderr,none": 0.02728436697932576, | |
| "choice_prob_norm,none": 0.2527074983287065, | |
| "choice_prob_norm_stderr,none": 0.0002808249583230201, | |
| "choice_logprob_norm,none": -1.3858056394730192, | |
| "choice_logprob_norm_stderr,none": 0.0012062018737212107, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.17320819112627986, | |
| "acc_stderr,none": 0.011058694183280345, | |
| "acc_norm,none": 0.21843003412969283, | |
| "acc_norm_stderr,none": 0.012074291605700971, | |
| "bpb,none": 1.515098615907457, | |
| "bpb_stderr,none": 0.02647828888168984, | |
| "logprob,none": -25.406775207649726, | |
| "logprob_stderr,none": 0.4010915752813438, | |
| "choice_logprob,none": -6.128682172554726, | |
| "choice_logprob_stderr,none": 0.18670629478233425, | |
| "choice_prob_norm,none": 0.24424003177704995, | |
| "choice_prob_norm_stderr,none": 0.0019909469925435603, | |
| "choice_logprob_norm,none": -1.4608414972698616, | |
| "choice_logprob_norm_stderr,none": 0.010678727697787002 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.41624579124579125, | |
| "acc_stderr,none": 0.01011481940450087, | |
| "acc_norm,none": 0.39057239057239057, | |
| "acc_norm_stderr,none": 0.01001105911206425, | |
| "bpb,none": 1.4087741651802528, | |
| "bpb_stderr,none": 0.013796311572408584, | |
| "logprob,none": -18.62068775486866, | |
| "logprob_stderr,none": 0.24609795377486243, | |
| "choice_logprob,none": -3.277522570024079, | |
| "choice_logprob_stderr,none": 0.10122211999497388, | |
| "choice_prob_norm,none": 0.27907478198842045, | |
| "choice_prob_norm_stderr,none": 0.0018462385567067831, | |
| "choice_logprob_norm,none": -1.3317550554033473, | |
| "choice_logprob_norm_stderr,none": 0.007316569507089113 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.5137614678899083, | |
| "acc_stderr,none": 0.008741742106878657, | |
| "bpb,none": 0.7286610122314143, | |
| "bpb_stderr,none": 0.008914918894223323, | |
| "logprob,none": -1.273001033742129, | |
| "logprob_stderr,none": 0.01486321445527341, | |
| "choice_logprob,none": -0.7926264912596379, | |
| "choice_logprob_stderr,none": 0.0090788009785167, | |
| "choice_prob_norm,none": 0.5205144023555821, | |
| "choice_prob_norm_stderr,none": 0.0026616239644261415, | |
| "choice_logprob_norm,none": -0.7016038938416879, | |
| "choice_logprob_norm_stderr,none": 0.005709866492914998 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.5357798165137615, | |
| "acc_stderr,none": 0.008722635482201091, | |
| "acc_norm,none": 0.5602446483180428, | |
| "acc_norm_stderr,none": 0.008681343983423956, | |
| "bpb,none": 0.5509832646849105, | |
| "bpb_stderr,none": 0.009810966500804727, | |
| "logprob,none": -0.9067304967011152, | |
| "logprob_stderr,none": 0.014519104928619731, | |
| "choice_logprob,none": -0.8898638427319436, | |
| "choice_logprob_stderr,none": 0.014521917030274745, | |
| "choice_prob_norm,none": 0.527889529733078, | |
| "choice_prob_norm_stderr,none": 0.0038568469994045276, | |
| "choice_logprob_norm,none": -0.7594750535008054, | |
| "choice_logprob_norm_stderr,none": 0.009572287082251565 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.58, | |
| "acc_stderr,none": 0.049604496374885836, | |
| "bpb,none": 1.919532833929384, | |
| "bpb_stderr,none": 0.05030447484178161, | |
| "logprob,none": -36.31590324401856, | |
| "logprob_stderr,none": 0.7022728333295246, | |
| "choice_logprob,none": -1.5237651283051747, | |
| "choice_logprob_stderr,none": 0.2516922344221307, | |
| "choice_prob_norm,none": 0.499464094404323, | |
| "choice_prob_norm_stderr,none": 0.008018648398832776, | |
| "choice_logprob_norm,none": -0.7090264651354934, | |
| "choice_logprob_norm_stderr,none": 0.018050767048602083 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.20475020475020475, | |
| "acc_stderr,none": 0.011552714477876671, | |
| "bpb,none": 3.148213720923554, | |
| "bpb_stderr,none": 0.058978042609837675, | |
| "logprob,none": -2.1821754644582936, | |
| "logprob_stderr,none": 0.04088046394995327, | |
| "choice_logprob,none": -2.164364806547257, | |
| "choice_logprob_stderr,none": 0.04087718298862214, | |
| "choice_prob_norm,none": 0.20467033120522585, | |
| "choice_prob_norm_stderr,none": 0.00534799300109999, | |
| "choice_logprob_norm,none": -2.581631593711548, | |
| "choice_logprob_norm_stderr,none": 0.05906931912929533 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.21703521703521703, | |
| "acc_stderr,none": 0.011802018846529998, | |
| "acc_norm,none": 0.2244062244062244, | |
| "acc_norm_stderr,none": 0.011944134676023543, | |
| "bpb,none": 0.7256812573105682, | |
| "bpb_stderr,none": 0.00928482014813608, | |
| "logprob,none": -5.9137141026989735, | |
| "logprob_stderr,none": 0.06319895391742075, | |
| "choice_logprob,none": -2.7346849452204633, | |
| "choice_logprob_stderr,none": 0.05809857353035866, | |
| "choice_prob_norm,none": 0.20792358055004104, | |
| "choice_prob_norm_stderr,none": 0.0015422881711299034, | |
| "choice_logprob_norm,none": -1.6081352514053808, | |
| "choice_logprob_norm_stderr,none": 0.008189810299965718 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.27365066719776937, | |
| "acc_stderr,none": 0.0044492062959223385, | |
| "acc_norm,none": 0.28281218880701053, | |
| "acc_norm_stderr,none": 0.004494454911844578, | |
| "bpb,none": 1.0740874480201377, | |
| "bpb_stderr,none": 0.0022677168676193547, | |
| "logprob,none": -98.78564271754037, | |
| "logprob_stderr,none": 0.492517482756744, | |
| "choice_logprob,none": -27.512362821253223, | |
| "choice_logprob_stderr,none": 0.319339602879014, | |
| "choice_prob_norm,none": 0.2558516005274605, | |
| "choice_prob_norm_stderr,none": 0.0004393847946353529, | |
| "choice_logprob_norm,none": -1.3781728465369107, | |
| "choice_logprob_norm_stderr,none": 0.001746737742556392 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.2717586138219478, | |
| "acc_stderr,none": 0.0044395694474074245, | |
| "acc_norm,none": 0.2826130252937662, | |
| "acc_norm_stderr,none": 0.004493495872000052, | |
| "bpb,none": 1.0736875169267772, | |
| "bpb_stderr,none": 0.0022517433643217404, | |
| "logprob,none": -98.90815309641538, | |
| "logprob_stderr,none": 0.4948650918629855, | |
| "choice_logprob,none": -27.860797247308327, | |
| "choice_logprob_stderr,none": 0.3223766978580821, | |
| "choice_prob_norm,none": 0.2555068692757805, | |
| "choice_prob_norm_stderr,none": 0.00043432782926097806, | |
| "choice_logprob_norm,none": -1.3791684810533902, | |
| "choice_logprob_norm_stderr,none": 0.0017251494480918924 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.12, | |
| "acc_stderr,none": 0.014547276256845681, | |
| "acc_norm,none": 0.262, | |
| "acc_norm_stderr,none": 0.019684688820194727, | |
| "bpb,none": 2.079080085689421, | |
| "bpb_stderr,none": 0.05234480486774542, | |
| "logprob,none": -21.64301951789856, | |
| "logprob_stderr,none": 0.5691098132930806, | |
| "choice_logprob,none": -7.156216684167616, | |
| "choice_logprob_stderr,none": 0.3352144353973983, | |
| "choice_prob_norm,none": 0.25144437273949805, | |
| "choice_prob_norm_stderr,none": 0.005423994663229431, | |
| "choice_logprob_norm,none": -1.5301603545658207, | |
| "choice_logprob_norm_stderr,none": 0.030627603288517418 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.6175190424374319, | |
| "acc_stderr,none": 0.011339019654272347, | |
| "acc_norm,none": 0.6235038084874864, | |
| "acc_norm_stderr,none": 0.011304341550126724, | |
| "bpb,none": 1.319102970924556, | |
| "bpb_stderr,none": 0.010677881732896393, | |
| "logprob,none": -79.11812885282349, | |
| "logprob_stderr,none": 1.6524612666055258, | |
| "choice_logprob,none": -3.7497750724473944, | |
| "choice_logprob_stderr,none": 0.25588934103428335, | |
| "choice_prob_norm,none": 0.5070240070215941, | |
| "choice_prob_norm_stderr,none": 0.0014095207300394968, | |
| "choice_logprob_norm,none": -0.6867061286650964, | |
| "choice_logprob_norm_stderr,none": 0.002925559867739968 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.5067087608524072, | |
| "acc_stderr,none": 0.014051220692330346, | |
| "bpb,none": 0.4880828489628066, | |
| "bpb_stderr,none": 0.026688288906278192, | |
| "logprob,none": -21.954895537014355, | |
| "logprob_stderr,none": 0.3190544626321982, | |
| "choice_logprob,none": -0.7630275153842117, | |
| "choice_logprob_stderr,none": 0.012693712932140106, | |
| "choice_prob_norm,none": 0.5008606085032802, | |
| "choice_prob_norm_stderr,none": 0.0007794560275754764, | |
| "choice_logprob_norm,none": -0.6937036703519631, | |
| "choice_logprob_norm_stderr,none": 0.002436890803632395 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.5311355311355311, | |
| "acc_stderr,none": 0.030258116236228354, | |
| "bpb,none": 0.7306337300105169, | |
| "bpb_stderr,none": 0.026023760734723574, | |
| "logprob,none": -30.255369392506804, | |
| "logprob_stderr,none": 0.5934678978621671, | |
| "choice_logprob,none": -0.8493739626737397, | |
| "choice_logprob_stderr,none": 0.04461467499844924, | |
| "choice_prob_norm,none": 0.5004867547282129, | |
| "choice_prob_norm_stderr,none": 0.0013390217607164177, | |
| "choice_logprob_norm,none": -0.6931541682543674, | |
| "choice_logprob_norm_stderr,none": 0.0026953318225662716 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.254601960315563, | |
| "acc_stderr,none": 0.006736479313671978, | |
| "acc_norm,none": 0.254601960315563, | |
| "acc_norm_stderr,none": 0.006736479313671978, | |
| "logprob,none": -1.5643893120364656, | |
| "logprob_stderr,none": 0.009425238897616482 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.23595505617977527, | |
| "acc_stderr,none": 0.006565715840345552, | |
| "acc_norm,none": 0.2660769782452785, | |
| "acc_norm_stderr,none": 0.006833400751317717, | |
| "bpb,none": 0.3124402539296528, | |
| "bpb_stderr,none": 0.0037842713576437255, | |
| "logprob,none": -4.155512506757203, | |
| "logprob_stderr,none": 0.03843810575569772, | |
| "choice_logprob,none": -2.290517659898563, | |
| "choice_logprob_stderr,none": 0.030926033553213488, | |
| "choice_prob_norm,none": 0.25180879174081117, | |
| "choice_prob_norm_stderr,none": 0.0005218437168622727, | |
| "choice_logprob_norm,none": -1.3893679942965198, | |
| "choice_logprob_norm_stderr,none": 0.0023539446344596877 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.24112607099143207, | |
| "acc_stderr,none": 0.014974827279752339, | |
| "logprob,none": -27.578771195475884, | |
| "logprob_stderr,none": 0.5337992699494261 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 230.45470771182207, | |
| "nll_stderr,none": 2.6424109037706884, | |
| "bpb,none": 1.1896377127095594, | |
| "bpb_stderr,none": 0.007719148528220743 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 81.68491005316014, | |
| "nll_stderr,none": 4.32215653040701, | |
| "bpb,none": 0.7464391807207046, | |
| "bpb_stderr,none": 0.026115364473812668 | |
| } | |
| } | |
| }, | |
| { | |
| "mix": "v0", | |
| "hidden_dim": 768, | |
| "budget": 1.7e+18, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "acc_norm,none": 0.2775957840763424, | |
| "acc_norm_stderr,none": 0.0037750876153391203, | |
| "choice_logprob_norm,none": -1.391611070771956, | |
| "choice_logprob_norm_stderr,none": 0.0015553923744926368, | |
| "choice_logprob,none": -2.3037084751387655, | |
| "choice_logprob_stderr,none": 0.017418539987674318, | |
| "choice_prob_norm,none": 0.2530658875682952, | |
| "choice_prob_norm_stderr,none": 0.0003613764085044182, | |
| "bpb,none": 0.6721383592481129, | |
| "bpb_stderr,none": 0.0034005418561082676, | |
| "acc,none": 0.26150121065375304, | |
| "acc_stderr,none": 0.0037041449232690232, | |
| "logprob,none": -9.8900194814625, | |
| "logprob_stderr,none": 0.02629969002592281 | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "acc,none": 0.2701182167782367, | |
| "acc_stderr,none": 0.0037393188843305473, | |
| "acc_norm,none": 0.2822959692351517, | |
| "acc_norm_stderr,none": 0.0037926419826868423, | |
| "choice_prob_norm,none": 0.253361590100185, | |
| "choice_prob_norm_stderr,none": 0.00031179648016030603, | |
| "choice_logprob_norm,none": -1.3862776793159397, | |
| "choice_logprob_norm_stderr,none": 0.0013342697128226242, | |
| "bpb,none": 0.36913150000490247, | |
| "bpb_stderr,none": 0.0021225757010844147, | |
| "logprob,none": -6.0357810376667285, | |
| "logprob_stderr,none": 0.028751212649447598, | |
| "choice_logprob,none": -2.560116508422288, | |
| "choice_logprob_stderr,none": 0.020859660753929612 | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.1945392491467577, | |
| "acc_stderr,none": 0.011567709174648728, | |
| "acc_norm,none": 0.25, | |
| "acc_norm_stderr,none": 0.012653835621466646, | |
| "bpb,none": 1.2990533200623655, | |
| "bpb_stderr,none": 0.02389440741664436, | |
| "logprob,none": -21.88834087026811, | |
| "logprob_stderr,none": 0.3619037308298327, | |
| "choice_logprob,none": -5.4782723607587895, | |
| "choice_logprob_stderr,none": 0.16721914210574626, | |
| "choice_prob_norm,none": 0.24918523792736294, | |
| "choice_prob_norm_stderr,none": 0.0021066020166690346, | |
| "choice_logprob_norm,none": -1.4422897685462663, | |
| "choice_logprob_norm_stderr,none": 0.010874403351557026 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.49326599326599324, | |
| "acc_stderr,none": 0.010258852980991825, | |
| "acc_norm,none": 0.4654882154882155, | |
| "acc_norm_stderr,none": 0.010235314238969392, | |
| "bpb,none": 1.1177755064573227, | |
| "bpb_stderr,none": 0.011837585105800234, | |
| "logprob,none": -15.169280452158315, | |
| "logprob_stderr,none": 0.22071591731508317, | |
| "choice_logprob,none": -2.576503623539023, | |
| "choice_logprob_stderr,none": 0.08702443636440085, | |
| "choice_prob_norm,none": 0.2924226290313906, | |
| "choice_prob_norm_stderr,none": 0.0017791272246730923, | |
| "choice_logprob_norm,none": -1.2745287621863055, | |
| "choice_logprob_norm_stderr,none": 0.006427102488010312 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.5850152905198777, | |
| "acc_stderr,none": 0.008617716361921568, | |
| "bpb,none": 0.49409554146117635, | |
| "bpb_stderr,none": 0.007473232864219822, | |
| "logprob,none": -0.7884021228241993, | |
| "logprob_stderr,none": 0.00996583463958122, | |
| "choice_logprob,none": -0.7369341656711315, | |
| "choice_logprob_stderr,none": 0.009847674123941044, | |
| "choice_prob_norm,none": 0.5399909671321121, | |
| "choice_prob_norm_stderr,none": 0.003280597434464875, | |
| "choice_logprob_norm,none": -0.6926188752292329, | |
| "choice_logprob_norm_stderr,none": 0.007338771601016744 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.5602446483180428, | |
| "acc_stderr,none": 0.008681343983423961, | |
| "acc_norm,none": 0.5807339449541284, | |
| "acc_norm_stderr,none": 0.008630302070999095, | |
| "bpb,none": 0.5007309178926469, | |
| "bpb_stderr,none": 0.00840140955320915, | |
| "logprob,none": -0.8160195627708319, | |
| "logprob_stderr,none": 0.012073004117876136, | |
| "choice_logprob,none": -0.805065733835213, | |
| "choice_logprob_stderr,none": 0.012071066960501553, | |
| "choice_prob_norm,none": 0.535390097067654, | |
| "choice_prob_norm_stderr,none": 0.0035521285398966668, | |
| "choice_logprob_norm,none": -0.7187861770233629, | |
| "choice_logprob_norm_stderr,none": 0.00826172120852258 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.67, | |
| "acc_stderr,none": 0.04725815626252606, | |
| "bpb,none": 1.745047501847046, | |
| "bpb_stderr,none": 0.04642315348274156, | |
| "logprob,none": -32.88267251968384, | |
| "logprob_stderr,none": 0.5847845172179112, | |
| "choice_logprob,none": -1.217226976845902, | |
| "choice_logprob_stderr,none": 0.2178711669542397, | |
| "choice_prob_norm,none": 0.5100176054638571, | |
| "choice_prob_norm_stderr,none": 0.008069449235766888, | |
| "choice_logprob_norm,none": -0.6875663877071697, | |
| "choice_logprob_norm_stderr,none": 0.01768362645271402 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.20884520884520885, | |
| "acc_stderr,none": 0.011637590576063055, | |
| "bpb,none": 2.7082703285716865, | |
| "bpb_stderr,none": 0.03715734041556212, | |
| "logprob,none": -1.8772299424436227, | |
| "logprob_stderr,none": 0.025755505746153002, | |
| "choice_logprob,none": -1.8585579291608698, | |
| "choice_logprob_stderr,none": 0.02580663121801978, | |
| "choice_prob_norm,none": 0.20429292250736647, | |
| "choice_prob_norm_stderr,none": 0.004053738224061062, | |
| "choice_logprob_norm,none": -2.067910764468694, | |
| "choice_logprob_norm_stderr,none": 0.03749094489139384 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.21048321048321048, | |
| "acc_stderr,none": 0.011671038436522901, | |
| "acc_norm,none": 0.23996723996723995, | |
| "acc_norm_stderr,none": 0.012226783409751469, | |
| "bpb,none": 0.6441823880814893, | |
| "bpb_stderr,none": 0.008549812822294335, | |
| "logprob,none": -5.222800996828822, | |
| "logprob_stderr,none": 0.05706464555613182, | |
| "choice_logprob,none": -2.5830368086363924, | |
| "choice_logprob_stderr,none": 0.05420669315677992, | |
| "choice_prob_norm,none": 0.21245686706248418, | |
| "choice_prob_norm_stderr,none": 0.0015317316633513673, | |
| "choice_logprob_norm,none": -1.5843735654273707, | |
| "choice_logprob_norm_stderr,none": 0.007915773259093957 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.30103565026887075, | |
| "acc_stderr,none": 0.004577707025031402, | |
| "acc_norm,none": 0.33608842859988053, | |
| "acc_norm_stderr,none": 0.004714041652598645, | |
| "bpb,none": 0.9569241910847363, | |
| "bpb_stderr,none": 0.0021455688918908702, | |
| "logprob,none": -87.91369925982163, | |
| "logprob_stderr,none": 0.43935017760698675, | |
| "choice_logprob,none": -23.19143379806059, | |
| "choice_logprob_stderr,none": 0.2796031552364701, | |
| "choice_prob_norm,none": 0.26202436456375305, | |
| "choice_prob_norm_stderr,none": 0.00042843709347952896, | |
| "choice_logprob_norm,none": -1.352910072370103, | |
| "choice_logprob_norm_stderr,none": 0.0016609235613495087 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.2982473610834495, | |
| "acc_stderr,none": 0.004565536808632522, | |
| "acc_norm,none": 0.3325034853614818, | |
| "acc_norm_stderr,none": 0.004701474865207039, | |
| "bpb,none": 0.9521061871979787, | |
| "bpb_stderr,none": 0.0021221993555018753, | |
| "logprob,none": -87.59647112200399, | |
| "logprob_stderr,none": 0.439894342974009, | |
| "choice_logprob,none": -23.552198224947265, | |
| "choice_logprob_stderr,none": 0.2827691640786014, | |
| "choice_prob_norm,none": 0.2615570232697049, | |
| "choice_prob_norm_stderr,none": 0.0004246979309501359, | |
| "choice_logprob_norm,none": -1.3544657472921613, | |
| "choice_logprob_norm_stderr,none": 0.0016456457725755095 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.18, | |
| "acc_stderr,none": 0.017198592476314264, | |
| "acc_norm,none": 0.27, | |
| "acc_norm_stderr,none": 0.01987435483128749, | |
| "bpb,none": 1.9446477187944575, | |
| "bpb_stderr,none": 0.052677841494031055, | |
| "logprob,none": -20.190474004745482, | |
| "logprob_stderr,none": 0.5390781181062633, | |
| "choice_logprob,none": -6.4651767189708655, | |
| "choice_logprob_stderr,none": 0.3134653299574563, | |
| "choice_prob_norm,none": 0.2648284760112857, | |
| "choice_prob_norm_stderr,none": 0.005724075145660683, | |
| "choice_logprob_norm,none": -1.4763545745536883, | |
| "choice_logprob_norm_stderr,none": 0.03181823190384659 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.6550598476605005, | |
| "acc_stderr,none": 0.011090670102993161, | |
| "acc_norm,none": 0.6512513601741022, | |
| "acc_norm_stderr,none": 0.011119263056159588, | |
| "bpb,none": 1.1999875195553047, | |
| "bpb_stderr,none": 0.011284952406624285, | |
| "logprob,none": -71.1018480836891, | |
| "logprob_stderr,none": 1.4948048391986262, | |
| "choice_logprob,none": -3.265468970186985, | |
| "choice_logprob_stderr,none": 0.22136651025327092, | |
| "choice_prob_norm,none": 0.5102461993314247, | |
| "choice_prob_norm_stderr,none": 0.0014059441446873727, | |
| "choice_logprob_norm,none": -0.6809067069938138, | |
| "choice_logprob_norm_stderr,none": 0.003156800336821187 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.5114443567482242, | |
| "acc_stderr,none": 0.014048804199859329, | |
| "bpb,none": 0.45609870059930196, | |
| "bpb_stderr,none": 0.025146910655114735, | |
| "logprob,none": -20.520925116520186, | |
| "logprob_stderr,none": 0.29678939016817496, | |
| "choice_logprob,none": -0.7704629461893834, | |
| "choice_logprob_stderr,none": 0.015193384863767105, | |
| "choice_prob_norm,none": 0.5010707054381249, | |
| "choice_prob_norm_stderr,none": 0.0007834292997275223, | |
| "choice_logprob_norm,none": -0.6936490525307388, | |
| "choice_logprob_norm_stderr,none": 0.0027829346803724215 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.5531135531135531, | |
| "acc_stderr,none": 0.030145416591160438, | |
| "bpb,none": 0.67698671023787, | |
| "bpb_stderr,none": 0.021979327889394554, | |
| "logprob,none": -28.21373940562154, | |
| "logprob_stderr,none": 0.5021699672178789, | |
| "choice_logprob,none": -0.8130615044760181, | |
| "choice_logprob_stderr,none": 0.04625245472538832, | |
| "choice_prob_norm,none": 0.5016862376228498, | |
| "choice_prob_norm_stderr,none": 0.0012715073257390224, | |
| "choice_logprob_norm,none": -0.6906336423309668, | |
| "choice_logprob_norm_stderr,none": 0.0024950331619603837 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.25197226870666983, | |
| "acc_stderr,none": 0.00671341057274834, | |
| "acc_norm,none": 0.25197226870666983, | |
| "acc_norm_stderr,none": 0.00671341057274834, | |
| "logprob,none": -1.4492818664598204, | |
| "logprob_stderr,none": 0.0056398661877203465 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.24049725077695433, | |
| "acc_stderr,none": 0.006608877757906801, | |
| "acc_norm,none": 0.2550800860626345, | |
| "acc_norm_stderr,none": 0.006740638791982925, | |
| "bpb,none": 0.36389366195139783, | |
| "bpb_stderr,none": 0.004394280237268685, | |
| "logprob,none": -4.67044810487299, | |
| "logprob_stderr,none": 0.03853183344816826, | |
| "choice_logprob,none": -2.5214551319843004, | |
| "choice_logprob_stderr,none": 0.03263217231457442, | |
| "choice_prob_norm,none": 0.25215177191095767, | |
| "choice_prob_norm_stderr,none": 0.0006548805042050642, | |
| "choice_logprob_norm,none": -1.3941606684728476, | |
| "choice_logprob_norm_stderr,none": 0.0029943180744198277 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.2215422276621787, | |
| "acc_stderr,none": 0.014537867601301145, | |
| "logprob,none": -24.17374606114998, | |
| "logprob_stderr,none": 0.45471216502872974 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 187.34562071134323, | |
| "nll_stderr,none": 2.254213084999866, | |
| "bpb,none": 0.9638362974839736, | |
| "bpb_stderr,none": 0.0069839552336510066 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 75.958770810104, | |
| "nll_stderr,none": 3.869064462254324, | |
| "bpb,none": 0.7154369386112661, | |
| "bpb_stderr,none": 0.024701782506346564 | |
| } | |
| } | |
| }, | |
| { | |
| "mix": "v2", | |
| "hidden_dim": 768, | |
| "budget": 1.7e+18, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "choice_prob_norm,none": 0.2540078438483089, | |
| "choice_prob_norm_stderr,none": 0.0003950146977538393, | |
| "bpb,none": 0.630836113763738, | |
| "bpb_stderr,none": 0.0034420608674845072, | |
| "logprob,none": -9.216035056737201, | |
| "logprob_stderr,none": 0.028407621684874686, | |
| "acc,none": 0.26605896595926504, | |
| "acc_stderr,none": 0.0037265302413822345, | |
| "choice_logprob,none": -2.5552756392952287, | |
| "choice_logprob_stderr,none": 0.02051436813616077, | |
| "choice_logprob_norm,none": -1.3921231673770733, | |
| "choice_logprob_norm_stderr,none": 0.001696175570576605, | |
| "acc_norm,none": 0.2751032616436405, | |
| "acc_norm_stderr,none": 0.003757557237347936 | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "acc,none": 0.2741774675972084, | |
| "acc_stderr,none": 0.003762954740555449, | |
| "choice_prob_norm,none": 0.25372568733411754, | |
| "choice_prob_norm_stderr,none": 0.00031626526844855867, | |
| "bpb,none": 0.3520098493411551, | |
| "bpb_stderr,none": 0.0020665687953088357, | |
| "acc_norm,none": 0.2803731662156388, | |
| "acc_norm_stderr,none": 0.00378647862035118, | |
| "logprob,none": -6.0582327865717485, | |
| "logprob_stderr,none": 0.035340315604244055, | |
| "choice_logprob,none": -2.756019169963866, | |
| "choice_logprob_stderr,none": 0.025075966501954407, | |
| "choice_logprob_norm,none": -1.384894680459617, | |
| "choice_logprob_norm_stderr,none": 0.0013349007829572429 | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.2175767918088737, | |
| "acc_stderr,none": 0.012057262020972502, | |
| "acc_norm,none": 0.2645051194539249, | |
| "acc_norm_stderr,none": 0.012889272949313364, | |
| "bpb,none": 1.2767585656683982, | |
| "bpb_stderr,none": 0.023838389186886518, | |
| "logprob,none": -21.650589166647745, | |
| "logprob_stderr,none": 0.36685339341096324, | |
| "choice_logprob,none": -5.38573060146569, | |
| "choice_logprob_stderr,none": 0.1690200550739846, | |
| "choice_prob_norm,none": 0.2517412157952101, | |
| "choice_prob_norm_stderr,none": 0.0020922538941133286, | |
| "choice_logprob_norm,none": -1.4322964730580594, | |
| "choice_logprob_norm_stderr,none": 0.010889640242400956 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.5109427609427609, | |
| "acc_stderr,none": 0.010257326131172875, | |
| "acc_norm,none": 0.5004208754208754, | |
| "acc_norm_stderr,none": 0.010259779886094418, | |
| "bpb,none": 1.0780598841198599, | |
| "bpb_stderr,none": 0.012181664239511076, | |
| "logprob,none": -14.719918382488919, | |
| "logprob_stderr,none": 0.2191182331259823, | |
| "choice_logprob,none": -2.4034967520442434, | |
| "choice_logprob_stderr,none": 0.08495732381111372, | |
| "choice_prob_norm,none": 0.2998081729503958, | |
| "choice_prob_norm_stderr,none": 0.0018599082559222627, | |
| "choice_logprob_norm,none": -1.2522716570330117, | |
| "choice_logprob_norm_stderr,none": 0.0066828004516811205 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.5241590214067279, | |
| "acc_stderr,none": 0.008734840763194161, | |
| "bpb,none": 0.6066002273508516, | |
| "bpb_stderr,none": 0.007455694237622892, | |
| "logprob,none": -1.0340861801707417, | |
| "logprob_stderr,none": 0.011358614622538975, | |
| "choice_logprob,none": -0.8071693390587505, | |
| "choice_logprob_stderr,none": 0.010142921993201668, | |
| "choice_prob_norm,none": 0.5222511487380298, | |
| "choice_prob_norm_stderr,none": 0.0030723125576453416, | |
| "choice_logprob_norm,none": -0.7173137661181964, | |
| "choice_logprob_norm_stderr,none": 0.006834398042458719 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.5697247706422018, | |
| "acc_stderr,none": 0.008659608602932495, | |
| "acc_norm,none": 0.5837920489296636, | |
| "acc_norm_stderr,none": 0.008621380519419278, | |
| "bpb,none": 0.7037882765719319, | |
| "bpb_stderr,none": 0.015542286079910459, | |
| "logprob,none": -1.0663110018505597, | |
| "logprob_stderr,none": 0.021257464026123718, | |
| "choice_logprob,none": -1.0127293218081876, | |
| "choice_logprob_stderr,none": 0.02130786713780054, | |
| "choice_prob_norm,none": 0.5533353622550738, | |
| "choice_prob_norm_stderr,none": 0.005088416740268586, | |
| "choice_logprob_norm,none": -0.8457833515832397, | |
| "choice_logprob_norm_stderr,none": 0.015181862190279869 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.65, | |
| "acc_stderr,none": 0.047937248544110196, | |
| "bpb,none": 1.8546625683366378, | |
| "bpb_stderr,none": 0.05305699917413002, | |
| "logprob,none": -34.73192947387695, | |
| "logprob_stderr,none": 0.5707587662603286, | |
| "choice_logprob,none": -1.2919342387281028, | |
| "choice_logprob_stderr,none": 0.22408305328415662, | |
| "choice_prob_norm,none": 0.5085437035242247, | |
| "choice_prob_norm_stderr,none": 0.009136998278660999, | |
| "choice_logprob_norm,none": -0.6943084074677344, | |
| "choice_logprob_norm_stderr,none": 0.019840236643233748 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.21212121212121213, | |
| "acc_stderr,none": 0.01170420281420025, | |
| "bpb,none": 3.500847134179842, | |
| "bpb_stderr,none": 0.081985779112656, | |
| "logprob,none": -2.426602320628123, | |
| "logprob_stderr,none": 0.056828211637947934, | |
| "choice_logprob,none": -2.414209093293303, | |
| "choice_logprob_stderr,none": 0.05684788904568409, | |
| "choice_prob_norm,none": 0.2026777514576224, | |
| "choice_prob_norm_stderr,none": 0.005205199439650168, | |
| "choice_logprob_norm,none": -2.9333657572689336, | |
| "choice_logprob_norm_stderr,none": 0.08218007673061352 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.27764127764127766, | |
| "acc_stderr,none": 0.012821491901599497, | |
| "acc_norm,none": 0.28992628992628994, | |
| "acc_norm_stderr,none": 0.012990192906601208, | |
| "bpb,none": 0.7278105251167664, | |
| "bpb_stderr,none": 0.009948200510483247, | |
| "logprob,none": -5.857854351852879, | |
| "logprob_stderr,none": 0.06200791056605534, | |
| "choice_logprob,none": -2.4607794690097635, | |
| "choice_logprob_stderr,none": 0.05734824369698029, | |
| "choice_prob_norm,none": 0.21963906264811586, | |
| "choice_prob_norm_stderr,none": 0.0017687153334317231, | |
| "choice_logprob_norm,none": -1.560420362611322, | |
| "choice_logprob_norm_stderr,none": 0.009020672665039387 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.3200557657837084, | |
| "acc_stderr,none": 0.004655442766599458, | |
| "acc_norm,none": 0.3679545907189803, | |
| "acc_norm_stderr,none": 0.004812633280078266, | |
| "bpb,none": 0.9540877214137002, | |
| "bpb_stderr,none": 0.00222792153316327, | |
| "logprob,none": -87.33792356147492, | |
| "logprob_stderr,none": 0.43654116368740237, | |
| "choice_logprob,none": -22.551195968341176, | |
| "choice_logprob_stderr,none": 0.2775152461199856, | |
| "choice_prob_norm,none": 0.2656253688728796, | |
| "choice_prob_norm_stderr,none": 0.00044812549442936736, | |
| "choice_logprob_norm,none": -1.3402022360881427, | |
| "choice_logprob_norm_stderr,none": 0.0017198324351090767 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.3104959171479785, | |
| "acc_stderr,none": 0.004617510423156671, | |
| "acc_norm,none": 0.3575980880302729, | |
| "acc_norm_stderr,none": 0.004783133725599507, | |
| "bpb,none": 0.9459726089030255, | |
| "bpb_stderr,none": 0.00214501632085492, | |
| "logprob,none": -86.8965860186666, | |
| "logprob_stderr,none": 0.43671983617678317, | |
| "choice_logprob,none": -22.94937022533771, | |
| "choice_logprob_stderr,none": 0.27954745144555293, | |
| "choice_prob_norm,none": 0.26374843093315214, | |
| "choice_prob_norm_stderr,none": 0.0004292208136610329, | |
| "choice_logprob_norm,none": -1.3462078377996887, | |
| "choice_logprob_norm_stderr,none": 0.0016516130788471967 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.202, | |
| "acc_stderr,none": 0.017973260031288258, | |
| "acc_norm,none": 0.304, | |
| "acc_norm_stderr,none": 0.020591649571224932, | |
| "bpb,none": 1.9421223179926048, | |
| "bpb_stderr,none": 0.05390181468108013, | |
| "logprob,none": -20.149716567993163, | |
| "logprob_stderr,none": 0.5456166117411405, | |
| "choice_logprob,none": -6.528613793204991, | |
| "choice_logprob_stderr,none": 0.32731071815575646, | |
| "choice_prob_norm,none": 0.2695584061430335, | |
| "choice_prob_norm_stderr,none": 0.005978992778579398, | |
| "choice_logprob_norm,none": -1.4669640208829184, | |
| "choice_logprob_norm_stderr,none": 0.032866052905030904 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.6588683351468988, | |
| "acc_stderr,none": 0.011061289443962714, | |
| "acc_norm,none": 0.6577801958650707, | |
| "acc_norm_stderr,none": 0.011069764658685451, | |
| "bpb,none": 1.2017750498066977, | |
| "bpb_stderr,none": 0.011273415908386208, | |
| "logprob,none": -71.33160145835336, | |
| "logprob_stderr,none": 1.510308364798975, | |
| "choice_logprob,none": -3.2394468434463275, | |
| "choice_logprob_stderr,none": 0.22018840222417524, | |
| "choice_prob_norm,none": 0.5110515190521396, | |
| "choice_prob_norm_stderr,none": 0.0013972988334892084, | |
| "choice_logprob_norm,none": -0.6788681723766132, | |
| "choice_logprob_norm_stderr,none": 0.0029807177483003942 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.4940805051302289, | |
| "acc_stderr,none": 0.014051500838485807, | |
| "bpb,none": 0.4533295910393437, | |
| "bpb_stderr,none": 0.0246540642680002, | |
| "logprob,none": -20.42467473940108, | |
| "logprob_stderr,none": 0.29524299652390007, | |
| "choice_logprob,none": -0.7792085946040049, | |
| "choice_logprob_stderr,none": 0.01540477769465132, | |
| "choice_prob_norm,none": 0.5012321330767487, | |
| "choice_prob_norm_stderr,none": 0.0008050836840195189, | |
| "choice_logprob_norm,none": -0.6927810087161707, | |
| "choice_logprob_norm_stderr,none": 0.002203592803854555 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.5421245421245421, | |
| "acc_stderr,none": 0.03020916805173445, | |
| "bpb,none": 0.69385733165508, | |
| "bpb_stderr,none": 0.022998563698878597, | |
| "logprob,none": -28.90814703637427, | |
| "logprob_stderr,none": 0.524553202970914, | |
| "choice_logprob,none": -0.9073719422206339, | |
| "choice_logprob_stderr,none": 0.05850329381819185, | |
| "choice_prob_norm,none": 0.5017320241407558, | |
| "choice_prob_norm_stderr,none": 0.001213596529043583, | |
| "choice_logprob_norm,none": -0.6904815685479141, | |
| "choice_logprob_norm_stderr,none": 0.002413960436212978 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.2634472866363854, | |
| "acc_stderr,none": 0.006811719684474193, | |
| "acc_norm,none": 0.2634472866363854, | |
| "acc_norm_stderr,none": 0.006811719684474193, | |
| "logprob,none": -1.540723278338391, | |
| "logprob_stderr,none": 0.0093866109633236 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.23691130767391824, | |
| "acc_stderr,none": 0.006574888462196943, | |
| "acc_norm,none": 0.2589050920392063, | |
| "acc_norm_stderr,none": 0.006773532148760628, | |
| "bpb,none": 0.33043938542739426, | |
| "bpb_stderr,none": 0.0041065256563731015, | |
| "logprob,none": -4.312089565636409, | |
| "logprob_stderr,none": 0.04196129456888169, | |
| "choice_logprob,none": -2.507792368013145, | |
| "choice_logprob_stderr,none": 0.03505141101309658, | |
| "choice_prob_norm,none": 0.2521027067325027, | |
| "choice_prob_norm_stderr,none": 0.0006165004483683178, | |
| "choice_logprob_norm,none": -1.3921941747791449, | |
| "choice_logprob_norm_stderr,none": 0.002789274572546021 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.19951040391676866, | |
| "acc_stderr,none": 0.013989929967559664, | |
| "logprob,none": -23.7913622867813, | |
| "logprob_stderr,none": 0.45205070626438093 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 203.36205847322987, | |
| "nll_stderr,none": 2.37812013888117, | |
| "bpb,none": 1.0524536086094196, | |
| "bpb_stderr,none": 0.007615089454244769 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 82.56886346747235, | |
| "nll_stderr,none": 4.131968135247329, | |
| "bpb,none": 0.7633049076553656, | |
| "bpb_stderr,none": 0.025037153623458777 | |
| } | |
| } | |
| }, | |
| { | |
| "mix": "v3", | |
| "hidden_dim": 768, | |
| "budget": 1.7e+18, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "acc,none": 0.2649195271328871, | |
| "acc_stderr,none": 0.0037155548914596776, | |
| "acc_norm,none": 0.2806580259222333, | |
| "acc_norm_stderr,none": 0.0037854715595279447, | |
| "bpb,none": 0.6024695207378018, | |
| "bpb_stderr,none": 0.00320163813255778, | |
| "logprob,none": -8.978868199945595, | |
| "logprob_stderr,none": 0.028158187532551462, | |
| "choice_logprob,none": -2.334509456091557, | |
| "choice_logprob_stderr,none": 0.018004150410301928, | |
| "choice_prob_norm,none": 0.25327204781743756, | |
| "choice_prob_norm_stderr,none": 0.00036052901475016897, | |
| "choice_logprob_norm,none": -1.3915630382334927, | |
| "choice_logprob_norm_stderr,none": 0.0015694241062031151, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "acc,none": 0.27075915111807436, | |
| "acc_stderr,none": 0.003744294461398903, | |
| "acc_norm,none": 0.27610027061672127, | |
| "acc_norm_stderr,none": 0.003765766814329832, | |
| "bpb,none": 0.26312100151367407, | |
| "bpb_stderr,none": 0.0016353854951214155, | |
| "logprob,none": -4.2898202062336175, | |
| "logprob_stderr,none": 0.02519748498054928, | |
| "choice_logprob,none": -2.230520127314357, | |
| "choice_logprob_stderr,none": 0.018307220488809502, | |
| "choice_prob_norm,none": 0.25252420092599626, | |
| "choice_prob_norm_stderr,none": 0.00024856771120859505, | |
| "choice_logprob_norm,none": -1.3858647229236738, | |
| "choice_logprob_norm_stderr,none": 0.0010940303113457382, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.19795221843003413, | |
| "acc_stderr,none": 0.011643990971573379, | |
| "acc_norm,none": 0.2440273037542662, | |
| "acc_norm_stderr,none": 0.012551447627856255, | |
| "bpb,none": 1.3090066714226691, | |
| "bpb_stderr,none": 0.022716803418791277, | |
| "logprob,none": -22.191338637583083, | |
| "logprob_stderr,none": 0.36999834908122636, | |
| "choice_logprob,none": -5.6414561788964805, | |
| "choice_logprob_stderr,none": 0.17537006077048314, | |
| "choice_prob_norm,none": 0.24977816951595155, | |
| "choice_prob_norm_stderr,none": 0.002139670595246792, | |
| "choice_logprob_norm,none": -1.439082310977836, | |
| "choice_logprob_norm_stderr,none": 0.010576117473324044 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.494949494949495, | |
| "acc_stderr,none": 0.01025926010256586, | |
| "acc_norm,none": 0.46296296296296297, | |
| "acc_norm_stderr,none": 0.010231597249131053, | |
| "bpb,none": 1.1585534970490599, | |
| "bpb_stderr,none": 0.012481991142278126, | |
| "logprob,none": -15.496999463047644, | |
| "logprob_stderr,none": 0.22045258447123706, | |
| "choice_logprob,none": -2.602005149650746, | |
| "choice_logprob_stderr,none": 0.08816401484489347, | |
| "choice_prob_norm,none": 0.29242443961219655, | |
| "choice_prob_norm_stderr,none": 0.001805858994633604, | |
| "choice_logprob_norm,none": -1.277943804823616, | |
| "choice_logprob_norm_stderr,none": 0.006795895723581999 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.5470948012232416, | |
| "acc_stderr,none": 0.00870617688583775, | |
| "bpb,none": 0.6959641183652738, | |
| "bpb_stderr,none": 0.011316618445433812, | |
| "logprob,none": -1.1740724734210093, | |
| "logprob_stderr,none": 0.017778192949909743, | |
| "choice_logprob,none": -0.7711060924984204, | |
| "choice_logprob_stderr,none": 0.009861549612757899, | |
| "choice_prob_norm,none": 0.5320772519733842, | |
| "choice_prob_norm_stderr,none": 0.003225951917275769, | |
| "choice_logprob_norm,none": -0.7075314292595454, | |
| "choice_logprob_norm_stderr,none": 0.007441305705339594 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.5443425076452599, | |
| "acc_stderr,none": 0.008710597021081264, | |
| "acc_norm,none": 0.5773700305810398, | |
| "acc_norm_stderr,none": 0.008639722698719017, | |
| "bpb,none": 0.5048590515072109, | |
| "bpb_stderr,none": 0.0066582364068149165, | |
| "logprob,none": -0.8384363474831305, | |
| "logprob_stderr,none": 0.009256347318990573, | |
| "choice_logprob,none": -0.762414644839067, | |
| "choice_logprob_stderr,none": 0.009190905530342953, | |
| "choice_prob_norm,none": 0.5267977251387381, | |
| "choice_prob_norm_stderr,none": 0.002942421159012307, | |
| "choice_logprob_norm,none": -0.702744399639691, | |
| "choice_logprob_norm_stderr,none": 0.006554018005252461 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.65, | |
| "acc_stderr,none": 0.0479372485441102, | |
| "bpb,none": 1.7791294591456646, | |
| "bpb_stderr,none": 0.045845252353962085, | |
| "logprob,none": -33.65744636535644, | |
| "logprob_stderr,none": 0.6226578097302043, | |
| "choice_logprob,none": -1.3726728936669463, | |
| "choice_logprob_stderr,none": 0.2457164596192268, | |
| "choice_prob_norm,none": 0.507246365763789, | |
| "choice_prob_norm_stderr,none": 0.008340205133308472, | |
| "choice_logprob_norm,none": -0.6939474969184765, | |
| "choice_logprob_norm_stderr,none": 0.0181700017282806 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.19164619164619165, | |
| "acc_stderr,none": 0.01126862497880163, | |
| "bpb,none": 3.671707202868784, | |
| "bpb_stderr,none": 0.08557388817365925, | |
| "logprob,none": -2.545033495510142, | |
| "logprob_stderr,none": 0.059315299317124, | |
| "choice_logprob,none": -2.536059170760092, | |
| "choice_logprob_stderr,none": 0.059330788905025834, | |
| "choice_prob_norm,none": 0.19702426764674388, | |
| "choice_prob_norm_stderr,none": 0.00564354409779329, | |
| "choice_logprob_norm,none": -3.1414222594330394, | |
| "choice_logprob_norm_stderr,none": 0.08585929446901473 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.21457821457821458, | |
| "acc_stderr,none": 0.011753423094216849, | |
| "acc_norm,none": 0.22522522522522523, | |
| "acc_norm_stderr,none": 0.01195959122428623, | |
| "bpb,none": 0.5689145201033557, | |
| "bpb_stderr,none": 0.010213968008008441, | |
| "logprob,none": -4.55398947854788, | |
| "logprob_stderr,none": 0.0671578478652312, | |
| "choice_logprob,none": -2.818725038008329, | |
| "choice_logprob_stderr,none": 0.06353769805138083, | |
| "choice_prob_norm,none": 0.20872767447697946, | |
| "choice_prob_norm_stderr,none": 0.0016339148543048907, | |
| "choice_logprob_norm,none": -1.6126542167411066, | |
| "choice_logprob_norm_stderr,none": 0.009423999621414866 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.29725154351722766, | |
| "acc_stderr,none": 0.004561141293448453, | |
| "acc_norm,none": 0.32483569010157337, | |
| "acc_norm_stderr,none": 0.004673563250946118, | |
| "bpb,none": 0.9796705149258281, | |
| "bpb_stderr,none": 0.0022091598069014645, | |
| "logprob,none": -89.82898136321066, | |
| "logprob_stderr,none": 0.44818314862513037, | |
| "choice_logprob,none": -24.144845588658104, | |
| "choice_logprob_stderr,none": 0.288170037808976, | |
| "choice_prob_norm,none": 0.2609134652793668, | |
| "choice_prob_norm_stderr,none": 0.00043502952684702746, | |
| "choice_logprob_norm,none": -1.3577094881716545, | |
| "choice_logprob_norm_stderr,none": 0.001694800352566026 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.2952599083847839, | |
| "acc_stderr,none": 0.004552272447071751, | |
| "acc_norm,none": 0.3247361083449512, | |
| "acc_norm_stderr,none": 0.004673191423861227, | |
| "bpb,none": 0.9765488031232005, | |
| "bpb_stderr,none": 0.0021804283141941315, | |
| "logprob,none": -89.67450358449308, | |
| "logprob_stderr,none": 0.4494154799653304, | |
| "choice_logprob,none": -24.543052017087717, | |
| "choice_logprob_stderr,none": 0.2923068946123268, | |
| "choice_prob_norm,none": 0.2602805317169324, | |
| "choice_prob_norm_stderr,none": 0.00042882265701986254, | |
| "choice_logprob_norm,none": -1.3597682409940002, | |
| "choice_logprob_norm_stderr,none": 0.0016715171896852109 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.186, | |
| "acc_stderr,none": 0.01741880678058396, | |
| "acc_norm,none": 0.292, | |
| "acc_norm_stderr,none": 0.020354375480530068, | |
| "bpb,none": 1.9419830728944796, | |
| "bpb_stderr,none": 0.050665847295894746, | |
| "logprob,none": -20.289521512031556, | |
| "logprob_stderr,none": 0.5450531296339609, | |
| "choice_logprob,none": -6.617104865474552, | |
| "choice_logprob_stderr,none": 0.3279247788040061, | |
| "choice_prob_norm,none": 0.2641189282767381, | |
| "choice_prob_norm_stderr,none": 0.0055841283469181225, | |
| "choice_logprob_norm,none": -1.4721363660895315, | |
| "choice_logprob_norm_stderr,none": 0.03031027603400177 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.6637649619151251, | |
| "acc_stderr,none": 0.011022346708970227, | |
| "acc_norm,none": 0.6561479869423286, | |
| "acc_norm_stderr,none": 0.011082356277961393, | |
| "bpb,none": 1.2075411435249774, | |
| "bpb_stderr,none": 0.01135770003852137, | |
| "logprob,none": -71.33046385097815, | |
| "logprob_stderr,none": 1.4856433422319442, | |
| "choice_logprob,none": -3.257656613888533, | |
| "choice_logprob_stderr,none": 0.2216319564386613, | |
| "choice_prob_norm,none": 0.5105612330678693, | |
| "choice_prob_norm_stderr,none": 0.0014126100718653543, | |
| "choice_logprob_norm,none": -0.6798722842751997, | |
| "choice_logprob_norm_stderr,none": 0.0029750502872840567 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.5351223362273086, | |
| "acc_stderr,none": 0.014017773120881575, | |
| "bpb,none": 0.4623989922607736, | |
| "bpb_stderr,none": 0.02552736166876556, | |
| "logprob,none": -20.793666890407042, | |
| "logprob_stderr,none": 0.30062464277421497, | |
| "choice_logprob,none": -0.7703221832931528, | |
| "choice_logprob_stderr,none": 0.01604737295976134, | |
| "choice_prob_norm,none": 0.5008314524445316, | |
| "choice_prob_norm_stderr,none": 0.0007409387016776521, | |
| "choice_logprob_norm,none": -0.6939116611648893, | |
| "choice_logprob_norm_stderr,none": 0.00265109409506179 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.5567765567765568, | |
| "acc_stderr,none": 0.030120860870184642, | |
| "bpb,none": 0.6722615630816129, | |
| "bpb_stderr,none": 0.023688048767935867, | |
| "logprob,none": -27.885882580236636, | |
| "logprob_stderr,none": 0.5264396316213983, | |
| "choice_logprob,none": -0.8601103994979528, | |
| "choice_logprob_stderr,none": 0.04981320319007168, | |
| "choice_prob_norm,none": 0.5010490999408292, | |
| "choice_prob_norm_stderr,none": 0.0011558725038424912, | |
| "choice_logprob_norm,none": -0.6917752194669604, | |
| "choice_logprob_norm_stderr,none": 0.002311264655129336 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.27157542433660053, | |
| "acc_stderr,none": 0.006877736197092466, | |
| "acc_norm,none": 0.27157542433660053, | |
| "acc_norm_stderr,none": 0.006877736197092466, | |
| "logprob,none": -1.5242668319882715, | |
| "logprob_stderr,none": 0.01075254837566572 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.254601960315563, | |
| "acc_stderr,none": 0.006736479313671978, | |
| "acc_norm,none": 0.2794644991632799, | |
| "acc_norm_stderr,none": 0.006939033764961425, | |
| "bpb,none": 0.23384956925224545, | |
| "bpb_stderr,none": 0.0031576904180103636, | |
| "logprob,none": -2.9715185612261994, | |
| "logprob_stderr,none": 0.028759614619079072, | |
| "choice_logprob,none": -1.8962299105082905, | |
| "choice_logprob_stderr,none": 0.022391609953139855, | |
| "choice_prob_norm,none": 0.2534780753988292, | |
| "choice_prob_norm_stderr,none": 0.0004656791959382843, | |
| "choice_logprob_norm,none": -1.3804754707606355, | |
| "choice_logprob_norm_stderr,none": 0.002065318251034982 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.21664626682986537, | |
| "acc_stderr,none": 0.014421468452506974, | |
| "logprob,none": -24.193991212833176, | |
| "logprob_stderr,none": 0.4860067499388812 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 189.70925658069115, | |
| "nll_stderr,none": 2.365351023976718, | |
| "bpb,none": 0.9738379584517999, | |
| "bpb_stderr,none": 0.007469410805016519 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 58.51965685879312, | |
| "nll_stderr,none": 3.2771923459713928, | |
| "bpb,none": 0.5533597291127494, | |
| "bpb_stderr,none": 0.02333208984243271 | |
| } | |
| } | |
| }, | |
| { | |
| "mix": "v4", | |
| "hidden_dim": 768, | |
| "budget": 1.7e+18, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "acc,none": 0.2545933627688364, | |
| "acc_stderr,none": 0.0036675964285558024, | |
| "acc_norm,none": 0.28087167070217917, | |
| "acc_norm_stderr,none": 0.0037837947167598256, | |
| "bpb,none": 0.604019403266609, | |
| "bpb_stderr,none": 0.0030674671657727527, | |
| "logprob,none": -9.094732472075744, | |
| "logprob_stderr,none": 0.0270249902313662, | |
| "choice_logprob,none": -2.301985849848546, | |
| "choice_logprob_stderr,none": 0.01685994759971927, | |
| "choice_prob_norm,none": 0.2526754526360594, | |
| "choice_prob_norm_stderr,none": 0.0003488590114592353, | |
| "choice_logprob_norm,none": -1.3922591756508496, | |
| "choice_logprob_norm_stderr,none": 0.0014986315590676327, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "acc,none": 0.2659165361059678, | |
| "acc_stderr,none": 0.0037246265371234447, | |
| "acc_norm,none": 0.27958980202250394, | |
| "acc_norm_stderr,none": 0.003782069677219712, | |
| "bpb,none": 0.3021447949633298, | |
| "bpb_stderr,none": 0.0016727105647384023, | |
| "logprob,none": -5.318975468008762, | |
| "logprob_stderr,none": 0.028990564850629445, | |
| "choice_logprob,none": -2.3954261775916654, | |
| "choice_logprob_stderr,none": 0.01977525158101873, | |
| "choice_prob_norm,none": 0.2525448278556143, | |
| "choice_prob_norm_stderr,none": 0.00024976017772156427, | |
| "choice_logprob_norm,none": -1.3847116447076364, | |
| "choice_logprob_norm_stderr,none": 0.0010683410918954095, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.20051194539249148, | |
| "acc_stderr,none": 0.011700318050499396, | |
| "acc_norm,none": 0.23122866894197952, | |
| "acc_norm_stderr,none": 0.012320858834772293, | |
| "bpb,none": 1.2587017012693331, | |
| "bpb_stderr,none": 0.02289648048420843, | |
| "logprob,none": -21.649527869940616, | |
| "logprob_stderr,none": 0.37035561791551175, | |
| "choice_logprob,none": -5.5860482295659715, | |
| "choice_logprob_stderr,none": 0.1702262251043129, | |
| "choice_prob_norm,none": 0.24933040742664814, | |
| "choice_prob_norm_stderr,none": 0.001958966551226811, | |
| "choice_logprob_norm,none": -1.4331417127089947, | |
| "choice_logprob_norm_stderr,none": 0.009684509320890152 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.4739057239057239, | |
| "acc_stderr,none": 0.010245801990240049, | |
| "acc_norm,none": 0.4722222222222222, | |
| "acc_norm_stderr,none": 0.010243938285881118, | |
| "bpb,none": 1.0766069451676723, | |
| "bpb_stderr,none": 0.011474280694012995, | |
| "logprob,none": -14.897179114296781, | |
| "logprob_stderr,none": 0.2224682584651731, | |
| "choice_logprob,none": -2.596018089293899, | |
| "choice_logprob_stderr,none": 0.08624427779609865, | |
| "choice_prob_norm,none": 0.29303412859039996, | |
| "choice_prob_norm_stderr,none": 0.0017927597251220823, | |
| "choice_logprob_norm,none": -1.272503910085061, | |
| "choice_logprob_norm_stderr,none": 0.0064001036716539285 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.5850152905198777, | |
| "acc_stderr,none": 0.008617716361921567, | |
| "bpb,none": 0.5296847318017859, | |
| "bpb_stderr,none": 0.007708238146470792, | |
| "logprob,none": -0.8481250299226254, | |
| "logprob_stderr,none": 0.010126202956885915, | |
| "choice_logprob,none": -0.7330099208405059, | |
| "choice_logprob_stderr,none": 0.010079692335643068, | |
| "choice_prob_norm,none": 0.5438853674999767, | |
| "choice_prob_norm_stderr,none": 0.0033707419980152737, | |
| "choice_logprob_norm,none": -0.6902830310649932, | |
| "choice_logprob_norm_stderr,none": 0.007631074159632701 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.5483180428134556, | |
| "acc_stderr,none": 0.00870412620615935, | |
| "acc_norm,none": 0.5678899082568807, | |
| "acc_norm_stderr,none": 0.008664067354619373, | |
| "bpb,none": 0.5092157903518371, | |
| "bpb_stderr,none": 0.007616494586568837, | |
| "logprob,none": -0.8522696880025601, | |
| "logprob_stderr,none": 0.011782420028001834, | |
| "choice_logprob,none": -0.8200578351735449, | |
| "choice_logprob_stderr,none": 0.011809232810617742, | |
| "choice_prob_norm,none": 0.5242103560881676, | |
| "choice_prob_norm_stderr,none": 0.0032533043980025127, | |
| "choice_logprob_norm,none": -0.7248779872506149, | |
| "choice_logprob_norm_stderr,none": 0.007467578574808926 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.67, | |
| "acc_stderr,none": 0.04725815626252606, | |
| "bpb,none": 1.7509401713984518, | |
| "bpb_stderr,none": 0.0465471025808557, | |
| "logprob,none": -33.0514147567749, | |
| "logprob_stderr,none": 0.6232918344655725, | |
| "choice_logprob,none": -1.195383127628822, | |
| "choice_logprob_stderr,none": 0.1977600254862864, | |
| "choice_prob_norm,none": 0.5106011755889054, | |
| "choice_prob_norm_stderr,none": 0.008237084944626733, | |
| "choice_logprob_norm,none": -0.6864939070298057, | |
| "choice_logprob_norm_stderr,none": 0.01756788250117356 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.20393120393120392, | |
| "acc_stderr,none": 0.011535521334313653, | |
| "bpb,none": 2.7750737736040763, | |
| "bpb_stderr,none": 0.038710211216076984, | |
| "logprob,none": -1.9235345620195168, | |
| "logprob_stderr,none": 0.026831873763303756, | |
| "choice_logprob,none": -1.8990875694210565, | |
| "choice_logprob_stderr,none": 0.026864032994106866, | |
| "choice_prob_norm,none": 0.2027864242551805, | |
| "choice_prob_norm_stderr,none": 0.004342440431344874, | |
| "choice_logprob_norm,none": -2.1417477392256643, | |
| "choice_logprob_norm_stderr,none": 0.03895586531751517 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.22113022113022113, | |
| "acc_stderr,none": 0.011881644696037889, | |
| "acc_norm,none": 0.23505323505323505, | |
| "acc_norm_stderr,none": 0.01214000336728023, | |
| "bpb,none": 0.5032101150756438, | |
| "bpb_stderr,none": 0.007832177722936925, | |
| "logprob,none": -4.08857823394538, | |
| "logprob_stderr,none": 0.055516030423002266, | |
| "choice_logprob,none": -2.4585863408814546, | |
| "choice_logprob_stderr,none": 0.052941311436914426, | |
| "choice_prob_norm,none": 0.2089841912558642, | |
| "choice_prob_norm_stderr,none": 0.0013420515634436365, | |
| "choice_logprob_norm,none": -1.5949569298775694, | |
| "choice_logprob_norm_stderr,none": 0.007336206390518743 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.29924317864967137, | |
| "acc_stderr,none": 0.004569906485090238, | |
| "acc_norm,none": 0.3366859191396136, | |
| "acc_norm_stderr,none": 0.00471610647590507, | |
| "bpb,none": 0.967633860035596, | |
| "bpb_stderr,none": 0.0021558406116817135, | |
| "logprob,none": -88.81399176838245, | |
| "logprob_stderr,none": 0.4432641787400844, | |
| "choice_logprob,none": -23.550237747733163, | |
| "choice_logprob_stderr,none": 0.2832279744664909, | |
| "choice_prob_norm,none": 0.26149831224242764, | |
| "choice_prob_norm_stderr,none": 0.00042612890446360705, | |
| "choice_logprob_norm,none": -1.3548133586125368, | |
| "choice_logprob_norm_stderr,none": 0.0016534233664985678 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.3004381597291376, | |
| "acc_stderr,none": 0.004575116093931934, | |
| "acc_norm,none": 0.3314080860386377, | |
| "acc_norm_stderr,none": 0.0046975739621694145, | |
| "bpb,none": 0.9676567958492561, | |
| "bpb_stderr,none": 0.0021420263053160524, | |
| "logprob,none": -88.96766037096708, | |
| "logprob_stderr,none": 0.44590889583776167, | |
| "choice_logprob,none": -23.964510189108076, | |
| "choice_logprob_stderr,none": 0.28711618611636736, | |
| "choice_prob_norm,none": 0.2610185154732678, | |
| "choice_prob_norm_stderr,none": 0.00042339348751737, | |
| "choice_logprob_norm,none": -1.3564887555532543, | |
| "choice_logprob_norm_stderr,none": 0.001642669815486361 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.16, | |
| "acc_stderr,none": 0.016411540980502327, | |
| "acc_norm,none": 0.298, | |
| "acc_norm_stderr,none": 0.020475118092988957, | |
| "bpb,none": 1.9540992975212328, | |
| "bpb_stderr,none": 0.05169578694391548, | |
| "logprob,none": -20.303072726249695, | |
| "logprob_stderr,none": 0.5378084724355116, | |
| "choice_logprob,none": -6.459549452866858, | |
| "choice_logprob_stderr,none": 0.3142409273698422, | |
| "choice_prob_norm,none": 0.26374395201506906, | |
| "choice_prob_norm_stderr,none": 0.005601387312775557, | |
| "choice_logprob_norm,none": -1.476509569316706, | |
| "choice_logprob_norm_stderr,none": 0.031568098906027714 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.6621327529923831, | |
| "acc_stderr,none": 0.011035474307853841, | |
| "acc_norm,none": 0.6556039173014145, | |
| "acc_norm_stderr,none": 0.011086521237125621, | |
| "bpb,none": 1.2014970353837593, | |
| "bpb_stderr,none": 0.011135509083329038, | |
| "logprob,none": -70.90089691320883, | |
| "logprob_stderr,none": 1.4719156911259526, | |
| "choice_logprob,none": -3.234416235332625, | |
| "choice_logprob_stderr,none": 0.22608919598066995, | |
| "choice_prob_norm,none": 0.5103922109540393, | |
| "choice_prob_norm_stderr,none": 0.0013955248134373478, | |
| "choice_logprob_norm,none": -0.6800897269184378, | |
| "choice_logprob_norm_stderr,none": 0.0029647712439736297 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.5201262825572218, | |
| "acc_stderr,none": 0.014041096664344327, | |
| "bpb,none": 0.45711397512724894, | |
| "bpb_stderr,none": 0.024559125218020894, | |
| "logprob,none": -20.652021914649968, | |
| "logprob_stderr,none": 0.2970189774087888, | |
| "choice_logprob,none": -0.7666224429549149, | |
| "choice_logprob_stderr,none": 0.014726534968563916, | |
| "choice_prob_norm,none": 0.5009876300221876, | |
| "choice_prob_norm_stderr,none": 0.0007727968633400879, | |
| "choice_logprob_norm,none": -0.6936139196733536, | |
| "choice_logprob_norm_stderr,none": 0.0026118340421311267 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.5347985347985348, | |
| "acc_stderr,none": 0.03024344009911826, | |
| "bpb,none": 0.6968475485504413, | |
| "bpb_stderr,none": 0.0242763587065663, | |
| "logprob,none": -28.82327291817019, | |
| "logprob_stderr,none": 0.5332043092143391, | |
| "choice_logprob,none": -0.8000338807144458, | |
| "choice_logprob_stderr,none": 0.04046173201444223, | |
| "choice_prob_norm,none": 0.5009830256732977, | |
| "choice_prob_norm_stderr,none": 0.0012731116653940625, | |
| "choice_logprob_norm,none": -0.6920630078957928, | |
| "choice_logprob_norm_stderr,none": 0.0025503091583774632 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.2744441788190294, | |
| "acc_stderr,none": 0.006900338771605978, | |
| "acc_norm,none": 0.2744441788190294, | |
| "acc_norm_stderr,none": 0.006900338771605978, | |
| "logprob,none": -1.468579701194061, | |
| "logprob_stderr,none": 0.008180923658803626 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.2414535022710973, | |
| "acc_stderr,none": 0.006617833617335594, | |
| "acc_norm,none": 0.2701410470953861, | |
| "acc_norm_stderr,none": 0.0068662995339351775, | |
| "bpb,none": 0.2768836852870189, | |
| "bpb_stderr,none": 0.003417423547440866, | |
| "logprob,none": -3.6754608847380768, | |
| "logprob_stderr,none": 0.035808642115232145, | |
| "choice_logprob,none": -2.1497944553173047, | |
| "choice_logprob_stderr,none": 0.02742157118106784, | |
| "choice_prob_norm,none": 0.2514483693484317, | |
| "choice_prob_norm_stderr,none": 0.0004934200964202407, | |
| "choice_logprob_norm,none": -1.389670532257503, | |
| "choice_logprob_norm_stderr,none": 0.0022202315152482794 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.23255813953488372, | |
| "acc_stderr,none": 0.014789157531080517, | |
| "logprob,none": -24.057734922504775, | |
| "logprob_stderr,none": 0.4731599285534115 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 168.23937684320157, | |
| "nll_stderr,none": 2.173767361575589, | |
| "bpb,none": 0.8570287201881884, | |
| "bpb_stderr,none": 0.006734835930797166 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 64.84557540823774, | |
| "nll_stderr,none": 3.481535350353007, | |
| "bpb,none": 0.6030907083166783, | |
| "bpb_stderr,none": 0.022326441103826027 | |
| } | |
| } | |
| }, | |
| { | |
| "mix": "v0", | |
| "hidden_dim": 1024, | |
| "budget": 9e+18, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "acc_norm,none": 0.28913260219341974, | |
| "acc_norm_stderr,none": 0.0038170505239232483, | |
| "logprob,none": -6.674078064848997, | |
| "logprob_stderr,none": 0.01901140521686992, | |
| "choice_logprob,none": -1.8773911852811065, | |
| "choice_logprob_stderr,none": 0.012410207247699801, | |
| "choice_prob_norm,none": 0.25286112945785577, | |
| "choice_prob_norm_stderr,none": 0.0002782327599307251, | |
| "bpb,none": 0.4661269626913522, | |
| "bpb_stderr,none": 0.002521564276281313, | |
| "choice_logprob_norm,none": -1.3855179171485157, | |
| "choice_logprob_norm_stderr,none": 0.0011732520315487474, | |
| "acc,none": 0.2718985899444524, | |
| "acc_stderr,none": 0.003750371502062022 | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "acc_norm,none": 0.290414470873095, | |
| "acc_norm_stderr,none": 0.003817983230903859, | |
| "choice_logprob_norm,none": -1.3832265879984036, | |
| "choice_logprob_norm_stderr,none": 0.0008903141103261133, | |
| "acc,none": 0.2820111095285572, | |
| "acc_stderr,none": 0.0037857344481307753, | |
| "logprob,none": -3.8444525264711373, | |
| "logprob_stderr,none": 0.020050404178067915, | |
| "bpb,none": 0.24216502968259027, | |
| "bpb_stderr,none": 0.0015688729102161052, | |
| "choice_prob_norm,none": 0.25242225038820676, | |
| "choice_prob_norm_stderr,none": 0.00021197420717174455, | |
| "choice_logprob,none": -1.955011404596652, | |
| "choice_logprob_stderr,none": 0.013779524804021928 | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.24061433447098976, | |
| "acc_stderr,none": 0.012491468532390575, | |
| "acc_norm,none": 0.2627986348122867, | |
| "acc_norm_stderr,none": 0.012862523175351331, | |
| "bpb,none": 1.1303670351920045, | |
| "bpb_stderr,none": 0.021460296068113123, | |
| "logprob,none": -19.27878961058607, | |
| "logprob_stderr,none": 0.3348281147526389, | |
| "choice_logprob,none": -4.806931849095136, | |
| "choice_logprob_stderr,none": 0.15549723707544602, | |
| "choice_prob_norm,none": 0.25551085220212294, | |
| "choice_prob_norm_stderr,none": 0.0019557571360019656, | |
| "choice_logprob_norm,none": -1.4043355010952636, | |
| "choice_logprob_norm_stderr,none": 0.009062425260127323 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.5555555555555556, | |
| "acc_stderr,none": 0.010196254838691668, | |
| "acc_norm,none": 0.5324074074074074, | |
| "acc_norm_stderr,none": 0.010238210368801891, | |
| "bpb,none": 0.9382912461299037, | |
| "bpb_stderr,none": 0.010856005058823528, | |
| "logprob,none": -12.965371599301745, | |
| "logprob_stderr,none": 0.1984302457062909, | |
| "choice_logprob,none": -2.026691862536917, | |
| "choice_logprob_stderr,none": 0.07339547348687801, | |
| "choice_prob_norm,none": 0.3044992148390953, | |
| "choice_prob_norm_stderr,none": 0.0017970120063283165, | |
| "choice_logprob_norm,none": -1.2303368635102365, | |
| "choice_logprob_norm_stderr,none": 0.006143988115956957 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.45718654434250766, | |
| "acc_stderr,none": 0.008712936764296237, | |
| "bpb,none": 0.45735726848110914, | |
| "bpb_stderr,none": 0.0033240133886141203, | |
| "logprob,none": -0.8455457207624336, | |
| "logprob_stderr,none": 0.007151816427775643, | |
| "choice_logprob,none": -0.7991114588856746, | |
| "choice_logprob_stderr,none": 0.0071367340988007675, | |
| "choice_prob_norm,none": 0.5017328079660853, | |
| "choice_prob_norm_stderr,none": 0.0016066156315061744, | |
| "choice_logprob_norm,none": -0.7071844266861197, | |
| "choice_logprob_norm_stderr,none": 0.003322330090811105 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.598776758409786, | |
| "acc_stderr,none": 0.008572708337178991, | |
| "acc_norm,none": 0.6116207951070336, | |
| "acc_norm_stderr,none": 0.008524357307908797, | |
| "bpb,none": 0.5487674562348573, | |
| "bpb_stderr,none": 0.009659175369762825, | |
| "logprob,none": -0.8471939702281893, | |
| "logprob_stderr,none": 0.012795218556042685, | |
| "choice_logprob,none": -0.7946344378664244, | |
| "choice_logprob_stderr,none": 0.012830967641596133, | |
| "choice_prob_norm,none": 0.5445438769833885, | |
| "choice_prob_norm_stderr,none": 0.003920664405608299, | |
| "choice_logprob_norm,none": -0.7270175501136007, | |
| "choice_logprob_norm_stderr,none": 0.00948407759703137 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.7, | |
| "acc_stderr,none": 0.046056618647183814, | |
| "bpb,none": 1.688417207231075, | |
| "bpb_stderr,none": 0.046443549576117016, | |
| "logprob,none": -31.785984535217285, | |
| "logprob_stderr,none": 0.574583828532445, | |
| "choice_logprob,none": -0.9573435664452496, | |
| "choice_logprob_stderr,none": 0.19609870862864015, | |
| "choice_prob_norm,none": 0.5165112756384823, | |
| "choice_prob_norm_stderr,none": 0.008333286364378283, | |
| "choice_logprob_norm,none": -0.6757736663288648, | |
| "choice_logprob_norm_stderr,none": 0.018314384433538264 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.2022932022932023, | |
| "acc_stderr,none": 0.01150091452526044, | |
| "bpb,none": 3.308068562033664, | |
| "bpb_stderr,none": 0.06991929962338578, | |
| "logprob,none": -2.2929783968726305, | |
| "logprob_stderr,none": 0.04846436540067588, | |
| "choice_logprob,none": -2.281958129242245, | |
| "choice_logprob_stderr,none": 0.048469082192047826, | |
| "choice_prob_norm,none": 0.20019674030611143, | |
| "choice_prob_norm_stderr,none": 0.004763466308994383, | |
| "choice_logprob_norm,none": -2.7287219537684475, | |
| "choice_logprob_norm_stderr,none": 0.07008232562791067 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.19000819000819, | |
| "acc_stderr,none": 0.01123172751912786, | |
| "acc_norm,none": 0.20147420147420148, | |
| "acc_norm_stderr,none": 0.011483500195202903, | |
| "bpb,none": 0.429949664417547, | |
| "bpb_stderr,none": 0.008076315515335691, | |
| "logprob,none": -3.451736507603226, | |
| "logprob_stderr,none": 0.05611321867763557, | |
| "choice_logprob,none": -2.630758405642532, | |
| "choice_logprob_stderr,none": 0.054967332321790414, | |
| "choice_prob_norm,none": 0.2038328543979333, | |
| "choice_prob_norm_stderr,none": 0.001346731979327511, | |
| "choice_logprob_norm,none": -1.62221360042132, | |
| "choice_logprob_norm_stderr,none": 0.007619295229946782 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.3452499502091217, | |
| "acc_stderr,none": 0.004744780201276638, | |
| "acc_norm,none": 0.4170483967337184, | |
| "acc_norm_stderr,none": 0.004920633227844459, | |
| "bpb,none": 0.8823173304353232, | |
| "bpb_stderr,none": 0.0020461410545454532, | |
| "logprob,none": -80.86246913710669, | |
| "logprob_stderr,none": 0.4031797743881049, | |
| "choice_logprob,none": -19.395296864862598, | |
| "choice_logprob_stderr,none": 0.2488205743300131, | |
| "choice_prob_norm,none": 0.27011729593392875, | |
| "choice_prob_norm_stderr,none": 0.00042040904737482244, | |
| "choice_logprob_norm,none": -1.3211981977293266, | |
| "choice_logprob_norm_stderr,none": 0.0015788492100577963 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.344353714399522, | |
| "acc_stderr,none": 0.004741859753178431, | |
| "acc_norm,none": 0.4140609440350528, | |
| "acc_norm_stderr,none": 0.004915524600627973, | |
| "bpb,none": 0.8678022635536807, | |
| "bpb_stderr,none": 0.002027430788617163, | |
| "logprob,none": -79.66558491758701, | |
| "logprob_stderr,none": 0.4004767695182609, | |
| "choice_logprob,none": -19.67994947421911, | |
| "choice_logprob_stderr,none": 0.2510734934995068, | |
| "choice_prob_norm,none": 0.2699317383112573, | |
| "choice_prob_norm_stderr,none": 0.00041977307256955434, | |
| "choice_logprob_norm,none": -1.3218253788095968, | |
| "choice_logprob_norm_stderr,none": 0.001573985153351605 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.192, | |
| "acc_stderr,none": 0.017632180454360984, | |
| "acc_norm,none": 0.31, | |
| "acc_norm_stderr,none": 0.020704041021724805, | |
| "bpb,none": 1.8607658739262547, | |
| "bpb_stderr,none": 0.04956693790865995, | |
| "logprob,none": -19.349410093307494, | |
| "logprob_stderr,none": 0.5175460946269681, | |
| "choice_logprob,none": -6.027359120533246, | |
| "choice_logprob_stderr,none": 0.30074657863828946, | |
| "choice_prob_norm,none": 0.27147872999523615, | |
| "choice_prob_norm_stderr,none": 0.005653136250223714, | |
| "choice_logprob_norm,none": -1.4341392043181795, | |
| "choice_logprob_norm_stderr,none": 0.028065683186773445 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.691512513601741, | |
| "acc_stderr,none": 0.010776164678037155, | |
| "acc_norm,none": 0.6849836779107725, | |
| "acc_norm_stderr,none": 0.010838072746240652, | |
| "bpb,none": 1.1001333707203211, | |
| "bpb_stderr,none": 0.010781655681221258, | |
| "logprob,none": -64.95884351553933, | |
| "logprob_stderr,none": 1.3648116902702927, | |
| "choice_logprob,none": -2.8290212149038094, | |
| "choice_logprob_stderr,none": 0.1978043337372253, | |
| "choice_prob_norm,none": 0.5143110704617275, | |
| "choice_prob_norm_stderr,none": 0.0013428686481482728, | |
| "choice_logprob_norm,none": -0.6716928977102089, | |
| "choice_logprob_norm_stderr,none": 0.0027974047765665882 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.4972375690607735, | |
| "acc_stderr,none": 0.014052271211616438, | |
| "bpb,none": 0.44075114363447215, | |
| "bpb_stderr,none": 0.023351630480024383, | |
| "logprob,none": -19.941748224663378, | |
| "logprob_stderr,none": 0.28624009285998814, | |
| "choice_logprob,none": -0.7768202090598731, | |
| "choice_logprob_stderr,none": 0.01582238063994806, | |
| "choice_prob_norm,none": 0.5011534614182027, | |
| "choice_prob_norm_stderr,none": 0.0007615166601860404, | |
| "choice_logprob_norm,none": -0.6926689003880863, | |
| "choice_logprob_norm_stderr,none": 0.0020130585439979316 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.5714285714285714, | |
| "acc_stderr,none": 0.030006001800600198, | |
| "bpb,none": 0.6548505138835954, | |
| "bpb_stderr,none": 0.021204852809512176, | |
| "logprob,none": -27.373171830788635, | |
| "logprob_stderr,none": 0.4851443088103239, | |
| "choice_logprob,none": -0.781759254624432, | |
| "choice_logprob_stderr,none": 0.046038949062997735, | |
| "choice_prob_norm,none": 0.5022969315343694, | |
| "choice_prob_norm_stderr,none": 0.0012249598100978827, | |
| "choice_logprob_norm,none": -0.68936630940301, | |
| "choice_logprob_norm_stderr,none": 0.002427985612751233 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.2749223045661009, | |
| "acc_stderr,none": 0.006904070961661394, | |
| "acc_norm,none": 0.2749223045661009, | |
| "acc_norm_stderr,none": 0.006904070961661394, | |
| "logprob,none": -1.4320607341535634, | |
| "logprob_stderr,none": 0.00599742894108014 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.2409753765240258, | |
| "acc_stderr,none": 0.006613361336468529, | |
| "acc_norm,none": 0.27779105904852974, | |
| "acc_norm_stderr,none": 0.0069262561348971755, | |
| "bpb,none": 0.1980862205268872, | |
| "bpb_stderr,none": 0.002585096103962977, | |
| "logprob,none": -2.4686314965616765, | |
| "logprob_stderr,none": 0.022424759315674673, | |
| "choice_logprob,none": -1.7409690527301998, | |
| "choice_logprob_stderr,none": 0.01762798312325864, | |
| "choice_prob_norm,none": 0.2516423134955927, | |
| "choice_prob_norm_stderr,none": 0.00036149155722740135, | |
| "choice_logprob_norm,none": -1.3846224187693892, | |
| "choice_logprob_norm_stderr,none": 0.0016056848013486905 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.22766217870257038, | |
| "acc_stderr,none": 0.014679255032111064, | |
| "logprob,none": -20.182860770744966, | |
| "logprob_stderr,none": 0.4159737876273053 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 151.70901034617623, | |
| "nll_stderr,none": 1.9142194686108658, | |
| "bpb,none": 0.7785776724099326, | |
| "bpb_stderr,none": 0.006136763749738495 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 61.2485516100395, | |
| "nll_stderr,none": 3.2214641436356963, | |
| "bpb,none": 0.5741438524912152, | |
| "bpb_stderr,none": 0.020655639762877293 | |
| } | |
| } | |
| }, | |
| { | |
| "mix": "v2", | |
| "hidden_dim": 1024, | |
| "budget": 9e+18, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "choice_logprob_norm,none": -1.3887148443048396, | |
| "choice_logprob_norm_stderr,none": 0.0011612383410942707, | |
| "choice_prob_norm,none": 0.251948878066459, | |
| "choice_prob_norm_stderr,none": 0.0002738137208502262, | |
| "bpb,none": 0.44218048678046323, | |
| "bpb_stderr,none": 0.002349499737618632, | |
| "acc_norm,none": 0.27602905569007263, | |
| "acc_norm_stderr,none": 0.003764639707646851, | |
| "logprob,none": -6.584575940159366, | |
| "logprob_stderr,none": 0.021564959282463818, | |
| "choice_logprob,none": -1.9591102267126992, | |
| "choice_logprob_stderr,none": 0.01261867725547403, | |
| "acc,none": 0.25210084033613445, | |
| "acc_stderr,none": 0.003656009837643968 | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "choice_logprob_norm,none": -1.3846107514653117, | |
| "choice_logprob_norm_stderr,none": 0.000752846654571709, | |
| "acc,none": 0.26741204956558895, | |
| "acc_stderr,none": 0.0037274963630969824, | |
| "bpb,none": 0.20057953440157156, | |
| "bpb_stderr,none": 0.0013000184012252752, | |
| "choice_prob_norm,none": 0.25151363104162694, | |
| "choice_prob_norm_stderr,none": 0.00017700146029745345, | |
| "logprob,none": -3.14127250357916, | |
| "logprob_stderr,none": 0.017217903509997766, | |
| "choice_logprob,none": -1.8302228602650503, | |
| "choice_logprob_stderr,none": 0.012720584641536284, | |
| "acc_norm,none": 0.28607036034752886, | |
| "acc_norm_stderr,none": 0.0038083312053686508 | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.27047781569965873, | |
| "acc_stderr,none": 0.012980954547659554, | |
| "acc_norm,none": 0.30631399317406144, | |
| "acc_norm_stderr,none": 0.013470584417276513, | |
| "bpb,none": 1.0935280258436462, | |
| "bpb_stderr,none": 0.02220483330833206, | |
| "logprob,none": -18.868265470547072, | |
| "logprob_stderr,none": 0.33899786997642567, | |
| "choice_logprob,none": -4.699882626065459, | |
| "choice_logprob_stderr,none": 0.15638782844904006, | |
| "choice_prob_norm,none": 0.2615960345029809, | |
| "choice_prob_norm_stderr,none": 0.002213300520616238, | |
| "choice_logprob_norm,none": -1.3886084230955638, | |
| "choice_logprob_norm_stderr,none": 0.009964575574973286 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.5997474747474747, | |
| "acc_stderr,none": 0.010053550119896114, | |
| "acc_norm,none": 0.5909090909090909, | |
| "acc_norm_stderr,none": 0.010088775152615782, | |
| "bpb,none": 0.875158962470022, | |
| "bpb_stderr,none": 0.011381731804273681, | |
| "logprob,none": -12.395110425724324, | |
| "logprob_stderr,none": 0.20011342776901275, | |
| "choice_logprob,none": -1.8528819599043764, | |
| "choice_logprob_stderr,none": 0.07185139483537852, | |
| "choice_prob_norm,none": 0.3189237129071374, | |
| "choice_prob_norm_stderr,none": 0.002016674957278432, | |
| "choice_logprob_norm,none": -1.1895055985152354, | |
| "choice_logprob_norm_stderr,none": 0.006533736773321884 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.5168195718654435, | |
| "acc_stderr,none": 0.008740105658763946, | |
| "bpb,none": 0.541989411025595, | |
| "bpb_stderr,none": 0.004312307826813873, | |
| "logprob,none": -0.9524213738397721, | |
| "logprob_stderr,none": 0.006870908275453281, | |
| "choice_logprob,none": -0.7383123585052149, | |
| "choice_logprob_stderr,none": 0.006468265480150814, | |
| "choice_prob_norm,none": 0.5183710765429542, | |
| "choice_prob_norm_stderr,none": 0.0019750440851862504, | |
| "choice_logprob_norm,none": -0.6830070819392396, | |
| "choice_logprob_norm_stderr,none": 0.004108129300805658 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.6180428134556575, | |
| "acc_stderr,none": 0.008497851998427189, | |
| "acc_norm,none": 0.6201834862385321, | |
| "acc_norm_stderr,none": 0.008488668235778606, | |
| "bpb,none": 0.6635063310144024, | |
| "bpb_stderr,none": 0.011312178604720991, | |
| "logprob,none": -1.0069598921212946, | |
| "logprob_stderr,none": 0.014705390940278371, | |
| "choice_logprob,none": -0.8202281864002191, | |
| "choice_logprob_stderr,none": 0.014695636042857963, | |
| "choice_prob_norm,none": 0.5610461036675835, | |
| "choice_prob_norm_stderr,none": 0.004633673864166954, | |
| "choice_logprob_norm,none": -0.74378769719765, | |
| "choice_logprob_norm_stderr,none": 0.011175862199800225 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.68, | |
| "acc_stderr,none": 0.046882617226215034, | |
| "bpb,none": 1.7431494759115915, | |
| "bpb_stderr,none": 0.05228713019448322, | |
| "logprob,none": -32.63967601776123, | |
| "logprob_stderr,none": 0.5909717686742535, | |
| "choice_logprob,none": -1.1414016145947221, | |
| "choice_logprob_stderr,none": 0.20693242642507131, | |
| "choice_prob_norm,none": 0.517273855120282, | |
| "choice_prob_norm_stderr,none": 0.009460633953763339, | |
| "choice_logprob_norm,none": -0.6785414600198394, | |
| "choice_logprob_norm_stderr,none": 0.020700769324077977 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.20884520884520885, | |
| "acc_stderr,none": 0.011637590576063046, | |
| "bpb,none": 3.107354595439875, | |
| "bpb_stderr,none": 0.048339534030756526, | |
| "logprob,none": -2.1538540768291403, | |
| "logprob_stderr,none": 0.0335064117230004, | |
| "choice_logprob,none": -2.1230699300379263, | |
| "choice_logprob_stderr,none": 0.033539420460243184, | |
| "choice_prob_norm,none": 0.19609029022568736, | |
| "choice_prob_norm_stderr,none": 0.006244693248187209, | |
| "choice_logprob_norm,none": -2.550141192036063, | |
| "choice_logprob_norm_stderr,none": 0.049664187241009225 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.19246519246519248, | |
| "acc_stderr,none": 0.011286955409752617, | |
| "acc_norm,none": 0.20147420147420148, | |
| "acc_norm_stderr,none": 0.011483500195202903, | |
| "bpb,none": 0.39968402829407823, | |
| "bpb_stderr,none": 0.009492562523787662, | |
| "logprob,none": -3.1958935750887885, | |
| "logprob_stderr,none": 0.06880739818028928, | |
| "choice_logprob,none": -2.792026057863224, | |
| "choice_logprob_stderr,none": 0.06670431115893402, | |
| "choice_prob_norm,none": 0.20375478798874827, | |
| "choice_prob_norm_stderr,none": 0.0014768601460400197, | |
| "choice_logprob_norm,none": -1.6309924149932398, | |
| "choice_logprob_norm_stderr,none": 0.008771916183210268 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.3768173670583549, | |
| "acc_stderr,none": 0.004835981632401603, | |
| "acc_norm,none": 0.46922923720374426, | |
| "acc_norm_stderr,none": 0.0049803234000310795, | |
| "bpb,none": 0.8846169915295451, | |
| "bpb_stderr,none": 0.00216054708286405, | |
| "logprob,none": -80.74439490636823, | |
| "logprob_stderr,none": 0.40232680893402256, | |
| "choice_logprob,none": -18.21099782421507, | |
| "choice_logprob_stderr,none": 0.2436303915764476, | |
| "choice_prob_norm,none": 0.27606623257420826, | |
| "choice_prob_norm_stderr,none": 0.0004449742055465229, | |
| "choice_logprob_norm,none": -1.3003765092331951, | |
| "choice_logprob_norm_stderr,none": 0.0016418664757832538 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.3639713204540928, | |
| "acc_stderr,none": 0.0048015720289208, | |
| "acc_norm,none": 0.45130452101175067, | |
| "acc_norm_stderr,none": 0.0049660609953150634, | |
| "bpb,none": 0.8645088399901097, | |
| "bpb_stderr,none": 0.0020838005037428706, | |
| "logprob,none": -79.09350363089017, | |
| "logprob_stderr,none": 0.39670599272200674, | |
| "choice_logprob,none": -18.72502364044871, | |
| "choice_logprob_stderr,none": 0.24594117569880022, | |
| "choice_prob_norm,none": 0.27352034044036333, | |
| "choice_prob_norm_stderr,none": 0.00042830633098770517, | |
| "choice_logprob_norm,none": -1.3088219496509341, | |
| "choice_logprob_norm_stderr,none": 0.0015876999111474938 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.204, | |
| "acc_stderr,none": 0.018039369104138645, | |
| "acc_norm,none": 0.33, | |
| "acc_norm_stderr,none": 0.021049612166134817, | |
| "bpb,none": 1.875484662709983, | |
| "bpb_stderr,none": 0.05179042887158007, | |
| "logprob,none": -19.307575354576112, | |
| "logprob_stderr,none": 0.510450513559666, | |
| "choice_logprob,none": -6.008570079409671, | |
| "choice_logprob_stderr,none": 0.3063816766265774, | |
| "choice_prob_norm,none": 0.27933651070111226, | |
| "choice_prob_norm_stderr,none": 0.006200815553283834, | |
| "choice_logprob_norm,none": -1.4222343139005449, | |
| "choice_logprob_norm_stderr,none": 0.02937963571185174 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.6974972796517954, | |
| "acc_stderr,none": 0.010717199698083893, | |
| "acc_norm,none": 0.6985854189336235, | |
| "acc_norm_stderr,none": 0.01070624824275376, | |
| "bpb,none": 1.1024937948409286, | |
| "bpb_stderr,none": 0.010867658597802586, | |
| "logprob,none": -64.97100728743743, | |
| "logprob_stderr,none": 1.37324001166038, | |
| "choice_logprob,none": -2.8151085032374, | |
| "choice_logprob_stderr,none": 0.20151047037802025, | |
| "choice_prob_norm,none": 0.515620895724506, | |
| "choice_prob_norm_stderr,none": 0.0013739474599856348, | |
| "choice_logprob_norm,none": -0.6694607389999877, | |
| "choice_logprob_norm_stderr,none": 0.0028669015157053446 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.5122336227308603, | |
| "acc_stderr,none": 0.01404827882040562, | |
| "bpb,none": 0.4461208230255092, | |
| "bpb_stderr,none": 0.024572691862973322, | |
| "logprob,none": -20.093352986962756, | |
| "logprob_stderr,none": 0.289626758603389, | |
| "choice_logprob,none": -0.7990905808599074, | |
| "choice_logprob_stderr,none": 0.016693379995914515, | |
| "choice_prob_norm,none": 0.5011409500177034, | |
| "choice_prob_norm_stderr,none": 0.0007905641599567411, | |
| "choice_logprob_norm,none": -0.6934659670650414, | |
| "choice_logprob_norm_stderr,none": 0.0027187399193643737 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.5567765567765568, | |
| "acc_stderr,none": 0.030120860870184635, | |
| "bpb,none": 0.6567250997319227, | |
| "bpb_stderr,none": 0.0219496167384627, | |
| "logprob,none": -27.465879419347743, | |
| "logprob_stderr,none": 0.4973342251798536, | |
| "choice_logprob,none": -0.9953870161772929, | |
| "choice_logprob_stderr,none": 0.07727446773738116, | |
| "choice_prob_norm,none": 0.5022875866783845, | |
| "choice_prob_norm_stderr,none": 0.0013912789346927528, | |
| "choice_logprob_norm,none": -0.6896150353015345, | |
| "choice_logprob_norm_stderr,none": 0.002752556408081628 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.3131723643318193, | |
| "acc_stderr,none": 0.007171724237526972, | |
| "acc_norm,none": 0.3131723643318193, | |
| "acc_norm_stderr,none": 0.007171724237526972, | |
| "logprob,none": -1.4007173108906372, | |
| "logprob_stderr,none": 0.005397008119825809 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.30695672961989, | |
| "acc_stderr,none": 0.007132253005986364, | |
| "acc_norm,none": 0.32106143915849866, | |
| "acc_norm_stderr,none": 0.007219669176206489, | |
| "bpb,none": 0.15485459632136042, | |
| "bpb_stderr,none": 0.002271082787996014, | |
| "logprob,none": -1.873889418159009, | |
| "logprob_stderr,none": 0.019455744132788375, | |
| "choice_logprob,none": -1.5687109944568915, | |
| "choice_logprob_stderr,none": 0.01690239468821199, | |
| "choice_prob_norm,none": 0.25316229322677253, | |
| "choice_prob_norm_stderr,none": 0.0003259097891512063, | |
| "choice_logprob_norm,none": -1.3778143596538146, | |
| "choice_logprob_norm_stderr,none": 0.0014879400524400613 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.22643818849449204, | |
| "acc_stderr,none": 0.014651337324602597, | |
| "logprob,none": -20.73370366405828, | |
| "logprob_stderr,none": 0.4278143769070544 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 149.62058111571108, | |
| "nll_stderr,none": 1.9986798444447733, | |
| "bpb,none": 0.7666060799050025, | |
| "bpb_stderr,none": 0.007104338540057894 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 66.28788003398151, | |
| "nll_stderr,none": 3.5592787545629223, | |
| "bpb,none": 0.6243391427736511, | |
| "bpb_stderr,none": 0.023415837672337732 | |
| } | |
| } | |
| }, | |
| { | |
| "mix": "v3", | |
| "hidden_dim": 1024, | |
| "budget": 9e+18, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "acc,none": 0.2778806437829369, | |
| "acc_stderr,none": 0.0037734484032089055, | |
| "acc_norm,none": 0.2879219484403931, | |
| "acc_norm_stderr,none": 0.0038099083678867296, | |
| "bpb,none": 0.42117007170812004, | |
| "bpb_stderr,none": 0.002349339982343761, | |
| "logprob,none": -6.128808188783855, | |
| "logprob_stderr,none": 0.019478551572906597, | |
| "choice_logprob,none": -1.8214619232173475, | |
| "choice_logprob_stderr,none": 0.012090375450305921, | |
| "choice_prob_norm,none": 0.252465882369846, | |
| "choice_prob_norm_stderr,none": 0.0002595426976148667, | |
| "choice_logprob_norm,none": -1.3860591506283386, | |
| "choice_logprob_norm_stderr,none": 0.001087801969607964, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "acc,none": 0.2716849451645065, | |
| "acc_stderr,none": 0.0037496055191951496, | |
| "acc_norm,none": 0.2837202677681242, | |
| "acc_norm_stderr,none": 0.0038024293601329213, | |
| "bpb,none": 0.2026213677068112, | |
| "bpb_stderr,none": 0.0013709593473205442, | |
| "logprob,none": -3.228287653153062, | |
| "logprob_stderr,none": 0.017556417333451688, | |
| "choice_logprob,none": -1.7620633090352307, | |
| "choice_logprob_stderr,none": 0.011662024024558518, | |
| "choice_prob_norm,none": 0.2516173104824868, | |
| "choice_prob_norm_stderr,none": 0.00016909057142734156, | |
| "choice_logprob_norm,none": -1.3838629310970294, | |
| "choice_logprob_norm_stderr,none": 0.0007211045140684807, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.25597269624573377, | |
| "acc_stderr,none": 0.012753013241244534, | |
| "acc_norm,none": 0.2883959044368601, | |
| "acc_norm_stderr,none": 0.013238394422428171, | |
| "bpb,none": 1.1221728382537852, | |
| "bpb_stderr,none": 0.022419709106874387, | |
| "logprob,none": -19.341666728563276, | |
| "logprob_stderr,none": 0.34589975499680453, | |
| "choice_logprob,none": -4.940120011255016, | |
| "choice_logprob_stderr,none": 0.15933995231359616, | |
| "choice_prob_norm,none": 0.25794012617660156, | |
| "choice_prob_norm_stderr,none": 0.0022300508930906348, | |
| "choice_logprob_norm,none": -1.4061200054289258, | |
| "choice_logprob_norm_stderr,none": 0.010740217642775685 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.5601851851851852, | |
| "acc_stderr,none": 0.01018518518518532, | |
| "acc_norm,none": 0.5467171717171717, | |
| "acc_norm_stderr,none": 0.01021490151673162, | |
| "bpb,none": 0.9151864377579918, | |
| "bpb_stderr,none": 0.010967587948958146, | |
| "logprob,none": -12.89686324339523, | |
| "logprob_stderr,none": 0.20518227800584848, | |
| "choice_logprob,none": -2.1136297973276794, | |
| "choice_logprob_stderr,none": 0.07708881527753612, | |
| "choice_prob_norm,none": 0.30940160257013594, | |
| "choice_prob_norm_stderr,none": 0.001909619515456115, | |
| "choice_logprob_norm,none": -1.2187653110451044, | |
| "choice_logprob_norm_stderr,none": 0.006600255376899415 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.5330275229357798, | |
| "acc_stderr,none": 0.008725955605686028, | |
| "bpb,none": 0.4524451726921877, | |
| "bpb_stderr,none": 0.004517110600767529, | |
| "logprob,none": -0.788279603818141, | |
| "logprob_stderr,none": 0.006898990789787937, | |
| "choice_logprob,none": -0.7320842558469366, | |
| "choice_logprob_stderr,none": 0.006712690836080821, | |
| "choice_prob_norm,none": 0.5193000834523307, | |
| "choice_prob_norm_stderr,none": 0.0020481414090304526, | |
| "choice_logprob_norm,none": -0.6840219491412365, | |
| "choice_logprob_norm_stderr,none": 0.004384797259024088 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.43241590214067277, | |
| "acc_stderr,none": 0.008664798701065792, | |
| "acc_norm,none": 0.4614678899082569, | |
| "acc_norm_stderr,none": 0.008719048328810805, | |
| "bpb,none": 0.49222531517767704, | |
| "bpb_stderr,none": 0.005039794067753739, | |
| "logprob,none": -0.9121814133559527, | |
| "logprob_stderr,none": 0.009741947195775053, | |
| "choice_logprob,none": -0.8860730057583283, | |
| "choice_logprob_stderr,none": 0.009767192859571366, | |
| "choice_prob_norm,none": 0.4919903165538794, | |
| "choice_prob_norm_stderr,none": 0.002204641670153114, | |
| "choice_logprob_norm,none": -0.7457144675567828, | |
| "choice_logprob_norm_stderr,none": 0.004954534668786731 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.64, | |
| "acc_stderr,none": 0.048241815132442176, | |
| "bpb,none": 1.6599713126166298, | |
| "bpb_stderr,none": 0.04758606810439861, | |
| "logprob,none": -31.17845386505127, | |
| "logprob_stderr,none": 0.5851072656066011, | |
| "choice_logprob,none": -1.1896302615316923, | |
| "choice_logprob_stderr,none": 0.22459360269658862, | |
| "choice_prob_norm,none": 0.5117218312450332, | |
| "choice_prob_norm_stderr,none": 0.008392263694597767, | |
| "choice_logprob_norm,none": -0.6860098596291105, | |
| "choice_logprob_norm_stderr,none": 0.019012778968729667 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.18837018837018837, | |
| "acc_stderr,none": 0.011194511993535688, | |
| "bpb,none": 3.6411961247457723, | |
| "bpb_stderr,none": 0.08525874829781832, | |
| "logprob,none": -2.523884827733333, | |
| "logprob_stderr,none": 0.05909686100070277, | |
| "choice_logprob,none": -2.5189072357432103, | |
| "choice_logprob_stderr,none": 0.05909666810824806, | |
| "choice_prob_norm,none": 0.19650949851356697, | |
| "choice_prob_norm_stderr,none": 0.005258433505175625, | |
| "choice_logprob_norm,none": -3.102625967819365, | |
| "choice_logprob_norm_stderr,none": 0.08544437807964977 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.20638820638820637, | |
| "acc_stderr,none": 0.011586881879177828, | |
| "acc_norm,none": 0.22276822276822278, | |
| "acc_norm_stderr,none": 0.011913022964039571, | |
| "bpb,none": 0.452215524904344, | |
| "bpb_stderr,none": 0.009584694071551986, | |
| "logprob,none": -3.6264870981033663, | |
| "logprob_stderr,none": 0.06957873167229361, | |
| "choice_logprob,none": -2.9544891951847396, | |
| "choice_logprob_stderr,none": 0.068277529701088, | |
| "choice_prob_norm,none": 0.2046961509793604, | |
| "choice_prob_norm_stderr,none": 0.0016226983561465022, | |
| "choice_logprob_norm,none": -1.6328321858109678, | |
| "choice_logprob_norm_stderr,none": 0.009266670851355841 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.3324039036048596, | |
| "acc_stderr,none": 0.00470112142180543, | |
| "acc_norm,none": 0.4012148974307907, | |
| "acc_norm_stderr,none": 0.004891426533390623, | |
| "bpb,none": 0.9009375876364222, | |
| "bpb_stderr,none": 0.0021055138981136206, | |
| "logprob,none": -82.36120062808881, | |
| "logprob_stderr,none": 0.40982338735684054, | |
| "choice_logprob,none": -20.272721049937964, | |
| "choice_logprob_stderr,none": 0.2566580062434852, | |
| "choice_prob_norm,none": 0.2685254519429319, | |
| "choice_prob_norm_stderr,none": 0.00042581267754788663, | |
| "choice_logprob_norm,none": -1.3276102234515903, | |
| "choice_logprob_norm_stderr,none": 0.0016122223661406815 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.3298147779326827, | |
| "acc_stderr,none": 0.004691848665399074, | |
| "acc_norm,none": 0.3972316271659032, | |
| "acc_norm_stderr,none": 0.0048832465794966745, | |
| "bpb,none": 0.8905966079556564, | |
| "bpb_stderr,none": 0.002093729404640619, | |
| "logprob,none": -81.59669353016533, | |
| "logprob_stderr,none": 0.40979959770883, | |
| "choice_logprob,none": -20.699868645589074, | |
| "choice_logprob_stderr,none": 0.26058429885032486, | |
| "choice_prob_norm,none": 0.26802887392660346, | |
| "choice_prob_norm_stderr,none": 0.00042519100330753356, | |
| "choice_logprob_norm,none": -1.3294290716882005, | |
| "choice_logprob_norm_stderr,none": 0.0016093713491937313 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.198, | |
| "acc_stderr,none": 0.017838958963847237, | |
| "acc_norm,none": 0.314, | |
| "acc_norm_stderr,none": 0.020776701920308997, | |
| "bpb,none": 1.879972734852229, | |
| "bpb_stderr,none": 0.05180831152549998, | |
| "logprob,none": -19.369384078025817, | |
| "logprob_stderr,none": 0.5081540346015109, | |
| "choice_logprob,none": -6.169701454154714, | |
| "choice_logprob_stderr,none": 0.3067755780073137, | |
| "choice_prob_norm,none": 0.2716123029309807, | |
| "choice_prob_norm_stderr,none": 0.005826471946227696, | |
| "choice_logprob_norm,none": -1.4490867505465432, | |
| "choice_logprob_norm_stderr,none": 0.031429373141996994 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.6898803046789989, | |
| "acc_stderr,none": 0.01079187656684305, | |
| "acc_norm,none": 0.6822633297062024, | |
| "acc_norm_stderr,none": 0.010863133246569286, | |
| "bpb,none": 1.1267278287839135, | |
| "bpb_stderr,none": 0.011498569348231697, | |
| "logprob,none": -65.66394310858875, | |
| "logprob_stderr,none": 1.361309501469568, | |
| "choice_logprob,none": -2.8842120102243687, | |
| "choice_logprob_stderr,none": 0.20003484149747763, | |
| "choice_prob_norm,none": 0.5143371099470178, | |
| "choice_prob_norm_stderr,none": 0.0014191589687290998, | |
| "choice_logprob_norm,none": -0.672651949677491, | |
| "choice_logprob_norm_stderr,none": 0.003040303448085757 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.500394632991318, | |
| "acc_stderr,none": 0.014052481306049512, | |
| "bpb,none": 0.44746967272692195, | |
| "bpb_stderr,none": 0.02345581834077092, | |
| "logprob,none": -20.28466144733384, | |
| "logprob_stderr,none": 0.2879659049558859, | |
| "choice_logprob,none": -0.8118696336354043, | |
| "choice_logprob_stderr,none": 0.017177796509284464, | |
| "choice_prob_norm,none": 0.5009580522227739, | |
| "choice_prob_norm_stderr,none": 0.0007615390247721247, | |
| "choice_logprob_norm,none": -0.6933535946515011, | |
| "choice_logprob_norm_stderr,none": 0.0023013588988485354 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.5567765567765568, | |
| "acc_stderr,none": 0.03012086087018464, | |
| "bpb,none": 0.6474761978959332, | |
| "bpb_stderr,none": 0.02276077704193253, | |
| "logprob,none": -26.934241563845905, | |
| "logprob_stderr,none": 0.5106632336755854, | |
| "choice_logprob,none": -0.8604588058829552, | |
| "choice_logprob_stderr,none": 0.0579314769585508, | |
| "choice_prob_norm,none": 0.5023677852150373, | |
| "choice_prob_norm_stderr,none": 0.001311220814064811, | |
| "choice_logprob_norm,none": -0.689345064121781, | |
| "choice_logprob_norm_stderr,none": 0.0026084407923643274 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.271097298589529, | |
| "acc_stderr,none": 0.0068739340426092, | |
| "acc_norm,none": 0.271097298589529, | |
| "acc_norm_stderr,none": 0.0068739340426092, | |
| "logprob,none": -1.464151024846919, | |
| "logprob_stderr,none": 0.008543787761274825 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.2644035381305283, | |
| "acc_stderr,none": 0.006819639738871822, | |
| "acc_norm,none": 0.2902223284723882, | |
| "acc_norm_stderr,none": 0.007018342582039129, | |
| "bpb,none": 0.15479395761995557, | |
| "bpb_stderr,none": 0.0021831761941513003, | |
| "logprob,none": -1.8923888203062111, | |
| "logprob_stderr,none": 0.017482812769489587, | |
| "choice_logprob,none": -1.6034572713356525, | |
| "choice_logprob_stderr,none": 0.015064619654782398, | |
| "choice_prob_norm,none": 0.2527971935551483, | |
| "choice_prob_norm_stderr,none": 0.0003179931281454456, | |
| "choice_logprob_norm,none": -1.379051484358927, | |
| "choice_logprob_norm_stderr,none": 0.0014580276768533052 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.22031823745410037, | |
| "acc_stderr,none": 0.01450904517148729, | |
| "logprob,none": -20.957387032386286, | |
| "logprob_stderr,none": 0.448160644790156 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 145.7956747267624, | |
| "nll_stderr,none": 1.9411249646364972, | |
| "bpb,none": 0.747191230526963, | |
| "bpb_stderr,none": 0.006610166624908301 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 49.127946647202094, | |
| "nll_stderr,none": 2.8058110154980107, | |
| "bpb,none": 0.4534054835306593, | |
| "bpb_stderr,none": 0.019236755302746454 | |
| } | |
| } | |
| }, | |
| { | |
| "mix": "v4", | |
| "hidden_dim": 1024, | |
| "budget": 9e+18, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "acc,none": 0.2774533542230451, | |
| "acc_stderr,none": 0.0037723646961444024, | |
| "acc_norm,none": 0.28642643498077197, | |
| "acc_norm_stderr,none": 0.003807535783758128, | |
| "bpb,none": 0.3803269122433914, | |
| "bpb_stderr,none": 0.0020280858919267198, | |
| "logprob,none": -5.844794198463459, | |
| "logprob_stderr,none": 0.018316805879275064, | |
| "choice_logprob,none": -1.8287491768128128, | |
| "choice_logprob_stderr,none": 0.011684516189729936, | |
| "choice_prob_norm,none": 0.2531198595132669, | |
| "choice_prob_norm_stderr,none": 0.00026485418404609127, | |
| "choice_logprob_norm,none": -1.3833328282413355, | |
| "choice_logprob_norm_stderr,none": 0.0011203301238816258, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "acc,none": 0.2789488676826663, | |
| "acc_stderr,none": 0.003779197707879068, | |
| "acc_norm,none": 0.2879931633670417, | |
| "acc_norm_stderr,none": 0.0038097458146897248, | |
| "bpb,none": 0.21992023018916243, | |
| "bpb_stderr,none": 0.0013923877356860114, | |
| "logprob,none": -3.564730589500739, | |
| "logprob_stderr,none": 0.017863782973941433, | |
| "choice_logprob,none": -1.8814392643084947, | |
| "choice_logprob_stderr,none": 0.012669429676574627, | |
| "choice_prob_norm,none": 0.25269851613168215, | |
| "choice_prob_norm_stderr,none": 0.00020693702420684866, | |
| "choice_logprob_norm,none": -1.3814515889802632, | |
| "choice_logprob_norm_stderr,none": 0.0008816679758781082, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.23720136518771331, | |
| "acc_stderr,none": 0.012430399829260825, | |
| "acc_norm,none": 0.2627986348122867, | |
| "acc_norm_stderr,none": 0.012862523175351331, | |
| "bpb,none": 1.122671016532738, | |
| "bpb_stderr,none": 0.02156659298873546, | |
| "logprob,none": -19.1114692114319, | |
| "logprob_stderr,none": 0.33233501302887325, | |
| "choice_logprob,none": -4.86906685657334, | |
| "choice_logprob_stderr,none": 0.15676936362090027, | |
| "choice_prob_norm,none": 0.25520528693887606, | |
| "choice_prob_norm_stderr,none": 0.0020432570152471912, | |
| "choice_logprob_norm,none": -1.4106325795742098, | |
| "choice_logprob_norm_stderr,none": 0.009764167523830716 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.5660774410774411, | |
| "acc_stderr,none": 0.010169795770462108, | |
| "acc_norm,none": 0.5475589225589226, | |
| "acc_norm_stderr,none": 0.010213265860171399, | |
| "bpb,none": 0.9042355726644368, | |
| "bpb_stderr,none": 0.010423249883679883, | |
| "logprob,none": -12.602477428487656, | |
| "logprob_stderr,none": 0.19648418817108307, | |
| "choice_logprob,none": -2.0388860933192974, | |
| "choice_logprob_stderr,none": 0.07320207165571463, | |
| "choice_prob_norm,none": 0.30685700242403374, | |
| "choice_prob_norm_stderr,none": 0.0018183540756247286, | |
| "choice_logprob_norm,none": -1.2231684410890977, | |
| "choice_logprob_norm_stderr,none": 0.006201465599955083 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.5636085626911315, | |
| "acc_stderr,none": 0.008674000467432084, | |
| "bpb,none": 0.46067848498842306, | |
| "bpb_stderr,none": 0.005325747676230887, | |
| "logprob,none": -0.7747850852639668, | |
| "logprob_stderr,none": 0.007246572773184094, | |
| "choice_logprob,none": -0.7037545072531656, | |
| "choice_logprob_stderr,none": 0.007217177294864038, | |
| "choice_prob_norm,none": 0.5325794186447117, | |
| "choice_prob_norm_stderr,none": 0.0024977724222479975, | |
| "choice_logprob_norm,none": -0.6716482700585745, | |
| "choice_logprob_norm_stderr,none": 0.005302324269465953 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.41406727828746176, | |
| "acc_stderr,none": 0.008614932353134944, | |
| "acc_norm,none": 0.42691131498470947, | |
| "acc_norm_stderr,none": 0.0086511190696438, | |
| "bpb,none": 0.534590995082232, | |
| "bpb_stderr,none": 0.005468438995924815, | |
| "logprob,none": -1.0194541011017033, | |
| "logprob_stderr,none": 0.011602714762655012, | |
| "choice_logprob,none": -0.9879484043502937, | |
| "choice_logprob_stderr,none": 0.011650295516937031, | |
| "choice_prob_norm,none": 0.47919425468082766, | |
| "choice_prob_norm_stderr,none": 0.0024771178854644835, | |
| "choice_logprob_norm,none": -0.7815703185560569, | |
| "choice_logprob_norm_stderr,none": 0.005400403531963753 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.65, | |
| "acc_stderr,none": 0.04793724854411019, | |
| "bpb,none": 1.7019083597213787, | |
| "bpb_stderr,none": 0.04710704897624362, | |
| "logprob,none": -32.042115364074704, | |
| "logprob_stderr,none": 0.5908285100905646, | |
| "choice_logprob,none": -1.0528322688365988, | |
| "choice_logprob_stderr,none": 0.1874360472540367, | |
| "choice_prob_norm,none": 0.5120526610271129, | |
| "choice_prob_norm_stderr,none": 0.008026570580489788, | |
| "choice_logprob_norm,none": -0.6830492104032645, | |
| "choice_logprob_norm_stderr,none": 0.017254894455359437 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.19901719901719903, | |
| "acc_stderr,none": 0.011430809442838396, | |
| "bpb,none": 3.3766833013009983, | |
| "bpb_stderr,none": 0.07009656072885559, | |
| "logprob,none": -2.3405385099406324, | |
| "logprob_stderr,none": 0.048587233436155265, | |
| "choice_logprob,none": -2.3306571442623456, | |
| "choice_logprob_stderr,none": 0.04859626749409556, | |
| "choice_prob_norm,none": 0.20053994555124022, | |
| "choice_prob_norm_stderr,none": 0.005603288735735603, | |
| "choice_logprob_norm,none": -2.8393055229802457, | |
| "choice_logprob_norm_stderr,none": 0.07042486754199812 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.2031122031122031, | |
| "acc_stderr,none": 0.0115182547936341, | |
| "acc_norm,none": 0.21457821457821458, | |
| "acc_norm_stderr,none": 0.01175342309421685, | |
| "bpb,none": 0.4511414834609372, | |
| "bpb_stderr,none": 0.007965861823980845, | |
| "logprob,none": -3.6473876377483747, | |
| "logprob_stderr,none": 0.05557672894814405, | |
| "choice_logprob,none": -2.5809087316161494, | |
| "choice_logprob_stderr,none": 0.0532600056694442, | |
| "choice_prob_norm,none": 0.20605941344511416, | |
| "choice_prob_norm_stderr,none": 0.0013607278866299014, | |
| "choice_logprob_norm,none": -1.6102343051125687, | |
| "choice_logprob_norm_stderr,none": 0.007427904090546415 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.3437562238597889, | |
| "acc_stderr,none": 0.004739902411944531, | |
| "acc_norm,none": 0.40898227444732127, | |
| "acc_norm_stderr,none": 0.0049064119844767886, | |
| "bpb,none": 0.8910177605378113, | |
| "bpb_stderr,none": 0.0020666095537175736, | |
| "logprob,none": -81.55465481666378, | |
| "logprob_stderr,none": 0.4062560138921686, | |
| "choice_logprob,none": -19.694015247380943, | |
| "choice_logprob_stderr,none": 0.2527695825289582, | |
| "choice_prob_norm,none": 0.2694226770198269, | |
| "choice_prob_norm_stderr,none": 0.000422535399658907, | |
| "choice_logprob_norm,none": -1.323975923843565, | |
| "choice_logprob_norm_stderr,none": 0.0015923103921380202 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.3406691894045011, | |
| "acc_stderr,none": 0.004729656826803945, | |
| "acc_norm,none": 0.41047600079665403, | |
| "acc_norm_stderr,none": 0.004909148239488271, | |
| "bpb,none": 0.8784322372959009, | |
| "bpb_stderr,none": 0.0020547834420918203, | |
| "logprob,none": -80.53157815399504, | |
| "logprob_stderr,none": 0.4046564191726301, | |
| "choice_logprob,none": -20.014971737923123, | |
| "choice_logprob_stderr,none": 0.25572990843559773, | |
| "choice_prob_norm,none": 0.26936698050878854, | |
| "choice_prob_norm_stderr,none": 0.0004218675347711045, | |
| "choice_logprob_norm,none": -1.3241177544374372, | |
| "choice_logprob_norm_stderr,none": 0.001587492822164877 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.188, | |
| "acc_stderr,none": 0.01749067888034625, | |
| "acc_norm,none": 0.292, | |
| "acc_norm_stderr,none": 0.02035437548053006, | |
| "bpb,none": 1.8626723078338552, | |
| "bpb_stderr,none": 0.04876361788417803, | |
| "logprob,none": -19.33404068660736, | |
| "logprob_stderr,none": 0.5159979719157372, | |
| "choice_logprob,none": -6.121661900622359, | |
| "choice_logprob_stderr,none": 0.3037587927046394, | |
| "choice_prob_norm,none": 0.2706156420052292, | |
| "choice_prob_norm_stderr,none": 0.005846910147542166, | |
| "choice_logprob_norm,none": -1.4455013820083449, | |
| "choice_logprob_norm_stderr,none": 0.028513878219289402 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.7034820457018498, | |
| "acc_stderr,none": 0.010656078922661138, | |
| "acc_norm,none": 0.7067464635473341, | |
| "acc_norm_stderr,none": 0.010621818421101926, | |
| "bpb,none": 1.0952820472603881, | |
| "bpb_stderr,none": 0.010897094627065809, | |
| "logprob,none": -64.0548716683642, | |
| "logprob_stderr,none": 1.329642784085942, | |
| "choice_logprob,none": -2.7923947350166616, | |
| "choice_logprob_stderr,none": 0.20186784502783026, | |
| "choice_prob_norm,none": 0.5152923353674727, | |
| "choice_prob_norm_stderr,none": 0.0013546668938055843, | |
| "choice_logprob_norm,none": -0.6697296600130525, | |
| "choice_logprob_norm_stderr,none": 0.0027620892141826114 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.5193370165745856, | |
| "acc_stderr,none": 0.014041972733712977, | |
| "bpb,none": 0.4385273228372553, | |
| "bpb_stderr,none": 0.02343280298510262, | |
| "logprob,none": -19.836543400078753, | |
| "logprob_stderr,none": 0.2874090450194781, | |
| "choice_logprob,none": -0.780198803559572, | |
| "choice_logprob_stderr,none": 0.016388003586523526, | |
| "choice_prob_norm,none": 0.5012282444770617, | |
| "choice_prob_norm_stderr,none": 0.00082343001258754, | |
| "choice_logprob_norm,none": -0.6929080730440892, | |
| "choice_logprob_norm_stderr,none": 0.002277345281768227 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.5604395604395604, | |
| "acc_stderr,none": 0.030094646016767413, | |
| "bpb,none": 0.6676645055346196, | |
| "bpb_stderr,none": 0.02273293542937676, | |
| "logprob,none": -27.731201325580752, | |
| "logprob_stderr,none": 0.5024998047380301, | |
| "choice_logprob,none": -0.7743965635875873, | |
| "choice_logprob_stderr,none": 0.04520265330703877, | |
| "choice_prob_norm,none": 0.5022263467462416, | |
| "choice_prob_norm_stderr,none": 0.0012645326760995672, | |
| "choice_logprob_norm,none": -0.6895570740323064, | |
| "choice_logprob_norm_stderr,none": 0.0025022121928476066 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.29046139134592397, | |
| "acc_stderr,none": 0.007020050049541257, | |
| "acc_norm,none": 0.29046139134592397, | |
| "acc_norm_stderr,none": 0.007020050049541257, | |
| "logprob,none": -1.482259517818326, | |
| "logprob_stderr,none": 0.010804905175993704 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.27348792732488647, | |
| "acc_stderr,none": 0.006892844537516325, | |
| "acc_norm,none": 0.28448481950753046, | |
| "acc_norm_stderr,none": 0.006976650388273283, | |
| "bpb,none": 0.1889180090057425, | |
| "bpb_stderr,none": 0.002490507290640879, | |
| "logprob,none": -2.366434458480222, | |
| "logprob_stderr,none": 0.022541561684512896, | |
| "choice_logprob,none": -1.7610643124913015, | |
| "choice_logprob_stderr,none": 0.01969345898307532, | |
| "choice_prob_norm,none": 0.25339439814986003, | |
| "choice_prob_norm_stderr,none": 0.0004121143028173064, | |
| "choice_logprob_norm,none": -1.3792005120618667, | |
| "choice_logprob_norm_stderr,none": 0.0018467409714330132 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.23011015911872704, | |
| "acc_stderr,none": 0.014734557959807756, | |
| "logprob,none": -19.767552736485456, | |
| "logprob_stderr,none": 0.4362608490818928 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 124.9868801672229, | |
| "nll_stderr,none": 1.7562178141715343, | |
| "bpb,none": 0.6312360680966751, | |
| "bpb_stderr,none": 0.005629445208024275 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 50.14906453214041, | |
| "nll_stderr,none": 2.7024869472844553, | |
| "bpb,none": 0.4774618551645616, | |
| "bpb_stderr,none": 0.02005287009799475 | |
| } | |
| } | |
| }, | |
| { | |
| "mix": "v0", | |
| "hidden_dim": 1280, | |
| "budget": 2.83e+19, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "acc_norm,none": 0.29226605896595925, | |
| "acc_norm_stderr,none": 0.003829748537931336, | |
| "logprob,none": -6.209622505894744, | |
| "logprob_stderr,none": 0.015489851959268889, | |
| "choice_logprob,none": -1.666521599179239, | |
| "choice_logprob_stderr,none": 0.00986827146754822, | |
| "bpb,none": 0.4410641939573321, | |
| "bpb_stderr,none": 0.002417360467048572, | |
| "choice_logprob_norm,none": -1.3843110721639518, | |
| "choice_logprob_norm_stderr,none": 0.001109748013176671, | |
| "acc,none": 0.26883634809856144, | |
| "acc_stderr,none": 0.003735640929041117, | |
| "choice_prob_norm,none": 0.2527257228896304, | |
| "choice_prob_norm_stderr,none": 0.00026333469686721627 | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "logprob,none": -3.3674200068996916, | |
| "logprob_stderr,none": 0.016813833012880218, | |
| "bpb,none": 0.21742171317815961, | |
| "bpb_stderr,none": 0.0014461482395902168, | |
| "acc_norm,none": 0.2828656886483407, | |
| "acc_norm_stderr,none": 0.0037943694482700015, | |
| "choice_logprob_norm,none": -1.3838840680471238, | |
| "choice_logprob_norm_stderr,none": 0.0007215059248168945, | |
| "acc,none": 0.26627261073921094, | |
| "acc_stderr,none": 0.0037251087618102646, | |
| "choice_logprob,none": -1.7229442209787504, | |
| "choice_logprob_stderr,none": 0.010439823186440269, | |
| "choice_prob_norm,none": 0.25157403125961547, | |
| "choice_prob_norm_stderr,none": 0.0001703963440441645 | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.27303754266211605, | |
| "acc_stderr,none": 0.013019332762635744, | |
| "acc_norm,none": 0.3054607508532423, | |
| "acc_norm_stderr,none": 0.013460080478002505, | |
| "bpb,none": 1.0283761589598304, | |
| "bpb_stderr,none": 0.019987480088041555, | |
| "logprob,none": -17.726086085261745, | |
| "logprob_stderr,none": 0.3161510775591788, | |
| "choice_logprob,none": -4.381043890471866, | |
| "choice_logprob_stderr,none": 0.14355991812557373, | |
| "choice_prob_norm,none": 0.26016375124616575, | |
| "choice_prob_norm_stderr,none": 0.0020313690468874773, | |
| "choice_logprob_norm,none": -1.3857153166892604, | |
| "choice_logprob_norm_stderr,none": 0.008830226241080263 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.6031144781144782, | |
| "acc_stderr,none": 0.010039236800583207, | |
| "acc_norm,none": 0.5946969696969697, | |
| "acc_norm_stderr,none": 0.010074093589739192, | |
| "bpb,none": 0.8148919918668881, | |
| "bpb_stderr,none": 0.0102402340029807, | |
| "logprob,none": -11.559560100438217, | |
| "logprob_stderr,none": 0.1870772112182125, | |
| "choice_logprob,none": -1.7572758329717766, | |
| "choice_logprob_stderr,none": 0.06667789995280643, | |
| "choice_prob_norm,none": 0.3174056212233518, | |
| "choice_prob_norm_stderr,none": 0.0018724297881361682, | |
| "choice_logprob_norm,none": -1.1876341516951332, | |
| "choice_logprob_norm_stderr,none": 0.005962337814397002 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.5807339449541284, | |
| "acc_stderr,none": 0.008630302070999098, | |
| "bpb,none": 0.42498367106450263, | |
| "bpb_stderr,none": 0.005208711650660825, | |
| "logprob,none": -0.7036019508627211, | |
| "logprob_stderr,none": 0.00691061536637038, | |
| "choice_logprob,none": -0.6884317218506175, | |
| "choice_logprob_stderr,none": 0.006896944270013674, | |
| "choice_prob_norm,none": 0.5353019746155919, | |
| "choice_prob_norm_stderr,none": 0.002496901667984246, | |
| "choice_logprob_norm,none": -0.6657104660137152, | |
| "choice_logprob_norm_stderr,none": 0.005225205186616344 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.6198776758409786, | |
| "acc_stderr,none": 0.0084899909189892, | |
| "acc_norm,none": 0.6214067278287462, | |
| "acc_norm_stderr,none": 0.008483341718024479, | |
| "bpb,none": 0.5999890705120567, | |
| "bpb_stderr,none": 0.01207801489576029, | |
| "logprob,none": -0.8783978572314668, | |
| "logprob_stderr,none": 0.016216410708147073, | |
| "choice_logprob,none": -0.8516348796961529, | |
| "choice_logprob_stderr,none": 0.016231381752290774, | |
| "choice_prob_norm,none": 0.565980565018701, | |
| "choice_prob_norm_stderr,none": 0.004850349282933368, | |
| "choice_logprob_norm,none": -0.7542167961257918, | |
| "choice_logprob_norm_stderr,none": 0.011944739409233355 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.73, | |
| "acc_stderr,none": 0.0446196043338474, | |
| "bpb,none": 1.595618454194437, | |
| "bpb_stderr,none": 0.043333507469277076, | |
| "logprob,none": -30.05348129272461, | |
| "logprob_stderr,none": 0.5365979835667709, | |
| "choice_logprob,none": -0.8766987854590171, | |
| "choice_logprob_stderr,none": 0.16336826657602965, | |
| "choice_prob_norm,none": 0.5214883684868503, | |
| "choice_prob_norm_stderr,none": 0.008448802370021472, | |
| "choice_logprob_norm,none": -0.665533557235146, | |
| "choice_logprob_norm_stderr,none": 0.01764556442591378 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.21703521703521703, | |
| "acc_stderr,none": 0.01180201884653, | |
| "bpb,none": 2.725079745532547, | |
| "bpb_stderr,none": 0.0409573250841188, | |
| "logprob,none": -1.8888813424168993, | |
| "logprob_stderr,none": 0.028389454405334053, | |
| "choice_logprob,none": -1.8783109756143825, | |
| "choice_logprob_stderr,none": 0.028404217902383927, | |
| "choice_prob_norm,none": 0.20074997076219486, | |
| "choice_prob_norm_stderr,none": 0.003299421449442018, | |
| "choice_logprob_norm,none": -2.077638700704776, | |
| "choice_logprob_norm_stderr,none": 0.04117720478426129 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.22522522522522523, | |
| "acc_stderr,none": 0.011959591224286234, | |
| "acc_norm,none": 0.22358722358722358, | |
| "acc_norm_stderr,none": 0.011928612008761174, | |
| "bpb,none": 0.3175998808584069, | |
| "bpb_stderr,none": 0.005524235826180418, | |
| "logprob,none": -2.521169312658318, | |
| "logprob_stderr,none": 0.03560094846882005, | |
| "choice_logprob,none": -1.9576231799793016, | |
| "choice_logprob_stderr,none": 0.03470973076012509, | |
| "choice_prob_norm,none": 0.2048963995235781, | |
| "choice_prob_norm_stderr,none": 0.000922567238262749, | |
| "choice_logprob_norm,none": -1.599995325956514, | |
| "choice_logprob_norm_stderr,none": 0.0051925802377438095 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.38986257717586137, | |
| "acc_stderr,none": 0.004867221634461271, | |
| "acc_norm,none": 0.4887472615016929, | |
| "acc_norm_stderr,none": 0.004988517597998611, | |
| "bpb,none": 0.8415242444682122, | |
| "bpb_stderr,none": 0.0020012268461091645, | |
| "logprob,none": -76.97146024226288, | |
| "logprob_stderr,none": 0.3834460822769651, | |
| "choice_logprob,none": -16.843240086968393, | |
| "choice_logprob_stderr,none": 0.2292036319493721, | |
| "choice_prob_norm,none": 0.27690864474623345, | |
| "choice_prob_norm_stderr,none": 0.0004217564384003691, | |
| "choice_logprob_norm,none": -1.2958802116211106, | |
| "choice_logprob_norm_stderr,none": 0.0015479785034826032 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.38418641704839673, | |
| "acc_stderr,none": 0.004854082479916916, | |
| "acc_norm,none": 0.4896434973112926, | |
| "acc_norm_stderr,none": 0.004988710917169329, | |
| "bpb,none": 0.827420054400728, | |
| "bpb_stderr,none": 0.0019730830528583445, | |
| "logprob,none": -75.89870920900186, | |
| "logprob_stderr,none": 0.38215850087060366, | |
| "choice_logprob,none": -17.215097853552177, | |
| "choice_logprob_stderr,none": 0.2322699233354321, | |
| "choice_prob_norm,none": 0.2764504682104974, | |
| "choice_prob_norm_stderr,none": 0.000419227999480607, | |
| "choice_logprob_norm,none": -1.29740576523825, | |
| "choice_logprob_norm_stderr,none": 0.0015386710312781204 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.226, | |
| "acc_stderr,none": 0.018722956449139936, | |
| "acc_norm,none": 0.328, | |
| "acc_norm_stderr,none": 0.021017027165175495, | |
| "bpb,none": 1.8180603255364978, | |
| "bpb_stderr,none": 0.04992289763998061, | |
| "logprob,none": -18.71832051229477, | |
| "logprob_stderr,none": 0.4984652624372133, | |
| "choice_logprob,none": -5.756449158667543, | |
| "choice_logprob_stderr,none": 0.29307223445528263, | |
| "choice_prob_norm,none": 0.2771861143401492, | |
| "choice_prob_norm_stderr,none": 0.005959448128268381, | |
| "choice_logprob_norm,none": -1.4238737513088515, | |
| "choice_logprob_norm_stderr,none": 0.029916595311500378 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.7143634385201306, | |
| "acc_stderr,none": 0.010539303948661927, | |
| "acc_norm,none": 0.7132752992383025, | |
| "acc_norm_stderr,none": 0.010551314503108075, | |
| "bpb,none": 1.045407995402416, | |
| "bpb_stderr,none": 0.010281176695787196, | |
| "logprob,none": -61.57799939076711, | |
| "logprob_stderr,none": 1.2877505958045585, | |
| "choice_logprob,none": -2.5762836726033336, | |
| "choice_logprob_stderr,none": 0.18798789868527002, | |
| "choice_prob_norm,none": 0.5173234535007274, | |
| "choice_prob_norm_stderr,none": 0.0013264659761474242, | |
| "choice_logprob_norm,none": -0.665587866533371, | |
| "choice_logprob_norm_stderr,none": 0.0027402003861128465 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.5185477505919495, | |
| "acc_stderr,none": 0.014042813708888378, | |
| "bpb,none": 0.4340690453637434, | |
| "bpb_stderr,none": 0.02297466714977533, | |
| "logprob,none": -19.649428625603594, | |
| "logprob_stderr,none": 0.27863341577197265, | |
| "choice_logprob,none": -0.7738055543223351, | |
| "choice_logprob_stderr,none": 0.015506418418150121, | |
| "choice_prob_norm,none": 0.5012963998338761, | |
| "choice_prob_norm_stderr,none": 0.0007472258662358123, | |
| "choice_logprob_norm,none": -0.6923158523760211, | |
| "choice_logprob_norm_stderr,none": 0.0019636471033472047 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.6410256410256411, | |
| "acc_stderr,none": 0.029086064518366282, | |
| "bpb,none": 0.6369651079416323, | |
| "bpb_stderr,none": 0.021725909969731252, | |
| "logprob,none": -26.41959673144442, | |
| "logprob_stderr,none": 0.47508686226521085, | |
| "choice_logprob,none": -0.6856140437555511, | |
| "choice_logprob_stderr,none": 0.0407542274946971, | |
| "choice_prob_norm,none": 0.503661892304479, | |
| "choice_prob_norm_stderr,none": 0.0012730978135580076, | |
| "choice_logprob_norm,none": -0.6867133915338746, | |
| "choice_logprob_norm_stderr,none": 0.002521126622067467 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.2696629213483146, | |
| "acc_stderr,none": 0.006862467146147035, | |
| "acc_norm,none": 0.2696629213483146, | |
| "acc_norm_stderr,none": 0.006862467146147035, | |
| "logprob,none": -1.41153096153837, | |
| "logprob_stderr,none": 0.004057120176996225 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.27061917284245757, | |
| "acc_stderr,none": 0.00687012182714279, | |
| "acc_norm,none": 0.3067176667463543, | |
| "acc_norm_stderr,none": 0.007130704645763216, | |
| "bpb,none": 0.15086929656791995, | |
| "bpb_stderr,none": 0.0019006540999348866, | |
| "logprob,none": -1.8186738202368389, | |
| "logprob_stderr,none": 0.013975437849597278, | |
| "choice_logprob,none": -1.5058455539114988, | |
| "choice_logprob_stderr,none": 0.011836430715752234, | |
| "choice_prob_norm,none": 0.2525030822261794, | |
| "choice_prob_norm_stderr,none": 0.0002748409391057536, | |
| "choice_logprob_norm,none": -1.3790022896972258, | |
| "choice_logprob_norm_stderr,none": 0.0011577589398460758 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.23011015911872704, | |
| "acc_stderr,none": 0.014734557959807763, | |
| "logprob,none": -19.10854571860744, | |
| "logprob_stderr,none": 0.4024300847185795 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 132.44639959761915, | |
| "nll_stderr,none": 1.7152639585544431, | |
| "bpb,none": 0.6816285006144224, | |
| "bpb_stderr,none": 0.006052614179169423 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 50.32563555240631, | |
| "nll_stderr,none": 2.740384937137503, | |
| "bpb,none": 0.4865457768105658, | |
| "bpb_stderr,none": 0.020536436529556782 | |
| } | |
| } | |
| }, | |
| { | |
| "mix": "v2", | |
| "hidden_dim": 1280, | |
| "budget": 2.83e+19, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "choice_logprob,none": -1.6355644163636416, | |
| "choice_logprob_stderr,none": 0.00993168138907273, | |
| "acc_norm,none": 0.2942600769121208, | |
| "acc_norm_stderr,none": 0.003832776095212889, | |
| "acc,none": 0.2827944737216921, | |
| "acc_stderr,none": 0.0037907716737867903, | |
| "choice_logprob_norm,none": -1.382032849360097, | |
| "choice_logprob_norm_stderr,none": 0.0008785335683598963, | |
| "bpb,none": 0.37224887518444993, | |
| "bpb_stderr,none": 0.00199571516907844, | |
| "logprob,none": -5.386200733835919, | |
| "logprob_stderr,none": 0.01652340004633214, | |
| "choice_prob_norm,none": 0.2526551930696583, | |
| "choice_prob_norm_stderr,none": 0.0002125452947926354 | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "acc_norm,none": 0.277097279589802, | |
| "acc_norm_stderr,none": 0.0037643368162159296, | |
| "choice_logprob,none": -1.6686232811107222, | |
| "choice_logprob_stderr,none": 0.010236917091765866, | |
| "choice_logprob_norm,none": -1.3828063134991904, | |
| "choice_logprob_norm_stderr,none": 0.0006243860345584463, | |
| "acc,none": 0.2653468166927788, | |
| "acc_stderr,none": 0.0037118178455470127, | |
| "logprob,none": -3.2203891673529896, | |
| "logprob_stderr,none": 0.01716090349129126, | |
| "bpb,none": 0.20412977360285625, | |
| "bpb_stderr,none": 0.0013672943970149927, | |
| "choice_prob_norm,none": 0.2516315059406078, | |
| "choice_prob_norm_stderr,none": 0.0001511494982749355 | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.3037542662116041, | |
| "acc_stderr,none": 0.01343890918477876, | |
| "acc_norm,none": 0.34982935153583616, | |
| "acc_norm_stderr,none": 0.013936809212158294, | |
| "bpb,none": 1.0051468106645478, | |
| "bpb_stderr,none": 0.023198784622296246, | |
| "logprob,none": -17.086511841609617, | |
| "logprob_stderr,none": 0.31772037414415716, | |
| "choice_logprob,none": -4.346364128180887, | |
| "choice_logprob_stderr,none": 0.14955380981290733, | |
| "choice_prob_norm,none": 0.26689375012133293, | |
| "choice_prob_norm_stderr,none": 0.0023332473491658383, | |
| "choice_logprob_norm,none": -1.3760793440216341, | |
| "choice_logprob_norm_stderr,none": 0.011147063107963154 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.6515151515151515, | |
| "acc_stderr,none": 0.009777377947106538, | |
| "acc_norm,none": 0.6468855218855218, | |
| "acc_norm_stderr,none": 0.009807078935467613, | |
| "bpb,none": 0.7627812814045225, | |
| "bpb_stderr,none": 0.010633835928133899, | |
| "logprob,none": -10.932685228133643, | |
| "logprob_stderr,none": 0.1862389592816211, | |
| "choice_logprob,none": -1.6075675662596918, | |
| "choice_logprob_stderr,none": 0.06866105725822967, | |
| "choice_prob_norm,none": 0.3365922264060604, | |
| "choice_prob_norm_stderr,none": 0.002248177192882947, | |
| "choice_logprob_norm,none": -1.1387806607328435, | |
| "choice_logprob_norm_stderr,none": 0.006619520224730074 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.44036697247706424, | |
| "acc_stderr,none": 0.0086826356676869, | |
| "bpb,none": 0.5165486355515898, | |
| "bpb_stderr,none": 0.004568869184677957, | |
| "logprob,none": -0.9770105327670363, | |
| "logprob_stderr,none": 0.010064845379313079, | |
| "choice_logprob,none": -0.8866122233998849, | |
| "choice_logprob_stderr,none": 0.01003765511377256, | |
| "choice_prob_norm,none": 0.49596476745427315, | |
| "choice_prob_norm_stderr,none": 0.002130429975858172, | |
| "choice_logprob_norm,none": -0.7332640379794884, | |
| "choice_logprob_norm_stderr,none": 0.00451633810323634 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.5819571865443425, | |
| "acc_stderr,none": 0.008626774352070744, | |
| "acc_norm,none": 0.5954128440366973, | |
| "acc_norm_stderr,none": 0.008584355308932685, | |
| "bpb,none": 0.5318714914967736, | |
| "bpb_stderr,none": 0.009560120218176012, | |
| "logprob,none": -0.8367789745695365, | |
| "logprob_stderr,none": 0.012954331347065601, | |
| "choice_logprob,none": -0.7965980586116753, | |
| "choice_logprob_stderr,none": 0.012971533544866524, | |
| "choice_prob_norm,none": 0.5463688682187838, | |
| "choice_prob_norm_stderr,none": 0.0038899028314131247, | |
| "choice_logprob_norm,none": -0.7207930588823217, | |
| "choice_logprob_norm_stderr,none": 0.009386272318322439 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.71, | |
| "acc_stderr,none": 0.045604802157206824, | |
| "bpb,none": 1.6588899547828249, | |
| "bpb_stderr,none": 0.04996699798522133, | |
| "logprob,none": -31.084959506988525, | |
| "logprob_stderr,none": 0.5828067888657876, | |
| "choice_logprob,none": -1.000873777244134, | |
| "choice_logprob_stderr,none": 0.20769504742752298, | |
| "choice_prob_norm,none": 0.5229587520965494, | |
| "choice_prob_norm_stderr,none": 0.009465694784086385, | |
| "choice_logprob_norm,none": -0.6668312799032938, | |
| "choice_logprob_norm_stderr,none": 0.02017937387226091 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.22932022932022933, | |
| "acc_stderr,none": 0.012035891058050904, | |
| "bpb,none": 2.722085484127594, | |
| "bpb_stderr,none": 0.04119660416594591, | |
| "logprob,none": -1.886805878566192, | |
| "logprob_stderr,none": 0.028555310026269497, | |
| "choice_logprob,none": -1.8781857661860797, | |
| "choice_logprob_stderr,none": 0.02856460110638303, | |
| "choice_prob_norm,none": 0.2141878789545394, | |
| "choice_prob_norm_stderr,none": 0.0052183509464165685, | |
| "choice_logprob_norm,none": -2.1209128340609134, | |
| "choice_logprob_norm_stderr,none": 0.04174733558816356 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.2416052416052416, | |
| "acc_stderr,none": 0.012255214642330772, | |
| "acc_norm,none": 0.2596232596232596, | |
| "acc_norm_stderr,none": 0.012552154236305978, | |
| "bpb,none": 0.29461692125090677, | |
| "bpb_stderr,none": 0.005511764022747519, | |
| "logprob,none": -2.3587569456721407, | |
| "logprob_stderr,none": 0.0390204882966398, | |
| "choice_logprob,none": -2.051425451423569, | |
| "choice_logprob_stderr,none": 0.038252338995030034, | |
| "choice_prob_norm,none": 0.20556295095644472, | |
| "choice_prob_norm_stderr,none": 0.0009840677417696346, | |
| "choice_logprob_norm,none": -1.597807553182379, | |
| "choice_logprob_norm_stderr,none": 0.005283126369552082 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.4246166102370046, | |
| "acc_stderr,none": 0.004932745013072715, | |
| "acc_norm,none": 0.5495917147978491, | |
| "acc_norm_stderr,none": 0.004965177633049898, | |
| "bpb,none": 0.842218931242513, | |
| "bpb_stderr,none": 0.0020964828004861073, | |
| "logprob,none": -76.82175086138615, | |
| "logprob_stderr,none": 0.38384521535442956, | |
| "choice_logprob,none": -15.645702940141794, | |
| "choice_logprob_stderr,none": 0.2233565615576406, | |
| "choice_prob_norm,none": 0.2841604287383886, | |
| "choice_prob_norm_stderr,none": 0.00044807381921523374, | |
| "choice_logprob_norm,none": -1.2709554769581326, | |
| "choice_logprob_norm_stderr,none": 0.0016109907123332992 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.40310695080661224, | |
| "acc_stderr,none": 0.004895194143892684, | |
| "acc_norm,none": 0.5241983668591914, | |
| "acc_norm_stderr,none": 0.0049839343432504565, | |
| "bpb,none": 0.8201113158799554, | |
| "bpb_stderr,none": 0.002012125040576876, | |
| "logprob,none": -75.05595897241693, | |
| "logprob_stderr,none": 0.3782021324643839, | |
| "choice_logprob,none": -16.314701318311375, | |
| "choice_logprob_stderr,none": 0.22663824711571423, | |
| "choice_prob_norm,none": 0.28006321862407313, | |
| "choice_prob_norm_stderr,none": 0.0004279107876750804, | |
| "choice_logprob_norm,none": -1.2846411563723752, | |
| "choice_logprob_norm_stderr,none": 0.001554748796237787 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.242, | |
| "acc_stderr,none": 0.019173085678337157, | |
| "acc_norm,none": 0.334, | |
| "acc_norm_stderr,none": 0.021113492347743727, | |
| "bpb,none": 1.8373831035676307, | |
| "bpb_stderr,none": 0.05059676581196407, | |
| "logprob,none": -18.901991720438005, | |
| "logprob_stderr,none": 0.5034766353234749, | |
| "choice_logprob,none": -5.856424473935738, | |
| "choice_logprob_stderr,none": 0.2982831669792747, | |
| "choice_prob_norm,none": 0.28294638983188414, | |
| "choice_prob_norm_stderr,none": 0.006367518381123621, | |
| "choice_logprob_norm,none": -1.4132429012816998, | |
| "choice_logprob_norm_stderr,none": 0.02956249651515355 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.7247007616974973, | |
| "acc_stderr,none": 0.01042142927736953, | |
| "acc_norm,none": 0.7181719260065288, | |
| "acc_norm_stderr,none": 0.01049667523125815, | |
| "bpb,none": 1.0331715040761036, | |
| "bpb_stderr,none": 0.010686860780086791, | |
| "logprob,none": -60.96679821263459, | |
| "logprob_stderr,none": 1.2924989329529273, | |
| "choice_logprob,none": -2.4510160295291232, | |
| "choice_logprob_stderr,none": 0.18160293905688882, | |
| "choice_prob_norm,none": 0.5200233189363311, | |
| "choice_prob_norm_stderr,none": 0.0013944298015675302, | |
| "choice_logprob_norm,none": -0.6608648059728867, | |
| "choice_logprob_norm_stderr,none": 0.0028196221716540477 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.5461720599842147, | |
| "acc_stderr,none": 0.013992441563707063, | |
| "bpb,none": 0.43572332810692266, | |
| "bpb_stderr,none": 0.02351526655579996, | |
| "logprob,none": -19.668826701985445, | |
| "logprob_stderr,none": 0.2815252237442725, | |
| "choice_logprob,none": -0.753119375795119, | |
| "choice_logprob_stderr,none": 0.015587721758187565, | |
| "choice_prob_norm,none": 0.5017349921317078, | |
| "choice_prob_norm_stderr,none": 0.0007873641529697937, | |
| "choice_logprob_norm,none": -0.6918008109489324, | |
| "choice_logprob_norm_stderr,none": 0.002256961442328746 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.6373626373626373, | |
| "acc_stderr,none": 0.029150440533497746, | |
| "bpb,none": 0.6744923062517574, | |
| "bpb_stderr,none": 0.021573641072027108, | |
| "logprob,none": -28.171755874549948, | |
| "logprob_stderr,none": 0.4880426292925857, | |
| "choice_logprob,none": -0.7520041384529634, | |
| "choice_logprob_stderr,none": 0.05351695138599668, | |
| "choice_prob_norm,none": 0.5044346634265771, | |
| "choice_prob_norm_stderr,none": 0.001298596757931072, | |
| "choice_logprob_norm,none": -0.685193426286039, | |
| "choice_logprob_norm_stderr,none": 0.0025251037658414323 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.3162801816877839, | |
| "acc_stderr,none": 0.007190896863029239, | |
| "acc_norm,none": 0.3162801816877839, | |
| "acc_norm_stderr,none": 0.007190896863029239, | |
| "logprob,none": -1.4593657617103875, | |
| "logprob_stderr,none": 0.010492940476909497 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.3055223523786756, | |
| "acc_stderr,none": 0.0071229290172453805, | |
| "acc_norm,none": 0.3174754960554626, | |
| "acc_norm_stderr,none": 0.0071981719187398634, | |
| "bpb,none": 0.14504181755898918, | |
| "bpb_stderr,none": 0.002069838836516618, | |
| "logprob,none": -1.7652322429733474, | |
| "logprob_stderr,none": 0.018427749438524245, | |
| "choice_logprob,none": -1.5695841908610249, | |
| "choice_logprob_stderr,none": 0.01624033496449477, | |
| "choice_prob_norm,none": 0.25405428691108856, | |
| "choice_prob_norm_stderr,none": 0.00032918705979028377, | |
| "choice_logprob_norm,none": -1.374203763504559, | |
| "choice_logprob_norm_stderr,none": 0.001471903951849426 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.2178702570379437, | |
| "acc_stderr,none": 0.014450846714123895, | |
| "logprob,none": -19.17639766095464, | |
| "logprob_stderr,none": 0.40257679326714874 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 124.43013503065247, | |
| "nll_stderr,none": 1.7457547832049456, | |
| "bpb,none": 0.631483501899567, | |
| "bpb_stderr,none": 0.005981006908265368 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 57.852137891257684, | |
| "nll_stderr,none": 3.13853121848385, | |
| "bpb,none": 0.5336717673608972, | |
| "bpb_stderr,none": 0.019712068441408603 | |
| } | |
| } | |
| }, | |
| { | |
| "mix": "v3", | |
| "hidden_dim": 1280, | |
| "budget": 2.83e+19, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "acc,none": 0.2805868109955847, | |
| "acc_stderr,none": 0.0037835552116241037, | |
| "acc_norm,none": 0.2879219484403931, | |
| "acc_norm_stderr,none": 0.0038100683058163503, | |
| "bpb,none": 0.29564193249916315, | |
| "bpb_stderr,none": 0.00163539261025707, | |
| "logprob,none": -4.586842207251352, | |
| "logprob_stderr,none": 0.015925438089625925, | |
| "choice_logprob,none": -1.6562007130156302, | |
| "choice_logprob_stderr,none": 0.009092790810744425, | |
| "choice_prob_norm,none": 0.2524032845490927, | |
| "choice_prob_norm_stderr,none": 0.00021544351320955767, | |
| "choice_logprob_norm,none": -1.3830507908053524, | |
| "choice_logprob_norm_stderr,none": 0.0008968018751013425, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "acc,none": 0.27439111237715424, | |
| "acc_stderr,none": 0.0037534949331315937, | |
| "acc_norm,none": 0.2864976499074206, | |
| "acc_norm_stderr,none": 0.00380537568791088, | |
| "bpb,none": 0.18563286499798612, | |
| "bpb_stderr,none": 0.0012545152052620288, | |
| "logprob,none": -2.8578264987061446, | |
| "logprob_stderr,none": 0.014808275213136313, | |
| "choice_logprob,none": -1.5779558935911742, | |
| "choice_logprob_stderr,none": 0.008331239974153823, | |
| "choice_prob_norm,none": 0.25169926597413794, | |
| "choice_prob_norm_stderr,none": 0.00014606058989178342, | |
| "choice_logprob_norm,none": -1.382456117239998, | |
| "choice_logprob_norm_stderr,none": 0.0005969432966090565, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.2781569965870307, | |
| "acc_stderr,none": 0.0130944699195388, | |
| "acc_norm,none": 0.31569965870307165, | |
| "acc_norm_stderr,none": 0.013582571095815291, | |
| "bpb,none": 1.004577123505464, | |
| "bpb_stderr,none": 0.021011051134894577, | |
| "logprob,none": -17.43483729696111, | |
| "logprob_stderr,none": 0.3249676602454631, | |
| "choice_logprob,none": -4.554611755793552, | |
| "choice_logprob_stderr,none": 0.15300655584730743, | |
| "choice_prob_norm,none": 0.2619648526693254, | |
| "choice_prob_norm_stderr,none": 0.0021679851263837527, | |
| "choice_logprob_norm,none": -1.385142044173578, | |
| "choice_logprob_norm_stderr,none": 0.009742104467355102 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.6077441077441077, | |
| "acc_stderr,none": 0.010018744689650043, | |
| "acc_norm,none": 0.6165824915824916, | |
| "acc_norm_stderr,none": 0.009976995068264717, | |
| "bpb,none": 0.7901136817738874, | |
| "bpb_stderr,none": 0.010450610179742914, | |
| "logprob,none": -11.305382818660952, | |
| "logprob_stderr,none": 0.1897638487309862, | |
| "choice_logprob,none": -1.7970027483644972, | |
| "choice_logprob_stderr,none": 0.06922270693962838, | |
| "choice_prob_norm,none": 0.3219051432437752, | |
| "choice_prob_norm_stderr,none": 0.0020288902873620536, | |
| "choice_logprob_norm,none": -1.1792201909120759, | |
| "choice_logprob_norm_stderr,none": 0.006375784803674426 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.5305810397553516, | |
| "acc_stderr,none": 0.008728682900189716, | |
| "bpb,none": 0.4231851610730653, | |
| "bpb_stderr,none": 0.0037642867645096014, | |
| "logprob,none": -0.7694294305022703, | |
| "logprob_stderr,none": 0.00735201818622056, | |
| "choice_logprob,none": -0.7451270770784845, | |
| "choice_logprob_stderr,none": 0.00726881733581359, | |
| "choice_prob_norm,none": 0.5188823602153386, | |
| "choice_prob_norm_stderr,none": 0.0019058689414028012, | |
| "choice_logprob_norm,none": -0.6791780780330144, | |
| "choice_logprob_norm_stderr,none": 0.0038249251654363606 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.5571865443425077, | |
| "acc_stderr,none": 0.008687668766930827, | |
| "acc_norm,none": 0.6003058103975535, | |
| "acc_norm_stderr,none": 0.008567275456584977, | |
| "bpb,none": 0.48365323953772843, | |
| "bpb_stderr,none": 0.004755316023137264, | |
| "logprob,none": -0.816987519417334, | |
| "logprob_stderr,none": 0.00618412365524644, | |
| "choice_logprob,none": -0.6970726227422511, | |
| "choice_logprob_stderr,none": 0.006084712635604526, | |
| "choice_prob_norm,none": 0.5286375143943968, | |
| "choice_prob_norm_stderr,none": 0.0022608907687295464, | |
| "choice_logprob_norm,none": -0.6711646122538006, | |
| "choice_logprob_norm_stderr,none": 0.00472307616647191 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.71, | |
| "acc_stderr,none": 0.045604802157206824, | |
| "bpb,none": 1.5776713479546853, | |
| "bpb_stderr,none": 0.04742787119329083, | |
| "logprob,none": -29.552455520629884, | |
| "logprob_stderr,none": 0.5787075614098893, | |
| "choice_logprob,none": -0.9883281715260241, | |
| "choice_logprob_stderr,none": 0.17475016771875287, | |
| "choice_prob_norm,none": 0.5138977264414717, | |
| "choice_prob_norm_stderr,none": 0.008287843399348597, | |
| "choice_logprob_norm,none": -0.6809176362984224, | |
| "choice_logprob_norm_stderr,none": 0.01835793383033253 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.21621621621621623, | |
| "acc_stderr,none": 0.011785889175486662, | |
| "bpb,none": 3.0773994144024783, | |
| "bpb_stderr,none": 0.05960158184136445, | |
| "logprob,none": -2.1330907275499036, | |
| "logprob_stderr,none": 0.04131266841025459, | |
| "choice_logprob,none": -2.1295597913030426, | |
| "choice_logprob_stderr,none": 0.041315162119509, | |
| "choice_prob_norm,none": 0.207259443443934, | |
| "choice_prob_norm_stderr,none": 0.0054614654643658665, | |
| "choice_logprob_norm,none": -2.518117658410582, | |
| "choice_logprob_norm_stderr,none": 0.060043035622258066 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.2194922194922195, | |
| "acc_stderr,none": 0.011849997754533981, | |
| "acc_norm,none": 0.22522522522522523, | |
| "acc_norm_stderr,none": 0.01195959122428624, | |
| "bpb,none": 0.278426458562545, | |
| "bpb_stderr,none": 0.00561864110394927, | |
| "logprob,none": -2.2275865034281868, | |
| "logprob_stderr,none": 0.03969882911879949, | |
| "choice_logprob,none": -2.111006653128628, | |
| "choice_logprob_stderr,none": 0.0393815589292252, | |
| "choice_prob_norm,none": 0.20387185409223316, | |
| "choice_prob_norm_stderr,none": 0.0009731985933530395, | |
| "choice_logprob_norm,none": -1.6065435407863369, | |
| "choice_logprob_norm_stderr,none": 0.005409053841909447 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.3752240589523999, | |
| "acc_stderr,none": 0.004831911860478678, | |
| "acc_norm,none": 0.4751045608444533, | |
| "acc_norm_stderr,none": 0.004983592410934169, | |
| "bpb,none": 0.855193833332515, | |
| "bpb_stderr,none": 0.002054792160447089, | |
| "logprob,none": -78.06590635993523, | |
| "logprob_stderr,none": 0.38839573001040845, | |
| "choice_logprob,none": -17.538550240006114, | |
| "choice_logprob_stderr,none": 0.23479525747783578, | |
| "choice_prob_norm,none": 0.27543585186615815, | |
| "choice_prob_norm_stderr,none": 0.00042570129200358506, | |
| "choice_logprob_norm,none": -1.301569766118517, | |
| "choice_logprob_norm_stderr,none": 0.001571768699400334 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.37273451503684524, | |
| "acc_stderr,none": 0.004825441080261189, | |
| "acc_norm,none": 0.47161919936267677, | |
| "acc_norm_stderr,none": 0.004981736689518747, | |
| "bpb,none": 0.8430849958045082, | |
| "bpb_stderr,none": 0.0020405100556970075, | |
| "logprob,none": -77.16160491272122, | |
| "logprob_stderr,none": 0.38786765623999814, | |
| "choice_logprob,none": -17.961448764992756, | |
| "choice_logprob_stderr,none": 0.23845571414769917, | |
| "choice_prob_norm,none": 0.275012911847633, | |
| "choice_prob_norm_stderr,none": 0.00042651187996399265, | |
| "choice_logprob_norm,none": -1.3031529086866211, | |
| "choice_logprob_norm_stderr,none": 0.0015738790612415988 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.234, | |
| "acc_stderr,none": 0.01895274156489368, | |
| "acc_norm,none": 0.316, | |
| "acc_norm_stderr,none": 0.02081235951585586, | |
| "bpb,none": 1.8224381060467416, | |
| "bpb_stderr,none": 0.047799940063301856, | |
| "logprob,none": -18.878421821594237, | |
| "logprob_stderr,none": 0.5024028120213828, | |
| "choice_logprob,none": -5.799801592879904, | |
| "choice_logprob_stderr,none": 0.2982324566604542, | |
| "choice_prob_norm,none": 0.2798389158047688, | |
| "choice_prob_norm_stderr,none": 0.005934231013412345, | |
| "choice_logprob_norm,none": -1.4071986250743183, | |
| "choice_logprob_norm_stderr,none": 0.027636792490084664 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.7100108813928183, | |
| "acc_stderr,none": 0.010586899128169326, | |
| "acc_norm,none": 0.7121871599564744, | |
| "acc_norm_stderr,none": 0.01056325038305919, | |
| "bpb,none": 1.0632483954888707, | |
| "bpb_stderr,none": 0.011094055239906829, | |
| "logprob,none": -62.06737054937942, | |
| "logprob_stderr,none": 1.2907558118508673, | |
| "choice_logprob,none": -2.6415280056968267, | |
| "choice_logprob_stderr,none": 0.19373254353608907, | |
| "choice_prob_norm,none": 0.5172964759363821, | |
| "choice_prob_norm_stderr,none": 0.0014099241438236664, | |
| "choice_logprob_norm,none": -0.6666790420392044, | |
| "choice_logprob_norm_stderr,none": 0.002980988229909506 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.5224940805051302, | |
| "acc_stderr,none": 0.014038257824059885, | |
| "bpb,none": 0.43958825417755576, | |
| "bpb_stderr,none": 0.023636284026221113, | |
| "logprob,none": -19.90092382303071, | |
| "logprob_stderr,none": 0.2852125839905403, | |
| "choice_logprob,none": -0.7962947750599071, | |
| "choice_logprob_stderr,none": 0.01823518540912786, | |
| "choice_prob_norm,none": 0.5011304078017509, | |
| "choice_prob_norm_stderr,none": 0.0007359960478038117, | |
| "choice_logprob_norm,none": -0.6932266355873123, | |
| "choice_logprob_norm_stderr,none": 0.002570114917974759 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.5641025641025641, | |
| "acc_stderr,none": 0.030066767691175837, | |
| "bpb,none": 0.612532433855645, | |
| "bpb_stderr,none": 0.021271710060884862, | |
| "logprob,none": -25.572265995291126, | |
| "logprob_stderr,none": 0.5036890338925973, | |
| "choice_logprob,none": -0.7985367154694579, | |
| "choice_logprob_stderr,none": 0.053060844835365384, | |
| "choice_prob_norm,none": 0.5027888183948848, | |
| "choice_prob_norm_stderr,none": 0.001374804067229151, | |
| "choice_logprob_norm,none": -0.6885960337330129, | |
| "choice_logprob_norm_stderr,none": 0.0027288547464764477 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.284962945254602, | |
| "acc_stderr,none": 0.006980177321467588, | |
| "acc_norm,none": 0.284962945254602, | |
| "acc_norm_stderr,none": 0.006980177321467588, | |
| "logprob,none": -1.4640987695560141, | |
| "logprob_stderr,none": 0.007654463975283136 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.2713363614630648, | |
| "acc_stderr,none": 0.006875836376429595, | |
| "acc_norm,none": 0.28544107100167343, | |
| "acc_norm_stderr,none": 0.006983694646127615, | |
| "bpb,none": 0.1420662059671355, | |
| "bpb_stderr,none": 0.0016415787355263745, | |
| "logprob,none": -1.723408536928965, | |
| "logprob_stderr,none": 0.012804282714664252, | |
| "choice_logprob,none": -1.515039402430691, | |
| "choice_logprob_stderr,none": 0.011579147478589224, | |
| "choice_prob_norm,none": 0.25206391786261856, | |
| "choice_prob_norm_stderr,none": 0.00026129809605062555, | |
| "choice_logprob_norm,none": -1.3804023670330987, | |
| "choice_logprob_norm_stderr,none": 0.0010727030954833818 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.2350061199510404, | |
| "acc_stderr,none": 0.014843061507731608, | |
| "logprob,none": -18.407885265612983, | |
| "logprob_stderr,none": 0.41773859135898134 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 124.84876262066128, | |
| "nll_stderr,none": 1.7222330277692024, | |
| "bpb,none": 0.6359938881622156, | |
| "bpb_stderr,none": 0.00580424435057278 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 42.3871434447242, | |
| "nll_stderr,none": 2.496412503533855, | |
| "bpb,none": 0.39398483856328514, | |
| "bpb_stderr,none": 0.01722858528466455 | |
| } | |
| } | |
| }, | |
| { | |
| "mix": "v4", | |
| "hidden_dim": 1280, | |
| "budget": 2.83e+19, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "acc,none": 0.27581541091012673, | |
| "acc_stderr,none": 0.0037655102250954736, | |
| "acc_norm,none": 0.28578550064093433, | |
| "acc_norm_stderr,none": 0.003801300172960957, | |
| "bpb,none": 0.3761284356347318, | |
| "bpb_stderr,none": 0.0020014477738698813, | |
| "logprob,none": -5.662279944632107, | |
| "logprob_stderr,none": 0.016687573808107634, | |
| "choice_logprob,none": -1.673632175862143, | |
| "choice_logprob_stderr,none": 0.009820667712085052, | |
| "choice_prob_norm,none": 0.25268171755591723, | |
| "choice_prob_norm_stderr,none": 0.00023939088147203121, | |
| "choice_logprob_norm,none": -1.3833894893827103, | |
| "choice_logprob_norm_stderr,none": 0.0010189355236283042, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "acc,none": 0.27887765275601767, | |
| "acc_stderr,none": 0.0037784107520474164, | |
| "acc_norm,none": 0.290414470873095, | |
| "acc_norm_stderr,none": 0.0038235243498804905, | |
| "bpb,none": 0.20180181204116973, | |
| "bpb_stderr,none": 0.0014229922195093773, | |
| "logprob,none": -3.1823982094482552, | |
| "logprob_stderr,none": 0.01701034616913897, | |
| "choice_logprob,none": -1.6487133829622105, | |
| "choice_logprob_stderr,none": 0.009845379108928897, | |
| "choice_prob_norm,none": 0.25144322116629586, | |
| "choice_prob_norm_stderr,none": 0.0001526810883746589, | |
| "choice_logprob_norm,none": -1.383706412618714, | |
| "choice_logprob_norm_stderr,none": 0.0006480126080205595, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.2721843003412969, | |
| "acc_stderr,none": 0.0130066004064237, | |
| "acc_norm,none": 0.30802047781569963, | |
| "acc_norm_stderr,none": 0.01349142951729204, | |
| "bpb,none": 1.004044559728517, | |
| "bpb_stderr,none": 0.02048653979427857, | |
| "logprob,none": -17.376599727281125, | |
| "logprob_stderr,none": 0.3198441947941251, | |
| "choice_logprob,none": -4.402544551034342, | |
| "choice_logprob_stderr,none": 0.14604037734305347, | |
| "choice_prob_norm,none": 0.26219375712543186, | |
| "choice_prob_norm_stderr,none": 0.002171320809011532, | |
| "choice_logprob_norm,none": -1.3848545061941255, | |
| "choice_logprob_norm_stderr,none": 0.00985341268112613 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.6123737373737373, | |
| "acc_stderr,none": 0.009997307914447612, | |
| "acc_norm,none": 0.6039562289562289, | |
| "acc_norm_stderr,none": 0.010035580962097949, | |
| "bpb,none": 0.8010086370262028, | |
| "bpb_stderr,none": 0.010519090088396273, | |
| "logprob,none": -11.337275293263723, | |
| "logprob_stderr,none": 0.18745096671919237, | |
| "choice_logprob,none": -1.7834794454579106, | |
| "choice_logprob_stderr,none": 0.06781658640079403, | |
| "choice_prob_norm,none": 0.3207981167012948, | |
| "choice_prob_norm_stderr,none": 0.0019661397554196205, | |
| "choice_logprob_norm,none": -1.180810501076774, | |
| "choice_logprob_norm_stderr,none": 0.006252532908298729 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.5828746177370031, | |
| "acc_stderr,none": 0.008624092785001305, | |
| "bpb,none": 0.424077273007807, | |
| "bpb_stderr,none": 0.004402659753550807, | |
| "logprob,none": -0.7095859006092818, | |
| "logprob_stderr,none": 0.0056372946348402595, | |
| "choice_logprob,none": -0.6812509971851064, | |
| "choice_logprob_stderr,none": 0.005612186414219027, | |
| "choice_prob_norm,none": 0.5297755603930668, | |
| "choice_prob_norm_stderr,none": 0.002130059118157897, | |
| "choice_logprob_norm,none": -0.6649111950908801, | |
| "choice_logprob_norm_stderr,none": 0.004407980311407431 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.618960244648318, | |
| "acc_stderr,none": 0.00849393752443933, | |
| "acc_norm,none": 0.6214067278287462, | |
| "acc_norm_stderr,none": 0.008483341718024479, | |
| "bpb,none": 0.6787550750990903, | |
| "bpb_stderr,none": 0.013136290003665905, | |
| "logprob,none": -1.0040385069650248, | |
| "logprob_stderr,none": 0.01751576350015502, | |
| "choice_logprob,none": -0.8903443609749093, | |
| "choice_logprob_stderr,none": 0.017546302326646203, | |
| "choice_prob_norm,none": 0.5675065356066568, | |
| "choice_prob_norm_stderr,none": 0.005100287590387704, | |
| "choice_logprob_norm,none": -0.7794454336526722, | |
| "choice_logprob_norm_stderr,none": 0.01299160917527106 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.71, | |
| "acc_stderr,none": 0.045604802157206824, | |
| "bpb,none": 1.544881163587024, | |
| "bpb_stderr,none": 0.04224672310779111, | |
| "logprob,none": -29.091637153625488, | |
| "logprob_stderr,none": 0.5256394129910011, | |
| "choice_logprob,none": -1.0821095002832515, | |
| "choice_logprob_stderr,none": 0.19938560807250813, | |
| "choice_prob_norm,none": 0.5166427832287669, | |
| "choice_prob_norm_stderr,none": 0.007955090348616845, | |
| "choice_logprob_norm,none": -0.6735219157148393, | |
| "choice_logprob_norm_stderr,none": 0.01681896816881189 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.2031122031122031, | |
| "acc_stderr,none": 0.0115182547936341, | |
| "bpb,none": 3.4662226834005994, | |
| "bpb_stderr,none": 0.07760683991934131, | |
| "logprob,none": -2.402602480192052, | |
| "logprob_stderr,none": 0.053792962282258425, | |
| "choice_logprob,none": -2.399173130363177, | |
| "choice_logprob_stderr,none": 0.05379191867983419, | |
| "choice_prob_norm,none": 0.20228046783106549, | |
| "choice_prob_norm_stderr,none": 0.005927960143637226, | |
| "choice_logprob_norm,none": -2.945002693886426, | |
| "choice_logprob_norm_stderr,none": 0.07836870414941635 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.21457821457821458, | |
| "acc_stderr,none": 0.01175342309421685, | |
| "acc_norm,none": 0.21785421785421785, | |
| "acc_norm_stderr,none": 0.011818079981132528, | |
| "bpb,none": 0.2971528895582945, | |
| "bpb_stderr,none": 0.00657122035739363, | |
| "logprob,none": -2.36769612986567, | |
| "logprob_stderr,none": 0.046303055084943257, | |
| "choice_logprob,none": -2.222552686903668, | |
| "choice_logprob_stderr,none": 0.04618156469383482, | |
| "choice_prob_norm,none": 0.2033634953423176, | |
| "choice_prob_norm_stderr,none": 0.0010928332934984917, | |
| "choice_logprob_norm,none": -1.614389554809131, | |
| "choice_logprob_norm_stderr,none": 0.00632740787966796 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.383788090021908, | |
| "acc_stderr,none": 0.0048531342715477715, | |
| "acc_norm,none": 0.4758016331408086, | |
| "acc_norm_stderr,none": 0.004983934343250453, | |
| "bpb,none": 0.8481104875212312, | |
| "bpb_stderr,none": 0.002012050778454794, | |
| "logprob,none": -77.5695962406824, | |
| "logprob_stderr,none": 0.3867355755639212, | |
| "choice_logprob,none": -17.13094384233126, | |
| "choice_logprob_stderr,none": 0.2322080139429845, | |
| "choice_prob_norm,none": 0.2762306795727128, | |
| "choice_prob_norm_stderr,none": 0.0004226701999579038, | |
| "choice_logprob_norm,none": -1.2984380262627977, | |
| "choice_logprob_norm_stderr,none": 0.0015549430400205245 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.38109938259310894, | |
| "acc_stderr,none": 0.004846643735666544, | |
| "acc_norm,none": 0.47191794463254333, | |
| "acc_norm_stderr,none": 0.0049819052938781415, | |
| "bpb,none": 0.8341103490332094, | |
| "bpb_stderr,none": 0.0019988853798998825, | |
| "logprob,none": -76.39748609609704, | |
| "logprob_stderr,none": 0.3843855373550899, | |
| "choice_logprob,none": -17.535902908856652, | |
| "choice_logprob_stderr,none": 0.23551377069804363, | |
| "choice_prob_norm,none": 0.2757732593282485, | |
| "choice_prob_norm_stderr,none": 0.0004213091077415382, | |
| "choice_logprob_norm,none": -1.3000195912908532, | |
| "choice_logprob_norm_stderr,none": 0.0015490835224453204 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.236, | |
| "acc_stderr,none": 0.019008699622084718, | |
| "acc_norm,none": 0.33, | |
| "acc_norm_stderr,none": 0.021049612166134813, | |
| "bpb,none": 1.8203886772407774, | |
| "bpb_stderr,none": 0.0495479665668239, | |
| "logprob,none": -18.808675647974013, | |
| "logprob_stderr,none": 0.5015900523926243, | |
| "choice_logprob,none": -5.87718670291878, | |
| "choice_logprob_stderr,none": 0.29807818376863565, | |
| "choice_prob_norm,none": 0.27503256875877746, | |
| "choice_prob_norm_stderr,none": 0.005859820333892211, | |
| "choice_logprob_norm,none": -1.4274577534386603, | |
| "choice_logprob_norm_stderr,none": 0.029418619584831728 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.7170837867247007, | |
| "acc_stderr,none": 0.010508949177489678, | |
| "acc_norm,none": 0.7181719260065288, | |
| "acc_norm_stderr,none": 0.010496675231258154, | |
| "bpb,none": 1.0360682750860744, | |
| "bpb_stderr,none": 0.010399419087489509, | |
| "logprob,none": -60.83130082316186, | |
| "logprob_stderr,none": 1.2666396586204536, | |
| "choice_logprob,none": -2.597817156961528, | |
| "choice_logprob_stderr,none": 0.19001766378429708, | |
| "choice_prob_norm,none": 0.5182779144202206, | |
| "choice_prob_norm_stderr,none": 0.0013533389349256308, | |
| "choice_logprob_norm,none": -0.6638477081057357, | |
| "choice_logprob_norm_stderr,none": 0.0027423091484904973 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.5351223362273086, | |
| "acc_stderr,none": 0.014017773120881578, | |
| "bpb,none": 0.4334391964017789, | |
| "bpb_stderr,none": 0.022729344808219854, | |
| "logprob,none": -19.64402215275693, | |
| "logprob_stderr,none": 0.2811034515456502, | |
| "choice_logprob,none": -0.7569728289871221, | |
| "choice_logprob_stderr,none": 0.016668264538358627, | |
| "choice_prob_norm,none": 0.5016246375491271, | |
| "choice_prob_norm_stderr,none": 0.0008329726427253178, | |
| "choice_logprob_norm,none": -0.6921774194909759, | |
| "choice_logprob_norm_stderr,none": 0.0023311184855997463 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.63003663003663, | |
| "acc_stderr,none": 0.02927371304052677, | |
| "bpb,none": 0.6220051946131483, | |
| "bpb_stderr,none": 0.020972923053141378, | |
| "logprob,none": -25.870619232401307, | |
| "logprob_stderr,none": 0.4859263527795115, | |
| "choice_logprob,none": -0.7395315600151394, | |
| "choice_logprob_stderr,none": 0.04398772506101539, | |
| "choice_prob_norm,none": 0.5030254920705404, | |
| "choice_prob_norm_stderr,none": 0.001242651266911796, | |
| "choice_logprob_norm,none": -0.6879269969814308, | |
| "choice_logprob_norm_stderr,none": 0.0024365735253503887 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.30217547214917523, | |
| "acc_stderr,none": 0.007100856001176909, | |
| "acc_norm,none": 0.30217547214917523, | |
| "acc_norm_stderr,none": 0.007100856001176909, | |
| "logprob,none": -1.442878668113898, | |
| "logprob_stderr,none": 0.008342765938311467 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.2971551518049247, | |
| "acc_stderr,none": 0.0070669065873055804, | |
| "acc_norm,none": 0.30958642122878316, | |
| "acc_norm_stderr,none": 0.007149136674702513, | |
| "bpb,none": 0.14020111563374607, | |
| "bpb_stderr,none": 0.0019311804256365126, | |
| "logprob,none": -1.6622335477869048, | |
| "logprob_stderr,none": 0.012776276450538825, | |
| "choice_logprob,none": -1.4643904381104342, | |
| "choice_logprob_stderr,none": 0.010785426134688378, | |
| "choice_prob_norm,none": 0.2525786770609617, | |
| "choice_prob_norm_stderr,none": 0.0002552044637741952, | |
| "choice_logprob_norm,none": -1.37853956157313, | |
| "choice_logprob_norm_stderr,none": 0.0011673702634496894 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.23378212974296206, | |
| "acc_stderr,none": 0.01481619599193159, | |
| "logprob,none": -19.429164113146292, | |
| "logprob_stderr,none": 0.41290169240196367 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 110.98014808406063, | |
| "nll_stderr,none": 1.5568822800964812, | |
| "bpb,none": 0.5618956999100354, | |
| "bpb_stderr,none": 0.005125187842721609 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 44.287433080556916, | |
| "nll_stderr,none": 2.574493607908735, | |
| "bpb,none": 0.4140016039194337, | |
| "bpb_stderr,none": 0.017964659850082786 | |
| } | |
| } | |
| }, | |
| { | |
| "mix": "v0", | |
| "hidden_dim": 1536, | |
| "budget": 9e+19, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "acc,none": 0.2503916820965674, | |
| "acc_stderr,none": 0.0036534832564727587, | |
| "acc_norm,none": 0.27916251246261214, | |
| "acc_norm_stderr,none": 0.0037770013669132336, | |
| "bpb,none": 0.35552157414979424, | |
| "bpb_stderr,none": 0.0019131290063905577, | |
| "logprob,none": -5.065637073363192, | |
| "logprob_stderr,none": 0.012907325924577924, | |
| "choice_logprob,none": -1.5819518990523416, | |
| "choice_logprob_stderr,none": 0.007394711008538637, | |
| "choice_prob_norm,none": 0.25167494076569574, | |
| "choice_prob_norm_stderr,none": 0.0001931048603284136, | |
| "choice_logprob_norm,none": -1.3847537145620747, | |
| "choice_logprob_norm_stderr,none": 0.0007972310007693355, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "acc,none": 0.27125765560461473, | |
| "acc_stderr,none": 0.003744348166570681, | |
| "acc_norm,none": 0.28656886483406924, | |
| "acc_norm_stderr,none": 0.003808597127162219, | |
| "bpb,none": 0.18156040586581168, | |
| "bpb_stderr,none": 0.0011994962628792578, | |
| "logprob,none": -2.790621268407345, | |
| "logprob_stderr,none": 0.01383950194817158, | |
| "choice_logprob,none": -1.5617558721966827, | |
| "choice_logprob_stderr,none": 0.008074175431290678, | |
| "choice_prob_norm,none": 0.25143517100434937, | |
| "choice_prob_norm_stderr,none": 0.00013388532833850876, | |
| "choice_logprob_norm,none": -1.3829688743489568, | |
| "choice_logprob_norm_stderr,none": 0.0005421892698406115, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.3302047781569966, | |
| "acc_stderr,none": 0.013743085603760422, | |
| "acc_norm,none": 0.35238907849829354, | |
| "acc_norm_stderr,none": 0.013960142600598677, | |
| "bpb,none": 0.9414160161263657, | |
| "bpb_stderr,none": 0.019954836359592047, | |
| "logprob,none": -16.183541593494677, | |
| "logprob_stderr,none": 0.2986216045469544, | |
| "choice_logprob,none": -3.8834576554259708, | |
| "choice_logprob_stderr,none": 0.1379796322208355, | |
| "choice_prob_norm,none": 0.26792039945837454, | |
| "choice_prob_norm_stderr,none": 0.002158966256971253, | |
| "choice_logprob_norm,none": -1.3601627412764932, | |
| "choice_logprob_norm_stderr,none": 0.009449722136061042 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.6742424242424242, | |
| "acc_stderr,none": 0.009616642976885968, | |
| "acc_norm,none": 0.6662457912457912, | |
| "acc_norm_stderr,none": 0.00967606568357548, | |
| "bpb,none": 0.7220906914137978, | |
| "bpb_stderr,none": 0.00973173910514286, | |
| "logprob,none": -10.344662363857331, | |
| "logprob_stderr,none": 0.17419791693308334, | |
| "choice_logprob,none": -1.4053845306771011, | |
| "choice_logprob_stderr,none": 0.059466594757234946, | |
| "choice_prob_norm,none": 0.3358377762667521, | |
| "choice_prob_norm_stderr,none": 0.0020885713089263333, | |
| "choice_logprob_norm,none": -1.1341845532619204, | |
| "choice_logprob_norm_stderr,none": 0.006103898794992183 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.5431192660550459, | |
| "acc_stderr,none": 0.008712475433089477, | |
| "bpb,none": 0.3995581140433765, | |
| "bpb_stderr,none": 0.002460409164664351, | |
| "logprob,none": -0.7095964209566787, | |
| "logprob_stderr,none": 0.0041886957939902564, | |
| "choice_logprob,none": -0.6905970206206402, | |
| "choice_logprob_stderr,none": 0.004179601526381547, | |
| "choice_prob_norm,none": 0.5199159676607907, | |
| "choice_prob_norm_stderr,none": 0.001298365236934181, | |
| "choice_logprob_norm,none": -0.6644838231351827, | |
| "choice_logprob_norm_stderr,none": 0.0025417972585884737 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.6168195718654435, | |
| "acc_stderr,none": 0.008503021391450791, | |
| "acc_norm,none": 0.6211009174311927, | |
| "acc_norm_stderr,none": 0.00848467871856502, | |
| "bpb,none": 0.4644675507724053, | |
| "bpb_stderr,none": 0.0064341497329966505, | |
| "logprob,none": -0.732731931431031, | |
| "logprob_stderr,none": 0.007998332680439648, | |
| "choice_logprob,none": -0.692841816111664, | |
| "choice_logprob_stderr,none": 0.00798951759349642, | |
| "choice_prob_norm,none": 0.5410706603221777, | |
| "choice_prob_norm_stderr,none": 0.002985798104826507, | |
| "choice_logprob_norm,none": -0.6744856753522923, | |
| "choice_logprob_norm_stderr,none": 0.0063912819169031155 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.69, | |
| "acc_stderr,none": 0.04648231987117316, | |
| "bpb,none": 1.473402516662691, | |
| "bpb_stderr,none": 0.04351580290541288, | |
| "logprob,none": -27.692846031188964, | |
| "logprob_stderr,none": 0.5557166606714958, | |
| "choice_logprob,none": -0.8469954823714905, | |
| "choice_logprob_stderr,none": 0.1681309923120363, | |
| "choice_prob_norm,none": 0.5292895416366712, | |
| "choice_prob_norm_stderr,none": 0.008465964698636304, | |
| "choice_logprob_norm,none": -0.6502013843114527, | |
| "choice_logprob_norm_stderr,none": 0.017311023118854105 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.22194922194922195, | |
| "acc_stderr,none": 0.011897367280936749, | |
| "bpb,none": 2.6573150036844377, | |
| "bpb_stderr,none": 0.039550358678890216, | |
| "logprob,none": -1.8419104026635098, | |
| "logprob_stderr,none": 0.027414219608407304, | |
| "choice_logprob,none": -1.8370197668091335, | |
| "choice_logprob_stderr,none": 0.027418207937237038, | |
| "choice_prob_norm,none": 0.21507221565660545, | |
| "choice_prob_norm_stderr,none": 0.004564710560332225, | |
| "choice_logprob_norm,none": -2.0376804575438836, | |
| "choice_logprob_norm_stderr,none": 0.03981184025115802 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.23177723177723178, | |
| "acc_stderr,none": 0.01208089355230227, | |
| "acc_norm,none": 0.23914823914823916, | |
| "acc_norm_stderr,none": 0.012212475442884533, | |
| "bpb,none": 0.25099320755867205, | |
| "bpb_stderr,none": 0.004031633326397867, | |
| "logprob,none": -1.9946075831886982, | |
| "logprob_stderr,none": 0.02428004908649879, | |
| "choice_logprob,none": -1.779406383085779, | |
| "choice_logprob_stderr,none": 0.023417537336332426, | |
| "choice_prob_norm,none": 0.20371179160009323, | |
| "choice_prob_norm_stderr,none": 0.000687244002046894, | |
| "choice_logprob_norm,none": -1.598885612265858, | |
| "choice_logprob_norm_stderr,none": 0.0037213188706360627 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.4358693487353117, | |
| "acc_stderr,none": 0.004948567856373863, | |
| "acc_norm,none": 0.5707030472017527, | |
| "acc_norm_stderr,none": 0.004939642460172563, | |
| "bpb,none": 0.8055160072851943, | |
| "bpb_stderr,none": 0.001959624425503467, | |
| "logprob,none": -73.47371736168267, | |
| "logprob_stderr,none": 0.36482269582399635, | |
| "choice_logprob,none": -14.373321577720866, | |
| "choice_logprob_stderr,none": 0.20916515197729738, | |
| "choice_prob_norm,none": 0.28455403877118096, | |
| "choice_prob_norm_stderr,none": 0.00042521342658046455, | |
| "choice_logprob_norm,none": -1.2682300246515799, | |
| "choice_logprob_norm_stderr,none": 0.0015210307259313257 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.4340768771161123, | |
| "acc_stderr,none": 0.0049462215121452826, | |
| "acc_norm,none": 0.5705038836885082, | |
| "acc_norm_stderr,none": 0.004939925958728895, | |
| "bpb,none": 0.7875317382572161, | |
| "bpb_stderr,none": 0.0019305361634130123, | |
| "logprob,none": -72.04622087006643, | |
| "logprob_stderr,none": 0.36205872501721265, | |
| "choice_logprob,none": -14.635099365145285, | |
| "choice_logprob_stderr,none": 0.21157611102802906, | |
| "choice_prob_norm,none": 0.28439536617380506, | |
| "choice_prob_norm_stderr,none": 0.0004251818877565747, | |
| "choice_logprob_norm,none": -1.2687362959471866, | |
| "choice_logprob_norm_stderr,none": 0.001515667428361757 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.256, | |
| "acc_stderr,none": 0.01953692357474761, | |
| "acc_norm,none": 0.356, | |
| "acc_norm_stderr,none": 0.02143471235607264, | |
| "bpb,none": 1.7514395936329286, | |
| "bpb_stderr,none": 0.04590346199306301, | |
| "logprob,none": -18.116089826107025, | |
| "logprob_stderr,none": 0.47628978811352274, | |
| "choice_logprob,none": -5.50242292258655, | |
| "choice_logprob_stderr,none": 0.2787961676611458, | |
| "choice_prob_norm,none": 0.28378143761842295, | |
| "choice_prob_norm_stderr,none": 0.0059663707202530095, | |
| "choice_logprob_norm,none": -1.390278992117216, | |
| "choice_logprob_norm_stderr,none": 0.027074530398652805 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.7442872687704026, | |
| "acc_stderr,none": 0.010178690109459862, | |
| "acc_norm,none": 0.7377584330794341, | |
| "acc_norm_stderr,none": 0.010262502565172449, | |
| "bpb,none": 0.9986341152067002, | |
| "bpb_stderr,none": 0.010487867574866843, | |
| "logprob,none": -58.39074820636795, | |
| "logprob_stderr,none": 1.2146705689000088, | |
| "choice_logprob,none": -2.3070429811822026, | |
| "choice_logprob_stderr,none": 0.17286932173721653, | |
| "choice_prob_norm,none": 0.5212220951674228, | |
| "choice_prob_norm_stderr,none": 0.0013421756048841715, | |
| "choice_logprob_norm,none": -0.6580206669507951, | |
| "choice_logprob_norm_stderr,none": 0.0027121864908891693 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.5603788476716653, | |
| "acc_stderr,none": 0.013949649776015698, | |
| "bpb,none": 0.4188933766106223, | |
| "bpb_stderr,none": 0.022580345252037705, | |
| "logprob,none": -18.928303497648653, | |
| "logprob_stderr,none": 0.2737846820388816, | |
| "choice_logprob,none": -0.7431147138587829, | |
| "choice_logprob_stderr,none": 0.016910068400034776, | |
| "choice_prob_norm,none": 0.5020179593806919, | |
| "choice_prob_norm_stderr,none": 0.0007811650353038019, | |
| "choice_logprob_norm,none": -0.6910696833248146, | |
| "choice_logprob_norm_stderr,none": 0.0021031923529397506 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.663003663003663, | |
| "acc_stderr,none": 0.028660654384243294, | |
| "bpb,none": 0.6238243103766034, | |
| "bpb_stderr,none": 0.02104329844697861, | |
| "logprob,none": -26.042019389924548, | |
| "logprob_stderr,none": 0.47439874711014063, | |
| "choice_logprob,none": -0.6398779205929981, | |
| "choice_logprob_stderr,none": 0.043379094756256516, | |
| "choice_prob_norm,none": 0.5049551743412123, | |
| "choice_prob_norm_stderr,none": 0.0013298180281791753, | |
| "choice_logprob_norm,none": -0.6842169909217483, | |
| "choice_logprob_norm_stderr,none": 0.00261393859833492 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.29141764284006694, | |
| "acc_stderr,none": 0.007026856322397198, | |
| "acc_norm,none": 0.29141764284006694, | |
| "acc_norm_stderr,none": 0.007026856322397198, | |
| "logprob,none": -1.3837027403118791, | |
| "logprob_stderr,none": 0.004640465793133948 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.2739660530719579, | |
| "acc_stderr,none": 0.006896596635623945, | |
| "acc_norm,none": 0.30408797513746116, | |
| "acc_norm_stderr,none": 0.007113523647158317, | |
| "bpb,none": 0.13553781024009642, | |
| "bpb_stderr,none": 0.0015734770390694308, | |
| "logprob,none": -1.6200002332623427, | |
| "logprob_stderr,none": 0.010590969800974608, | |
| "choice_logprob,none": -1.4465072580196185, | |
| "choice_logprob_stderr,none": 0.009186848812944771, | |
| "choice_prob_norm,none": 0.25210393918690377, | |
| "choice_prob_norm_stderr,none": 0.00022420517419145707, | |
| "choice_logprob_norm,none": -1.3796419726384546, | |
| "choice_logprob_norm_stderr,none": 0.0009219691173436295 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.2178702570379437, | |
| "acc_stderr,none": 0.014450846714123895, | |
| "logprob,none": -16.946733659600685, | |
| "logprob_stderr,none": 0.37880723885054923 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 106.78752208822509, | |
| "nll_stderr,none": 1.468171699400727, | |
| "bpb,none": 0.5419649635618328, | |
| "bpb_stderr,none": 0.004803235526642886 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 43.058409789713416, | |
| "nll_stderr,none": 2.5276094541783687, | |
| "bpb,none": 0.4099039537826882, | |
| "bpb_stderr,none": 0.01781821917069972 | |
| } | |
| } | |
| }, | |
| { | |
| "mix": "v2", | |
| "hidden_dim": 1536, | |
| "budget": 9e+19, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "acc,none": 0.2833641931348811, | |
| "acc_stderr,none": 0.003793525260773458, | |
| "acc_norm,none": 0.2995299814841191, | |
| "acc_norm_stderr,none": 0.0038569415918105715, | |
| "bpb,none": 0.3652159920429904, | |
| "bpb_stderr,none": 0.0019549232625298827, | |
| "logprob,none": -5.212395275117396, | |
| "logprob_stderr,none": 0.014327558705991703, | |
| "choice_logprob,none": -1.5584006500944099, | |
| "choice_logprob_stderr,none": 0.007587638778556977, | |
| "choice_prob_norm,none": 0.25309797338211804, | |
| "choice_prob_norm_stderr,none": 0.00021247705862183423, | |
| "choice_logprob_norm,none": -1.3801880227174377, | |
| "choice_logprob_norm_stderr,none": 0.0008690833228587661, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "acc,none": 0.29539951573849876, | |
| "acc_stderr,none": 0.003819873395705863, | |
| "acc_norm,none": 0.3074348383421165, | |
| "acc_norm_stderr,none": 0.003869313824989712, | |
| "bpb,none": 0.17754738725422795, | |
| "bpb_stderr,none": 0.001192601312340027, | |
| "logprob,none": -2.6514670316189117, | |
| "logprob_stderr,none": 0.0132553270705789, | |
| "choice_logprob,none": -1.5186375646840715, | |
| "choice_logprob_stderr,none": 0.007962096535581713, | |
| "choice_prob_norm,none": 0.2529079101185746, | |
| "choice_prob_norm_stderr,none": 0.00014230007662830306, | |
| "choice_logprob_norm,none": -1.377431731399958, | |
| "choice_logprob_norm_stderr,none": 0.0005748698312148346, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.37627986348122866, | |
| "acc_stderr,none": 0.014157022555407172, | |
| "acc_norm,none": 0.3984641638225256, | |
| "acc_norm_stderr,none": 0.014306946052735565, | |
| "bpb,none": 0.9117224950345897, | |
| "bpb_stderr,none": 0.02058570982727825, | |
| "logprob,none": -15.715859739548517, | |
| "logprob_stderr,none": 0.30072817934094254, | |
| "choice_logprob,none": -3.6533481887218437, | |
| "choice_logprob_stderr,none": 0.13895640550282407, | |
| "choice_prob_norm,none": 0.2779257414622447, | |
| "choice_prob_norm_stderr,none": 0.0023598128882322067, | |
| "choice_logprob_norm,none": -1.3303668950456593, | |
| "choice_logprob_norm_stderr,none": 0.010589553731885004 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.6999158249158249, | |
| "acc_stderr,none": 0.009404000558513351, | |
| "acc_norm,none": 0.7007575757575758, | |
| "acc_norm_stderr,none": 0.009396447162309822, | |
| "bpb,none": 0.679267650346058, | |
| "bpb_stderr,none": 0.010041405206793617, | |
| "logprob,none": -9.946306985247919, | |
| "logprob_stderr,none": 0.17607263616243687, | |
| "choice_logprob,none": -1.3562313881029344, | |
| "choice_logprob_stderr,none": 0.060583134820112, | |
| "choice_prob_norm,none": 0.35567155622634117, | |
| "choice_prob_norm_stderr,none": 0.002475510479674059, | |
| "choice_logprob_norm,none": -1.0871060184096497, | |
| "choice_logprob_norm_stderr,none": 0.006789025142729967 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.6198776758409786, | |
| "acc_stderr,none": 0.008489990918989207, | |
| "bpb,none": 0.42246278290564976, | |
| "bpb_stderr,none": 0.005627377329609596, | |
| "logprob,none": -0.6899870655223135, | |
| "logprob_stderr,none": 0.007479563628999024, | |
| "choice_logprob,none": -0.6578724003071241, | |
| "choice_logprob_stderr,none": 0.007440020669474746, | |
| "choice_prob_norm,none": 0.5500499661526049, | |
| "choice_prob_norm_stderr,none": 0.0027414575071941782, | |
| "choice_logprob_norm,none": -0.6454941559968439, | |
| "choice_logprob_norm_stderr,none": 0.00569403632356082 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.6103975535168196, | |
| "acc_stderr,none": 0.008529228894936292, | |
| "acc_norm,none": 0.618348623853211, | |
| "acc_norm_stderr,none": 0.00849655074117826, | |
| "bpb,none": 0.5402393387293413, | |
| "bpb_stderr,none": 0.00944492455451324, | |
| "logprob,none": -0.8257174519953013, | |
| "logprob_stderr,none": 0.01240018574869942, | |
| "choice_logprob,none": -0.7641612885400936, | |
| "choice_logprob_stderr,none": 0.012424413318765988, | |
| "choice_prob_norm,none": 0.5546675618062102, | |
| "choice_prob_norm_stderr,none": 0.004018200206431961, | |
| "choice_logprob_norm,none": -0.7081703345145451, | |
| "choice_logprob_norm_stderr,none": 0.009347391902965673 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.76, | |
| "acc_stderr,none": 0.04292346959909283, | |
| "bpb,none": 1.5886466466443578, | |
| "bpb_stderr,none": 0.04513550330410471, | |
| "logprob,none": -29.909457206726074, | |
| "logprob_stderr,none": 0.562216134659488, | |
| "choice_logprob,none": -0.7410448574769116, | |
| "choice_logprob_stderr,none": 0.1652254579682627, | |
| "choice_prob_norm,none": 0.5313054727825939, | |
| "choice_prob_norm_stderr,none": 0.008917301908166142, | |
| "choice_logprob_norm,none": -0.6478710857505101, | |
| "choice_logprob_norm_stderr,none": 0.01820457056298214 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.2571662571662572, | |
| "acc_stderr,none": 0.012513329723602744, | |
| "bpb,none": 3.61170673010068, | |
| "bpb_stderr,none": 0.09115943514540976, | |
| "logprob,none": -2.5034443369786654, | |
| "logprob_stderr,none": 0.06318690545247797, | |
| "choice_logprob,none": -2.5008061738874, | |
| "choice_logprob_stderr,none": 0.06319150639160746, | |
| "choice_prob_norm,none": 0.24951125498615806, | |
| "choice_prob_norm_stderr,none": 0.008165344544357807, | |
| "choice_logprob_norm,none": -3.1682373926593677, | |
| "choice_logprob_norm_stderr,none": 0.09176244758442266 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.2809172809172809, | |
| "acc_stderr,none": 0.012867635159174046, | |
| "acc_norm,none": 0.2751842751842752, | |
| "acc_norm_stderr,none": 0.012786323696375949, | |
| "bpb,none": 0.3391542211540572, | |
| "bpb_stderr,none": 0.008716444229196115, | |
| "logprob,none": -2.7009197721756646, | |
| "logprob_stderr,none": 0.06272524909701989, | |
| "choice_logprob,none": -2.5017582374199936, | |
| "choice_logprob_stderr,none": 0.06202889514708539, | |
| "choice_prob_norm,none": 0.21156096565798563, | |
| "choice_prob_norm_stderr,none": 0.0015500718173795656, | |
| "choice_logprob_norm,none": -1.5925035827705205, | |
| "choice_logprob_norm_stderr,none": 0.008536529612626369 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.47460665206134234, | |
| "acc_stderr,none": 0.004983342213776263, | |
| "acc_norm,none": 0.6235809599681338, | |
| "acc_norm_stderr,none": 0.004834969412883654, | |
| "bpb,none": 0.8082126244191451, | |
| "bpb_stderr,none": 0.002052694634543442, | |
| "logprob,none": -73.64892684143156, | |
| "logprob_stderr,none": 0.36813965885884153, | |
| "choice_logprob,none": -13.202259825805944, | |
| "choice_logprob_stderr,none": 0.2037577386294536, | |
| "choice_prob_norm,none": 0.2928902213610382, | |
| "choice_prob_norm_stderr,none": 0.00045372247972980584, | |
| "choice_logprob_norm,none": -1.2402134001613863, | |
| "choice_logprob_norm_stderr,none": 0.001578571806293506 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.45289782911770565, | |
| "acc_stderr,none": 0.004967591267557401, | |
| "acc_norm,none": 0.6041625174268074, | |
| "acc_norm_stderr,none": 0.004880303863138483, | |
| "bpb,none": 0.7808793083265002, | |
| "bpb_stderr,none": 0.001959424563973173, | |
| "logprob,none": -71.30152131100381, | |
| "logprob_stderr,none": 0.35882751446169764, | |
| "choice_logprob,none": -13.817169149802801, | |
| "choice_logprob_stderr,none": 0.20639744750506261, | |
| "choice_prob_norm,none": 0.28819234125658705, | |
| "choice_prob_norm_stderr,none": 0.0004274974259184617, | |
| "choice_logprob_norm,none": -1.2553293865354775, | |
| "choice_logprob_norm_stderr,none": 0.0015072929470410464 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.28, | |
| "acc_stderr,none": 0.020099950647503233, | |
| "acc_norm,none": 0.376, | |
| "acc_norm_stderr,none": 0.021683827539286115, | |
| "bpb,none": 1.7713766886331395, | |
| "bpb_stderr,none": 0.04749189587622596, | |
| "logprob,none": -18.332382786035538, | |
| "logprob_stderr,none": 0.4923023644170012, | |
| "choice_logprob,none": -5.514236012659305, | |
| "choice_logprob_stderr,none": 0.2901789635151622, | |
| "choice_prob_norm,none": 0.29362202446085794, | |
| "choice_prob_norm_stderr,none": 0.006406220502987297, | |
| "choice_logprob_norm,none": -1.36896579388406, | |
| "choice_logprob_norm_stderr,none": 0.028663472646257432 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.750272034820457, | |
| "acc_stderr,none": 0.010099232969867497, | |
| "acc_norm,none": 0.7584330794341676, | |
| "acc_norm_stderr,none": 0.009986718001804477, | |
| "bpb,none": 0.9947934118053557, | |
| "bpb_stderr,none": 0.010950628645614828, | |
| "logprob,none": -58.05032417946723, | |
| "logprob_stderr,none": 1.2177224619074434, | |
| "choice_logprob,none": -2.2846200577070364, | |
| "choice_logprob_stderr,none": 0.17254081279876907, | |
| "choice_prob_norm,none": 0.5236971605836466, | |
| "choice_prob_norm_stderr,none": 0.001409745284657901, | |
| "choice_logprob_norm,none": -0.653834274516731, | |
| "choice_logprob_norm_stderr,none": 0.002818757424899055 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.5603788476716653, | |
| "acc_stderr,none": 0.013949649776015701, | |
| "bpb,none": 0.42337717662405155, | |
| "bpb_stderr,none": 0.023282161540202174, | |
| "logprob,none": -19.086542711732136, | |
| "logprob_stderr,none": 0.27651431898622375, | |
| "choice_logprob,none": -0.7498901000889382, | |
| "choice_logprob_stderr,none": 0.018670393953621845, | |
| "choice_prob_norm,none": 0.5021590887201153, | |
| "choice_prob_norm_stderr,none": 0.0007537685697302094, | |
| "choice_logprob_norm,none": -0.6908339471445006, | |
| "choice_logprob_norm_stderr,none": 0.0021854993231866676 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.63003663003663, | |
| "acc_stderr,none": 0.029273713040526766, | |
| "bpb,none": 0.6458781534417585, | |
| "bpb_stderr,none": 0.02115840809808031, | |
| "logprob,none": -26.894661665836097, | |
| "logprob_stderr,none": 0.47443732608851724, | |
| "choice_logprob,none": -0.711798298682546, | |
| "choice_logprob_stderr,none": 0.05622774135992073, | |
| "choice_prob_norm,none": 0.5070496306067538, | |
| "choice_prob_norm_stderr,none": 0.0015178808977874385, | |
| "choice_logprob_norm,none": -0.6803297384186003, | |
| "choice_logprob_norm_stderr,none": 0.002934668813995698 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.3000239062873536, | |
| "acc_stderr,none": 0.007086430306346813, | |
| "acc_norm,none": 0.3000239062873536, | |
| "acc_norm_stderr,none": 0.007086430306346813, | |
| "logprob,none": -1.384638548894787, | |
| "logprob_stderr,none": 0.006011263189773365 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.29213483146067415, | |
| "acc_stderr,none": 0.007031936297958185, | |
| "acc_norm,none": 0.31651924456131963, | |
| "acc_norm_stderr,none": 0.007192356263004332, | |
| "bpb,none": 0.1348471597122054, | |
| "bpb_stderr,none": 0.001871467610285096, | |
| "logprob,none": -1.6010850405784962, | |
| "logprob_stderr,none": 0.01228112024153422, | |
| "choice_logprob,none": -1.4481968658234194, | |
| "choice_logprob_stderr,none": 0.010446909746323068, | |
| "choice_prob_norm,none": 0.25284289852310765, | |
| "choice_prob_norm_stderr,none": 0.0002564181546171402, | |
| "choice_logprob_norm,none": -1.3775063314377334, | |
| "choice_logprob_norm_stderr,none": 0.0012172335913715972 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.23255813953488372, | |
| "acc_stderr,none": 0.014789157531080508, | |
| "logprob,none": -16.15946305921641, | |
| "logprob_stderr,none": 0.38174767602465554 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 106.9984015117006, | |
| "nll_stderr,none": 1.5243443231933331, | |
| "bpb,none": 0.544555718461919, | |
| "bpb_stderr,none": 0.005408454171740506 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 48.29808946062879, | |
| "nll_stderr,none": 2.5801950339103974, | |
| "bpb,none": 0.45749667858902093, | |
| "bpb_stderr,none": 0.018462211336157767 | |
| } | |
| } | |
| }, | |
| { | |
| "mix": "v3", | |
| "hidden_dim": 1536, | |
| "budget": 9e+19, | |
| "tasks": { | |
| "mmlu_sl_verb_0shot": { | |
| "acc,none": 0.2993875516308218, | |
| "acc_stderr,none": 0.003845267502892936, | |
| "acc_norm,none": 0.29867540236433554, | |
| "acc_norm_stderr,none": 0.0038417770785854908, | |
| "bpb,none": 0.3437568050019886, | |
| "bpb_stderr,none": 0.0019239556694492393, | |
| "logprob,none": -5.079754080753175, | |
| "logprob_stderr,none": 0.015738878001771744, | |
| "choice_logprob,none": -1.5769890073434985, | |
| "choice_logprob_stderr,none": 0.00820013727407004, | |
| "choice_prob_norm,none": 0.25343685280982375, | |
| "choice_prob_norm_stderr,none": 0.00022739980059084734, | |
| "choice_logprob_norm,none": -1.379710960697182, | |
| "choice_logprob_norm_stderr,none": 0.0009369500180734398, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "mmlu_sl_verb_5shot": { | |
| "acc,none": 0.3127047429141148, | |
| "acc_stderr,none": 0.003884351503361081, | |
| "acc_norm,none": 0.30793334282865686, | |
| "acc_norm_stderr,none": 0.0038776332597401067, | |
| "bpb,none": 0.19614308476728629, | |
| "bpb_stderr,none": 0.0014122119380714784, | |
| "logprob,none": -3.0694671304109518, | |
| "logprob_stderr,none": 0.01727239105380594, | |
| "choice_logprob,none": -1.5472708010614544, | |
| "choice_logprob_stderr,none": 0.00913556432507866, | |
| "choice_prob_norm,none": 0.25314930728707763, | |
| "choice_prob_norm_stderr,none": 0.00016511282196633405, | |
| "choice_logprob_norm,none": -1.3772906231407986, | |
| "choice_logprob_norm_stderr,none": 0.0006642202918479287, | |
| "sample_count": { | |
| "acc,none": 14042, | |
| "acc_norm,none": 14042, | |
| "bpb,none": 14042, | |
| "logprob,none": 14042, | |
| "choice_logprob,none": 14042, | |
| "choice_prob_norm,none": 14042, | |
| "choice_logprob_norm,none": 14042 | |
| } | |
| }, | |
| "arc_challenge_5shot": { | |
| "acc,none": 0.30802047781569963, | |
| "acc_stderr,none": 0.01349142951729204, | |
| "acc_norm,none": 0.3626279863481229, | |
| "acc_norm_stderr,none": 0.014049106564955012, | |
| "bpb,none": 0.946247010821327, | |
| "bpb_stderr,none": 0.01978146472612539, | |
| "logprob,none": -16.43383033019284, | |
| "logprob_stderr,none": 0.309529240670054, | |
| "choice_logprob,none": -4.169585053458868, | |
| "choice_logprob_stderr,none": 0.14671042509484286, | |
| "choice_prob_norm,none": 0.27267684429575634, | |
| "choice_prob_norm_stderr,none": 0.0025010185384983573, | |
| "choice_logprob_norm,none": -1.3506892796004153, | |
| "choice_logprob_norm_stderr,none": 0.010161115752093695 | |
| }, | |
| "arc_easy_5shot": { | |
| "acc,none": 0.67003367003367, | |
| "acc_stderr,none": 0.009648311574241036, | |
| "acc_norm,none": 0.67003367003367, | |
| "acc_norm_stderr,none": 0.009648311574241036, | |
| "bpb,none": 0.717838677508279, | |
| "bpb_stderr,none": 0.009992334745802786, | |
| "logprob,none": -10.44071673156587, | |
| "logprob_stderr,none": 0.18172674659824423, | |
| "choice_logprob,none": -1.50517965371248, | |
| "choice_logprob_stderr,none": 0.0640209698061502, | |
| "choice_prob_norm,none": 0.34321682834671347, | |
| "choice_prob_norm_stderr,none": 0.002286219045431404, | |
| "choice_logprob_norm,none": -1.118483567434795, | |
| "choice_logprob_norm_stderr,none": 0.006495514696262302 | |
| }, | |
| "boolq_10shot": { | |
| "acc,none": 0.518348623853211, | |
| "acc_stderr,none": 0.008739164562341823, | |
| "bpb,none": 0.44150930609847827, | |
| "bpb_stderr,none": 0.005224087739791807, | |
| "logprob,none": -0.8430835138265875, | |
| "logprob_stderr,none": 0.011048484094461665, | |
| "choice_logprob,none": -0.8278325532809531, | |
| "choice_logprob_stderr,none": 0.011052511470261588, | |
| "choice_prob_norm,none": 0.5199383966989631, | |
| "choice_prob_norm_stderr,none": 0.002497028130377343, | |
| "choice_logprob_norm,none": -0.6961303581208853, | |
| "choice_logprob_norm_stderr,none": 0.005262126102052664 | |
| }, | |
| "boolq_sl_verb_10shot": { | |
| "acc,none": 0.4798165137614679, | |
| "acc_stderr,none": 0.008737927070893478, | |
| "acc_norm,none": 0.5100917431192661, | |
| "acc_norm_stderr,none": 0.008743273544801916, | |
| "bpb,none": 0.4764182924937028, | |
| "bpb_stderr,none": 0.005724492191122488, | |
| "logprob,none": -0.8689801140070326, | |
| "logprob_stderr,none": 0.010654305907655457, | |
| "choice_logprob,none": -0.852790323191026, | |
| "choice_logprob_stderr,none": 0.010659904235460005, | |
| "choice_prob_norm,none": 0.507302111679821, | |
| "choice_prob_norm_stderr,none": 0.002567599725396225, | |
| "choice_logprob_norm,none": -0.7262019204822747, | |
| "choice_logprob_norm_stderr,none": 0.005665271723349297 | |
| }, | |
| "copa_0shot": { | |
| "acc,none": 0.7, | |
| "acc_stderr,none": 0.046056618647183814, | |
| "bpb,none": 1.487274144839745, | |
| "bpb_stderr,none": 0.042327999980145345, | |
| "logprob,none": -27.925178775787355, | |
| "logprob_stderr,none": 0.5065800122681858, | |
| "choice_logprob,none": -0.8381555373550902, | |
| "choice_logprob_stderr,none": 0.14467992619144224, | |
| "choice_prob_norm,none": 0.5252378959948037, | |
| "choice_prob_norm_stderr,none": 0.00835550268030079, | |
| "choice_logprob_norm,none": -0.6581485206599267, | |
| "choice_logprob_norm_stderr,none": 0.017611476146166566 | |
| }, | |
| "csqa_5shot": { | |
| "acc,none": 0.2244062244062244, | |
| "acc_stderr,none": 0.01194413467602355, | |
| "bpb,none": 3.1894932947425274, | |
| "bpb_stderr,none": 0.0635788560784972, | |
| "logprob,none": -2.210788284665634, | |
| "logprob_stderr,none": 0.04406950483403684, | |
| "choice_logprob,none": -2.208177722265666, | |
| "choice_logprob_stderr,none": 0.04407384337459683, | |
| "choice_prob_norm,none": 0.2198874834708879, | |
| "choice_prob_norm_stderr,none": 0.006782628095303889, | |
| "choice_logprob_norm,none": -2.682700028774271, | |
| "choice_logprob_norm_stderr,none": 0.0641729357233482 | |
| }, | |
| "csqa_sl_verb_5shot": { | |
| "acc,none": 0.2334152334152334, | |
| "acc_stderr,none": 0.012110575321206388, | |
| "acc_norm,none": 0.2375102375102375, | |
| "acc_norm_stderr,none": 0.012183673723473449, | |
| "bpb,none": 0.29635859947390786, | |
| "bpb_stderr,none": 0.005987390627844368, | |
| "logprob,none": -2.3901897513358734, | |
| "logprob_stderr,none": 0.04406606414920322, | |
| "choice_logprob,none": -2.1752721980458336, | |
| "choice_logprob_stderr,none": 0.043255338329785384, | |
| "choice_prob_norm,none": 0.20679160700423543, | |
| "choice_prob_norm_stderr,none": 0.0010893273430492957, | |
| "choice_logprob_norm,none": -1.5948191889787853, | |
| "choice_logprob_norm_stderr,none": 0.005737327652962774 | |
| }, | |
| "hellaswag_0shot": { | |
| "acc,none": 0.42202748456482775, | |
| "acc_stderr,none": 0.004928735103635842, | |
| "acc_norm,none": 0.5519816769567815, | |
| "acc_norm_stderr,none": 0.00496274242684986, | |
| "bpb,none": 0.8146418259907425, | |
| "bpb_stderr,none": 0.0019910515944172053, | |
| "logprob,none": -74.23395537010015, | |
| "logprob_stderr,none": 0.3688047710238557, | |
| "choice_logprob,none": -14.920520618331876, | |
| "choice_logprob_stderr,none": 0.21343610616943745, | |
| "choice_prob_norm,none": 0.28304819548066895, | |
| "choice_prob_norm_stderr,none": 0.0004249866493142731, | |
| "choice_logprob_norm,none": -1.2735844619255967, | |
| "choice_logprob_norm_stderr,none": 0.0015222545817839856 | |
| }, | |
| "hellaswag_5shot": { | |
| "acc,none": 0.41943835889265085, | |
| "acc_stderr,none": 0.004924586362301659, | |
| "acc_norm,none": 0.5467038438558056, | |
| "acc_norm_stderr,none": 0.004967965810200004, | |
| "bpb,none": 0.7998635043931804, | |
| "bpb_stderr,none": 0.001971432034337865, | |
| "logprob,none": -73.13535002844431, | |
| "logprob_stderr,none": 0.3676983080956583, | |
| "choice_logprob,none": -15.315985043546844, | |
| "choice_logprob_stderr,none": 0.21689330486310834, | |
| "choice_prob_norm,none": 0.2826641401574518, | |
| "choice_prob_norm_stderr,none": 0.00042543304587702426, | |
| "choice_logprob_norm,none": -1.274983501434549, | |
| "choice_logprob_norm_stderr,none": 0.0015250347859301313 | |
| }, | |
| "openbookqa_0shot": { | |
| "acc,none": 0.232, | |
| "acc_stderr,none": 0.018896193591952052, | |
| "acc_norm,none": 0.352, | |
| "acc_norm_stderr,none": 0.021380042385946034, | |
| "bpb,none": 1.7721463489399987, | |
| "bpb_stderr,none": 0.04602009846494316, | |
| "logprob,none": -18.295756459236145, | |
| "logprob_stderr,none": 0.4800185473603445, | |
| "choice_logprob,none": -5.530525388563438, | |
| "choice_logprob_stderr,none": 0.28186391598720884, | |
| "choice_prob_norm,none": 0.283422989537816, | |
| "choice_prob_norm_stderr,none": 0.006072762610892424, | |
| "choice_logprob_norm,none": -1.3951215332813311, | |
| "choice_logprob_norm_stderr,none": 0.0273854109203058 | |
| }, | |
| "piqa_5shot": { | |
| "acc,none": 0.7431991294885746, | |
| "acc_stderr,none": 0.010192864802278052, | |
| "acc_norm,none": 0.7448313384113167, | |
| "acc_norm_stderr,none": 0.010171571592521824, | |
| "bpb,none": 1.0059009799366614, | |
| "bpb_stderr,none": 0.010861208835924993, | |
| "logprob,none": -58.56565026956751, | |
| "logprob_stderr,none": 1.2168260391906867, | |
| "choice_logprob,none": -2.3953581215067867, | |
| "choice_logprob_stderr,none": 0.1780463218410201, | |
| "choice_prob_norm,none": 0.5223932399769874, | |
| "choice_prob_norm_stderr,none": 0.0014301252890507393, | |
| "choice_logprob_norm,none": -0.6567117051919433, | |
| "choice_logprob_norm_stderr,none": 0.002918677846516806 | |
| }, | |
| "winogrande_5shot": { | |
| "acc,none": 0.5485398579321231, | |
| "acc_stderr,none": 0.013986110301017764, | |
| "bpb,none": 0.42715998091726126, | |
| "bpb_stderr,none": 0.022341584833798164, | |
| "logprob,none": -19.36323658518652, | |
| "logprob_stderr,none": 0.2777843058337961, | |
| "choice_logprob,none": -0.7872808600225875, | |
| "choice_logprob_stderr,none": 0.01916575086954024, | |
| "choice_prob_norm,none": 0.5017534878040611, | |
| "choice_prob_norm_stderr,none": 0.0008288172134091742, | |
| "choice_logprob_norm,none": -0.691864582681053, | |
| "choice_logprob_norm_stderr,none": 0.002278305676407902 | |
| }, | |
| "wsc273_0shot": { | |
| "acc,none": 0.6410256410256411, | |
| "acc_stderr,none": 0.029086064518366282, | |
| "bpb,none": 0.6020577170225541, | |
| "bpb_stderr,none": 0.020261099294745278, | |
| "logprob,none": -25.124764009273097, | |
| "logprob_stderr,none": 0.49789066870628557, | |
| "choice_logprob,none": -0.7944657533511067, | |
| "choice_logprob_stderr,none": 0.0634044680841067, | |
| "choice_prob_norm,none": 0.5046261727330773, | |
| "choice_prob_norm_stderr,none": 0.001277739101185203, | |
| "choice_logprob_norm,none": -0.6848069715512327, | |
| "choice_logprob_norm_stderr,none": 0.0025299360506142346 | |
| }, | |
| "medmcqa_5shot": { | |
| "acc,none": 0.301458283528568, | |
| "acc_stderr,none": 0.007096068027380258, | |
| "acc_norm,none": 0.301458283528568, | |
| "acc_norm_stderr,none": 0.007096068027380258, | |
| "logprob,none": -1.4224252123122947, | |
| "logprob_stderr,none": 0.008231404069115371 | |
| }, | |
| "medmcqa_sl_verb_5shot": { | |
| "acc,none": 0.2976332775519962, | |
| "acc_stderr,none": 0.007070183613490213, | |
| "acc_norm,none": 0.3169973703083911, | |
| "acc_norm_stderr,none": 0.0071952684770417, | |
| "bpb,none": 0.13300602038879603, | |
| "bpb_stderr,none": 0.0016396134861531057, | |
| "logprob,none": -1.5505911962120493, | |
| "logprob_stderr,none": 0.010335937240549408, | |
| "choice_logprob,none": -1.4340295424366702, | |
| "choice_logprob_stderr,none": 0.009568631676604841, | |
| "choice_prob_norm,none": 0.25357433651291594, | |
| "choice_prob_norm_stderr,none": 0.00027225266681251624, | |
| "choice_logprob_norm,none": -1.374563104529693, | |
| "choice_logprob_norm_stderr,none": 0.0010959101001788325 | |
| }, | |
| "truthfulqa_mc1_0shot": { | |
| "acc,none": 0.23623011015911874, | |
| "acc_stderr,none": 0.0148697550158711, | |
| "logprob,none": -17.643773400214485, | |
| "logprob_stderr,none": 0.4078807293990941 | |
| }, | |
| "logprob_gsm8k_5shot": { | |
| "nll,none": 99.61485948049273, | |
| "nll_stderr,none": 1.489248502672281, | |
| "bpb,none": 0.4985796881272956, | |
| "bpb_stderr,none": 0.0047488863679661765 | |
| }, | |
| "logprob_humaneval_10shot": { | |
| "nll,none": 37.64992275470641, | |
| "nll_stderr,none": 2.1628372263642115, | |
| "bpb,none": 0.35576757453147967, | |
| "bpb_stderr,none": 0.015815894855466917 | |
| } | |
| } | |
| } | |
| ] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment