aggregate_metrics_graphiti.json•3.79 kB
{
"correctness": {
"scores": [
1.0,
0.1516911530779163,
0.9652779272706994,
0.06811182045889698,
0.5413287736671829,
0.011665819944729103,
0.9164842594734586,
0.2512616273708377,
0.9744205258104126,
0.8215842716560738,
0.35274070822990194,
0.9741231396604345,
0.9567543838178784,
0.6146464512636055,
0.8748697728680277,
0.023827366639971995,
0.9083953603125151,
0.9755864900982664,
0.5957032794648195,
0.2471993638452164,
1.0,
0.45893327668045336,
0.9533663675462775,
0.010417677901840285,
0.9277567328086379,
0.7696242452153286,
0.8769517795439207,
0.0,
0.7318106213374399,
0.22311598081764444,
0.04867760013425768,
0.8917800488625602,
0.2901710872123221,
0.38648706911633857,
0.03979175575266881,
0.0,
0.9252744305765163,
0.9099517116097993,
0.02072679430977809,
0.20357250743465896,
0.981323994039772,
0.21304082381028241,
0.05123635779958491,
0.9987568351136321,
0.8773834691382791,
0.5326275541061523,
0.9801784650901422,
0.9725300707965576,
0.9816380323545841,
0.997966764532167
],
"mean": 0.5896152909714488,
"ci_lower": 0.4827500785323634,
"ci_upper": 0.6929328608370466
},
"EM": {
"scores": [
1.0,
0.0,
1.0,
0.0,
1.0,
0.0,
1.0,
0.0,
1.0,
0.0,
0.0,
1.0,
1.0,
0.0,
1.0,
0.0,
1.0,
1.0,
0.0,
0.0,
1.0,
0.0,
1.0,
0.0,
0.0,
0.0,
1.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
1.0,
1.0,
0.0,
0.0,
1.0,
0.0,
0.0,
1.0,
1.0,
0.0,
1.0,
1.0,
1.0,
1.0
],
"mean": 0.44,
"ci_lower": 0.3,
"ci_upper": 0.58
},
"f1": {
"scores": [
1.0,
0.0,
1.0,
0.5,
1.0,
0.0,
1.0,
0.5,
1.0,
1.0,
0.1818181818181818,
1.0,
1.0,
0.6666666666666666,
1.0,
0.0,
1.0,
1.0,
0.0,
0.0,
1.0,
0.33333333333333337,
1.0,
0.0,
0.8,
0.8,
1.0,
0.0,
0.5714285714285715,
0.0,
0.0,
1.0,
0.0,
0.5714285714285715,
0.0,
0.0,
1.0,
1.0,
0.0,
0.0,
1.0,
0.0,
0.4,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0
],
"mean": 0.6064935064935065,
"ci_lower": 0.4827926406926407,
"ci_upper": 0.7281313852813852
}
}