@prefix this: . @prefix sub: . @prefix rdfs: . @prefix xsd: . @prefix np: . @prefix npx: . @prefix dcterms: . @prefix prov: . @prefix schema: . @prefix foaf: . @prefix orcid: . sub:head { this: np:hasAssertion sub:assertion; np:hasProvenance sub:provenance; np:hasPublicationInfo sub:pubinfo; a np:Nanopublication . } sub:assertion { "preprint" . "preprint" . "unknown" . "computerProgram" . "webpage" . sub:assertion dcterms:creator ; , , ; rdfs:comment """ The BenchBench Leaderboard lets you explore 100s of benchmarks and find trustworthy alternatives that fit your resources. 👉 https://huggingface.co/spaces/ibm/benchbench Currently, benchmark comparisons are often ad-hoc and inconsistent making results untrustworthy and benchmark choice 🤮 BenchBench & our findings: https://arxiv.org/pdf/2407.13696 offer standard and transparent comparisons to reduce variance and increase confidence in your evaluations!🎉 https://twitter.com/LChoshen/status/1835738770353623053/photo/1 No need to manually gather and compare benchmark data! BenchBench provides a centralized platform with a curated database and standardized methodology for effortless benchmark agreement testing. You can also use them with our package here: https://github.com/IBM/BenchBench Want to incorporate your benchmark into BenchBench? Make a PR skeptical about the idea of BenchBench? comment! Details? Read: https://arxiv.org/abs/2407.13696 And if you are in the mood for other benchmarking aspects: https://x.com/LChoshen/status/1696153656653926581 """; schema:keywords "Benchmarking", "CentralizedPlatform", "CuratedDatabase", "HuggingFace", "LanguageModels", "StandardizedMethodology"; ; , ; , ; . "forumPost" . } sub:provenance { a prov:SoftwareAgent; prov:actedOnBehalfOf . sub:activity a ; prov:wasAssociatedWith . sub:assertion prov:linksTo ; prov:wasAssociatedWith ; prov:wasAttributedTo orcid:0000-0002-0085-6496, ; prov:wasGeneratedBy sub:activity . foaf:account orcid:0000-0002-0085-6496, . } sub:pubinfo { sub:sig npx:hasAlgorithm "RSA"; npx:hasPublicKey "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArHtI92jm8pAYVsvJabxLGfOT+7G0JyJGh2gwjB5x2pFPga6wWTd+rNBWWUZViIFnaJrBEsJpgdnoupLU9ppwn+khMiGRfxqGsDDzwHcj3Jc75CRys7d3etwXdBdoXfBgjsJiZBazwm13idr6tljRrC1TaEJBnRQAqzBw9cLDeGY77cSznzXT39feUGT168dpCSE9O6u/48DvvWVqciHGsH9cQ+LroJJVsMrorwtsdZnAK+q48wtIP6pIpw5shSJ5LnA0qeN/f4TvTFDV6ItYIXjiWWpTECc/Bxmfnyat3B5xWCu9nvz8fEs7Ns0TuzQwT3/K55iSKDEIi/E0nO97xwIDAQAB"; npx:hasSignature "I5lz+4/xIbaHsmaqNtkbAL0ZQmAmLTB8MjqHiqx5ifUTvyjDx0uWFZwslB9tXL6QZ2pITWfYWDY21vr3QCzSzNMzYWx+EeQbJbuF1D/RUBtHQzQLIl1LqH3WyLLPfzpIpskjshyow+5LUifAQ2GU3tRtdXzAoEz4MKMKHRAZbolbXsjrON65/lIjSqWvDYHAFNCtyBjvTIH3qwycis6GG7vyUKQ+K0FUcOf+CfttB8A+gV/HFXOGId10hn0O/saxCcxhffApPwaIv6yOm0NZOHlE4OpoZg5w3FzEQhKN/NjaFBWCnhxWdJSwpNr8m/gGCDUAmeQkhDprtiq6Obtsng=="; npx:hasSignatureTarget this:; npx:singedBy ; prov:wasAssociatedWith . this: dcterms:created "2024-09-16T18:17:08.034Z"^^xsd:dateTime; dcterms:creator ; dcterms:license ; npx:hasNanopubType ; npx:wasCreatedAt ; rdfs:label "CoSMO Semantic Post"; prov:wasAttributedTo orcid:0000-0002-0085-6496; "0xf6ECcfD463afB464dcC85b051DF2E93E2646E6D2" . foaf:account orcid:0000-0002-0085-6496; foaf:name "Leshem Choshen 🤖🤗 @ICML wanna talk?" . }