<!DOCTYPE html>
<html lang="en">
<!--
 Page saved with SingleFile
 url: https://arxiv.org/abs/2305.11747
 saved date: Sun Jan 11 2026 22:22:03 GMT+0100 (Central European Standard Time)
-->
<head>
<!-- Nothing but whitespace and comments may precede the doctype, otherwise the page renders in quirks mode. -->
<!-- The charset declaration stays first so it falls within the first 1024 bytes of the document. -->
<meta charset="utf-8">
<title>[2305.11747] HaluEval: A Large-Scale Hallucination Evaluation Benchmark for Large Language Models</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="msapplication-TileColor" content="#da532c">
<meta name="theme-color" content="#ffffff">
<!-- Screen-only stylesheet of the saved arXiv abstract page, kept as one minified style element.
     It covers the page base (body, links, footer), the #abs / #abs-outer abstract layout, the
     #labstabs tab panel, the .extra-services sidebar, the .mobile-header bar and the
     .columns / .column flex helpers.
     Rule order is significant: the max-width:768px media block restyles #abs-outer selectors that
     are declared earlier, so the CSS below is intentionally left untouched. -->
<style media=screen>body{margin:0;padding:0;background-color:#fff;color:#000;font-family:"Lucida Grande",helvetica,arial,verdana,sans-serif}a:link,a:visited,a:active{text-decoration:none;font-weight:normal}a:hover{text-decoration:underline}img{border:0}em{font-weight:bold;font-style:normal}.primary-subject{font-weight:bold}main{flex-grow:1}.flex-wrap-footer{display:flex;min-height:100vh;flex-direction:column}footer ul li{display:flex;align-items:center;font-size:14px}footer ul li a{font-size:13.5px}footer{background-color:hsl(0,0%,95%);color:#000;padding:1em 2em;font-size:0.9rem;-webkit-font-smoothing:antialiased;margin-top:6rem}footer a,footer a:visited{color:#000;text-decoration:none;border-bottom:1px solid transparent;line-height:1.75em}footer a:hover,footer a:active{color:#005e9d;border-bottom:1px dotted #005e9d;text-decoration:none}footer ul{padding:0;margin:0}footer .sorry-app-links .help{font-size:0.75rem;margin-bottom:0;line-height:1.75em}footer .sorry-app-links .help a,footer .sorry-app-links .help a:visited{border-bottom:1px dotted #000}footer .sorry-app-links .help a:hover,footer .sorry-app-links .help a:active{border-bottom:1px dotted #005e9d}footer .sorry-app-links svg.icon{margin-bottom:-2px!important}footer .sorry-app-links .a11y-main-link{font-size:110%;border-bottom:1px solid transparent!important;padding:0;margin:0}@media screen and (max-width:768px){footer .sorry-app-links.column{padding:0}}@media screen and (min-width:769px){.columns{display:flex;flex-direction:row}}.icon{width:.9rem;margin-right:.45em;margin-top:-.15rem}.help{font-family:"Lucida Grande","Helvetica Neue",Helvetica,Arial,sans-serif;display:block;margin-top:0.25rem}#content,#content-inner{margin:.7em;font-size:90%}#abs-outer,#abs{margin:-0.7em}#abs-outer .leftcolumn{margin:0 0 1em 0;padding:0px;width:calc(100% - 18em);float:left}#abs-outer .mobile-submission-download{display:none}#abs-outer .extra-services{float:right;margin:0;width:18em}#abs-outer .extra-services 
span.bib-cite-button{color:rgb(0,0,238);font-weight:bold;padding:.35em;display:block;text-transform:capitalize}#abs-outer .extra-services span.bib-cite-button:hover,#abs-outer .extra-services span.bib-cite-button:focus{text-decoration:underline;cursor:pointer}#labstabs{clear:both;margin:1em 1.5em}#labstabs .labstabs{display:flex;flex-wrap:wrap;white-space:normal;justify-content:flex-start}#labstabs .labstabs>label{order:1;display:block;padding:.75em 1.5em;font-size:.85em;color:#4a4a4a;margin-right:0.2rem;cursor:pointer;font-weight:bold;transition:background ease 0.2s;border-radius:6px 6px 0 0;background-color:#eef5f9}#labstabs .labstabs .tab{order:99;flex-grow:1;width:100%;display:none;padding:1rem;border:1px solid #828282;z-index:1;top:-1px;position:relative}#labstabs .labstabs .tab h1{font-size:1.25em;font-weight:normal}#labstabs .labstabs input[type="radio"]:checked+label{background:#ffffff;border:1px solid #828282;border-bottom:0;z-index:2;color:black}#labstabs .labstabs input[type="radio"]:checked+label+.tab{display:block}@media (max-width:45em){#labstabs{padding-bottom:1em;margin:1em}#labstabs .labstabs .tab,#labstabs .labstabs label{order:initial;width:100%;margin-right:0;margin-top:0.2rem;border-radius:0}#labstabs .labstabs>label{font-size:.8em}#labstabs .labstabs .tab{margin-top:-10px!important;border:2px solid #b8b8b8;border-top:0px;background-color:#ffffff}#labstabs .labstabs label{background-color:#eef5f9;color:black;padding:1em .5em}#labstabs .labstabs .labs-display-bib label{background:transparent;color:black;padding:0}#labstabs .labstabs input[type="radio"]:checked+label{background-color:#acd2e8;color:black;border:2px solid #b8b8b8;border-bottom:0px;padding:1em .5em}}#labstabs .toggle{border:1px dotted #c7d3db;padding:.5em;background-color:#eef5f9}#labstabs .toggle .columns.lab-row{align-items:center}#labstabs .toggle .columns.lab-row .column{padding:.5rem .25rem}#labstabs .column.lab-name{flex-basis:auto;flex-grow:0;font-size:.75rem}#labstabs .tab 
a{display:inline}#labstabs .column.lab-name em{font-size:11px;color:black}#labstabs .column.lab-name em a{line-height:18px;text-decoration:none;padding:0;border:0;font-style:normal;font-size:11px}#labstabs .column.lab-switch{flex-grow:0}#labstabs .toggle .lab-switch label.switch{position:relative;display:inline-block;width:38px;height:20px;padding:0;background-color:transparent;margin:0}#labstabs .toggle .lab-switch .slider{position:absolute;cursor:pointer;top:0;left:0;right:0;bottom:0;border-radius:24px;background-color:#ccc;-webkit-transition:.4s;transition:.4s}#labstabs .toggle .lab-switch .slider:before{position:absolute;content:"";height:16px;width:16px;left:2px;bottom:2px;border-radius:50%;background-color:white;-webkit-transition:.4s;transition:.4s}#abs-outer .subheader{background-color:#eee;color:#000;padding:.25em 0;border-bottom:1px solid #ccc}#abs-outer .subheader h1{margin:0;font-size:1.1em;padding:0 0 .2em 20px;font-weight:bold;font-style:normal}#abs h1.title{margin:.5em 0 .5em 20px;font-size:x-large;font-weight:bold;line-height:120%}#abs .authors{margin:.5em 0 .5em 20px;font-size:medium;line-height:150%}#abs .authors a{font-size:medium}#abs .dateline{margin:.5em 0 .5em 20px;font-style:italic;font-size:small}#abs blockquote.abstract{line-height:1.55;font-size:1.05em;margin-bottom:1.5em}#abs .metatable{font-size:0.92em;line-height:1.5;margin:0 0 1.5em 20px}#abs .tablecell{padding:.1em .5em 0em 0em;vertical-align:top}#abs .arxivid a{font-weight:normal}#abs-outer .submission-history{margin:1.5em 0 1.5em 20px;font-size:90%;line-height:1.5em}#abs-outer .submission-history h2{font-size:120%;margin:0 0 .25em 0;font-weight:bold}#abs-outer .endorsers{margin:1em 0 1.5em 20px;font-size:small;font-style:italic;clear:both}#abs-outer .header-breadcrumbs-mobile{display:none}#abs-outer #abs .dateline{margin-top:15px;margin-bottom:0}#abs-outer #abs h1.title{margin-top:.25em}@media screen and (max-width:768px){#abs-outer 
.header-breadcrumbs-mobile{display:block;color:black;font-size:.85em;margin:.25em 0 .5em 1em}#abs-outer #abs a.mobile-submission-download{display:flex;justify-content:center;margin:.7em .25em;border-radius:10px;background-color:#408bd0;padding:.25em 1em;font-weight:800;color:white;text-decoration:none;font-size:20px;text-align:center}#abs-outer .extra-services h2{font-size:15px;margin-bottom:.5em}#abs-outer .extra-services h3{font-size:14px}#abs-outer .leftcolumn,#abs-outer .extra-services{width:100%;float:left}#abs-outer .extra-services,#abs-outer .extra-services .full-text,#abs-outer .extra-services .browse,#abs-outer .extra-services .extra-ref-cite{border:0px;font-size:12px}#abs-outer .extra-services{-webkit-box-shadow:inset 0px 8px 15px 0px rgba(173,173,173,1);-moz-box-shadow:inset 0px 8px 15px 0px rgba(173,173,173,1);box-shadow:inset 0px 8px 15px 0px rgba(173,173,173,1);background-color:#E6E6E6;margin:0 0 1em 0;padding:1em 0}#abs-outer .extra-ref-cite ul li{float:left}#abs-outer .extra-services .full-text ul,#abs-outer .extra-services .extra-ref-cite ul{list-style:none;margin:0;padding:0}#abs-outer .extra-services .full-text ul li,#abs-outer .extra-services .extra-ref-cite ul li{display:inline-block;margin:0 0 .25em 0;padding:0}#abs-outer .extra-services .bookmarks{margin:1em 0 0 0;border-left:0;padding:.25em .5em 0 1em;border-top:2px solid #cccccc;font-size:1em}#abs-outer .extra-services .bookmarks .abs-button-small{margin-top:.25em}#abs-outer .extra-services .browse{margin-top:.5em;border-top:2px solid #cccccc;padding-top:1em}#abs-outer .extra-services .prevnext{margin-top:.5em}#abs-outer .extra-services .browse .current{color:#AB4B02;display:inline}.abs-switch-cat{margin:0 0 1em 0}.browse .abs-switch-cat .switch{display:inline}.browse .abs-switch-cat .switch a{font-weight:bold}.abs-button{display:inline-block;border-radius:5px;border:1px solid 
#046BAF;font-size:1.25em;color:#046BAF!important;padding:.5em;background:#E6E6E6;margin-right:.3em}.abs-button-small{font-size:1em;padding:.25em 1em;margin:.75em .5em 0 0}.abs-button-grey{border:1px solid #666666;color:#666666!important}#abs-outer .extra-services span.bib-cite-button{margin:.5em .5em 0 0;display:inline-block!important;border-radius:5px;background:#E6E6E6;border:1px solid #046BAF;font-size:1em!important;padding:.25em 1em;font-weight:normal;text-transform:capitalize}#abs-outer .extra-services .extra-ref-cite ul li{margin:.5em 0;padding:0;height:auto}#abs-outer .extra-services .extra-ref-cite ul li a{margin:.25em .5em .25em 0}#abs-outer .subheader{background-color:#fefefe;padding:.25em 0;border-bottom:1px solid #ccc}#abs-outer .subheader h1{margin:0;font-size:.75em;padding:.2em 0 .2em 1em;font-weight:normal;font-style:normal;color:#b55c06}#abs-outer #abs .dateline{color:#767676;font-size:.85em;font-style:normal;margin:2em 0 0 1em}#abs-outer .submission-history{padding:1em;margin:0;background-color:#f5f5f5;-webkit-box-shadow:inset 0px -6px 15px 0px rgba(219,219,219,1);-moz-box-shadow:inset 0px -6px 15px 0px rgba(219,219,219,1);box-shadow:inset 0px -6px 15px 0px rgba(219,219,219,1)}#abs-outer #abs h1.title{margin:0 .25em 0 .5em;font-size:1.5em}#abs-outer #abs .authors{margin:1em .25em 0 1em;font-size:.9em;line-height:1.5em}#abs-outer #abs .authors a{font-size:inherit}#abs-outer #abs blockquote.abstract{margin:0 1em}#abs-outer #abs .metatable{margin:.75em 0 1.5em 1.5em}#abs-outer #abs a,#abs-outer a,#abs-outer .endorsers a{color:#1777bc}#abs-outer .submission-history a,#abs-outer .abs-switch-cat .switch a,#abs-outer .extra-services .full-text .abs-license a{color:#046BAF}#abs-outer .endorsers{display:block;float:left;border:1px solid #eee;padding:1em;margin:1em}}.mobile-header{background-color:#b31b1b}.mobile-header .columns{height:65px;align-items:center}.mobile-header .column{border-left:1px solid #fc5554;border-right:1px solid 
#731515;height:65px;padding:0 1em;display:flex;align-items:center}.mobile-header .column:first-child{border-left:0}.mobile-header .column:last-child{border-right:0}.mobile-header .column.logo-arxiv{width:100px;flex:none}.mobile-header .column.logo-cornell{display:flex}.mobile-header .column.logo-cornell img{height:45px}.mobile-header .column.nav{justify-content:flex-end;align-self:flex-end}.mobile-header #toggle-container button.toggle-control{background-color:transparent;border-radius:0;border:0;font-size:25px;padding:3px;margin-left:.5em}.mobile-header #toggle-container button.toggle-control svg.icon{width:1.25rem;margin:0}@media screen and (min-width:769px){.mobile-header{display:none}}@media screen and (min-width:426px){.mobile-header .column.nav{flex:none;width:100px}}@media screen and (max-width:500px){.mobile-header .columns{height:80px}.mobile-header .column{height:80px;padding:0 .5em}.mobile-header .column.logo-arxiv{border-right:0!important}.mobile-header .column.logo-cornell{justify-content:flex-end;border-left:0!important}.mobile-header .column.logo-cornell img{height:73px}.mobile-header .column.nav{width:65px;flex:none}}@media screen and (min-width:501px){.mobile-header .column{height:65px}.mobile-header .column.logo-cornell img{height:45px}.mobile-header .column.nav{width:65px}}.extra-services{border-bottom:.35em solid #ddd}.extra-services h3{font-size:medium;font-weight:normal;margin:0 0 0.3em 0;padding-top:0.3em}.full-text{margin:0;padding:.5em 1em .5em 1em;font-size:110%;font-weight:normal;border-bottom:medium solid #ddd;border-left:.35em solid #ddd}.full-text h2{font-size:140%;font-weight:bold;margin:0.1em 0 0 0}.full-text ul{margin:.3em 0 0 1em;padding:0;list-style-type:none}.extra-ref-cite{margin:0;padding:0 1em 0 1em;font-size:90%;border-bottom:medium solid #ddd;border-left:.35em solid #ddd}.extra-ref-cite ul{font-weight:normal;margin:0.3em 0 0 20px;padding-left:0;padding-bottom:0.3em;list-style-type:none}.browse{padding:0 1em 0 
1em;font-size:90%;border-bottom:medium solid #ddd;border-left:.35em solid #ddd}.browse .current{padding:0;font-weight:bold}.browse .prevnext{padding:0.2em 0 0 0}.browse .list{padding:0.2em 0 0.5em 0;font-weight:normal}.browse .switch{font-weight:normal;padding:.2em 0em .7em 0em}.bookmarks{clear:both;margin:0;padding:0 1em .5em 1em;font-size:90%;border-left:.35em solid #ddd}.abs-license{font-size:xx-small;padding-top:0.3em}@media screen and (max-width:768px){.columns.is-mobile{display:flex}}.is-sr-only{border:none!important;clip:rect(0,0,0,0)!important;height:0.01em!important;overflow:hidden!important;padding:0!important;position:absolute!important;white-space:nowrap!important;width:0.01em!important}.column{display:block;flex-basis:0;flex-grow:1;flex-shrink:1;padding:0.75rem}@media screen and (min-width:769px),print{.columns:not(.is-desktop){display:flex}}@media screen and (min-width:1024px){.columns.is-desktop{display:flex}}svg.icon{height:1em!important}.icon.filter-white{fill:#FFFFFF}.icon.filter-black{fill:#000000}.filter-dark_grey{fill:#cccccc}a .icon{transition:fill 0.3s ease}</style>
<!-- Canonical URL and search-engine description. -->
<link rel="canonical" href="https://arxiv.org/abs/2305.11747">
<meta name="description" content="Abstract page for arXiv paper 2305.11747: HaluEval: A Large-Scale Hallucination Evaluation Benchmark for Large Language Models">

<!-- Open Graph card. The image URLs are absolute: link-preview crawlers do not reliably resolve relative paths. -->
<meta property="og:type" content="website">
<meta property="og:site_name" content="arXiv.org">
<meta property="og:title" content="HaluEval: A Large-Scale Hallucination Evaluation Benchmark for Large Language Models">
<meta property="og:url" content="https://arxiv.org/abs/2305.11747v3">
<meta property="og:image" content="https://arxiv.org/static/browse/0.3.4/images/arxiv-logo-fb.png">
<meta property="og:image:secure_url" content="https://arxiv.org/static/browse/0.3.4/images/arxiv-logo-fb.png">
<meta property="og:image:width" content="1200">
<meta property="og:image:height" content="700">
<meta property="og:image:alt" content="arXiv logo">
<meta property="og:description" content="Large language models (LLMs), such as ChatGPT, are prone to generate hallucinations, i.e., content that conflicts with the source or cannot be verified by the factual knowledge. To understand what types of content and to which extent LLMs are apt to hallucinate, we introduce the Hallucination Evaluation benchmark for Large Language Models (HaluEval), a large collection of generated and human-annotated hallucinated samples for evaluating the performance of LLMs in recognizing hallucination. To generate these samples, we propose a ChatGPT-based two-step framework, i.e., sampling-then-filtering. Besides, we also hire some human labelers to annotate the hallucinations in ChatGPT responses. The empirical results suggest that ChatGPT is likely to generate hallucinated content in specific topics by fabricating unverifiable information (i.e., about $19.5\%$ responses). Moreover, existing LLMs face great challenges in recognizing the hallucinations in texts. However, our experiments also prove that providing external knowledge or adding reasoning steps can help LLMs recognize hallucinations. Our benchmark can be accessed at https://github.com/RUCAIBox/HaluEval.">

<!-- Twitter summary card. -->
<meta name="twitter:site" content="@arxiv">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="HaluEval: A Large-Scale Hallucination Evaluation Benchmark for...">
<meta name="twitter:description" content="Large language models (LLMs), such as ChatGPT, are prone to generate hallucinations, i.e., content that conflicts with the source or cannot be verified by the factual knowledge. To understand what...">
<meta name="twitter:image" content="https://static.arxiv.org/icons/twitter/arxiv-logo-twitter-square.png">
<meta name="twitter:image:alt" content="arXiv logo">
<!-- Screen-only helper styles: clipped "visually hidden" text, the button/tooltip wrapper,
     the DOI table cell, a CSS triangle, and the "pulsate" keyframe animation. -->
<style media="screen">
  /* Clips the element down to a 1px box instead of using display:none. */
  .visually-hidden {
    clip-path: inset(100%);
    clip: rect(1px, 1px, 1px, 1px);
    height: 1px;
    overflow: hidden;
    position: absolute;
    white-space: nowrap;
    width: 1px;
  }

  .button-and-tooltip {
    position: relative;
    display: inline;
  }

  .button-and-tooltip button {
    background: transparent;
    border: none;
    box-shadow: none;
    position: relative;
    padding: 0 0 0 6px;
  }

  td.tablecell.arxivdoi {
    display: inline-flex;
    align-items: center;
  }

  /* Zero-size box with transparent side borders (base of a CSS triangle). */
  .arrow {
    width: 0;
    height: 0;
    border-left: 8px solid transparent;
    border-right: 8px solid transparent;
  }

  /* Prefixed copy is kept ahead of the standard rule, exactly as in the original. */
  @-webkit-keyframes pulsate {
    0% {
      -webkit-transform: scale(.1);
      transform: scale(.1);
      opacity: 0;
    }
    30% {
      opacity: 1;
    }
    60% {
      -webkit-transform: scale(.8);
      transform: scale(.8);
      opacity: 0;
    }
  }

  @keyframes pulsate {
    0% {
      -webkit-transform: scale(.1);
      transform: scale(.1);
      opacity: 0;
    }
    30% {
      opacity: 1;
    }
    60% {
      -webkit-transform: scale(.8);
      transform: scale(.8);
      opacity: 0;
    }
  }
</style>
<!-- Scholarly citation metadata (citation_* meta tags), one tag per line; authors are listed in order. -->
<meta name="citation_title" content="HaluEval: A Large-Scale Hallucination Evaluation Benchmark for Large Language Models">
<meta name="citation_author" content="Li, Junyi">
<meta name="citation_author" content="Cheng, Xiaoxue">
<meta name="citation_author" content="Zhao, Wayne Xin">
<meta name="citation_author" content="Nie, Jian-Yun">
<meta name="citation_author" content="Wen, Ji-Rong">
<meta name="citation_date" content="2023/05/19">
<meta name="citation_online_date" content="2023/10/23">
<meta name="citation_pdf_url" content="https://arxiv.org/pdf/2305.11747">
<meta name="citation_arxiv_id" content="2305.11747">
<meta name="citation_abstract" content="Large language models (LLMs), such as ChatGPT, are prone to generate hallucinations, i.e., content that conflicts with the source or cannot be verified by the factual knowledge. To understand what types of content and to which extent LLMs are apt to hallucinate, we introduce the Hallucination Evaluation benchmark for Large Language Models (HaluEval), a large collection of generated and human-annotated hallucinated samples for evaluating the performance of LLMs in recognizing hallucination. To generate these samples, we propose a ChatGPT-based two-step framework, i.e., sampling-then-filtering. Besides, we also hire some human labelers to annotate the hallucinations in ChatGPT responses. The empirical results suggest that ChatGPT is likely to generate hallucinated content in specific topics by fabricating unverifiable information (i.e., about $19.5\%$ responses). Moreover, existing LLMs face great challenges in recognizing the hallucinations in texts. However, our experiments also prove that providing external knowledge or adding reasoning steps can help LLMs recognize hallucinations. Our benchmark can be accessed at https://github.com/RUCAIBox/HaluEval.">
<style>.MathJax_Preview{color:#888}#MathJax_Message{position:fixed;left:1px;bottom:2px;background-color:#E6E6E6;border:1px solid #959595;margin:0px;padding:2px 8px;z-index:102;color:black;font-size:80%;width:auto;white-space:nowrap}</style><link rel=icon type=image/png sizes=32x32 href="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAMAAABEpIrGAAAABGdBTUEAALGPC/xhBQAAACBjSFJNAAB6JgAAgIQAAPoAAACA6AAAdTAAAOpgAAA6mAAAF3CculE8AAACr1BMVEUAAACzICWzICSyrKazrKWzqaKzraazR0mzTE6zaGizBQiz49uzZmSzEBazHySzsam0ICmzICWzICWzICWzICWzICWzICWzICWzICWzICWzICWzICWzICWzICWzICWzICWzrKWzrKWzICWzICWzICWzICWzICWzrKWzrKWzrKWzrKWzICWzICWzICWzICWzICWzrKWzrKWzrKWzrKWzICWzICWzICWzICWzrKWzrKWzrKWzrKWzICWzICWzICWzrKWzrKWzrKWzrKWzICWzICWzICWzICWzrKWzrKWzrKWzICWzICWzICWzHySz4dWzrKWzrKWzrKWzICWzICWzHySzgX2zrqezrKWzrKWzICWzPD+zrKWzrKWzrKWzICWzHySzjYmzr6izraazT1CzLjKzMDSzQUOzrKWzrqezgH2zKi6zHySzICWzrKWzrKWzcW+zICWzICWzrKWzrKWzYWGzICWzrKWzUVKzICWzrKWzrKWznZezQkSzICWzICWzrKWzraazkY2zHiOzICWzICWzjYmzm5WzhIGzODuzAACzHySzHSKzOz6zrqazs6yzICWzICWzODuzrKWzrKWzrKWzICWzICWzODuzo5yzrKWzrKWzICWzICWzICWzGyCzr6ezrKWzrKWzICWzICWzrKWzrKWzrKWzrKWzICWzICWzICWzrKWzrKWzrKWzrKWzICWzICWzICWzrKWzrKWzrKWzrKWzrKWzICWzICWzICWzrKWzrKWzrKWzrKWzICWzICWzICWzrKWzrKWzrKWzICWzICWzrKWzrKWzrKWzrKWzrKWzrKWzrKWzrKWzrKWzrKWzrKWzrKWzrKWzrKWzICWzrKWzHySzkYyzrqezHiOzODuzJCmzqaOzISazraazpZ6zNTizkIuzi4ezraX///8A/VYjAAAA1HRSTlMAAAAAAAAAAAAAAAAAAAAAAAGA95kOB7X6kQ9W8fyaEQIBA3z9oRUllnUKDJz+qBgwxuwxGbquHD3SpQwt1CBL3cEeR+e7JFvm1mX0wSgEbO5HBYXFkfPyYaX++n0EHsH9lwtO8/NRJrr8/ttFGb7+3jpk/v6ecv6sK9j+/vVcQNf+8nUGSfD7dQMHivyVCmr2/vl3A0np28zvWS7V0joht+HJMR2vzCcLvygYp7IVc7QhFJ/9lQkSzhoRl/t2lu0UDY9XDSoLhvjgOwh89a4GB4L2i8A61lMAAAABYktHROQvYjspAAAAB3RJTUUH5wETDS455INCLAAAAgZJREFUOMt10/dfTXEYB/Dz6Ilst3lFueWmjDIysmdkF0Kyyd7Ze5WZcc2ErIhKISops6zs73Pda1zrH/EcoXPqe8+vn/fre57z+T5HURQwuHt4eoHi9AFvH2Fs6gu1nAJo1lz4+beAqkNc0BSAGhHY0ijMQa3+C8TgkNZttKJtOyFCw9r/FYgdOlKncC2Azl2EEF27/RGIEd2JevRE3Ri9erPw6cMCA/r2I+o/AFE/6MBBLCIHg6tpSBTR0HB9rophw1mMGDlqdDRRVEz1nMWYsX4sxsWOJ5
owsWbOwjeOgXXSR1v85CmSnF8ydZpVCPun6TNm1pY2Wsdt1mwW1oQ5devJcsS58+YvUMXCRSC7Oly8hGjpZxZi2XKJQFyRSPRl5VcGYpVBBlYTkePbGvWItYGSd+C69arYsNEuNm2WDYG4ZauDyLbt+/YdO6X7Vb9BUjILx67dexpKe2i0d9/+HzYWKQcay3IwHDxkP/yT57AcOSq7Cjh2nIs+cfIXUeopyWVC2mn+vjNn08/xGecjaojKpbtwEVwvZbC4fKWagEx/zq9eA0DMyuZKc67rN65JrlGIvBtqP4g3b+VT4u07uqUtKBTmsKLK/hDvFt8jS4lJu/fepfc9H1T9OA8fxVPqY80YAE/KMjX9Y/nTZ/T8hWwz/4mXFa/o9RvnQMG3795/UKf4DUwCyzJ9eBcsAAAAJXRFWHRkYXRlOmNyZWF0ZQAyMDIzLTAxLTE5VDEzOjQ2OjU3KzAwOjAwqJ4w4AAAACV0RVh0ZGF0ZTptb2RpZnkAMjAyMy0wMS0xOVQxMzo0Njo1NyswMDowMNnDiFwAAABXelRYdFJhdyBwcm9maWxlIHR5cGUgaXB0YwAAeJzj8gwIcVYoKMpPy8xJ5VIAAyMLLmMLEyMTS5MUAxMgRIA0w2QDI7NUIMvY1MjEzMQcxAfLgEigSi4A6hcRdPJCNZUAAAAASUVORK5CYII="><style>.sf-hidden{display:none!important}</style><meta http-equiv=content-security-policy content="default-src 'none'; font-src 'self' data:; img-src 'self' data:; style-src 'unsafe-inline'; media-src 'self' data:; script-src 'unsafe-inline' data:; object-src 'self' data:; frame-src 'self' data:;"><style>img[src="data:,"],source[src="data:,"]{display:none!important}</style></head>
|
|
<body class=with-cu-identity><div style="visibility:hidden;overflow:hidden;position:absolute;top:0px;height:1px;width:auto;padding:0px;border:0px none;margin:0px;text-align:left;text-indent:0px;text-transform:none;line-height:normal;letter-spacing:normal;word-spacing:normal" class=sf-hidden></div><div id=MathJax_Message style=display:none></div>
|
|
|
|
|
|
<div class=flex-wrap-footer>
|
|
<header>
|
|
<a href=#content class=is-sr-only>Skip to main content</a>
|
|
|
|
<div class="columns is-vcentered is-hidden-mobile sf-hidden" id=cu-identity>
|
|
|
|
</div>
|
|
<div id=header class="is-hidden-mobile sf-hidden">
|
|
|
|
|
|
|
|
</div>
|
|
<div class=mobile-header>
|
|
<div class="columns is-mobile">
|
|
<div class="column logo-arxiv"><a href=https://arxiv.org/><img src="data:image/svg+xml;base64,PHN2ZyBpZD0ibG9nb21hcmsiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgdmlld0JveD0iMCAwIDc0LjQ5MiAxMDAuMjUiPjxnIGlkPSJ0aW55Xy1fd2hpdGUiIGRhdGEtbmFtZT0idGlueSAtIHdoaXRlIj48cGF0aCBkPSJNNTg2LjcyLDI1NS42MTZhMy4zNzcsMy4zNzcsMCwwLDEsLjQ0OC4wMzEsNS45MTcsNS45MTcsMCwwLDEsMy41ODEsMi43OWMuNDU0LDEuMTE2LjMxNCwyLjAyMy0xLjMxNSw0LjE0MUw1NjMuMTY4LDI5My42bC04LjU1OC0xMC4wNDcsMjkuMzQ4LTI2LjYxNmE0LjQwNiw0LjQwNiwwLDAsMSwyLjc2Mi0xLjMyMW0wLTEuNWE1Ljc2Niw1Ljc2NiwwLDAsMC0zLjY5LDEuNjQzbC0uMDQxLjAzMi0uMDM4LjAzNUw1NTMuNiwyODIuNDQybC0xLjA3Ny45NzcuOTQzLDEuMTA3LDguNTU4LDEwLjA0NywxLjE0NSwxLjM0NCwxLjE0MS0xLjM0OCwyNi4yNjctMzEuMDIyLjAyMi0uMDI3LjAyMi0uMDI4YzEuNTc0LTIuMDQ2LDIuMzI3LTMuNjIyLDEuNTE2LTUuNjE5YTcuMzA5LDcuMzA5LDAsMCwwLTQuNzc5LTMuNzE0LDUuMDgzLDUuMDgzLDAsMCwwLS42NC0uMDQzWiIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoLTUyNi4wODYgLTI0NS41NTkpIiBmaWxsPSIjZmZmIi8+PHBhdGggZD0iTTU1My40MjMsMjg0LjU5M2w4Ljk3NywxMC41NThMNTk3LjkxMSwzMzcuOWMuODczLDEuMDkzLDEuNDE5LDIuMTg2LDEuMDQ3LDMuNDE4YTQuMDkyLDQuMDkyLDAsMCwxLTIuNzIxLDIuODM3LDMuNTU3LDMuNTU3LDAsMCwxLTEuMDQ1LjE1OSw0LDQsMCwwLDEtMi42ODctMS4xMjRMNTQ4LjAxLDMwMC44MDhjLTMuNS0zLjUtMi45NzEtOC4xNTEuNDM2LTExLjU1OGw0Ljk3Ny00LjY1N20uMTI0LTIuMTdMNTUyLjQsMjgzLjVsLTQuOTc2LDQuNjU2Yy00LjE5Miw0LjE5MS00LjM3Miw5LjgxNi0uNDczLDEzLjcxNGw0NC41MjEsNDIuNGE1LjQ4NSw1LjQ4NSwwLDAsMCwzLjcyMiwxLjUzOCw1LjEsNS4xLDAsMCwwLDEuNDgzLS4yMjQsNS41OSw1LjU5LDAsMCwwLDMuNzE5LTMuODM4LDUuMTc2LDUuMTc2LDAsMCwwLTEuMzEtNC43ODhsLTM1LjUzLTQyLjc2Ny04Ljk4OC0xMC41NzEtMS4wMTktMS4yWiIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoLTUyNi4wODYgLTI0NS41NTkpIiBmaWxsPSIjZmZmIi8+PHBhdGggZD0iTTU2Mi40LDI5NS4xNTFsOS41NTYsMTEuNSw1Ljc2MS01LjM1NmE3LjkyNiw3LjkyNiwwLDAsMCwuMDQxLTExLjc0M2wtNDMuNy00MS45MjNzLTEuNjcxLTIuMDI5LTMuNDM3LTIuMDcxYTQuNDksNC40OSwwLDAsMC00LjIzLDIuNzE4Yy0uNjg4LDEuNjUxLS4xOTQsMi44MDksMS4zMTUsNC45N2wyOS4zMDYsMzUuNTY1WiIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoLTUyNi4wODYgLTI0NS41NTkpIiBmaWxsPSIjZmZmIi8+PHBhdGggZD0iTTU1My43LDMwNi4yMjNsLTE3LjExNiwyMS4wMjRjLTEuMjU1LD
EuMzM3LTIuMDMyLDMuNjgzLTEuMzMxLDUuMzY3YTQuNTg3LDQuNTg3LDAsMCwwLDQuMjg3LDIuODQxLDQuMDg3LDQuMDg3LDAsMCwwLDMuMDgyLTEuNTIzbDIwLjMyOC0xOC45WiIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoLTUyNi4wODYgLTI0NS41NTkpIiBmaWxsPSIjZmZmIi8+PHBhdGggZD0iTTU5Mi4wNzQsMjUwLjU0NyIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoLTUyNi4wODYgLTI0NS41NTkpIiBmaWxsPSIjZmZmIiBzdHJva2U9IiMwMDAiIHN0cm9rZS1taXRlcmxpbWl0PSIxMCIgc3Ryb2tlLXdpZHRoPSIwLjI1Ii8+PC9nPjwvc3ZnPg==" alt="arXiv logo" style=height:60px></a></div>
|
|
<div class="column logo-cornell"><a href=https://www.cornell.edu/>
|
|
<picture>
|
|
|
|
|
|
<img src=data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz4KPCEtLSBHZW5lcmF0b3I6IEFkb2JlIElsbHVzdHJhdG9yIDE5LjIuMSwgU1ZHIEV4cG9ydCBQbHVnLUluIC4gU1ZHIFZlcnNpb246IDYuMDAgQnVpbGQgMCkgIC0tPgo8c3ZnIHZlcnNpb249IjEuMSIgaWQ9IkxheWVyXzEiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgeG1sbnM6eGxpbms9Imh0dHA6Ly93d3cudzMub3JnLzE5OTkveGxpbmsiIHg9IjBweCIgeT0iMHB4IgoJIHZpZXdCb3g9IjAgMCAxMjAgMTIwIiBzdHlsZT0iZW5hYmxlLWJhY2tncm91bmQ6bmV3IDAgMCAxMjAgMTIwOyIgeG1sOnNwYWNlPSJwcmVzZXJ2ZSI+CjxnPgoJPHBhdGggZD0iTTQ1LjksOTEuN2M2LjgsNS4zLDEzLjUsOC4yLDEzLjgsOC4zbDAuNSwwLjJsMC41LTAuMmMwLjMtMC4xLDYuOS0zLDEzLjctOC4zYzkuMS03LjEsMTMuOS0xNS4yLDEzLjktMjMuNWwwLTM3LjZsLTU2LjQsMAoJCWwwLDM3QzMxLjksNzYuMSwzNi43LDg0LjUsNDUuOSw5MS43eiBNNTguOCw4MC4xYy0wLjMtMC4xLTAuNi0wLjItMS0wLjJjMCwwLDAsMC0wLjEsMGMtMS40LDAtMi40LDAuMS0zLjUsMC4yCgkJYy0xLjEsMC4xLTIuMiwwLjItMy44LDAuMmMtMC43LDAtMS40LDAtMS45LDBWNjIuN2MwLjEsMCwwLjIsMCwwLjIsMGMwLjcsMCwxLjUsMCwyLjYsMGMwLjksMCwxLjktMC4xLDMtMC4yCgkJYzAuNi0wLjEsMS4zLTAuMSwyLTAuMmMxLjItMC4xLDIuMiwwLjEsMi4yLDAuMWMwLjEsMCwwLjIsMC4xLDAuMywwLjFDNTguOCw2Mi40LDU4LjgsODAuMSw1OC44LDgwLjF6IE04NS42LDY4LjIKCQljMCwxNS40LTE4LjksMjUuOC0yNC41LDI4LjZ2LTE0YzAuMS0wLjEsMC4yLTAuMiwwLjMtMC4zYzAuMy0wLjMsMC41LTAuNSwxLTAuNWMxLjMsMCwyLjMsMC4xLDMuNCwwLjJjMS4xLDAuMSwyLjIsMC4yLDMuOSwwLjIKCQljMS43LDAsMywwLDMsMGwwLjksMGwwLTUuMmgyLjJ2LTIuOGgtMi4ybDAtNS4xaDIuMnYtMi44aC0yLjJsMC01LjdsLTEsMGMtMC40LDAtMC44LDAtMS4zLDBjLTAuNywwLTEuNSwwLTIuNSwwCgkJYy0wLjgsMC0xLjctMC4xLTIuOC0wLjJjLTAuNi0wLjEtMS4zLTAuMS0yLTAuMmMtMS41LTAuMi0yLjgsMC4xLTIuOSwwLjFjMCwwLDAsMC0wLjEsMFY1NWgyNC41TDg1LjYsNjguMnogTTYxLjEsODAuMlY2Mi41CgkJYzAuMS0wLjEsMC4zLTAuMSwwLjUtMC4yYzAsMCwxLjEtMC4yLDIuMy0wLjFjMC43LDAuMSwxLjQsMC4xLDIsMC4yYzEuMSwwLjEsMi4xLDAuMiwzLDAuMmMwLjksMCwxLjYsMCwyLjMsMGMwLjIsMCwwLjQsMCwwLjUsMAoJCWwwLDE3LjdjLTAuNSwwLTEuMiwwLTEuOSwwYy0xLjYsMC0yLjctMC4xLTMuOC0wLjJjLTEuMS0wLjEtMi4xLTAuMi0zLjUtMC4yQzYxLjksODAsNjEuNSw4MC4xLDYxLjEsODAuMnogTTM0LjYsMzMuM2w1MSwwbDAsMTkuMQoJCWgtNTFMMzQuNiwzMy4zeiBNMzQuNiw1NWgyNC4zdjUuM2MtMC40LTAuM
S0xLjUtMC4yLTIuNy0wLjFjLTAuNywwLjEtMS40LDAuMS0yLDAuMmMtMS4xLDAuMS0yLDAuMi0yLjgsMC4yYy0xLDAtMS44LDAtMi41LDAKCQljLTAuNSwwLTAuOSwwLTEuMywwbC0xLDB2NS4xaC0yLjR2Mi44aDIuNHY1LjFoLTIuNHYyLjhoMi40djUuN2wwLjksMGMwLjEsMCwxLjQsMC4xLDMsMGMxLjcsMCwyLjktMC4xLDMuOS0wLjIKCQljMS4xLTAuMSwyLTAuMiwzLjMtMC4yYzAuNSwwLDAuNywwLjIsMC45LDAuNWMwLDAsMC4xLDAuMSwwLjEsMC4xdjE0LjFDNTMsOTMuOCwzNC42LDgzLjUsMzQuNiw2Ny42TDM0LjYsNTV6Ii8+Cgk8cGF0aCBkPSJNNTUuOSwzNS4ySDQxLjZ2MWMwLDMuMSwwLDguNCwwLDguNmMwLjEsMS4xLDAuOCwyLjEsMi4yLDNjMS43LDEuMiw0LjMsMi43LDQuNCwyLjdsMC41LDAuM2wwLjUtMC4zCgkJYzAuMS0wLjEsMi45LTEuNiw0LjYtMi43YzItMS4zLDIuMS0yLjYsMi4xLTNjMC0wLjIsMC00LjgsMC04LjZWMzUuMnogTTQzLjYsMzcuMmgxMC4yYzAsMC44LDAsMS43LDAsMi42SDQzLjYKCQlDNDMuNiwzOC45LDQzLjYsMzgsNDMuNiwzNy4yeiBNNTMuOSw0NC43YzAsMC41LTAuNywxLTEuMiwxLjRjLTEuMiwwLjgtMy4xLDEuOS00LDIuNGMtMC44LTAuNS0yLjYtMS41LTMuOC0yLjQKCQljLTAuOC0wLjUtMS4zLTEuMS0xLjMtMS41YzAtMC4xLDAtMS4zLDAtMi45aDEwLjJDNTMuOSw0My4zLDUzLjksNDQuNSw1My45LDQ0Ljd6Ii8+Cgk8cGF0aCBkPSJNODAuMSwzNC44SDY1LjZ2MWMwLDMuOSwwLDguNiwwLDguOWMwLjEsMS4xLDAuOCwyLjEsMi4yLDNjMS44LDEuMiw0LjQsMi43LDQuNSwyLjhsMC41LDAuM2wwLjUtMC4zCgkJYzAuMS0wLjEsMy0xLjYsNC43LTIuOGMyLjEtMS40LDIuMS0yLjYsMi4xLTNjMC0wLjIsMC00LjksMC04LjhMODAuMSwzNC44eiBNNzguMSw0NC41YzAsMC41LTAuNywxLjEtMS4yLDEuNAoJCWMtMS4zLDAuOC0zLjIsMS45LTQuMSwyLjRjLTAuOS0wLjUtMi42LTEuNi0zLjktMi40Yy0wLjQtMC4zLTEuMy0wLjktMS4zLTEuNWMwLTAuMSwwLTAuMiwwLTAuNWw1LTMuNmw1LjUsMy43CgkJQzc4LjEsNDQuMyw3OC4xLDQ0LjUsNzguMSw0NC41eiBNNzIuNiwzNy44bC01LDMuNmMwLTEuNCwwLTMuMiwwLTQuN2gxMC42YzAsMS42LDAsMy4zLDAsNC44TDcyLjYsMzcuOHoiLz4KCTxwYXRoIGQ9Ik02My43LDY2LjVsMSwwLjFjMC42LDAuMSwxLjIsMC4xLDEuNywwLjFsMi45LDAuMWwwLTJsLTIuOS0wLjFjLTAuNCwwLTAuOS0wLjEtMS41LTAuMWwtMS0wLjFjLTAuMywwLTAuNSwwLTAuNywwbDAuMSwyCgkJQzYzLjQsNjYuNCw2My42LDY2LjQsNjMuNyw2Ni41eiIvPgoJPHBhdGggZD0iTTY2LjQsNzAuMmMtMC40LDAtMC45LTAuMS0xLjUtMC4xYy0wLjMsMC0wLjctMC4xLTEtMC4xYy0wLjMsMC0wLjUsMC0wLjcsMGwwLDJjMC4xLDAsMC4zLDAsMC41LDBjMC40LDAsMC43LDAuMSwxLDAuMQoJCWMwLjYsMC4xLDEuMSwwLjEsMS42LDAuMWMwLjYsMCwxLjUsMCwyLjIsMGwwLjgsMGwwLTJsLTAuO
CwwQzY3LjksNzAuMyw2Nyw3MC4yLDY2LjQsNzAuMnoiLz4KCTxwYXRoIGQ9Ik02Ni40LDc1LjhjLTAuNCwwLTAuOS0wLjEtMS41LTAuMWMtMC4zLDAtMC43LTAuMS0xLTAuMWMtMC4zLDAtMC41LDAtMC43LDBsMC4xLDJjMC4xLDAsMC4zLDAsMC41LDBjMC40LDAsMC43LDAuMSwxLDAuMQoJCWMwLjYsMC4xLDEuMiwwLjEsMS42LDAuMWwwLjgsMGMwLjgsMCwxLjgsMCwyLjEsMC4xbDAtMmMtMC4zLDAtMS4zLDAtMi4xLTAuMUw2Ni40LDc1Ljh6Ii8+Cgk8cGF0aCBkPSJNNTMuOCw2Ni43YzAuNSwwLDEtMC4xLDEuNy0wLjFjMC4zLDAsMC42LTAuMSwxLTAuMWMwLjIsMCwwLjMsMCwwLjUsMGwwLTJjLTAuMiwwLTAuNCwwLTAuNywwYy0wLjQsMC0wLjcsMC4xLTEsMC4xCgkJYy0wLjYsMC4xLTEuMSwwLjEtMS41LDAuMWwtMi45LDAuMWwwLDJMNTMuOCw2Ni43eiIvPgoJPHBhdGggZD0iTTU1LjMsNzAuMWMtMC42LDAuMS0xLjEsMC4xLTEuNSwwLjFsLTIuOSwwLjFsMCwybDIuOS0wLjFjMC41LDAsMS0wLjEsMS43LTAuMWwwLjktMC4xYzAuMiwwLDAuMywwLDAuNCwwbDAuMS0yCgkJYy0wLjIsMC0wLjQsMC0wLjcsMEw1NS4zLDcwLjF6Ii8+Cgk8cGF0aCBkPSJNNTUuMyw3NS42Yy0wLjYsMC4xLTEuMSwwLjEtMS42LDAuMWMtMC45LDAtMi40LDAuMS0yLjgsMC4xbDAsMmMwLjQsMCwxLjktMC4xLDIuOS0wLjFjMC41LDAsMS0wLjEsMS43LTAuMWwxLTAuMQoJCWMwLjEsMCwwLjMsMCwwLjQsMGwwLTJjLTAuMiwwLTAuNCwwLTAuNywwTDU1LjMsNzUuNnoiLz4KCTxwYXRoIGQ9Ik05LjMsNjAuM2MyLjUsMCw0LTEuNiw0LjItNC40YzAtMC41LDAtMS4yLTAuMS0xLjlsMC0wLjJsLTIuMSwwbDAuMiwwLjRjMC4xLDAuMywwLjIsMSwwLjIsMS42Yy0wLjEsMS40LTEuMiwyLjMtMi43LDIuMgoJCWMtMS42LTAuMS0yLjUtMS4xLTIuNS0yLjVjMC0wLjUsMC4xLTEsMC41LTEuNWwwLjMtMC40bC0yLTAuM2wtMC4xLDAuMmMtMC4zLDAuNi0wLjQsMS4zLTAuNSwxLjljLTAuMiwyLjksMS40LDQuOCw0LjMsNC45CgkJQzkuMSw2MC4zLDkuMiw2MC4zLDkuMyw2MC4zeiIvPgoJPHBhdGggZD0iTTkuNyw1MC41YzAuNiwwLjIsMS4xLDAuMiwxLjYsMC4yYzEuOSwwLDMuMy0xLjEsMy45LTNjMC4zLTEuMiwwLjItMi4yLTAuMy0zLjJjLTAuNi0xLTEuNi0xLjctMi45LTIuMQoJCWMtMC41LTAuMi0xLjEtMC4yLTEuNS0wLjJjLTEuOSwwLTMuMywxLjEtMy45LDNDNS44LDQ3LjcsNyw0OS43LDkuNyw1MC41eiBNOC4zLDQ1LjhjMC4zLTEuMSwxLjItMS4zLDEuOS0xLjMKCQljMC4zLDAsMC43LDAuMSwxLjEsMC4yYzAuOSwwLjMsMS42LDAuNywxLjksMS4zYzAuMiwwLjQsMC4zLDAuOCwwLjEsMS4zYy0wLjMsMS4xLTEuMiwxLjMtMS45LDEuM2MtMC40LDAtMC43LTAuMS0xLjEtMC4yCgkJQzkuMSw0OCw3LjksNDcuMSw4LjMsNDUuOHoiLz4KCTxwYXRoIGQ9Ik0xOC4zLDM5LjNsLTIuOS0xLjZsMC4xLTAuMmMwLjMtMC41LDAuNS0wLjUsMC45LTAuNGwyLjksMC40bDEuMS0yL
jFsLTMuOC0wLjRjLTAuMywwLTAuNSwwLTAuNywwYzAtMC44LTAuNC0xLjYtMS4yLTIKCQljLTAuNC0wLjItMC44LTAuMy0xLjItMC4zYy0xLjQsMC0yLjEsMS40LTIuOCwyLjdsLTEsMS45bDcuNiw0LjFMMTguMywzOS4zeiBNMTMuOCwzNi42bC0wLjEsMC4ybC0xLjQtMC43bDAuMS0wLjIKCQljMC4zLTAuNiwwLjctMS4yLDEuMy0wLjljMC4yLDAuMSwwLjMsMC4yLDAuNCwwLjRDMTQuMiwzNS44LDE0LDM2LjMsMTMuOCwzNi42eiIvPgoJPHBvbHlnb24gcG9pbnRzPSIyMywzMS43IDE5LjEsMjguNSAyNC43LDI5LjggMjYuMiwyNy45IDE5LjYsMjIuNCAxOC4yLDI0IDIyLjIsMjcuMyAxNi42LDI2IDE1LDI3LjkgMjEuNywzMy40IAkiLz4KCTxwb2x5Z29uIHBvaW50cz0iMzIuNywyMi40IDMxLjUsMjEgMjkuMSwyMi45IDI4LDIxLjYgMzAuMiwxOS44IDI5LDE4LjQgMjYuOCwyMC4yIDI2LDE5LjEgMjguNCwxNy4yIDI3LjIsMTUuNyAyMy4xLDE5IDI4LjUsMjUuNyAKCQkJIi8+Cgk8cG9seWdvbiBwb2ludHM9IjM5LjgsMTguMSAzOC45LDE2LjUgMzYuMiwxOCAzMi45LDEyLjEgMzEsMTMuMiAzNS4yLDIwLjcgCSIvPgoJPHBvbHlnb24gcG9pbnRzPSI0Ny4xLDE1LjMgNDYuNCwxMy42IDQzLjYsMTQuNiA0MS4zLDguMyAzOS4yLDkgNDIuMSwxNy4xIAkiLz4KCTxwYXRoIGQ9Ik01NS44LDEzLjljMC4yLDAsMC4zLDAsMC41LDBjMS4yLTAuMSwyLjEtMC41LDIuNi0xLjFjMC41LTAuNiwwLjctMS41LDAuNi0yLjVsLTAuNC01LjRMNTYuOSw1bDAuNCw0LjkKCQljMC4xLDEuNy0wLjYsMi0xLjIsMi4xYy0wLjEsMC0wLjEsMC0wLjIsMGMtMC41LDAtMS4yLTAuMi0xLjQtMS45bC0wLjQtNC45bC0yLjIsMC4ybDAuNCw1LjRDNTIuNywxMi44LDUzLjgsMTMuOSw1NS44LDEzLjl6Ii8+Cgk8cG9seWdvbiBwb2ludHM9IjY1LjcsOC44IDY3LjUsMTQuMSA3MCwxNC41IDcxLjEsNS45IDY5LDUuNiA2OC4zLDEwLjcgNjYuNSw1LjMgNjQuMSw1IDYyLjksMTMuNSA2NSwxMy44IAkiLz4KCTxwb2x5Z29uIHBvaW50cz0iNzcuNiw3LjggNzUuNSw3LjEgNzIuOSwxNS4zIDc1LDE2IAkiLz4KCTxwb2x5Z29uIHBvaW50cz0iODguNCwxMi42IDg2LjUsMTEuNiA4Mi4zLDE1LjkgODMuMiwxMCA4MS4xLDguOSA3OS44LDE4IDgyLDE5IAkiLz4KCTxwb2x5Z29uIHBvaW50cz0iOTAuNiwyNSA5MS43LDIzLjUgODkuMywyMS42IDkwLjMsMjAuMyA5Mi41LDIyIDkzLjcsMjAuNSA5MS41LDE4LjggOTIuMywxNy43IDk0LjcsMTkuNiA5NS45LDE4LjEgOTEuNywxNC45IAoJCTg2LjQsMjEuNyAJIi8+Cgk8cGF0aCBkPSJNOTQuMiwyOC42bDIuNS0yLjJsMC4xLDAuMmMwLjMsMC40LDAuMywwLjUsMC4xLDFsLTEuNCwyLjZsMS42LDEuOGwxLjctMy40YzAuMS0wLjEsMC4yLTAuNCwwLjItMC43CgkJYzAuNywwLjMsMS42LDAuMiwyLjMtMC40YzEuOS0xLjYsMC40LTMuMy0wLjctNC42bC0xLjQtMS42TDkyLjcsMjdMOTQuMiwyOC42eiBNOTkuMywyNC4xbDAuMSwwLjJjMC4zLDAuN
CwwLjYsMC44LDAuNiwxLjEKCQljMCwwLjItMC4xLDAuMy0wLjMsMC40Qzk5LjYsMjYsOTkuNCwyNiw5OS4zLDI2Yy0wLjQsMC0wLjgtMC40LTEtMC43bC0wLjEtMC4yTDk5LjMsMjQuMXoiLz4KCTxwYXRoIGQ9Ik0xMDIuMiwzNS41Yy0wLjQsMC4zLTEtMC4yLTEuMi0wLjZjLTAuMy0wLjUtMC40LTEtMC40LTEuN2wwLTAuNWwtMiwwLjlsMC4xLDAuNGMwLjIsMC43LDAuMywxLjEsMC43LDEuOAoJCWMwLjcsMS4yLDEuNiwxLjgsMi42LDEuOGMwLjUsMCwwLjktMC4xLDEuNC0wLjRjMS41LTAuOSwxLjMtMi4zLDEuMS0zLjRjLTAuMS0wLjctMC4yLTEuMiwwLjEtMS40YzAuMi0wLjEsMC4zLTAuMSwwLjQtMC4xCgkJYzAuNCwwLDAuNywwLjQsMC44LDAuNWMwLjMsMC40LDAuNCwwLjksMC41LDEuNGwwLjEsMC40bDEuOC0wLjhsMC0wLjJjLTAuMS0wLjYtMC40LTEuMi0wLjgtMS45Yy0wLjktMS42LTIuNS0yLjEtMy45LTEuMgoJCWMtMS41LDAuOS0xLjMsMi4zLTEuMSwzLjNDMTAyLjUsMzQuOCwxMDIuNiwzNS4zLDEwMi4yLDM1LjV6Ii8+Cgk8cmVjdCB4PSIxMDIuNSIgeT0iMzkuNiIgdHJhbnNmb3JtPSJtYXRyaXgoMC45MjU5IC0wLjM3NzcgMC4zNzc3IDAuOTI1OSAtNy40NTU5IDQzLjM1NjgpIiB3aWR0aD0iOC42IiBoZWlnaHQ9IjIuMiIvPgoJPHBvbHlnb24gcG9pbnRzPSIxMTEuMiw0Ni4yIDEwNC43LDQ3LjggMTA1LjIsNDkuOSAxMTEuOCw0OC4zIDExMi4zLDUwLjUgMTE0LjEsNTAuMSAxMTIuNSw0My41IDExMC43LDQ0IAkiLz4KCTxwb2x5Z29uIHBvaW50cz0iMTA2LjIsNTYuNiAxMDYuMyw1OC44IDEwOS43LDU4LjYgMTE1LjEsNjEuNCAxMTUsNTkgMTExLjgsNTcuNSAxMTQuOCw1NS42IDExNC42LDUzIDEwOS42LDU2LjUgCSIvPgoJPHBvbHlnb24gcG9pbnRzPSIxMSw3Ni40IDEyLjgsNzUuOSAxMi4xLDczLjMgMTMuNCw3Mi45IDE0LjIsNzUuNyAxNiw3NS4xIDE0LjYsNzAuMyA2LjMsNzIuNyA2LjksNzQuOCAxMC4zLDczLjggCSIvPgoJPHBhdGggZD0iTTE4LjcsODMuNmMwLjMtMSwwLjItMi4xLTAuMy0zLjJjLTAuOC0xLjYtMi0yLjQtMy42LTIuNGMtMC43LDAtMS41LDAuMi0yLjMsMC42Yy0xLjMsMC42LTIuMSwxLjUtMi41LDIuNgoJCWMtMC4zLDEtMC4yLDIuMSwwLjMsMy4yYzAuOCwxLjUsMiwyLjQsMy42LDIuNGMwLjcsMCwxLjUtMC4yLDIuMy0wLjZDMTcuNSw4NS41LDE4LjQsODQuNywxOC43LDgzLjZ6IE0xNS4zLDg0LjEKCQljLTAuNSwwLjMtMS4xLDAuNC0xLjYsMC40Yy0wLjgsMC0xLjMtMC4zLTEuNi0xYy0wLjItMC41LTAuMy0wLjktMC4xLTEuM2MwLjItMC42LDAuOC0xLjIsMS42LTEuNmMwLjUtMC4yLDEtMC40LDEuNS0wLjQKCQljMC44LDAsMS4zLDAuMywxLjYsMWMwLjIsMC41LDAuMywwLjksMC4xLDEuM0MxNi42LDgzLjEsMTYuMSw4My43LDE1LjMsODQuMXoiLz4KCTxwYXRoIGQ9Ik0yNC43LDkwLjNsLTMuOSwzLjFjLTEsMC44LTEuOCwwLjktMi40LDAuMmMtMC4yLTAuMy0wLjMtMC42LTAuMy0wLjljMC4xL
TAuNCwwLjQtMC45LDEtMS40bDMuOS0zLjFsLTEuNC0xLjdsLTQuMywzLjQKCQljLTAuOSwwLjctMS40LDEuNS0xLjQsMi40YzAsMC44LDAuMywxLjYsMSwyLjVjMC44LDEuMSwxLjcsMS42LDIuNywxLjZjMC43LDAsMS40LTAuMywyLjEtMC45TDI2LDkyTDI0LjcsOTAuM3oiLz4KCTxwb2x5Z29uIHBvaW50cz0iMzIsOTcuMyAyOC44LDEwMS4zIDMwLDk1LjcgMjguMSw5NC4xIDIyLjYsMTAwLjggMjQuMywxMDIuMiAyNy41LDk4LjIgMjYuMywxMDMuOCAyOC4yLDEwNS40IDMzLjcsOTguNyAJIi8+Cgk8cGF0aCBkPSJNMzguNCwxMDEuNGwtMi4yLTEuMWwtMy45LDcuN2wyLjIsMS4xYzEuMSwwLjUsMiwwLjgsMi45LDAuOGMxLjUsMCwyLjctMC44LDMuNS0yLjRDNDIuMiwxMDQuOSw0MS4zLDEwMi45LDM4LjQsMTAxLjR6CgkJIE0zOC44LDEwNi41Yy0wLjQsMC45LTEuMiwxLjQtMi4xLDEuNGMtMC40LDAtMC43LTAuMS0xLjEtMC4zbC0wLjUtMC4zbDIuMi00LjRsMC41LDAuM2MwLjYsMC4zLDEsMC44LDEuMiwxLjMKCQlDMzkuMiwxMDUuMiwzOS4xLDEwNS44LDM4LjgsMTA2LjV6Ii8+Cgk8cG9seWdvbiBwb2ludHM9IjQzLjIsMTEyLjggNDguMywxMTQuMiA0OC44LDExMi40IDQ1LjgsMTExLjYgNDYuMiwxMTAgNDksMTEwLjcgNDkuNSwxMDguOSA0Ni43LDEwOC4yIDQ3LDEwNi44IDUwLjEsMTA3LjYgCgkJNTAuNSwxMDUuOCA0NS40LDEwNC41IAkiLz4KCTxwYXRoIGQ9Ik01NS45LDEwNi41bC0yLjUtMC4xbC0wLjUsOC42bDIuNSwwLjFjMC4yLDAsMC40LDAsMC42LDBjMi45LDAsNC40LTEuNCw0LjYtNC4xYzAuMS0xLjMtMC4yLTIuNC0wLjktMy4yCgkJQzU4LjksMTA3LjEsNTcuNywxMDYuNiw1NS45LDEwNi41eiBNNTUuOCwxMTMuM2wtMC42LDBsMC4zLTQuOWwwLjYsMGMxLjIsMC4xLDIuMywxLDIuMiwyLjZDNTguMiwxMTIuNiw1NywxMTMuMyw1NS44LDExMy4zeiIvPgoJPHBhdGggZD0iTTY4LjIsMTA1LjhsLTEuNSw5LjFsMi4zLTAuNWwwLjMtMS45bDIuOC0wLjZsMS4xLDEuN2wyLjQtMC41bC01LjEtNy43TDY4LjIsMTA1Ljh6IE02OS42LDExMC42bDAuMy0yLjNsMS4yLDEuOQoJCUw2OS42LDExMC42eiIvPgoJPHBvbHlnb24gcG9pbnRzPSI3Ni40LDExMC4zIDc3LjIsMTEyLjQgNzkuMiwxMTEuNiA3OC41LDEwOS42IAkiLz4KCTxwYXRoIGQ9Ik04MC42LDEwMS42bC0yLjIsMS4ybDQuMSw3LjZsMi4yLTEuMmMyLjktMS42LDMuNi0zLjYsMi4zLTYuMUM4NS43LDEwMC41LDgzLjUsMTAwLDgwLjYsMTAxLjZ6IE04NC4xLDEwNy40bC0wLjUsMC4zCgkJbC0yLjMtNC4zbDAuNS0wLjNjMC4zLTAuMiwwLjctMC4zLDEuMS0wLjNjMC45LDAsMS42LDAuNSwyLjEsMS4zQzg1LjgsMTA1LjUsODUuMSwxMDYuOCw4NC4xLDEwNy40eiIvPgoJPHBvbHlnb24gcG9pbnRzPSI4OC43LDEwNC4zIDg5LjksMTA2LjEgOTEuNywxMDQuOCA5MC41LDEwMyAJIi8+Cgk8cG9seWdvbiBwb2ludHM9IjkyLjEsOTMuMiA5MS43LDk2LjYgOTMuNiw5Ni45IDkzLjgsO
TUuNSA5Ny45LDk5LjYgOTkuNSw5OC4xIDkzLjQsOTEuOSAJIi8+Cgk8cGF0aCBkPSJNMTAzLjgsODguN2MtMC43LTAuNS0xLjUtMC42LTIuNC0wLjNjMC0wLjgtMC40LTEuNi0xLTJjLTEuMS0wLjgtMi41LTAuMy0zLjYsMS4xYy0xLDEuNC0wLjksMi44LDAuMywzLjcKCQljMC42LDAuNCwxLjQsMC41LDIuMSwwLjNjLTAuMSwwLjksMC4yLDEuNiwwLjksMi4xYzAuNSwwLjQsMSwwLjUsMS41LDAuNWMwLjksMCwxLjctMC41LDIuNS0xLjVjMC42LTAuOCwwLjktMS42LDAuOC0yLjQKCQlDMTA0LjgsODkuNywxMDQuNCw4OS4yLDEwMy44LDg4Ljd6IE05OS43LDg5LjNjLTAuMywwLjItMC42LDAuNC0wLjksMC40Yy0wLjEsMC0wLjMsMC0wLjQtMC4xYy0wLjQtMC4zLTAuMi0wLjgsMC0xLjEKCQljMC4xLTAuMiwwLjMtMC4zLDAuNS0wLjNjMC4xLDAsMC4zLDAuMSwwLjUsMC4yQzk5LjcsODguNiw5OS44LDg5LDk5LjcsODkuM3ogTTEwMi43LDkxLjdjLTAuMywwLjQtMC44LDAuNS0xLjIsMC4yCgkJYy0wLjMtMC4zLTAuNS0wLjYtMC40LTEuMmMwLjYtMC40LDEuMS0wLjUsMS41LTAuMmMwLjIsMC4xLDAuMywwLjMsMC4zLDAuNUMxMDIuOSw5MS4yLDEwMi44LDkxLjUsMTAyLjcsOTEuN3oiLz4KCTxwYXRoIGQ9Ik0xMDcuNyw4MC4zYy0xLjUtMC43LTIuOS0wLjMtMy42LDEuMWMtMC4yLDAuNC0wLjMsMC44LTAuMiwxLjJjLTAuNS0wLjQtMS0xLjEtMC42LTJjMC4yLTAuNCwwLjUtMC43LDAuOC0xbDAuMy0wLjIKCQlsLTEuNi0xLjFsLTAuMiwwLjJjLTAuNCwwLjQtMC44LDAuOS0xLjEsMS41Yy0wLjQsMC45LTAuNSwxLjctMC4yLDIuNWMwLjUsMS40LDEuOSwyLjQsMywyLjljMSwwLjUsMS44LDAuNywyLjUsMC43CgkJYzEuMSwwLDEuOS0wLjUsMi41LTEuNkMxMTAuMSw4Mi43LDEwOS41LDgxLjIsMTA3LjcsODAuM3ogTTEwNy43LDgzLjVjLTAuMSwwLjMtMC40LDAuNC0wLjcsMC40Yy0wLjIsMC0wLjUtMC4xLTAuNy0wLjIKCQljLTAuNC0wLjItMC43LTAuNS0wLjgtMC43Yy0wLjEtMC4yLDAtMC4zLDAtMC41YzAuMS0wLjMsMC4zLTAuNCwwLjctMC40YzAuMiwwLDAuNSwwLjEsMC44LDAuMmMwLjQsMC4yLDAuNiwwLjQsMC43LDAuNwoJCUMxMDcuOCw4My4yLDEwNy43LDgzLjQsMTA3LjcsODMuNXoiLz4KCTxwYXRoIGQ9Ik0xMTAuOCw3MS41Yy0xLjctMC41LTMuMSwwLjMtMy42LDEuOWMwLDAuMi0wLjEsMC4zLTAuMSwwLjRsLTEtMC4zbDAuOS0zbC0xLjctMC41bC0xLjUsNWw0LjgsMS41bDAtMC40CgkJYzAtMC41LDAuMS0wLjksMC4yLTEuNGMwLjEtMC40LDAuNi0xLjQsMS41LTEuMmMxLDAuMywwLjgsMS40LDAuNywxLjdjLTAuMiwwLjYtMC41LDEtMC44LDEuNGwtMC4zLDAuM2wyLDAuN2wwLjEtMC4yCgkJYzAuMy0wLjYsMC42LTEuMywwLjgtMS44YzAuMy0xLDAuMy0xLjktMC4xLTIuN0MxMTIuMyw3Mi4zLDExMS42LDcxLjgsMTEwLjgsNzEuNXoiLz4KCTxwYXRoIGQ9Ik0xNi4zLDYwYzAsMjQsMTkuNSw0My42LDQzLjUsNDMuNmMyNCwwLDQzL
jUtMTkuNSw0My41LTQzLjZjMC0yNC0xOS41LTQzLjUtNDMuNS00My41QzM1LjksMTYuNCwxNi4zLDM2LDE2LjMsNjB6CgkJIE01OS45LDE3LjljMjMuMiwwLDQyLDE4LjksNDIsNDJjMCwyMy4yLTE4LjksNDItNDIsNDJjLTIzLjIsMC00Mi0xOC45LTQyLTQyQzE3LjgsMzYuOCwzNi43LDE3LjksNTkuOSwxNy45eiIvPgoJPHBhdGggZD0iTTU5LjksMC40QzI3LDAuNCwwLjMsMjcuMSwwLjMsNjBjMCwzMi44LDI2LjcsNTkuNiw1OS42LDU5LjZjMzIuOCwwLDU5LjYtMjYuNyw1OS42LTU5LjZDMTE5LjQsMjcuMSw5Mi43LDAuNCw1OS45LDAuNHoKCQkgTTU5LjksMTE4Yy0zMiwwLTU4LTI2LTU4LTU4YzAtMzIsMjYtNTgsNTgtNThjMzIsMCw1OCwyNiw1OCw1OEMxMTcuOSw5Miw5MS45LDExOCw1OS45LDExOHoiLz4KPC9nPgo8L3N2Zz4K alt="Cornell University Logo" srcset sizes>
|
|
</picture>
|
|
</a></div>
|
|
<!-- Mobile header toggles. These are plain buttons (not an ARIA menubar): the
     container owns no menuitem children, so no menu role is declared. Each
     icon is exposed as an image whose <title> supplies the button's name. -->
<div class="column nav" id=toggle-container>
|
|
<button type=button class=toggle-control><svg xmlns=http://www.w3.org/2000/svg viewBox="0 0 512 512" class="icon filter-white" role=img><title>open search</title><path d="M505 442.7L405.3 343c-4.5-4.5-10.6-7-17-7H372c27.6-35.3 44-79.7 44-128C416 93.1 322.9 0 208 0S0 93.1 0 208s93.1 208 208 208c48.3 0 92.7-16.4 128-44v16.3c0 6.4 2.5 12.5 7 17l99.7 99.7c9.4 9.4 24.6 9.4 33.9 0l28.3-28.3c9.4-9.4 9.4-24.6.1-34zM208 336c-70.7 0-128-57.2-128-128 0-70.7 57.2-128 128-128 70.7 0 128 57.2 128 128 0 70.7-57.2 128-128 128z"></path></svg></button>
|
|
<div class="mobile-toggle-block toggle-target sf-hidden">
|
|
|
|
</div>
|
|
<button type=button class=toggle-control><svg xmlns=http://www.w3.org/2000/svg viewBox="0 0 448 512" class="icon filter-white" role=img><title>open navigation menu</title><path d="M16 132h416c8.837 0 16-7.163 16-16V76c0-8.837-7.163-16-16-16H16C7.163 60 0 67.163 0 76v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16z"></path></svg></button>
|
|
<div class="mobile-toggle-block toggle-target sf-hidden">
|
|
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</header>
|
|
<main>
|
|
<div id=content>
|
|
<div id=abs-outer>
|
|
<div class=leftcolumn>
|
|
<div class=subheader>
|
|
<h1>Computer Science > Computation and Language</h1>
|
|
</div>
|
|
<div class=header-breadcrumbs-mobile>
|
|
<strong>arXiv:2305.11747</strong> (cs)
|
|
</div>
|
|
<style>#abs{font-family:"Lucida Grande",Helvetica,Arial,sans-serif!important}#abs h1.title{display:block;font-size:1.8em!important;font-weight:700;margin-block-end:12px;margin-block-start:12px;margin-bottom:12px;margin-inline-end:0px;margin-inline-start:20px;margin-left:20px;margin-right:0px;margin-top:12px}#abs div.authors{font-size:1.2em;line-height:24px;margin-bottom:8px;margin-left:20px;margin-right:0px;margin-top:8px}#abs div.dateline{font-size:0.9em;font-style:italic;margin-bottom:6.5px;margin-left:20px;margin-right:0px;margin-top:6.5px}#abs blockquote.abstract{font-size:1.05em;margin-block-end:21.6px;margin-block-start:14.4px;margin-bottom:21.6px;margin-inline-end:40px;margin-inline-start:40px;background-color:white;border-left:0px;padding:0px}#abs div.metatable{font-size:0.95em!important;margin-bottom:19px;margin-left:20px;margin-right:0px;margin-top:0px;border:0px;padding:0px}#abs div.metatable tbody{vertical-align:middle}#abs tr{margin-top:0px;margin-bottom:0px}#abs td.tablecell{padding-top:0px;padding-bottom:0px;padding-right:6.5px;padding-left:0px;vertical-align:top;font-size:0.95em!important;margin-top:0px;margin-bottom:0px;border:0px}#abs td.tablecell.label{font-weight:400!important}#abs span.primary-subject{font-weight:700}#abs span.arxivid{font-weight:700}</style>
|
|
<div id=content-inner>
|
|
<div id=abs>
|
|
<div class=dateline>
|
|
[Submitted on 19 May 2023 (<a href=https://arxiv.org/abs/2305.11747v1>v1</a>), last revised 23 Oct 2023 (this version, v3)]</div>
|
|
<h1 class="title mathjax"><span class="descriptor sf-hidden">Title:</span>HaluEval: A Large-Scale Hallucination Evaluation Benchmark for Large Language Models</h1>
|
|
<!-- The hidden description below uses its own id: "download-button-info" is
     already used by the description in the "Access Paper" sidebar, and ids
     must be unique per document. It describes the mobile "View PDF" link. -->
<div class=authors><span class="descriptor sf-hidden">Authors:</span><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+J" rel=nofollow>Junyi Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cheng,+X" rel=nofollow>Xiaoxue Cheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+W+X" rel=nofollow>Wayne Xin Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nie,+J" rel=nofollow>Jian-Yun Nie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wen,+J" rel=nofollow>Ji-Rong Wen</a></div> <div id=download-button-info-mobile class=sf-hidden hidden>View a PDF of the paper titled HaluEval: A Large-Scale Hallucination Evaluation Benchmark for Large Language Models, by Junyi Li and 4 other authors</div>
|
|
<a class=mobile-submission-download href=https://arxiv.org/pdf/2305.11747 aria-describedby=download-button-info-mobile>View PDF</a>
|
|
<blockquote class="abstract mathjax">
|
|
<span class="descriptor sf-hidden">Abstract:</span>Large language models (LLMs), such as ChatGPT, are prone to generate hallucinations, i.e., content that conflicts with the source or cannot be verified by the factual knowledge. To understand what types of content and to which extent LLMs are apt to hallucinate, we introduce the Hallucination Evaluation benchmark for Large Language Models (HaluEval), a large collection of generated and human-annotated hallucinated samples for evaluating the performance of LLMs in recognizing hallucination. To generate these samples, we propose a ChatGPT-based two-step framework, i.e., sampling-then-filtering. Besides, we also hire some human labelers to annotate the hallucinations in ChatGPT responses. The empirical results suggest that ChatGPT is likely to generate hallucinated content in specific topics by fabricating unverifiable information (i.e., about <span class=MathJax_Preview>19.5\%</span><span class="MathJax MathJax_Processing sf-hidden" id=MathJax-Element-1-Frame tabindex=0></span> responses). Moreover, existing LLMs face great challenges in recognizing the hallucinations in texts. However, our experiments also prove that providing external knowledge or adding reasoning steps can help LLMs recognize hallucinations. Our benchmark can be accessed at <a href=https://github.com/RUCAIBox/HaluEval rel="external noopener nofollow" class="link-external link-https">this https URL</a>.
|
|
</blockquote>
|
|
|
|
<div class=metatable>
|
|
<table aria-label="Additional metadata"> <tbody><tr>
|
|
<td class="tablecell label">Comments:</td>
|
|
<td class="tablecell comments mathjax">Accepted to EMNLP 2023 Main Conference (Long Paper)</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="tablecell label">Subjects:</td>
|
|
<td class="tablecell subjects">
|
|
<span class=primary-subject>Computation and Language (cs.CL)</span></td>
|
|
<tr>
|
|
<td class="tablecell label">Cite as:</td>
|
|
<td class="tablecell arxivid"><span class=arxivid><a href=https://arxiv.org/abs/2305.11747>arXiv:2305.11747</a> [cs.CL]</span></td>
|
|
</tr>
|
|
<tr>
|
|
<td class="tablecell label"> </td>
|
|
<td class="tablecell arxividv">(or <span class=arxivid>
|
|
<a href=https://arxiv.org/abs/2305.11747v3>arXiv:2305.11747v3</a> [cs.CL]</span> for this version)
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="tablecell label"> </td>
|
|
<td class="tablecell arxivdoi"> <a href=https://doi.org/10.48550/arXiv.2305.11747 id=arxiv-doi-link>https://doi.org/10.48550/arXiv.2305.11747</a><div class=button-and-tooltip>
|
|
<button class=more-info aria-describedby=more-info-desc-1>
|
|
<svg height=15 role=presentation xmlns=http://www.w3.org/2000/svg viewBox="0 0 512 512"><path fill=currentColor d="M256 8C119.043 8 8 119.083 8 256c0 136.997 111.043 248 248 248s248-111.003 248-248C504 119.083 392.957 8 256 8zm0 110c23.196 0 42 18.804 42 42s-18.804 42-42 42-42-18.804-42-42 18.804-42 42-42zm56 254c0 6.627-5.373 12-12 12h-88c-6.627 0-12-5.373-12-12v-24c0-6.627 5.373-12 12-12h12v-64h-12c-6.627 0-12-5.373-12-12v-24c0-6.627 5.373-12 12-12h64c6.627 0 12 5.373 12 12v100h12c6.627 0 12 5.373 12 12v24z"></path></svg>
|
|
<span class=visually-hidden>Focus to learn more</span>
|
|
</button>
|
|
|
|
<div role=tooltip id=more-info-desc-1 class=sf-hidden>
|
|
arXiv-issued DOI via DataCite</div>
|
|
</div>
|
|
</td>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class=submission-history>
|
|
<h2>Submission history</h2> From: Junyi Li [<a href=https://arxiv.org/show-email/f59aab9c/2305.11747 rel=nofollow>view email</a>] <br> <strong><a href=https://arxiv.org/abs/2305.11747v1 rel=nofollow>[v1]</a></strong>
|
|
Fri, 19 May 2023 15:36:27 UTC (686 KB)<br>
|
|
<strong><a href=https://arxiv.org/abs/2305.11747v2 rel=nofollow>[v2]</a></strong>
|
|
Mon, 22 May 2023 13:36:09 UTC (687 KB)<br>
|
|
<strong>[v3]</strong>
|
|
Mon, 23 Oct 2023 01:49:32 UTC (689 KB)<br>
|
|
</div>
|
|
</div>
|
|
|
|
<div class=extra-services> <div class=full-text>
|
|
<a id=other></a>
|
|
<span class="descriptor sf-hidden">Full-text links:</span>
|
|
<h2>Access Paper:</h2>
|
|
<!-- Hidden description referenced by the "View PDF" link via aria-describedby.
     It sits before the list because a <ul> may only contain <li> children. -->
<div id=download-button-info class=sf-hidden hidden>
|
|
View a PDF of the paper titled HaluEval: A Large-Scale Hallucination Evaluation Benchmark for Large Language Models, by Junyi Li and 4 other authors</div>
|
|
<ul>
|
|
<li><a href=https://arxiv.org/pdf/2305.11747 aria-describedby=download-button-info accesskey=f class="abs-button download-pdf">View PDF</a></li><li><a href=https://arxiv.org/src/2305.11747 class="abs-button download-eprint">TeX Source
|
|
</a></li></ul>
|
|
<div class=abs-license><a href=https://arxiv.org/licenses/nonexclusive-distrib/1.0/ title="Rights to this article">view license</a></div>
|
|
</div>
|
|
<div class=browse>
|
|
Current browse context: <div class=current>cs.CL</div>
|
|
<div class=prevnext>
|
|
<span class=arrow>
|
|
<a class="abs-button prev-url" href="https://arxiv.org/prevnext?id=2305.11747&function=prev&context=cs.CL" accesskey=p title="previous in cs.CL (accesskey p)" rel=nofollow>&lt; prev</a>
|
|
</span>
|
|
<span class="is-hidden-mobile sf-hidden"> | </span> <span class=arrow>
|
|
<a class="abs-button next-url" href="https://arxiv.org/prevnext?id=2305.11747&function=next&context=cs.CL" accesskey=n title="next in cs.CL (accesskey n)" rel=nofollow>next &gt;</a>
|
|
</span><br>
|
|
</div><div class=list>
|
|
<a class="abs-button abs-button-grey abs-button-small context-new" href=https://arxiv.org/list/cs.CL/new rel=nofollow>new</a>
|
|
<span class="is-hidden-mobile sf-hidden"> | </span>
|
|
<a class="abs-button abs-button-grey abs-button-small context-recent" href=https://arxiv.org/list/cs.CL/recent rel=nofollow>recent</a>
|
|
<span class="is-hidden-mobile sf-hidden"> | </span><a class="abs-button abs-button-grey abs-button-small context-id" href=https://arxiv.org/list/cs.CL/2023-05 rel=nofollow>2023-05</a>
|
|
</div><div class=abs-switch-cat>
|
|
Change to browse by:
|
|
<div class="switch context-change">
|
|
<a href="https://arxiv.org/abs/2305.11747?context=cs" rel=nofollow>cs</a><br class="is-hidden-mobile sf-hidden">
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class=extra-ref-cite>
|
|
<h3>References & Citations</h3>
|
|
<ul>
|
|
<li><a class="abs-button abs-button-small cite-ads" href=https://ui.adsabs.harvard.edu/abs/arXiv:2305.11747>NASA ADS</a><li><a class="abs-button abs-button-small cite-google-scholar" href="https://scholar.google.com/scholar_lookup?arxiv_id=2305.11747" target=_blank rel=noopener>Google Scholar</a></li>
|
|
<li><a class="abs-button abs-button-small cite-semantic-scholar" href=https://api.semanticscholar.org/arXiv:2305.11747 target=_blank rel=noopener>Semantic Scholar</a></li>
|
|
</ul>
|
|
<div style=clear:both></div>
|
|
</div>
|
|
<div class=extra-ref-cite>
|
|
<span id=bib-cite-trigger class="bib-cite-button abs-button">export BibTeX citation</span>
|
|
<span id=bib-cite-loading class=sf-hidden hidden>Loading...</span>
|
|
</div>
|
|
<div id=bib-cite-modal class="bib-modal sf-hidden" hidden>
|
|
|
|
</div><div class=bookmarks>
|
|
<div><h3>Bookmark</h3></div><a class="abs-button abs-button-grey abs-button-small" href="https://www.bibsonomy.org/BibtexHandler?requTask=upload&url=https://arxiv.org/abs/2305.11747&description=HaluEval:%20A%20Large-Scale%20Hallucination%20Evaluation%20Benchmark%20for%20Large%20Language%20Models" title="Bookmark on BibSonomy">
|
|
<img src=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAMAAAAoLQ9TAAABTVBMVEXn5ubf3t6/H2OioaKgn6DLysrHxsa4tre3traysLGxsLCqqKmmpKWioKGenJ2cmpuamJmYlpeWlJWVlJSUkpOQjo+Ib3qMiov+/v78/Pz6+vr5+Pn4+Pj29vb09PSrqal1cnTu7u7i4uLe3t7V1NWJh4fJyMnHxsfDwsO/vr+9vL3y8fG3treDXW3e3d2oqKilpKWkoqSjoqPY19fS0dGXlpfMy8uUkpTGxcXEw8ONjI28u7u4t7ezsbKxr7CjH1etq6yrqaqnpaampaWko6OjoaKioaGhn6Cgn5+fnZ6em52dm5ybmZqVk5SIWW2Rj5CPjY6Oi42Ni4z////9/f37+/t/fX75+floSlfv7+/p6eno5+jl5eXk4+Tj4+Pd3d3b29vZ2dnX19fT09PS0dLR0dGEa3XOzc7Nzc3KycrIx8i+vb53dHW6ubrr6up9Unu9AAAA0klEQVQYlWMIRgJRgeJsDMFSxnEwAYcsRXOGYBcLp5TgYMlsu2RPW2ZVBoZgfS1hs+BgS83oMLlgZbtghuDQhCSRmOBYEXet4OBU1xAGkF5tAePgBNvgYBU+LgewQLCMm5oee7CsEDcrL0QgONQgTZeHL9E4QwYqEJxrzyQmAWJABELVfSP8vIJhAiEqgmo6WSGO6WABE3kOb+ssIwXZ4FABRpBAjjSLXrCoJGdicLCsfx5QwNmQPzw408NABigbGaQRD3QpknetlEwZgpGBj00AAMhwRAoMDs/uAAAAAElFTkSuQmCC alt="BibSonomy logo">
|
|
</a>
|
|
<a class="abs-button abs-button-grey abs-button-small" href="https://reddit.com/submit?url=https://arxiv.org/abs/2305.11747&title=HaluEval:%20A%20Large-Scale%20Hallucination%20Evaluation%20Benchmark%20for%20Large%20Language%20Models" title="Bookmark on Reddit">
|
|
<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABIAAAASCAMAAABhEH5lAAAAclBMVEWtra3/IQClpaX/hFJ7e3tzc3Nra2tjY2P/Yyn/597/zr05OTkxMTHe3t4pKSnW1tbOzs7Gxsb/vaX/GACcnJyUlJSMjIyEhIT/rYxaWlpSUlL///9KSkr39/dCQkL/jFrv7+//hFrn5+f/jGO9vb21tbWaFPpZAAAAxUlEQVQYlU2Q7VrDMAhGEcMyO6thNsH0ta5m9f5vUVxbJz/ycQLkPBD2aMY91TCD/lCRQU5zoDuapAGSo97Rq/gys2JF05KtjpbT7ZFakHAQSq3pIGIxT2SD1H6vXj65MAXzJufrE9B9dWjRMi0vnnN8eAce3y4An3pCxJ517QAt/qMk/Is8ONLaMGVt7Zvdzi/uNVSFmgv04DGtqilW1pQ0f4RfnZt9HNmKZX4u2BCPee0dDsuGjMs6DKOyou3kcwhqvv0AYpYbpE15FCsAAAAASUVORK5CYII=" alt="Reddit logo">
|
|
</a>
|
|
</div> </div>
|
|
|
|
<div id=labstabs>
|
|
<div class=labstabs><input type=radio name=tabs id=tabone checked class=sf-hidden>
|
|
<label for=tabone>Bibliographic Tools</label>
|
|
<div class="tab labs-display-bib">
|
|
<h1>Bibliographic and Citation Tools</h1>
|
|
<div class=toggle>
|
|
<div class="columns is-mobile lab-row">
|
|
<div class="column lab-switch">
|
|
<label class=switch>
|
|
<!-- Named by the visible "Bibliographic Explorer" text, matching the other lab toggles. -->
<input id=bibex-toggle type=checkbox class="lab-toggle sf-hidden" data-script-url=/static/browse/0.3.4/bibex/bibex.js?20241202 aria-labelledby=label-for-bibex>
|
|
<span class=slider></span>
|
|
<span class=is-sr-only>Bibliographic Explorer Toggle</span>
|
|
</label>
|
|
</div>
|
|
<div class="column lab-name">
|
|
<span id=label-for-bibex>Bibliographic Explorer</span> <em>(<a href=https://info.arxiv.org/labs/showcase.html#arxiv-bibliographic-explorer>What is the Explorer?</a>)</em>
|
|
</div>
|
|
</div>
|
|
<div class="columns is-mobile lab-row">
|
|
<div class="column lab-switch">
|
|
<label class=switch>
|
|
<input id=connectedpapers-toggle type=checkbox class="lab-toggle sf-hidden" data-script-url=/static/browse/0.3.4/js/connectedpapers.js aria-labelledby=label-for-connected-papers>
|
|
<span class=slider></span>
|
|
<span class=is-sr-only>Connected Papers Toggle</span>
|
|
</label>
|
|
</div>
|
|
<div class="column lab-name">
|
|
<span id=label-for-connected-papers>Connected Papers</span> <em>(<a href=https://www.connectedpapers.com/about target=_blank rel=noopener>What is Connected Papers?</a>)</em>
|
|
</div>
|
|
</div><div class="columns is-mobile lab-row">
|
|
<div class="column lab-switch">
|
|
<label class=switch>
|
|
<input id=litmaps-toggle type=checkbox class="lab-toggle sf-hidden" data-script-url=/static/browse/0.3.4/js/litmaps.js?20210617 aria-labelledby=label-for-litmaps>
|
|
<span class=slider></span>
|
|
<span class=is-sr-only>Litmaps Toggle</span>
|
|
</label>
|
|
</div>
|
|
<div class="column lab-name">
|
|
<span id=label-for-litmaps>Litmaps</span> <em>(<a href=https://www.litmaps.co/ target=_blank rel=noopener>What is Litmaps?</a>)</em>
|
|
</div>
|
|
</div>
|
|
<div class="columns is-mobile lab-row">
|
|
<div class="column lab-switch">
|
|
<label class=switch>
|
|
<input id=scite-toggle type=checkbox class="lab-toggle sf-hidden" data-script-url=/static/browse/0.3.4/js/scite.js?20210617 aria-labelledby=label-for-scite>
|
|
<span class=slider></span>
|
|
<span class=is-sr-only>scite.ai Toggle</span>
|
|
</label>
|
|
</div>
|
|
<div class="column lab-name">
|
|
<span id=label-for-scite>scite Smart Citations</span> <em>(<a href=https://www.scite.ai/ target=_blank rel=noopener>What are Smart Citations?</a>)</em>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="labs-content-placeholder labs-display" style=display:none></div>
|
|
<div style=min-height:15px id=connectedpapers-output></div>
|
|
<div style=min-height:15px id=litmaps-open-in></div>
|
|
<div style=min-height:15px id=scite-open-in></div>
|
|
</div>
|
|
<input type=radio name=tabs id=tabtwo class=sf-hidden>
|
|
<label for=tabtwo>Code, Data, Media</label>
|
|
<div class="tab sf-hidden">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</div>
|
|
<input type=radio name=tabs id=labstabs-demos-input class=sf-hidden>
|
|
<label for=labstabs-demos-input id=labstabs-demos-label>Demos</label>
|
|
<div class="tab sf-hidden">
|
|
|
|
|
|
|
|
|
|
|
|
</div>
|
|
<input type=radio name=tabs id=tabfour class=sf-hidden>
|
|
<label for=tabfour>Related Papers</label>
|
|
<div class="tab sf-hidden">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</div>
|
|
<input type=radio name=tabs id=tabfive class=sf-hidden>
|
|
<label for=tabfive>
|
|
About arXivLabs
|
|
</label>
|
|
<div class="tab sf-hidden">
|
|
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class=endorsers>
|
|
<a href=https://arxiv.org/auth/show-endorsers/2305.11747 class=endorser-who rel=nofollow>Which authors of this paper are endorsers?</a> |
|
|
<a id=mathjax_toggle>Disable MathJax</a> (<a href=https://info.arxiv.org/help/mathjax.html>What is MathJax?</a>)
|
|
<span class="help sf-hidden" style=font-style:normal;float:right;margin-top:0;margin-right:1em></span>
|
|
</div>
|
|
|
|
</div>
|
|
</div>
|
|
</main>
|
|
<footer style=clear:both>
|
|
<div class="columns is-desktop" role=navigation aria-label=Secondary style="margin:-0.75em -0.75em 0.75em -0.75em">
|
|
|
|
<div class=column style=padding:0>
|
|
<div class=columns>
|
|
<div class=column>
|
|
<ul style=list-style:none;line-height:2>
|
|
<li><a href=https://info.arxiv.org/about>About</a></li>
|
|
<li><a href=https://info.arxiv.org/help>Help</a></li>
|
|
</ul>
|
|
</div>
|
|
<div class=column>
|
|
<ul style=list-style:none;line-height:2>
|
|
<li>
|
|
<svg xmlns=http://www.w3.org/2000/svg viewBox="0 0 512 512" class="icon filter-black" role=presentation><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"></path></svg>
|
|
<a href=https://info.arxiv.org/help/contact.html> Contact</a>
|
|
</li>
|
|
<li>
|
|
<svg xmlns=http://www.w3.org/2000/svg viewBox="0 0 512 512" class="icon filter-black" role=presentation><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"></path></svg>
|
|
<a href=https://info.arxiv.org/help/subscribe> Subscribe</a>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<div class=column style=padding:0>
|
|
<div class=columns>
|
|
<div class=column>
|
|
<ul style=list-style:none;line-height:2>
|
|
<li><a href=https://info.arxiv.org/help/license/index.html>Copyright</a></li>
|
|
<li><a href=https://info.arxiv.org/help/policies/privacy_policy.html>Privacy Policy</a></li>
|
|
</ul>
|
|
</div>
|
|
<div class="column sorry-app-links">
|
|
<ul style=list-style:none;line-height:2>
|
|
<li><a href=https://info.arxiv.org/help/web_accessibility.html>Web Accessibility Assistance</a></li>
|
|
<li>
|
|
<p class=help>
|
|
<a class=a11y-main-link href=https://status.arxiv.org/ target=_blank rel=noopener>arXiv Operational Status <svg xmlns=http://www.w3.org/2000/svg viewBox="0 0 256 512" class="icon filter-dark_grey" role=presentation><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"></path></svg></a><br>
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
</div>
|
|
</footer>
|
|
</div>
|
|
|
|
<div style="position:absolute;width:0px;height:0px;overflow:hidden;padding:0px;border:0px none;margin:0px"><div id=MathJax_Font_Test style="position:absolute;visibility:hidden;top:0px;left:0px;width:auto;min-width:0px;max-width:none;padding:0px;border:0px none;margin:0px;white-space:nowrap;text-align:left;text-indent:0px;text-transform:none;line-height:normal;letter-spacing:normal;word-spacing:normal;font-size:40px;font-weight:normal;font-style:normal;font-size-adjust:none;font-family:MathJax_Main,monospace" class=sf-hidden></div></div> |