Files
2026-01-04 23:05:47 +01:00

356 lines
58 KiB
HTML

<!doctype html>
<html style="" lang="en"><!--
Page saved with SingleFile
url: https://arxiv.org/abs/1907.11692
saved date: Sat Jan 03 2026 13:30:37 GMT+0100 (Central European Standard Time)
-->
<!-- Encoding declaration comes first, followed by the title and viewport / browser-chrome colour hints. -->
<meta charset="utf-8">
<title>[1907.11692] RoBERTa: A Robustly Optimized BERT Pretraining Approach</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="msapplication-TileColor" content="#da532c">
<meta name="theme-color" content="#ffffff">
<!--
  Screen-only stylesheet, inlined as a single minified rule list.
  Rule groups, in source order: base body/link/img/em rules; footer; .icon and .help;
  #content, #abs-outer and #abs abstract layout; #labstabs tabs and toggle switches;
  a max-width:768px media block with mobile overrides for #abs-outer, .abs-button and
  .extra-services; .mobile-header with its own breakpoints; the sidebar panels
  (.extra-services, .full-text, .extra-general, .extra-ref-cite, .browse, .dblp, .bookmarks);
  and finally .is-sr-only, .column/.columns and svg icon fill utilities.
  The physical line breaks below fall inside selectors or values, so the whole element
  has to be edited as one unit.
-->
<style media=screen>body{margin:0;padding:0;background-color:#fff;color:#000;font-family:"Lucida Grande",helvetica,arial,verdana,sans-serif}a:link,a:visited,a:active{text-decoration:none;font-weight:normal}a:hover{text-decoration:underline}img{border:0}em{font-weight:bold;font-style:normal}.primary-subject{font-weight:bold}main{flex-grow:1}.flex-wrap-footer{display:flex;min-height:100vh;flex-direction:column}footer ul li{display:flex;align-items:center;font-size:14px}footer ul li a{font-size:13.5px}footer{background-color:hsl(0,0%,95%);color:#000;padding:1em 2em;font-size:0.9rem;-webkit-font-smoothing:antialiased;margin-top:6rem}footer a,footer a:visited{color:#000;text-decoration:none;border-bottom:1px solid transparent;line-height:1.75em}footer a:hover,footer a:active{color:#005e9d;border-bottom:1px dotted #005e9d;text-decoration:none}footer ul{padding:0;margin:0}footer .sorry-app-links .help{font-size:0.75rem;margin-bottom:0;line-height:1.75em}footer .sorry-app-links .help a,footer .sorry-app-links .help a:visited{border-bottom:1px dotted #000}footer .sorry-app-links .help a:hover,footer .sorry-app-links .help a:active{border-bottom:1px dotted #005e9d}footer .sorry-app-links svg.icon{margin-bottom:-2px!important}footer .sorry-app-links .a11y-main-link{font-size:110%;border-bottom:1px solid transparent!important;padding:0;margin:0}@media screen and (max-width:768px){footer .sorry-app-links.column{padding:0}}@media screen and (min-width:769px){.columns{display:flex;flex-direction:row}}.icon{width:.9rem;margin-right:.45em;margin-top:-.15rem}.help{font-family:"Lucida Grande","Helvetica Neue",Helvetica,Arial,sans-serif;display:block;margin-top:0.25rem}#content,#content-inner{margin:.7em;font-size:90%}#abs-outer,#abs{margin:-0.7em}#abs-outer .leftcolumn{margin:0 0 1em 0;padding:0px;width:calc(100% - 18em);float:left}#abs-outer .mobile-submission-download{display:none}#abs-outer .extra-services{float:right;margin:0;width:18em}#abs-outer .extra-services 
span.bib-cite-button{color:rgb(0,0,238);font-weight:bold;padding:.35em;display:block;text-transform:capitalize}#abs-outer .extra-services span.bib-cite-button:hover,#abs-outer .extra-services span.bib-cite-button:focus{text-decoration:underline;cursor:pointer}#labstabs{clear:both;margin:1em 1.5em}#labstabs .labstabs{display:flex;flex-wrap:wrap;white-space:normal;justify-content:flex-start}#labstabs .labstabs>label{order:1;display:block;padding:.75em 1.5em;font-size:.85em;color:#4a4a4a;margin-right:0.2rem;cursor:pointer;font-weight:bold;transition:background ease 0.2s;border-radius:6px 6px 0 0;background-color:#eef5f9}#labstabs .labstabs .tab{order:99;flex-grow:1;width:100%;display:none;padding:1rem;border:1px solid #828282;z-index:1;top:-1px;position:relative}#labstabs .labstabs .tab h1{font-size:1.25em;font-weight:normal}#labstabs .labstabs input[type="radio"]:checked+label{background:#ffffff;border:1px solid #828282;border-bottom:0;z-index:2;color:black}#labstabs .labstabs input[type="radio"]:checked+label+.tab{display:block}@media (max-width:45em){#labstabs{padding-bottom:1em;margin:1em}#labstabs .labstabs .tab,#labstabs .labstabs label{order:initial;width:100%;margin-right:0;margin-top:0.2rem;border-radius:0}#labstabs .labstabs>label{font-size:.8em}#labstabs .labstabs .tab{margin-top:-10px!important;border:2px solid #b8b8b8;border-top:0px;background-color:#ffffff}#labstabs .labstabs label{background-color:#eef5f9;color:black;padding:1em .5em}#labstabs .labstabs .labs-display-bib label{background:transparent;color:black;padding:0}#labstabs .labstabs input[type="radio"]:checked+label{background-color:#acd2e8;color:black;border:2px solid #b8b8b8;border-bottom:0px;padding:1em .5em}}#labstabs .toggle{border:1px dotted #c7d3db;padding:.5em;background-color:#eef5f9}#labstabs .toggle .columns.lab-row{align-items:center}#labstabs .toggle .columns.lab-row .column{padding:.5rem .25rem}#labstabs .column.lab-name{flex-basis:auto;flex-grow:0;font-size:.75rem}#labstabs .tab 
a{display:inline}#labstabs .column.lab-name em{font-size:11px;color:black}#labstabs .column.lab-name em a{line-height:18px;text-decoration:none;padding:0;border:0;font-style:normal;font-size:11px}#labstabs .column.lab-switch{flex-grow:0}#labstabs .toggle .lab-switch label.switch{position:relative;display:inline-block;width:38px;height:20px;padding:0;background-color:transparent;margin:0}#labstabs .toggle .lab-switch .slider{position:absolute;cursor:pointer;top:0;left:0;right:0;bottom:0;border-radius:24px;background-color:#ccc;-webkit-transition:.4s;transition:.4s}#labstabs .toggle .lab-switch .slider:before{position:absolute;content:"";height:16px;width:16px;left:2px;bottom:2px;border-radius:50%;background-color:white;-webkit-transition:.4s;transition:.4s}#abs-outer .subheader{background-color:#eee;color:#000;padding:.25em 0;border-bottom:1px solid #ccc}#abs-outer .subheader h1{margin:0;font-size:1.1em;padding:0 0 .2em 20px;font-weight:bold;font-style:normal}#abs h1.title{margin:.5em 0 .5em 20px;font-size:x-large;font-weight:bold;line-height:120%}#abs .authors{margin:.5em 0 .5em 20px;font-size:medium;line-height:150%}#abs .authors a{font-size:medium}#abs .dateline{margin:.5em 0 .5em 20px;font-style:italic;font-size:small}#abs blockquote.abstract{line-height:1.55;font-size:1.05em;margin-bottom:1.5em}#abs .metatable{font-size:0.92em;line-height:1.5;margin:0 0 1.5em 20px}#abs .tablecell{padding:.1em .5em 0em 0em;vertical-align:top}#abs .arxivid a{font-weight:normal}#abs-outer .submission-history{margin:1.5em 0 1.5em 20px;font-size:90%;line-height:1.5em}#abs-outer .submission-history h2{font-size:120%;margin:0 0 .25em 0;font-weight:bold}#abs-outer .endorsers{margin:1em 0 1.5em 20px;font-size:small;font-style:italic;clear:both}#abs-outer .header-breadcrumbs-mobile{display:none}#abs-outer #abs .dateline{margin-top:15px;margin-bottom:0}#abs-outer #abs h1.title{margin-top:.25em}@media screen and (max-width:768px){#abs-outer 
.header-breadcrumbs-mobile{display:block;color:black;font-size:.85em;margin:.25em 0 .5em 1em}#abs-outer #abs a.mobile-submission-download{display:flex;justify-content:center;margin:.7em .25em;border-radius:10px;background-color:#408bd0;padding:.25em 1em;font-weight:800;color:white;text-decoration:none;font-size:20px;text-align:center}#abs-outer .extra-services h2{font-size:15px;margin-bottom:.5em}#abs-outer .extra-services h3{font-size:14px}#abs-outer .leftcolumn,#abs-outer .extra-services{width:100%;float:left}#abs-outer .extra-services,#abs-outer .extra-services .full-text,#abs-outer .extra-services .browse,#abs-outer .extra-services .extra-ref-cite{border:0px;font-size:12px}#abs-outer .extra-services{-webkit-box-shadow:inset 0px 8px 15px 0px rgba(173,173,173,1);-moz-box-shadow:inset 0px 8px 15px 0px rgba(173,173,173,1);box-shadow:inset 0px 8px 15px 0px rgba(173,173,173,1);background-color:#E6E6E6;margin:0 0 1em 0;padding:1em 0}#abs-outer .extra-ref-cite ul li{float:left}#abs-outer .extra-services .full-text ul,#abs-outer .extra-services .extra-ref-cite ul{list-style:none;margin:0;padding:0}#abs-outer .extra-services .full-text ul li,#abs-outer .extra-services .extra-ref-cite ul li{display:inline-block;margin:0 0 .25em 0;padding:0}#abs-outer .extra-services .bookmarks{margin:1em 0 0 0;border-left:0;padding:.25em .5em 0 1em;border-top:2px solid #cccccc;font-size:1em}#abs-outer .extra-services .bookmarks .abs-button-small{margin-top:.25em}#abs-outer .extra-services .browse{margin-top:.5em;border-top:2px solid #cccccc;padding-top:1em}#abs-outer .extra-services .prevnext{margin-top:.5em}#abs-outer .extra-services .browse .current{color:#AB4B02;display:inline}.abs-switch-cat{margin:0 0 1em 0}.browse .abs-switch-cat .switch{display:inline}.browse .abs-switch-cat .switch a{font-weight:bold}.abs-button{display:inline-block;border-radius:5px;border:1px solid 
#046BAF;font-size:1.25em;color:#046BAF!important;padding:.5em;background:#E6E6E6;margin-right:.3em}.abs-button-small{font-size:1em;padding:.25em 1em;margin:.75em .5em 0 0}.abs-button-grey{border:1px solid #666666;color:#666666!important}#abs-outer .extra-services span.bib-cite-button{margin:.5em .5em 0 0;display:inline-block!important;border-radius:5px;background:#E6E6E6;border:1px solid #046BAF;font-size:1em!important;padding:.25em 1em;font-weight:normal;text-transform:capitalize}#abs-outer .extra-services .extra-ref-cite ul li{margin:.5em 0;padding:0;height:auto}#abs-outer .extra-services .extra-ref-cite ul li a{margin:.25em .5em .25em 0}#abs-outer .subheader{background-color:#fefefe;padding:.25em 0;border-bottom:1px solid #ccc}#abs-outer .subheader h1{margin:0;font-size:.75em;padding:.2em 0 .2em 1em;font-weight:normal;font-style:normal;color:#b55c06}#abs-outer #abs .dateline{color:#767676;font-size:.85em;font-style:normal;margin:2em 0 0 1em}#abs-outer .submission-history{padding:1em;margin:0;background-color:#f5f5f5;-webkit-box-shadow:inset 0px -6px 15px 0px rgba(219,219,219,1);-moz-box-shadow:inset 0px -6px 15px 0px rgba(219,219,219,1);box-shadow:inset 0px -6px 15px 0px rgba(219,219,219,1)}#abs-outer #abs h1.title{margin:0 .25em 0 .5em;font-size:1.5em}#abs-outer #abs .authors{margin:1em .25em 0 1em;font-size:.9em;line-height:1.5em}#abs-outer #abs .authors a{font-size:inherit}#abs-outer #abs blockquote.abstract{margin:0 1em}#abs-outer #abs .metatable{margin:.75em 0 1.5em 1.5em}#abs-outer #abs a,#abs-outer a,#abs-outer .endorsers a{color:#1777bc}#abs-outer .submission-history a,#abs-outer .abs-switch-cat .switch a,#abs-outer .extra-services .full-text .abs-license a{color:#046BAF}#abs-outer .endorsers{display:block;float:left;border:1px solid #eee;padding:1em;margin:1em}}.mobile-header{background-color:#b31b1b}.mobile-header .columns{height:65px;align-items:center}.mobile-header .column{border-left:1px solid #fc5554;border-right:1px solid 
#731515;height:65px;padding:0 1em;display:flex;align-items:center}.mobile-header .column:first-child{border-left:0}.mobile-header .column:last-child{border-right:0}.mobile-header .column.logo-arxiv{width:100px;flex:none}.mobile-header .column.logo-cornell{display:flex}.mobile-header .column.logo-cornell img{height:45px}.mobile-header .column.nav{justify-content:flex-end;align-self:flex-end}.mobile-header #toggle-container button.toggle-control{background-color:transparent;border-radius:0;border:0;font-size:25px;padding:3px;margin-left:.5em}.mobile-header #toggle-container button.toggle-control svg.icon{width:1.25rem;margin:0}@media screen and (min-width:769px){.mobile-header{display:none}}@media screen and (min-width:426px){.mobile-header .column.nav{flex:none;width:100px}}@media screen and (max-width:500px){.mobile-header .columns{height:80px}.mobile-header .column{height:80px;padding:0 .5em}.mobile-header .column.logo-arxiv{border-right:0!important}.mobile-header .column.logo-cornell{justify-content:flex-end;border-left:0!important}.mobile-header .column.logo-cornell img{height:73px}.mobile-header .column.nav{width:65px;flex:none}}@media screen and (min-width:501px){.mobile-header .column{height:65px}.mobile-header .column.logo-cornell img{height:45px}.mobile-header .column.nav{width:65px}}.extra-services{border-bottom:.35em solid #ddd}.extra-services h3{font-size:medium;font-weight:normal;margin:0 0 0.3em 0;padding-top:0.3em}.full-text{margin:0;padding:.5em 1em .5em 1em;font-size:110%;font-weight:normal;border-bottom:medium solid #ddd;border-left:.35em solid #ddd}.full-text h2{font-size:140%;font-weight:bold;margin:0.1em 0 0 0}.full-text ul{margin:.3em 0 0 1em;padding:0;list-style-type:none}.extra-general{margin:0;padding:0 1em 0 1em;font-size:90%;border-bottom:medium solid #ddd;border-left:.35em solid #ddd}.extra-ref-cite{margin:0;padding:0 1em 0 1em;font-size:90%;border-bottom:medium solid #ddd;border-left:.35em solid #ddd}.extra-ref-cite 
ul{font-weight:normal;margin:0.3em 0 0 20px;padding-left:0;padding-bottom:0.3em;list-style-type:none}.browse{padding:0 1em 0 1em;font-size:90%;border-bottom:medium solid #ddd;border-left:.35em solid #ddd}.browse .current{padding:0;font-weight:bold}.browse .prevnext{padding:0.2em 0 0 0}.browse .list{padding:0.2em 0 0.5em 0;font-weight:normal}.browse .switch{font-weight:normal;padding:.2em 0em .7em 0em}.dblp{padding:0 1em 0 1em;font-size:90%;border-bottom:medium solid #ddd;border-left:.35em solid #ddd}.dblp .list{font-weight:normal;margin:0.3em 0 0 20px;padding-left:0;padding-bottom:0.3em}.bookmarks{clear:both;margin:0;padding:0 1em .5em 1em;font-size:90%;border-left:.35em solid #ddd}.what-is-this{font-size:xx-small;padding-bottom:0.3em}.what-is-this h3{display:inline}.abs-license{font-size:xx-small;padding-top:0.3em}@media screen and (max-width:768px){.columns.is-mobile{display:flex}}.is-sr-only{border:none!important;clip:rect(0,0,0,0)!important;height:0.01em!important;overflow:hidden!important;padding:0!important;position:absolute!important;white-space:nowrap!important;width:0.01em!important}.column{display:block;flex-basis:0;flex-grow:1;flex-shrink:1;padding:0.75rem}@media screen and (min-width:769px),print{.columns:not(.is-desktop){display:flex}}@media screen and (min-width:1024px){.columns.is-desktop{display:flex}}svg.icon{height:1em!important}.icon.filter-white{fill:#FFFFFF}.icon.filter-black{fill:#000000}.filter-dark_grey{fill:#cccccc}a .icon{transition:fill 0.3s ease}</style>
<!-- Canonical URL and plain-text description of the page. -->
<link rel="canonical" href="https://arxiv.org/abs/1907.11692">
<meta name="description" content="Abstract page for arXiv paper 1907.11692: RoBERTa: A Robustly Optimized BERT Pretraining Approach">
<!-- Open Graph: object type, site name, title and URL. -->
<meta property="og:type" content="website">
<meta property="og:site_name" content="arXiv.org">
<meta property="og:title" content="RoBERTa: A Robustly Optimized BERT Pretraining Approach">
<meta property="og:url" content="https://arxiv.org/abs/1907.11692v1">
<!--
  Open Graph preview image. The URLs are absolute and HTTPS: link-preview consumers
  read og:image without a base document, so a site-relative path does not resolve,
  and og:image:secure_url is defined as an HTTPS URL. The origin matches the
  canonical link (https://arxiv.org).
-->
<meta property="og:image" content="https://arxiv.org/static/browse/0.3.4/images/arxiv-logo-fb.png">
<meta property="og:image:secure_url" content="https://arxiv.org/static/browse/0.3.4/images/arxiv-logo-fb.png">
<meta property="og:image:width" content="1200">
<meta property="og:image:height" content="700">
<meta property="og:image:alt" content="arXiv logo">
<!-- Open Graph description: the full abstract text. -->
<meta property="og:description" content="Language model pretraining has led to significant performance gains but careful comparison between different approaches is challenging. Training is computationally expensive, often done on private datasets of different sizes, and, as we will show, hyperparameter choices have significant impact on the final results. We present a replication study of BERT pretraining (Devlin et al., 2019) that carefully measures the impact of many key hyperparameters and training data size. We find that BERT was significantly undertrained, and can match or exceed the performance of every model published after it. Our best model achieves state-of-the-art results on GLUE, RACE and SQuAD. These results highlight the importance of previously overlooked design choices, and raise questions about the source of recently reported improvements. We release our models and code.">
<!-- Twitter summary card; its description is a truncated form of the abstract. -->
<meta name="twitter:site" content="@arxiv">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="RoBERTa: A Robustly Optimized BERT Pretraining Approach">
<meta name="twitter:description" content="Language model pretraining has led to significant performance gains but careful comparison between different approaches is challenging. Training is computationally expensive, often done on private...">
<meta name="twitter:image" content="https://static.arxiv.org/icons/twitter/arxiv-logo-twitter-square.png">
<meta name="twitter:image:alt" content="arXiv logo">
<style media="screen">
  /* Clipped to a 1px box so the content is not visibly rendered. */
  .visually-hidden {
    clip-path: inset(100%);
    clip: rect(1px, 1px, 1px, 1px);
    height: 1px;
    overflow: hidden;
    position: absolute;
    white-space: nowrap;
    width: 1px;
  }

  /* Inline wrapper and its borderless, transparent button. */
  .button-and-tooltip {
    position: relative;
    display: inline;
  }
  .button-and-tooltip button {
    background: transparent;
    border: none;
    box-shadow: none;
    position: relative;
    padding: 0 0 0 6px;
  }

  td.tablecell.arxivdoi {
    display: inline-flex;
    align-items: center;
  }

  /* Zero-size box with transparent side borders. */
  .arrow {
    width: 0;
    height: 0;
    border-left: 8px solid transparent;
    border-right: 8px solid transparent;
  }

  /* "pulsate" animation, declared with the -webkit- prefix first and then unprefixed. */
  @-webkit-keyframes pulsate {
    0% {
      -webkit-transform: scale(.1);
      transform: scale(.1);
      opacity: 0;
    }
    30% {
      opacity: 1;
    }
    60% {
      -webkit-transform: scale(.8);
      transform: scale(.8);
      opacity: 0;
    }
  }
  @keyframes pulsate {
    0% {
      -webkit-transform: scale(.1);
      transform: scale(.1);
      opacity: 0;
    }
    30% {
      opacity: 1;
    }
    60% {
      -webkit-transform: scale(.8);
      transform: scale(.8);
      opacity: 0;
    }
  }
</style>
<!-- citation_* metadata: title, one citation_author tag per author, dates, PDF URL, arXiv id, abstract. -->
<meta name="citation_title" content="RoBERTa: A Robustly Optimized BERT Pretraining Approach">
<meta name="citation_author" content="Liu, Yinhan">
<meta name="citation_author" content="Ott, Myle">
<meta name="citation_author" content="Goyal, Naman">
<meta name="citation_author" content="Du, Jingfei">
<meta name="citation_author" content="Joshi, Mandar">
<meta name="citation_author" content="Chen, Danqi">
<meta name="citation_author" content="Levy, Omer">
<meta name="citation_author" content="Lewis, Mike">
<meta name="citation_author" content="Zettlemoyer, Luke">
<meta name="citation_author" content="Stoyanov, Veselin">
<meta name="citation_date" content="2019/07/26">
<meta name="citation_online_date" content="2019/07/26">
<meta name="citation_pdf_url" content="https://arxiv.org/pdf/1907.11692">
<meta name="citation_arxiv_id" content="1907.11692">
<meta name="citation_abstract" content="Language model pretraining has led to significant performance gains but careful comparison between different approaches is challenging. Training is computationally expensive, often done on private datasets of different sizes, and, as we will show, hyperparameter choices have significant impact on the final results. We present a replication study of BERT pretraining (Devlin et al., 2019) that carefully measures the impact of many key hyperparameters and training data size. We find that BERT was significantly undertrained, and can match or exceed the performance of every model published after it. Our best model achieves state-of-the-art results on GLUE, RACE and SQuAD. These results highlight the importance of previously overlooked design choices, and raise questions about the source of recently reported improvements. We release our models and code.">
<style>#MathJax_Message{position:fixed;left:1px;bottom:2px;background-color:#E6E6E6;border:1px solid #959595;margin:0px;padding:2px 8px;z-index:102;color:black;font-size:80%;width:auto;white-space:nowrap}</style><link rel=icon type=image/png sizes=32x32 href="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAMAAABEpIrGAAAABGdBTUEAALGPC/xhBQAAACBjSFJNAAB6JgAAgIQAAPoAAACA6AAAdTAAAOpgAAA6mAAAF3CculE8AAACr1BMVEUAAACzICWzICSyrKazrKWzqaKzraazR0mzTE6zaGizBQiz49uzZmSzEBazHySzsam0ICmzICWzICWzICWzICWzICWzICWzICWzICWzICWzICWzICWzICWzICWzICWzICWzrKWzrKWzICWzICWzICWzICWzICWzrKWzrKWzrKWzrKWzICWzICWzICWzICWzICWzrKWzrKWzrKWzrKWzICWzICWzICWzICWzrKWzrKWzrKWzrKWzICWzICWzICWzrKWzrKWzrKWzrKWzICWzICWzICWzICWzrKWzrKWzrKWzICWzICWzICWzHySz4dWzrKWzrKWzrKWzICWzICWzHySzgX2zrqezrKWzrKWzICWzPD+zrKWzrKWzrKWzICWzHySzjYmzr6izraazT1CzLjKzMDSzQUOzrKWzrqezgH2zKi6zHySzICWzrKWzrKWzcW+zICWzICWzrKWzrKWzYWGzICWzrKWzUVKzICWzrKWzrKWznZezQkSzICWzICWzrKWzraazkY2zHiOzICWzICWzjYmzm5WzhIGzODuzAACzHySzHSKzOz6zrqazs6yzICWzICWzODuzrKWzrKWzrKWzICWzICWzODuzo5yzrKWzrKWzICWzICWzICWzGyCzr6ezrKWzrKWzICWzICWzrKWzrKWzrKWzrKWzICWzICWzICWzrKWzrKWzrKWzrKWzICWzICWzICWzrKWzrKWzrKWzrKWzrKWzICWzICWzICWzrKWzrKWzrKWzrKWzICWzICWzICWzrKWzrKWzrKWzICWzICWzrKWzrKWzrKWzrKWzrKWzrKWzrKWzrKWzrKWzrKWzrKWzrKWzrKWzrKWzICWzrKWzHySzkYyzrqezHiOzODuzJCmzqaOzISazraazpZ6zNTizkIuzi4ezraX///8A/VYjAAAA1HRSTlMAAAAAAAAAAAAAAAAAAAAAAAGA95kOB7X6kQ9W8fyaEQIBA3z9oRUllnUKDJz+qBgwxuwxGbquHD3SpQwt1CBL3cEeR+e7JFvm1mX0wSgEbO5HBYXFkfPyYaX++n0EHsH9lwtO8/NRJrr8/ttFGb7+3jpk/v6ecv6sK9j+/vVcQNf+8nUGSfD7dQMHivyVCmr2/vl3A0np28zvWS7V0joht+HJMR2vzCcLvygYp7IVc7QhFJ/9lQkSzhoRl/t2lu0UDY9XDSoLhvjgOwh89a4GB4L2i8A61lMAAAABYktHROQvYjspAAAAB3RJTUUH5wETDS455INCLAAAAgZJREFUOMt10/dfTXEYB/Dz6Ilst3lFueWmjDIysmdkF0Kyyd7Ze5WZcc2ErIhKISops6zs73Pda1zrH/EcoXPqe8+vn/fre57z+T5HURQwuHt4eoHi9AFvH2Fs6gu1nAJo1lz4+beAqkNc0BSAGhHY0ijMQa3+C8TgkNZttKJtOyFCw9r/FYgdOlKncC2Azl2EEF27/RGIEd2JevRE3Ri9erPw6cMCA/r2I+o/AFE/6MBBLCIHg6tpSBTR0HB9rophw1mMGDlqdDRRVEz1nMWYsX4sxsWOJ5owsWbOwjeOgXXSR1v85CmSnF8ydZ
pVCPun6TNm1pY2Wsdt1mwW1oQ5devJcsS58+YvUMXCRSC7Oly8hGjpZxZi2XKJQFyRSPRl5VcGYpVBBlYTkePbGvWItYGSd+C69arYsNEuNm2WDYG4ZauDyLbt+/YdO6X7Vb9BUjILx67dexpKe2i0d9/+HzYWKQcay3IwHDxkP/yT57AcOSq7Cjh2nIs+cfIXUeopyWVC2mn+vjNn08/xGecjaojKpbtwEVwvZbC4fKWagEx/zq9eA0DMyuZKc67rN65JrlGIvBtqP4g3b+VT4u07uqUtKBTmsKLK/hDvFt8jS4lJu/fepfc9H1T9OA8fxVPqY80YAE/KMjX9Y/nTZ/T8hWwz/4mXFa/o9RvnQMG3795/UKf4DUwCyzJ9eBcsAAAAJXRFWHRkYXRlOmNyZWF0ZQAyMDIzLTAxLTE5VDEzOjQ2OjU3KzAwOjAwqJ4w4AAAACV0RVh0ZGF0ZTptb2RpZnkAMjAyMy0wMS0xOVQxMzo0Njo1NyswMDowMNnDiFwAAABXelRYdFJhdyBwcm9maWxlIHR5cGUgaXB0YwAAeJzj8gwIcVYoKMpPy8xJ5VIAAyMLLmMLEyMTS5MUAxMgRIA0w2QDI7NUIMvY1MjEzMQcxAfLgEigSi4A6hcRdPJCNZUAAAAASUVORK5CYII="><style>.sf-hidden{display:none!important}</style><meta http-equiv=content-security-policy content="default-src 'none'; font-src 'self' data:; img-src 'self' data:; style-src 'unsafe-inline'; media-src 'self' data:; script-src 'unsafe-inline' data:; object-src 'self' data:; frame-src 'self' data:;"><style>img[src="data:,"],source[src="data:,"]{display:none!important}</style></head>
<body class=with-cu-identity><div id=MathJax_Message style=display:none></div>
<div class=flex-wrap-footer>
<header>
<!-- Skip link to #content, then two empty containers that carry the sf-hidden class (display:none). -->
<a class="is-sr-only" href="#content">Skip to main content</a>
<div class="columns is-vcentered is-hidden-mobile sf-hidden" id="cu-identity">
</div>
<div class="is-hidden-mobile sf-hidden" id="header">
</div>
<div class=mobile-header>
<div class="columns is-mobile">
<div class="column logo-arxiv"><a href=https://arxiv.org/><img src="data:image/svg+xml;base64,PHN2ZyBpZD0ibG9nb21hcmsiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgdmlld0JveD0iMCAwIDc0LjQ5MiAxMDAuMjUiPjxnIGlkPSJ0aW55Xy1fd2hpdGUiIGRhdGEtbmFtZT0idGlueSAtIHdoaXRlIj48cGF0aCBkPSJNNTg2LjcyLDI1NS42MTZhMy4zNzcsMy4zNzcsMCwwLDEsLjQ0OC4wMzEsNS45MTcsNS45MTcsMCwwLDEsMy41ODEsMi43OWMuNDU0LDEuMTE2LjMxNCwyLjAyMy0xLjMxNSw0LjE0MUw1NjMuMTY4LDI5My42bC04LjU1OC0xMC4wNDcsMjkuMzQ4LTI2LjYxNmE0LjQwNiw0LjQwNiwwLDAsMSwyLjc2Mi0xLjMyMW0wLTEuNWE1Ljc2Niw1Ljc2NiwwLDAsMC0zLjY5LDEuNjQzbC0uMDQxLjAzMi0uMDM4LjAzNUw1NTMuNiwyODIuNDQybC0xLjA3Ny45NzcuOTQzLDEuMTA3LDguNTU4LDEwLjA0NywxLjE0NSwxLjM0NCwxLjE0MS0xLjM0OCwyNi4yNjctMzEuMDIyLjAyMi0uMDI3LjAyMi0uMDI4YzEuNTc0LTIuMDQ2LDIuMzI3LTMuNjIyLDEuNTE2LTUuNjE5YTcuMzA5LDcuMzA5LDAsMCwwLTQuNzc5LTMuNzE0LDUuMDgzLDUuMDgzLDAsMCwwLS42NC0uMDQzWiIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoLTUyNi4wODYgLTI0NS41NTkpIiBmaWxsPSIjZmZmIi8+PHBhdGggZD0iTTU1My40MjMsMjg0LjU5M2w4Ljk3NywxMC41NThMNTk3LjkxMSwzMzcuOWMuODczLDEuMDkzLDEuNDE5LDIuMTg2LDEuMDQ3LDMuNDE4YTQuMDkyLDQuMDkyLDAsMCwxLTIuNzIxLDIuODM3LDMuNTU3LDMuNTU3LDAsMCwxLTEuMDQ1LjE1OSw0LDQsMCwwLDEtMi42ODctMS4xMjRMNTQ4LjAxLDMwMC44MDhjLTMuNS0zLjUtMi45NzEtOC4xNTEuNDM2LTExLjU1OGw0Ljk3Ny00LjY1N20uMTI0LTIuMTdMNTUyLjQsMjgzLjVsLTQuOTc2LDQuNjU2Yy00LjE5Miw0LjE5MS00LjM3Miw5LjgxNi0uNDczLDEzLjcxNGw0NC41MjEsNDIuNGE1LjQ4NSw1LjQ4NSwwLDAsMCwzLjcyMiwxLjUzOCw1LjEsNS4xLDAsMCwwLDEuNDgzLS4yMjQsNS41OSw1LjU5LDAsMCwwLDMuNzE5LTMuODM4LDUuMTc2LDUuMTc2LDAsMCwwLTEuMzEtNC43ODhsLTM1LjUzLTQyLjc2Ny04Ljk4OC0xMC41NzEtMS4wMTktMS4yWiIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoLTUyNi4wODYgLTI0NS41NTkpIiBmaWxsPSIjZmZmIi8+PHBhdGggZD0iTTU2Mi40LDI5NS4xNTFsOS41NTYsMTEuNSw1Ljc2MS01LjM1NmE3LjkyNiw3LjkyNiwwLDAsMCwuMDQxLTExLjc0M2wtNDMuNy00MS45MjNzLTEuNjcxLTIuMDI5LTMuNDM3LTIuMDcxYTQuNDksNC40OSwwLDAsMC00LjIzLDIuNzE4Yy0uNjg4LDEuNjUxLS4xOTQsMi44MDksMS4zMTUsNC45N2wyOS4zMDYsMzUuNTY1WiIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoLTUyNi4wODYgLTI0NS41NTkpIiBmaWxsPSIjZmZmIi8+PHBhdGggZD0iTTU1My43LDMwNi4yMjNsLTE3LjExNiwyMS4wMjRjLTEuMjU1LD
EuMzM3LTIuMDMyLDMuNjgzLTEuMzMxLDUuMzY3YTQuNTg3LDQuNTg3LDAsMCwwLDQuMjg3LDIuODQxLDQuMDg3LDQuMDg3LDAsMCwwLDMuMDgyLTEuNTIzbDIwLjMyOC0xOC45WiIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoLTUyNi4wODYgLTI0NS41NTkpIiBmaWxsPSIjZmZmIi8+PHBhdGggZD0iTTU5Mi4wNzQsMjUwLjU0NyIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoLTUyNi4wODYgLTI0NS41NTkpIiBmaWxsPSIjZmZmIiBzdHJva2U9IiMwMDAiIHN0cm9rZS1taXRlcmxpbWl0PSIxMCIgc3Ryb2tlLXdpZHRoPSIwLjI1Ii8+PC9nPjwvc3ZnPg==" alt="arXiv logo" style=height:60px></a></div>
<div class="column logo-cornell"><a href=https://www.cornell.edu/>
<picture>
<img src=data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz4KPCEtLSBHZW5lcmF0b3I6IEFkb2JlIElsbHVzdHJhdG9yIDE5LjIuMSwgU1ZHIEV4cG9ydCBQbHVnLUluIC4gU1ZHIFZlcnNpb246IDYuMDAgQnVpbGQgMCkgIC0tPgo8c3ZnIHZlcnNpb249IjEuMSIgaWQ9IkxheWVyXzEiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgeG1sbnM6eGxpbms9Imh0dHA6Ly93d3cudzMub3JnLzE5OTkveGxpbmsiIHg9IjBweCIgeT0iMHB4IgoJIHZpZXdCb3g9IjAgMCAxMjAgMTIwIiBzdHlsZT0iZW5hYmxlLWJhY2tncm91bmQ6bmV3IDAgMCAxMjAgMTIwOyIgeG1sOnNwYWNlPSJwcmVzZXJ2ZSI+CjxnPgoJPHBhdGggZD0iTTQ1LjksOTEuN2M2LjgsNS4zLDEzLjUsOC4yLDEzLjgsOC4zbDAuNSwwLjJsMC41LTAuMmMwLjMtMC4xLDYuOS0zLDEzLjctOC4zYzkuMS03LjEsMTMuOS0xNS4yLDEzLjktMjMuNWwwLTM3LjZsLTU2LjQsMAoJCWwwLDM3QzMxLjksNzYuMSwzNi43LDg0LjUsNDUuOSw5MS43eiBNNTguOCw4MC4xYy0wLjMtMC4xLTAuNi0wLjItMS0wLjJjMCwwLDAsMC0wLjEsMGMtMS40LDAtMi40LDAuMS0zLjUsMC4yCgkJYy0xLjEsMC4xLTIuMiwwLjItMy44LDAuMmMtMC43LDAtMS40LDAtMS45LDBWNjIuN2MwLjEsMCwwLjIsMCwwLjIsMGMwLjcsMCwxLjUsMCwyLjYsMGMwLjksMCwxLjktMC4xLDMtMC4yCgkJYzAuNi0wLjEsMS4zLTAuMSwyLTAuMmMxLjItMC4xLDIuMiwwLjEsMi4yLDAuMWMwLjEsMCwwLjIsMC4xLDAuMywwLjFDNTguOCw2Mi40LDU4LjgsODAuMSw1OC44LDgwLjF6IE04NS42LDY4LjIKCQljMCwxNS40LTE4LjksMjUuOC0yNC41LDI4LjZ2LTE0YzAuMS0wLjEsMC4yLTAuMiwwLjMtMC4zYzAuMy0wLjMsMC41LTAuNSwxLTAuNWMxLjMsMCwyLjMsMC4xLDMuNCwwLjJjMS4xLDAuMSwyLjIsMC4yLDMuOSwwLjIKCQljMS43LDAsMywwLDMsMGwwLjksMGwwLTUuMmgyLjJ2LTIuOGgtMi4ybDAtNS4xaDIuMnYtMi44aC0yLjJsMC01LjdsLTEsMGMtMC40LDAtMC44LDAtMS4zLDBjLTAuNywwLTEuNSwwLTIuNSwwCgkJYy0wLjgsMC0xLjctMC4xLTIuOC0wLjJjLTAuNi0wLjEtMS4zLTAuMS0yLTAuMmMtMS41LTAuMi0yLjgsMC4xLTIuOSwwLjFjMCwwLDAsMC0wLjEsMFY1NWgyNC41TDg1LjYsNjguMnogTTYxLjEsODAuMlY2Mi41CgkJYzAuMS0wLjEsMC4zLTAuMSwwLjUtMC4yYzAsMCwxLjEtMC4yLDIuMy0wLjFjMC43LDAuMSwxLjQsMC4xLDIsMC4yYzEuMSwwLjEsMi4xLDAuMiwzLDAuMmMwLjksMCwxLjYsMCwyLjMsMGMwLjIsMCwwLjQsMCwwLjUsMAoJCWwwLDE3LjdjLTAuNSwwLTEuMiwwLTEuOSwwYy0xLjYsMC0yLjctMC4xLTMuOC0wLjJjLTEuMS0wLjEtMi4xLTAuMi0zLjUtMC4yQzYxLjksODAsNjEuNSw4MC4xLDYxLjEsODAuMnogTTM0LjYsMzMuM2w1MSwwbDAsMTkuMQoJCWgtNTFMMzQuNiwzMy4zeiBNMzQuNiw1NWgyNC4zdjUuM2MtMC40LTAuM
S0xLjUtMC4yLTIuNy0wLjFjLTAuNywwLjEtMS40LDAuMS0yLDAuMmMtMS4xLDAuMS0yLDAuMi0yLjgsMC4yYy0xLDAtMS44LDAtMi41LDAKCQljLTAuNSwwLTAuOSwwLTEuMywwbC0xLDB2NS4xaC0yLjR2Mi44aDIuNHY1LjFoLTIuNHYyLjhoMi40djUuN2wwLjksMGMwLjEsMCwxLjQsMC4xLDMsMGMxLjcsMCwyLjktMC4xLDMuOS0wLjIKCQljMS4xLTAuMSwyLTAuMiwzLjMtMC4yYzAuNSwwLDAuNywwLjIsMC45LDAuNWMwLDAsMC4xLDAuMSwwLjEsMC4xdjE0LjFDNTMsOTMuOCwzNC42LDgzLjUsMzQuNiw2Ny42TDM0LjYsNTV6Ii8+Cgk8cGF0aCBkPSJNNTUuOSwzNS4ySDQxLjZ2MWMwLDMuMSwwLDguNCwwLDguNmMwLjEsMS4xLDAuOCwyLjEsMi4yLDNjMS43LDEuMiw0LjMsMi43LDQuNCwyLjdsMC41LDAuM2wwLjUtMC4zCgkJYzAuMS0wLjEsMi45LTEuNiw0LjYtMi43YzItMS4zLDIuMS0yLjYsMi4xLTNjMC0wLjIsMC00LjgsMC04LjZWMzUuMnogTTQzLjYsMzcuMmgxMC4yYzAsMC44LDAsMS43LDAsMi42SDQzLjYKCQlDNDMuNiwzOC45LDQzLjYsMzgsNDMuNiwzNy4yeiBNNTMuOSw0NC43YzAsMC41LTAuNywxLTEuMiwxLjRjLTEuMiwwLjgtMy4xLDEuOS00LDIuNGMtMC44LTAuNS0yLjYtMS41LTMuOC0yLjQKCQljLTAuOC0wLjUtMS4zLTEuMS0xLjMtMS41YzAtMC4xLDAtMS4zLDAtMi45aDEwLjJDNTMuOSw0My4zLDUzLjksNDQuNSw1My45LDQ0Ljd6Ii8+Cgk8cGF0aCBkPSJNODAuMSwzNC44SDY1LjZ2MWMwLDMuOSwwLDguNiwwLDguOWMwLjEsMS4xLDAuOCwyLjEsMi4yLDNjMS44LDEuMiw0LjQsMi43LDQuNSwyLjhsMC41LDAuM2wwLjUtMC4zCgkJYzAuMS0wLjEsMy0xLjYsNC43LTIuOGMyLjEtMS40LDIuMS0yLjYsMi4xLTNjMC0wLjIsMC00LjksMC04LjhMODAuMSwzNC44eiBNNzguMSw0NC41YzAsMC41LTAuNywxLjEtMS4yLDEuNAoJCWMtMS4zLDAuOC0zLjIsMS45LTQuMSwyLjRjLTAuOS0wLjUtMi42LTEuNi0zLjktMi40Yy0wLjQtMC4zLTEuMy0wLjktMS4zLTEuNWMwLTAuMSwwLTAuMiwwLTAuNWw1LTMuNmw1LjUsMy43CgkJQzc4LjEsNDQuMyw3OC4xLDQ0LjUsNzguMSw0NC41eiBNNzIuNiwzNy44bC01LDMuNmMwLTEuNCwwLTMuMiwwLTQuN2gxMC42YzAsMS42LDAsMy4zLDAsNC44TDcyLjYsMzcuOHoiLz4KCTxwYXRoIGQ9Ik02My43LDY2LjVsMSwwLjFjMC42LDAuMSwxLjIsMC4xLDEuNywwLjFsMi45LDAuMWwwLTJsLTIuOS0wLjFjLTAuNCwwLTAuOS0wLjEtMS41LTAuMWwtMS0wLjFjLTAuMywwLTAuNSwwLTAuNywwbDAuMSwyCgkJQzYzLjQsNjYuNCw2My42LDY2LjQsNjMuNyw2Ni41eiIvPgoJPHBhdGggZD0iTTY2LjQsNzAuMmMtMC40LDAtMC45LTAuMS0xLjUtMC4xYy0wLjMsMC0wLjctMC4xLTEtMC4xYy0wLjMsMC0wLjUsMC0wLjcsMGwwLDJjMC4xLDAsMC4zLDAsMC41LDBjMC40LDAsMC43LDAuMSwxLDAuMQoJCWMwLjYsMC4xLDEuMSwwLjEsMS42LDAuMWMwLjYsMCwxLjUsMCwyLjIsMGwwLjgsMGwwLTJsLTAuO
CwwQzY3LjksNzAuMyw2Nyw3MC4yLDY2LjQsNzAuMnoiLz4KCTxwYXRoIGQ9Ik02Ni40LDc1LjhjLTAuNCwwLTAuOS0wLjEtMS41LTAuMWMtMC4zLDAtMC43LTAuMS0xLTAuMWMtMC4zLDAtMC41LDAtMC43LDBsMC4xLDJjMC4xLDAsMC4zLDAsMC41LDBjMC40LDAsMC43LDAuMSwxLDAuMQoJCWMwLjYsMC4xLDEuMiwwLjEsMS42LDAuMWwwLjgsMGMwLjgsMCwxLjgsMCwyLjEsMC4xbDAtMmMtMC4zLDAtMS4zLDAtMi4xLTAuMUw2Ni40LDc1Ljh6Ii8+Cgk8cGF0aCBkPSJNNTMuOCw2Ni43YzAuNSwwLDEtMC4xLDEuNy0wLjFjMC4zLDAsMC42LTAuMSwxLTAuMWMwLjIsMCwwLjMsMCwwLjUsMGwwLTJjLTAuMiwwLTAuNCwwLTAuNywwYy0wLjQsMC0wLjcsMC4xLTEsMC4xCgkJYy0wLjYsMC4xLTEuMSwwLjEtMS41LDAuMWwtMi45LDAuMWwwLDJMNTMuOCw2Ni43eiIvPgoJPHBhdGggZD0iTTU1LjMsNzAuMWMtMC42LDAuMS0xLjEsMC4xLTEuNSwwLjFsLTIuOSwwLjFsMCwybDIuOS0wLjFjMC41LDAsMS0wLjEsMS43LTAuMWwwLjktMC4xYzAuMiwwLDAuMywwLDAuNCwwbDAuMS0yCgkJYy0wLjIsMC0wLjQsMC0wLjcsMEw1NS4zLDcwLjF6Ii8+Cgk8cGF0aCBkPSJNNTUuMyw3NS42Yy0wLjYsMC4xLTEuMSwwLjEtMS42LDAuMWMtMC45LDAtMi40LDAuMS0yLjgsMC4xbDAsMmMwLjQsMCwxLjktMC4xLDIuOS0wLjFjMC41LDAsMS0wLjEsMS43LTAuMWwxLTAuMQoJCWMwLjEsMCwwLjMsMCwwLjQsMGwwLTJjLTAuMiwwLTAuNCwwLTAuNywwTDU1LjMsNzUuNnoiLz4KCTxwYXRoIGQ9Ik05LjMsNjAuM2MyLjUsMCw0LTEuNiw0LjItNC40YzAtMC41LDAtMS4yLTAuMS0xLjlsMC0wLjJsLTIuMSwwbDAuMiwwLjRjMC4xLDAuMywwLjIsMSwwLjIsMS42Yy0wLjEsMS40LTEuMiwyLjMtMi43LDIuMgoJCWMtMS42LTAuMS0yLjUtMS4xLTIuNS0yLjVjMC0wLjUsMC4xLTEsMC41LTEuNWwwLjMtMC40bC0yLTAuM2wtMC4xLDAuMmMtMC4zLDAuNi0wLjQsMS4zLTAuNSwxLjljLTAuMiwyLjksMS40LDQuOCw0LjMsNC45CgkJQzkuMSw2MC4zLDkuMiw2MC4zLDkuMyw2MC4zeiIvPgoJPHBhdGggZD0iTTkuNyw1MC41YzAuNiwwLjIsMS4xLDAuMiwxLjYsMC4yYzEuOSwwLDMuMy0xLjEsMy45LTNjMC4zLTEuMiwwLjItMi4yLTAuMy0zLjJjLTAuNi0xLTEuNi0xLjctMi45LTIuMQoJCWMtMC41LTAuMi0xLjEtMC4yLTEuNS0wLjJjLTEuOSwwLTMuMywxLjEtMy45LDNDNS44LDQ3LjcsNyw0OS43LDkuNyw1MC41eiBNOC4zLDQ1LjhjMC4zLTEuMSwxLjItMS4zLDEuOS0xLjMKCQljMC4zLDAsMC43LDAuMSwxLjEsMC4yYzAuOSwwLjMsMS42LDAuNywxLjksMS4zYzAuMiwwLjQsMC4zLDAuOCwwLjEsMS4zYy0wLjMsMS4xLTEuMiwxLjMtMS45LDEuM2MtMC40LDAtMC43LTAuMS0xLjEtMC4yCgkJQzkuMSw0OCw3LjksNDcuMSw4LjMsNDUuOHoiLz4KCTxwYXRoIGQ9Ik0xOC4zLDM5LjNsLTIuOS0xLjZsMC4xLTAuMmMwLjMtMC41LDAuNS0wLjUsMC45LTAuNGwyLjksMC40bDEuMS0yL
jFsLTMuOC0wLjRjLTAuMywwLTAuNSwwLTAuNywwYzAtMC44LTAuNC0xLjYtMS4yLTIKCQljLTAuNC0wLjItMC44LTAuMy0xLjItMC4zYy0xLjQsMC0yLjEsMS40LTIuOCwyLjdsLTEsMS45bDcuNiw0LjFMMTguMywzOS4zeiBNMTMuOCwzNi42bC0wLjEsMC4ybC0xLjQtMC43bDAuMS0wLjIKCQljMC4zLTAuNiwwLjctMS4yLDEuMy0wLjljMC4yLDAuMSwwLjMsMC4yLDAuNCwwLjRDMTQuMiwzNS44LDE0LDM2LjMsMTMuOCwzNi42eiIvPgoJPHBvbHlnb24gcG9pbnRzPSIyMywzMS43IDE5LjEsMjguNSAyNC43LDI5LjggMjYuMiwyNy45IDE5LjYsMjIuNCAxOC4yLDI0IDIyLjIsMjcuMyAxNi42LDI2IDE1LDI3LjkgMjEuNywzMy40IAkiLz4KCTxwb2x5Z29uIHBvaW50cz0iMzIuNywyMi40IDMxLjUsMjEgMjkuMSwyMi45IDI4LDIxLjYgMzAuMiwxOS44IDI5LDE4LjQgMjYuOCwyMC4yIDI2LDE5LjEgMjguNCwxNy4yIDI3LjIsMTUuNyAyMy4xLDE5IDI4LjUsMjUuNyAKCQkJIi8+Cgk8cG9seWdvbiBwb2ludHM9IjM5LjgsMTguMSAzOC45LDE2LjUgMzYuMiwxOCAzMi45LDEyLjEgMzEsMTMuMiAzNS4yLDIwLjcgCSIvPgoJPHBvbHlnb24gcG9pbnRzPSI0Ny4xLDE1LjMgNDYuNCwxMy42IDQzLjYsMTQuNiA0MS4zLDguMyAzOS4yLDkgNDIuMSwxNy4xIAkiLz4KCTxwYXRoIGQ9Ik01NS44LDEzLjljMC4yLDAsMC4zLDAsMC41LDBjMS4yLTAuMSwyLjEtMC41LDIuNi0xLjFjMC41LTAuNiwwLjctMS41LDAuNi0yLjVsLTAuNC01LjRMNTYuOSw1bDAuNCw0LjkKCQljMC4xLDEuNy0wLjYsMi0xLjIsMi4xYy0wLjEsMC0wLjEsMC0wLjIsMGMtMC41LDAtMS4yLTAuMi0xLjQtMS45bC0wLjQtNC45bC0yLjIsMC4ybDAuNCw1LjRDNTIuNywxMi44LDUzLjgsMTMuOSw1NS44LDEzLjl6Ii8+Cgk8cG9seWdvbiBwb2ludHM9IjY1LjcsOC44IDY3LjUsMTQuMSA3MCwxNC41IDcxLjEsNS45IDY5LDUuNiA2OC4zLDEwLjcgNjYuNSw1LjMgNjQuMSw1IDYyLjksMTMuNSA2NSwxMy44IAkiLz4KCTxwb2x5Z29uIHBvaW50cz0iNzcuNiw3LjggNzUuNSw3LjEgNzIuOSwxNS4zIDc1LDE2IAkiLz4KCTxwb2x5Z29uIHBvaW50cz0iODguNCwxMi42IDg2LjUsMTEuNiA4Mi4zLDE1LjkgODMuMiwxMCA4MS4xLDguOSA3OS44LDE4IDgyLDE5IAkiLz4KCTxwb2x5Z29uIHBvaW50cz0iOTAuNiwyNSA5MS43LDIzLjUgODkuMywyMS42IDkwLjMsMjAuMyA5Mi41LDIyIDkzLjcsMjAuNSA5MS41LDE4LjggOTIuMywxNy43IDk0LjcsMTkuNiA5NS45LDE4LjEgOTEuNywxNC45IAoJCTg2LjQsMjEuNyAJIi8+Cgk8cGF0aCBkPSJNOTQuMiwyOC42bDIuNS0yLjJsMC4xLDAuMmMwLjMsMC40LDAuMywwLjUsMC4xLDFsLTEuNCwyLjZsMS42LDEuOGwxLjctMy40YzAuMS0wLjEsMC4yLTAuNCwwLjItMC43CgkJYzAuNywwLjMsMS42LDAuMiwyLjMtMC40YzEuOS0xLjYsMC40LTMuMy0wLjctNC42bC0xLjQtMS42TDkyLjcsMjdMOTQuMiwyOC42eiBNOTkuMywyNC4xbDAuMSwwLjJjMC4zLDAuN
CwwLjYsMC44LDAuNiwxLjEKCQljMCwwLjItMC4xLDAuMy0wLjMsMC40Qzk5LjYsMjYsOTkuNCwyNiw5OS4zLDI2Yy0wLjQsMC0wLjgtMC40LTEtMC43bC0wLjEtMC4yTDk5LjMsMjQuMXoiLz4KCTxwYXRoIGQ9Ik0xMDIuMiwzNS41Yy0wLjQsMC4zLTEtMC4yLTEuMi0wLjZjLTAuMy0wLjUtMC40LTEtMC40LTEuN2wwLTAuNWwtMiwwLjlsMC4xLDAuNGMwLjIsMC43LDAuMywxLjEsMC43LDEuOAoJCWMwLjcsMS4yLDEuNiwxLjgsMi42LDEuOGMwLjUsMCwwLjktMC4xLDEuNC0wLjRjMS41LTAuOSwxLjMtMi4zLDEuMS0zLjRjLTAuMS0wLjctMC4yLTEuMiwwLjEtMS40YzAuMi0wLjEsMC4zLTAuMSwwLjQtMC4xCgkJYzAuNCwwLDAuNywwLjQsMC44LDAuNWMwLjMsMC40LDAuNCwwLjksMC41LDEuNGwwLjEsMC40bDEuOC0wLjhsMC0wLjJjLTAuMS0wLjYtMC40LTEuMi0wLjgtMS45Yy0wLjktMS42LTIuNS0yLjEtMy45LTEuMgoJCWMtMS41LDAuOS0xLjMsMi4zLTEuMSwzLjNDMTAyLjUsMzQuOCwxMDIuNiwzNS4zLDEwMi4yLDM1LjV6Ii8+Cgk8cmVjdCB4PSIxMDIuNSIgeT0iMzkuNiIgdHJhbnNmb3JtPSJtYXRyaXgoMC45MjU5IC0wLjM3NzcgMC4zNzc3IDAuOTI1OSAtNy40NTU5IDQzLjM1NjgpIiB3aWR0aD0iOC42IiBoZWlnaHQ9IjIuMiIvPgoJPHBvbHlnb24gcG9pbnRzPSIxMTEuMiw0Ni4yIDEwNC43LDQ3LjggMTA1LjIsNDkuOSAxMTEuOCw0OC4zIDExMi4zLDUwLjUgMTE0LjEsNTAuMSAxMTIuNSw0My41IDExMC43LDQ0IAkiLz4KCTxwb2x5Z29uIHBvaW50cz0iMTA2LjIsNTYuNiAxMDYuMyw1OC44IDEwOS43LDU4LjYgMTE1LjEsNjEuNCAxMTUsNTkgMTExLjgsNTcuNSAxMTQuOCw1NS42IDExNC42LDUzIDEwOS42LDU2LjUgCSIvPgoJPHBvbHlnb24gcG9pbnRzPSIxMSw3Ni40IDEyLjgsNzUuOSAxMi4xLDczLjMgMTMuNCw3Mi45IDE0LjIsNzUuNyAxNiw3NS4xIDE0LjYsNzAuMyA2LjMsNzIuNyA2LjksNzQuOCAxMC4zLDczLjggCSIvPgoJPHBhdGggZD0iTTE4LjcsODMuNmMwLjMtMSwwLjItMi4xLTAuMy0zLjJjLTAuOC0xLjYtMi0yLjQtMy42LTIuNGMtMC43LDAtMS41LDAuMi0yLjMsMC42Yy0xLjMsMC42LTIuMSwxLjUtMi41LDIuNgoJCWMtMC4zLDEtMC4yLDIuMSwwLjMsMy4yYzAuOCwxLjUsMiwyLjQsMy42LDIuNGMwLjcsMCwxLjUtMC4yLDIuMy0wLjZDMTcuNSw4NS41LDE4LjQsODQuNywxOC43LDgzLjZ6IE0xNS4zLDg0LjEKCQljLTAuNSwwLjMtMS4xLDAuNC0xLjYsMC40Yy0wLjgsMC0xLjMtMC4zLTEuNi0xYy0wLjItMC41LTAuMy0wLjktMC4xLTEuM2MwLjItMC42LDAuOC0xLjIsMS42LTEuNmMwLjUtMC4yLDEtMC40LDEuNS0wLjQKCQljMC44LDAsMS4zLDAuMywxLjYsMWMwLjIsMC41LDAuMywwLjksMC4xLDEuM0MxNi42LDgzLjEsMTYuMSw4My43LDE1LjMsODQuMXoiLz4KCTxwYXRoIGQ9Ik0yNC43LDkwLjNsLTMuOSwzLjFjLTEsMC44LTEuOCwwLjktMi40LDAuMmMtMC4yLTAuMy0wLjMtMC42LTAuMy0wLjljMC4xL
TAuNCwwLjQtMC45LDEtMS40bDMuOS0zLjFsLTEuNC0xLjdsLTQuMywzLjQKCQljLTAuOSwwLjctMS40LDEuNS0xLjQsMi40YzAsMC44LDAuMywxLjYsMSwyLjVjMC44LDEuMSwxLjcsMS42LDIuNywxLjZjMC43LDAsMS40LTAuMywyLjEtMC45TDI2LDkyTDI0LjcsOTAuM3oiLz4KCTxwb2x5Z29uIHBvaW50cz0iMzIsOTcuMyAyOC44LDEwMS4zIDMwLDk1LjcgMjguMSw5NC4xIDIyLjYsMTAwLjggMjQuMywxMDIuMiAyNy41LDk4LjIgMjYuMywxMDMuOCAyOC4yLDEwNS40IDMzLjcsOTguNyAJIi8+Cgk8cGF0aCBkPSJNMzguNCwxMDEuNGwtMi4yLTEuMWwtMy45LDcuN2wyLjIsMS4xYzEuMSwwLjUsMiwwLjgsMi45LDAuOGMxLjUsMCwyLjctMC44LDMuNS0yLjRDNDIuMiwxMDQuOSw0MS4zLDEwMi45LDM4LjQsMTAxLjR6CgkJIE0zOC44LDEwNi41Yy0wLjQsMC45LTEuMiwxLjQtMi4xLDEuNGMtMC40LDAtMC43LTAuMS0xLjEtMC4zbC0wLjUtMC4zbDIuMi00LjRsMC41LDAuM2MwLjYsMC4zLDEsMC44LDEuMiwxLjMKCQlDMzkuMiwxMDUuMiwzOS4xLDEwNS44LDM4LjgsMTA2LjV6Ii8+Cgk8cG9seWdvbiBwb2ludHM9IjQzLjIsMTEyLjggNDguMywxMTQuMiA0OC44LDExMi40IDQ1LjgsMTExLjYgNDYuMiwxMTAgNDksMTEwLjcgNDkuNSwxMDguOSA0Ni43LDEwOC4yIDQ3LDEwNi44IDUwLjEsMTA3LjYgCgkJNTAuNSwxMDUuOCA0NS40LDEwNC41IAkiLz4KCTxwYXRoIGQ9Ik01NS45LDEwNi41bC0yLjUtMC4xbC0wLjUsOC42bDIuNSwwLjFjMC4yLDAsMC40LDAsMC42LDBjMi45LDAsNC40LTEuNCw0LjYtNC4xYzAuMS0xLjMtMC4yLTIuNC0wLjktMy4yCgkJQzU4LjksMTA3LjEsNTcuNywxMDYuNiw1NS45LDEwNi41eiBNNTUuOCwxMTMuM2wtMC42LDBsMC4zLTQuOWwwLjYsMGMxLjIsMC4xLDIuMywxLDIuMiwyLjZDNTguMiwxMTIuNiw1NywxMTMuMyw1NS44LDExMy4zeiIvPgoJPHBhdGggZD0iTTY4LjIsMTA1LjhsLTEuNSw5LjFsMi4zLTAuNWwwLjMtMS45bDIuOC0wLjZsMS4xLDEuN2wyLjQtMC41bC01LjEtNy43TDY4LjIsMTA1Ljh6IE02OS42LDExMC42bDAuMy0yLjNsMS4yLDEuOQoJCUw2OS42LDExMC42eiIvPgoJPHBvbHlnb24gcG9pbnRzPSI3Ni40LDExMC4zIDc3LjIsMTEyLjQgNzkuMiwxMTEuNiA3OC41LDEwOS42IAkiLz4KCTxwYXRoIGQ9Ik04MC42LDEwMS42bC0yLjIsMS4ybDQuMSw3LjZsMi4yLTEuMmMyLjktMS42LDMuNi0zLjYsMi4zLTYuMUM4NS43LDEwMC41LDgzLjUsMTAwLDgwLjYsMTAxLjZ6IE04NC4xLDEwNy40bC0wLjUsMC4zCgkJbC0yLjMtNC4zbDAuNS0wLjNjMC4zLTAuMiwwLjctMC4zLDEuMS0wLjNjMC45LDAsMS42LDAuNSwyLjEsMS4zQzg1LjgsMTA1LjUsODUuMSwxMDYuOCw4NC4xLDEwNy40eiIvPgoJPHBvbHlnb24gcG9pbnRzPSI4OC43LDEwNC4zIDg5LjksMTA2LjEgOTEuNywxMDQuOCA5MC41LDEwMyAJIi8+Cgk8cG9seWdvbiBwb2ludHM9IjkyLjEsOTMuMiA5MS43LDk2LjYgOTMuNiw5Ni45IDkzLjgsO
TUuNSA5Ny45LDk5LjYgOTkuNSw5OC4xIDkzLjQsOTEuOSAJIi8+Cgk8cGF0aCBkPSJNMTAzLjgsODguN2MtMC43LTAuNS0xLjUtMC42LTIuNC0wLjNjMC0wLjgtMC40LTEuNi0xLTJjLTEuMS0wLjgtMi41LTAuMy0zLjYsMS4xYy0xLDEuNC0wLjksMi44LDAuMywzLjcKCQljMC42LDAuNCwxLjQsMC41LDIuMSwwLjNjLTAuMSwwLjksMC4yLDEuNiwwLjksMi4xYzAuNSwwLjQsMSwwLjUsMS41LDAuNWMwLjksMCwxLjctMC41LDIuNS0xLjVjMC42LTAuOCwwLjktMS42LDAuOC0yLjQKCQlDMTA0LjgsODkuNywxMDQuNCw4OS4yLDEwMy44LDg4Ljd6IE05OS43LDg5LjNjLTAuMywwLjItMC42LDAuNC0wLjksMC40Yy0wLjEsMC0wLjMsMC0wLjQtMC4xYy0wLjQtMC4zLTAuMi0wLjgsMC0xLjEKCQljMC4xLTAuMiwwLjMtMC4zLDAuNS0wLjNjMC4xLDAsMC4zLDAuMSwwLjUsMC4yQzk5LjcsODguNiw5OS44LDg5LDk5LjcsODkuM3ogTTEwMi43LDkxLjdjLTAuMywwLjQtMC44LDAuNS0xLjIsMC4yCgkJYy0wLjMtMC4zLTAuNS0wLjYtMC40LTEuMmMwLjYtMC40LDEuMS0wLjUsMS41LTAuMmMwLjIsMC4xLDAuMywwLjMsMC4zLDAuNUMxMDIuOSw5MS4yLDEwMi44LDkxLjUsMTAyLjcsOTEuN3oiLz4KCTxwYXRoIGQ9Ik0xMDcuNyw4MC4zYy0xLjUtMC43LTIuOS0wLjMtMy42LDEuMWMtMC4yLDAuNC0wLjMsMC44LTAuMiwxLjJjLTAuNS0wLjQtMS0xLjEtMC42LTJjMC4yLTAuNCwwLjUtMC43LDAuOC0xbDAuMy0wLjIKCQlsLTEuNi0xLjFsLTAuMiwwLjJjLTAuNCwwLjQtMC44LDAuOS0xLjEsMS41Yy0wLjQsMC45LTAuNSwxLjctMC4yLDIuNWMwLjUsMS40LDEuOSwyLjQsMywyLjljMSwwLjUsMS44LDAuNywyLjUsMC43CgkJYzEuMSwwLDEuOS0wLjUsMi41LTEuNkMxMTAuMSw4Mi43LDEwOS41LDgxLjIsMTA3LjcsODAuM3ogTTEwNy43LDgzLjVjLTAuMSwwLjMtMC40LDAuNC0wLjcsMC40Yy0wLjIsMC0wLjUtMC4xLTAuNy0wLjIKCQljLTAuNC0wLjItMC43LTAuNS0wLjgtMC43Yy0wLjEtMC4yLDAtMC4zLDAtMC41YzAuMS0wLjMsMC4zLTAuNCwwLjctMC40YzAuMiwwLDAuNSwwLjEsMC44LDAuMmMwLjQsMC4yLDAuNiwwLjQsMC43LDAuNwoJCUMxMDcuOCw4My4yLDEwNy43LDgzLjQsMTA3LjcsODMuNXoiLz4KCTxwYXRoIGQ9Ik0xMTAuOCw3MS41Yy0xLjctMC41LTMuMSwwLjMtMy42LDEuOWMwLDAuMi0wLjEsMC4zLTAuMSwwLjRsLTEtMC4zbDAuOS0zbC0xLjctMC41bC0xLjUsNWw0LjgsMS41bDAtMC40CgkJYzAtMC41LDAuMS0wLjksMC4yLTEuNGMwLjEtMC40LDAuNi0xLjQsMS41LTEuMmMxLDAuMywwLjgsMS40LDAuNywxLjdjLTAuMiwwLjYtMC41LDEtMC44LDEuNGwtMC4zLDAuM2wyLDAuN2wwLjEtMC4yCgkJYzAuMy0wLjYsMC42LTEuMywwLjgtMS44YzAuMy0xLDAuMy0xLjktMC4xLTIuN0MxMTIuMyw3Mi4zLDExMS42LDcxLjgsMTEwLjgsNzEuNXoiLz4KCTxwYXRoIGQ9Ik0xNi4zLDYwYzAsMjQsMTkuNSw0My42LDQzLjUsNDMuNmMyNCwwLDQzL
jUtMTkuNSw0My41LTQzLjZjMC0yNC0xOS41LTQzLjUtNDMuNS00My41QzM1LjksMTYuNCwxNi4zLDM2LDE2LjMsNjB6CgkJIE01OS45LDE3LjljMjMuMiwwLDQyLDE4LjksNDIsNDJjMCwyMy4yLTE4LjksNDItNDIsNDJjLTIzLjIsMC00Mi0xOC45LTQyLTQyQzE3LjgsMzYuOCwzNi43LDE3LjksNTkuOSwxNy45eiIvPgoJPHBhdGggZD0iTTU5LjksMC40QzI3LDAuNCwwLjMsMjcuMSwwLjMsNjBjMCwzMi44LDI2LjcsNTkuNiw1OS42LDU5LjZjMzIuOCwwLDU5LjYtMjYuNyw1OS42LTU5LjZDMTE5LjQsMjcuMSw5Mi43LDAuNCw1OS45LDAuNHoKCQkgTTU5LjksMTE4Yy0zMiwwLTU4LTI2LTU4LTU4YzAtMzIsMjYtNTgsNTgtNThjMzIsMCw1OCwyNiw1OCw1OEMxMTcuOSw5Miw5MS45LDExOCw1OS45LDExOHoiLz4KPC9nPgo8L3N2Zz4K alt="Cornell University Logo" srcset sizes>
</picture>
</a></div>
<!--
  Header controls: two icon-only toggle buttons, each followed by its target
  panel (empty and marked sf-hidden in this snapshot).
  The container no longer claims role="menubar" and the second icon no longer
  claims role="menu": neither has the menuitem children those roles require,
  so assistive technology was promised a menu widget that does not exist.
-->
<div class="column nav" id="toggle-container">
  <!-- type="button" stops the control from acting as a submit button if it is ever placed in a form;
       aria-label names the button explicitly, so the icon itself is hidden from the accessibility tree. -->
  <button class="toggle-control" type="button" aria-label="open search"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-white" aria-hidden="true">
    <title>open search</title>
    <path d="M505 442.7L405.3 343c-4.5-4.5-10.6-7-17-7H372c27.6-35.3 44-79.7 44-128C416 93.1 322.9 0 208 0S0 93.1 0 208s93.1 208 208 208c48.3 0 92.7-16.4 128-44v16.3c0 6.4 2.5 12.5 7 17l99.7 99.7c9.4 9.4 24.6 9.4 33.9 0l28.3-28.3c9.4-9.4 9.4-24.6.1-34zM208 336c-70.7 0-128-57.2-128-128 0-70.7 57.2-128 128-128 70.7 0 128 57.2 128 128 0 70.7-57.2 128-128 128z"></path>
  </svg></button>
  <div class="mobile-toggle-block toggle-target sf-hidden">
  </div>
  <button class="toggle-control" type="button" aria-label="open navigation menu"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-white" aria-hidden="true">
    <title>open navigation menu</title>
    <path d="M16 132h416c8.837 0 16-7.163 16-16V76c0-8.837-7.163-16-16-16H16C7.163 60 0 67.163 0 76v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16z"></path>
  </svg></button>
  <div class="mobile-toggle-block toggle-target sf-hidden">
  </div>
</div>
</div>
</div>
</header>
<main>
<div id=content>
<div id=abs-outer>
<div class=leftcolumn>
<!-- Subject-area heading: archive name followed by the primary category. -->
<div class="subheader">
  <h1>Computer Science &gt; Computation and Language</h1>
</div>
<!-- Compact identifier line (the class name marks it as the mobile breadcrumb). -->
<div class="header-breadcrumbs-mobile">
  <strong>arXiv:1907.11692</strong> (cs)
</div>
<style>
/* Typography and spacing for the abstract panel (#abs) and its children. */
#abs {
  font-family: "Lucida Grande", Helvetica, Arial, sans-serif !important;
}

/* Paper title. */
#abs h1.title {
  display: block;
  font-size: 1.8em !important;
  font-weight: 700;
  margin-block-end: 12px;
  margin-block-start: 12px;
  margin-bottom: 12px;
  margin-inline-end: 0px;
  margin-inline-start: 20px;
  margin-left: 20px;
  margin-right: 0px;
  margin-top: 12px;
}

/* Author list. */
#abs div.authors {
  font-size: 1.2em;
  line-height: 24px;
  margin-bottom: 8px;
  margin-left: 20px;
  margin-right: 0px;
  margin-top: 8px;
}

/* Submission date line. */
#abs div.dateline {
  font-size: 0.9em;
  font-style: italic;
  margin-bottom: 6.5px;
  margin-left: 20px;
  margin-right: 0px;
  margin-top: 6.5px;
}

/* Abstract text. */
#abs blockquote.abstract {
  font-size: 1.05em;
  margin-block-end: 21.6px;
  margin-block-start: 14.4px;
  margin-bottom: 21.6px;
  margin-inline-end: 40px;
  margin-inline-start: 40px;
  background-color: white;
  border-left: 0px;
  padding: 0px;
}

/* Metadata table wrapper, rows and cells. */
#abs div.metatable {
  font-size: 0.95em !important;
  margin-bottom: 19px;
  margin-left: 20px;
  margin-right: 0px;
  margin-top: 0px;
  border: 0px;
  padding: 0px;
}

#abs div.metatable tbody {
  vertical-align: middle;
}

#abs tr {
  margin-top: 0px;
  margin-bottom: 0px;
}

#abs td.tablecell {
  padding-top: 0px;
  padding-bottom: 0px;
  padding-right: 6.5px;
  padding-left: 0px;
  vertical-align: top;
  font-size: 0.95em !important;
  margin-top: 0px;
  margin-bottom: 0px;
  border: 0px;
}

#abs td.tablecell.label {
  font-weight: 400 !important;
}

/* Primary subject and arXiv identifier are both set in bold. */
#abs span.primary-subject,
#abs span.arxivid {
  font-weight: 700;
}
</style>
<div id=content-inner>
<div id=abs>
<!-- Submission date shown above the title. -->
<div class="dateline">[Submitted on 26 Jul 2019]</div>
<!-- Paper title; the "Title:" descriptor is kept in the markup but hidden (sf-hidden). -->
<h1 class="title mathjax"><span class="descriptor sf-hidden">Title:</span>RoBERTa: A Robustly Optimized BERT Pretraining Approach</h1>
<!-- Author list: each name links to an arXiv author search. -->
<div class="authors"><span class="descriptor sf-hidden">Authors:</span><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Liu,+Y" rel="nofollow">Yinhan Liu</a>,
  <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Ott,+M" rel="nofollow">Myle Ott</a>,
  <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Goyal,+N" rel="nofollow">Naman Goyal</a>,
  <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Du,+J" rel="nofollow">Jingfei Du</a>,
  <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Joshi,+M" rel="nofollow">Mandar Joshi</a>,
  <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Chen,+D" rel="nofollow">Danqi Chen</a>,
  <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Levy,+O" rel="nofollow">Omer Levy</a>,
  <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Lewis,+M" rel="nofollow">Mike Lewis</a>,
  <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zettlemoyer,+L" rel="nofollow">Luke Zettlemoyer</a>,
  <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Stoyanov,+V" rel="nofollow">Veselin Stoyanov</a></div>
<!-- Hidden long-form description of the PDF download. -->
<div id="download-button-info" class="sf-hidden" hidden>View a PDF of the paper titled RoBERTa: A Robustly Optimized BERT Pretraining Approach, by Yinhan Liu and 9 other authors</div>
<a class="mobile-submission-download" href="https://arxiv.org/pdf/1907.11692">View PDF</a>
<!-- Abstract, reproduced verbatim. -->
<blockquote class="abstract mathjax">
  <span class="descriptor sf-hidden">Abstract:</span>Language model pretraining has led to significant performance gains but careful comparison between different approaches is challenging. Training is computationally expensive, often done on private datasets of different sizes, and, as we will show, hyperparameter choices have significant impact on the final results. We present a replication study of BERT pretraining (Devlin et al., 2019) that carefully measures the impact of many key hyperparameters and training data size. We find that BERT was significantly undertrained, and can match or exceed the performance of every model published after it. Our best model achieves state-of-the-art results on GLUE, RACE and SQuAD. These results highlight the importance of previously overlooked design choices, and raise questions about the source of recently reported improvements. We release our models and code.
</blockquote>
<!--
  Metadata table: subject, citation identifiers and DOI.
  - The obsolete table "summary" attribute is replaced by aria-label, which carries the same text.
  - Every row and the tbody are now closed explicitly; the first and last rows previously relied on
    implied end tags while the middle rows did not.
  - Label cells stay <td class="tablecell label"> because the page CSS targets td.tablecell.
-->
<div class="metatable">
  <table aria-label="Additional metadata">
    <tbody>
      <tr>
        <td class="tablecell label">Subjects:</td>
        <td class="tablecell subjects">
          <span class="primary-subject">Computation and Language (cs.CL)</span></td>
      </tr>
      <tr>
        <td class="tablecell label">Cite as:</td>
        <td class="tablecell arxivid"><span class="arxivid"><a href="https://arxiv.org/abs/1907.11692">arXiv:1907.11692</a> [cs.CL]</span></td>
      </tr>
      <tr>
        <td class="tablecell label">&nbsp;</td>
        <td class="tablecell arxividv">(or <span class="arxivid">
          <a href="https://arxiv.org/abs/1907.11692v1">arXiv:1907.11692v1</a> [cs.CL]</span> for this version)
        </td>
      </tr>
      <tr>
        <td class="tablecell label">&nbsp;</td>
        <td class="tablecell arxivdoi"> <a href="https://doi.org/10.48550/arXiv.1907.11692" id="arxiv-doi-link">https://doi.org/10.48550/arXiv.1907.11692</a><div class="button-and-tooltip">
          <!-- type="button": this control only reveals the tooltip and must never submit a form. -->
          <button class="more-info" type="button" aria-describedby="more-info-desc-1">
            <svg height="15" role="presentation" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><path fill="currentColor" d="M256 8C119.043 8 8 119.083 8 256c0 136.997 111.043 248 248 248s248-111.003 248-248C504 119.083 392.957 8 256 8zm0 110c23.196 0 42 18.804 42 42s-18.804 42-42 42-42-18.804-42-42 18.804-42 42-42zm56 254c0 6.627-5.373 12-12 12h-88c-6.627 0-12-5.373-12-12v-24c0-6.627 5.373-12 12-12h12v-64h-12c-6.627 0-12-5.373-12-12v-24c0-6.627 5.373-12 12-12h64c6.627 0 12 5.373 12 12v100h12c6.627 0 12 5.373 12 12v24z"></path></svg>
            <span class="visually-hidden">Focus to learn more</span>
          </button>
          <div role="tooltip" id="more-info-desc-1" class="sf-hidden">
            arXiv-issued DOI via DataCite</div>
        </div>
        </td>
      </tr>
    </tbody>
  </table>
</div>
</div>
</div>
<!-- Submission history: submitter, then one entry per version. -->
<div class="submission-history">
  <h2>Submission history</h2>
  From: Myle Ott [<a href="https://arxiv.org/show-email/980ef4cc/1907.11692" rel="nofollow">view email</a>]
  <br>
  <strong>[v1]</strong> Fri, 26 Jul 2019 17:48:29 UTC (45 KB)<br>
</div>
</div>
<div class=extra-services> <div class=full-text>
<!-- "Access Paper" panel: download links plus the license link. -->
<a name="other"></a>
<span class="descriptor sf-hidden">Full-text links:</span>
<h2>Access Paper:</h2>
<!--
  Hidden description referenced by the "View PDF" link below.
  It sits outside the <ul> because a list may only contain <li> children, and it has its own id
  because "download-button-info" is already used by the description next to the author list
  (ids must be unique within a document).
-->
<div id="download-button-info-sidebar" class="sf-hidden" hidden>
View a PDF of the paper titled RoBERTa: A Robustly Optimized BERT Pretraining Approach, by Yinhan Liu and 9 other authors</div>
<ul>
<li><a href="https://arxiv.org/pdf/1907.11692"
       aria-describedby="download-button-info-sidebar"
       accesskey="f"
       class="abs-button download-pdf">View PDF</a></li><li><a
       href="https://arxiv.org/src/1907.11692"
       class="abs-button download-eprint">TeX Source</a></li>
</ul>
<!-- License link uses https like every other arxiv.org link on the page. -->
<div class="abs-license"><a href="https://arxiv.org/licenses/nonexclusive-distrib/1.0/" title="Rights to this article">view license</a></div>
</div>
<!-- Browse-context panel: previous/next paper, listing shortcuts, and category switch. -->
<div class="browse">
  Current browse context: <div class="current">cs.CL</div>
  <!-- Previous / next paper within cs.CL. -->
  <div class="prevnext">
    <span class="arrow">
      <a class="abs-button prev-url" href="https://arxiv.org/prevnext?id=1907.11692&amp;function=prev&amp;context=cs.CL" accesskey="p" title="previous in cs.CL (accesskey p)" rel="nofollow">&lt;&nbsp;prev</a>
    </span>
    <span class="is-hidden-mobile sf-hidden">&nbsp; | &nbsp;</span> <span class="arrow">
      <a class="abs-button next-url" href="https://arxiv.org/prevnext?id=1907.11692&amp;function=next&amp;context=cs.CL" accesskey="n" title="next in cs.CL (accesskey n)" rel="nofollow">next&nbsp;&gt;</a>
    </span><br>
  </div>
  <!-- Listing shortcuts for the current category. -->
  <div class="list">
    <a class="abs-button abs-button-grey abs-button-small context-new" href="https://arxiv.org/list/cs.CL/new" rel="nofollow">new</a>
    <span class="is-hidden-mobile sf-hidden"> | </span>
    <a class="abs-button abs-button-grey abs-button-small context-recent" href="https://arxiv.org/list/cs.CL/recent" rel="nofollow">recent</a>
    <span class="is-hidden-mobile sf-hidden"> | </span><a class="abs-button abs-button-grey abs-button-small context-id" href="https://arxiv.org/list/cs.CL/2019-07" rel="nofollow">2019-07</a>
  </div>
  <!-- Switch to a different browse context. -->
  <div class="abs-switch-cat">
    Change to browse by:
    <div class="switch context-change">
      <a href="https://arxiv.org/abs/1907.11692?context=cs" rel="nofollow">cs</a><br class="is-hidden-mobile sf-hidden">
    </div>
  </div>
</div>
<!-- External citation indexes for this paper. -->
<div class="extra-ref-cite">
  <h3>References &amp; Citations</h3>
  <ul>
    <li><a class="abs-button abs-button-small cite-ads"
           href="https://ui.adsabs.harvard.edu/abs/arXiv:1907.11692">NASA ADS</a></li><li><a
           class="abs-button abs-button-small cite-google-scholar"
           href="https://scholar.google.com/scholar_lookup?arxiv_id=1907.11692"
           target="_blank"
           rel="noopener">Google Scholar</a></li>
    <li><a class="abs-button abs-button-small cite-semantic-scholar"
           href="https://api.semanticscholar.org/arXiv:1907.11692"
           target="_blank"
           rel="noopener">Semantic Scholar</a></li>
  </ul>
  <!-- Clears the floats used by the buttons above. -->
  <div style="clear:both"></div>
</div>
<!-- Trackback count with a help link. -->
<div class="extra-general">
  <div class="what-is-this">
    <h3><a class="abs-button abs-button-grey abs-button-small trackback-link" href="https://arxiv.org/tb/1907.11692"> 21 blog links</a></h3> (<a href="https://info.arxiv.org/help/trackback.html" class="trackback-help">what is this?</a>)
  </div>
</div>
<!-- DBLP cross-references: record links, then per-author searches. -->
<div class="dblp">
  <h3><a href="https://dblp.uni-trier.de/">DBLP</a> - CS Bibliography</h3>
  <div class="list">
    <a href="https://dblp.uni-trier.de/db/journals/corr/corr1907.html#abs-1907-11692" title="listing on DBLP">listing</a> | <a href="https://dblp.uni-trier.de/rec/bibtex/journals/corr/abs-1907-11692" title="DBLP bibtex record">bibtex</a> </div>
  <div class="list">
    <a href="https://dblp.uni-trier.de/search/author?author=Yinhan%20Liu" title="DBLP author search">Yinhan Liu</a><br>
    <a href="https://dblp.uni-trier.de/search/author?author=Myle%20Ott" title="DBLP author search">Myle Ott</a><br>
    <a href="https://dblp.uni-trier.de/search/author?author=Naman%20Goyal" title="DBLP author search">Naman Goyal</a><br>
    <a href="https://dblp.uni-trier.de/search/author?author=Jingfei%20Du" title="DBLP author search">Jingfei Du</a><br>
    <a href="https://dblp.uni-trier.de/search/author?author=Mandar%20Joshi" title="DBLP author search">Mandar Joshi</a> <div class="list"></div>
  </div>
</div>
<!-- BibTeX export trigger and its hidden loading indicator. -->
<div class="extra-ref-cite">
  <span id="bib-cite-trigger" class="bib-cite-button abs-button">export BibTeX citation</span>
  <span id="bib-cite-loading" class="sf-hidden" hidden>Loading...</span>
</div>
<div id=bib-cite-modal class="bib-modal sf-hidden" hidden>
</div><div class=bookmarks>
<!--
  Social bookmarking links.
  - The BibSonomy link now uses https instead of plain http.
  - Each image is the only content of its link, so its alt text is the link's accessible name;
    it now describes the action (matching the title) instead of describing the logo.
-->
<div><h3>Bookmark</h3></div>
<a class="abs-button abs-button-grey abs-button-small" href="https://www.bibsonomy.org/BibtexHandler?requTask=upload&amp;url=https://arxiv.org/abs/1907.11692&amp;description=RoBERTa:%20A%20Robustly%20Optimized%20BERT%20Pretraining%20Approach" title="Bookmark on BibSonomy">
<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAMAAAAoLQ9TAAABTVBMVEXn5ubf3t6/H2OioaKgn6DLysrHxsa4tre3traysLGxsLCqqKmmpKWioKGenJ2cmpuamJmYlpeWlJWVlJSUkpOQjo+Ib3qMiov+/v78/Pz6+vr5+Pn4+Pj29vb09PSrqal1cnTu7u7i4uLe3t7V1NWJh4fJyMnHxsfDwsO/vr+9vL3y8fG3treDXW3e3d2oqKilpKWkoqSjoqPY19fS0dGXlpfMy8uUkpTGxcXEw8ONjI28u7u4t7ezsbKxr7CjH1etq6yrqaqnpaampaWko6OjoaKioaGhn6Cgn5+fnZ6em52dm5ybmZqVk5SIWW2Rj5CPjY6Oi42Ni4z////9/f37+/t/fX75+floSlfv7+/p6eno5+jl5eXk4+Tj4+Pd3d3b29vZ2dnX19fT09PS0dLR0dGEa3XOzc7Nzc3KycrIx8i+vb53dHW6ubrr6up9Unu9AAAA0klEQVQYlWMIRgJRgeJsDMFSxnEwAYcsRXOGYBcLp5TgYMlsu2RPW2ZVBoZgfS1hs+BgS83oMLlgZbtghuDQhCSRmOBYEXet4OBU1xAGkF5tAePgBNvgYBU+LgewQLCMm5oee7CsEDcrL0QgONQgTZeHL9E4QwYqEJxrzyQmAWJABELVfSP8vIJhAiEqgmo6WSGO6WABE3kOb+ssIwXZ4FABRpBAjjSLXrCoJGdicLCsfx5QwNmQPzw408NABigbGaQRD3QpknetlEwZgpGBj00AAMhwRAoMDs/uAAAAAElFTkSuQmCC" alt="Bookmark on BibSonomy">
</a>
<a class="abs-button abs-button-grey abs-button-small" href="https://reddit.com/submit?url=https://arxiv.org/abs/1907.11692&amp;title=RoBERTa:%20A%20Robustly%20Optimized%20BERT%20Pretraining%20Approach" title="Bookmark on Reddit">
<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABIAAAASCAMAAABhEH5lAAAAclBMVEWtra3/IQClpaX/hFJ7e3tzc3Nra2tjY2P/Yyn/597/zr05OTkxMTHe3t4pKSnW1tbOzs7Gxsb/vaX/GACcnJyUlJSMjIyEhIT/rYxaWlpSUlL///9KSkr39/dCQkL/jFrv7+//hFrn5+f/jGO9vb21tbWaFPpZAAAAxUlEQVQYlU2Q7VrDMAhGEcMyO6thNsH0ta5m9f5vUVxbJz/ycQLkPBD2aMY91TCD/lCRQU5zoDuapAGSo97Rq/gys2JF05KtjpbT7ZFakHAQSq3pIGIxT2SD1H6vXj65MAXzJufrE9B9dWjRMi0vnnN8eAce3y4An3pCxJ517QAt/qMk/Is8ONLaMGVt7Zvdzi/uNVSFmgv04DGtqilW1pQ0f4RfnZt9HNmKZX4u2BCPee0dDsuGjMs6DKOyou3kcwhqvv0AYpYbpE15FCsAAAAASUVORK5CYII=" alt="Bookmark on Reddit">
</a>
</div> </div>
<div id=labstabs>
<div class=labstabs><input type=radio name=tabs id=tabone checked class=sf-hidden>
<label for=tabone>Bibliographic Tools</label>
<div class="tab labs-display-bib">
<h1>Bibliographic and Citation Tools</h1>
<!--
  arXivLabs bibliographic tools: one row per tool, each with a switch (checkbox) and a name/help link.
  - The Bibliographic Explorer checkbox now has aria-labelledby like its three sibling toggles.
  - Links that open a new tab carry rel="noopener", matching the citation links elsewhere on the page.
-->
<div class="toggle">
  <div class="columns is-mobile lab-row">
    <div class="column lab-switch">
      <label class="switch">
        <input id="bibex-toggle" type="checkbox" class="lab-toggle sf-hidden" data-script-url="/static/browse/0.3.4/bibex/bibex.js?20241202" aria-labelledby="label-for-bibex">
        <span class="slider"></span>
        <span class="is-sr-only">Bibliographic Explorer Toggle</span>
      </label>
    </div>
    <div class="column lab-name">
      <span id="label-for-bibex">Bibliographic Explorer</span> <em>(<a href="https://info.arxiv.org/labs/showcase.html#arxiv-bibliographic-explorer">What is the Explorer?</a>)</em>
    </div>
  </div>
  <div class="columns is-mobile lab-row">
    <div class="column lab-switch">
      <label class="switch">
        <input id="connectedpapers-toggle" type="checkbox" class="lab-toggle sf-hidden" data-script-url="/static/browse/0.3.4/js/connectedpapers.js" aria-labelledby="label-for-connected-papers">
        <span class="slider"></span>
        <span class="is-sr-only">Connected Papers Toggle</span>
      </label>
    </div>
    <div class="column lab-name">
      <span id="label-for-connected-papers">Connected Papers</span> <em>(<a href="https://www.connectedpapers.com/about" target="_blank" rel="noopener">What is Connected Papers?</a>)</em>
    </div>
  </div>
  <div class="columns is-mobile lab-row">
    <div class="column lab-switch">
      <label class="switch">
        <input id="litmaps-toggle" type="checkbox" class="lab-toggle sf-hidden" data-script-url="/static/browse/0.3.4/js/litmaps.js?20210617" aria-labelledby="label-for-litmaps">
        <span class="slider"></span>
        <span class="is-sr-only">Litmaps Toggle</span>
      </label>
    </div>
    <div class="column lab-name">
      <span id="label-for-litmaps">Litmaps</span> <em>(<a href="https://www.litmaps.co/" target="_blank" rel="noopener">What is Litmaps?</a>)</em>
    </div>
  </div>
  <div class="columns is-mobile lab-row">
    <div class="column lab-switch">
      <label class="switch">
        <input id="scite-toggle" type="checkbox" class="lab-toggle sf-hidden" data-script-url="/static/browse/0.3.4/js/scite.js?20210617" aria-labelledby="label-for-scite">
        <span class="slider"></span>
        <span class="is-sr-only">scite.ai Toggle</span>
      </label>
    </div>
    <div class="column lab-name">
      <span id="label-for-scite">scite Smart Citations</span> <em>(<a href="https://www.scite.ai/" target="_blank" rel="noopener">What are Smart Citations?</a>)</em>
    </div>
  </div>
</div>
<!-- Empty output containers; the ids correspond to the tools toggled above. -->
<div class="labs-content-placeholder labs-display" style="display:none"></div>
<div style="min-height:15px" id="connectedpapers-output"></div>
<div style="min-height:15px" id="litmaps-open-in"></div>
<div style="min-height:15px" id="scite-open-in"></div>
</div>
<!-- Remaining tabs: a hidden radio input, its visible label, and a panel (empty and sf-hidden in this snapshot). -->
<input class="sf-hidden" id="tabtwo" name="tabs" type="radio">
<label for="tabtwo">Code, Data, Media</label>
<div class="tab sf-hidden">
</div>
<input class="sf-hidden" id="labstabs-demos-input" name="tabs" type="radio">
<label id="labstabs-demos-label" for="labstabs-demos-input">Demos</label>
<div class="tab sf-hidden">
</div>
<input class="sf-hidden" id="tabfour" name="tabs" type="radio">
<label for="tabfour">Related Papers</label>
<div class="tab sf-hidden">
</div>
<input class="sf-hidden" id="tabfive" name="tabs" type="radio">
<label for="tabfive">
About arXivLabs
</label>
<div class="tab sf-hidden">
</div>
</div>
</div>
<!-- Endorser lookup and the MathJax toggle with its help link. -->
<div class="endorsers">
  <a class="endorser-who" href="https://arxiv.org/auth/show-endorsers/1907.11692" rel="nofollow">Which authors of this paper are endorsers?</a> |
  <a id="mathjax_toggle">Disable MathJax</a> (<a href="https://info.arxiv.org/help/mathjax.html">What is MathJax?</a>)
  <span class="help sf-hidden" style="font-style:normal;float:right;margin-top:0;margin-right:1em"></span>
</div>
</div>
</div>
</main>
<footer style=clear:both>
<div class="columns is-desktop" role=navigation aria-label=Secondary style="margin:-0.75em -0.75em 0.75em -0.75em">
<div class=column style=padding:0>
<div class=columns>
<!-- Footer column: About / Help. -->
<div class="column">
  <ul style="list-style:none;line-height:2">
    <li><a href="https://info.arxiv.org/about">About</a></li>
    <li><a href="https://info.arxiv.org/help">Help</a></li>
  </ul>
</div>
<!-- Footer column: Contact / Subscribe, each preceded by an icon. -->
<div class="column">
  <ul style="list-style:none;line-height:2">
    <li>
      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"></path></svg>
      <a href="https://info.arxiv.org/help/contact.html"> Contact</a>
    </li>
    <li>
      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"></path></svg>
      <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a>
    </li>
  </ul>
</div>
</div>
</div>
<div class=column style=padding:0>
<div class=columns>
<!-- Footer column: Copyright / Privacy Policy. -->
<div class="column">
  <ul style="list-style:none;line-height:2">
    <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li>
    <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li>
  </ul>
</div>
<!-- Footer column: accessibility help and service status. -->
<div class="column sorry-app-links">
  <ul style="list-style:none;line-height:2">
    <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li>
    <li>
      <p class="help">
        <!-- rel="noopener": the status page opens in a new tab and must not get a handle on this window,
             matching the other new-tab links on the page. -->
        <a class="a11y-main-link" href="https://status.arxiv.org/" target="_blank" rel="noopener">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"></path></svg></a><br>
      </p>
    </li>
  </ul>
</div>
</div>
</div>
</div>
</footer>
</div>