JanetBackEnd/assistedlab_content.json

{"id":{"1":1,"2":2,"3":3,"4":4,"5":5,"6":6,"7":7,"8":8,"9":9,"10":10,"11":11,"12":12,"13":13,"14":14,"15":15,"16":16,"17":17,"18":18,"19":19,"20":20,"21":21,"22":22,"23":23,"24":24,"25":25,"26":26,"27":27,"28":28,"29":29,"30":30,"31":31,"32":32,"33":33,"34":34,"35":35,"36":36,"37":37,"38":38,"39":39,"40":40,"41":41,"42":42,"43":43,"44":44,"45":45,"46":46,"47":47,"48":48,"49":49,"50":50,"51":51,"52":52,"53":53,"54":54,"55":55,"56":56,"57":57,"58":58,"59":59,"60":60,"61":61,"62":62,"63":63,"64":64,"65":65,"66":66,"67":67,"68":68,"69":69,"70":70,"71":71,"72":72,"73":73,"74":74,"75":75,"76":76,"77":77,"78":78,"79":79,"80":80,"81":81,"82":82,"83":83,"84":84,"85":85,"86":86,"87":87,"88":88,"89":89,"90":90,"91":91,"92":92,"93":93,"94":94,"95":95,"96":96,"97":97,"98":98,"99":99,"100":100,"101":101,"102":102,"103":103,"104":104,"105":105,"106":106,"107":107,"108":108,"109":109,"110":110,"111":111,"112":112,"113":113,"114":114,"115":115,"116":116,"117":117,"118":118,"119":119,"120":120,"121":121,"122":122,"123":123,"124":124,"125":125,"126":126,"127":127,"128":128,"129":129,"130":130,"131":131,"132":132,"133":133,"134":134,"135":135,"136":136,"137":137,"138":138,"139":139,"140":140,"141":141,"142":142,"143":143,"144":144,"145":145,"146":146,"147":147,"148":148,"149":149,"150":150,"151":151,"152":152,"153":153,"154":154,"155":155,"156":156,"157":157,"158":158,"159":159,"160":160,"161":161,"162":162,"163":163,"164":164,"165":165,"166":166,"167":167,"168":168,"169":169,"170":170,"171":171,"172":172,"173":173,"174":174,"175":175,"176":176,"177":177,"178":178,"179":179,"180":180,"181":181,"182":182,"183":183,"184":184,"185":185,"186":186,"187":187,"188":188,"189":189,"190":190,"191":191,"192":192,"193":193,"194":194,"195":195},"paperid":{"1":1,"2":1,"3":1,"4":1,"5":1,"6":1,"7":1,"8":1,"9":1,"10":1,"11":1,"12":1,"13":1,"14":1,"15":1,"16":1,"17":1,"18":1,"19":1,"20":1,"21":1,"22":1,"23":1,"24":1,"25":1,"26":1,"27":1,"28":1,"29":1,"30":1,"31":1,"32":1,"33":1,"34":1,"35":1,"36":1,"37":1,"38":1,"39":1,"40":1,"41":1,"42":1,"43":2,"44":2,"45":2,"46":2,"47":2,"48":2,"49":2,"50":2,"51":2,"52":2,"53":2,"54":2,"55":2,"56":2,"57":2,"58":2,"59":2,"60":2,"61":2,"62":2,"63":2,"64":2,"65":2,"66":2,"67":2,"68":2,"69":2,"70":2,"71":2,"72":2,"73":2,"74":2,"75":2,"76":2,"77":2,"78":2,"79":2,"80":2,"81":2,"82":2,"83":2,"84":2,"85":2,"86":2,"87":2,"88":2,"89":2,"90":2,"91":2,"92":2,"93":2,"94":2,"95":2,"96":2,"97":3,"98":3,"99":5,"100":6,"101":6,"102":6,"103":6,"104":6,"105":6,"106":6,"107":6,"108":6,"109":6,"110":6,"111":6,"112":6,"113":6,"114":6,"115":6,"116":6,"117":6,"118":6,"119":6,"120":6,"121":6,"122":6,"123":6,"124":6,"125":6,"126":6,"127":6,"128":6,"129":6,"130":6,"131":6,"132":6,"133":6,"134":6,"135":6,"136":6,"137":6,"138":6,"139":6,"140":6,"141":6,"142":6,"143":6,"144":6,"145":6,"146":6,"147":6,"148":6,"149":6,"150":6,"151":6,"152":6,"153":6,"154":6,"155":6,"156":6,"157":6,"158":6,"159":6,"160":6,"161":6,"162":6,"163":6,"164":6,"165":6,"166":6,"167":6,"168":6,"169":6,"170":6,"171":6,"172":7,"173":-1,"174":-1,"175":-1,"176":-1,"177":-1,"178":-1,"179":-1,"180":-1,"181":-1,"182":-1,"183":-1,"184":-1,"185":-1,"186":-2,"187":-2,"188":-2,"189":-3,"190":-3,"191":-3,"192":-3,"193":-3,"194":-3,"195":-3},"content":{"1":"Recent success in scaling reinforcement learning (RL) to large problems has been driven in domains that have a well-speci\ufb01ed reward function (Mnih et al, 2015, 2016; Silver et al, 2016). Unfortunately, many tasks involve goals that are complex, poorly-de\ufb01ned, or hard to specify. 
Overcoming this limitation would greatly expand the possible impact of deep RL and could increase the reach of machine learning more broadly. For example, suppose that we wanted to use reinforcement learning to train a robot to clean a table or scramble an egg. It\u2019s not clear how to construct a suitable reward function, which will need to be a function of the robot\u2019s sensors. ","2":"We could try to design a simple reward function that approximately captures the intended behavior, but this will often result in behavior that optimizes our reward
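
The blob above is column-oriented JSON: each top-level key ("id", "paperid", "content") maps a row label to a value, the layout pandas' DataFrame.to_json() produces with its default orient="columns". Each row is a text chunk extracted from a paper, and "paperid" ties the chunk to its source document (the negative values appear to mark chunks that belong to no indexed paper). Below is a minimal sketch of reading the dump back, assuming it really is such a pandas export; the chunk-reassembly logic is illustrative and not taken from the repo.

import pandas as pd

# Load the {"column": {"row": value}} dump back into a DataFrame.
# orient="columns" is pandas' default to_json()/read_json() layout,
# which matches the structure of this file.
df = pd.read_json("JanetBackEnd/assistedlab_content.json", orient="columns")

# Reassemble each paper's text by concatenating its chunks in id order.
# Assumption: "id" preserves the original chunk order within a paper.
papers = df.sort_values("id").groupby("paperid")["content"].apply(" ".join)

print(papers.loc[1][:120])  # start of the text recovered for paper 1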