{"id":{"4":4,"7":7,"6":6,"5":5,"3":3,"2":2,"1":1},"type":{"4":"Paper","7":"Paper","6":"Paper","5":"Paper","3":"Paper","2":"Paper","1":"Paper"},"resources":{"4":[{"name":"textfile","url":"https:\/\/data.d4science.net\/6nwG","description":""}],"7":[{"name":"reinforcement learning","url":"https:\/\/data.d4science.net\/QDPK","description":""}],"6":[{"name":"retrieval-augmented generation","url":"https:\/\/data.d4science.net\/x3Yy","description":""}],"5":[{"name":"neural approaches to conversational information retrieval","url":"https:\/\/data.d4science.net\/Fr32","description":""}],"3":[{"name":"paper","url":"https:\/\/data.d4science.org\/shub\/E_cERSSERldlBFak1pOTZ4eXJRajM3ekl4a3l0L0JBZmpENE01TGRvNEE3TnB4UEhUTENTQ1RzbnJWQVFPKzRacg==","description":"paper about chatbots"}],"2":[{"name":"learning to summarize from human feedback","url":"https:\/\/data.d4science.org\/shub\/E_cnlTU2xJMTVXbXpSTHVJcDZPQkl0eThOUGRGR3ZqaFZUZGdWUmtHb25wN2pPbW9RUDVINFdQUXl1T1dwTXY5Vw==","description":"paper about developing models for machine summarization using human \r\nfeedback"}],"1":[{"name":"deep reinforcement learning from human preferences","url":"https:\/\/data.d4science.org\/shub\/E_NUMzdFB1Q0xiRGl4S2hFa3VEcU11NExrMVppb29hT0RvdEkwWDdOdTAyMWFLeTBleGx1V2Z5Z28rVVpBSlBYbQ==","description":""}]},"tags":{"4":["atag"],"7":["reinforcement learning"],"6":["qa","retrieval","answer generation","transformers"],"5":["cir","ir","conversational agents","conversational information retrieval","neural information retrieval"],"3":["chatbot","conversational agent"],"2":["deep learning","language generation","reinforcement learning","reinforcement learning from human feedback","rlhf"],"1":["reinforcement learning","reinforcement learning from human feedback","rlhf"]},"title":{"4":"this is a sample item","7":"reinforcement learning","6":"retrieval-augmented language generation","5":"neural conversational information retrieval","3":"survey about chatbots","2":"summarizing from human feedback","1":"deep reinforcement learning from human preferences"},"author":{"4":"candela leonardo","7":"ibrahim ahmed salah tawfik","6":"ibrahim ahmed salah tawfik","5":"ibrahim ahmed salah tawfik","3":"ibrahim ahmed salah tawfik","2":"ibrahim ahmed salah tawfik","1":"ibrahim ahmed salah tawfik"},"notes":{"4":"this is a sample item","7":"this paper explains the main concepts and algorithms of reinforcement \r\nlearning.","6":"this paper describes an algorithm to generate answers based on paragraphs \r\npotentially containing the answer.","5":"this paper is a survey about the different applications in which neural \r\nconversational information retrieval can be used.","3":"this paper presents a survey about chatbots and conversational agents.","2":"this paper explains a method for generating machine translations from human \r\nfeedback.","1":"this paper explains how to use human preferences to generate rewards for a \r\nreinforcement learning algorithm."},"metadata_created":{"4":1675700208.3923931122,"7":1676130193.0102539062,"6":1676130537.5889539719,"5":1676130738.7923879623,"3":1681208191.8971168995,"2":1681446416.8742809296,"1":1681446678.2581589222}}