Back
Mean squared advantage minimization as a consequence of entropic policy improvement regularization
@comment{
  BelPet18: paper listed under the European Workshop on Reinforcement
  Learning (EWRL), October 2018. The fields slug and month_numeric are
  not standard BibTeX fields; standard styles silently ignore unknown
  fields, so they are kept here purely as metadata.
}
@inproceedings{BelPet18,
  author        = {Belousov, B. and Peters, J.},
  title         = {Mean squared advantage minimization as a consequence of entropic policy improvement regularization},
  booktitle     = {European Workshop on Reinforcement Learning ({EWRL})},
  month         = oct,
  year          = {2018},
  url           = {https://ewrl.files.wordpress.com/2018/09/ewrl_14_2018_paper_33.pdf},
  slug          = {belpet18-81db1f52-bdcf-4dcf-a1e1-3bdc5925b196},
  month_numeric = {10},
}