Policy gradient methods are a class of reinforcement learning techniques that optimize parametrized policies with respect to the expected return (the long-term cumulative reward) by gradient ascent. They do not suffer from many of the problems that plague traditional reinforcement learning approaches, such as the lack of guarantees when learning a value function, the intractability that results from uncertain state information, and the complexity arising from continuous states and actions.
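For orientation, the following is a minimal sketch of the likelihood-ratio ("REINFORCE") form of the policy gradient that this class of methods builds on; the notation (policy \pi_\theta, step size \alpha, baseline b, horizon H, N sampled trajectories) is standard and assumed here rather than quoted from the article itself.

```latex
% Expected return of a parametrized policy \pi_\theta over a horizon H
J(\theta) = \mathbb{E}_{\pi_\theta}\!\left[ \sum_{t=0}^{H} \gamma^{t} r_t \right]

% Gradient-ascent update of the policy parameters
\theta_{k+1} = \theta_k + \alpha \, \nabla_\theta J(\theta_k)

% Likelihood-ratio estimator of the gradient from N sampled trajectories,
% with a baseline b subtracted from the trajectory return R^{(n)} to reduce variance
\nabla_\theta J(\theta) \approx \frac{1}{N} \sum_{n=1}^{N} \sum_{t=0}^{H}
    \nabla_\theta \log \pi_\theta\!\left(a_t^{(n)} \mid s_t^{(n)}\right)
    \left( R^{(n)} - b \right)
```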
Author(s): Peters, J.
Journal: Scholarpedia
Volume: 5
Number (issue): 11
Pages: 3698
Year: 2010
Month: November
Bibtex Type: Article (article)
DOI: 10.4249/scholarpedia.3698
Language: en
Organization: Max-Planck-Gesellschaft
School: Biologische Kybernetik
BibTeX
@article{6940,
  title         = {Policy gradient methods},
  journal       = {Scholarpedia},
  abstract      = {Policy gradient methods are a class of reinforcement learning techniques that optimize parametrized policies with respect to the expected return (the long-term cumulative reward) by gradient ascent. They do not suffer from many of the problems that plague traditional reinforcement learning approaches, such as the lack of guarantees when learning a value function, the intractability that results from uncertain state information, and the complexity arising from continuous states and actions.},
  volume        = {5},
  number        = {11},
  pages         = {3698},
  organization  = {Max-Planck-Gesellschaft},
  school        = {Biologische Kybernetik},
  month         = nov,
  year          = {2010},
  slug          = {6940},
  author        = {Peters, J.},
  month_numeric = {11}
}