Back
Efficient Sample Reuse in EM-Based Policy Search
Direct policy search is a promising reinforcement learning framework in particular for controlling in continuous, high-dimensional systems such as anthropomorphic robots. Policy search often requires a large number of samples for obtaining a stable policy update estimator due to its high flexibility. However, this is prohibitive when the sampling cost is expensive. In this paper, we extend an EM-based policy search method so that previously collected samples can be efficiently reused. The usefulness of the proposed method, called Reward-weighted Regression with sample Reuse, is demonstrated through a robot learning experiment.
% Conference paper entry (key 6068).
% - `editor` replaces the unrecognised `editors` field; names are in
%   "Last, First" form joined by " and " so all four editors parse.
% - The text formerly in `journal` (not a valid @inproceedings field) now
%   lives in `booktitle`; the conference name moved to `eventtitle`.
%   NOTE(review): this assumes the old `journal` value is the proceedings
%   volume title -- confirm against the publisher record.
% - `organization`, `school`, `slug` and `month_numeric` are carried over
%   unchanged from the source export.
@inproceedings{6068,
  author        = {Hachiya, H. and Peters, J. and Sugiyama, M.},
  title         = {Efficient Sample Reuse in {EM}-Based Policy Search},
  booktitle     = {Machine Learning and Knowledge Discovery in Databases: European Conference {ECML} {PKDD} 2009},
  eventtitle    = {16th European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases},
  editor        = {Buntine, W. and Grobelnik, M. and Mladenic, D. and Shawe-Taylor, J.},
  pages         = {469--484},
  publisher     = {Springer},
  address       = {Berlin, Germany},
  organization  = {Max-Planck-Gesellschaft},
  school        = {Biologische Kybernetik},
  month         = sep,
  year          = {2009},
  abstract      = {Direct policy search is a promising reinforcement learning framework in particular for controlling in continuous, high-dimensional systems such as anthropomorphic robots. Policy search often requires a large number of samples for obtaining a stable policy update estimator due to its high flexibility. However, this is prohibitive when the sampling cost is expensive. In this paper, we extend an EM-based policy search method so that previously collected samples can be efficiently reused. The usefulness of the proposed method, called Reward-weighted Regression with sample Reuse, is demonstrated through a robot learning experiment.},
  slug          = {6068},
  month_numeric = {9},
}