There has been significant recent progress in the area of unsupervised skill discovery, utilizing various information-theoretic objectives as measures of diversity. Despite these advances, challenges remain: current methods require significant online interaction, fail to leverage vast amounts of available task-agnostic data and typically lack a quantitative measure of skill utility. We address these challenges by proposing a principled offline algorithm for unsupervised skill discovery that, in addition to maximizing diversity, ensures that each learned skill imitates state-only expert demonstrations to a certain degree. Our main analytical contribution is to connect Fenchel duality, reinforcement learning, and unsupervised skill discovery to maximize a mutual information objective subject to KL-divergence state occupancy constraints. Furthermore, we demonstrate the effectiveness of our method on the standard offline benchmark D4RL and on a custom offline dataset collected from a 12-DoF quadruped robot for which the policies trained in simulation transfer well to the real robotic system.
Author(s): | Vlastelica, Marin and Cheng, Jin and Martius, Georg and Kolev, Pavel |
Title: | Offline Diversity Maximization under Imitation Constraints |
Journal: | Reinforcement Learning Journal |
Volume: | 3 |
Pages: | 1377-1409 |
Year: | 2023 |
Month: | July |
Day: | 21 |
Project(s): | |
Bibtex Type: | Article (article) |
DOI: | https://doi.org/10.5281/zenodo.13899776 |
State: | Published |
URL: | https://api.semanticscholar.org/CorpusID:264709383 |
Links: |
BibTex
@article{vlastelica2023:OfflineDM,
  title         = {Offline Diversity Maximization under Imitation Constraints},
  author        = {Vlastelica, Marin and Cheng, Jin and Martius, Georg and Kolev, Pavel},
  journal       = {Reinforcement Learning Journal},
  volume        = {3},
  pages         = {1377--1409},
  month         = jul,
  year          = {2023},
  doi           = {10.5281/zenodo.13899776},
  url           = {https://api.semanticscholar.org/CorpusID:264709383},
  abstract      = {There has been significant recent progress in the area of unsupervised skill discovery, utilizing various information-theoretic objectives as measures of diversity. Despite these advances, challenges remain: current methods require significant online interaction, fail to leverage vast amounts of available task-agnostic data and typically lack a quantitative measure of skill utility. We address these challenges by proposing a principled offline algorithm for unsupervised skill discovery that, in addition to maximizing diversity, ensures that each learned skill imitates state-only expert demonstrations to a certain degree. Our main analytical contribution is to connect Fenchel duality, reinforcement learning, and unsupervised skill discovery to maximize a mutual information objective subject to KL-divergence state occupancy constraints. Furthermore, we demonstrate the effectiveness of our method on the standard offline benchmark D4RL and on a custom offline dataset collected from a 12-DoF quadruped robot for which the policies trained in simulation transfer well to the real robotic system.},
  slug          = {vlastelica2023-offlinedm},
  month_numeric = {7},
}