Feudal Multi-Agent Hierarchies for Cooperative Reinforcement Learning
We investigate how reinforcement learning agents can learn to cooperate. Drawing inspiration from human societies, in which successful coordination of many individuals is often facilitated by hierarchical organisation, we introduce Feudal Multi-agent Hierarchies (FMH). In this framework, a 'manager' agent, which is tasked with maximising the environmentally-determined reward function, learns to communicate subgoals to multiple, simultaneously-operating, 'worker' agents. Workers, which are rewarded for achieving managerial subgoals, take concurrent actions in the world. We outline the structure of FMH and demonstrate its potential for decentralised learning and control. We find that, given an adequate set of subgoals from which to choose, FMH performs, and particularly scales, substantially better than cooperative approaches that use a shared reward function.
@inproceedings{item_3180163,
  title = {{Feudal Multi-Agent Hierarchies for Cooperative Reinforcement Learning}},
  booktitle = {{Adaptive and Learning Agents Workshop (ALA 2019)}},
  abstract = {{We investigate how reinforcement learning agents can learn to cooperate. Drawing inspiration from human societies, in which successful coordination of many individuals is often facilitated by hierarchical organisation, we introduce Feudal Multi-agent Hierarchies (FMH). In this framework, a \textquoteleft manager\textquoteright{} agent, which is tasked with maximising the environmentally-determined reward function, learns to communicate subgoals to multiple, simultaneously-operating, \textquoteleft worker\textquoteright{} agents. Workers, which are rewarded for achieving managerial subgoals, take concurrent actions in the world. We outline the structure of FMH and demonstrate its potential for decentralised learning and control. We find that, given an adequate set of subgoals from which to choose, FMH performs, and particularly scales, substantially better than cooperative approaches that use a shared reward function.}},
  pages = {1--5},
  address = {Washington, DC, USA},
  year = {2019},
  slug = {item_3180163},
  author = {Ahilan, S and Dayan, P}
}
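The reward structure described in the abstract can be sketched in a few lines of code. The following is a minimal, illustrative Python sketch only: a single manager assigns subgoals to concurrently-acting workers, workers are rewarded for reaching their assigned subgoals, and the manager alone receives the environmental reward. The tabular Q-learning agents, the one-dimensional toy environment, and all names (QAgent, SUBGOALS, TARGET, and so on) are assumptions made for illustration and are not the authors' implementation.

# Illustrative sketch of the FMH manager/worker reward split (all details assumed).
import random
from collections import defaultdict

N_WORKERS = 2
SUBGOALS = [0, 1, 2, 3]   # assumed discrete subgoal set the manager chooses from
ACTIONS = [-1, 0, +1]     # assumed worker moves on a short 1-D line
TARGET = 2                # environment rewards the manager when all workers reach TARGET

class QAgent:
    """Tabular epsilon-greedy Q-learner, an assumed stand-in for the paper's learners."""
    def __init__(self, actions, eps=0.1, alpha=0.5, gamma=0.9):
        self.q = defaultdict(float)
        self.actions, self.eps, self.alpha, self.gamma = actions, eps, alpha, gamma

    def act(self, state):
        if random.random() < self.eps:
            return random.choice(self.actions)
        return max(self.actions, key=lambda a: self.q[(state, a)])

    def update(self, state, action, reward, next_state):
        best_next = max(self.q[(next_state, a)] for a in self.actions)
        target = reward + self.gamma * best_next
        self.q[(state, action)] += self.alpha * (target - self.q[(state, action)])

manager = QAgent(actions=SUBGOALS)                    # chooses a subgoal for each worker
workers = [QAgent(actions=ACTIONS) for _ in range(N_WORKERS)]

for episode in range(500):
    positions = [0 for _ in workers]                  # each worker's position
    state = tuple(positions)
    for step in range(10):
        # Manager communicates one subgoal (a target position) per worker.
        subgoals = [manager.act(state) for _ in workers]
        # Workers act concurrently, each conditioned on its own position and subgoal.
        moves = [w.act((p, g)) for w, p, g in zip(workers, positions, subgoals)]
        new_positions = [max(0, min(3, p + m)) for p, m in zip(positions, moves)]
        # Workers are rewarded for achieving the managerial subgoals...
        for w, p, g, m, np_ in zip(workers, positions, subgoals, moves, new_positions):
            w.update((p, g), m, 1.0 if np_ == g else 0.0, (np_, g))
        # ...while the manager is rewarded by the environment itself.
        env_reward = 1.0 if all(p == TARGET for p in new_positions) else 0.0
        next_state = tuple(new_positions)
        for g in subgoals:
            manager.update(state, g, env_reward, next_state)
        positions, state = new_positions, next_state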