Benchmarks and Challenges in Pose Estimation for Egocentric Hand Interactions with Objects
We interact with the world with our hands and see it through our own (egocentric) perspective. A holistic 3D understanding of such interactions from egocentric views is important for tasks in robotics, AR/VR, action recognition, and motion generation. Accurately reconstructing such interactions in 3D is challenging due to heavy occlusion, viewpoint bias, camera distortion, and motion blur from head movement. To this end, we designed the HANDS23 challenge based on the AssemblyHands and ARCTIC datasets with carefully designed training and testing splits. Based on the results of the top submitted methods and more recent baselines on the leaderboards, we perform a thorough analysis of 3D hand(-object) reconstruction tasks. Our analysis demonstrates the effectiveness of addressing distortion specific to egocentric cameras, adopting high-capacity transformers to learn complex hand-object interactions, and fusing predictions from different views. Our study further reveals challenging scenarios that remain intractable for state-of-the-art methods, such as fast hand motion, object reconstruction from narrow egocentric views, and close contact between two hands and objects. Our efforts will enrich the community’s knowledge foundation and facilitate future research on egocentric hand-object interactions.
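The abstract highlights fusing predictions from different views as one effective ingredient. As a minimal illustration of that general idea, and not the specific fusion scheme evaluated in the paper, the sketch below averages per-view 3D hand joint predictions with per-joint confidence weights, assuming the predictions have already been transformed into a common coordinate frame; the function name, array shapes, and weighting are illustrative assumptions.

```python
import numpy as np

def fuse_multiview_joints(joints_per_view, confidences):
    """Fuse per-view 3D hand joint predictions by confidence-weighted
    averaging (a hypothetical helper for illustration).

    joints_per_view: (V, J, 3) array of J joints predicted from V views,
                     expressed in a shared world frame.
    confidences:     (V, J) per-joint confidence weights.
    """
    joints = np.asarray(joints_per_view, dtype=np.float64)  # (V, J, 3)
    w = np.asarray(confidences, dtype=np.float64)           # (V, J)
    # Normalize weights across views so each joint's weights sum to 1.
    w = w / np.clip(w.sum(axis=0, keepdims=True), 1e-8, None)
    # Weighted average over the view axis.
    return (joints * w[..., None]).sum(axis=0)              # (J, 3)

# Example: fuse 21-joint hand predictions from 4 egocentric views.
views = np.random.randn(4, 21, 3)
conf = np.random.rand(4, 21)
fused = fuse_multiview_joints(views, conf)
print(fused.shape)  # (21, 3)
```

Confidence weighting lets views with a clear, unoccluded sight of a joint dominate the estimate, which matters under the heavy occlusion typical of egocentric footage.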
@inproceedings{fan2024benchmarks,
  title = {Benchmarks and Challenges in Pose Estimation for Egocentric Hand Interactions with Objects},
  author = {Fan, Zicong and Ohkawa, Takehiko and Yang, Linlin and Lin, Nie and Zhou, Zhishan and Zhou, Shihao and Liang, Jiajun and Gao, Zhong and Zhang, Xuanyang and Zhang, Xue and Li, Fei and Zheng, Liu and Lu, Feng and Zeid, Karim Abou and Leibe, Bastian and On, Jeongwan and Baek, Seungryul and Prakash, Aditya and Gupta, Saurabh and He, Kun and Sato, Yoichi and Hilliges, Otmar and Chang, Hyung Jin and Yao, Angela},
  booktitle = {European Conference on Computer Vision (ECCV 2024)},
  abstract = {We interact with the world with our hands and see it through our own (egocentric) perspective. A holistic 3D understanding of such interactions from egocentric views is important for tasks in robotics, AR/VR, action recognition, and motion generation. Accurately reconstructing such interactions in 3D is challenging due to heavy occlusion, viewpoint bias, camera distortion, and motion blur from head movement. To this end, we designed the HANDS23 challenge based on the AssemblyHands and ARCTIC datasets with carefully designed training and testing splits. Based on the results of the top submitted methods and more recent baselines on the leaderboards, we perform a thorough analysis of 3D hand(-object) reconstruction tasks. Our analysis demonstrates the effectiveness of addressing distortion specific to egocentric cameras, adopting high-capacity transformers to learn complex hand-object interactions, and fusing predictions from different views. Our study further reveals challenging scenarios that remain intractable for state-of-the-art methods, such as fast hand motion, object reconstruction from narrow egocentric views, and close contact between two hands and objects. Our efforts will enrich the community’s knowledge foundation and facilitate future research on egocentric hand-object interactions.},
  pages = {428--448},
  series = {LNCS},
  publisher = {Springer Cham},
  month = sep,
  year = {2024},
  slug = {fan2024benchmarks},
  month_numeric = {9}
}