
Generating personalized 3D avatars is crucial for AR/VR. However, recent text-to-3D methods that generate avatars for celebrities or fictional characters, struggle with everyday people. Methods for faithful reconstruction typically require full-body images in controlled settings. What if a user could just upload their personal "OOTD" (Outfit Of The Day) photo collection and get a faithful avatar in return? The challenge is that such casual photo collections contain diverse poses, challenging viewpoints, cropped views, and occlusion (albeit with a consistent outfit, accessories and hairstyle). We address this novel "Album2Human" task by developing PuzzleAvatar, a novel model that generates a faithful 3D avatar (in a canonical pose) from a personal OOTD album, while bypassing the challenging estimation of body and camera pose. To this end, we fine-tune a foundational vision-language model (VLM) on such photos, encoding the appearance, identity, garments, hairstyles, and accessories of a person into (separate) learned tokens and instilling these cues into the VLM. In effect, we exploit the learned tokens as "puzzle pieces" from which we assemble a faithful, personalized 3D avatar. Importantly, we can customize avatars by simply inter-changing tokens. As a benchmark for this new task, we collect a new dataset, called PuzzleIOI, with 41 subjects in a total of nearly 1K OOTD configurations, in challenging partial photos with paired ground-truth 3D bodies. Evaluation shows that PuzzleAvatar not only has high reconstruction accuracy, outperforming TeCH and MVDreamBooth, but also a unique scalability to album photos, and strong robustness. Our code and data are publicly available for research purpose.
Author(s): | Yuliang Xiu and Zhen Liu and Dimitris Tzionas and Michael J. Black |
Journal: | ACM Transactions on Graphics |
Volume: | 43 |
Number (issue): | 6 |
Pages: | 1--15 |
Year: | 2024 |
Month: | December |
Publisher: | ACM |
Project(s): | |
Bibtex Type: | Article (article) |
DOI: | https://doi.org/10.1145/3687771 |
State: | Published |
URL: | https://puzzleavatar.is.tue.mpg.de/ |
Article Number: | 283 |
Electronic Archiving: | grant_archive |
Event Place: | Tokyo, Japan |
BibTex
@article{puzzleavatar2024xiu , title = {{PuzzleAvatar}: Assembling {3D} Avatars from Personal Albums}, journal = {ACM Transactions on Graphics}, abstract = {Generating personalized 3D avatars is crucial for AR/VR. However, recent text-to-3D methods that generate avatars for celebrities or fictional characters, struggle with everyday people. Methods for faithful reconstruction typically require full-body images in controlled settings. What if a user could just upload their personal "OOTD" (Outfit Of The Day) photo collection and get a faithful avatar in return? The challenge is that such casual photo collections contain diverse poses, challenging viewpoints, cropped views, and occlusion (albeit with a consistent outfit, accessories and hairstyle). We address this novel "Album2Human" task by developing PuzzleAvatar, a novel model that generates a faithful 3D avatar (in a canonical pose) from a personal OOTD album, while bypassing the challenging estimation of body and camera pose. To this end, we fine-tune a foundational vision-language model (VLM) on such photos, encoding the appearance, identity, garments, hairstyles, and accessories of a person into (separate) learned tokens and instilling these cues into the VLM. In effect, we exploit the learned tokens as "puzzle pieces" from which we assemble a faithful, personalized 3D avatar. Importantly, we can customize avatars by simply inter-changing tokens. As a benchmark for this new task, we collect a new dataset, called PuzzleIOI, with 41 subjects in a total of nearly 1K OOTD configurations, in challenging partial photos with paired ground-truth 3D bodies. Evaluation shows that PuzzleAvatar not only has high reconstruction accuracy, outperforming TeCH and MVDreamBooth, but also a unique scalability to album photos, and strong robustness. Our code and data are publicly available for research purpose.}, volume = {43}, number = {6}, pages = {1--15}, publisher = {ACM}, month = dec, year = {2024}, slug = {puzzleavatar}, author = {Xiu, Yuliang and Liu, Zhen and Tzionas, Dimitris and Black, Michael J.}, url = {https://puzzleavatar.is.tue.mpg.de/}, month_numeric = {12} }