Recent advances in single-view 3D hair digitization have made the creation of high-quality CG characters scalable and accessible to end-users, enabling new forms of personalized VR and gaming experiences. To handle the complexity and variety of hair structures, most cutting-edge techniques rely on the successful retrieval of a particular hair model from a comprehensive hair database. Not only are the aforementioned data-driven methods storage intensive, but they are also prone to failure for highly unconstrained input images, complicated hairstyles, and failed face detection. Instead of using a large collection of 3D hair models directly, we propose to represent the manifold of 3D hairstyles implicitly through a compact latent space of a volumetric variational autoencoder (VAE). This deep neural network is trained with volumetric orientation field representations of 3D hair models and can synthesize new hairstyles from a compressed code. To enable end-to-end 3D hair inference, we train an additional embedding network to predict the code in the VAE latent space from any input image. Strand-level hairstyles can then be generated from the predicted volumetric representation. Our fully automatic framework does not require any ad-hoc face fitting, intermediate classification and segmentation, or hairstyle database retrieval. Our hair synthesis approach is significantly more robust and can handle a much wider variation of hairstyles than state-of-the-art data-driven hair modeling techniques with challenging inputs, including photos that are low-resolution, overexposured, or contain extreme head poses. The storage requirements are minimal and a 3D hair model can be produced from an image in a second. Our evaluations also show that successful reconstructions are possible from highly stylized cartoon images, non-human subjects, and pictures taken from behind a person. Our approach is particularly well suited for continuous and plausible hair interpolation between very different hairstyles.

@inproceedings{saito_3d_2018,
address = {Tokyo, Japan},
title = {3D {Hair} {Synthesis} {Using} {Volumetric} {Variational} {Autoencoders}},
isbn = {978-1-4503-6008-1},
url = {http://dl.acm.org/citation.cfm?doid=3272127.3275019},
doi = {10.1145/3272127.3275019},
abstract = {Recent advances in single-view 3D hair digitization have made the creation of high-quality CG characters scalable and accessible to end-users, enabling new forms of personalized VR and gaming experiences. To handle the complexity and variety of hair structures, most cutting-edge techniques rely on the successful retrieval of a particular hair model from a comprehensive hair database. Not only are the aforementioned data-driven methods storage intensive, but they are also prone to failure for highly unconstrained input images, complicated hairstyles, and failed face detection. Instead of using a large collection of 3D hair models directly, we propose to represent the manifold of 3D hairstyles implicitly through a compact latent space of a volumetric variational autoencoder (VAE). This deep neural network is trained with volumetric orientation field representations of 3D hair models and can synthesize new hairstyles from a compressed code. To enable end-to-end 3D hair inference, we train an additional embedding network to predict the code in the VAE latent space from any input image. Strand-level hairstyles can then be generated from the predicted volumetric representation. Our fully automatic framework does not require any ad-hoc face fitting, intermediate classification and segmentation, or hairstyle database retrieval. Our hair synthesis approach is significantly more robust and can handle a much wider variation of hairstyles than state-of-the-art data-driven hair modeling techniques with challenging inputs, including photos that are low-resolution, overexposured, or contain extreme head poses. The storage requirements are minimal and a 3D hair model can be produced from an image in a second. Our evaluations also show that successful reconstructions are possible from highly stylized cartoon images, non-human subjects, and pictures taken from behind a person. Our approach is particularly well suited for continuous and plausible hair interpolation between very different hairstyles.},
booktitle = {{SIGGRAPH} {Asia} 2018 {Technical} {Papers} on - {SIGGRAPH} {Asia} '18},
publisher = {ACM Press},
author = {Saito, Shunsuke and Hu, Liwen and Ma, Chongyang and Ibayashi, Hikaru and Luo, Linjie and Li, Hao},
month = dec,
year = {2018},
keywords = {Graphics, UARC},
pages = {1--12}
}