Back
Fast Kernel ICA using an Approximate Newton Method
Recent approaches to independent component analysis (ICA) have used kernel independence measures to obtain very good performance, particularly where classical methods experience difficulty (for instance, sources with near-zero kurtosis). We present Fast Kernel ICA (FastKICA), a novel optimisation technique for one such kernel independence measure, the Hilbert-Schmidt independence criterion (HSIC). Our search procedure uses an approximate Newton method on the special orthogonal group, where we estimate the Hessian locally about independence. We employ incomplete Cholesky decomposition to efficiently compute the gradient and approximate Hessian. FastKICA results in more accurate solutions at a given cost compared with gradient descent, and is relatively insensitive to local minima when initialised far from independence. These properties allow kernel approaches to be extended to problems with larger numbers of sources and observations. Our method is competitive with other modern and classical ICA approaches in both speed and accuracy.
@comment{Conference paper from AISTATS 2007. The slug and month_numeric fields are non-standard bookkeeping fields that standard styles ignore. NOTE(review): organization and school look like author-affiliation export data rather than a proceedings sponsor; confirm before relying on them in rendered output.}
@inproceedings{4295,
  author        = {Shen, H. and Jegelka, S. and Gretton, A.},
  title         = {Fast Kernel {ICA} using an Approximate {Newton} Method},
  booktitle     = {Proceedings of the 11th International Conference on Artificial Intelligence and Statistics ({AISTATS} 2007)},
  series        = {{JMLR} Workshop and Conference Proceedings},
  volume        = {2},
  editor        = {Meila, M. and Shen, X.},
  pages         = {476--483},
  publisher     = {MIT Press},
  organization  = {Max-Planck-Gesellschaft},
  school        = {Biologische Kybernetik},
  address       = {Cambridge, MA, USA},
  month         = mar,
  year          = {2007},
  abstract      = {Recent approaches to independent component analysis (ICA) have used kernel independence measures to obtain very good performance, particularly where classical methods experience difficulty (for instance, sources with near-zero kurtosis). We present Fast Kernel ICA (FastKICA), a novel optimisation technique for one such kernel independence measure, the Hilbert-Schmidt independence criterion (HSIC). Our search procedure uses an approximate Newton method on the special orthogonal group, where we estimate the Hessian locally about independence. We employ incomplete Cholesky decomposition to efficiently compute the gradient and approximate Hessian. FastKICA results in more accurate solutions at a given cost compared with gradient descent, and is relatively insensitive to local minima when initialised far from independence. These properties allow kernel approaches to be extended to problems with larger numbers of sources and observations. Our method is competitive with other modern and classical ICA approaches in both speed and accuracy.},
  slug          = {4295},
  month_numeric = {3},
}