Back
Measuring Statistical Dependence with Hilbert-Schmidt Norms
We propose an independence criterion based on the eigenspectrum of covariance operators in reproducing kernel Hilbert spaces (RKHSs), consisting of an empirical estimate of the Hilbert-Schmidt norm of the cross-covariance operator (we term this a Hilbert-Schmidt Independence Criterion, or HSIC). This approach has several advantages, compared with previous kernel-based independence criteria. First, the empirical estimate is simpler than any other kernel dependence test, and requires no user-defined regularisation. Second, there is a clearly defined population quantity which the empirical estimate approaches in the large sample limit, with exponential convergence guaranteed between the two: this ensures that independence tests based on {methodname} do not suffer from slow learning rates. Finally, we show in the context of independent component analysis (ICA) that the performance of HSIC is competitive with that of previously published kernel-based criteria, and of other recently published ICA methods.
@inproceedings{3774, title = {Measuring Statistical Dependence with Hilbert-Schmidt Norms}, journal = {Algorithmic Learning Theory: 16th International Conference, ALT 2005}, booktitle = {Algorithmic Learning Theory, Lecture Notes in Computer Science, Vol. 3734}, abstract = {We propose an independence criterion based on the eigenspectrum of covariance operators in reproducing kernel Hilbert spaces (RKHSs), consisting of an empirical estimate of the Hilbert-Schmidt norm of the cross-covariance operator (we term this a Hilbert-Schmidt Independence Criterion, or HSIC). This approach has several advantages, compared with previous kernel-based independence criteria. First, the empirical estimate is simpler than any other kernel dependence test, and requires no user-defined regularisation. Second, there is a clearly defined population quantity which the empirical estimate approaches in the large sample limit, with exponential convergence guaranteed between the two: this ensures that independence tests based on {methodname} do not suffer from slow learning rates. Finally, we show in the context of independent component analysis (ICA) that the performance of HSIC is competitive with that of previously published kernel-based criteria, and of other recently published ICA methods.}, pages = {63-78}, editors = {S Jain and H-U Simon and E Tomita}, publisher = {Springer}, organization = {Max-Planck-Gesellschaft}, school = {Biologische Kybernetik}, address = {Berlin, Germany}, month = oct, year = {2005}, slug = {3774}, author = {Gretton, A. and Bousquet, O. and Smola, A. and Schoelkopf, B.}, month_numeric = {10} }