Back
Injective Hilbert Space Embeddings of Probability Measures
A Hilbert space embedding for probability measures has recently been proposed, with applications including dimensionality reduction, homogeneity testing and independence testing. This embedding represents any probability measure as a mean element in a reproducing kernel Hilbert space (RKHS). The embedding function has been proven to be injective when the reproducing kernel is universal. In this case, the embedding induces a metric on the space of probability distributions defined on compact metric spaces. In the present work, we consider more broadly the problem of specifying characteristic kernels, defined as kernels for which the RKHS embedding of probability measures is injective. In particular, characteristic kernels can include non-universal kernels. We restrict ourselves to translation-invariant kernels on Euclidean space, and define the associated metric on probability measures in terms of the Fourier spectrum of the kernel and characteristic functions of these measures. The support of the kernel spectrum is important in finding whether a kernel is characteristic: in particular, the embedding is injective if and only if the kernel spectrum has the entire domain as its support. Characteristic kernels may nonetheless have difficulty in distinguishing certain distributions on the basis of finite samples, again due to the interaction of the kernel spectrum and the characteristic functions of the measures.
@inproceedings{5122, title = {Injective Hilbert Space Embeddings of Probability Measures}, journal = {Proceedings of the 21st Annual Conference on Learning Theory (COLT 2008)}, booktitle = {Proceedings of the 21st Annual Conference on Learning Theory}, abstract = {A Hilbert space embedding for probability measures has recently been proposed, with applications including dimensionality reduction, homogeneity testing and independence testing. This embedding represents any probability measure as a mean element in a reproducing kernel Hilbert space (RKHS). The embedding function has been proven to be injective when the reproducing kernel is universal. In this case, the embedding induces a metric on the space of probability distributions defined on compact metric spaces. In the present work, we consider more broadly the problem of specifying characteristic kernels, defined as kernels for which the RKHS embedding of probability measures is injective. In particular, characteristic kernels can include non-universal kernels. We restrict ourselves to translation-invariant kernels on Euclidean space, and define the associated metric on probability measures in terms of the Fourier spectrum of the kernel and characteristic functions of these measures. The support of the kernel spectrum is important in finding whether a kernel is characteristic: in particular, the embedding is injective if and only if the kernel spectrum has the entire domain as its support. Characteristic kernels may nonetheless have difficulty in distinguishing certain distributions on the basis of finite samples, again due to the interaction of the kernel spectrum and the characteristic functions of the measures.}, pages = {111-122}, editors = {RA Servedio and T Zhang}, publisher = {Omnipress}, organization = {Max-Planck-Gesellschaft}, school = {Biologische Kybernetik}, address = {Madison, WI, USA}, month = jul, year = {2008}, slug = {5122}, author = {Sriperumbudur, BK. and Gretton, A. and Fukumizu, K. and Lanckriet, G. and Sch{\"o}lkopf, B.}, month_numeric = {7} }