Recently the Fisher score (or the Fisher kernel) is increasingly used as a feature extractor for classification problems. The Fisher score is a vector of parameter derivatives of loglikelihood of a probabilistic model. This paper gives a theoretical analysis about how class information is preserved in the space of the Fisher score, which turns out that the Fisher score consists of a few important dimensions with class information and many nuisance dimensions. When we perform clustering with the Fisher score, K-Means type methods are obviously inappropriate because they make use of all dimensions. So we will develop a novel but simple clustering algorithm specialized for the Fisher score, which can exploit important dimensions. This algorithm is successfully tested in experiments with artificial data and real data (amino acid sequences).
Author(s): | Tsuda, K. and Kawanabe, M. and Müller, KR. |
Book Title: | Advances in Neural Information Processing Systems 15 |
Journal: | Advances in Neural Information Processing Systems 15 |
Pages: | 729-736 |
Year: | 2003 |
Month: | October |
Day: | 0 |
Editors: | Becker, S. and Thrun, S. and Obermayer, K. |
Publisher: | MIT Press |
Bibtex Type: | Conference Paper (inproceedings) |
Address: | Cambridge, MA, USA |
Event Name: | Sixteenth Annual Conference on Neural Information Processing Systems (NIPS 2002) |
Event Place: | Vancouver, BC, Canada |
Digital: | 0 |
Electronic Archiving: | grant_archive |
ISBN: | 0-262-02550-7 |
Organization: | Max-Planck-Gesellschaft |
School: | Biologische Kybernetik |
Links: |
BibTeX
@inproceedings{2175,
  author        = {Tsuda, K. and Kawanabe, M. and M{\"u}ller, K.-R.},
  title         = {Clustering with the {Fisher} score},
  booktitle     = {Advances in Neural Information Processing Systems 15},
  editor        = {Becker, S. and Thrun, S. and Obermayer, K.},
  pages         = {729--736},
  publisher     = {MIT Press},
  organization  = {Max-Planck-Gesellschaft},
  school        = {Biologische Kybernetik},
  address       = {Cambridge, MA, USA},
  month         = oct,
  year          = {2003},
  isbn          = {0-262-02550-7},
  abstract      = {Recently the Fisher score (or the Fisher kernel) is increasingly used as a feature extractor for classification problems. The Fisher score is a vector of parameter derivatives of loglikelihood of a probabilistic model. This paper gives a theoretical analysis about how class information is preserved in the space of the Fisher score, which turns out that the Fisher score consists of a few important dimensions with class information and many nuisance dimensions. When we perform clustering with the Fisher score, K-Means type methods are obviously inappropriate because they make use of all dimensions. So we will develop a novel but simple clustering algorithm specialized for the Fisher score, which can exploit important dimensions. This algorithm is successfully tested in experiments with artificial data and real data (amino acid sequences).},
  slug          = {2175},
  month_numeric = {10}
}