We introduce a family of unsupervised algorithms, numerical taxonomy clustering, to simultaneously cluster data, and to learn a taxonomy that encodes the relationship between the clusters. The algorithms work by maximizing the dependence between the taxonomy and the original data. The resulting taxonomy is a more informative visualization of complex data than simple clustering; in addition, taking into account the relations between different clusters is shown to substantially improve the quality of the clustering, when compared with state-of-the-art algorithms in the literature (both spectral clustering and a previous dependence maximization approach). We demonstrate our algorithm on image and text data.
Author(s): | Blaschko, MB. and Gretton, A. |
Number (issue): | 181 |
Year: | 2008 |
Month: | November |
Day: | 0 |
Bibtex Type: | Technical Report (techreport) |
Digital: | 0 |
Electronic Archiving: | grant_archive |
Institution: | Max-Planck Institute for Biological Cybernetics, Tübingen, Germany |
Language: | en |
Organization: | Max-Planck-Gesellschaft |
School: | Biologische Kybernetik |
Links: |
BibTeX
@comment{Technical report no. 181 (November 2008), Max-Planck Institute for Biological Cybernetics. The fields organization, school, slug and month_numeric are not used by standard techreport styles and are retained as exported metadata.}
@techreport{5618,
  author        = {Blaschko, M. B. and Gretton, A.},
  title         = {Taxonomy Inference Using Kernel Dependence Measures},
  institution   = {Max-Planck Institute for Biological Cybernetics},
  address       = {T{\"u}bingen, Germany},
  number        = {181},
  month         = nov,
  year          = {2008},
  abstract      = {We introduce a family of unsupervised algorithms, numerical taxonomy clustering, to simultaneously cluster data, and to learn a taxonomy that encodes the relationship between the clusters. The algorithms work by maximizing the dependence between the taxonomy and the original data. The resulting taxonomy is a more informative visualization of complex data than simple clustering; in addition, taking into account the relations between different clusters is shown to substantially improve the quality of the clustering, when compared with state-of-the-art algorithms in the literature (both spectral clustering and a previous dependence maximization approach). We demonstrate our algorithm on image and text data.},
  organization  = {Max-Planck-Gesellschaft},
  school        = {Biologische Kybernetik},
  slug          = {5618},
  month_numeric = {11},
}