Back
Taxonomy Inference Using Kernel Dependence Measures
We introduce a family of unsupervised algorithms, numerical taxonomy clustering, to simultaneously cluster data, and to learn a taxonomy that encodes the relationship between the clusters. The algorithms work by maximizing the dependence between the taxonomy and the original data. The resulting taxonomy is a more informative visualization of complex data than simple clustering; in addition, taking into account the relations between different clusters is shown to substantially improve the quality of the clustering, when compared with state-of-the-art algorithms in the literature (both spectral clustering and a previous dependence maximization approach). We demonstrate our algorithm on image and text data.
@comment{
  Technical report no. 181 (November 2008), cited under the key 5618.
  The fields organization, school, slug and month_numeric are not standard
  techreport fields; they are kept as exported metadata.
  NOTE(review): under biblatex, school is treated as an alias of institution,
  so confirm that carrying both does not trigger an alias warning in Biber.
}
@techreport{5618,
  author        = {Blaschko, M. B. and Gretton, A.},
  title         = {Taxonomy Inference Using Kernel Dependence Measures},
  institution   = {Max-Planck Institute for Biological Cybernetics},
  address       = {T{\"u}bingen, Germany},
  number        = {181},
  month         = nov,
  year          = {2008},
  abstract      = {We introduce a family of unsupervised algorithms, numerical taxonomy clustering, to simultaneously cluster data, and to learn a taxonomy that encodes the relationship between the clusters. The algorithms work by maximizing the dependence between the taxonomy and the original data. The resulting taxonomy is a more informative visualization of complex data than simple clustering; in addition, taking into account the relations between different clusters is shown to substantially improve the quality of the clustering, when compared with state-of-the-art algorithms in the literature (both spectral clustering and a previous dependence maximization approach). We demonstrate our algorithm on image and text data.},
  organization  = {Max-Planck-Gesellschaft},
  school        = {Biologische Kybernetik},
  slug          = {5618},
  month_numeric = {11},
}