Back
Approximation Algorithms for Tensor Clustering
We present the first (to our knowledge) approximation algo- rithm for tensor clusteringa powerful generalization to basic 1D clustering. Tensors are increasingly common in modern applications dealing with complex heterogeneous data and clustering them is a fundamental tool for data analysis and pattern discovery. Akin to their 1D cousins, common tensor clustering formulations are NP-hard to optimize. But, unlike the 1D case no approximation algorithms seem to be known. We address this imbalance and build on recent co-clustering work to derive a tensor clustering algorithm with approximation guarantees, allowing metrics and divergences (e.g., Bregman) as objective functions. Therewith, we answer two open questions by Anagnostopoulos et al. (2008). Our analysis yields a constant approximation factor independent of data size; a worst-case example shows this factor to be tight for Euclidean co-clustering. However, empirically the approximation factor is observed to be conservative, so our method can also be used in practice.
@inproceedings{5916, title = {Approximation Algorithms for Tensor Clustering}, booktitle = {Algorithmic Learning Theory: 20th International Conference}, abstract = {We present the first (to our knowledge) approximation algo- rithm for tensor clusteringa powerful generalization to basic 1D clustering. Tensors are increasingly common in modern applications dealing with complex heterogeneous data and clustering them is a fundamental tool for data analysis and pattern discovery. Akin to their 1D cousins, common tensor clustering formulations are NP-hard to optimize. But, unlike the 1D case no approximation algorithms seem to be known. We address this imbalance and build on recent co-clustering work to derive a tensor clustering algorithm with approximation guarantees, allowing metrics and divergences (e.g., Bregman) as objective functions. Therewith, we answer two open questions by Anagnostopoulos et al. (2008). Our analysis yields a constant approximation factor independent of data size; a worst-case example shows this factor to be tight for Euclidean co-clustering. However, empirically the approximation factor is observed to be conservative, so our method can also be used in practice.}, pages = {368-383}, editors = {Gavalda, R. , G. Lugosi, T. Zeugmann, S. Zilles}, publisher = {Springer}, organization = {Max-Planck-Gesellschaft}, school = {Biologische Kybernetik}, address = {Berlin, Germany}, month = oct, year = {2009}, slug = {5916}, author = {Jegelka, S. and Sra, S. and Banerjee, A.}, month_numeric = {10} }