Back
Information Marginalization on Subgraphs
Real-world data often involves objects that exhibit multiple relationships; for example, papers and authors exhibit both paper-author interactions and paper-paper citation relationships. A typical learning problem requires one to make inferences about a subclass of objects (e.g. papers), while using the remaining objects and relations to provide relevant information. We present a simple, unified mechanism for incorporating information from multiple object types and relations when learning on a targeted subset. In this scheme, all sources of relevant information are marginalized onto the target subclass via random walks. We show that marginalized random walks can be used as a general technique for combining multiple sources of information in relational data. With this approach, we formulate new algorithms for transduction and ranking in relational data, and quantify the performance of new schemes on real world data, achieving good results in many problems.
@inproceedings{4163,
  author        = {Huang, J. and Zhu, T. and Rereiner, R. and Zhou, D. and Schuurmans, D.},
  title         = {Information Marginalization on Subgraphs},
  booktitle     = {{ECML/PKDD} 2006},
  journal       = {Knowledge Discovery in Databases: PKDD 2006},
  editor        = {F{\"u}rnkranz, J. and Scheffer, T. and Spiliopoulou, M.},
  pages         = {199--210},
  publisher     = {Springer},
  address       = {Berlin, Germany},
  organization  = {Max-Planck-Gesellschaft},
  school        = {Biologische Kybernetik},
  month         = sep,
  year          = {2006},
  month_numeric = {9},
  slug          = {4163},
  abstract      = {Real-world data often involves objects that exhibit multiple relationships; for example, papers and authors exhibit both paper-author interactions and paper-paper citation relationships. A typical learning problem requires one to make inferences about a subclass of objects (e.g. papers), while using the remaining objects and relations to provide relevant information. We present a simple, unified mechanism for incorporating information from multiple object types and relations when learning on a targeted subset. In this scheme, all sources of relevant information are marginalized onto the target subclass via random walks. We show that marginalized random walks can be used as a general technique for combining multiple sources of information in relational data. With this approach, we formulate new algorithms for transduction and ranking in relational data, and quantify the performance of new schemes on real world data, achieving good results in many problems.},
  internal-note = {NOTE(review): the author name "Rereiner, R." is possibly a misspelling; confirm against the published proceedings. The journal, school, slug and month_numeric fields are not used by standard inproceedings styles and are retained only as metadata; journal appears to hold the proceedings volume title (to be confirmed).},
}