@comment{Kut2015: conference paper, CIKM 2015. The month is given as the
  standard three-letter macro (unquoted) so that bibliography styles can
  format and localise it; the year lives only in the year field.}
@inproceedings{Kut2015,
  author       = {Kutzkov, Konstantin and Ahmed, Mohamed and Nikitaki, Sofia},
  title        = {Weighted Similarity Estimation in Data Streams},
  booktitle    = {CIKM},
  year         = {2015},
  month        = oct,
  publisher    = {ACM},
  organization = {ACM},
  abstract     = {Similarity computation between pairs of objects is often a bottleneck in many applications that have to deal with massive volumes of data. Motivated by applications such as collaborative filtering in large-scale recommender systems, and influence probabilities learning in social networks, we present new randomized algorithms for the estimation of weighted similarity in data streams.

Previous works have addressed the problem of learning binary similarity measures in a streaming setting. To the best of our knowledge, the algorithms proposed here are the first that specifically address the estimation of weighted similarity in data streams. The algorithms need only one pass over the data, making them ideally suited to handling massive data streams in real time.

We obtain precise theoretical bounds on the approximation error and complexity of the algorithms. The results of evaluating our algorithms on two real-life datasets validate the theoretical findings and demonstrate the applicability of the proposed algorithms.},
}