@inproceedings {Van2016, title = {A Brief History of MPLS Usage in IPv6}, booktitle = {Passive and Active Measurement Conference (PAM)}, year = {2016}, month = {03/2016}, abstract = {

Recent studies have documented the fast deployment of IPv6. It has been demonstrated that IPv6 is growing much faster, being increasingly adopted both by Internet service providers and by servers and end-hosts. In parallel, studies have been conducted to discover and assess the usage of MPLS tunnels. Indeed, recent developments in the ICMP protocol make certain categories of MPLS tunnels transparent to traceroute probing. However, these studies focus only on IPv4, where MPLS is strongly deployed.

In this paper, we provide a first look at how MPLS is used in IPv6 networks, using traceroute data collected by CAIDA. We observe, at first glance, that MPLS deployment and usage seem to differ greatly between IPv4 and IPv6, in particular in the way MPLS label stacks are used. While label stacks are not that frequent in IPv4 (and mostly correspond to VPN usage), they are prevalent in IPv6. However, after a deeper look at the typical content of label stacks in IPv6, we find that 2-label stack tunnels are mainly used for dual-stack 6PE tunnels and ECMP load-sharing purposes. The technical deployment of such tunnels is in practice very similar to VPNs, but the objective is not the same: they are standard tunnels built with the IPv4 LDP for carrying IPv6 traffic.

}, keywords = {6PE tunnels, IPv6, LSE Stack, MPLS}, author = {Yves Vanaubel and Pascal M{\'e}rindol and Jean-Jacques Pansiot and Benoit Donnet} } @article {Teg2016, title = {Experimental investigation on TCP throughput behavior in Optical Fiber Access Networks}, journal = {Fiber and Integrated Optics}, year = {2016}, type = {Journal article.}, author = {Edion Tego and Francesco Matera and Donato Del Buono} } @inproceedings {Pap2016a, title = {Mixed-Integer Optimization for the Combined capacitated Facility Location-Routing Problem}, booktitle = {International Conference on the Design of Reliable Communication Networks (DRCN) 2016}, year = {2016}, publisher = {IEEE XPlore}, organization = {IEEE XPlore}, address = {Paris, France}, author = {Dimitri Papadimitriou and Didier Colle and Piet Demeester} } @article {Pap2016, title = {Understanding and modeling the inter-play between Sustainability, Resilience, and Robustness in networks}, journal = {Electronic Notes in Discrete Mathematics}, volume = {51}, year = {2016}, author = {Dimitri Papadimitriou and H.Niedermayer and B.Lannoo and J.Rak} } @article {Cas2016, title = {Unveiling Network and Service Performance Degradation in the Wild with mPlane}, journal = {IEEE Communications Magazine - Network Testing Series}, year = {2016}, abstract = {

Unveiling network and service performance issues in complex and highly decentralized systems such as the Internet is a major challenge. Indeed, the Internet is based on decentralization and diversity. However, its distributed nature leads to operational brittleness and difficulty in identifying the root causes of performance degradation. In such a context, network measurements are a fundamental pillar for shedding light on and unveiling design and implementation defects. To tackle this fragmentation and visibility problem, we have recently conceived mPlane, a distributed measurement platform that runs, collects and analyses traffic measurements to study the operation and functioning of the Internet. In this paper, we show the potential of the mPlane approach to unveil network and service degradation issues in live, operational fixed-line and cellular networks. In particular, we combine active and passive measurements to troubleshoot problems in end-customer Internet access connections, or to automatically detect and diagnose anomalies in Internet-scale services (e.g., YouTube) which impact a large number of end-users.

}, author = {Pedro Casas and Pierdomenico Fiadino and Sarah Wassermann and Stefano Traverso and Alessandro D{\textquoteright}Alconzo and Edion Tego and Francesco Matera and Marco Mellia} } @inproceedings {Cas2015a, title = {On the Analysis of QoE in Cellular Networks: from Subjective Tests to Large-scale Traffic Measurements}, booktitle = {6th International Workshop on Traffic Analysis and Characterization (TRAC)}, year = {2015}, author = {Pedro Casas and Martin Varela and Pierdomenico Fiadino and Mirko Schiavone and Helena Rivas and Raimund Schatz} } @inproceedings {DR:AIMS-15, title = {Anycast census and geolocation}, booktitle = {7th Workshop on Active Internet Measurements (AIMS 2015)}, year = {2015}, month = {April 2015}, url = {http://www.enst.fr/~drossi/paper/rossi15aims.pdf}, author = {Cicalese, Danilo and Auge, Jordan and Joumblatt, Diana and Rossi, Dario and Friedman, Timur} } @inproceedings {Bae2015, title = {Cache Oblivious Scheduling of Shared Workloads}, booktitle = {31st IEEE International Conference on Data Engineering (ICDE)}, year = {2015}, month = {05/2015}, publisher = {IEEE}, organization = {IEEE}, address = {Seoul, Korea}, abstract = {

Shared workload optimization is feasible if the set of tasks to be executed is known in advance, as is the case in updating a set of materialized views or executing an extract-transform-load workflow. In this paper, we consider data-intensive shared workloads with precedence constraints arising from data dependencies, i.e., before executing some task, other tasks may have to run first and generate some data needed by the next task(s). While there has been previous work on identifying common subexpressions in shared workloads and on task re-ordering to enable shared scans, in this paper we go a step further and solve the problem of scheduling shared data-intensive workloads in a cache-oblivious way. Our solution relies on a novel formulation of precedence-constrained scheduling with the additional constraint that once a data item is in the cache, all tasks that require this data item should execute as soon as possible thereafter. The intuition behind this formulation is that the longer a data item remains in the cache, the more likely it is to be evicted, regardless of the cache size. We give an optimal ordering algorithm using A* search over the space of possible orderings, and we propose efficient and effective heuristics that obtain nearly-optimal results in much less time. We present experimental results on real-life data warehouse workloads and the TPC-DS benchmark to validate our claims.

}, author = {Arian B{\"a}r and Lukasz Golab and Stefan Ruehrup and Mirko Schiavone and Pedro Casas} } @inproceedings {Faat1510:Cautious, title = {A Cautious Look at Using Internet Standards-to-be in Research Work}, booktitle = {2015 IEEE Conference on Standards for Communications and Networking (CSCN) (CSCN{\textquoteright}15)}, year = {2015}, address = {Tokyo, Japan}, abstract = {

Standardization of Internet protocols is usually a somewhat slow process. The reasons for this are manifold. Besides working out the protocol details, opposing stakeholder interests can prolong the consensus-building process, new requirements might be introduced that require technical changes to the protocol, and coordination across standards developing organizations (SDOs) might add delays, to name just a few. For potential users of a standard-to-be, the time to specify and implement it often stalls progress on projects which could have been finished far earlier using proprietary - but ultimately non-interoperable - implementations. For research work, however, interoperability is not always an important concern. The overhead and delay of an SDO in these cases is typically a hard-to-calculate risk for a research project. It represents an external dependency for the work, but there is only a finite amount of funding and time to finish the project. On the other hand, using standardized technology increases the likelihood that the output of the project will be used by external parties after the lifetime of a research project, and the implementation experience can be valuable input to the standardization process. In this paper, we analyze the lifecycle of recent Internet standards to provide researchers an insight into the duration of the Internet Engineering Task Force (IETF) standardization process. We evaluate different areas, document phases, working groups and other aspects of the standardization process. This allows researchers to better judge whether they want to employ standards-to-be in research work or engage with the IETF to specify protocols based on research prototypes.

}, keywords = {document lifecycle, ietf, RFCs}, author = {Michael Faath and Rolf Winter and Fabian Weisshaar} } @inproceedings {Fia2015b, title = {Challenging Entropy-based Anomaly Detection and Diagnosis in Cellular Networks}, booktitle = {ACM SIGCOMM}, year = {2015}, author = {Pierdomenico Fiadino and Alessandro D{\textquoteright}Alconzo and Mirko Schiavone and Pedro Casas} } @inproceedings {DR:CoNEXT-15, title = {Characterizing IPv4 Anycast Adoption and Deployment}, booktitle = {ACM CoNEXT}, year = {2015}, month = {12/2015}, publisher = {ACM}, organization = {ACM}, address = {Heidelberg, DE}, url = {http://www.enst.fr/~drossi/paper/rossi15conext.pdf}, author = {Cicalese, Danilo and Auge, Jordan and Joumblatt, Diana and Friedman, Timur and Rossi, Dario} } @article {Teg2015, title = {Comparison of TCP congestion control algorithms in data transfers on high RTT}, year = {2015}, type = {Poster}, author = {Edion Tego and Francesco Matera} } @article {D61, title = {Demonstration Plan}, number = {D6.1}, year = {2015}, month = {01/2015}, type = {Public Deliverable}, issn = {D6.1}, author = {Pedro Casas and Edion Tego and Francesco Matera and Maurizio Dusi and A. Bakay and Balazs Szabo and G. Rozsa and Stefano Traverso and Ilias Leontiadis and L. Baltrunas and Yan Grunenberger and Andrea Fregosi and A. Kahveci and Eike Kowallik and G. Mattellini and C. Meregalli and Stefano Raffaglio and M. Russo and Andrea Sannino and M. Scarpino} } @inproceedings {Pap2015a, title = {Distributed Monitoring Problem}, booktitle = {7th International Network Optimization Conference (INOC) 2015}, year = {2015}, author = {Dimitri Papadimitriou and Bernard Fortz} } @inproceedings {TrKu15, title = {Enabling Internet-Wide Deployment of Explicit Congestion Notification}, booktitle = {Proceedings of the 2015 Passive and Active Measurement Conference}, year = {2015}, month = {03/2015}, address = {New York}, abstract = {

Explicit Congestion Notification (ECN) is a TCP/IP extension to signal network congestion without packet loss, which has barely seen deployment though it was standardized and implemented more than a decade ago. On-going activities in research and standardization aim to make the usage of ECN more beneficial. This measurement study provides an update on deployment status and newly assesses the marginal risk of enabling ECN negotiation by default on client end-systems. Additionally, we dig deeper into causes of connectivity and negotiation issues linked to ECN. We find that about five websites per thousand suffer additional connection setup latency when fallback per RFC 3168 is correctly implemented; we provide a patch for Linux to properly perform this fallback. Moreover, we detect and explore a number of cases in which ECN brokenness is clearly path-dependent, i.e., dependent on middleboxes beyond the access or content provider network. Further analysis of these cases can guide their elimination, further reducing the risk of enabling ECN by default.

}, url = {http://ecn.ethz.ch/ecn-pam15.pdf}, author = {Brian Trammell and Mirja K{\"u}hlewind and Damiano Boppart and Iain Learmonth and Gorry Fairhurst and Richard Scheffenegger} } @inproceedings {Cas2015b, title = {Exploring QoE in Cellular Networks: How Much Bandwidth do you Need for Popular Smartphone Apps?}, booktitle = {5th ACM SIGCOMM Workshop on All Things Cellular: Operations, Applications and Challenges}, year = {2015}, author = {Pedro Casas and Raimund Schatz and Florian Wamser and Michael Seufert and Ralf Irmer} } @inproceedings {DR:INFOCOM-15a, title = {A Fistful of Pings: Accurate and Lightweight Anycast Enumeration and Geolocation}, booktitle = {IEEE INFOCOM}, year = {2015}, month = {04/2015}, abstract = {

Use of IP-layer anycast has increased in the last few years: once relegated to DNS root and top-level domain servers, anycast is now commonly used to assist the distribution of general-purpose content by CDN providers. Yet, the measurement techniques for discovering anycast replicas have been designed around DNS, limiting their usefulness to this particular service. This raises the need for protocol-agnostic methodologies, which should additionally be as lightweight as possible in order to scale up anycast service discovery. This is precisely the aim of this paper, which proposes a new method for exhaustive and accurate enumeration and city-level geolocation of anycast instances, requiring only a handful of latency measurements from a set of known vantage points. Our method exploits an iterative workflow that enumerates (an optimization problem) and geolocates (a classification problem) anycast replicas. We thoroughly validate our methodology on available ground truth (several DNS root servers), using multiple measurement infrastructures (PlanetLab, RIPE), obtaining extremely accurate results (even with simple algorithms, which we compare with the global optimum) that we make available to the scientific community. Compared to the state-of-the-art work that appeared in INFOCOM 2013 and IMC 2013, our technique (i) is not bound to a specific protocol, (ii) requires 1000 times fewer vantage points, (iii) not only achieves over 50\% recall but also (iv) accurately identifies the city-level geolocation for over 78\% of the enumerated servers, with (v) a mean geolocation error of 361 km for all enumerated servers.

}, url = {http://www.enst.fr/~drossi/paper/rossi15infocom.pdf}, author = {Cicalese, Danilo and Joumblatt, Diana and Rossi, Dario and Buob, Marc-Olivier and Auge, Jordan and Friedman, Timur} } @inproceedings {Faa2015a, title = {A GLIMPSE of the Internet{\textquoteright}s Fabric}, booktitle = {International Conference on Performance Evaluation Methodologies and Tools}, year = {2015}, month = {12/2015}, address = {Berlin}, author = {Michael Faath and Rolf Winter} } @article {2016, title = {HFSP: Bringing size-based scheduling to Hadoop}, journal = {IEEE Transactions on Cloud Computing}, year = {2015}, author = {Mario Pastorelli and Damiano Carra and Matteo Dell{\textquoteright}Amico and Pietro Michiardi} } @inproceedings {Dim2015, title = {Identifying the Root Cause of Video Streaming Issues on Mobile Devices}, booktitle = {CoNEXT}, year = {2015}, author = {G. Dimopoulos and I. Leontiadis and P. Barlet-Ros and K. Papagiannaki and P. Steenkiste} } @inproceedings {DR:TRAC-15, title = {Impact of Carrier-Grade NAT on Web Browsing}, booktitle = {6th International Workshop on TRaffic Analysis and Characterization (TRAC)}, note = {Best Paper Award}, year = {2015}, month = {08/2015}, publisher = {IEEE}, organization = {IEEE}, address = {Dubrovnik, Croatia}, abstract = {

Public IPv4 addresses are a scarce resource. While IPv6 adoption is lagging, Network Address Translation (NAT) technologies have been deployed over the last years to alleviate IPv4 address scarcity and the high rental cost of public addresses. In particular, Carrier-Grade NAT (CGN) is a well-known solution to mask a whole ISP network behind a limited amount of public IP addresses, significantly reducing expenses. Despite its economic benefits, CGN can introduce connectivity issues which have spurred a considerable effort in research, development and standardization. However, to the best of our knowledge, little effort has been dedicated to investigating the impact that CGN deployment may have on users{\textquoteright} traffic. This paper fills the gap. We leverage passive measurements from an ISP network deploying CGN and, by means of the Jensen-Shannon divergence, we contrast several performance metrics considering customers being offered public or private addresses. In particular, we gauge the impact of CGN presence on users{\textquoteright} web browsing experience. Our results attest that CGN is a mature and stable technology as, if properly deployed, it does not harm users{\textquoteright} web browsing experience. Indeed, while our analysis reveals expected stochastic differences in certain indexes (e.g., the difference in the path hop count), the measurements related to the quality of users{\textquoteright} browsing are otherwise unperturbed. Interestingly, we also observe that CGN protects customers from unsolicited, often malicious, traffic.

}, url = {http://www.enst.fr/~drossi/paper/rossi15trac.pdf}, author = {Bocchi, Enrico and Safari, Ali and Traverso, Stefano and Finamore, Alessandro and Di Gennaro, Valeria and Mellia, Marco and Munafo, Maurizio and Rossi, Dario} } @inproceedings {7249283, title = {Lagrangian relaxation for the time-dependent combined network design and routing problem}, booktitle = {Communications (ICC), 2015 IEEE International Conference on}, year = {2015}, month = {June}, publisher = {IEEE XPlore}, organization = {IEEE XPlore}, address = {London, United-Kingdom}, keywords = {allocation planning process, flow conservation constraints, integer programming, integer programming methods, Lagrangian relaxation, linear programming, Maintenance engineering, multi-commodity capacitated fixed charge network design, network routing problem, Optimization, Quality of Service, Reliability, resource allocation, resource installation, Resource management, Routing, routing decision process, telecommunication network routing, telecommunication traffic, time-dependent combined network design, traffic demands}, doi = {10.1109/ICC.2015.7249283}, author = {Dimitri Papadimitriou and Bernard Fortz and Enrico Gorgone} } @inproceedings {DR:INFOCOM-15b, title = {A Lightweight Anycast Enumeration and Geolocation}, booktitle = {IEEE INFOCOM, Demo Session}, year = {2015}, address = {Hong Kong, China}, abstract = {

Several Internet services such as CDNs, DNS name servers, and sinkholes use IP-layer anycast to reduce user response times and increase robustness with respect to network failures and denial-of-service attacks. However, current geolocation tools fail with anycast IP addresses. In our recent work [1], we remedy this by developing an anycast detection, enumeration, and geolocation technique based on a set of delay measurements from a handful of geographically distributed vantage points. The technique (i) detects if an IP is anycast, (ii) enumerates replicas by finding the maximum set of non-overlapping disks (i.e., areas centered around vantage points), and (iii) geolocates the replicas by solving a classification problem and assigning the server location to the most likely city. We propose to demo this technique. In particular, we visually show how to detect an anycast IP, enumerate its replicas, and geolocate them on a map. The demo allows users to browse previously geolocated services, as well as to explore new targets on demand.

}, url = {http://www.enst.fr/~drossi/paper/rossi15infocom-b.pdf}, author = {Cicalese, Danilo and Joumblatt, Diana and Rossi, Dario and Buob, Marc-Olivier and Auge, Jordan and Friedman, Timur} } @inproceedings {Yon2015, title = {A Lightweight Network Proximity Service Based On Neighborhood Models}, booktitle = {22nd IEEE Symposium on Communications and Vehicular Technology in the Benelux (SCVT)}, year = {2015}, month = {11/2015}, publisher = {IEEE}, organization = {IEEE}, address = {Luxembourg}, abstract = {

This paper proposes a network proximity service based on the neighborhood models used in recommender systems. Unlike previous approaches, our service infers network proximity without trying to recover the latency between network nodes. By asking each node to probe a number of landmark nodes, which can be servers at Google, Yahoo and Facebook, etc., a simple proximity measure is computed and allows the direct ranking and rating of network nodes by their proximity to a target node. The service is thus lightweight and can be easily deployed in, e.g., P2P and CDN applications. Simulations on existing datasets and experiments with a deployment over PlanetLab showed that our service achieves an accurate proximity inference that is comparable to state-of-the-art latency prediction approaches, while being much simpler.

}, author = {Liao, Yongjun and Du, Wei and Leduc, Guy} } @inproceedings {Faa2015, title = {Measurements with the Masses}, booktitle = {IRTF \& ISOC Research and Applications of Internet Measurements (RAIM) Workshop}, year = {2015}, address = {Yokohama, Japan}, author = {Michael Faath and Rolf Winter} } @inproceedings {Seu2015a, title = {On the Monitoring of YouTube QoE in Cellular Networks from End-devices}, booktitle = {Seventh ACM S3 Workshop}, year = {2015}, author = {Michael Seufert and Florian Wamser and Pedro Casas and Ralf Irmer and Phuoc Tran-Gia and Raimund Schatz} } @inproceedings {Van2015, title = {MPLS Under the Microscope: Revealing Actual Transit Path Diversity}, booktitle = {Internet Measurement Conference (IMC)}, year = {2015}, month = {10/2015}, abstract = {

Traffic Engineering (TE) is one of the keys to improving packet forwarding in the Internet. It allows IP network operators to finely tune their forwarding paths according to various customer needs. One of the most popular tools available today for optimizing the use of networking resources is MPLS. On the one hand, operators may use MPLS and label distribution mechanisms such as RSVP-TE in conjunction with BGP to define multiple transit paths (for a given edge pair) satisfying different constraints on their network. On the other hand, when operators simply enable LDP for distributing MPLS labels in order to improve the scalability of their network, another kind of path diversity may appear thanks to the ECMP feature of IGP routing.

In this paper, using an analysis of MPLS labels, we demonstrate that it is possible to better understand the transit path diversity deployed within a given ISP. More specifically, we introduce the Label Pattern Recognition (LPR) algorithm, a method for analyzing traceroute data including MPLS information. LPR reveals the actual usage of MPLS according to the inferred label distribution protocol and is able to distinguish between ECMP and TE multi-path forwarding. Based on an extensive and longitudinal traceroute dataset obtained from CAIDA, we apply LPR and find that each ISP exhibits a distinctive behavior with regard to its MPLS usage. In particular, we are able to observe, independently for each ISP, the MPLS path diversity and usage, and its evolution over time. Globally speaking, the main outcomes of our study are that (i) the usage of MPLS has been increasing over the last five years, with basic encapsulation being predominant, (ii) path diversity is mainly provided thanks to ECMP and LDP, and (iii) TE using MPLS is as common as MPLS without path diversity.

}, keywords = {ECMP, LDP, MPLS, multipath, network discovery, RSVP-TE, traceroute, traffic engineering}, author = {Yves Vanaubel and Pascal M{\'e}rindol and Jean-Jacques Pansiot and Benoit Donnet} } @inproceedings {bar2015m, title = {MTRAC - Discovering M2M Devices in Cellular Networks from Coarse-grained Measurements}, booktitle = {IEEE International Conference on Communications (ICC)}, year = {2015}, author = {Arian B{\"a}r and Philippe Svoboda and Pedro Casas} } @inproceedings {Nay2015, title = {multi-context TLS (mcTLS): Enabling Secure In-Network Functionality in TLS}, booktitle = {2015 ACM SIGCOMM Conference (SIGCOMM {\textquoteright}15)}, year = {2015}, publisher = {ACM}, organization = {ACM}, address = {London}, abstract = {

Transport Layer Security (TLS) is the de facto protocol supporting secure HTTP (HTTPS), and is being discussed as the default transport protocol for HTTP 2.0. It has seen wide adoption and is currently carrying a significant fraction of the overall HTTP traffic (Facebook, Google and Twitter use it by default). However, TLS makes the fundamental assumption that all functionality resides solely at the endpoints, and is thus unable to utilize the many in-network services that optimize network resource usage, improve user experience, and protect clients and servers from security threats. Re-introducing such in-network functionality into secure TLS sessions today is done through hacks, in many cases weakening overall security.

In this paper we introduce multi-context TLS (mcTLS), which enhances TLS by allowing middleboxes to be fully supported participants in TLS sessions. mcTLS breaks the "all-or-nothing" security model by allowing endpoints and content providers to explicitly introduce middleboxes into secure end-to-end sessions, while deciding whether they should have read or write access, and to which specific parts of the content. mcTLS enables transparency and control for both clients and servers.

We evaluate a prototype mcTLS implementation in both controlled and "live" experiments, showing that the benefits offered have minimal overhead. More importantly, we show that mcTLS can be incrementally deployed and requires small changes to clients, servers, and middleboxes for a large number of use cases.

}, author = {David Naylor and Kyle Schomp and Matteo Varvello and Ilias Leontiadis and Jeremy Blackburn and Diego Lopez and Konstantina Papagiannaki and Pablo Rodriguez and Peter Steenkiste} } @inproceedings {Tra2015, title = {A New Transport Encapsulation for Middlebox Cooperation}, booktitle = {Proceedings of the 2015 IEEE Conference on Standards for Communications and Networking}, year = {2015}, month = {Oct}, address = {Tokyo, Japan}, author = {Brian Trammell and Mirja Kuehlewind and Elio Gubser and Joe Hildebrand} } @inproceedings {Pap2015, title = {Nonparametric Statistical Methods to Analyze the Internet Connectivity Reliability}, booktitle = {IEEE International Conference on Communications Quality and Reliability (CQR 2015)}, year = {2015}, author = {Dimitri Papadimitriou and Davide Careglio} } @inproceedings {Tra2015a, title = {Observing Internet Path Transparency to Support Protocol Engineering}, booktitle = {Proceedings of the first IRTF/ISOC Workshop on Research and Applications of Internet Measurements (RAIM)}, year = {2015}, month = {Oct}, address = {Yokohama, Japan}, author = {Brian Trammell and Mirja K{\"u}hlewind} } @inproceedings {7145326, title = {Online Social Networks anatomy: On the analysis of Facebook and WhatsApp in cellular networks}, booktitle = {IFIP Networking Conference (IFIP Networking), 2015}, year = {2015}, month = {May}, keywords = {Cellular Networks, Content Delivery Networks, Europe, Facebook, Internet, IP networks, Network Measurements, Online Social Networks, Organizations, Servers, WhatsApp}, doi = {10.1109/IFIPNetworking.2015.7145326}, author = {Fiadino, Pierdomenico and Casas, Pedro and Schiavone, Mirko and D{\textquoteright}Alconzo, Alessandro} } @article {7096995, title = {Personal Cloud Storage Benchmarks and Comparison}, journal = {Cloud Computing, IEEE Transactions on}, volume = {PP}, year = {2015}, pages = {1-1}, abstract = {The large amount of space offered by personal cloud storage services (e.g., Dropbox and OneDrive), together with the possibility of synchronizing devices seamlessly, keep attracting customers to the cloud. Despite the high public interest, little information about system design and actual implications on performance is available when selecting a cloud storage service. Systematic benchmarks to assist in comparing services and understanding the effects of design choices are still lacking. This paper proposes a methodology to understand and benchmark personal cloud storage services. Our methodology unveils their architecture and capabilities. Moreover, by means of repeatable and customizable tests, it allows the measurement of performance metrics under different workloads. The effectiveness of the methodology is shown in a case study in which 11 services are compared under the same conditions. Our case study reveals interesting differences in design choices. Their implications are assessed in a series of benchmarks. Results show no clear winner, with all services having potential for improving performance. In some scenarios, the synchronization of the same files can take 20 times longer. In other cases, we observe a wastage of twice as much network capacity, questioning the design of some services. 
Our methodology and results are thus useful both as benchmarks and as guidelines for system design.}, keywords = {Benchmark testing, Cloud computing, Cloud storage, Computers, Google, Measurements, Performance, Servers, Synchronization}, issn = {2168-7161}, doi = {10.1109/TCC.2015.2427191}, author = {Enrico Bocchi and Idilio Drago and Marco Mellia} } @inproceedings {Boc2015, title = {Personal Cloud Storage: Usage, Performance and Impact of Terminals }, booktitle = {4th IEEE International Conference on Cloud Networking (IEEE CloudNet 2015)}, year = {2015}, month = {10/2015}, publisher = {IEEE}, organization = {IEEE}, address = {Niagara Falls, Canada}, abstract = {

Personal cloud storage services such as Dropbox and OneDrive are popular among Internet users. They help in sharing content and backing up data by relying on the cloud to store files. The rise of mobile terminals and the presence of new providers raise the question of whether the usage of cloud storage is evolving. This knowledge is essential to understand the workload these services need to handle, their performance, and the resulting implications. In this paper we present a comprehensive characterization of personal cloud storage services. Relying on traces collected for one month in an operational network, we show that users of each service present distinct behaviors. Dropbox is now threatened by competitors, with OneDrive and Google Drive reaching large market shares. However, the popularity of the latter services seems to be driven by their integration into Windows and Android. Indeed, around 50\% of their users do not produce any workload. Considering performance, providers show distinct trade-offs, with bottlenecks that hardly allow users to fully exploit their access line bandwidth. Finally, usage of cloud services is now commonplace among mobile users, thanks to the automatic backup of pictures and media files.

}, keywords = {Cloud storage, Monitoring}, url = {http://www.ieee-cloudnet.org/program.html}, author = {Enrico Bocchi and Idilio Drago and Marco Mellia} } @inproceedings {Cas2015, title = {QoMOSN - On the Analysis of Traffic and Quality of Experience in Mobile Online Social Networks}, booktitle = {European Conference on Networks and Communications (EuCNC)}, year = {2015}, author = {Pedro Casas and Pierdomenico Fiadino and Mirko Schiavone} } @article {Wei2014, title = {Rating Network Paths for Locality-Aware Overlay Construction and Routing}, journal = {IEEE/ACM Transactions on Networking}, volume = {23}, year = {2015}, month = {10/2015}, chapter = {1661}, abstract = {

This paper investigates the rating of network paths, i.e., acquiring quantized measures of path properties such as round-trip time and available bandwidth. Compared to fine-grained measurements, coarse-grained ratings are appealing in that they are not only informative but also cheap to obtain.

Motivated by this insight, we first address the scalable acquisition of path ratings by statistical inference. By observing similarities to recommender systems, we examine the applicability of recommender-system solutions and show that our inference problem can be solved by a class of matrix factorization techniques. A technical contribution is an active and progressive inference framework that not only improves the accuracy by selectively measuring more informative paths but also speeds up the convergence for available bandwidth by incorporating its measurement methodology.

Then, we investigate the usability of rating-based network measurement and inference in applications. A case study is performed on whether locality awareness can be achieved for the overlay networks of Pastry and BitTorrent using inferred ratings.

We show that such coarse-grained knowledge can improve the performance of peer selection and that finer granularities do not always lead to larger improvements.

}, keywords = {matrix factorization, network inference, rating-based network measurement, recommender system}, author = {Du, Wei and Liao, Yongjun and Tao, Narisu and Geurts, Pierre and Fu, Xiaoming and Leduc, Guy} } @inproceedings {Fia2015a, title = {RCATool - A Framework for Detecting and Diagnosing Anomalies in Cellular Networks}, booktitle = {27th International Teletraffic Congress (ITC)}, year = {2015}, author = {Pierdomenico Fiadino and Alessandro D{\textquoteright}Alconzo and Mirko Schiavone and Pedro Casas} } @inproceedings {7325228, title = {Robust cooperative monitoring problem}, booktitle = {Reliable Networks Design and Modeling (RNDM), 2015 7th International Workshop on}, year = {2015}, month = {10/2015}, publisher = {IEEE XPlore}, organization = {IEEE XPlore}, address = {Munich, Germany}, keywords = {box+ellipsoidal perturbation set, box+polyhedral perturbation set, cooperative communication, integer programming, linear programming, MILP, Minimization, mixed-integer linear program, Monitoring, Optimization, passive monitoring point configuration, passive monitoring point placement, robust cooperative monitoring problem, Robustness, Routing, telecommunication traffic, time-varying traffic flow monitoring, Topology, Uncertainty}, doi = {10.1109/RNDM.2015.7325228}, author = {Dimitri Papadimitriou and Bernard Fortz} } @article {Teg2015a, title = {SLA verification and certification, Traffic Monitoring and Analysis}, year = {2015}, type = {Poster}, author = {Edion Tego and Elena Mammi and Ariana Rufini and Francesco Matera} } @inproceedings {Hou2015, title = {A Study of the Impact of DNS Resolvers on Performance Using a Causal Approach}, booktitle = {Internet Teletraffic Congress}, year = {2015}, month = {08/2015}, address = {Ghent, Belgium}, abstract = {For a user to access any resource on the Internet, it is necessary to first locate a server hosting the requested resource. The Domain Name System service (DNS) represents the first step in this process, translating a human readable name, the resource host name, into an IP address. With the expansion of Content Distribution Networks (CDNs), the DNS service has seen its importance increase. In a CDN, objects are replicated on different servers to decrease the distance from the client to a server hosting the object that needs to be accessed. The DNS service should improve user experience by directing its demand to the optimal CDN server. 
While most of the Internet Service Providers (ISPs) offer a DNS service to their customers, it is now common to see clients using a public DNS service instead. This choice may have an impact on Web browsing performance. In this paper we study the impact of choosing one DNS server instead of another and we compare the performance of a large European ISP DNS service with the one of a public DNS service, Google DNS. We propose a causal approach to expose the structural dependencies of the different parameters impacted by the DNS service used and we show how to model these dependencies with a Bayesian network. This model allows us to explain and quantify the benefits obtained by clients using their ISP DNS service and to propose a solution to further improve their performance.}, keywords = {DNS, reasoner}, author = {Hadrien Hours and Ernst Biersack and Patrick Loiseau and Alessandro Finamore and Marco Mellia} } @inproceedings {Cas2015d, title = {Taming QoE in Cellular Networks: from Subjective Lab Studies to Measurements in the Field}, booktitle = {IRTF \& ISOC Workshop on Research and Applications of Internet Measurements (RAIM)}, year = {2015}, month = {11/2015}, author = {P Casas and B Gardlo and M Seufert and F Wamser and R Schatz} } @inproceedings {Cas2015c, title = {Taming QoE in Cellular Networks: from Subjective Lab Studies to Measurements in the Field}, booktitle = {11th International Conference on Network and Service Management (CNSM)}, year = {2015}, month = {10/2015}, author = {P Casas and B Gardlo and M Seufert and F Wamser and R Schatz} } @inproceedings {Ede2015a, title = {Towards a Middlebox Policy Taxonomy: Path Impairments}, booktitle = {International Workshop on Network Science for Communication Networks (NetSciCom)}, year = {2015}, month = {04/2015}, abstract = {

Recent years have seen the rise of middleboxes, such as firewalls, NATs, proxies, or Deep Packet Inspectors. Those middleboxes play an important role in today{\textquoteright}s Internet, including enterprise networks and cellular networks. However, despite their huge success in modern network architectures, they have a negative impact on the evolution of the Internet, as they can slow down the evolution of TCP and its extensions. Making available a summary of potential middlebox network interference is therefore of the highest importance, as it would allow researchers to confront their new transport protocols with potential issues caused by middleboxes and, consequently, enable innovation in the Internet again.

This is exactly what we tackle in this paper. We propose a path-impairment-oriented middlebox taxonomy that aims at categorizing the initial purpose of a middlebox policy as well as its potential unexpected complications. Based on a measurement campaign on IPv4 and IPv6 networks, we confront our taxonomy with the real world. Our dataset is freely available.

}, keywords = {classification, IPv6, middleboxes, path impairment, tracebox}, author = {Korian Edeline and Benoit Donnet} } @inproceedings {Fia2015, title = {Towards Automatic Detection and Diagnosis of Internet Service Anomalies via DNS Traffic Analysis}, booktitle = {6th International Workshop on Traffic Analysis and Characterization (TRAC)}, year = {2015}, author = {Pierdomenico Fiadino and Alessandro D{\textquoteright}Alconzo and Mirko Schiavone and Pedro Casas} } @inproceedings {Was2015, title = {Towards DisNETPerf: a Distributed Internet Paths Performance Analyzer}, booktitle = {The 11th International Conference on emerging Networking EXperiments and Technologies - CoNEXT 2015}, year = {2015}, month = {11/2015}, abstract = {

For more than 25 years now, traceroute has demonstrated its supremacy for network-path measurement, becoming the most widely used Internet path diagnosis tool today. A major limitation of traceroute when the destination is not controllable by the user is its inability to measure reverse paths, i.e., the path from a destination back to the source. Proposed techniques to address this issue rely on IP address spoofing, which might lead to security concerns. In this paper we introduce and evaluate DisNETPerf, a new tool for locating probes that are the closest to a distant server. Those probes are then used to collect data from the server point-of-view to the service user, for path performance monitoring and troubleshooting purposes. We propose two techniques for probe location, and demonstrate that the reverse path can be measured with very high accuracy in certain scenarios.

}, author = {Sarah Wassermann and Pedro Casas and Benoit Donnet} } @inproceedings {Thi2015a, title = {Tracking Middleboxes in the Mobile World with TraceboxAndroid}, booktitle = {7th International Workshop on Traffic Monitoring and Analysis (TMA)}, year = {2015}, month = {04/2015}, abstract = {

Middleboxes are largely deployed over cellular networks. It is known that they might disrupt network performance, expose users to security issues, and harm protocol deployability. Further, hardly any network measurement tools for smartphones are able to infer middlebox behaviors, especially if one cannot control both ends of a path. In this paper, we present TraceboxAndroid, a proof-of-concept measurement application for Android mobile devices implementing the tracebox algorithm. It aims at diagnosing middlebox-impaired paths by detecting and locating rewriting middleboxes. We analyze a dataset sample to highlight the range of opportunities offered by TraceboxAndroid. We show that TraceboxAndroid can be useful for mobile users as well as for the research community.

}, keywords = {Android, tracebox}, author = {Valentin Thirion and Korian Edeline and Benoit Donnet} } @inproceedings {2015a, title = {Troubleshooting Web Sessions with CUSUM}, year = {2015} } @inproceedings {Wam2015a, title = {Understanding YouTube QoE in Cellular Networks with YoMoApp - a QoE Monitoring Tool for YouTube Mobile}, booktitle = {ACM MOBICOM}, year = {2015}, author = {Florian Wamser and Michael Seufert and Pedro Casas and Ralf Irmer and Phuoc Tran-Gia and Raimund Schatz} } @inproceedings {Pie2015, title = {Vivisecting WhatsApp in Cellular Networks: Servers, Flows, and Quality of Experience}, booktitle = {Traffic Monitoring and Analysis}, year = {2015}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, keywords = {Cellular Networks, Large-Scale Measurements, Quality of Experience, Traffic characterization, WhatsApp}, isbn = {978-3-319-17171-5}, doi = {10.1007/978-3-319-17172-2_4}, url = {http://dx.doi.org/10.1007/978-3-319-17172-2_4}, author = {Fiadino, Pierdomenico and Schiavone, Mirko and Casas, Pedro} } @inproceedings {Kut2015, title = {Weighted Similarity Estimation in Data Streams}, booktitle = {CIKM}, year = {2015}, month = {10/2015}, publisher = {ACM}, organization = {ACM}, abstract = {

Similarity computation between pairs of objects is often a bottleneck in many applications that have to deal with massive volumes of data. Motivated by applications such as collaborative filtering in large-scale recommender systems and the learning of influence probabilities in social networks, we present new randomized algorithms for the estimation of weighted similarity in data streams.

Previous work has addressed the problem of learning binary similarity measures in a streaming setting. To the best of our knowledge, the algorithms proposed here are the first that specifically address the estimation of weighted similarity in data streams. The algorithms need only one pass over the data, making them ideally suited to handling massive data streams in real time.

We obtain precise theoretical bounds on the approximation error and complexity of the algorithms. The results of evaluating our algorithms on two real-life datasets validate the theoretical findings and demonstrate the applicability of the proposed algorithms.

}, author = {Konstantin Kutzkov and Mohamed Ahmed and Sofia Nikitaki} } @inproceedings {Wam2015, title = {YoMoApp: a Tool for Analyzing QoE of YouTube HTTP Adaptive Streaming in Mobile Networks}, booktitle = {European Conference on Networks and Communications (EuCNC)}, year = {2015}, author = {Florian Wamser and Michael Seufert and Pedro Casas and Ralf Irmer and Phuoc Tran-Gia and Raimund Schatz} } @inproceedings {Seu2015, title = {YouTube QoE on Mobile Devices: Subjective Analysis of Classical vs. Adaptive Video Streaming}, booktitle = {6th International Workshop on Traffic Analysis and Characterization (TRAC)}, year = {2015}, author = {Michael Seufert and Florian Wamser and Pedro Casas and Ralf Irmer and Phuoc Tran-Gia and Raimund Schatz} } @inproceedings {DR:TMA-15, title = {Zen and the art of network troubleshooting: a hands on experimental study}, booktitle = {Traffic Monitoring and Analysis}, year = {2015}, abstract = {

Growing network complexity necessitates tools and methodologies to automate network troubleshooting. In this paper, we follow a crowd-sourcing trend and argue for the need to deploy measurement probes at end-user devices and gateways, which can be under the control of the users or the ISP. Depending on the amount of information available to the probes (e.g., ISP topology), we formalize the network troubleshooting task as either a clustering or a classification problem, which we solve with an algorithm that (i) achieves perfect classification under the assumption of a strategic selection of probes (e.g., assisted by an ISP) and (ii) operates blindly with respect to the network performance metrics, of which we consider delay and bandwidth in this paper. While previous work on network troubleshooting privileges theoretical over practical approaches, our workflow balances both aspects, as (i) we conduct a set of controlled experiments with a rigorous and reproducible methodology, (ii) on an emulator that we thoroughly calibrate, (iii) contrasting experimental results affected by real-world noise with expected results from a probabilistic model.

}, url = {http://www.enst.fr/~drossi/paper/rossi15tma.pdf}, author = {Espinet, Fran{\c c}ois and Joumblatt, Diana and Rossi, Dario} } @inproceedings {E.2014, title = {Active measurements and limitations of TCP protocol during SLA test}, booktitle = {Poster session at the Sixth Workshop on Traffic Monitoring and Analysis (TMA)}, year = {2014}, month = {04/2014}, author = {Edion Tego} } @inbook {Sil2014, title = {Aggregation of Statistical Data from Passive Probes: Techniques and Best Practices}, booktitle = {Traffic Monitoring and Analysis}, series = {Lecture Notes in Computer Science}, volume = {8406}, year = {2014}, pages = {38-50}, publisher = {Springer Berlin Heidelberg}, organization = {Springer Berlin Heidelberg}, abstract = {

Passive probes continuously generate statistics on a large number of metrics, which are possibly represented as probability mass functions (pmfs). The need for consolidation of several pmfs arises in two contexts, namely: (i) whenever a central point collects and aggregates measurements from multiple disjoint vantage points, and (ii) whenever the measurement processing at a single vantage point needs to be distributed over multiple cores of the same physical probe, in order to cope with growing link capacity. In this work, we take an experimental approach and study both cases using, whenever possible, open source software and datasets. Considering different consolidation strategies, we assess their accuracy in estimating pmf deciles (from the 10th to the 90th) of diverse metrics, obtaining general design and tuning guidelines. In our dataset, we find that Monotonic Spline Interpolation over a larger set of percentiles (e.g., adding the 5th, 15th, and so on) allows fairly accurate pmf consolidation in both the multiple vantage points case (median error about 1\%, maximum 30\%) and the local processes case (median 0.1\%, maximum 1\%).

}, keywords = {Data aggregation, data reduction, scalability problem}, isbn = {978-3-642-54998-4}, doi = {10.1007/978-3-642-54999-1_4}, url = {http://dx.doi.org/10.1007/978-3-642-54999-1_4}, author = {Colabrese, Silvia and Rossi, Dario and Mellia, Marco} } @article {Dus2014a, title = {Algorithm and Scheduler Design and Implementation}, year = {2014}, month = {09/2014}, keywords = {algorithm design, job scheduler, mPlane software, repository tools}, isbn = {D3.3}, issn = {D3.3}, author = {Maurizio Dusi and Saverio Niccolini and Sofia Nikitaki and Daniele Apiletti and Elena Baralis and Alessandro Finamore and Luigi Grimaudo and Stefano Traverso and Francesco Matera and Edion Tego and V. Guchev and Zied Ben Houidi and Pietro Michiardi and Marco Milanesio and YiXi Gong and Dario Rossi and Ilias Leontiadis and G. Dimopoulos and Tivadar Szemethy and A. Bakay and Arian B{\"a}r and Pedro Casas and Alessandro D{\textquoteright}Alconzo and Pierdomenico Fiadino} } @inproceedings {Cas2014d, title = {On the Analysis of QoE-based Performance Degradation in YouTube Traffic}, booktitle = {10th International Conference on Network and Service Management, CNSM 2014}, year = {2014}, month = {11/2014}, publisher = {IEEE}, organization = {IEEE}, address = {Rio de Janeiro, Brazil}, abstract = {

YouTube is the most popular service in today{\textquoteright}s Internet. Google relies on its massive Content Delivery Network (CDN) to push YouTube videos as close as possible to the end-users to improve their Quality of Experience (QoE), as well as to pursue its own optimization goals. Adopting space- and time-variant traffic delivery policies, Google servers handle users{\textquoteright} requests from multiple geo-distributed locations at different times. Such traffic delivery policies can have a relevant impact on the traffic routed through the Internet Service Providers (ISPs) providing the access, but most importantly, they can have negative effects on the end-user QoE. In this paper we shed light on the problem of diagnosing QoE-based performance degradation events in YouTube{\textquoteright}s traffic. Through the analysis of one month of YouTube flow traces collected at the network of a large European ISP, we identify and drill down into a Google CDN server selection policy that negatively impacted the watching experience of YouTube users during several days at peak-load times. The analysis combines both the user-side perspective and the CDN perspective of the end-to-end YouTube delivery service to diagnose the problem. On the one hand, we rely on the monitoring of YouTube QoE-based Key Performance Indicators (KPIs) to detect performance degradation events affecting the end-customers. On the other hand, we analyze the temporal behavior of the Google CDN traffic delivery policies by tracking the activity of the Google servers providing the videos. The analysis is supported by time-series analysis, entropy-based approaches, and clustering techniques to flag the aforementioned anomaly. The main contributions of the paper are threefold: firstly, we provide a large-scale characterization of the YouTube service in terms of traffic characteristics and provisioning behavior of the Google CDN servers. Secondly, we introduce simple yet effective QoE-based KPIs to monitor YouTube videos from the end-user perspective. Finally, and most importantly, we analyze and provide evidence of the occurrence of QoE-based YouTube anomalies induced by CDN server selection policies, which are normally hidden from the common knowledge of the end-user. This is a main issue for ISPs, who see their reputation degrade when such events occur, even if Google is the culprit.

}, keywords = {clustering, Content Delivery Networks, Empirical Entropy, Performance Degradation, Quality of Experience, YouTube}, author = {Pedro Casas and Alessandro D{\textquoteright}Alconzo and Pierdomenico Fiadino and Arian B{\"a}r and Alessandro Finamore} } @inproceedings {DR:ICN-14c, title = {Analyzing Cacheable Traffic in ISP Access Networks for Micro CDN applications via Content-Centric Networking}, booktitle = {ACM SIGCOMM Information Centric Networks (ICN)}, year = {2014}, month = {09/2014}, address = {Paris, FR}, url = {http://www.enst.fr/~drossi/paper/rossi14icn-c.pdf}, author = {Imbrenda, Claudio and Muscariello, Luca and Rossi, Dario} } @inproceedings {Ruf2014b, title = {Bandwidth Measurements and Capacity Exploitation in Gigabit Passive Optical Networks}, booktitle = {Fotonica 2014}, year = {2014}, month = {05/2014}, abstract = {

We report an experimental investigation on the measurement of the available bandwidth for users in Gigabit Passive Optical Networks (GPON) and on the limitations caused by the Internet protocols. We point out that the huge capacity offered by the GPON highlights the enormous differences that can arise between the available bandwidth and the bandwidth actually exploitable with TCP. In this ultra-broadband environment we also investigated the use of UDP and of multi-session TCP. A correlation in terms of QoE is also reported.

}, keywords = {access capacity, GPON, QoE, QoS, Throughput}, author = {Arianna Rufini and Edion Tego and Francesco Matera and Marco Mellia} } @inproceedings {Cas2014e, title = {Characterizing Microsoft Lync Online in Mobile Networks: a Quality of Experience Perspective}, booktitle = {3rd IEEE International Conference on Cloud Networking}, year = {2014}, month = {10/2014}, publisher = {IEEE}, organization = {IEEE}, address = {Luxembourg, Luxembourg}, abstract = {

Cloud-based systems are gaining enormous popularity due to a number of promised benefits, including ease of deployment and administration, scalability and flexibility, and cost savings. However, as more personal and business applications migrate to the Cloud, the service quality becomes an important differentiator between providers. ISPs, Cloud providers and enterprises migrating their services to the Cloud must therefore understand the network requirements to ensure proper end-user Quality of Experience (QoE) in these services. This paper addresses the problem of QoE in Telepresence and Remote Collaboration (TRC) services provided by Microsoft Lync Online (MLO). MLO is a Cloud-based service providing online meeting capabilities including videoconferencing, audio calls, and desktop sharing, and has become the default system for TRC in enterprise scenarios. We present a complete study of the QoE experienced by 44 MLO users in controlled subjective lab tests. The study is performed on three different interactive scenarios running on top of the real MLO Cloud service, additionally shaping the Lync flows at the access network to influence the participants{\textquoteright} experience. The scenarios include audioconferencing, videoconferencing, and remote collaboration through desktop sharing. By passively monitoring the end-to-end QoS achieved by the Lync flows, and correlating it with the QoE feedback provided by the participants, this study permits a better understanding of the interplay between network performance and QoE in TRC Cloud services. In addition, we provide a network-level characterization of the traffic generated by MLO, as well as an overview of the infrastructure hosting MLO servers.

}, keywords = {Audioconferencing, Cloud QoE, Distributed Data Center, Microsoft Lync Online, MOS, Remote Desktop Sharing, Telepresence, Videoconferencing}, author = {Pedro Casas and Andreas Sackl and Sebastian Egger and Raimund Schatz} } @inproceedings {Fia, title = {Characterizing Web Services Provisioning via CDNs: The Case of Facebook}, booktitle = {5th International Workshop on TRaffic Analysis and Characterization}, year = {2014}, month = {08/2014}, publisher = {IEEE}, organization = {IEEE}, address = {Nicosia, Cyprus}, abstract = {

Today{\textquoteright}s Internet consists of massive-scale web services and Content Delivery Networks (CDNs). This paper sheds light on the way the content of major Internet-scale web services is hosted and delivered. By analyzing a full month of HTTP traffic traces collected at the mobile network of a major European ISP, we characterize the paradigmatic case of Facebook, considering not only the traffic flows but also the main organizations and CDNs providing them. Our study serves the main purpose of better understanding how major web services are provisioned in today{\textquoteright}s Internet, paying special attention to the temporal dynamics of the service delivery and the interplay between the involved hosting organizations. To the best of our knowledge, this is the first paper providing such an analysis in mobile networks.

}, keywords = {Akamai, Content Delivery Networks, Facebook, HTTP Traffic, mobile networks}, author = {Pierdomenico Fiadino and Alessandro D{\textquoteright}Alconzo and Pedro Casas} } @inproceedings {Boc2014, title = {Cloud Storage Service Benchmarking: Methodologies and Experimentations}, booktitle = {3rd IEEE International Conference on Cloud Networking (IEEE CloudNet 2014)}, year = {2014}, month = {10/2014}, publisher = {IEEE}, organization = {IEEE}, address = {Luxembourg}, abstract = {

Data storage is one of today{\textquoteright}s fundamental services, with companies, universities and research centers needing to store large amounts of data every day. Cloud storage services are emerging as a strong alternative to local storage, allowing customers to save the costs of buying and maintaining expensive hardware. Several solutions are available on the market, the most famous being Amazon S3. However, it is rather difficult to access information about each service{\textquoteright}s architecture, performance, and pricing. To shed light on storage services from the customer perspective, we propose a benchmarking methodology, apply it to four popular offers (Amazon S3, Amazon Glacier, Windows Azure Blob and Rackspace Cloud Files), and compare their performance. Each service is analysed as a black box and benchmarked through crafted workloads. We take the perspective of a customer located in Europe, looking for possible service providers and the optimal data center in which to deploy its applications. Finally, we complement the analysis by comparing the actual and forecast costs faced when using each service.

According to the collected results, all services show weaknesses under some workload, with no clear all-round winner, e.g., some offers provide excellent or poor performance depending on whether large or small files are exchanged. For all services, it is of paramount importance to accurately select the data center to which applications are deployed, with throughput varying by factors from 2x to 10x. The methodology (and the tools implementing it) presented here is instrumental for potential customers to identify the most suitable offer for their needs.

}, keywords = {Amazon S3, Benchmarking, Cloud storage, Performance measurement, Web services, Windows Azure}, author = {Enrico Bocchi and Marco Mellia and Sofiane Sarni} } @inproceedings {Nay2014, title = {The Cost of the {\textquotedblleft}S{\textquotedblright} in HTTPS}, booktitle = {ACM Conference on emerging Networking EXperiments and Technologies (CoNEXT)}, year = {2014}, month = {12/2014}, author = {David Naylor and Alessandro Finamore and Ilias Leontiadis and Yan Grunenberger and Marco Mellia and Kostantina Papagiannaki and Peter Steenkiste} } @article {D43, title = { Cross-check of Analysis Modules and Reasoner Interactions}, number = {D4.3}, year = {2014}, month = {10/2014}, type = {Deliverable}, keywords = {reasoner, WP4}, author = {Umberto Manferdini and Stefano Traverso and Marco Mellia and Edion Tego and Francesco Matera and Zied Ben Houidi and Marco Milanesio and Pietro Michiardi and Dario Rossi and D. Cicalese and D. Joumblatt and Jordan Aug{\'e} and Maurizio Dusi and Sofia Nikitaki and Mohamed Ahmed and Ilias Leontiadis and L. Baltrunas and M. Varvello and Pedro Casas and Alessandro D{\textquoteright}Alconzo and Benoit Donnet and W. Du and Guy Leduc and Y. Liao and Alessandro Capello and Fabrizio Invernizzi} } @inproceedings {Bae2014, title = {DBStream: an Online Aggregation, Filtering and Processing System for Network Traffic Monitoring}, booktitle = {TRAC}, year = {2014}, month = {08/2014}, publisher = {IEEE}, organization = {IEEE}, address = {Nicosia, Cyprus}, author = {Arian Baer and Pedro Casas and Lukasz Golab and Alessandro Finamore} } @article {DR:COMNET-14a, title = {Delay-based congestion control: Flow vs. BitTorrent swarm perspectives}, journal = {Elsevier Computer Networks}, volume = {60}, year = {2014}, month = {02/2014}, chapter = {115 -- 128}, abstract = {

BitTorrent, one of the most widespread file-sharing P2P applications, recently introduced LEDBAT, a novel congestion control protocol aiming at (i) limiting the additional delay due to queuing, to reduce interference with the rest of the user traffic (e.g., Web, VoIP and gaming) sharing the same access bottleneck, and (ii) efficiently using the available link capacity, to provide users with good BitTorrent performance at the same time. In this work, we adopt two complementary perspectives: namely, a flow viewpoint to assess the Quality of Service (QoS) as in classic congestion control studies, and a BitTorrent swarm viewpoint to assess peer-to-peer users{\textquoteright} Quality of Experience (QoE). We additionally point out that the congestion control literature is rich in protocols, such as VEGAS, LP, and NICE, that share similarities with LEDBAT and that are therefore mandatory to consider in the analysis. Hence, adopting the above viewpoints, we both (i) contrast LEDBAT with the other protocols and (ii) provide a deep understanding of the novel protocol and its implications on QoS and QoE. Our simulation-based investigation yields several insights. At flow level, we find LEDBAT to have the lowest priority among all protocols, which follows from its design that strives to explicitly bound the queuing delay at the bottleneck link to a maximum target value. At the same time, we see that this very same protocol parameter can be exploited by adversaries, who can set a higher target to gain an unfair advantage over competitors. Interestingly, swarm-level performance exhibits an opposite trade-off, with smaller targets being more advantageous for the QoE of BitTorrent users. This can be explained by the fact that larger delay targets slow down the BitTorrent signaling task, with possibly negative effects on the swarming protocol efficiency. Additionally, we see that, for the above reason, in heterogeneous swarms any delay-based protocol (i.e., not only LEDBAT but also VEGAS or NICE) can yield a competitive QoE advantage over loss-based TCP. Overall, this tension between swarm and flow levels suggests that, at least in current ADSL/cable access bottleneck scenarios, a safe LEDBAT operational point may be used in practice. At the same time, our results also point out that benefits similar to LEDBAT can also be obtained with other delay-based protocols such as VEGAS or NICE.

}, url = {http://www.enst.fr/~drossi/paper/rossi14comnet-a.pdf}, author = {Claudio Testa and Dario Rossi} } @article {D42, title = {Design of the Reasoner}, number = {D4.2}, year = {2014}, month = {06/2014}, type = {report}, keywords = {design, private deliverable, reasoner, WP4}, issn = {D4.2}, author = {Pedro Casas and Alessandro D{\textquoteright}Alconzo and Maurizio Dusi and Sofia Nikitaki and Mohamed Ahmed and Stefano Traverso and Marco Mellia and Daniele Apiletti and Luigi Grimaudo and Elena Baralis and Dario Rossi and D. Joumblatt and Alessandro Capello and M. D{\textquoteright}Ambrosio and Fabrizio Invernizzi and M. Ullio and Andrea Fregosi and Eike Kowallik and Stefano Raffaglio and Andrea Sannino and Marco Milanesio and Edion Tego and Francesco Matera and Tivadar Szemethy and Balazs Szabo and L. N{\'e}meth and Zied Ben Houidi and G. Dimopoulos and Ilias Leontiadis and Yan Grunenberger and L. Baltrunas and Michael Faath and Rolf Winter and Dimitri Papadimitriou} } @inproceedings {Fia2014, title = {On the Detection of Network Traffic Anomalies in Content Delivery Network Services}, booktitle = {ITC26}, year = {2014}, month = {09/2014}, address = {Karlskrona, Sweden}, author = {Pierdomenico Fiadino and Alessandro D{\textquoteright}Alconzo and Arian B{\"a}r and Alessandro Finamore and Pedro Casas} } @inproceedings {Schiavone:2014:DDA:2680821.2680831, title = {Diagnosing Device-Specific Anomalies in Cellular Networks}, booktitle = {Proceedings of the 2014 CoNEXT on Student Workshop}, year = {2014}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, keywords = {anomaly diagnosis, cellular networks., entropy-based analysis}, isbn = {978-1-4503-3282-8}, doi = {10.1145/2680821.2680831}, url = {http://doi.acm.org/10.1145/2680821.2680831}, author = {Schiavone, Mirko and Romirer-Maierhofer, Peter and Fiadino, Pierdomenico and Casas, Pedro} } @inproceedings {Cas2014a, title = {Distributed active measurement of Internet queuing delays}, booktitle = {Passive and Active Measurement (PAM), Extended Abstract}, year = {2014}, month = {March}, address = {Los Angeles, USA}, abstract = {

Despite growing link capacities, over-dimensioned buffers are still causing, in the Internet of the second decade of the third millennium, hosts to suffer from severe queuing delays (or bufferbloat). While the maximum bufferbloat possibly exceeds a few seconds, it is far less clear how often this maximum is hit in practice. This paper reports on our ongoing work to build a spatial and temporal map of Internet bufferbloat, describing a system based on distributed agents running on PlanetLab that aims at providing a quantitative answer to the above question.

}, url = {http://www.enst.fr/~drossi/paper/rossi14pam-b.pdf}, author = {P. Casoria and D Rossi and Jordan Aug{\'e} and Marc-Oliver Buob and T. Friedman and A. Pescape} } @article {Ber2014, title = {A Distributed Architecture for the Monitoring of Clouds and CDNs: Applications to Amazon AWS}, journal = {IEEE Transactions on Network and Service Management}, volume = {In press}, year = {2014}, abstract = {

Clouds and CDNs are systems that tend to separate the content being requested by users from the physical servers capable of serving it. From the network point of view, monitoring and optimizing performance for the traffic they generate is a challenging task, given that the same resource can be located in multiple places, which can in turn change at any time. The first step in understanding Cloud and CDN systems is thus the engineering of a monitoring platform. In this paper, we propose a novel solution which combines passive and active measurements, and whose workflow has been tailored to specifically characterize the traffic generated by Cloud and CDN infrastructures. We validate our platform by performing a longitudinal characterization of the very well-known Cloud and CDN infrastructure provider Amazon Web Services (AWS). By observing the traffic generated by more than 50,000 Internet users of an Italian ISP, we explore the EC2, S3 and CloudFront AWS services, unveiling their infrastructure, the pervasiveness of the web services they host, and their traffic allocation policies as seen from our vantage points. Most importantly, we observe their evolution over a two-year-long period. The solution provided in this paper can be of interest for i) developers aiming at building measurement tools for Cloud Infrastructure Providers, ii) developers interested in failure and anomaly detection systems, and iii) third-party SLA certifiers who can design systems to independently monitor performance. Finally, we believe the results about AWS presented in this paper are interesting as they are among the first to unveil properties of AWS as seen from the operator point of view.

}, keywords = {Amazon, AWS, CDNs, Clouds}, author = {Ignacio Bermudez and Stefano Traverso and Marco Mellia and Maurizio Munafo{\textquoteright}} } @article {Tra2014a, title = {Evolving Transport in the Internet}, journal = {IEEE Internet Computing}, year = {2014}, month = {09/2014}, abstract = {

The Internet{\textquoteright}s transport layer has seen little evolution over the past three decades, despite wildly changing requirements. Commonly-deployed transport protocols lack diversity, reducing our ability to evolve them to meet these new application requirements. In this work, the authors describe aspects of this problem and propose a solution space and agenda for improving the situation.

}, author = {Brian Trammell}, editor = {Joe Hildebrand} } @inproceedings {Tra2014b, title = {Exploiting Hybrid Measurements for Network Troubleshooting}, booktitle = {IEEE Networks}, year = {2014}, month = {09/2014}, publisher = {IEEE}, organization = {IEEE}, address = {Funchal, PT}, abstract = {Network measurements are a fundamental pillar for understanding network performance and performing root cause analysis in case of problems. Traditionally, either active or passive measurements are considered. While active measurements make it possible to know exactly the workload injected by the application into the network, passive measurements can offer a more detailed view of transport- and network-layer impacts. In this paper, we present a hybrid approach in which active throughput measurements are regularly run while a passive measurement tool monitors the generated packets. This allows us to correlate the application-layer measurements obtained by the active tool with the more detailed view offered by the passive monitor. The proposed methodology has been implemented following the mPlane reference architecture, the tools have been installed in the Fastweb network, and we have collected measurements for more than three months. We then report a subset of results that show the benefits obtained when correlating active and passive measurements. Among the results, we pinpoint cases of congestion, ADSL misconfiguration, and modem issues that impair the throughput obtained by the users.}, keywords = {Hybrid measurements, measurement analysis, WP2}, author = {Stefano Traverso and Edion Tego and Eike Kowallik and Stefano Raffaglio and Andrea Fregosi and Marco Mellia and Francesco Matera} } @article {DR:COMNET-14b, title = {{Fighting the bufferbloat: on the coexistence of AQM and low priority congestion control (extended version)}}, journal = {Elsevier Computer Networks}, volume = {60}, year = {2014}, chapter = {115--128}, abstract = {

Nowadays, due to excessive queuing, delays on the Internet can grow longer than the round trip time between the Moon and the Earth {\textendash} for which the {\textquoteleft}{\textquoteleft}bufferbloat{\textquoteright}{\textquoteright} term was recently coined. Some point to active queue management (AQM) as the solution. Others propose end-to-end low-priority congestion control techniques (LPCC). Under both approaches, promising advances have been made in recent times: notable examples are CoDel for AQM, and LEDBAT for LPCC. In this paper, we warn of a potentially fateful interaction when AQM and LPCC techniques are combined: namely, AQM resets the relative level of priority between best-effort and low-priority congestion control protocols. We validate the generality of our findings by an extended set of experiments with packet-level ns2 simulation, considering 5 AQM techniques and 3 LPCC protocols, and carry out a thorough sensitivity analysis varying several parameters of the networking scenario. We complement the simulations with an experimental campaign conducted on both controlled testbeds and on the Internet, confirming that the reprioritization issue holds in the real world, at least under all combinations of AQM policies and LPCC protocols available in the Linux kernel. To promote cross-comparison, we make our scripts and dataset available to the research community.

}, url = {http://www.enst.fr/~drossi/paper/rossi14comnet-b.pdf}, author = {YiXi Gong and Dario Rossi and Claudio Testa and Silvio Valenti and Dave Taht} } @inbook {Geo2014, title = {Flow Management at Multi-Gbps: Tradeoffs and Lessons Learned}, booktitle = {Traffic Monitoring and Analysis}, series = {Lecture Notes in Computer Science}, volume = {8406}, year = {2014}, pages = {1-14}, publisher = {Springer Berlin Heidelberg}, organization = {Springer Berlin Heidelberg}, isbn = {978-3-642-54998-4}, doi = {10.1007/978-3-642-54999-1_1}, url = {http://dx.doi.org/10.1007/978-3-642-54999-1_1}, author = {Nassopulos, Georges and Rossi, Dario and Gringoli, Francesco and Nava, Lorenzo and Dusi, Maurizio and Santiago del Rio, PedroMaria} } @inproceedings {Leo2014, title = {From Cells to Streets: Estimating Mobile Paths with Cellular-Side Data}, booktitle = {CoNEXT}, year = {2014}, publisher = {ACM}, organization = {ACM}, address = {Sydney, Australia}, author = {Ilias Leontiadis and Antonio Lima and Haewoon Kwak and Rade Stanojevic and David Wetherall and Konstantina Papagiannaki} } @inproceedings {Ben2014, title = {Gold mining in a River of Internet Content Traffic}, booktitle = {6th International Workshop on Traffic Monitoring and Analysis, TMA}, year = {2014}, month = {04/2014}, publisher = {Springer}, organization = {Springer}, address = {London}, abstract = {With the advent of Over-The-Top content providers (OTTs), Internet Service Providers (ISPs) saw their portfolio of services shrink to the low-margin role of data transporters. In order to counter this effect, some ISPs started to follow big OTTs like Facebook and Google in trying to turn their data into a valuable asset. In this paper, we explore the questions of what meaningful information can be extracted from network data, and what interesting insights it can provide. To this end, we tackle the first challenge of detecting {\textquotedblleft}user-URLs{\textquotedblright}, i.e., those links that were clicked by users as opposed to those objects automatically downloaded by browsers and applications. We devise algorithms to pinpoint such URLs, and validate them on manually collected ground truth traces. We then apply them to a three-day-long traffic trace spanning more than 19,000 residential users that generated around 190 million HTTP transactions. We find that only 1.6\% of these observed URLs were actually clicked by users. As a first application of our methods, we answer the question of which platforms participate most in promoting Internet content. Surprisingly, we find that, despite its notoriety, only 11\% of the user URL visits come from Google Search.}, keywords = {Content mining, HTTP Traffic, URL extraction}, author = {Zied Ben-Houidi and Giuseppe Scavo and Samir Ghamri-Doudane and Alessandro Finamore and Stefano Traverso and Marco Mellia} } @inproceedings {Tra2014, title = {Inline Data Integrity Signals for Passive Measurement}, booktitle = {Sixth International Workshop on Traffic Monitoring and Analysis (TMA 2014)}, year = {2014}, month = {04/2014}, abstract = {

In passive network measurement, the quality of an observed traffic stream is obviously crucial to the quality of the results. Some sources of error (e.g., packet loss at a capture device) are well understood, others less so. In this work, we describe the inline data integrity measurement provided by the QoF TCP-aware flow meter. By instrumenting the data structures QoF uses for detecting lost and retransmitted TCP segments, we can provide an in-band, per-flow estimate of observation loss: segments which were received by the recipient but not observed by the flow meter. We evaluate this mechanism against controlled, induced error, and apply it to two data sets used in previous work.

}, author = {Brian Trammell and David Gugelmann and Nevil Brownlee} } @inproceedings {Bae2014a, title = {Large-Scale Network Traffic Monitoring with DBStream, a System for Rolling Big Data Analysis}, booktitle = {International Conference on Big Data, IEEE BigData}, year = {2014}, month = {11/2014}, publisher = {IEEE}, organization = {IEEE}, address = {Washington D.C., USA}, abstract = {

The complexity of the Internet has rapidly increased, making it more important and challenging to design scalable network monitoring tools. Network monitoring typically requires rolling data analysis, i.e., continuously and incrementally updating (rolling-over) various reports and statistics over high-volume data streams. In this paper, we describe DBStream, which is an SQL-based system that explicitly supports incremental queries for rolling data analysis. We also present a performance comparison of DBStream with a parallel data processing engine (Spark), showing that, in some scenarios, a single DBStream node can outperform a cluster of ten Spark nodes on rolling network monitoring workloads. Although our performance evaluation is based on network monitoring data, our results can be generalized to other big data problems with high volume and velocity.

}, keywords = {Big Data Analysis, Data Stream Processing, network data analysis, System Performance}, author = {Arian B{\"a}r and Alessandro Finamore and Pedro Casas and Lukasz Golab and Marco Mellia} } @article {Tra, title = {mPlane: an Intelligent Measurement Plane for the Internet}, journal = {IEEE Communications Magazine, Special Issue on Monitoring and Troubleshooting Multi-domain Networks using Measurement Federations}, volume = {42}, year = {2014}, month = {05/2014}, author = {Brian Trammell and Pedro Casas and Dario Rossi and Arian B{\"a}r and Zied Ben-Houidi and Ilias Leontiadis and Tivadar Szemethy and Marco Mellia} } @article {Ruf2014, title = {Multilevel Bandwidth Measurements and Capacity Exploitation in Gigabit Passive Optical Networks}, journal = {IET Communications}, volume = {8}, year = {2014}, month = {11/2014}, pages = {8}, chapter = {3357}, abstract = {

We report an experimental investigation on the measurement of the bandwidth available to users in Gigabit Passive Optical Networks (GPON) and the limitations caused by the Internet protocols, and TCP in particular. We point out that the huge capacity offered by GPON highlights the enormous differences that can be observed between the available and the actually exploitable bandwidth. In fact, while the physical layer capacity can reach values of 100 Mb/s and more, the bandwidth at the user{\textquoteright}s disposal (i.e. either throughput at the transport layer or goodput at the application layer) can be much lower when applications and services based on the TCP protocol are considered. In the context of Service Level Agreement (SLA) verification, we show how to simultaneously measure throughput and line capacity, offering a method to verify multilayer SLAs. We also show how it is possible to better exploit the physical layer capacity by adopting multiple TCP connections to avoid the bottleneck of a single connection.

}, keywords = {Fiber Networks, GPON, Quality of Service, TCP}, issn = {1751-8628}, doi = {10.1049/iet-com.2014.0165}, url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=6980492}, author = {Arianna Rufini and Marco Mellia and Edion Tego and Francesco Matera} } @inproceedings {Ruf2014a, title = {Multilevel QoS vs QoE measurements and Verification of Service Level Agreements}, booktitle = {European Conference on Networks and Communications, EUCNC 2014}, year = {2014}, month = {06/2014}, abstract = {

Experimental investigation of QoS measurements in terms of throughput versus access capacity, including a correlation with the QoE evaluated for some web TV services.

}, author = {Arianna Rufini and Edion Tego and Francesco Matera} } @article {rfc7119, title = {Operation of the IP Flow Information Export (IPFIX) Protocol on IPFIX Mediators (RFC 7119)}, year = {2014}, month = {02/2014}, abstract = {
This document specifies the operation of the IP Flow Information Export (IPFIX) protocol specific to IPFIX Mediators, including Template and Observation Point management, timing considerations, and other Mediator-specific concerns.
}, url = {http://www.ietf.org/rfc/rfc7119.txt}, author = {B. Claise and A. Kobayashi and B. Trammell} } @inproceedings {DR:TMA-14c, title = {Peeking Through the BitTorrent Seedbox Hosting Ecosystem}, booktitle = {Traffic Monitoring and Analysis (TMA)}, year = {2014}, abstract = {

In this paper, we propose a lightweight method for detecting and classifying BitTorrent content providers with a minimal amount of resources. While heavy methodologies are typically used (which require long-term observation and data exchange with peers of the swarm and/or a semantic analysis of torrent websites), we instead argue that such complexity can be avoided by analyzing the correlations between peers and torrents. We apply our methodology to study over 50K torrents injected in ThePirateBay during one month, collecting more than 400K IP addresses. In short, we find that exploiting the correlations not only enhances the classification accuracy while keeping the technique lightweight (our methodology reliably identifies about 150 seedboxes), but also uncovers seeding behaviors that were not previously noticed (e.g., multi-port and multi-host seeding). Finally, we correlate the popularity of seedbox hosting in our dataset to criteria (e.g., cost, storage space, Web popularity) that can bias the selection process of BitTorrent content providers.

}, url = {http://www.enst.fr/~drossi/paper/rossi14tma-c.pdf}, author = {Dario Rossi and Guilhem Pujol and Xiao Wang and Fabien Mathieu} } @inproceedings {DR:TRAC-14, title = {A per-Application Account of Bufferbloat: Causes and Impact on Users}, booktitle = {5th International Workshop on TRaffic Analysis and Characterization (TRAC), Best Paper Award}, year = {2014}, abstract = {

We propose a methodology to gauge the extent of queueing delay (aka bufferbloat) in the Internet, based on purely passive measurement of TCP traffic. We implement our methodology in Tstat and make it available as open source software. We leverage the Deep Packet Inspection (DPI) and behavioral classification of Tstat to break down the queueing delay across different applications, in order to evaluate the impact of bufferbloat on user experience. We show that there is no correlation between the ISP traffic load and the queueing delay, thus confirming that bufferbloat is related only to the traffic of each single user (or household). Finally, we use frequent itemset mining techniques to associate the amount of queueing delay seen by each host with the set of its active applications, with the goal of investigating the root cause of bufferbloat.

}, url = {http://www.enst.fr/drossi/paper/rossi14trac.pdf}, author = {Andrea Araldo and Dario Rossi} } @article {Pap2014, title = {Practical issues for the implementation of survivability and recovery techniques in optical networks}, journal = {Journal Optical Switching and Networking}, volume = {14}, year = {2014}, chapter = {179}, author = {Dimitri Papadimitriou}, editor = {G.Ellinas and J.Rak and D.Staessens and J.Sterbenz and K.Walkowiak} } @inproceedings {Cit2014, title = {On the Quality of BGP Route Collectors for iBGP Policy Inference}, booktitle = {IFIP Networking}, year = {2014}, month = {June 2014}, abstract = {

A significant portion of what is known about Internet routing stems from public BGP datasets. For this reason, numerous research efforts were devoted to (i) assessing the (in)completeness of the datasets, (ii) identifying biases in the datasets, and (iii) augmenting data quality by optimally placing new collectors. However, those studies focused on techniques to extract information about the AS-level Internet topology.

In this paper, we show that considering different metrics influences the conclusions about biases and collector placement. Namely, we compare AS-level topology discovery with iBGP policy inference. We find that the same datasets exhibit significantly diverse biases for these two metrics. For example, the sensitivity to the number and position of collectors is noticeably different. Moreover, for both metrics, the marginal utility of adding a new collector is strongly localized with respect to the proximity of the collector. Our results suggest that the {\textquoteleft}{\textquoteleft}optimal{\textquoteright}{\textquoteright} position for new collectors can only be defined with respect to a specific metric, hence posing a fundamental trade-off for maximizing the utility of extensions to the BGP data collection infrastructure.

}, keywords = {bias, iBGP policies, measurement, network topology}, author = {Luca Cittadini and Stefano Vissichio and Benoit Donnet} } @inproceedings {Teg2014, title = {Quality of Service Management based on Software Defined Networking Approach in wide GbE Networks}, booktitle = {Euromed}, year = {2014}, month = {11/2014}, abstract = {

This work experimentally demonstrates how to control and manage user Quality of Service (QoS) by switching the optical Gigabit Ethernet (GbE) interfaces on and off in a wide area network test bed including routers and GPON accesses. The QoS is monitored at the user location by means of active probes developed in the framework of the FP7 mPlane project. The network topology is managed according to current Software Defined Networking principles; in particular, an Orchestrator checks the user quality and the traffic load on the GbE links, and manages the network interface reconfiguration when congestion occurs in some network segments.

}, keywords = {GbE, PON, QoS, SDN}, author = {Edion Tego and Francesco Matera and Vincenzo Attanasio and Donato Del Buono} } @inproceedings {Dus2014, title = {Reactive Logic in Software-Defined Networking: Measuring Flow-Table Requirements}, booktitle = {5th International Workshop on TRaffic Analysis and Characterization (TRAC)}, year = {2014}, month = {2014}, address = {Nicosia, Cyprus}, author = {Maurizio Dusi and Roberto Bifulco and Francesco Gringoli and Fabian Schneider} } @article {rfc7125, title = {Revision of the tcpControlBits IP Flow Information Export (IPFIX) Information Element (RFC 7125)}, year = {2014}, month = {02/2014}, abstract = {
This document revises the tcpControlBits IP Flow Information Export (IPFIX) Information Element as originally defined in RFC 5102 to reflect changes to the TCP Flags header field since RFC 793.
}, url = {http://www.ietf.org/rfc/rfc7125.txt}, author = {B. Trammell and P. Aitken} } @inproceedings {EURECOM+4268, title = {Revisiting size-based scheduling with estimated job sizes}, booktitle = {{MASCOTS} 2014, {IEEE} 22nd {I}nternational {S}ymposium on {M}odeling analysis and simulation of computer and telecommunication systems, {S}eptember 9-11, 2014, {P}aris, {F}rance / {A}lso published on {A}r{X}iv}, year = {2014}, month = {03}, address = {{P}aris, {FRANCE}}, url = {http://www.eurecom.fr/publication/4268}, author = {Dell{\textquoteright}Amico, Matteo and Carra, Damiano and Pastorelli, Mario and Michiardi, Pietro} } @inproceedings {Col2014, title = {Scalable accurate consolidation of passively measured statistical data}, booktitle = {Passive and Active Measurement (PAM), Extended Abstract}, year = {2014}, month = {March}, address = {Los Angeles, USA}, abstract = {

Passive probes continuously collect a significant amount of traffic volume, and autonomously generate statistics on a large number of metrics. A common statistical output of a passive probe is represented by probability mass functions (pmfs). The need for consolidation of several pmfs arises in two contexts, namely: (i) whenever a central point collects and aggregates measurements from multiple disjoint vantage points, and (ii) whenever a local measurement processed at a single vantage point needs to be distributed over multiple cores of the same physical probe, in order to cope with growing link capacity. Taking an experimental approach, we study both cases, assessing the impact of different consolidation strategies and obtaining general design and tuning guidelines.

}, url = {http://perso.telecom-paristech.fr/~drossi/paper/rossi14pam-a.pdf}, author = {S. Colabrese and D Rossi and Marco Mellia} } @article {grimaudoTNSM2014, title = {SeLeCT: Self-Learning Classifier for Internet Traffic}, journal = {IEEE Transactions on Network and Service Management}, volume = {11}, year = {2014}, month = {06/2014}, chapter = {144}, abstract = {

Network visibility is a critical part of traffic engineering, network management, and security. The most popular current solutions, Deep Packet Inspection (DPI) and statistical classification, rely deeply on the availability of a training set. Besides the cumbersome need to regularly update the signatures, their visibility is limited to the classes the classifier has been trained for. Unsupervised algorithms have been envisioned as a viable alternative to automatically identify classes of traffic. However, the accuracy achieved so far does not allow them to be used for traffic classification in practical scenarios.

To address the above issues, we propose SeLeCT, a Self-Learning Classifier for Internet Traffic. It uses unsupervised algorithms along with an adaptive seeding approach to automatically let classes of traffic emerge, so that they can be identified and labeled. Unlike traditional classifiers, it requires neither a priori knowledge of signatures nor a training set to extract the signatures. Instead, SeLeCT automatically groups flows into pure (or homogeneous) clusters using simple statistical features. SeLeCT simplifies label assignment (which is still based on some manual intervention) so that proper class labels can be easily discovered. Furthermore, SeLeCT uses an iterative seeding approach to boost its ability to cope with new protocols and applications.

We evaluate the performance of SeLeCT using traffic traces collected in different years from various ISPs located on 3 different continents. Our experiments show that SeLeCT achieves excellent precision and recall, with overall accuracy close to 98\%. Unlike state-of-the-art classifiers, the biggest advantage of SeLeCT is its ability to discover new protocols and applications in an almost automated fashion.

}, keywords = {clustering, self-seeding, Traffic Classification, unsupervised machine learning}, author = {Luigi Grimaudo and Marco Mellia and Elena Baralis and Ram Keralapura} } @inproceedings {Cas2014, title = {Understanding HTTP Traffic and CDN Behavior from the Eyes of a Mobile ISP}, booktitle = {Passive and Active Measurements Conference (PAM)}, year = {2014}, author = {Pedro Casas and Pierdomenico Fiadino and Arian B{\"a}r} } @inproceedings {DR:PAM-14c, title = {Violation of Interdomain Routing Assumptions}, booktitle = {Passive and Active Measurement (PAM)}, year = {2014}, month = {03/2014}, address = {Los Angeles, USA}, url = {http://www.enst.fr/~drossi/paper/rossi14pam-c.pdf}, author = {R. Mazloum and M.-O. Buob and J. Auge and B. Baynat and T. Friedman and D. Rossi} } @inproceedings {Fiaa, title = {Vivisecting WhatsApp through Large-Scale Measurements in Mobile Networks}, booktitle = {SIGCOMM 2014}, year = {2014}, month = {08/2014}, publisher = {ACM}, organization = {ACM}, address = {Chicago, USA}, abstract = {

WhatsApp, the new giant in instant multimedia messaging in mobile networks, is rapidly increasing its popularity, taking over traditional SMS/MMS messaging. In this paper we present the first large-scale characterization of WhatsApp, useful, among others, to ISPs willing to understand the impact of this and similar applications on their networks. Through the combined analysis of passive measurements at the core of a national mobile network, worldwide geo-distributed active measurements, and traffic analysis at end devices, we show that: (i) the WhatsApp hosting architecture is highly centralized and exclusively located in the US; (ii) video sharing covers almost 40\% of the total WhatsApp traffic volume; (iii) flow characteristics depend on the OS of the end device; (iv) despite the big latencies to US servers, download throughputs are as high as 1.5 Mbps; (v) users react immediately and negatively to service outages through social network feedback.

}, keywords = {Large-Scale Measurements, mobile networks, WhatsApp}, author = {Pierdomenico Fiadino and Mirko Schiavone and Pedro Casas} } @article {6975242, title = {When YouTube Does not Work - Analysis of QoE-Relevant Degradation in Google CDN Traffic}, journal = {Network and Service Management, IEEE Transactions on}, volume = {11}, year = {2014}, month = {Dec}, pages = {441-457}, keywords = {CDN distributed services, CDN server selection strategies, client-server systems, content delivery network, Content Delivery Networks, Degradation, dynamic approach, dynamic server selection strategies, end-user QoE, end-user quality of experience, European ISP, Google, Google CDN traffic, Google server selection strategies, IP networks, iterative structured process, load reduction, QoE-relevant anomaly characterization, QoE-relevant anomaly detection, QoE-relevant anomaly diagnosis, QoE-relevant degradation, Quality of Experience, Servers, social networking (online), statistical analysis, statistical analysis methodologies, Statistical Data Analysis, telecommunication traffic, Traffic Monitoring, Videos, watching experience improvement, YouTube, YouTube flow trace collection, YouTube QoE-relevant degradation, YouTube videos}, issn = {1932-4537}, doi = {10.1109/TNSM.2014.2377691}, author = {Casas, Pedro and D{\textquoteright}Alconzo, Alessandro and Fiadino, Pierdomenico and B{\"a}r, Arian and Finamore, Alessandro and Zseby, Tanja} } @inproceedings {A2014, title = {Who to Blame when YouTube is not Working? Detecting Anomalies in CDN Provisioned Services}, booktitle = {TRAC}, year = {2014}, month = {08/2014}, publisher = {IEEE}, organization = {IEEE}, address = {Nicosia, Cyprus}, author = {Alessandro D{\textquoteright}Alconzo and Pedro Casas and Pierdomenico Fiadino and Arian B{\"a}r and Alessandro Finamore} } @inproceedings {Cas2014b, title = {YouTube All Around: Characterizing YouTube from Mobile and Fixed-line Network Vantage Points}, booktitle = {EuCNC}, year = {2014}, month = {06/2014}, address = {Bologna, IT}, author = {Pedro Casas and Pierdomenico Fiadino and Arian B{\"a}r and Alessandro D{\textquoteright}Alconzo and Alessandro Finamore and Marco Mellia} } @inproceedings {Cas2014c, title = {YouTube in the Move: Understanding the Performance of YouTube in Cellular Networks (BEST PAPER AWARD RUNNER UP)}, booktitle = {Wireless Days 2014}, year = {2014}, month = {11/2014}, publisher = {IEEE}, organization = {IEEE}, address = {Rio de Janeiro, Brazil}, abstract = {

YouTube is the most popular and volume-dominant service in today{\textquoteright}s Internet, and is changing the way ISPs manage their networks. Understanding the performance of YouTube traffic is paramount for ISPs, especially for mobile operators, who must handle the huge surge of traffic with the constraints and challenges of cellular networks. In this paper we present an empirical analysis of the performance of YouTube flows accessed through a nation-wide cellular network, considering download throughput as well as end-user Quality of Experience (QoE) metrics. The analysis considers the characteristics and impact of the Content Delivery Network hosting YouTube, and compares its behavior with other popular HTTP video streaming services accessed through cellular networks. The QoE analysis is performed through end-user device measurements, which directly reflect the experience of the end-users. Our study additionally shows the potential of monitoring YouTube performance in cellular networks directly from the users{\textquoteright} smartphones, bypassing the traffic visibility loss at the core of the network introduced by traffic encryption (e.g., HTTPS).

}, keywords = {Cellular Networks, Content Delivery Networks, End-device Measurements, QoE, Traffic Measurements, YouTube}, author = {Pedro Casas and Pierdomenico Fiadino and Andreas Sackl and Alessandro D{\textquoteright}Alconzo} } @article {Mic2013, title = {Basic Network Data Analysis}, number = {D3.1}, year = {2013}, month = {05/2013}, institution = {mPlane Consortium}, type = {Public Deliverable}, address = {Torino}, abstract = {

This document describes the requirements, inputs, and outputs of the algorithms needed to perform analytic tasks on large amounts of data, in the context of WP3. Starting from the use cases defined in WP1, we identify the algorithms needed to address the various scenario requirements. Operating on large amounts of data, these algorithms strive for parallel and scalable approaches; the design and implementation of the algorithms themselves can be a challenging research task, since very little is known today about how to develop efficient and scalable algorithms that run on parallel processing frameworks.
The algorithms in the storage layer are characterized by the fact that they operate on a large amount of data and produce a concise representation of it, extracting features and aggregating them, so that the produced output is easier to handle and understand. Depending on the amount of data produced, on the scenario characteristics, and on the time constraints, algorithms may require real-time (or near-real-time) or batch processing.
For each algorithm and use case, we describe the input data, the initial state, the computation to run, and the output produced.

}, keywords = {algorithms, big data, storage}, issn = {D3.1}, author = {Pietro Michiardi and Antonio Barbuzzi and Alessandro Finamore and Stefano Traverso and Daniele Apiletti and Elena Baralis and Tania Cerquitelli and Silvia Chiusano and Luigi Grimaudo and A. Rufini and Francesco Matera and A. Valentii and Maurizio Dusi and Mohamed Ahmed and Tivadar Szemethy and L. N{\'e}meth and R. Szalay and Ilias Leontiadis and Yan Grunenberger and P. Casas and Alessandro D{\textquoteright}Alconzo and A B{\"a}r and D Rossi and YiXi Gong} } @inproceedings {Dra2013, title = {Benchmarking Personal Cloud Storage}, booktitle = {Internet Measurement Conference - IMC}, year = {2013}, month = {10/2013}, publisher = {ACM}, organization = {ACM}, address = {Barcelona (ES)}, abstract = {

Personal cloud storage services are data-intensive applications already producing a significant share of Internet traffic. Several solutions offered by different companies attract more and more people. However, little is known about each service{\textquoteright}s capabilities, architecture and {\textendash} most of all {\textendash} the performance implications of its design choices. This paper presents a methodology to study cloud storage services. We apply our methodology to compare 5 popular offers, revealing different system architectures and capabilities. The implications on performance of the different designs are assessed by executing a series of benchmarks. Our results show no clear winner, with all services suffering from some limitations or having potential for improvement. In some scenarios, the upload of the same file set can take seven times longer, wasting twice as much capacity. Our methodology and results are thus useful as both a benchmark and a guideline for system design.

}, keywords = {Active Measurements, Personal Cloud Storage}, doi = {10.1145/2504730.2504762}, url = {http://www.simpleweb.org/wiki/Cloud_benchmarks}, author = {Idilio Drago and Enrico Bocchi and Marco Mellia and Herman Slatman and Aiko Pras} } @inproceedings {Tes2013, title = {Data Plane Throughput vs Control Plane Delay: Experimental Study of BitTorrent Performance}, booktitle = {IEEE P2P{\textquoteright}XIII}, year = {2013}, month = {09/2013}, abstract = {In this paper, we address the trade-off between the data plane efficiency and the control plane timeliness for the BitTorrent performance. We argue that loss-based congestion control protocols can fill large buffers, leading to a higher end-to-end delay, unlike low-priority or delay-based congestion control protocols. We perform experiments for both the uTorrent and mainline BitTorrent clients, and we study the impact of uTP (a novel transport protocol proposed by BitTorrent) and several TCP congestion control algorithms (Cubic, New Reno, LP, Vegas and Nice) on the download completion time. Briefly, in case peers in the swarm all use the same congestion control algorithm, we observe that the specific algorithm has only a limited impact on the swarm performance. Conversely, when a mix of TCP congestion control algorithms coexists, peers employing a delay-based low-priority algorithm exhibit shorter completion time.}, url = {http://www.enst.fr/~drossi/paper/rossi13p2p-a.pdf}, author = {C. Testa and D Rossi and A. Rao and A. Legout} } @article {Bar2013a, title = {Database Layer Design}, number = {D3.2}, year = {2013}, month = {11/2013}, institution = {mPlane Consortium}, type = {Public Deliverable}, address = {Torino}, keywords = {big data, databases, repositories, storage}, issn = {D3.2}, author = {A B{\"a}r and P. Casas and Alessandro D{\textquoteright}Alconzo and Alessandro Finamore and Antonio Barbuzzi and Maurizio Dusi and Gianni De Rosa and Tivadar Szemethy and Ilias Leontiadis and D Rossi and Jordan Aug{\'e} and Marc-Oliver Buob} } @article {Pap2013, title = {Design of Analysis Modules}, number = {D4.1}, year = {2013}, month = {11/2013}, institution = {mPlane Consortium}, type = {Public Deliverable}, address = {Torino}, keywords = {algorithms, analysis}, isbn = {D4.1}, issn = {D4.1}, author = {Dimitri Papadimitriou and Zied Ben-Houidi and Samir Ghamri-Doudane and D Rossi and M. Milanesio and P. Casas and Alessandro D{\textquoteright}Alconzo and Edion Tego and Francesco Matera and Maurizio Dusi and Tivadar Szemethy and L. M{\'a}th{\'e} and Alessandro Finamore and Stefano Traverso and Ilias Leontiadis and Yan Grunenberger and L. Baltrunas and Benoit Donnet and Guy Leduc and Y. Liao} } @inproceedings {Ara2013, title = {Dissecting Bufferbloat: Measurement and Per-Application Breakdown of Queueing Delay}, booktitle = {ACM CoNEXT{\textquoteright}13, Student Workshop}, year = {2013}, abstract = {

We propose a passive methodology to estimate the queueing delay incurred by TCP traffic, and additionally leverage DPI classification to break down the delay across different applications. Ultimately, we correlate the queueing delay with the performance perceived by the users of those applications, depending on their delay sensitivity. We implement our methodology in Tstat, and make it available as open source software to the community. We validate and tune the tool, and run a preliminary measurement campaign based on a real ISP traffic trace, showing interesting yet partly counter-intuitive results.

}, doi = {10.1145/2537148.2537785}, url = {http://www.enst.fr/~drossi/paper/rossi13conext.pdf}, author = {A. Araldo and D Rossi} } @article {Lia2013, title = {DMFSGD: A decentralized matrix factorization algorithm for network distance prediction}, journal = {IEEE/ACM Transactions on Networking}, volume = {21}, year = {2013}, month = {10/2013}, chapter = {1511}, abstract = {

The knowledge of end-to-end network distances is essential to many Internet applications. As active probing of all pairwise distances is infeasible in large-scale networks, a natural idea is to measure a few pairs and to predict the other ones without actually measuring them. This paper formulates the prediction problem as matrix completion, where the unknown entries in a pairwise distance matrix constructed from a network are to be predicted. By assuming that the distance matrix has low-rank characteristics, the problem is solvable by low-rank approximation based on matrix factorization. The new formulation circumvents the well-known drawbacks of existing approaches based on Euclidean embedding.

A new algorithm, called Decentralized Matrix Factorization by Stochastic Gradient Descent (DMFSGD), is proposed. By letting network nodes exchange messages with each other, the algorithm is fully decentralized and only requires each node to collect and to process local measurements, with neither explicit matrix constructions nor special nodes such as landmarks and central servers. In addition, we comprehensively compared matrix factorization and Euclidean embedding to demonstrate the suitability of the former for network distance prediction. We further studied the incorporation of a robust loss function and of non-negativity constraints. Extensive experiments on various publicly available datasets of network delays show not only the scalability and the accuracy of our approach, but also its usability in real Internet applications.

}, keywords = {matrix completion, matrix factorization, network distance prediction, stochastic gradient descent}, author = {Yongjun Liao and Wei Du and Pierre Geurts and Guy Leduc} } @inproceedings {Ber2013, title = {Exploring the Cloud from Passive Measurements: the Amazon AWS case}, booktitle = {The 32nd Annual IEEE International Conference on Computer Communications (INFOCOM{\textquoteright}2013)}, year = {2013}, address = {Turin, Italy}, abstract = {

Cloud Providers are nowadays the most popular way to quickly deploy new services on the Internet. Understanding the mechanisms currently adopted in cloud design is fundamental to identify possible bottlenecks, to optimize performance, and to design more efficient platforms. This paper presents a characterization of Amazon{\textquoteright}s Web Services (AWS), the most prominent cloud provider that offers computing, storage, and content delivery platforms. Leveraging passive measurements collected from several vantage points in Italy for several months, we explore the EC2, S3 and CloudFront AWS services to unveil their infrastructure, the pervasiveness of the content they host, and their traffic allocation policies. Measurements reveal that most of the content residing on EC2 and S3 is served by one single Amazon datacenter located in Virginia, even though it appears to be the worst-performing one for Italian users. This causes traffic to take long and expensive paths in the network. Since no automatic migration and load-balancing policies are offered by AWS among different locations, content is exposed to outages, as we were able to observe in our data. The CloudFront CDN, on the contrary, shows much better performance thanks to the effective cache selection policy that serves 98\% of the traffic from the nearest available cache. CloudFront also exhibits dynamic load-balancing policies, in contrast to the static allocation of instances on EC2 and S3. The information presented in this paper will be useful for developers aiming at entrusting AWS to deploy their content, and for researchers willing to improve cloud design.

}, doi = {10.1109/INFCOM.2013.6566769}, author = {Ignacio Nicolas Bermudez and Stefano Traverso and Marco Mellia and Maurizio M Munafo{\textquoteright}} } @inproceedings {Gon2013, title = {Fighting the bufferbloat: on the coexistence of AQM and low priority congestion control}, booktitle = {IEEE INFOCOM Workshop on Traffic Monitoring and Analysis (TMA{\textquoteright}13)}, year = {2013}, keywords = {Bufferbloat}, doi = {10.1109/INFCOM.2013.6567153}, url = {http://www.enst.fr/~drossi/paper/rossi13tma-b.pdf}, author = {YiXi Gong and D Rossi and C. Testa and S. Valenti and D. Taht} } @article {Cap2013, title = {First Data Collection Track Record}, number = {D5.1}, year = {2013}, month = {11/2013}, institution = {mPlane Consortium}, type = {Private Deliverable}, address = {Torino}, keywords = {data sets, integration, measurement systems, scenarios, use cases}, author = {Alessandro Capello and Fabrizio Invernizzi and Omar Jabr and Dimitri Papadimitriou and Dario Rossi and YiXi Gong and Brian Trammell and Marco Milanesio and Ernst Biersack and Rolf Winter and Francesco Matera and Arianna Rufini and Edion Tego and Maurizio Dusi and Balazs Szabo and Tivadar Szemethy and Alessandro Finamore and Marco Mellia and Ilias Leontiadis and Benoit Donnet} } @inproceedings {Mar2013, title = {HFSP: Size-based Scheduling for Hadoop}, booktitle = {BIGDATA 2013, IEEE International Conference on BigData, October 6-9, 2013, Santa-Clara, CA, USA}, year = {2013}, month = {10}, url = {http://www.eurecom.fr/publication/4106}, author = {Pastorelli Mario and Barbuzzi Antonio and Carra Damiano and Dell{\textquoteright}Amico Matteo and Michiardi Pietro} } @inproceedings {Fia2013, title = {HTTPTag: A Flexible On-line HTTP Classification System for Operational 3G Networks}, booktitle = {INFOCOM{\textquoteright}2013 Demo/Poster Session (INFOCOM{\textquoteright}2013 - Demo/Poster Session)}, year = {2013}, address = {Turin, Italy}, abstract = {

The popularity of web-based services and applications like YouTube and Facebook has taken HTTP back to the pole position in end-user traffic consumption. We present HTTPTag, a flexible on-line HTTP classification system based on pattern matching and tagging. HTTPTag recognizes on the fly and tracks the evolution of more than 280 applications running on top of HTTP in an operational 3G network, representing more than 70\% of the total HTTP traffic volume consumed by its customers. HTTPTag improves the network traffic visibility of an operator, performing tasks such as top-service ranking, long-term monitoring of application popularity, and trend analysis, among others.

}, keywords = {3G Networks, HTTP, Pattern Matching, Traffic Classification}, author = {Pierdomenico Fiadino and A B{\"a}r and P. Casas} } @inproceedings {Ros2013, title = {I tube, YouTube, P2PTube: assessing ISP benefits of peer-assisted caching of YouTube content}, booktitle = {IEEE P2P{\textquoteright}XIII}, year = {2013}, month = {09/2013}, abstract = {

This paper proposes P2PTube, a very simple yet effective set-top-box system to assist the diffusion of YouTube videos. We argue that, due to the spatial and temporal nature of video requests, the simplest design already provides non-marginal gains. Our trace-driven evaluation shows that, with a moderate cache size (100MB) and nominal upload rates (500Kbps), about half of the video requests could be served by P2PTube. Interestingly, we also see that non-marginal gains are already achievable with tiny caches {\textendash} which is tied to the presence of advertisements prior to the actual video requested by the user.

}, url = {http://www.enst.fr/~drossi/paper/rossi13p2p-b.pdf}, author = {D Rossi and Y. Nicolas and D. Wolff and Alessandro Finamore} } @inproceedings {Cas2013, title = {IP Mining: Extracting Knowledge from the Dynamics of the Internet Addressing Space (BEST PAPER AWARD)}, booktitle = {25th International Teletraffic Congress, ITC 25}, year = {2013}, abstract = {

Going back to the Internet of one decade ago, HTTP-based content and web services were provided by centralized or barely distributed servers. Single hosts providing exclusive services at fixed IP addresses were the standard approach. The current situation has drastically changed, and the mapping of IPs to different content and services is nowadays extremely dynamic. The adoption of large CDNs by major Internet players, the extended usage of transparent content caching, the explosion of Cloud-based services, and the decoupling between content providers and the hosting infrastructure have created a difficult-to-manage Internet landscape. Understanding such a complex scenario is paramount for network operators, both to control the traffic on their networks and to improve the quality experienced by their customers, especially when something goes wrong. Using a full week of HTTP traffic traces collected at the mobile broadband network of a major European ISP, this paper studies the associations between web services, the hosting organizations/ASes, and the content servers{\textquoteright} IPs. By mining correlations among these, we extract useful insights about the dynamics of the IP addressing space used by the top web services, and the way content providers and hosting organizations deliver their services to the mobile end-users. The extracted knowledge is applied to two specific use cases: the former on hosting and service delivery characterization, the latter on automatic IP-based HTTP service classification.

}, keywords = {Content Delivery Networks, HTTP Traffic, IP Addressing Space, Traffic Classification and Analysis}, doi = {10.1109/ITC.2013.6662933}, author = {Pedro Casas and Pierdomenico Fiadino and Arian B{\"a}r} } @inbook {Pap2013b, title = {Measurement-based Experimental Research Methodology}, booktitle = {Measurement Methodology and Tools}, series = {Lecture Notes in Computer Science Series}, volume = {7586}, year = {2013}, pages = {pp 5-22}, publisher = {Springer Berlin Heidelberg}, organization = {Springer Berlin Heidelberg}, issn = {978-3-642-41295-0}, doi = {10.1007/978-3-642-41296-7_2}, author = {D. Papadimitriou and L. F{\`a}brega and P. Vil{\`a} and D. Careglio and P. Demeester} } @inproceedings {Tra2013c, title = {A Measurement-Centered Approach to Latency Reduction}, booktitle = {ISOC Workshop on Reducing Internet Latency}, year = {2013}, month = {09/2013}, type = {Position Paper}, address = {London, England}, url = {http://www.internetsociety.org/latency2013}, author = {Brian Trammell and Alessandro Finamore and Marco Mellia} } @inproceedings {Cas2013c, title = {Mini-IPC: A Minimalist Approach for HTTP Traffic Classification using IP Addresses}, booktitle = {4th International Workshop on Traffic Analysis and Classification, TRAC 2013}, year = {2013}, abstract = {

The popularity of web-based services and multimedia applications like YouTube, Google Web Search, Facebook, and a bewildering range of Internet applications has taken HTTP back to the pole position in end-user traffic consumption. Today{\textquoteright}s Internet users exchange most of their content via HTTP. In this paper we address the problem of on-line HTTP traffic classification from network measurements. Building on the results provided by HTTPTag, a flexible system for on-line HTTP classification, we present and explore Mini-IPC. Mini-IPC is a minimalist approach for classifying HTTP flows using only the IP addresses of the servers hosting the corresponding content. Using one full week of HTTP traffic traces collected at the mobile broadband network of a major European ISP, we investigate to what extent the most popular HTTP-based services are hosted by well-defined sets of IP addresses, and evaluate the performance of Mini-IPC in classifying these services using IPs only.
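
As an illustration only (the service labels, addresses, and helper names below are hypothetical, not taken from the paper), a minimal Python sketch of the IP-only classification idea: learn a server-IP-to-service map from flows already labeled by a content-based classifier, then label new flows with a plain dictionary lookup.

    from collections import Counter, defaultdict

    def train_ip_map(labeled_flows):
        """labeled_flows: iterable of (server_ip, service_label) pairs,
        e.g. produced by a content-based classifier."""
        votes = defaultdict(Counter)
        for ip, label in labeled_flows:
            votes[ip][label] += 1
        # keep, for each server IP, the service it most often hosted
        return {ip: c.most_common(1)[0][0] for ip, c in votes.items()}

    def classify(ip_map, server_ip):
        """Label a new flow using only the server IP address."""
        return ip_map.get(server_ip, "unknown")

    # toy example (illustrative addresses and labels)
    training = [("203.0.113.10", "video"), ("203.0.113.10", "video"),
                ("198.51.100.7", "social")]
    ip_map = train_ip_map(training)
    print(classify(ip_map, "203.0.113.10"))   # -> video
    print(classify(ip_map, "192.0.2.1"))      # -> unknown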

}, keywords = {CDNs, HTTP Traffic, IP Addressing Space, Mobile Networks{\textquoteright} Traffic, Traffic Classification and Analysis}, doi = {10.1109/IWCMC.2013.6583537}, author = {P. Casas and Pierdomenico Fiadino} } @inproceedings {Gon2013a, title = {Modeling the interdependency of low-priority congestion control and active queue management}, booktitle = { 25th International Teletraffic Congress (ITC{\textquoteright}25), Runner up for best-paper award}, year = {2013}, abstract = {

Recently, a negative interplay has been shown to arise when scheduling/AQM techniques and low-priority congestion control protocols are used together: namely, AQM resets the relative level of priority among congestion control protocols. This work explores this issue by (i) studying a fluid model that describes system dynamics of heterogeneous congestion control protocols competing on a bottleneck link governed by AQM and (ii) proposing a system-level solution able to reinstate priorities among protocols.

}, doi = {10.1109/ITC.2013.6662942}, author = {YiXi Gong and D Rossi and Emilio Leonardi} } @article {Tra2013d, title = {mPlane Architecture Specification}, number = {D1.3}, year = {2013}, month = {11/2013}, institution = {mPlane Consortium}, type = {Public Deliverable}, address = {Torino}, keywords = {architecture, measurement, platform, scenario, use case}, author = {Brian Trammell and Marco Mellia and Alessandro Finamore and Stefano Traverso and Tivadar Szemethy and Balazs Szabo and D Rossi and Benoit Donnet and Fabrizio Invernizzi and Dimitri Papadimitriou} } @inproceedings {Pap2013a, title = {Multi-agent Statistical Relational Learning}, booktitle = {2nd European Teletraffic Seminar (ETS 2013)}, year = {2013}, author = {Dimitri Papadimitriou} } @inproceedings {Bel2013, title = {On Netflix catalog dynamics and caching performance}, booktitle = {IEEE CAMAD}, year = {2013}, month = {09/2013}, abstract = {

Multimedia streaming applications have substantially changed the market policy of an increasing number of content providers that offer streaming services to the users. The need for effective video content delivery has re-fueled interest in caching: since the Web-like workloads of the 90s are no longer fit to describe the new Web of videos, in this work we investigate the suitability of the publicly available Netflix dataset for caching studies. Our analysis shows that, as the dataset continuously evolves, (i) a steady-state description is not statistically meaningful and (ii) although the cache hit ratio decreases due to the growth of active movies in the catalog, simple caching replacement approaches are close to the optimum given the growing skew of the popularity distribution over time. Additionally, we point out that, since the dataset reports logs of movie ratings, anomalies arise when ratings are considered to be movie views. At the same time, we show that these anomalies yield conservative caching results, which reinforces the soundness of our study.
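
A minimal sketch, assuming a toy request trace rather than the Netflix dataset, of the kind of simple replacement policy evaluated here: replay requests through a fixed-size LRU cache and report the hit ratio.

    from collections import OrderedDict

    def lru_hit_ratio(requests, cache_slots):
        """Replay a sequence of content identifiers through an LRU cache
        holding `cache_slots` objects; return the resulting hit ratio."""
        cache, hits = OrderedDict(), 0
        for item in requests:
            if item in cache:
                hits += 1
                cache.move_to_end(item)        # refresh recency on a hit
            else:
                cache[item] = True
                if len(cache) > cache_slots:   # evict least recently used
                    cache.popitem(last=False)
        return hits / len(requests)

    # toy trace with a skewed popularity pattern
    trace = ["m1", "m2", "m1", "m3", "m1", "m2", "m4", "m1"]
    print(lru_hit_ratio(trace, cache_slots=2))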

}, url = {http://www.enst.fr/~drossi/paper/rossi13camad.pdf}, author = {Walter Bellante and Rosa Vilardi and D Rossi} } @inproceedings {Van2013, title = {Network Fingerprinting: TTL-Based Router Signature}, booktitle = {ACM/USENIX Internet Measurement Conference (IMC)}, year = {2013}, month = {10/2013}, address = {Barcelona, Spain}, abstract = {

Fingerprinting networking equipment has many potential applications and benefits in network management and security. More generally, it is useful for the understanding of network structures and their behaviors. In this paper, we describe a simple fingerprinting mechanism based on the initial TTL values used by routers to reply to various probing messages. We show that the main classes obtained using this simple mechanism are meaningful to distinguish router platforms. Besides, it comes at a very low additional cost compared to standard active topology discovery measurements. As a proof of concept, we apply our method to gain more insight into the behavior of MPLS routers and, thus, to more accurately quantify their visible/invisible deployment.
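
A minimal sketch of the underlying inference, assuming (as is common, though the exact candidate set is ours) that routers draw their initial TTL from a few well-known values: round the TTL observed in a reply up to the nearest candidate, and pair the values obtained from two probe/reply types into a signature.

    # Initial TTLs commonly used by network stacks (assumed candidate set).
    CANDIDATE_INITIAL_TTLS = (32, 64, 128, 255)

    def infer_initial_ttl(observed_ttl):
        """Round the TTL seen in a reply up to the nearest candidate value;
        the difference is (roughly) the number of hops travelled."""
        return min(t for t in CANDIDATE_INITIAL_TTLS if t >= observed_ttl)

    def router_signature(ttl_echo_reply, ttl_time_exceeded):
        """A signature is the pair of inferred initial TTLs the router uses
        for two reply types (e.g., ICMP echo-reply vs time-exceeded)."""
        return (infer_initial_ttl(ttl_echo_reply),
                infer_initial_ttl(ttl_time_exceeded))

    # e.g., replies arriving with TTLs 52 and 243 after crossing ~12 hops
    print(router_signature(52, 243))   # -> (64, 255)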

}, keywords = {fingerprinting, initial TTL, MPLS router signature, network discovery}, doi = {10.1145/2504730.2504761}, author = {Yves Vanaubel and Jean-Jacques Pansiot and Pascal M{\'e}rindol and Benoit Donnet} } @inproceedings {Chi2013, title = {Passive bufferbloat measurement exploiting transport layer information}, booktitle = {IEEE GLOBECOM}, year = {2013}, month = {12/2013}, url = {http://www.enst.fr/~drossi/paper/rossi13globecom.pdf}, author = {C. Chirichella and D Rossi and C. Testa and T. Friedman and A. Pescape} } @article {Dus2013, title = {Plans for Using and Disseminating mPlane Knowledge}, number = {D7.2}, year = {2013}, month = {11/2013}, institution = {mPlane Consortium}, type = {Public Deliverable}, address = {Torino}, keywords = {dissemination, exploitation, open-source software, publications, standardization}, issn = {D7.2}, author = {Maurizio Dusi and Saverio Niccolini and Antonio Barbuzzi and M. Milanesio and Alessandro Finamore and Brian Trammell and Edion Tego and Francesco Matera and Tivadar Szemethy and Rolf Winter and Zied Ben-Houidi and Andrea Fregosi and Benoit Donnet and Fabrizio Invernizzi and Dimitri Papadimitriou and Ilias Leontiadis and Yan Grunenberger and P. Casas} } @article {Tra2013b, title = {python-ipfix-0.9.1}, year = {2013}, abstract = {

This module provides a Python interface to IPFIX message streams, and provides tools for building IPFIX Exporting and Collecting Processes. It handles message framing and deframing, encoding and decoding IPFIX data records using templates, and a bridge between IPFIX ADTs and appropriate Python data types.

}, url = {http://pypi.python.org/pypi/ipfix}, author = {Brian Trammell} } @inproceedings {Aru2013, title = {Quality-of-Experience driven Acceleration of Thin Client Connections}, booktitle = {IEEE International Symposium on Network Computing and Applications}, year = {2013}, month = {08/2013}, edition = {12}, doi = {10.1109/NCA.2013.26}, author = {Mayutan Arumaithurai and Jan Seedorf and Maurizio Dusi and Edo Monticelli and Renato Lo Cigno} } @inproceedings {Chi2013c, title = {Remotely Gauging Upstream Bufferbloat Delays}, booktitle = {Passive and Active Measurement (PAM)}, year = {2013}, abstract = { {\textquoteleft}{\textquoteleft}Bufferbloat{\textquoteright}{\textquoteright} is the growth in buffer size that has led Internet delays to occasionally exceed the light propagation delay from the Earth to the Moon. Manufacturers have built in large buffers to prevent losses on Wi-Fi, cable and ADSL links. But the combination of some links{\textquoteright} limited bandwidth with TCP{\textquoteright}s tendency to saturate that bandwidth results in excessive queuing delays. In response, new congestion control protocols such as BitTorrent{\textquoteright}s uTP/LEDBAT aim at explicitly limiting the delay that they add over the bottleneck link. This work proposes and validate a methodology to monitor the upstream queuing delay experienced by remote hosts, both those using LEDBAT, through LEDBAT{\textquoteright}s native one-way delay measurements, and those using TCP (via the Time-stamp Option). }, doi = {10.1007/978-3-642-36516-4_25}, url = {http://www.enst.fr/~drossi/paper/rossi13pam.pdf}, author = {C. Chirichella and D Rossi and C. Testa and T. Friedman and A. Pescape} } @inproceedings {Det2013, title = {Revealing Middlebox Interference with Tracebox}, booktitle = {ACM/USENIX Internet Measurement Conference (IMC)}, year = {2013}, month = {10/2013}, abstract = {

Middleboxes such as firewalls, NATs, proxies, or Deep Packet Inspection devices play an increasingly important role in various types of IP networks, including enterprise and cellular networks. Recent studies have shed light on their impact on real traffic and the complexity of managing them. Network operators and researchers have few tools to understand the impact of those boxes on any path. In this paper, we propose tracebox, an extension to the widely used traceroute tool, that is capable of detecting various types of middlebox interference over almost any path. tracebox sends IP packets containing TCP segments with different TTL values and analyses the packets encapsulated in the returned ICMP messages. Further, as recent routers quote, in the ICMP message, the entire IP packet that they received, tracebox is able to detect any modification performed by upstream middleboxes. In addition, tracebox can often pinpoint the network hop where the middlebox interference occurs. We evaluate tracebox with measurements performed on PlanetLab nodes. Our analysis reveals various types of middleboxes that were not expected on such an experimental testbed, which is supposed to be connected to the Internet without any restriction.
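
A minimal sketch of the per-hop comparison such a tool performs, with packets modeled as plain dictionaries of header fields rather than real probes (field names and values are illustrative): diff what was sent against what the ICMP time-exceeded message quotes back.

    def header_diff(sent, quoted):
        """Return {field: (sent_value, quoted_value)} for every field that a
        middlebox on the path rewrote before the probe expired."""
        return {f: (sent[f], quoted.get(f))
                for f in sent if quoted.get(f) != sent[f]}

    # toy probe: header fields we sent vs. what hop 4 quoted back in ICMP
    sent_probe = {"ip_id": 42, "tcp_seq": 1000, "tcp_window": 65535, "mss": 1460}
    quoted_at_hop4 = {"ip_id": 42, "tcp_seq": 7331, "tcp_window": 65535, "mss": 1380}

    changes = header_diff(sent_probe, quoted_at_hop4)
    if changes:
        # e.g. tcp_seq and mss rewritten by a sequence-randomizing firewall
        # and an MSS-clamping gateway somewhere before this hop
        print("middlebox interference detected at or before this hop:", changes)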

}, keywords = {middlebox, network discovery, tracebox}, doi = {10.1145/2504730.2504757}, author = {Gregory Detal and Benjamin Hesmans and Olivier Bonaventure and Yves Vanaubel and Benoit Donnet} } @inproceedings {Val2013, title = {RILAnalyzer: a Comprehensive 3G Monitor on Your Phone}, booktitle = {Internet Measurement Conference (IMC)}, year = {2013}, doi = {10.1145/2504730.2504764}, author = {Narseo Vallina-Rodriguez and A. Aucinas and M. Almeida and Yan Grunenberger and Konstantina Papagiannaki and Jon Crowcroft} } @inproceedings {Api2013, title = {SEARUM: a cloud-based SErvice for Association RUle Mining}, booktitle = {The 11th IEEE International Symposium on Parallel and Distributed Processing with Applications (ISPA-13)}, year = {2013}, abstract = {

Large volumes of data are being produced by various modern applications at an ever increasing rate. These applications range from wireless sensor networks to social networks. The automatic analysis of such a huge data volume is a challenging task, since a large amount of interesting knowledge can be extracted. Association rule mining is an exploratory data analysis method able to discover interesting and hidden correlations among data. Since this data mining process is characterized by computationally intensive tasks, efficient distributed approaches are needed to increase its scalability. This paper proposes a novel cloud-based service, named SEARUM, to efficiently mine association rules on a distributed computing model. SEARUM consists of a series of distributed MapReduce jobs run in the cloud. Each job performs a different step in the association rule mining process. As a case study, the proposed approach has been applied to the network data scenario. The experimental validation, performed on two real network datasets, shows the effectiveness and the efficiency of SEARUM in mining association rules on a distributed computing model.
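
As a hypothetical illustration of the shape such a pipeline takes (not the SEARUM code), the sketch below mimics one distributed step with plain Python map and reduce functions: emit co-occurring item pairs per record, then sum their support.

    from collections import Counter
    from itertools import combinations

    def map_phase(record):
        """Emit (item-pair, 1) for every pair co-occurring in one record,
        e.g. one network flow described by a set of categorical features."""
        return [(pair, 1) for pair in combinations(sorted(record), 2)]

    def reduce_phase(emitted):
        """Sum the counts per key; pairs above a minimum support become
        candidates for association rules."""
        support = Counter()
        for key, count in emitted:
            support[key] += count
        return support

    # toy records (illustrative feature names)
    records = [{"dns_fail", "high_rtt"}, {"dns_fail", "high_rtt"}, {"dns_fail", "low_loss"}]
    emitted = [kv for r in records for kv in map_phase(r)]
    print(reduce_phase(emitted).most_common(1))   # most supported pair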

}, keywords = {association rule mining, cloud-based service, distributed computing model, network data analysis}, doi = {10.1109/TrustCom.2013.153}, author = {Daniele Apiletti and Elena Baralis and Tania Cerquitelli and Silvia Chiusano and Luigi Grimaudo} } @article {Pap2013c, title = {Selection of Existing Probes and Datasets}, number = {D2.1}, year = {2013}, month = {08/2013}, institution = {mPlane Consortium}, type = {Public Deliverable}, address = {Torino}, abstract = {The mPlane architecture has been designed to include the possibility to interface with existing systems and platforms. While most measurement platforms in existence target a very specific measurement use case (e.g., the discovery of the Internet{\textquoteright}s router-level topology, the continuous measurement of the RTT among host pairs, the exporting via SNMP of network state, etc.), there are platforms that have a large deployed base, with lot of data being at disposal, and/or continuously collecting data. It would be a waste of resources to merely reproduce this effort within mPlane. Instead, mPlane aims at directly interfacing with existing systems and re-using their capabilities and data to feed measurement results to the mPlane intelligence. This document lists selected existing systems that are important for mPlane either for theoretical, conceptual or practical reasons, and that are part of the background of mPlane partners. A sub-set of these systems will be eventually incorporated into mPlane by developing the necessary interfaces. Others could be integrated by the means of proxy probes, i.e., the conceptual component responsible for such interfacing. The main focus of this document is to elaborate the concept of proxy probes, enumerate the systems that will be possibly considered for interface (proxy probe) development, and to give high level descriptions of the proxy probe design for these systems. The following list enumerates the systems that the consortium has chosen to include: - QoF - a TCP-aware IPFIX flow meter  Cisco Ping and SLA Agents - commercial availability and basic network parameter agents  - Tracebox - a tool for middlebox detection and identification - Scamper - a sophisticated active probing tool - MERLIN - a router-level topology discovery tool - TopHat - a configurable measurement system on top of PlanetLab - Tstat - a passive network monitoring tool - BlockMon - a flexible network monitoring and analysis tool - MisuraInternet - a QoS measurement system - Firelog - a Firefox plugin to measure HTTP QoE - Pytomo - an end-host-based video OoE measurement tool - DATI - a high performance deep packet inspector - MobiPerf - a tool for monitoring smartphone performance}, keywords = {active probes, existing probes, passive probes, probes, proxy probes}, author = {Dimitri Papadimitriou and Dario Rossi and YiXi Gong and Brian Trammell and Marco Milanesio and Ernst Biersack and Rolf Winter and Francesco Matera and Maurizio Dusi and Balazs Szabo and Tivadar Szemethy and Alessandro Finamore and Marco Mellia and Alessandro Capello and Fabio Invernizzi and Omar Jabr and Ilias Leontiadis and Benoit Donnet} } @inproceedings {Gri2013, title = {Self-Learning Classifier for Internet Traffic}, booktitle = {The 5th IEEE International Traffic Monitoring and Analysis Workshop (TMA 2013)}, year = {2013}, abstract = {

Network visibility is a critical part of traffic engineering, network management, and security. Recently, unsupervised algorithms have been envisioned as a viable alternative to automatically identify classes of traffic. However, the accuracy achieved so far does not allow their use for traffic classification in practical scenarios.
In this paper, we propose SeLeCT, a Self-Learning Classifier for Internet traffic. It uses unsupervised algorithms along with an adaptive learning approach to automatically let classes of traffic emerge so that they can be identified and (easily) labeled. SeLeCT automatically groups flows into pure (or homogeneous) clusters by alternating simple clustering and filtering phases to remove outliers. SeLeCT uses an adaptive learning approach to boost its ability to spot new protocols and applications. Finally, SeLeCT also simplifies label assignment (which is still based on some manual intervention) so that proper class labels can be easily discovered.
We evaluate the performance of SeLeCT using traffic traces collected in different years from various ISPs located on 3 different continents. Our experiments show that SeLeCT achieves an overall accuracy close to 98\%. Unlike state-of-the-art classifiers, the biggest advantage of SeLeCT is its ability to help discover new protocols and applications in an almost automated fashion.
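
A minimal sketch, using scikit-learn and synthetic flow features rather than the SeLeCT implementation, of one clustering-plus-filtering round of the kind described above: cluster the flows, then drop the points lying farthest from their own centroid before the next round.

    import numpy as np
    from sklearn.cluster import KMeans

    def cluster_and_filter(features, n_clusters=3, keep_fraction=0.9):
        """One round: cluster flows, then discard the flows farthest from
        their own centroid (the outliers), returning the purified subset."""
        km = KMeans(n_clusters=n_clusters, n_init=10, random_state=0).fit(features)
        dist = np.linalg.norm(features - km.cluster_centers_[km.labels_], axis=1)
        keep = dist <= np.quantile(dist, keep_fraction)
        return features[keep], km.labels_[keep]

    # toy flow features (e.g., packet size and inter-arrival statistics)
    rng = np.random.default_rng(0)
    flows = np.vstack([rng.normal(loc, 1.0, (50, 2)) for loc in (0, 10, 20)])
    pure_flows, labels = cluster_and_filter(flows)
    print(len(flows), "->", len(pure_flows), "flows kept after filtering")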

}, doi = {10.1109/INFCOMW.2013.6562900}, author = {Luigi Grimaudo and Marco Mellia and Elena Baralis and Ram Keralapura} } @article {Ros2013a, title = {Specification of mPlane Access Control and Data Protection Mechanisms}, number = {D1.2}, year = {2013}, month = {08/2013}, institution = {mPlane Consortium}, type = {Public Deliverable}, address = {Torino}, abstract = {

This document primarily defines security specifications for the mPlane architecture (in terms of authentication, access control and safe communications), on the basis of what is specified in D1.1. It also provides a description of the measures that can be adopted in order to guarantee the privacy of the data gathered through the probes. This aspect of the mPlane infrastructure must not be neglected, since from a legal point of view the users{\textquoteright} right to privacy must be protected in any case. The techniques to be adopted are anonymization and aggregation, but the utility of data decreases as the level of privacy increases, hence it is necessary to find a good trade-off. Two protocols are proposed for secure communications among components: TLS and SSH, which adopt respectively X.509 certificates and RSA keys for identity management. As the access control policy that will be adopted depends mostly on the mPlane administrators{\textquoteright} choices, this document provides a survey of several approaches. The cross-domain and the mobile scenarios are also analyzed, providing solutions that can guarantee access control, security and privacy.

}, keywords = {access control, anonymisation, authentication plane, privacy, security}, author = {Gianni De Rosa and Stefano Pentassuglia} } @article {Mar2013a, title = {Standardizing large-scale measurement platforms}, journal = {SIGCOMM Comput. Commun. Rev.}, volume = {43}, year = {2013}, pages = {58{\textendash}63}, keywords = {design, ietf, measurement platforms, standardization}, issn = {0146-4833}, doi = {10.1145/2479957.2479967}, url = {http://doi.acm.org/10.1145/2479957.2479967}, author = {Bagnulo Marcelo and Eardley Philip and Burbridge Trevor and Brian Trammell and Rolf Winter} } @inproceedings {Kue2013, title = {On the state of ECN and TCP Options on the Internet}, booktitle = {Passive and Active Measurement Conference (PAM)}, year = {2013}, month = {03/2013}, address = {Hong Kong}, abstract = {

Explicit Congestion Notification (ECN) is a TCP/IP extension that can avoid packet loss and thus improve network performance. Though standardized in 2001, it is barely used in today{\textquoteright}s Internet. This study, following on previous active measurement studies over the past decade, shows a marked and continued increase in the deployment of ECN-capable servers, and usability of ECN on the majority of paths to such servers. We additionally present new measurements of ECN on IPv6, passive observation of actual ECN usage from flow data, and observations on other congestion-relevant TCP options (SACK, Timestamps and Window Scaling). We further present initial work on burst loss metrics for loss-based congestion control following from our findings.
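
A minimal sketch of the kind of active ECN-negotiation test such studies build on, assuming scapy, raw-socket privileges, and an illustrative target address: send a SYN with ECE and CWR set and check whether the SYN-ACK echoes ECE.

    from scapy.all import IP, TCP, sr1   # needs root / raw sockets

    def server_negotiates_ecn(host, port=80, timeout=2):
        """Send SYN with ECE|CWR set; an ECN-capable server answers SYN-ACK+ECE."""
        syn = IP(dst=host) / TCP(dport=port, flags="SEC")   # SYN + ECE + CWR
        reply = sr1(syn, timeout=timeout, verbose=False)
        if reply is None or TCP not in reply:
            return None                                     # no answer / filtered
        flags = int(reply[TCP].flags)
        is_synack = flags & 0x12 == 0x12                    # SYN + ACK bits
        return is_synack and bool(flags & 0x40)             # ECE echoed back

    print(server_negotiates_ecn("192.0.2.80"))              # illustrative target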

}, doi = {10.1007/978-3-642-36516-4_14}, author = {Mirja Kuehlewind and Sebastian Neuner and Brian Trammell} } @article {Dav2013, title = {Stream-monitoring with blockmon: convergence of network measurements and data analytics platforms}, journal = {SIGCOMM Comput. Commun. Rev.}, volume = {43}, year = {2013}, pages = {29{\textendash}36}, keywords = {data analysis, distributed computing, performance analysis}, issn = {0146-4833}, doi = {10.1145/2479957.2479962}, url = {http://doi.acm.org/10.1145/2479957.2479962}, author = {Simoncelli, Davide and Maurizio Dusi and Francesco Gringoli and Saverio Niccolini} } @article {Tra2013, title = {Temporal locality in today{\textquoteright}s content caching: why it matters and how to model it}, journal = {ACM SIGCOMM Computer Communication Review}, volume = {43}, year = {2013}, month = {10/2013}, chapter = {5}, abstract = {

The dimensioning of caching systems represents a difficult task in the design of infrastructures for content distribution in the current Internet. This paper addresses the problem of defining a realistic arrival process for the content requests generated by users, due to its critical importance for both analytical and simulative evaluations of the performance of caching systems. First, with the aid of YouTube traces collected inside operational residential networks, we identify the characteristics of real traffic that need to be considered or can be safely neglected in order to accurately predict the performance of a cache. Second, we propose a new parsimonious traffic model, named the Shot Noise Model (SNM), that natively captures the dynamics of content popularity, whilst still being sufficiently simple to be employed effectively for both analytical and scalable simulative studies of caching systems. Finally, our results show that the SNM presents a much better solution to account for the temporal locality observed in real traffic compared to existing approaches.
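
A toy generator in the spirit of a shot-noise request process, under our own simplifying assumption of a rectangular popularity profile (the model's actual shape and parameters are in the paper): contents appear as a Poisson process and each spreads its requests over a finite lifetime.

    import random

    def shot_noise_trace(content_rate, mean_requests, lifetime, horizon, seed=0):
        """Contents are born as a Poisson process of rate `content_rate`; each
        content draws a request volume and spreads those requests uniformly
        over a finite `lifetime` (a rectangular popularity 'shot')."""
        rng = random.Random(seed)
        t, cid, requests = 0.0, 0, []
        while True:
            t += rng.expovariate(content_rate)          # birth of the next content
            if t > horizon:
                break
            volume = int(rng.expovariate(1.0 / mean_requests))   # its popularity
            requests += [(t + rng.uniform(0.0, lifetime), cid) for _ in range(volume)]
            cid += 1
        return sorted(requests)                         # (request_time, content_id)

    trace = shot_noise_trace(content_rate=2.0, mean_requests=5, lifetime=3.0, horizon=10.0)
    print(len(trace), "requests; first three:", trace[:3])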

}, doi = {10.1145/2541468.2541470}, author = {Stefano Traverso and Mohamed Ahmed and Michele Garetto and Paolo Giaccone and Emilio Leonardi and Saverio Niccolini} } @inproceedings {Ale2013, title = {Is There a Case for Mobile Phone Content Pre-staging?}, booktitle = {Proceedings of the Ninth ACM Conference on Emerging Networking Experiments and Technologies (Best Short Paper Award)}, year = {2013}, month = {12/2013}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {

Content caching is a fundamental building block of the Internet. Caches are widely deployed at network edges to improve performance for end-users, and to reduce load on web servers and the backbone network. In mobile 3G/4G networks, however, the bottleneck is at the access link, where bandwidth is shared among all mobile terminals. As such, per-user capacity cannot grow to cope with the traffic demand. Unfortunately, caching policies would not reduce the load on the wireless link, which would have to carry multiple copies of the same object that is being downloaded by multiple mobile terminals sharing the same access link.

In this paper we investigate whether it is worth pushing the caching paradigm even farther. We hypothesize a system in which mobile terminals implement a local cache, where popular content can be pushed/pre-staged. This exploits the peculiar broadcast capability of the wireless channels to replicate content "for free" on all terminals, saving the cost of transmitting multiple copies of those popular objects. Relying on a large data set collected from a European mobile carrier, we analyse the content popularity characteristics of mobile traffic, and quantify the benefit that the push-to-mobile system would produce. We find that content pre-staging, by proactively and periodically broadcasting "bundles" of popular objects to devices, makes it possible to both i) greatly improve users{\textquoteright} performance and ii) reduce the downloaded volume (number of requests) by up to 20\% (40\%) in optimistic scenarios with a bundle of 100 MB. However, some technical constraints and content characteristics could question the actual gain such a system would reach in practice.

}, keywords = {content pre-staging, mobile networks}, isbn = {978-1-4503-2101-3}, doi = {10.1145/2535372.2535414}, url = {http://doi.acm.org/10.1145/2535372.2535414}, author = {Finamore, Alessandro and Mellia, Marco and Gilani, Zafar and Papagiannaki, Konstantina and Erramilli, Vijay and Grunenberger, Yan} } @inproceedings {Chi2013a, title = {To the Moon and back: are Internet bufferbloat delays really that large}, booktitle = {IEEE INFOCOM Workshop on Traffic Monitoring and Analysis (TMA{\textquoteright}13)}, year = {2013}, doi = {10.1109/INFCOMW.2013.6562886}, url = {http://www.enst.fr/~drossi/paper/rossi13tma-a.pdf}, author = {C. Chirichella and D Rossi} } @article {Tra2013a, title = {Use Case Elaboration and Requirements Specification}, number = {D1.1}, year = {2013}, month = {01/2013}, institution = {mPlane Consortium}, type = {Public Deliverable}, address = {Torino}, abstract = {

The document defines the requirements for the mPlane architecture on the background of a set of scenarios explored by the consortium, a survey of existing comparable measurement systems, platforms and applicable standards, and a set of architectural first principles drawn from the description of work and the consortium{\textquoteright}s experience. As mPlane is intended to be a fully flexible measurement platform, freely integrating existing probes and repositories with ones to be developed in the project, this document is primarily concerned with the definition of interfaces among mPlane components. While it does enumerate capabilities to be provided by these components, these are primarily intended to ensure the platform has the flexibility required to meet all the scenarios envisioned; the enumerations of measurements, metrics, data types, and other component capabilities are therefore not to be construed to limit the scope of work on components within the project to just those scenarios treated in this document; nor do the scenarios enumerated here define the capabilities to be demonstrated in the project{\textquoteright}s integrated trial.

}, keywords = {architecture, measurement, platform, scenario, use case}, issn = {D1.1}, author = {Brian Trammell and Stephan Neuhaus and Francesco Matera and Ernst Biersack and Antonio Barbuzzi and Saverio Niccolini and Mohamed Ahmed and Maurizio Dusi and Tivadar Szemethy and Balazs Szabo and P. Casas and A B{\"a}r and Konstantina Papagiannaki and Yan Grunenberger and Ilias Leontiadis and Rolf Winter and Zied Ben-Houidi and Giovanna Carofiglio and Samir Ghamri-Doudane and Diego Perino and D Rossi} } @inproceedings {Cas2013a, title = {YOUQMON: A System for On-line Monitoring of YouTube QoE in Operational 3G Networks}, booktitle = {31st IFIP Performance}, year = {2013}, abstract = {

YouTube is changing the way operators manage network performance monitoring. In this paper we introduce YOUQMON, a novel on-line monitoring system for assessing the Quality of Experience (QoE) undergone by HSPA/3G customers watching YouTube videos, using network-layer measurements only. YOUQMON combines passive traffic analysis techniques to detect stalling events in YouTube video streams, with a QoE model to map stallings into a Mean Opinion Score reflecting the end-user experience. We evaluate the stalling detection performance of YOUQMON with hundreds of YouTube video streams, and present results showing the feasibility of performing real-time YouTube QoE monitoring in an operational mobile broadband network.
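
A deliberately simplified sketch of the passive stalling-detection idea, not the YOUQMON algorithm or its QoE model (the bitrate, start-up delay, and samples are illustrative): estimate how many seconds of video the received bytes can feed and flag a stall whenever playback catches up with that buffer.

    def count_stalls(samples, video_bitrate_bps, start_delay_s=2.0):
        """samples: list of (timestamp_s, cumulative_bytes) observed on the wire.
        Playback is assumed to start after `start_delay_s` and to advance in
        real time; a stall is flagged when buffered playtime falls behind it."""
        stalls, stalled = 0, False
        for ts, rx_bytes in samples:
            buffered_s = rx_bytes * 8.0 / video_bitrate_bps   # playable seconds received
            playback_s = max(0.0, ts - start_delay_s)         # seconds the player needs
            if playback_s > buffered_s and not stalled:
                stalls, stalled = stalls + 1, True            # a new stalling event
            elif playback_s <= buffered_s:
                stalled = False
        return stalls

    # toy 1-second samples of a 1 Mb/s video whose download slows down mid-way
    samples = [(t, int(1e6 / 8 * min(t, 4) + 3e4 * max(0, t - 4))) for t in range(1, 15)]
    print(count_stalls(samples, video_bitrate_bps=1_000_000), "stalling event(s)")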

}, keywords = {3G Networks, MOS, QoE Monitoring, Stallings, YouTube}, doi = {10.1145/2518025.2518033}, author = {P. Casas and M. Seufert and R. Schatz} } @inproceedings {Val2012, title = {Breaking for commercials: characterizing mobile advertising}, booktitle = {Proceedings of the 2012 ACM conference on Internet measurement conference}, year = {2012}, publisher = {ACM}, organization = {ACM}, address = {Boston, MA}, abstract = {

Mobile phones and tablets can be considered as the first incarnation of the post-PC era. Their explosive adoption rate has been driven by a number of factors, with the most significant influence being applications (apps) and app markets. Individuals and organizations are able to develop and publish apps, and the most popular form of monetization is mobile advertising.

The mobile advertisement (ad) ecosystem has been the target of prior research, but these works typically focus on a small set of apps or approach it from a user privacy perspective. In this work we make use of a unique, anonymized data set corresponding to one day of traffic for a major European mobile carrier with more than 3 million subscribers. We further take a principled approach to characterize mobile ad traffic along a number of dimensions, such as overall traffic, frequency, as well as possible implications in terms of energy on a mobile device.

Our analysis demonstrates a number of inefficiencies in today{\textquoteright}s ad delivery. We discuss the benefits of well-known techniques, such as pre-fetching and caching, to limit the energy and network signalling overhead caused by current systems. A prototype implementation on Android devices demonstrates an improvement of 50\% in terms of energy consumption for offline ad-sponsored apps while limiting the amount of ad-related traffic.

}, keywords = {advertisement, caching, cellular, energy, smartphones, traffic}, isbn = {978-1-4503-1705-4}, doi = {10.1145/2398776.2398812}, url = {http://doi.acm.org/10.1145/2398776.2398812}, author = {Narseo Vallina-Rodriguez and Jay Shah and Alessandro Finamore and Yan Grunenberger and Konstantina Papagiannaki and Hamed Haddadi and Jon Crowcroft} } @inproceedings {Ber2012, title = {DNS to the rescue: Discerning Content and Services in a Tangled Web}, booktitle = {Internet Measurement Conference 2012}, volume = {1}, year = {2012}, month = {11/2012}, pages = {413-426}, publisher = {ACM}, organization = {ACM}, edition = {ACM}, address = {Boston, MA}, abstract = {

A careful perusal of the Internet evolution reveals two major trends - explosion of cloud-based services and video streaming applications. In both of the above cases, the owner (e.g., CNN, YouTube, or Zynga) of the content and the organization serving it (e.g., Akamai, Limelight, or Amazon EC2) are decoupled, thus making it harder to understand the association between the content, owner, and the host where the content resides. This has created a tangled world wide web that is very hard to unwind, impairing ISPs{\textquoteright} and network administrators{\textquoteright} capabilities to control the traffic flowing in their networks.

In this paper, we present DN-Hunter, a system that leverages the information provided by DNS traffic to discern the tangle. Parsing through DNS queries, DN-Hunter tags traffic flows with the associated domain name. This association has several applications and reveals a large amount of useful information: (i) it provides fine-grained traffic visibility even when the traffic is encrypted (i.e., TLS/SSL flows), thus enabling more effective policy controls, (ii) it identifies flows even before the flows begin, thus providing superior network management capabilities to administrators, (iii) it understands and tracks (over time) different CDNs and cloud providers that host content for a particular resource, (iv) it discerns all the services/content hosted by a given CDN or cloud provider in a particular geography and time interval, and (v) it provides insights into all applications/services running on any given layer-4 port number.

We conduct extensive experimental analysis and show results from real traffic traces (including FTTH and 4G ISPs) that support our hypothesis. Simply put, the information provided by DNS traffic is one of the key components required for understanding the tangled web, and bringing the ability to effectively manage network traffic back to the operators.
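
As an illustration only (addresses, domain, and helper names are hypothetical), a minimal sketch of the DNS-to-flow association at the core of such a system: remember, per client, which domain each DNS answer resolved to which server IP, and tag later flows toward those addresses accordingly.

    dns_cache = {}   # (client_ip, server_ip) -> domain name most recently resolved

    def on_dns_answer(client_ip, domain, resolved_ips):
        """Called for every DNS response observed on the wire."""
        for ip in resolved_ips:
            dns_cache[(client_ip, ip)] = domain

    def tag_flow(client_ip, server_ip):
        """Label a new flow with the domain its destination was resolved from,
        even if the payload is encrypted or the flow has not yet started."""
        return dns_cache.get((client_ip, server_ip), "unknown")

    # toy sequence of events (illustrative addresses)
    on_dns_answer("10.0.0.5", "videos.example.com", ["203.0.113.21", "203.0.113.22"])
    print(tag_flow("10.0.0.5", "203.0.113.22"))   # -> videos.example.com
    print(tag_flow("10.0.0.5", "198.51.100.9"))   # -> unknown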

}, keywords = {DNS, mPlane, passive measurement, WP2}, isbn = {978-1-4503-1705-4}, doi = {10.1145/2398776.2398819}, url = {http://dl.acm.org/citation.cfm?id=2398776.2398819\&coll=DL\&dl=GUIDE\&CFID=225051145\&CFTOKEN=42401286}, author = {Ignacio Nicolas Bermudez and Marco Mellia and Maurizio M Munafo{\textquoteright} and Ram Keralapura and Antonio Nucci} } @inproceedings {Tra2012, title = {On Flow Concurrency in the Internet and its Implications for Capacity Sharing}, booktitle = {Proceedings of the Second ACM CoNext Capacity Sharing Workshop (CSWS)}, year = {2012}, month = {12/2012}, address = {Nice, France}, doi = {10.1145/2413219.2413225}, author = {Brian Trammell and Dominik Schatzmann} } @inproceedings {Dra2012, title = {Inside Dropbox: Understanding Personal Cloud Storage Services}, booktitle = {Internet Measurement Conference - IMC}, year = {2012}, month = {11/2012}, publisher = {ACM}, organization = {ACM}, address = {Boston, MA}, abstract = {

Personal cloud storage services are gaining popularity. With a rush of providers to enter the market and an increasing offer of cheap storage space, it is to be expected that cloud storage will soon generate a high amount of Internet traffic. Very little is known about the architecture and the performance of such systems, and the workload they have to face. This understanding is essential for designing efficient cloud storage systems and predicting their impact on the network.

This paper presents a characterization of Dropbox, the leading solution in personal cloud storage in our datasets. By means of passive measurements, we analyze data from four vantage points in Europe, collected during 42 consecutive days. Our contributions are threefold: Firstly, we are the first to study Dropbox, which we show to be the most widely-used cloud storage system, already accounting for a volume equivalent to around one third of the YouTube traffic at campus networks on some days. Secondly, we characterize the workload users in different environments generate to the system, highlighting how this reflects on network traffic. Lastly, our results show possible performance bottlenecks caused by both the current system architecture and the storage protocol. This is exacerbated for users connected far from storage data-centers.

All measurements used in our analyses are publicly available in anonymized form at the SimpleWeb trace repository: http://traces.simpleweb.org/dropbox/

}, keywords = {Dropbox, passive measurement}, issn = {978-1-4503-1705-4}, doi = {10.1145/2398776.2398827}, url = {http://dl.acm.org/citation.cfm?id=2398776.2398827\&coll=DL\&dl=GUIDE\&CFID=225051145\&CFTOKEN=42401286}, author = {Idilio Drago and Marco Mellia and Maurizio M Munafo{\textquoteright} and Anna Sperotto and Ramin Sadre and Aiko Pras} } @inproceedings {Gon2012, title = {Interaction or Interference: can AQM and Low Priority Congestion Control Successfully Collaborate}, booktitle = {ACM CoNEXT, Extended Abstract}, year = {2012}, month = {12/2012}, abstract = {

Heterogeneity in the Internet ecosystem sometimes turns interaction into interference. Over the years, active queue management (AQM) and end-to-end low-priority congestion control (LPCC) have been proposed as alternative solutions to counter the persistently full buffer problem -- which recently became popular under the {\textquoteleft}{\textquoteleft}bufferbloat{\textquoteright}{\textquoteright} term. In this work, we point out the existence of a negative interplay among AQM and LPCC techniques. Intuitively, as AQM is designed to penalize the most aggressive flows, it mainly hits best-effort TCP: it follows that LPCC is not able to maintain its low priority, thus becoming as aggressive as TCP. Through an extended set of simulations of various AQM policies and LPCC protocols, including the very recent CoDel AQM and LEDBAT LPCC proposals, we point out that this interference is quite universal and deserves further attention.

}, doi = {10.1145/2413247.2413263}, url = {http://www.enst.fr/drossi/paper/rossi12conext.pdf}, author = {YiXi Gong and D Rossi and C. Testa and S. Valenti and D. Taht} } @inproceedings {Fin2012, title = {The need for an intelligent measurement plane: The example of time-variant CDN policies}, booktitle = {Telecommunications Network Strategy and Planning Symposium (NETWORKS), 2012 XVth International }, year = {2012}, month = {10/2012}, pages = {1 - 6 }, abstract = {In this paper we characterize how web-based services are delivered by large organizations in today{\textquoteright}s Internet. Taking advantage oftwo week-long data sets separated in time by 10 months and reporting the web activity of more than 10,000 ADSL residential customers, we identify the services offered by large organizations like Google, Akamai and Amazon. We then compare theevolution of both policies used to serve requests, and the infrastructure they use to match the users{\textquoteright} demand. Results depict anovercrowded scenario in constant evolution. Big-players are more and more responsible for the majority of the volume and a plethora of other organizations offering similar or more specific services through different CDNs and traffic policies. Unfortunately, no standard tools and methodologies are available to capture and expose the hidden properties of this in constant evolution picture. A deeper understanding of such dynamics is however fundamental to improve the performance of current and future Internet. To this extend, we claim the need for a Internet-wide, standard, flexible and intelligent measurement plane to be added tothe current Internet infrastructure.}, keywords = {Facebook, Google, Monitoring, Organizations, Servers, Streaming media, Throughput}, issn = {978-1-4673-1390-2}, doi = {http://dx.doi.org/10.1109/NETWKS.2012.6381662}, author = {Alessandro Finamore and Vinicius Gehlen and Marco Mellia and Maurizio M Munafo{\textquoteright}} } @inproceedings {Rio2012, title = {Wire-speed statistical classification of network traffic on commodity hardware}, booktitle = {ACM Internet Measurement Conference (IMC)}, year = {2012}, month = {11/2012}, abstract = {

In this paper we present a software-based traffic classification engine running on commodity multi-core hardware, able to process in real-time aggregates of up to 14.2Mpps over a single 10Gbps interface -- i.e., the maximum possible packet rate over a 10Gbps Ethernet link given the minimum frame size of 64 Bytes. This significant advance with respect to the current state of the art in terms of achieved classification rates is made possible by: (i) the use of an improved network driver, PacketShader, to efficiently move batches of packets from the NIC to the main CPU; (ii) the use of lightweight statistical classification techniques exploiting the size of the first few packets of every observed flow; (iii) a careful tuning of critical parameters of the hardware environment and the software application itself.
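
A minimal sketch of the "first few packet sizes" feature idea only, using scikit-learn on toy labeled flows; it says nothing about the PacketShader-based, multi-core engine that provides the wire-speed part.

    import numpy as np
    from sklearn.tree import DecisionTreeClassifier

    N_PKTS = 4   # number of leading packets whose sizes form the feature vector

    def flow_features(packet_sizes):
        """Pad/truncate a flow's packet-size sequence to its first N_PKTS sizes."""
        head = list(packet_sizes[:N_PKTS])
        return np.array(head + [0] * (N_PKTS - len(head)), dtype=float)

    # toy labeled flows (sizes in bytes of the first packets; labels are illustrative)
    train_flows = [([70, 1460, 1460, 1460], "bulk"),
                   ([90, 120, 80, 110], "interactive"),
                   ([64, 1448, 1448, 1448], "bulk"),
                   ([100, 140, 90, 95], "interactive")]
    X = np.vstack([flow_features(sizes) for sizes, _ in train_flows])
    y = [label for _, label in train_flows]

    clf = DecisionTreeClassifier(max_depth=3).fit(X, y)
    print(clf.predict([flow_features([72, 1452, 1452])]))   # -> ['bulk']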

}, doi = {10.1145/2398776.2398784}, author = {Pedro Maria Santiago del Rio and D Rossi and Francesco Gringoli and Lorenzo Nava and Luca Salgarelli and Javier Aracil} }