Publications
2023
Feltin, Thomas; Marché, Léo; Cordero, Juan Antonio; Brockners, Frank; Clausen, Thomas
DNN Partitioning for Inference Throughput Acceleration at the Edge Journal Article
In: IEEE Access, 2023, ISSN: 2169-3536.
@article{nokey,
title = {DNN Partitioning for Inference Throughput Acceleration at the Edge},
author = {Thomas Feltin and Léo Marché and Juan Antonio Cordero and Frank Brockners and Thomas Clausen},
publisher = {IEEE},
url = {https://ieeexplore.ieee.org/document/10042405},
doi = {10.1109/ACCESS.2023.3244497},
issn = {2169-3536},
year = {2023},
date = {2023-02-13},
journal = {IEEE Access},
abstract = {Deep neural network (DNN) inference on streaming data requires computing resources to satisfy inference throughput requirements. However, latency and privacy sensitive deep learning applications cannot afford to offload computation to remote clouds because of the implied transmission cost and lack of trust in third-party cloud providers. Among solutions to increase performance while keeping computation on a constrained environment, hardware acceleration can be onerous, and model optimization requires extensive design efforts while hindering accuracy. DNN partitioning is a third complementary approach, and consists of distributing the inference workload over several available edge devices, taking into account the edge network properties and the DNN structure, with the objective of maximizing the inference throughput (number of inferences per second). This paper introduces a method to predict inference and transmission latencies for multi-threaded distributed DNN deployments, and defines an optimization process to maximize the inference throughput. A branch and bound solver is then presented and analyzed to quantify the achieved performance and complexity. This analysis has led to the definition of the acceleration region, which describes deterministic conditions on the DNN and network properties under which DNN partitioning is beneficial. Finally, experimental results confirm the simulations and show inference throughput improvements in sample edge deployments.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2022
Yao, Zhiyuan; Desmouceaux, Yoann; Cordero, Juan Antonio; Townsley, Mark; Clausen, Thomas Heide
Aquarius-Enable Fast, Scalable, Data-Driven Service Management in the Cloud Journal Article
In: IEEE Transactions on Network and Service Management, 2022, ISSN: 1932-4537.
@article{nokeyi,
title = {Aquarius---Enable Fast, Scalable, Data-Driven Service Management in the Cloud},
author = {Zhiyuan Yao and Yoann Desmouceaux and Juan Antonio Cordero and Mark Townsley and Thomas Heide Clausen},
url = {https://ieeexplore.ieee.org/abstract/document/9852806},
doi = {10.1109/TNSM.2022.3197130},
issn = {1932-4537},
year = {2022},
date = {2022-12-01},
urldate = {2022-12-01},
journal = {IEEE Transactions on Network and Service Management},
abstract = {In order to dynamically manage and update networking policies in cloud data centers, Virtual Network Functions (VNFs) use, and therefore actively collect, networking state information---and in the process, incur additional control signaling and management overhead, especially in larger data centers. In the meantime, VNFs in production prefer distributed and straightforward heuristics over advanced learning algorithms to avoid intractable additional processing latency under high-performance and low-latency networking constraints. This paper identifies the challenges of deploying learning algorithms in the context of cloud data centers, and proposes Aquarius to bridge the application of machine learning (ML) techniques on distributed systems and service management. Aquarius passively yet efficiently gathers reliable observations, and enables the use of ML techniques to collect, infer, and supply accurate networking state information---without incurring additional signaling and management overhead. It offers fine-grained and programmable visibility to distributed VNFs, and enables both open- and close-loop control over networking systems. This paper illustrates the use of Aquarius with a traffic classifier, an auto-scaling system, and a load balancer---and demonstrates the use of three different ML paradigms---unsupervised, supervised, and reinforcement learning, within Aquarius, for network state inference and service management. Testbed evaluations show that Aquarius suitably improves network state visibility and brings notable performance gains for various scenarios with low overhead.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Yao, Zhiyuan; Ding, Zihan
Learning Distributed and Fair Policies for Network Load Balancing as Markov Potential Game Proceedings Article
In: 36th Conference on Neural Information Processing Systems (NeurIPS 2022), 2022.
@inproceedings{nokeyj,
title = {Learning Distributed and Fair Policies for Network Load Balancing as Markov Potential Game},
author = {Zhiyuan Yao and Zihan Ding},
url = {https://arxiv.org/pdf/2206.01451},
year = {2022},
date = {2022-11-28},
urldate = {2022-11-28},
booktitle = {36th Conference on Neural Information Processing Systems (NeurIPS 2022)},
abstract = {This paper investigates the network load balancing problem in data centers (DCs) where multiple load balancers (LBs) are deployed, using the multi-agent reinforcement learning (MARL) framework. The challenges of this problem consist of the heterogeneous processing architecture and dynamic environments, as well as limited and partial observability of each LB agent in distributed networking systems, which can largely degrade the performance of in-production load balancing algorithms in real-world setups. Centralised-training-decentralised-execution (CTDE) RL scheme has been proposed to improve MARL performance, yet it incurs -- especially in distributed networking systems, which prefer distributed and plug-and-play design scheme -- additional communication and management overhead among agents. We formulate the multi-agent load balancing problem as a Markov potential game, with a carefully and properly designed workload distribution fairness as the potential function. A fully distributed MARL algorithm is proposed to approximate the Nash equilibrium of the game. Experimental evaluations involve both an event-driven simulator and real-world system, where the proposed MARL load balancing algorithm shows close-to-optimal performance in simulations, and superior results over in-production LBs in the real-world system.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Yao, Zhiyuan; Desmouceaux, Yoann; Cordero, Juan Antonio; Townsley, Mark; Clausen, Thomas Heide
Efficient Data-Driven Network Functions Proceedings Article
In: 30th International Symposium on the Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS 2022), 2022.
@inproceedings{nokeyg,
title = {Efficient Data-Driven Network Functions},
author = {Zhiyuan Yao and Yoann Desmouceaux and Juan Antonio Cordero and Mark Townsley and Thomas Heide Clausen},
url = {https://arxiv.org/pdf/2208.11385},
year = {2022},
date = {2022-10-18},
urldate = {2022-10-18},
booktitle = {30th International Symposium on the Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS 2022)},
abstract = {Cloud environments require dynamic and adaptive networking policies. It is preferred to use heuristics over advanced learning algorithms in Virtual Network Functions (VNFs) in production because of high-performance constraints. This paper proposes Aquarius to passively yet efficiently gather observations and enable the use of machine learning to collect, infer, and supply accurate networking state information---without incurring additional signalling and management overhead. This paper illustrates the use of Aquarius with a traffic classifier, an autoscaling system, and a load balancer---and demonstrates the use of three different machine learning paradigms---unsupervised, supervised, and reinforcement learning, within Aquarius, for inferring network state. Testbed evaluations show that Aquarius increases network state visibility and brings notable performance gains with low overhead.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Yao, Zhiyuan; Ding, Zihan; Clausen, Thomas Heide
Multi-agent reinforcement learning for network load balancing in data center Proceedings Article
In: 31st ACM International Conference on Information and Knowledge Management (CIKM'22), 2022.
@inproceedings{nokeyh,
  author     = {Zhiyuan Yao and Zihan Ding and Thomas Heide Clausen},
  title      = {Multi-agent reinforcement learning for network load balancing in data center},
  booktitle  = {31st ACM International Conference on Information and Knowledge Management (CIKM'22)},
  year       = {2022},
  date       = {2022-10-17},
  urldate    = {2022-10-17},
  doi        = {10.1145/3511808.3557133},
  url        = {https://www.researchgate.net/profile/Zhiyuan_Yao13/publication/358163217_Multi-Agent_Reinforcement_Learning_for_Network_Load_Balancing_in_Data_Center/links/62fe5fd3e3c7de4c34666311/Multi-Agent-Reinforcement-Learning-for-Network-Load-Balancing-in-Data-Center.pdf},
  abstract   = {This paper presents the network load balancing problem, a challenging real-world task for multi-agent reinforcement learning (MARL) methods. Conventional heuristic solutions like Weighted-Cost Multi-Path (WCMP) and Local Shortest Queue (LSQ) are less flexible to the changing workload distributions and arrival rates, with a poor balance among multiple load balancers. The cooperative network load balancing task is formulated as a Dec-POMDP problem, which naturally induces the MARL methods. To bridge the reality gap for applying learning-based methods, all models are directly trained and evaluated on a real-world system from moderate- to large-scale setups. Experimental evaluations show that the independent and “selfish” load balancing strategies are not necessarily the globally optimal ones, while the proposed MARL solution has a superior performance over different realistic settings. Additionally, the potential difficulties of the application and deployment of MARL methods for network load balancing are analysed, which helps draw the attention of the learning and network communities to such challenges.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Haddad, Rami; Melki, Reem; Cosma, Serban
OpenAPI Extended Security Scheme: A Method to Reduce the Prevalence of BOLA Proceedings Article
In: API Specifications Conference (ASC), OpenAPI Initiative 2022.
@inproceedings{haddad2022openapi,
title = {OpenAPI Extended Security Scheme: A Method to Reduce the Prevalence of BOLA},
author = {Rami Haddad and Reem Melki and Serban Cosma},
year = {2022},
date = {2022-09-19},
urldate = {2022-09-19},
booktitle = {API Specifications Conference (ASC)},
organization = {OpenAPI Initiative},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Yao, Zhiyuan; Desmouceaux, Yoann; Cordero, Juan Antonio; Clausen, Thomas Heide
HLB: Towards Load-Aware Load-Balancing Journal Article
In: IEEE/ACM Transactions on Networking, 2022, ISSN: 1558-2566.
@article{yao2022hlb,
title = {HLB: Towards Load-Aware Load-Balancing},
author = {Zhiyuan Yao and Yoann Desmouceaux and Juan Antonio Cordero and Thomas Heide Clausen},
doi = {10.1109/TNET.2022.3177163},
issn = {1558-2566},
year = {2022},
date = {2022-06-05},
urldate = {2022-06-05},
journal = {IEEE/ACM Transactions on Networking},
abstract = {The purpose of network load balancers is to optimize quality of service to the users of a set of servers - basically, to improve response times and to reducing computing resources - by properly distributing workloads. This paper proposes a distributed, application-agnostic, Hybrid Load Balancer (HLB) that - without explicit monitoring or signaling - infers server occupancies and processing speeds, which allows making optimised workload placement decisions. This approach is evaluated both through simulations and extensive experiments, including synthetic workloads and Wikipedia replays on a real-world testbed. Results show significant performance gains, in terms of both response time and system utilisation, when compared to existing load-balancing algorithms.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2021
Yao, Zhiyuan; Ding, Zihan; Clausen, Thomas Heide
Reinforced Workload Distribution Fairness Proceedings Article
In: Machine Learning for Systems at 35th Conference on Neural Information Processing Systems (NeurIPS 2021), 2021.
@inproceedings{yao2021reinforced,
title = {Reinforced Workload Distribution Fairness},
author = {Zhiyuan Yao and Zihan Ding and Thomas Heide Clausen},
url = {https://www.thomasclausen.net/wp-content/uploads/2021/11/2111.00008-1.pdf},
year = {2021},
date = {2021-12-01},
urldate = {2021-12-01},
booktitle = {Machine Learning for Systems at 35th Conference on Neural Information Processing Systems (NeurIPS 2021)},
abstract = {Network load balancers are central components in data centers, that distributes workloads across multiple servers and thereby contribute to offering scalable services. However, when load balancers operate in dynamic environments with limited monitoring of application server loads, they rely on heuristic algorithms that require manual configurations for fairness and performance. To alleviate that, this paper proposes a distributed asynchronous reinforcement learning mechanism to---with no active load balancer state monitoring and limited network observations---improve the fairness of the workload distribution achieved by a load balancer. The performance of proposed mechanism is evaluated and compared with state-of-the-art load balancing algorithms in a simulator, under configurations with progressively increasing complexities. Preliminary results show promise in RL-based load balancing algorithms, and identify additional challenges and future research directions, including reward function design and model scalability.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Yao, Zhiyuan; Desmouceaux, Yoann; Townsley, Mark; Clausen, Thomas Heide
Towards Intelligent Load Balancing in Data Centers Proceedings Article
In: Machine Learning for Systems at 35th Conference on Neural Information Processing Systems (NeurIPS 2021), Dec 2021, Sydney, Australia, 2021.
@inproceedings{yao2021intelligent,
title = {Towards Intelligent Load Balancing in Data Centers},
author = {Zhiyuan Yao and Yoann Desmouceaux and Mark Townsley and Thomas Heide Clausen},
url = {https://www.thomasclausen.net/wp-content/uploads/2021/11/2110.15788.pdf},
year = {2021},
date = {2021-12-01},
urldate = {2021-12-01},
booktitle = {Machine Learning for Systems at 35th Conference on Neural Information Processing Systems (NeurIPS 2021)},
venue = {Sydney, Australia},
abstract = {Network load balancers are important components in data centers to provide scalable services. Workload distribution algorithms are based on heuristics, e.g., Equal-Cost Multi-Path (ECMP), Weighted-Cost Multi-Path (WCMP) or naive machine learning (ML) algorithms, e.g., ridge regression. Advanced ML-based approaches help achieve performance gain in different networking and system problems. However, it is challenging to apply ML algorithms on networking problems in real-life systems. It requires domain knowledge to collect features from low-latency, high-throughput, and scalable networking systems, which are dynamic and heterogenous. This paper proposes Aquarius to bridge the gap between ML and networking systems and demonstrates its usage in the context of network load balancers. This paper demonstrates its ability of conducting both offline data analysis and online model deployment in realistic systems. The results show that the ML model trained and deployed using Aquarius improves load balancing performance yet they also reveals more challenges to be resolved to apply ML for networking systems.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Rizzi, Carmine; Yao, Zhiyuan; Desmouceaux, Yoann; Townsley, Mark; Clausen, Thomas Heide
Charon: Load-Aware Load-Balancing in P4 Proceedings Article
In: 1st Joint International Workshop on Network Programmability & Automation (NetPA) at 17th International Conference on Network and Service Management (CNSM 2021), 2021.
@inproceedings{rizzi2021charon,
title = {Charon: Load-Aware Load-Balancing in P4},
author = {Carmine Rizzi and Zhiyuan Yao and Yoann Desmouceaux and Mark Townsley and Thomas Heide Clausen},
url = {https://www.thomasclausen.net/wp-content/uploads/2021/11/2110.14389.pdf},
year = {2021},
date = {2021-10-01},
urldate = {2021-01-01},
booktitle = {1st Joint International Workshop on Network Programmability \& Automation (NetPA) at 17th International Conference on Network and Service Management (CNSM 2021)},
abstract = {Load-Balancers play an important role in data centers as they distribute network flows across application servers and guarantee per-connection consistency. It is hard however to make fair load balancing decisions so that all resources are efficiently occupied yet not overloaded. Tracking connection states allows to infer server load states and make informed decisions, but at the cost of additional memory space consumption. This makes it hard to implement on programmable hardware, which has constrained memory but offers line-rate performance. This paper presents Charon, a stateless load-aware load balancer that has line-rate performance implemented in P4-NetFPGA. Charon passively collects load states from application servers and employs the power-of-2-choices scheme to make data-driven load balancing decisions and improve resource utilization. Per-connection consistency is preserved statelessly by encoding server ID in a covert channel. The prototype design and implementation details are described in this paper. Simulation results show performance gains in terms of load distribution fairness, quality of service, throughput and processing latency.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Hawari, Mohammed; Clausen, Thomas
OP4T: Bringing Advanced Network Packet Timestamping into the Field Proceedings Article
In: 2021 International Conference on Information Networking (ICOIN), pp. 137-142, 2021.
@inproceedings{Hawari2021,
title = {OP4T: Bringing Advanced Network Packet Timestamping into the Field},
author = {Mohammed Hawari and Thomas Clausen},
url = {https://www.thomasclausen.net/wp-content/uploads/2021/03/paper.pdf},
doi = {10.1109/ICOIN50884.2021.9333927},
year = {2021},
date = {2021-01-13},
booktitle = {2021 International Conference on Information Networking (ICOIN)},
pages = {137--142},
abstract = {Because it is very bursty, the microsecond-scale temporal behaviour of network traffic in data-centres is challenging to measure and understand. To bring observability into data-centre networks, this paper introduces the Open Platform for Programmable Precise Packet Timestamping (OP4T), a hardware architecture, targeting Field-Programmable Gateway Arrays (FPGAs), integrated into data-centre servers as a Smart Network Interface Card (SmartNIC), and flexible enough to enable advanced latency diagnosis.
In this paper, OP4T is specified, and an open-source implementation of that architecture is proposed, targeting the NetFPGA SUME prototyping board. By leveraging the P4 programming language, and partial reconfiguration, that open-source implementation is experimentally shown to enable in-band, precise packet timestamping, without sacrificing the achievable throughput. As an illustration, OP4T is shown to be usable to measure fine-grained properties of a software packet forwarder, e.g., packet batching.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
In this paper, OP4T is specified, and an open-source implementation of that architecture is proposed, targeting the NetFPGA SUME prototyping board. By leveraging the P4 programming language, and partial reconfiguration, that open-source implementation is experimentally shown to enable in-band, precise packet timestamping, without sacrificing the achievable throughput. As an illustration, OP4T is shown to be usable to measure fine-grained properties of a software packet forwarder, e.g., packet batching.
2020
Desmouceaux, Yoann; Enguehard, Marcel; Clausen, Thomas
Joint Monitorless Load-Balancing and Autoscaling for Zero-Wait-Time in Data Centers Journal Article
In: IEEE Transactions on Network and Service Management, 2020.
@article{Desmouceaux2020,
title = {Joint Monitorless Load-Balancing and Autoscaling for Zero-Wait-Time in Data Centers},
author = {Yoann Desmouceaux and Marcel Enguehard and Thomas Clausen},
url = {https://www.epizeuxis.net/wp-content/uploads/2020/12/Joint-Monitorless-Load-Balancing-and-Autoscaling-for-Zero-Wait-Time-in-Data-Centers.pdf},
doi = {10.1109/TNSM.2020.3045059},
year = {2020},
date = {2020-12-31},
urldate = {2020-12-31},
journal = {IEEE Transactions on Network and Service Management},
abstract = {Cloud architectures achieve scaling through two main functions: (i) load-balancers, which dispatch queries among replicated virtualized application instances, and (ii) autoscalers, which automatically adjust the number of replicated instances to accommodate variations in load patterns. These functions are often provided through centralized load monitoring, incurring operational complexity. This paper introduces a unified and centralized-monitoring-free architecture achieving both autoscaling and load-balancing, reducing operational overhead while increasing response time performance. Application instances are virtually ordered in a chain, and new queries are forwarded along this chain until an instance, based on its local load, accepts the query. Autoscaling is triggered by the last application instance, which inspects its average load and infers if its chain is under- or over-provisioned. An analytical model of the system is derived, and proves that the proposed technique can achieve asymptotic zero-wait time with high (and controllable) probability. This result is confirmed by extensive simulations, which highlight close-to-ideal performance in terms of both response time and resource costs.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2019
Desmouceaux, Yoann; Cordero, Juan Antonio; Clausen, Thomas
Reliable B.I.E.R. with Peer Caching Journal Article
In: IEEE Transactions on Network and Service Management, 2019, ISSN: 1932-4537.
@article{Desmouceaux2019,
title = {Reliable B.I.E.R. with Peer Caching},
author = {Yoann Desmouceaux and Juan Antonio Cordero and Thomas Clausen},
url = {https://www.thomasclausen.net/wp-content/uploads/2019/11/Reliable-B.I.E.R.-with-Peer-Caching.pdf},
doi = {10.1109/TNSM.2019.2950158},
issn = {1932-4537},
year = {2019},
date = {2019-11-01},
journal = {IEEE Transactions on Network and Service Management},
abstract = {Multicast protocols usually require building multicast trees and maintaining state in intermediate routers, incurring operation complexity. B.I.E.R. (Bit-Indexed Explicit Replication) ambitions to alleviate this complexity by allowing for source-driven selection of destinations and state-less packet forwarding. B.I.E.R. can also be used to achieve reliable delivery of content, by retransmitting packet to the exact set of destinations which have missed it. While B.I.E.R.-based reliable multicast exhibits attractive performance attributes, repair of a lost packet is achieved through source retransmissions, which may be costly and even unnecessary if close peers are able to provide a copy of the packet.
Thus, this paper extends the use of reliable B.I.E.R. multicast to allow recoveries from peers, using Segment Routing (SR) to steer retransmission requests through potential candidates. A framework is introduced, which can accommodate different policies for the selection of candidate peers for retransmissions. Simple (both static and adaptive) policies are introduced and analyzed, both (i) theoretically and (ii) by way of simulations in data-center-like and real-world topologies. Results indicate that local peer recovery is able to substantially reduce the overall retransmission traffic, and that this can be achieved through simple policies, where no signaling is required to build a set of candidate peers.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Thus, this paper extends the use of reliable B.I.E.R. multicast to allow recoveries from peers, using Segment Routing (SR) to steer retransmission requests through potential candidates. A framework is introduced, which can accommodate different policies for the selection of candidate peers for retransmissions. Simple (both static and adaptive) policies are introduced and analyzed, both (i) theoretically and (ii) by way of simulations in data-center-like and real-world topologies. Results indicate that local peer recovery is able to substantially reduce the overall retransmission traffic, and that this can be achieved through simple policies, where no signaling is required to build a set of candidate peers.
2018
Desmouceaux, Yoann; Townsley, Mark; Clausen, Thomas
Zero-Loss Virtual Machine Migration with IPv6 Segment Routing Proceedings Article
In: Proceedings 1st SR+SFC Workshop at IEEE CNSM, 2018.
@inproceedings{Desmouceaux2018e,
  author     = {Yoann Desmouceaux and Mark Townsley and Thomas Clausen},
  title      = {Zero-Loss Virtual Machine Migration with IPv6 Segment Routing},
  booktitle  = {Proceedings 1st SR+SFC Workshop at IEEE CNSM},
  year       = {2018},
  date       = {2018-11-01},
  url        = {https://www.thomasclausen.net/wp-content/uploads/2019/11/Zero-Loss-Virtual-Machine-Migration-with-Segment-Routing.pdf},
  abstract   = {With the development of large-scale data centers, Virtual Machine (VM) migration is a key component for resource optimization, cost reduction, and maintenance. From a network perspective, traditional VM migration mechanisms rely on the hypervisor running at the destination host advertising the new location of the VM once migration is complete. However, this creates a period of time during which the VM is not reachable, yielding packet loss.
This paper introduces a method to perform zero-loss VM migration by using IPv6 Segment Routing (SR). Rather than letting the hypervisor update a locator mapping after VM migration is complete, a logical path consisting of the source and destination hosts is pre-provisioned. Packets destined to the migrating VM are sent through this path using SR, shortly before, during, and shortly after migration – the virtual router on the source host being in charge of forwarding packets locally if the VM migration has not completed yet, or to the destination host otherwise. The proposed mechanism is implemented as a VPP plugin, and feasibility of zero-loss VM migration is demonstrated with various workloads. Evaluation shows that this yields benefits in terms of session opening latency and TCP throughput.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
This paper introduces a method to perform zero-loss VM migration by using IPv6 Segment Routing (SR). Rather than letting the hypervisor update a locator mapping after VM migration is complete, a logical path consisting of the source and destination hosts is pre-provisioned. Packets destined to the migrating VM are sent through this path using SR, shortly before, during, and shortly after migration – the virtual router on the source host being in charge of forwarding packets locally if the VM migration has not completed yet, or to the destination host otherwise. The proposed mechanism is implemented as a VPP plugin, and feasibility of zero-loss VM migration is demonstrated with various workloads. Evaluation shows that this yields benefits in terms of session opening latency and TCP throughput.
Toussaint, Arthur; Hawari, Mohammed; Clausen, Thomas
Chasing Linux Jitter Sources for Uncompressed Video Book Section
In: Proceedings of the 1st Workshop on High-Precision Networks Operations and Control (HiPNet 2018) at the IEEE 14th International Conference on Network and Service Management (CNSM), 2018.
@inproceedings{Toussaint2018,
title = {Chasing Linux Jitter Sources for Uncompressed Video},
author = {Arthur Toussaint and Mohammed Hawari and Thomas Clausen},
url = {https://www.thomasclausen.net/wp-content/uploads/2019/11/CNSM-HipNet-Toussaint-et.-al.-Chasing-Linux-Jutter-Sources-for-Uncompressed-Video.pdf},
year = {2018},
date = {2018-11-01},
booktitle = {Proceedings of the 1st Workshop on High-Precision Networks Operations and Control (HiPNet 2018) at the IEEE 14th International Conference on Network and Service Management (CNSM)},
abstract = {Beyond the transport of uncompressed video over IP networks, defined in standards such as ST2022-6, the ability to build software-based Video Processing Functions (VPF) on commodity hardware and using general purpose Operating Systems is the next logical step in the evolution of the media industry towards an “all-IP” world. In that context, understanding the jitter induced on an ST2022-6 stream by a commodity platform is essential. This paper describes a general methodology to enumerate jitter sources on commodity platforms and to quantify their relative contribution to the overall system jitter. The methodology is applied to the Linux kernel, producing a classification of the different sources of jitter, and a quantification of their impact.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Pit-Claudel, Benoit; Desmouceaux, Yoann; Pfister, Pierre; Townsley, Mark; Clausen, Thomas
Stateless Load-Aware Load Balancing in P4 Proceedings Article
In: 1st P4 European Workshop (P4EU), 2018.
@inproceedings{Pit--Claudel2018,
title = {Stateless Load-Aware Load Balancing in P4},
author = {Benoit Pit-Claudel and Yoann Desmouceaux and Pierre Pfister and Mark Townsley and Thomas Clausen},
url = {http://www.thomasclausen.net/en/p4eu-2018/},
year = {2018},
date = {2018-09-24},
booktitle = {1st P4 European Workshop (P4EU)},
abstract = {Leveraging the performance opportunities offered by programmable hardware, stateless load-balancing architectures allowing line-rate processing are appealing. Moreover, it has been demonstrated that significantly fairer load-balancing can be achieved by an architecture that considers the actual load of application instances when dispatching connection requests. Architectures which maintain per-connection state for resiliency and/or track application load state for fairness are, however, at odds with hardware-imposed memory constraints. Thus, a desirable load-balancer for programmable hardware would be both stateless and able to dispatch queries to application instances according to their current load.
This paper presents SHELL, a stateless application-aware load-balancer combining (i) a power-of-choices scheme using IPv6 Segment Routing to dispatch new flows to a suitable application instance from among multiple candidates, and (ii) the use of a covert channel to record/report which flow was assigned to which candidate in a stateless fashion. In addition, consistent hashing versioning is used to ensure that connections are maintained to the correct application instance, using Segment Routing to “browse” through the history when needed. The stateless design of SHELL makes it suitable for hardware implementation, and this paper describes the implementation of a P4-NetFPGA prototype. A performance evaluation of this SHELL implementation demonstrates throughput and latency characteristics comparable to other stateless load-balancing implementations, while enabling application instance-load-aware dispatching and significantly increasing per-connection consistency resiliency.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
This paper presents SHELL, a stateless application-aware load-balancer combining (i) a power-of-choices scheme using IPv6 Segment Routing to dispatch new flows to a suitable application instance from among multiple candidates, and (ii) the use of a covert channel to record/report which flow was assigned to which candidate in a stateless fashion. In addition, consistent hashing versioning is used to ensure that connections are maintained to the correct application instance, using Segment Routing to “browse” through the history when needed. The stateless design of SHELL makes it suitable for hardware implementation, and this paper describes the implementation of a P4-NetFPGA prototype. A performance evaluation of this SHELL implementation demonstrates throughput and latency characteristics comparable to other stateless load-balancing implementations, while enabling application instance-load-aware dispatching and significantly increasing per-connection consistency resiliency.
Cordero, Juan Antonio; Lou, Wei
Take your time, get it closer: content dissemination within mobile pedestrian crowds Journal Article
In: Wireless Networks, 2018, ISSN: 1572-8196.
@article{Cordero2018,
title = {Take your time, get it closer: content dissemination within mobile pedestrian crowds},
author = {Juan Antonio Cordero and Wei Lou},
url = {https://doi.org/10.1007/s11276-018-1731-2},
doi = {10.1007/s11276-018-1731-2},
issn = {1572-8196},
year = {2018},
date = {2018-05-05},
journal = {Wireless Networks},
abstract = {The explosion of traffic demands in the edge of the Internet, mostly by mobile users, is putting under pressure current networking infrastructures. This is particularly acute when huge amounts of users and active wireless devices gather in reduced geographical spaces, increasing the risk of exceeding planned capacity of deployed infrastructure. This trend motivates research on edge computing, and in particular, on mechanisms to offload or address locally part of the user injected traffic at the access infrastructure, thus reducing the need of Internet requests and retrievals. This paper concentrates on the ability of mobile crowds---and corresponding access networks---to fulfill content requests originated within the mesh, with minimal intervention of the Internet infrastructure. Simple heuristics are revisited, proposed, discussed and evaluated to improve autonomous content discovery and dissemination within high-density, low-mobility crowds, by combining notions already explored for MANET routing: deliberate jittering and autonomous distance-based overlay pruning. Results over synthetic networks and real mobility traces indicate that these mechanisms improve efficiency and quality of content request discoveries, by reducing significantly collisions and increasing stability of discovered paths in dense pedestrian crowds.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Desmouceaux, Yoann; Toubaline, Sonia; Clausen, Thomas
Flow-Aware Workload Migration in Data Centers Journal Article
In: Springer - Journal of Network and Systems Management (JONS), 2018.
@article{Desmouceaux2018a,
title = {Flow-Aware Workload Migration in Data Centers},
author = {Yoann Desmouceaux and Sonia Toubaline and Thomas Clausen},
url = {https://link.springer.com/epdf/10.1007/s10922-018-9452-5?author_access_token=qm_40d91CsNLlZ_vZ0tZFPe4RwlQNchNByi7wbcMAY4xSrvbLplDMLQ3AN9vWEoUIxtZAIdnOGAzJH5W3YOrbGteOLvaEXsEE1xFv66lVxTKlL40BAS25fsaLf8w1RJAvY69owHWqhJkTmAZpvdCkQ%3D%3D
https://www.epizeuxis.net/wp-content/uploads/2018/03/jons-2018.pdf},
doi = {10.1007/s10922-018-9452-5},
year = {2018},
date = {2018-03-10},
journal = {Springer - Journal of Network and Systems Management (JONS)},
abstract = {In data centers, subject to workloads with heterogeneous (and sometimes short) lifetimes, workload migration is a way of attaining a more efficient utilization of the underlying physical machines.
To not introduce performance degradation, such workload migration must take into account not only machine resources, and per-task resource requirements, but also application dependencies in terms of network communication.
This article presents a workload migration model capturing all of these constraints.
A linear programming framework is developed allowing accurate representation of per-task resources requirements and inter-task network demands. Using this, a multi-objective problem is formulated to compute a re-allocation of tasks that (i) maximizes the total inter-task throughput, while (ii) minimizing the cost incurred by migration and (iii) allocating the maximum number of new tasks.
A baseline algorithm, solving this multi-objective problem using the $\epsilon$-constraint method is proposed, in order to generate the set of Pareto-optimal solutions. As this algorithm is compute-intensive for large topologies, a heuristic, which computes an approximation of the Pareto front, is then developed, and evaluated on different topologies and with different machine load factors. These evaluations show that the heuristic can provide close-to-optimal solutions, while reducing the solving time by one to two orders of magnitude.
},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
To not introduce performance degradation, such workload migration must take into account not only machine resources, and per-task resource requirements, but also application dependencies in terms of network communication.
This article presents a workload migration model capturing all of these constraints.
A linear programming framework is developed allowing accurate representation of per-task resources requirements and inter-task network demands. Using this, a multi-objective problem is formulated to compute a re-allocation of tasks that (i) maximizes the total inter-task throughput, while (ii) minimizing the cost incurred by migration and (iii) allocating the maximum number of new tasks.
A baseline algorithm, solving this multi-objective problem using the $\epsilon$-constraint method is proposed, in order to generate the set of Pareto-optimal solutions. As this algorithm is compute-intensive for large topologies, a heuristic, which computes an approximation of the Pareto front, is then developed, and evaluated on different topologies and with different machine load factors. These evaluations show that the heuristic can provide close-to-optimal solutions, while reducing the solving time by one to two orders of magnitude.
Desmouceaux, Yoann; Clausen, Thomas; Cordero, Juan Antonio; Townsley, W. Mark
Reliable Multicast with B.I.E.R. Journal Article
In: IEEE/KICS Journal of Communications and Networks (JCN), vol. 20, no. 2, pp. 182-197, 2018.
@article{Desmouceaux0000,
title = {Reliable Multicast with B.I.E.R.},
author = {Yoann Desmouceaux and Thomas Clausen and Juan Antonio Cordero and W. Mark Townsley},
url = {http://www.thomasclausen.net/wp-content/uploads/2018/03/jcn-2018.pdf},
year = {2018},
date = {2018-02-28},
journal = {IEEE/KICS Journal of Communications and Networks (JCN)},
volume = {20},
number = {2},
pages = {182--197},
abstract = {Inter-network multicast protocols, which build and maintain multicast trees, incur both explicit protocol signalling, and maintenance of state in intermediate routers in the network. B.I.E.R. (Bit-Indexed Explicit Replication) is a technique which can provide a multicast service yet removes such complexities: intermediate routers are unencumbered by group management, and no per-group state is to be maintained.
This paper explores the use of B.I.E.R. as a basis for developing an efficient and reliable multicast mechanism, where redundant traffic is avoided, essential traffic is forwarded along shortest paths, and no per-flow state is required in intermediate routers. Evaluated by way of both an analytical model and network simulation both in generic and in real network topologies with varying background traffic loads, the proposed B.I.E.R.-based reliable multicast mechanism exhibits attractive performance attributes: it attains delivery success rates as high as any other reliable multicast service, but with significantly better link utilisation and no per-flow or per-group state in intermediate routers of the network.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
This paper explores the use of B.I.E.R. as a basis for developing an efficient and reliable multicast mechanism, where redundant traffic is avoided, essential traffic is forwarded along shortest paths, and no per-flow state is required in intermediate routers. Evaluated by way of both an analytical model and network simulation both in generic and in real network topologies with varying background traffic loads, the proposed B.I.E.R.-based reliable multicast mechanism exhibits attractive performance attributes: it attains delivery success rates as high as any other reliable multicast service, but with significantly better link utilisation and no per-flow or per-group state in intermediate routers of the network.
Desmouceaux, Yoann; Pfister, Pierre; Tollet, Jérôme; Townsley, W. Mark; Clausen, Thomas
6LB: Scalable and Application-Aware Load Balancing with Segment Routing Journal Article
In: IEEE/ACM Transactions on Networking, vol. 26, no. 2, pp. 819-834, 2018, ISSN: 1063-6692.
@article{Desmouceaux2018,
title = {6LB: Scalable and Application-Aware Load Balancing with Segment Routing},
author = {Yoann Desmouceaux and Pierre Pfister and Jérôme Tollet and W. Mark Townsley and Thomas Clausen},
url = {http://www.thomasclausen.net/wp-content/uploads/2018/02/2018-IEEE-Transactions-on-Networking-6LB-Scalable-and-Application-Aware-Load-Balancing-with-Segment-Routing.pdf},
doi = {10.1109/TNET.2018.2799242},
issn = {1063-6692},
year = {2018},
date = {2018-02-15},
urldate = {2018-02-15},
journal = {IEEE/ACM Transactions on Networking},
volume = {26},
number = {2},
pages = {819--834},
abstract = {Network load-balancers generally either do not take application state into account, or do so at the cost of a centralized monitoring system. This paper introduces a load-balancer running exclusively within the IP forwarding plane, i.e. in an application protocol agnostic fashion – yet which still provides application-awareness and makes real-time, decentralized decisions. To that end, IPv6 Segment Routing is used to direct data packets from a new flow through a chain of candidate servers, until one decides to accept the connection, based solely on its local state. This way, applications themselves naturally decide on how to fairly share incoming connections, while incurring minimal network overhead, and no out-of-band signaling. A consistent hashing algorithm, as well as an in-band stickiness protocol, allow for the proposed solution to be able to be reliably distributed across a large number of instances.
Performance evaluation by means of an analytical model and actual tests on different workloads (including a Wikipedia replay as a realistic workload) show significant performance benefits in terms of shorter response times, when compared to a traditional random load-balancer. In addition, this paper introduces and compares kernel bypass high-performance implementations of both 6LB and a state-of-the-art load-balancer, showing that the significant system-level benefits of 6LB are achievable with a negligible data-path CPU overhead.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Performance evaluation by means of an analytical model and actual tests on different workloads (including a Wikipedia replay as a realistic workload) show significant performance benefits in terms of shorter response times, when compared to a traditional random load-balancer. In addition, this paper introduces and compares kernel bypass high-performance implementations of both 6LB and a state-of-the-art load-balancer, showing that the significant system-level benefits of 6LB are achievable with a negligible data-path CPU overhead.
2017
Perez, Samuel; Cordero, Juan Antonio; Coupechoux, Marceau
ODMAC++: An IoT Communication Manager based on Energy Harvesting Prediction Proceedings Article
In: Proceedings of the IEEE International Symposium on Personal, Indoor and Mobile Radio Communications (PIMRC'2017), IEEE, 2017.
@inproceedings{Perez2017,
title = {ODMAC++: An IoT Communication Manager based on Energy Harvesting Prediction},
author = {Samuel Perez and Juan Antonio Cordero and Marceau Coupechoux},
year = {2017},
date = {2017-10-08},
booktitle = {Proceedings of the IEEE International Symposium on Personal, Indoor and Mobile Radio Communications (PIMRC'2017)},
publisher = {IEEE},
abstract = {In large low-power networks of battery-driven sensors, power outages are a major concern and communication rates have to be carefully designed in order to optimize energy consumption, network connectivity and sensors lifetime. In some IoT use cases, power can be supplied to sensors by way of renewable energy automatic harvesting (solar panels, etc.). Given the high variability of energy arrival processes, energy consumption in sensors, in particular caused by transmissions to the sink, has to be aligned with energy harvesting patterns, so as to maximize throughput while avoiding power outages that may arise when the battery is empty. This paper proposes ODMAC++, an extension to a well-known protocol for sensor transmission scheduling in a WSN. ODMAC++ relies on learning techniques to adapt sensors communication rate to energy harvesting patterns, and uses a beaconing mechanism whose frequency is adjusted based on past measurements on the harvested energy process. Simulations based on analytical energy arrival models and on real solar radiation measurements indicate that ODMAC++ is able to avoid power outages and to cope with battery limitation and energy variations due to variability in time.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Desmouceaux, Yoann; Pfister, Pierre; Tollet, Jérôme; Townsley, W. Mark; Clausen, Thomas
SRLB: The Power of Choices in Load Balancing with Segment Routing Proceedings Article
In: Proceedings of the 37th IEEE International Conference on Distributed Computing Systems (ICDCS), 2017.
@inproceedings{Desmouceaux2017b,
title = {SRLB: The Power of Choices in Load Balancing with Segment Routing},
author = {Yoann Desmouceaux and Pierre Pfister and Jérôme Tollet and W. Mark Townsley and Thomas Clausen},
url = {http://www.thomasclausen.net/wp-content/uploads/2017/05/camera-ready-ieeepdfexpress.pdf},
year = {2017},
date = {2017-06-05},
booktitle = {Proceedings of the 37th IEEE International Conference on Distributed Computing Systems (ICDCS)},
abstract = {Network load-balancers generally either do not take application state into account, or do so at the cost of a centralized monitoring system. This paper introduces a load-balancer running exclusively within the IP forwarding plane, i.e. in an application protocol agnostic fashion – yet which still provides application-awareness and makes real-time, decentralized decisions. To that end, IPv6 Segment Routing is used to direct data packets from a new flow through a chain of candidate servers, until one decides to accept the connection, based on its local state. This way, applications themselves naturally decide on how to share incoming connections, while incurring minimal network overhead, and no out-of-band signaling.
Tests on different workloads – including realistic workloads such as replaying actual Wikipedia access traffic towards a set of replica Wikipedia instances – show significant performance benefits, in terms of shorter response times, when compared to a traditional random load-balancer.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Tests on different workloads – including realistic workloads such as replaying actual Wikipedia access traffic towards a set of replica Wikipedia instances – show significant performance benefits, in terms of shorter response times, when compared to a traditional random load-balancer.
2016
Augustin, Aloys; Yi, Jiazi; Clausen, Thomas; Townsley, Mark
A Study of LoRa: Long Range & Low Power Networks for the Internet of Things Journal Article
In: MDPI Sensors, vol. 16, no. 9, pp. 1466, 2016, ISSN: 1424-8220, (5 yr Impact Factor: 2.437).
@article{Augustin2016,
title = {A Study of {LoRa}: Long Range \& Low Power Networks for the Internet of Things},
author = {Aloys Augustin and Jiazi Yi and Thomas Clausen and Mark Townsley},
url = {http://www.thomasclausen.net/2016-a-study-of-lora-long-range-low-power-networks-for-the-internet-of-things/},
doi = {10.3390/s16091466},
issn = {1424-8220},
year = {2016},
date = {2016-09-09},
journal = {MDPI Sensors},
volume = {16},
number = {9},
pages = {1466},
abstract = {LoRa is a long-range, low-power, low-bitrate, wireless telecommunications system, promoted as an infrastructure solution for the Internet of Things: end-devices use LoRa across a single wireless hop to communicate to gateway(s), connected to the Internet and which act as transparent bridges and relay messages between these end-devices and a central network server. This paper provides an overview of LoRa and an in-depth analysis of its functional components. The physical and data link layer performance is evaluated by field tests and simulations. Based on the analysis and evaluations, some possible solutions for performance enhancements are proposed.},
note = {5 yr Impact Factor: 2.437},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Jin, Kaiwan; Pfister, Pierre; Yi, Jiazi
Distributed Node Consensus Protocol: Analysis, Evaluation and Performance Proceedings Article
In: Proceedings of the IEEE International Conference on Communications (ICC) 2016, 2016.
@inproceedings{Jin2016,
title = {Distributed Node Consensus Protocol: Analysis, Evaluation and Performance},
author = {Kaiwan Jin and Pierre Pfister and Jiazi Yi},
doi = {10.1109/ICC.2016.7510939},
year = {2016},
date = {2016-05-23},
booktitle = {Proceedings of the IEEE International Conference on Communications (ICC) 2016},
abstract = {This paper analyzes and evaluates the Distributed Node Consensus Protocol (DNCP), a state synchronization mechanism developed by the IETF Homenet working group. DNCP enables network function automation for home networks, which are growing in size and complexity. The basic mechanisms of DNCP are studied in this paper, including the state abstraction, synchronization process and keep-alive mechanism. The overhead is analyzed in single-link topology type. To evaluate the performance of DNCP in more complex scenarios, a reference implementation of DNCP is integrated into ns3 simulator. The convergence time and transmission overhead in various topology types are measured. Based on the obtained results, the correctness of DNCP is verified, and the behavior of DNCP can be concluded.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}