blob: c4750bebf441238587efb052613b2f47f2226080 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Apache Mesos - Port Mapping Network Isolator</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta property="og:locale" content="en_US"/>
<meta property="og:type" content="website"/>
<meta property="og:title" content="Apache Mesos"/>
<meta property="og:site_name" content="Apache Mesos"/>
<meta property="og:url" content="http://mesos.apache.org/"/>
<meta property="og:image" content="http://mesos.apache.org/assets/img/mesos_logo_fb_preview.png"/>
<meta property="og:description"
content="Apache Mesos abstracts resources away from machines,
enabling fault-tolerant and elastic distributed systems
to easily be built and run effectively."/>
<meta name="twitter:card" content="summary"/>
<meta name="twitter:site" content="@ApacheMesos"/>
<meta name="twitter:title" content="Apache Mesos"/>
<meta name="twitter:image" content="http://mesos.apache.org/assets/img/mesos_logo_fb_preview.png"/>
<meta name="twitter:description"
content="Apache Mesos abstracts resources away from machines,
enabling fault-tolerant and elastic distributed systems
to easily be built and run effectively."/>
<link href="//netdna.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css" rel="stylesheet">
<link rel="alternate" type="application/atom+xml" title="Apache Mesos Blog" href="/blog/feed.xml">
<link href="../../../assets/css/main.css" media="screen" rel="stylesheet" type="text/css" />
<!-- Google Analytics Magic -->
<script type="text/javascript">
// Legacy Google Analytics (ga.js) bootstrap.
// Reuse the global _gaq if one already exists, otherwise start an empty list.
var _gaq = _gaq || [];
(function () {
  // Tracker commands, pushed in this order: account id, cookie domain, page view.
  var trackerCommands = [
    ['_setAccount', 'UA-20226872-1'],
    ['_setDomainName', 'apache.org'],
    ['_trackPageview']
  ];
  for (var idx = 0; idx < trackerCommands.length; idx++) {
    _gaq.push(trackerCommands[idx]);
  }

  // Build an async <script> element pointing at ga.js, choosing the host
  // prefix that matches the protocol the current page was loaded over.
  var loader = document.createElement('script');
  loader.type = 'text/javascript';
  loader.async = true;
  var hostPrefix;
  if ('https:' == document.location.protocol) {
    hostPrefix = 'https://ssl';
  } else {
    hostPrefix = 'http://www';
  }
  loader.src = hostPrefix + '.google-analytics.com/ga.js';

  // Insert the loader immediately before the first <script> in the document.
  var firstScript = document.getElementsByTagName('script')[0];
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
</head>
<body>
<!-- magical breadcrumbs -->
<div class="topnav">
<div class="container">
<ul class="breadcrumb">
<li>
<div class="dropdown">
<a id="dLabel" data-toggle="dropdown" href="#">Apache Software Foundation <span class="caret"></span></a>
<ul class="dropdown-menu" role="menu" aria-labelledby="dLabel">
<li><a href="http://www.apache.org">Apache Homepage</a></li>
<li><a href="http://www.apache.org/licenses/">License</a></li>
<li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
<li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
<li><a href="http://www.apache.org/security/">Security</a></li>
</ul>
</div>
</li>
<li><a href="http://mesos.apache.org">Apache Mesos</a></li>
<li><a href="/documentation/">Documentation</a></li>
</ul><!-- /.breadcrumb -->
</div><!-- /.container -->
</div><!-- /.topnav -->
<!-- navbar excitement -->
<div class="navbar navbar-default navbar-static-top" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#mesos-menu" aria-expanded="false">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="navbar-brand" href="/"><img src="/assets/img/mesos_logo.png" alt="Apache Mesos logo"/></a>
</div><!-- /.navbar-header -->
<div class="navbar-collapse collapse" id="mesos-menu">
<ul class="nav navbar-nav navbar-right">
<li><a href="/gettingstarted/">Getting Started</a></li>
<li><a href="/blog/">Blog</a></li>
<li><a href="/documentation/latest/">Documentation</a></li>
<li><a href="/downloads/">Downloads</a></li>
<li><a href="/community/">Community</a></li>
</ul>
</div><!-- /#mesos-menu -->
</div><!-- /.container -->
</div><!-- /.navbar -->
<div class="content">
<div class="container">
<div class="row-fluid">
<div class="col-md-4">
<h4>If you're new to Mesos</h4>
<p>See the <a href="/gettingstarted/">getting started</a> page for more
information about downloading, building, and deploying Mesos.</p>
<h4>If you'd like to get involved or you're looking for support</h4>
<p>See our <a href="/community/">community</a> page for more details.</p>
</div>
<div class="col-md-8">
<h1>Port Mapping Network Isolator</h1>
<p>The port mapping network isolator provides a way to achieve
per-container network monitoring and isolation without relying on IP
per container. The network isolator prevents a single container from
exhausting the available network ports, consuming an unfair share of
the network bandwidth or significantly delaying packet transmission
for others. Network statistics for each active container are published
through the
<a href="/documentation/latest/./endpoints/slave/monitor/statistics/">/monitor/statistics</a> endpoint
on the agent. The port mapping network isolator is transparent for the
majority of tasks running on an agent (those that bind to port 0 and
let the kernel allocate their port).</p>
<h2>Installation</h2>
<p>Port mapping network isolator is <strong>not</strong> supported by default. To
enable it you need to install additional dependencies and configure it
during the build process.</p>
<h3>Prerequisites</h3>
<p>Per-container network monitoring and isolation is only supported on Linux kernel
versions 3.6 and above. Additionally, the kernel must include these patches
(merged in kernel version 3.15).</p>
<ul>
<li><a href="https://github.com/torvalds/linux/commit/6a662719c9868b3d6c7d26b3a085f0cd3cc15e64">6a662719c9868b3d6c7d26b3a085f0cd3cc15e64</a></li>
<li><a href="https://github.com/torvalds/linux/commit/0d5edc68739f1c1e0519acbea1d3f0c1882a15d7">0d5edc68739f1c1e0519acbea1d3f0c1882a15d7</a></li>
<li><a href="https://github.com/torvalds/linux/commit/e374c618b1465f0292047a9f4c244bd71ab5f1f0">e374c618b1465f0292047a9f4c244bd71ab5f1f0</a></li>
<li><a href="https://github.com/torvalds/linux/commit/25f929fbff0d1bcebf2e92656d33025cd330cbf8">25f929fbff0d1bcebf2e92656d33025cd330cbf8</a></li>
</ul>
<p>The following packages are required on the agent:</p>
<ul>
<li><a href="https://github.com/thom311/libnl/releases">libnl3</a> &gt;= 3.2.26</li>
<li><a href="http://www.linuxfoundation.org/collaborate/workgroups/networking/iproute2">iproute</a> &gt;= 2.6.39 is advised for debugging purposes but is not required.</li>
</ul>
<p>Additionally, if you are building from source, you will also need the
libnl3 development package to compile Mesos:</p>
<ul>
<li><a href="https://github.com/thom311/libnl/releases">libnl3-devel / libnl3-dev</a> &gt;= 3.2.26</li>
</ul>
<h3>Build</h3>
<p>To build Mesos with port mapping network isolator support, you need to
add a configure option:</p>
<pre><code>$ ./configure --with-network-isolator
$ make
</code></pre>
<h2>Configuration</h2>
<p>The port mapping network isolator is enabled on the agent by adding
<code>network/port_mapping</code> to the agent command line <code>--isolation</code> flag.</p>
<pre><code>--isolation="network/port_mapping"
</code></pre>
<p>If the agent has not been compiled with port mapping network isolator
support, it will refuse to start and print an error:</p>
<pre><code>I0708 00:17:08.080271 44267 containerizer.cpp:111] Using isolation: network/port_mapping
Failed to create a containerizer: Could not create MesosContainerizer: Unknown or unsupported
isolator: network/port_mapping
</code></pre>
<h2>Configuring network ports</h2>
<p>Without port mapping network isolator, all the containers on a host
share the public IP address of the agent and can bind to any port
allowed by the OS.</p>
<p>When the port mapping network isolator is enabled, each container on
the agent has a separate network stack (via Linux <a href="http://lwn.net/Articles/580893/">network
namespaces</a>). All containers still
share the same public IP of the agent (so that the service discovery
mechanism does not need to be changed). The agent assigns each
container a non-overlapping range of the ports and only packets
to/from these assigned port ranges will be delivered. Applications
requesting the kernel assign a port (by binding to port 0) will be
given ports from the container assigned range. Applications can bind
to ports outside the container assigned ranges but packets
to/from these ports will be silently dropped by the host.</p>
<p>Mesos provides two ranges of ports to containers:</p>
<ul>
<li><p>OS allocated &ldquo;<a href="https://en.wikipedia.org/wiki/Ephemeral_port">ephemeral</a>&rdquo; ports
are assigned by the OS in a range specified for each container by Mesos.</p></li>
<li><p>Mesos allocated &ldquo;non-ephemeral&rdquo; ports are acquired by a framework using the
same Mesos resource offer mechanism used for cpu, memory etc. for allocation to
executors/tasks as required.</p></li>
</ul>
<p>Additionally, the host itself will require ephemeral ports for network
communication. You need to configure these three <strong>non-overlapping</strong> port ranges
on the host.</p>
<h3>Host ephemeral port range</h3>
<p>The currently configured host ephemeral port range can be discovered at any time
using the command <code>sysctl net.ipv4.ip_local_port_range</code>. If ports need to be set
aside for agent containers, the ephemeral port range can be updated in
<code>/etc/sysctl.conf</code>. Rebooting after the update will apply the change and
eliminate the possibility that ports are already in use by other processes. For
example, by adding the following:</p>
<pre><code># net.ipv4.ip_local_port_range defines the host ephemeral port range, by
# default 32768-61000. We reduce this range to allow the Mesos agent to
# allocate ports 32768-57344
# net.ipv4.ip_local_port_range = 32768 61000
net.ipv4.ip_local_port_range = 57345 61000
</code></pre>
<h3>Container port ranges</h3>
<p>The container ephemeral and non-ephemeral port ranges are configured using the
agent <code>--resources</code> flag. The non-ephemeral port range is provided to the
master, which will then offer it to frameworks for allocation.</p>
<p>The ephemeral port range is sub-divided by the agent, giving
<code>ephemeral_ports_per_container</code> (default 1024) to each container. The maximum
number of containers on the agent will therefore be limited to approximately:</p>
<pre><code>number of ephemeral_ports / ephemeral_ports_per_container
</code></pre>
<p>The master <code>--max_executors_per_agent</code> flag can be used to prevent allocation of
more executors on an agent when the ephemeral port range has been exhausted.</p>
<p>It is recommended (but not required) that <code>ephemeral_ports_per_container</code> be set
to a power of 2 (e.g., 512, 1024) and the lower bound of the ephemeral port
range be a multiple of <code>ephemeral_ports_per_container</code> to minimize CPU overhead
in packet processing. For example:</p>
<pre><code>--resources=ports:[31000-32000];ephemeral_ports:[32768-57344] \
--ephemeral_ports_per_container=512
</code></pre>
<h3>Rate limiting container traffic</h3>
<p>Outbound traffic from a container to the network can be rate limited to prevent
a single container from consuming all available network resources with
detrimental effects to the other containers on the host. The
<code>--egress_rate_limit_per_container</code> flag specifies that each container launched
on the host be limited to the specified bandwidth (in bytes per second).
Network traffic which would cause this limit to be exceeded is delayed for later
transmission. The TCP protocol will adjust to the increased latency and reduce
the transmission rate ensuring no packets need be dropped.</p>
<pre><code>--egress_rate_limit_per_container=100MB
</code></pre>
<p>We do not rate limit inbound traffic since we can only modify the network flows
after they have been received by the host and any congestion has already
occurred.</p>
<h3>Egress traffic isolation</h3>
<p>Delaying network data for later transmission can increase latency and jitter
(variability) for all traffic on the interface. Mesos can reduce the impact on
other containers on the same host by using flow classification and isolation
based on each container's port ranges to maintain unique flows for each container and
sending traffic from these flows fairly (using the
<a href="https://tools.ietf.org/html/draft-hoeiland-joergensen-aqm-fq-codel-00">FQ_Codel</a>
algorithm). Use the <code>--egress_unique_flow_per_container</code> flag to enable.</p>
<pre><code>--egress_unique_flow_per_container
</code></pre>
<h3>Putting it all together</h3>
<p>A complete agent command line enabling port mapping network isolator,
reserving ports 57345-61000 for host ephemeral ports, 32768-57344 for
container ephemeral ports, 31000-32000 for non-ephemeral ports
allocated by the framework, limiting container transmit bandwidth to
300 Mbits/second (37.5 MBytes/second) with unique flows enabled would thus be:</p>
<pre><code>mesos-agent \
--isolation=network/port_mapping \
--resources=ports:[31000-32000];ephemeral_ports:[32768-57344] \
--ephemeral_ports_per_container=1024 \
--egress_rate_limit_per_container=37500KB \
--egress_unique_flow_per_container
</code></pre>
<h2>Monitoring container network statistics</h2>
<p>Mesos exposes statistics from the Linux network stack for each container network
on the <a href="/documentation/latest/./endpoints/slave/monitor/statistics/">/monitor/statistics</a> agent endpoint.</p>
<p>From the network interface inside the container, we report the following
counters (since container creation) under the <code>statistics</code> key:</p>
<table class="table table-striped">
<thead>
<tr><th>Metric</th><th>Description</th><th>Type</th></tr>
</thead>
<tr>
<td><code>net_rx_bytes</code></td>
<td>Received bytes</td>
<td>Counter</td>
</tr>
<tr>
<td><code>net_rx_dropped</code></td>
<td>Packets dropped on receive</td>
<td>Counter</td>
</tr>
<tr>
<td><code>net_rx_errors</code></td>
<td>Errors reported on receive</td>
<td>Counter</td>
</tr>
<tr>
<td><code>net_rx_packets</code></td>
<td>Packets received</td>
<td>Counter</td>
</tr>
<tr>
<td><code>net_tx_bytes</code></td>
<td>Sent bytes</td>
<td>Counter</td>
</tr>
<tr>
<td><code>net_tx_dropped</code></td>
<td>Packets dropped on send</td>
<td>Counter</td>
</tr>
<tr>
<td><code>net_tx_errors</code></td>
<td>Errors reported on send</td>
<td>Counter</td>
</tr>
<tr>
<td><code>net_tx_packets</code></td>
<td>Packets sent</td>
<td>Counter</td>
</tr>
</table>
<p>Additionally, <a href="http://tldp.org/HOWTO/Traffic-Control-HOWTO/intro.html">Linux Traffic Control</a> can report the following
statistics for the elements which implement bandwidth limiting and bloat
reduction under the <code>statistics/net_traffic_control_statistics</code> key. The entry
for each of these elements includes:</p>
<table class="table table-striped">
<thead>
<tr><th>Metric</th><th>Description</th><th>Type</th></tr>
</thead>
<tr>
<td><code>backlog</code></td>
<td>Bytes queued for transmission [1]</td>
<td>Gauge</td>
</tr>
<tr>
<td><code>bytes</code></td>
<td>Sent bytes</td>
<td>Counter</td>
</tr>
<tr>
<td><code>drops</code></td>
<td>Packets dropped on send</td>
<td>Counter</td>
</tr>
<tr>
<td><code>overlimits</code></td>
<td>Count of times the interface was over its transmit limit when it attempted to send a packet. Since the normal action when the network is overlimit is to delay the packet, the overlimit counter can be incremented many times for each packet sent on a heavily congested interface. [2]</td>
<td>Counter</td>
</tr>
<tr>
<td><code>packets</code></td>
<td>Packets sent</td>
<td>Counter</td>
</tr>
<tr>
<td><code>qlen</code></td>
<td>Packets queued for transmission</td>
<td>Gauge</td>
</tr>
<tr>
<td><code>ratebps</code></td>
<td>Transmit rate in bytes/second [3]</td>
<td>Gauge</td>
</tr>
<tr>
<td><code>ratepps</code></td>
<td>Transmit rate in packets/second [3]</td>
<td>Gauge</td>
</tr>
<tr>
<td><code>requeues</code></td>
<td>Packets failed to send due to resource contention (such as kernel locking) [3]</td>
<td>Counter</td>
</tr>
</table>
<p>[1] <code>backlog</code> is only reported on the bloat_reduction interface.</p>
<p>[2] <code>overlimits</code> are only reported on the bw_limit interface.</p>
<p>[3] Currently always reported as 0 by the underlying Traffic Control element.</p>
<p>For example, these are the statistics you will get by hitting the <code>/monitor/statistics</code> endpoint on an agent with network monitoring turned on:</p>
<pre><code>$ curl -s http://localhost:5051/monitor/statistics | python2.6 -mjson.tool
[
{
"executor_id": "job.1436298853",
"executor_name": "Command Executor (Task: job.1436298853) (Command: sh -c 'iperf ....')",
"framework_id": "20150707-195256-1740121354-5150-29801-0000",
"source": "job.1436298853",
"statistics": {
"cpus_limit": 1.1,
"cpus_nr_periods": 16314,
"cpus_nr_throttled": 16313,
"cpus_system_time_secs": 2667.06,
"cpus_throttled_time_secs": 8036.840845388,
"cpus_user_time_secs": 123.49,
"mem_anon_bytes": 8388608,
"mem_cache_bytes": 16384,
"mem_critical_pressure_counter": 0,
"mem_file_bytes": 16384,
"mem_limit_bytes": 167772160,
"mem_low_pressure_counter": 0,
"mem_mapped_file_bytes": 0,
"mem_medium_pressure_counter": 0,
"mem_rss_bytes": 8388608,
"mem_total_bytes": 9945088,
"net_rx_bytes": 10847,
"net_rx_dropped": 0,
"net_rx_errors": 0,
"net_rx_packets": 143,
"net_traffic_control_statistics": [
{
"backlog": 0,
"bytes": 163206809152,
"drops": 77147,
"id": "bw_limit",
"overlimits": 210693719,
"packets": 107941027,
"qlen": 10236,
"ratebps": 0,
"ratepps": 0,
"requeues": 0
},
{
"backlog": 15481368,
"bytes": 163206874168,
"drops": 27081494,
"id": "bloat_reduction",
"overlimits": 0,
"packets": 107941070,
"qlen": 10239,
"ratebps": 0,
"ratepps": 0,
"requeues": 0
}
],
"net_tx_bytes": 163200529816,
"net_tx_dropped": 0,
"net_tx_errors": 0,
"net_tx_packets": 107936874,
"perf": {
"duration": 0,
"timestamp": 1436298855.82807
},
"timestamp": 1436300487.41595
}
}
]
</code></pre>
</div>
</div>
</div><!-- /.container -->
</div><!-- /.content -->
<hr>
<!-- footer -->
<div class="footer">
<div class="container">
<div class="col-md-4 social-blk">
<span class="social">
<a href="https://twitter.com/ApacheMesos"
class="twitter-follow-button"
data-show-count="false" data-size="large">Follow @ApacheMesos</a>
<script>
// Twitter widgets loader: inserts platform.twitter.com/widgets.js into the
// page unless an element with the given id is already present.
(function (doc, tagName, scriptId) {
  var widgetScript;
  // First existing element of this tag name; the new element goes before it.
  var anchorScript = doc.getElementsByTagName(tagName)[0];
  // Use http only when the page location itself starts with "http:".
  var scheme = /^http:/.test(doc.location) ? 'http' : 'https';
  if (doc.getElementById(scriptId)) {
    return;
  }
  widgetScript = doc.createElement(tagName);
  widgetScript.id = scriptId;
  widgetScript.src = scheme + '://platform.twitter.com/widgets.js';
  anchorScript.parentNode.insertBefore(widgetScript, anchorScript);
})(document, 'script', 'twitter-wjs');
</script>
<a href="https://twitter.com/intent/tweet?button_hashtag=mesos"
class="twitter-hashtag-button"
data-size="large"
data-related="ApacheMesos">Tweet #mesos</a>
<script>
// Twitter widgets loader for the hashtag button. It uses the same element id
// ('twitter-wjs') as the loader that follows the "Follow" button, so when that
// element already exists the guard below makes this copy do nothing.
(function (doc, tagName, scriptId) {
  var widgetScript;
  // First existing element of this tag name; the new element goes before it.
  var anchorScript = doc.getElementsByTagName(tagName)[0];
  // Use http only when the page location itself starts with "http:".
  var scheme = /^http:/.test(doc.location) ? 'http' : 'https';
  if (doc.getElementById(scriptId)) {
    return;
  }
  widgetScript = doc.createElement(tagName);
  widgetScript.id = scriptId;
  widgetScript.src = scheme + '://platform.twitter.com/widgets.js';
  anchorScript.parentNode.insertBefore(widgetScript, anchorScript);
})(document, 'script', 'twitter-wjs');
</script>
</span>
</div>
<div class="col-md-8 trademark">
<p>&copy; 2012-2017 <a href="http://apache.org">The Apache Software Foundation</a>.
Apache Mesos, the Apache feather logo, and the Apache Mesos project logo are trademarks of The Apache Software Foundation.
</p>
</div>
</div><!-- /.container -->
</div><!-- /.footer -->
<!-- JS -->
<script src="//code.jquery.com/jquery-1.11.0.min.js" type="text/javascript"></script>
<script src="//netdna.bootstrapcdn.com/bootstrap/3.1.1/js/bootstrap.min.js" type="text/javascript"></script>
</body>
</html>