| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <meta charset="utf-8"> |
| <title>Apache Mesos - Port Mapping Network Isolator</title> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| |
| <meta property="og:locale" content="en_US"/> |
| <meta property="og:type" content="website"/> |
| <meta property="og:title" content="Apache Mesos"/> |
| <meta property="og:site_name" content="Apache Mesos"/> |
| <meta property="og:url" content="http://mesos.apache.org/"/> |
| <meta property="og:image" content="http://mesos.apache.org/assets/img/mesos_logo_fb_preview.png"/> |
| <meta property="og:description" |
| content="Apache Mesos abstracts resources away from machines, |
| enabling fault-tolerant and elastic distributed systems |
| to easily be built and run effectively."/> |
| |
| <meta name="twitter:card" content="summary"/> |
| <meta name="twitter:site" content="@ApacheMesos"/> |
| <meta name="twitter:title" content="Apache Mesos"/> |
| <meta name="twitter:image" content="http://mesos.apache.org/assets/img/mesos_logo_fb_preview.png"/> |
| <meta name="twitter:description" |
| content="Apache Mesos abstracts resources away from machines, |
| enabling fault-tolerant and elastic distributed systems |
| to easily be built and run effectively."/> |
| |
| <link href="//netdna.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css" rel="stylesheet"> |
| <link rel="alternate" type="application/atom+xml" title="Apache Mesos Blog" href="/blog/feed.xml"> |
| <link href="../../../assets/css/main.css" media="screen" rel="stylesheet" type="text/css" /> |
| |
| |
| |
| <!-- Google Analytics (legacy asynchronous ga.js snippet): pushes the _setAccount, _setDomainName and _trackPageview commands onto the global _gaq array (reusing the array if it already exists), then creates an async script element for ga.js (https://ssl host when the page protocol is https:, http://www host otherwise) and inserts it before the first script element in the document. --> |
| <script type="text/javascript"> |
| var _gaq = _gaq || []; |
| _gaq.push(['_setAccount', 'UA-20226872-1']); |
| _gaq.push(['_setDomainName', 'apache.org']); |
| _gaq.push(['_trackPageview']); |
| |
| (function() { |
| var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true; |
| ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; |
| var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); |
| })(); |
| </script> |
| |
| </head> |
| <body> |
| <!-- magical breadcrumbs --> |
| <div class="topnav"> |
| <div class="container"> |
| <ul class="breadcrumb"> |
| <li> |
| <div class="dropdown"> |
| <a id="dLabel" data-toggle="dropdown" href="#">Apache Software Foundation <span class="caret"></span></a> |
| <ul class="dropdown-menu" role="menu" aria-labelledby="dLabel"> |
| <li><a href="http://www.apache.org">Apache Homepage</a></li> |
| <li><a href="http://www.apache.org/licenses/">License</a></li> |
| <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li> |
| <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li> |
| <li><a href="http://www.apache.org/security/">Security</a></li> |
| </ul> |
| </div> |
| </li> |
| |
| <li><a href="http://mesos.apache.org">Apache Mesos</a></li> |
| |
| |
| <li><a href="/documentation/">Documentation</a></li> |
| |
| |
| </ul><!-- /.breadcrumb --> |
| </div><!-- /.container --> |
| </div><!-- /.topnav --> |
| |
| <!-- navbar excitement --> |
| <div class="navbar navbar-default navbar-static-top" role="navigation"> |
| <div class="container"> |
| <div class="navbar-header"> |
| <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#mesos-menu" aria-expanded="false"> |
| <span class="sr-only">Toggle navigation</span> |
| <span class="icon-bar"></span> |
| <span class="icon-bar"></span> |
| <span class="icon-bar"></span> |
| </button> |
| <a class="navbar-brand" href="/"><img src="/assets/img/mesos_logo.png" alt="Apache Mesos logo"/></a> |
| </div><!-- /.navbar-header --> |
| |
| <div class="navbar-collapse collapse" id="mesos-menu"> |
| <ul class="nav navbar-nav navbar-right"> |
| <li><a href="/gettingstarted/">Getting Started</a></li> |
| <li><a href="/blog/">Blog</a></li> |
| <li><a href="/documentation/latest/">Documentation</a></li> |
| <li><a href="/downloads/">Downloads</a></li> |
| <li><a href="/community/">Community</a></li> |
| </ul> |
| </div><!-- /#mesos-menu --> |
| </div><!-- /.container --> |
| </div><!-- /.navbar --> |
| |
| <div class="content"> |
| <div class="container"> |
| <div class="row-fluid"> |
| <div class="col-md-4"> |
| <h4>If you're new to Mesos</h4> |
| <p>See the <a href="/gettingstarted/">getting started</a> page for more |
| information about downloading, building, and deploying Mesos.</p> |
| |
| <h4>If you'd like to get involved or you're looking for support</h4> |
| <p>See our <a href="/community/">community</a> page for more details.</p> |
| </div> |
| <div class="col-md-8"> |
| <h1>Port Mapping Network Isolator</h1> |
| |
| <p>The port mapping network isolator provides a way to achieve |
| per-container network monitoring and isolation without relying on IP |
| per container. The network isolator prevents a single container from |
| exhausting the available network ports, consuming an unfair share of |
| the network bandwidth or significantly delaying packet transmission |
| for others. Network statistics for each active container are published |
| through the |
| <a href="/documentation/latest/./endpoints/slave/monitor/statistics/">/monitor/statistics</a> endpoint |
| on the agent. The port mapping network isolator is transparent for the |
| majority of tasks running on an agent (those that bind to port 0 and |
| let the kernel allocate their port).</p> |
| |
| <h2>Installation</h2> |
| |
| <p>Port mapping network isolator is <strong>not</strong> supported by default. To |
| enable it you need to install additional dependencies and configure it |
| during the build process.</p> |
| |
| <h3>Prerequisites</h3> |
| |
| <p>Per-container network monitoring and isolation is only supported on Linux kernel |
| versions 3.6 and above. Additionally, the kernel must include these patches |
| (merged in kernel version 3.15).</p> |
| |
| <ul> |
| <li><a href="https://github.com/torvalds/linux/commit/6a662719c9868b3d6c7d26b3a085f0cd3cc15e64">6a662719c9868b3d6c7d26b3a085f0cd3cc15e64</a></li> |
| <li><a href="https://github.com/torvalds/linux/commit/0d5edc68739f1c1e0519acbea1d3f0c1882a15d7">0d5edc68739f1c1e0519acbea1d3f0c1882a15d7</a></li> |
| <li><a href="https://github.com/torvalds/linux/commit/e374c618b1465f0292047a9f4c244bd71ab5f1f0">e374c618b1465f0292047a9f4c244bd71ab5f1f0</a></li> |
| <li><a href="https://github.com/torvalds/linux/commit/25f929fbff0d1bcebf2e92656d33025cd330cbf8">25f929fbff0d1bcebf2e92656d33025cd330cbf8</a></li> |
| </ul> |
| |
| |
| <p>The following packages are required on the agent:</p> |
| |
| <ul> |
| <li><a href="https://github.com/thom311/libnl/releases">libnl3</a> >= 3.2.26</li> |
| <li><a href="http://www.linuxfoundation.org/collaborate/workgroups/networking/iproute2">iproute</a> >= 2.6.39 is advised for debugging purposes but not required.</li> |
| </ul> |
| |
| |
| <p>Additionally, if you are building from source, you will also need the |
| libnl3 development package to compile Mesos:</p> |
| |
| <ul> |
| <li><a href="https://github.com/thom311/libnl/releases">libnl3-devel / libnl3-dev</a> >= 3.2.26</li> |
| </ul> |
| |
| |
| <h3>Build</h3> |
| |
| <p>To build Mesos with port mapping network isolator support, you need to |
| add a configure option:</p> |
| |
| <pre><code>$ ./configure --with-network-isolator |
| $ make |
| </code></pre> |
| |
| <h2>Configuration</h2> |
| |
| <p>The port mapping network isolator is enabled on the agent by adding |
| <code>network/port_mapping</code> to the agent command line <code>--isolation</code> flag.</p> |
| |
| <pre><code>--isolation="network/port_mapping" |
| </code></pre> |
| |
| <p>If the agent has not been compiled with port mapping network isolator |
| support, it will refuse to start and print an error:</p> |
| |
| <pre><code>I0708 00:17:08.080271 44267 containerizer.cpp:111] Using isolation: network/port_mapping |
| Failed to create a containerizer: Could not create MesosContainerizer: Unknown or unsupported |
| isolator: network/port_mapping |
| </code></pre> |
| |
| <h2>Configuring network ports</h2> |
| |
| <p>Without port mapping network isolator, all the containers on a host |
| share the public IP address of the agent and can bind to any port |
| allowed by the OS.</p> |
| |
| <p>When the port mapping network isolator is enabled, each container on |
| the agent has a separate network stack (via Linux <a href="http://lwn.net/Articles/580893/">network |
| namespaces</a>). All containers still |
| share the same public IP of the agent (so that the service discovery |
| mechanism does not need to be changed). The agent assigns each |
| container a non-overlapping range of the ports and only packets |
| to/from these assigned port ranges will be delivered. Applications |
| requesting the kernel assign a port (by binding to port 0) will be |
| given ports from the container assigned range. Applications can bind |
| to ports outside the container assigned ranges but packets |
| to/from these ports will be silently dropped by the host.</p> |
| |
| <p>Mesos provides two ranges of ports to containers:</p> |
| |
| <ul> |
| <li><p>OS allocated “<a href="https://en.wikipedia.org/wiki/Ephemeral_port">ephemeral</a>” ports |
| are assigned by the OS in a range specified for each container by Mesos.</p></li> |
| <li><p>Mesos allocated “non-ephemeral” ports are acquired by a framework using the |
| same Mesos resource offer mechanism used for cpu, memory etc. for allocation to |
| executors/tasks as required.</p></li> |
| </ul> |
| |
| |
| <p>Additionally, the host itself will require ephemeral ports for network |
| communication. You need to configure these three <strong>non-overlapping</strong> port ranges |
| on the host.</p> |
| |
| <h3>Host ephemeral port range</h3> |
| |
| <p>The currently configured host ephemeral port range can be discovered at any time |
| using the command <code>sysctl net.ipv4.ip_local_port_range</code>. If ports need to be set |
| aside for agent containers, the ephemeral port range can be updated in |
| <code>/etc/sysctl.conf</code>. Rebooting after the update will apply the change and |
| eliminate the possibility that ports are already in use by other processes. For |
| example, by adding the following:</p> |
| |
| <pre><code># net.ipv4.ip_local_port_range defines the host ephemeral port range, by |
| # default 32768-61000. We reduce this range to allow the Mesos agent to |
| # allocate ports 32768-57344 |
| # net.ipv4.ip_local_port_range = 32768 61000 |
| net.ipv4.ip_local_port_range = 57345 61000 |
| </code></pre> |
| |
| <h3>Container port ranges</h3> |
| |
| <p>The container ephemeral and non-ephemeral port ranges are configured using the |
| agent <code>--resources</code> flag. The non-ephemeral port range is provided to the |
| master, which will then offer it to frameworks for allocation.</p> |
| |
| <p>The ephemeral port range is sub-divided by the agent, giving |
| <code>ephemeral_ports_per_container</code> (default 1024) to each container. The maximum |
| number of containers on the agent will therefore be limited to approximately:</p> |
| |
| <pre><code>number of ephemeral_ports / ephemeral_ports_per_container |
| </code></pre> |
| |
| <p>The master <code>--max_executors_per_agent</code> flag can be used to prevent allocation of |
| more executors on an agent when the ephemeral port range has been exhausted.</p> |
| |
| <p>It is recommended (but not required) that <code>ephemeral_ports_per_container</code> be set |
| to a power of 2 (e.g., 512, 1024) and the lower bound of the ephemeral port |
| range be a multiple of <code>ephemeral_ports_per_container</code> to minimize CPU overhead |
| in packet processing. For example:</p> |
| |
| <pre><code>--resources="ports:[31000-32000];ephemeral_ports:[32768-57344]" \ |
| --ephemeral_ports_per_container=512 |
| </code></pre> |
| |
| <h3>Rate limiting container traffic</h3> |
| |
| <p>Outbound traffic from a container to the network can be rate limited to prevent |
| a single container from consuming all available network resources with |
| detrimental effects to the other containers on the host. The |
| <code>--egress_rate_limit_per_container</code> flag specifies that each container launched |
| on the host be limited to the specified bandwidth (in bytes per second). |
| Network traffic which would cause this limit to be exceeded is delayed for later |
| transmission. The TCP protocol will adjust to the increased latency and reduce |
| the transmission rate ensuring no packets need be dropped.</p> |
| |
| <pre><code>--egress_rate_limit_per_container=100MB |
| </code></pre> |
| |
| <p>We do not rate limit inbound traffic since we can only modify the network flows |
| after they have been received by the host and any congestion has already |
| occurred.</p> |
| |
| <h3>Egress traffic isolation</h3> |
| |
| <p>Delaying network data for later transmission can increase latency and jitter |
| (variability) for all traffic on the interface. Mesos can reduce the impact on |
| other containers on the same host by using flow classification and isolation |
| using the containers' port ranges to maintain unique flows for each container and |
| sending traffic from these flows fairly (using the |
| <a href="https://tools.ietf.org/html/draft-hoeiland-joergensen-aqm-fq-codel-00">FQ_Codel</a> |
| algorithm). Use the <code>--egress_unique_flow_per_container</code> flag to enable.</p> |
| |
| <pre><code>--egress_unique_flow_per_container |
| </code></pre> |
| |
| <h3>Putting it all together</h3> |
| |
| <p>A complete agent command line enabling port mapping network isolator, |
| reserving ports 57345-61000 for host ephemeral ports, 32768-57344 for |
| container ephemeral ports, 31000-32000 for non-ephemeral ports |
| allocated by the framework, limiting container transmit bandwidth to |
| 300 Mbits/second (37.5 MBytes/second) with unique flows enabled would thus be:</p> |
| |
| <pre><code>mesos-agent \ |
| --isolation=network/port_mapping \ |
| --resources="ports:[31000-32000];ephemeral_ports:[32768-57344]" \ |
| --ephemeral_ports_per_container=1024 \ |
| --egress_rate_limit_per_container=37500KB \ |
| --egress_unique_flow_per_container |
| </code></pre> |
| |
| <h2>Monitoring container network statistics</h2> |
| |
| <p>Mesos exposes statistics from the Linux network stack for each container network |
| on the <a href="/documentation/latest/./endpoints/slave/monitor/statistics/">/monitor/statistics</a> agent endpoint.</p> |
| |
| <p>From the network interface inside the container, we report the following |
| counters (since container creation) under the <code>statistics</code> key:</p> |
| |
| <table class="table table-striped"> |
| <thead> |
| <tr><th>Metric</th><th>Description</th><th>Type</th></tr> |
| </thead> |
| <tr> |
| <td><code>net_rx_bytes</code></td> |
| <td>Received bytes</td> |
| <td>Counter</td> |
| </tr> |
| <tr> |
| <td><code>net_rx_dropped</code></td> |
| <td>Packets dropped on receive</td> |
| <td>Counter</td> |
| </tr> |
| <tr> |
| <td><code>net_rx_errors</code></td> |
| <td>Errors reported on receive</td> |
| <td>Counter</td> |
| </tr> |
| <tr> |
| <td><code>net_rx_packets</code></td> |
| <td>Packets received</td> |
| <td>Counter</td> |
| </tr> |
| <tr> |
| <td><code>net_tx_bytes</code></td> |
| <td>Sent bytes</td> |
| <td>Counter</td> |
| </tr> |
| <tr> |
| <td><code>net_tx_dropped</code></td> |
| <td>Packets dropped on send</td> |
| <td>Counter</td> |
| </tr> |
| <tr> |
| <td><code>net_tx_errors</code></td> |
| <td>Errors reported on send</td> |
| <td>Counter</td> |
| </tr> |
| <tr> |
| <td><code>net_tx_packets</code></td> |
| <td>Packets sent</td> |
| <td>Counter</td> |
| </tr> |
| </table> |
| |
| |
| <p>Additionally, <a href="http://tldp.org/HOWTO/Traffic-Control-HOWTO/intro.html">Linux Traffic Control</a> can report the following |
| statistics for the elements which implement bandwidth limiting and bloat |
| reduction under the <code>statistics/net_traffic_control_statistics</code> key. The entry |
| for each of these elements includes:</p> |
| |
| <table class="table table-striped"> |
| <thead> |
| <tr><th>Metric</th><th>Description</th><th>Type</th></tr> |
| </thead> |
| <tr> |
| <td><code>backlog</code></td> |
| <td>Bytes queued for transmission [1]</td> |
| <td>Gauge</td> |
| </tr> |
| <tr> |
| <td><code>bytes</code></td> |
| <td>Sent bytes</td> |
| <td>Counter</td> |
| </tr> |
| <tr> |
| <td><code>drops</code></td> |
| <td>Packets dropped on send</td> |
| <td>Counter</td> |
| </tr> |
| <tr> |
| <td><code>overlimits</code></td> |
| <td>Count of times the interface was over its transmit limit when it attempted to send a packet. Since the normal action when the network is overlimit is to delay the packet, the overlimit counter can be incremented many times for each packet sent on a heavily congested interface. [2]</td> |
| <td>Counter</td> |
| </tr> |
| <tr> |
| <td><code>packets</code></td> |
| <td>Packets sent</td> |
| <td>Counter</td> |
| </tr> |
| <tr> |
| <td><code>qlen</code></td> |
| <td>Packets queued for transmission</td> |
| <td>Gauge</td> |
| </tr> |
| <tr> |
| <td><code>ratebps</code></td> |
| <td>Transmit rate in bytes/second [3]</td> |
| <td>Gauge</td> |
| </tr> |
| <tr> |
| <td><code>ratepps</code></td> |
| <td>Transmit rate in packets/second [3]</td> |
| <td>Gauge</td> |
| </tr> |
| <tr> |
| <td><code>requeues</code></td> |
| <td>Packets failed to send due to resource contention (such as kernel locking) [3]</td> |
| <td>Counter</td> |
| </tr> |
| </table> |
| |
| |
| <p>[1] <code>backlog</code> is only reported on the bloat_reduction interface.</p> |
| |
| <p>[2] <code>overlimits</code> are only reported on the bw_limit interface.</p> |
| |
| <p>[3] Currently always reported as 0 by the underlying Traffic Control element.</p> |
| |
| <p>For example, these are the statistics you will get by hitting the <code>/monitor/statistics</code> endpoint on an agent with network monitoring turned on:</p> |
| |
| <pre><code>$ curl -s http://localhost:5051/monitor/statistics | python2.6 -mjson.tool |
| [ |
| { |
| "executor_id": "job.1436298853", |
| "executor_name": "Command Executor (Task: job.1436298853) (Command: sh -c 'iperf ....')", |
| "framework_id": "20150707-195256-1740121354-5150-29801-0000", |
| "source": "job.1436298853", |
| "statistics": { |
| "cpus_limit": 1.1, |
| "cpus_nr_periods": 16314, |
| "cpus_nr_throttled": 16313, |
| "cpus_system_time_secs": 2667.06, |
| "cpus_throttled_time_secs": 8036.840845388, |
| "cpus_user_time_secs": 123.49, |
| "mem_anon_bytes": 8388608, |
| "mem_cache_bytes": 16384, |
| "mem_critical_pressure_counter": 0, |
| "mem_file_bytes": 16384, |
| "mem_limit_bytes": 167772160, |
| "mem_low_pressure_counter": 0, |
| "mem_mapped_file_bytes": 0, |
| "mem_medium_pressure_counter": 0, |
| "mem_rss_bytes": 8388608, |
| "mem_total_bytes": 9945088, |
| "net_rx_bytes": 10847, |
| "net_rx_dropped": 0, |
| "net_rx_errors": 0, |
| "net_rx_packets": 143, |
| "net_traffic_control_statistics": [ |
| { |
| "backlog": 0, |
| "bytes": 163206809152, |
| "drops": 77147, |
| "id": "bw_limit", |
| "overlimits": 210693719, |
| "packets": 107941027, |
| "qlen": 10236, |
| "ratebps": 0, |
| "ratepps": 0, |
| "requeues": 0 |
| }, |
| { |
| "backlog": 15481368, |
| "bytes": 163206874168, |
| "drops": 27081494, |
| "id": "bloat_reduction", |
| "overlimits": 0, |
| "packets": 107941070, |
| "qlen": 10239, |
| "ratebps": 0, |
| "ratepps": 0, |
| "requeues": 0 |
| } |
| ], |
| "net_tx_bytes": 163200529816, |
| "net_tx_dropped": 0, |
| "net_tx_errors": 0, |
| "net_tx_packets": 107936874, |
| "perf": { |
| "duration": 0, |
| "timestamp": 1436298855.82807 |
| }, |
| "timestamp": 1436300487.41595 |
| } |
| } |
| ] |
| </code></pre> |
| |
| </div> |
| </div> |
| |
| </div><!-- /.container --> |
| </div><!-- /.content --> |
| |
| <hr> |
| |
| |
| |
| <!-- footer: the social block renders a Twitter follow link and a #mesos hashtag tweet link; each link is followed by the same inline loader, which inserts platform.twitter.com/widgets.js (http when the page location starts with http:, https otherwise) before the first script element, but only if no element with id "twitter-wjs" exists yet, so the second copy does nothing once the first has run. --> |
| <div class="footer"> |
| <div class="container"> |
| <div class="col-md-4 social-blk"> |
| <span class="social"> |
| <a href="https://twitter.com/ApacheMesos" |
| class="twitter-follow-button" |
| data-show-count="false" data-size="large">Follow @ApacheMesos</a> |
| <script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs');</script> |
| <a href="https://twitter.com/intent/tweet?button_hashtag=mesos" |
| class="twitter-hashtag-button" |
| data-size="large" |
| data-related="ApacheMesos">Tweet #mesos</a> |
| <script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs');</script> |
| </span> |
| </div> |
| |
| <div class="col-md-8 trademark"> |
| <p>© 2012-2017 <a href="http://apache.org">The Apache Software Foundation</a>. |
| Apache Mesos, the Apache feather logo, and the Apache Mesos project logo are trademarks of The Apache Software Foundation. |
| </p> |
| </div> |
| </div><!-- /.container --> |
| </div><!-- /.footer --> |
| |
| <!-- JS --> |
| <script src="//code.jquery.com/jquery-1.11.0.min.js" type="text/javascript"></script> |
| <script src="//netdna.bootstrapcdn.com/bootstrap/3.1.1/js/bootstrap.min.js" type="text/javascript"></script> |
| </body> |
| </html> |