blob: e90fd2a60e7ec208f0d87e0709f4efbff60ef6c1 [file] [log] [blame]
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
<title>Apache Flink 0.10-SNAPSHOT Documentation: Cluster Execution</title>
<link rel="shortcut icon" href="http://flink.apache.org/docs/master/page/favicon.ico" type="image/x-icon">
<link rel="icon" href="http://flink.apache.org/docs/master/page/favicon.ico" type="image/x-icon">
<!-- Bootstrap -->
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css">
<link rel="stylesheet" href="http://flink.apache.org/docs/master/page/css/flink.css">
<link rel="stylesheet" href="http://flink.apache.org/docs/master/page/css/syntax.css">
<link rel="stylesheet" href="http://flink.apache.org/docs/master/page/css/codetabs.css">
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body>
<!-- Top navbar. -->
<nav class="navbar navbar-default navbar-fixed-top">
<div class="container">
<!-- The logo. -->
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<div class="navbar-logo">
<a href="http://flink.apache.org"><img alt="Apache Flink" src="http://flink.apache.org/docs/master/page/img/navbar-brand-logo.jpg"></a>
</div>
</div><!-- /.navbar-header -->
<!-- The navigation links. -->
<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
<ul class="nav navbar-nav">
<li><a href="http://flink.apache.org/docs/master/index.html">Overview<span class="hidden-sm hidden-xs"> 0.10</span></a></li>
<!-- Setup -->
<li class="dropdown">
<a href="http://flink.apache.org/docs/master/setup" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Setup <span class="caret"></span></a>
<ul class="dropdown-menu" role="menu">
<li><a href="http://flink.apache.org/docs/master/setup/building.html">Get Flink 0.10-SNAPSHOT</a></li>
<li class="divider"></li>
<li role="presentation" class="dropdown-header"><strong>Deployment</strong></li>
<li><a href="http://flink.apache.org/docs/master/setup/local_setup.html" class="active">Local</a></li>
<li><a href="http://flink.apache.org/docs/master/setup/cluster_setup.html">Cluster (Standalone)</a></li>
<li><a href="http://flink.apache.org/docs/master/setup/yarn_setup.html">YARN</a></li>
<li><a href="http://flink.apache.org/docs/master/setup/gce_setup.html">GCloud</a></li>
<li><a href="http://flink.apache.org/docs/master/setup/flink_on_tez.html">Flink on Tez <span class="badge">Beta</span></a></li>
<li class="divider"></li>
<li><a href="http://flink.apache.org/docs/master/setup/config.html">Configuration</a></li>
</ul>
</li>
<!-- Programming Guides -->
<li class="dropdown">
<a href="http://flink.apache.org/docs/master/apis" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Programming Guides <span class="caret"></span></a>
<ul class="dropdown-menu" role="menu">
<li><a href="http://flink.apache.org/docs/master/apis/programming_guide.html"><strong>Batch: DataSet API</strong></a></li>
<li><a href="http://flink.apache.org/docs/master/apis/streaming_guide.html"><strong>Streaming: DataStream API</strong> <span class="badge">Beta</span></a></li>
<li><a href="http://flink.apache.org/docs/master/apis/python.html">Python API <span class="badge">Beta</span></a></li>
<li class="divider"></li>
<li><a href="scala_shell.html">Interactive Scala Shell</a></li>
<li><a href="http://flink.apache.org/docs/master/apis/dataset_transformations.html">Dataset Transformations</a></li>
<li><a href="http://flink.apache.org/docs/master/apis/best_practices.html">Best Practices</a></li>
<li><a href="http://flink.apache.org/docs/master/apis/example_connectors.html">Connectors</a></li>
<li><a href="http://flink.apache.org/docs/master/apis/examples.html">Examples</a></li>
<li><a href="http://flink.apache.org/docs/master/apis/local_execution.html">Local Execution</a></li>
<li><a href="http://flink.apache.org/docs/master/apis/cluster_execution.html">Cluster Execution</a></li>
<li><a href="http://flink.apache.org/docs/master/apis/cli.html">Command Line Interface</a></li>
<li><a href="http://flink.apache.org/docs/master/apis/web_client.html">Web Client</a></li>
<li><a href="http://flink.apache.org/docs/master/apis/iterations.html">Iterations</a></li>
<li><a href="http://flink.apache.org/docs/master/apis/java8.html">Java 8</a></li>
<li><a href="http://flink.apache.org/docs/master/apis/hadoop_compatibility.html">Hadoop Compatability <span class="badge">Beta</span></a></li>
</ul>
</li>
<!-- Libraries -->
<li class="dropdown">
<a href="http://flink.apache.org/docs/master/libs" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Libraries <span class="caret"></span></a>
<ul class="dropdown-menu" role="menu">
<li><a href="http://flink.apache.org/docs/master/libs/spargel_guide.html">Graphs: Spargel</a></li>
<li><a href="http://flink.apache.org/docs/master/libs/gelly_guide.html">Graphs: Gelly <span class="badge">Beta</span></a></li>
<li><a href="http://flink.apache.org/docs/master/libs/ml/">Machine Learning <span class="badge">Beta</span></a></li>
<li><a href="http://flink.apache.org/docs/master/libs/table.html">Relational: Table <span class="badge">Beta</span></a></li>
</ul>
</li>
<!-- Internals -->
<li class="dropdown">
<a href="http://flink.apache.org/docs/master/internals" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Internals <span class="caret"></span></a>
<ul class="dropdown-menu" role="menu">
<li role="presentation" class="dropdown-header"><strong>Contribute</strong></li>
<li><a href="http://flink.apache.org/docs/master/internals/how_to_contribute.html">How to Contribute</a></li>
<li><a href="http://flink.apache.org/docs/master/internals/coding_guidelines.html">Coding Guidelines</a></li>
<li><a href="http://flink.apache.org/docs/master/internals/ide_setup.html">IDE Setup</a></li>
<li><a href="http://flink.apache.org/docs/master/internals/logging.html">Logging</a></li>
<li class="divider"></li>
<li role="presentation" class="dropdown-header"><strong>Internals</strong></li>
<li><a href="http://flink.apache.org/docs/master/internals/general_arch.html">Architecture &amp; Process Model</a></li>
<li><a href="http://flink.apache.org/docs/master/internals/types_serialization.html">Type Extraction &amp; Serialization</a></li>
<li><a href="http://flink.apache.org/docs/master/internals/job_scheduling.html">Jobs &amp; Scheduling</a></li>
<li><a href="http://flink.apache.org/docs/master/internals/add_operator.html">How-To: Add an Operator</a></li>
</ul>
</li>
</ul>
<form class="navbar-form navbar-right hidden-sm hidden-md" role="search" action="http://flink.apache.org/docs/master/search-results.html">
<div class="form-group">
<input type="text" class="form-control" name="q" placeholder="Search all pages">
</div>
<button type="submit" class="btn btn-default">Search</button>
</form>
</div><!-- /.navbar-collapse -->
</div><!-- /.container -->
</nav>
<!-- Main content. -->
<div class="container">
<div class="row">
<div class="col-sm-10 col-sm-offset-1">
<h1>Cluster Execution</h1>
<ul id="markdown-toc">
<li><a href="#command-line-interface" id="markdown-toc-command-line-interface">Command Line Interface</a></li>
<li><a href="#remote-environment" id="markdown-toc-remote-environment">Remote Environment</a> <ul>
<li><a href="#maven-dependency" id="markdown-toc-maven-dependency">Maven Dependency</a></li>
<li><a href="#example" id="markdown-toc-example">Example</a></li>
</ul>
</li>
<li><a href="#linking-with-modules-not-contained-in-the-binary-distribution" id="markdown-toc-linking-with-modules-not-contained-in-the-binary-distribution">Linking with modules not contained in the binary distribution</a> <ul>
<li><a href="#packaging-dependencies-with-your-usercode-with-maven" id="markdown-toc-packaging-dependencies-with-your-usercode-with-maven">Packaging dependencies with your usercode with Maven</a></li>
</ul>
</li>
</ul>
<p>Flink programs can run distributed on clusters of many machines. There
are two ways to send a program to a cluster for execution:</p>
<h2 id="command-line-interface">Command Line Interface</h2>
<p>The command line interface lets you submit packaged programs (JARs) to a cluster
(or single machine setup).</p>
<p>Please refer to the <a href="cli.html">Command Line Interface</a> documentation for
details.</p>
<h2 id="remote-environment">Remote Environment</h2>
<p>The remote environment lets you execute Flink Java programs on a cluster
directly. The remote environment points to the cluster on which you want to
execute the program.</p>
<h3 id="maven-dependency">Maven Dependency</h3>
<p>If you are developing your program as a Maven project, you have to add the
<code>flink-clients</code> module using this dependency:</p>
<div class="highlight"><pre><code class="language-xml"><span class="nt">&lt;dependency&gt;</span>
<span class="nt">&lt;groupId&gt;</span>org.apache.flink<span class="nt">&lt;/groupId&gt;</span>
<span class="nt">&lt;artifactId&gt;</span>flink-clients<span class="nt">&lt;/artifactId&gt;</span>
<span class="nt">&lt;version&gt;</span>0.10-SNAPSHOT<span class="nt">&lt;/version&gt;</span>
<span class="nt">&lt;/dependency&gt;</span></code></pre></div>
<h3 id="example">Example</h3>
<p>The following illustrates the use of the <code>RemoteEnvironment</code>:</p>
<div class="highlight"><pre><code class="language-java"><span class="kd">public</span> <span class="kd">static</span> <span class="kt">void</span> <span class="nf">main</span><span class="o">(</span><span class="n">String</span><span class="o">[]</span> <span class="n">args</span><span class="o">)</span> <span class="kd">throws</span> <span class="n">Exception</span> <span class="o">{</span>
<span class="n">ExecutionEnvironment</span> <span class="n">env</span> <span class="o">=</span> <span class="n">ExecutionEnvironment</span>
<span class="o">.</span><span class="na">createRemoteEnvironment</span><span class="o">(</span><span class="s">&quot;flink-master&quot;</span><span class="o">,</span> <span class="mi">6123</span><span class="o">,</span> <span class="s">&quot;/home/user/udfs.jar&quot;</span><span class="o">);</span>
<span class="n">DataSet</span><span class="o">&lt;</span><span class="n">String</span><span class="o">&gt;</span> <span class="n">data</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="na">readTextFile</span><span class="o">(</span><span class="s">&quot;hdfs://path/to/file&quot;</span><span class="o">);</span>
<span class="n">data</span>
<span class="o">.</span><span class="na">filter</span><span class="o">(</span><span class="k">new</span> <span class="n">FilterFunction</span><span class="o">&lt;</span><span class="n">String</span><span class="o">&gt;()</span> <span class="o">{</span>
<span class="kd">public</span> <span class="kt">boolean</span> <span class="nf">filter</span><span class="o">(</span><span class="n">String</span> <span class="n">value</span><span class="o">)</span> <span class="o">{</span>
<span class="k">return</span> <span class="n">value</span><span class="o">.</span><span class="na">startsWith</span><span class="o">(</span><span class="s">&quot;http://&quot;</span><span class="o">);</span>
<span class="o">}</span>
<span class="o">})</span>
<span class="o">.</span><span class="na">writeAsText</span><span class="o">(</span><span class="s">&quot;hdfs://path/to/result&quot;</span><span class="o">);</span>
<span class="n">env</span><span class="o">.</span><span class="na">execute</span><span class="o">();</span>
<span class="o">}</span></code></pre></div>
<p>Note that the program contains custom user code and hence requires a JAR file with
the classes of the code attached. The constructor of the remote environment
takes the path(s) to the JAR file(s).</p>
<h2 id="linking-with-modules-not-contained-in-the-binary-distribution">Linking with modules not contained in the binary distribution</h2>
<p>The binary distribution contains jar packages in the <code>lib</code> folder that are automatically
provided to the classpath of your distrbuted programs. Almost all of Flink classes are
located there with a few exceptions, for example the streaming connectors and some freshly
added modules. To run code depending on these modules you need to make them accessible
during runtime, for which we suggest two options:</p>
<ol>
<li>Either copy the required jar files to the <code>lib</code> folder onto all of your TaskManagers.
Note that you have to restar your TaskManagers after this.</li>
<li>Or package them with your usercode.</li>
</ol>
<p>The latter version is recommended as it respects the classloader management in Flink.</p>
<h3 id="packaging-dependencies-with-your-usercode-with-maven">Packaging dependencies with your usercode with Maven</h3>
<p>To provide these dependencies not included by Flink we suggest two options with Maven.</p>
<ol>
<li>The maven assembly plugin builds a so called fat jar cointaining all your dependencies.
Assembly configuration is straight-forward, but the resulting jar might become bulky. See
<a href="http://maven.apache.org/plugins/maven-assembly-plugin/usage.html">usage</a>.</li>
<li>The maven unpack plugin, for unpacking the relevant parts of the dependencies and
then package it with your code.</li>
</ol>
<p>Using the latter approach in order to bundle the Kafka connector, <code>flink-connector-kafka</code>
you would need to add the classes from both the connector and the Kafka API itself. Add
the following to your plugins section.</p>
<div class="highlight"><pre><code class="language-xml"><span class="nt">&lt;plugin&gt;</span>
<span class="nt">&lt;groupId&gt;</span>org.apache.maven.plugins<span class="nt">&lt;/groupId&gt;</span>
<span class="nt">&lt;artifactId&gt;</span>maven-dependency-plugin<span class="nt">&lt;/artifactId&gt;</span>
<span class="nt">&lt;version&gt;</span>2.9<span class="nt">&lt;/version&gt;</span>
<span class="nt">&lt;executions&gt;</span>
<span class="nt">&lt;execution&gt;</span>
<span class="nt">&lt;id&gt;</span>unpack<span class="nt">&lt;/id&gt;</span>
<span class="c">&lt;!-- executed just before the package phase --&gt;</span>
<span class="nt">&lt;phase&gt;</span>prepare-package<span class="nt">&lt;/phase&gt;</span>
<span class="nt">&lt;goals&gt;</span>
<span class="nt">&lt;goal&gt;</span>unpack<span class="nt">&lt;/goal&gt;</span>
<span class="nt">&lt;/goals&gt;</span>
<span class="nt">&lt;configuration&gt;</span>
<span class="nt">&lt;artifactItems&gt;</span>
<span class="c">&lt;!-- For Flink connector classes --&gt;</span>
<span class="nt">&lt;artifactItem&gt;</span>
<span class="nt">&lt;groupId&gt;</span>org.apache.flink<span class="nt">&lt;/groupId&gt;</span>
<span class="nt">&lt;artifactId&gt;</span>flink-connector-kafka<span class="nt">&lt;/artifactId&gt;</span>
<span class="nt">&lt;version&gt;</span>0.10-SNAPSHOT<span class="nt">&lt;/version&gt;</span>
<span class="nt">&lt;type&gt;</span>jar<span class="nt">&lt;/type&gt;</span>
<span class="nt">&lt;overWrite&gt;</span>false<span class="nt">&lt;/overWrite&gt;</span>
<span class="nt">&lt;outputDirectory&gt;</span>${project.build.directory}/classes<span class="nt">&lt;/outputDirectory&gt;</span>
<span class="nt">&lt;includes&gt;</span>org/apache/flink/**<span class="nt">&lt;/includes&gt;</span>
<span class="nt">&lt;/artifactItem&gt;</span>
<span class="c">&lt;!-- For Kafka API classes --&gt;</span>
<span class="nt">&lt;artifactItem&gt;</span>
<span class="nt">&lt;groupId&gt;</span>org.apache.kafka<span class="nt">&lt;/groupId&gt;</span>
<span class="nt">&lt;artifactId&gt;</span>kafka_<span class="nt">&lt;YOUR_SCALA_VERSION&gt;&lt;/artifactId&gt;</span>
<span class="nt">&lt;version&gt;&lt;YOUR_KAFKA_VERSION&gt;&lt;/version&gt;</span>
<span class="nt">&lt;type&gt;</span>jar<span class="nt">&lt;/type&gt;</span>
<span class="nt">&lt;overWrite&gt;</span>false<span class="nt">&lt;/overWrite&gt;</span>
<span class="nt">&lt;outputDirectory&gt;</span>${project.build.directory}/classes<span class="nt">&lt;/outputDirectory&gt;</span>
<span class="nt">&lt;includes&gt;</span>kafka/**<span class="nt">&lt;/includes&gt;</span>
<span class="nt">&lt;/artifactItem&gt;</span>
<span class="nt">&lt;/artifactItems&gt;</span>
<span class="nt">&lt;/configuration&gt;</span>
<span class="nt">&lt;/execution&gt;</span>
<span class="nt">&lt;/executions&gt;</span>
<span class="nt">&lt;/plugin&gt;</span></code></pre></div>
<p>Now when running <code>mvn clean package</code> the produced jar includes the required dependencies.</p>
</div>
<div class="col-sm-10 col-sm-offset-1">
<!-- Disqus thread and some vertical offset -->
<div style="margin-top: 75px; margin-bottom: 50px" id="disqus_thread"></div>
</div>
</div>
</div><!-- /.container -->
<!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script>
<!-- Include all compiled plugins (below), or include individual files as needed -->
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script>
<script src="http://flink.apache.org/docs/master/page/js/codetabs.js"></script>
<!-- Google Analytics -->
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-52545728-1', 'auto');
ga('send', 'pageview');
</script>
<!-- Disqus -->
<script type="text/javascript">
var disqus_shortname = 'stratosphere-eu';
(function() {
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
})();
</script>
</body>
</html>