| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one |
| or more contributor license agreements. See the NOTICE file |
| distributed with this work for additional information |
| regarding copyright ownership. The ASF licenses this file |
| to you under the Apache License, Version 2.0 (the |
| "License"); you may not use this file except in compliance |
| with the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, |
| software distributed under the License is distributed on an |
| "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| KIND, either express or implied. See the License for the |
| specific language governing permissions and limitations |
| under the License. |
| --> |
| <!DOCTYPE html> |
| |
| <html lang="en"> |
| <head> |
| <meta charset="utf-8"> |
| <meta http-equiv="X-UA-Compatible" content="IE=edge"> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> |
| |
| <title>Apache Flink 0.10-SNAPSHOT Documentation: Spargel Graph Processing API - DEPRECATED</title> |
| |
| <link rel="shortcut icon" href="http://flink.apache.org/docs/master/page/favicon.ico" type="image/x-icon"> |
| <link rel="icon" href="http://flink.apache.org/docs/master/page/favicon.ico" type="image/x-icon"> |
| |
| <!-- Bootstrap --> |
| <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"> |
| <link rel="stylesheet" href="http://flink.apache.org/docs/master/page/css/flink.css"> |
| <link rel="stylesheet" href="http://flink.apache.org/docs/master/page/css/syntax.css"> |
| <link rel="stylesheet" href="http://flink.apache.org/docs/master/page/css/codetabs.css"> |
| |
| <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> |
| <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> |
| <!--[if lt IE 9]> |
| <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> |
| <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> |
| <![endif]--> |
| </head> |
| <body> |
| |
| |
| |
| |
| |
| |
| <!-- Top navbar. --> |
| <nav class="navbar navbar-default navbar-fixed-top"> |
| <div class="container"> |
| <!-- The logo. --> |
| <div class="navbar-header"> |
| <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> |
| <span class="icon-bar"></span> |
| <span class="icon-bar"></span> |
| <span class="icon-bar"></span> |
| </button> |
| <div class="navbar-logo"> |
| <a href="http://flink.apache.org"><img alt="Apache Flink" src="http://flink.apache.org/docs/master/page/img/navbar-brand-logo.jpg"></a> |
| </div> |
| </div><!-- /.navbar-header --> |
| |
| <!-- The navigation links. --> |
| <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> |
| <ul class="nav navbar-nav"> |
| <li><a href="http://flink.apache.org/docs/master/index.html">Overview<span class="hidden-sm hidden-xs"> 0.10</span></a></li> |
| |
| <!-- Setup --> |
| <li class="dropdown"> |
| <a href="http://flink.apache.org/docs/master/setup" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Setup <span class="caret"></span></a> |
| <ul class="dropdown-menu" role="menu"> |
| <li><a href="http://flink.apache.org/docs/master/setup/building.html">Get Flink 0.10-SNAPSHOT</a></li> |
| |
| <li class="divider"></li> |
| <li role="presentation" class="dropdown-header"><strong>Deployment</strong></li> |
| <li><a href="http://flink.apache.org/docs/master/setup/local_setup.html" class="active">Local</a></li> |
| <li><a href="http://flink.apache.org/docs/master/setup/cluster_setup.html">Cluster (Standalone)</a></li> |
| <li><a href="http://flink.apache.org/docs/master/setup/yarn_setup.html">YARN</a></li> |
| <li><a href="http://flink.apache.org/docs/master/setup/gce_setup.html">GCloud</a></li> |
| <li><a href="http://flink.apache.org/docs/master/setup/flink_on_tez.html">Flink on Tez <span class="badge">Beta</span></a></li> |
| |
| <li class="divider"></li> |
| <li><a href="http://flink.apache.org/docs/master/setup/config.html">Configuration</a></li> |
| </ul> |
| </li> |
| |
| <!-- Programming Guides --> |
| <li class="dropdown"> |
| <a href="http://flink.apache.org/docs/master/apis" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Programming Guides <span class="caret"></span></a> |
| <ul class="dropdown-menu" role="menu"> |
| <li><a href="http://flink.apache.org/docs/master/apis/programming_guide.html"><strong>Batch: DataSet API</strong></a></li> |
| <li><a href="http://flink.apache.org/docs/master/apis/streaming_guide.html"><strong>Streaming: DataStream API</strong> <span class="badge">Beta</span></a></li> |
| <li><a href="http://flink.apache.org/docs/master/apis/python.html">Python API <span class="badge">Beta</span></a></li> |
| |
| <li class="divider"></li> |
| <li><a href="scala_shell.html">Interactive Scala Shell</a></li> |
| <li><a href="http://flink.apache.org/docs/master/apis/dataset_transformations.html">Dataset Transformations</a></li> |
| <li><a href="http://flink.apache.org/docs/master/apis/best_practices.html">Best Practices</a></li> |
| <li><a href="http://flink.apache.org/docs/master/apis/example_connectors.html">Connectors</a></li> |
| <li><a href="http://flink.apache.org/docs/master/apis/examples.html">Examples</a></li> |
| <li><a href="http://flink.apache.org/docs/master/apis/local_execution.html">Local Execution</a></li> |
| <li><a href="http://flink.apache.org/docs/master/apis/cluster_execution.html">Cluster Execution</a></li> |
| <li><a href="http://flink.apache.org/docs/master/apis/cli.html">Command Line Interface</a></li> |
| <li><a href="http://flink.apache.org/docs/master/apis/web_client.html">Web Client</a></li> |
| <li><a href="http://flink.apache.org/docs/master/apis/iterations.html">Iterations</a></li> |
| <li><a href="http://flink.apache.org/docs/master/apis/java8.html">Java 8</a></li> |
| <li><a href="http://flink.apache.org/docs/master/apis/hadoop_compatibility.html">Hadoop Compatability <span class="badge">Beta</span></a></li> |
| </ul> |
| </li> |
| |
| <!-- Libraries --> |
| <li class="dropdown"> |
| <a href="http://flink.apache.org/docs/master/libs" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Libraries <span class="caret"></span></a> |
| <ul class="dropdown-menu" role="menu"> |
| <li><a href="http://flink.apache.org/docs/master/libs/spargel_guide.html">Graphs: Spargel</a></li> |
| <li><a href="http://flink.apache.org/docs/master/libs/gelly_guide.html">Graphs: Gelly <span class="badge">Beta</span></a></li> |
| <li><a href="http://flink.apache.org/docs/master/libs/ml/">Machine Learning <span class="badge">Beta</span></a></li> |
| <li><a href="http://flink.apache.org/docs/master/libs/table.html">Relational: Table <span class="badge">Beta</span></a></li> |
| </ul> |
| </li> |
| |
| <!-- Internals --> |
| <li class="dropdown"> |
| <a href="http://flink.apache.org/docs/master/internals" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Internals <span class="caret"></span></a> |
| <ul class="dropdown-menu" role="menu"> |
| <li role="presentation" class="dropdown-header"><strong>Contribute</strong></li> |
| <li><a href="http://flink.apache.org/docs/master/internals/how_to_contribute.html">How to Contribute</a></li> |
| <li><a href="http://flink.apache.org/docs/master/internals/coding_guidelines.html">Coding Guidelines</a></li> |
| <li><a href="http://flink.apache.org/docs/master/internals/ide_setup.html">IDE Setup</a></li> |
| <li><a href="http://flink.apache.org/docs/master/internals/logging.html">Logging</a></li> |
| <li class="divider"></li> |
| <li role="presentation" class="dropdown-header"><strong>Internals</strong></li> |
| <li><a href="http://flink.apache.org/docs/master/internals/general_arch.html">Architecture & Process Model</a></li> |
| <li><a href="http://flink.apache.org/docs/master/internals/types_serialization.html">Type Extraction & Serialization</a></li> |
| <li><a href="http://flink.apache.org/docs/master/internals/job_scheduling.html">Jobs & Scheduling</a></li> |
| <li><a href="http://flink.apache.org/docs/master/internals/add_operator.html">How-To: Add an Operator</a></li> |
| </ul> |
| </li> |
| </ul> |
| <form class="navbar-form navbar-right hidden-sm hidden-md" role="search" action="http://flink.apache.org/docs/master/search-results.html"> |
| <div class="form-group"> |
| <input type="text" class="form-control" name="q" placeholder="Search all pages"> |
| </div> |
| <button type="submit" class="btn btn-default">Search</button> |
| </form> |
| </div><!-- /.navbar-collapse --> |
| </div><!-- /.container --> |
| </nav> |
| |
| |
| |
| |
| <!-- Main content. --> |
| <div class="container"> |
| |
| |
| <div class="row"> |
| <div class="col-sm-10 col-sm-offset-1"> |
| <h1>Spargel Graph Processing API - DEPRECATED</h1> |
| |
| |
| |
| <p>Spargel is our <a href="http://giraph.apache.org">Giraph</a> like <strong>graph processing</strong> Java API. It supports basic graph computations, which are run as a sequence of <a href="http://flink.apache.org/docs/master/apis/iterations.html#supersteps">supersteps</a>. Spargel and Giraph both implement the <a href="https://en.wikipedia.org/wiki/Bulk_Synchronous_Parallel">Bulk Synchronous Parallel (BSP)</a> programming model, propsed by Google’s <a href="http://googleresearch.blogspot.de/2009/06/large-scale-graph-computing-at-google.html">Pregel</a>.</p> |
| |
| <p>The API provides a <strong>vertex-centric</strong> view on graph processing with two basic operations per superstep:</p> |
| |
| <ol> |
| <li><strong>Send messages</strong> to other vertices, and</li> |
| <li><strong>Receive messages</strong> from other vertices and <strong>update own vertex state</strong>.</li> |
| </ol> |
| |
| <p>This vertex-centric view makes it easy to express a large class of graph problems efficiently. We will list all <em>relevant interfaces</em> of the <strong>Spargel API</strong> to implement and walk through an <strong>example Spargel program</strong>.</p> |
| |
| <ul id="markdown-toc"> |
| <li><a href="#spargel-api---deprecated" id="markdown-toc-spargel-api---deprecated">Spargel API - DEPRECATED</a></li> |
| <li><a href="#example-propagate-minimum-vertex-id-in-graph" id="markdown-toc-example-propagate-minimum-vertex-id-in-graph">Example: Propagate Minimum Vertex ID in Graph</a></li> |
| </ul> |
| |
| <h2 id="spargel-api---deprecated">Spargel API - DEPRECATED</h2> |
| <p>The Spargel API is Deprecated. Please check out the new <a href="gelly_guide.html">Gelly API</a> for graph processing with Apache Flink. If you want to port your Spargel code into Gelly, |
| please check the <a href="gelly_guide.html#migrating-spargel-code-to-gelly">migration guide</a>.</p> |
| |
| <p>The Spargel API is part of the <em>addons</em> Maven project. All relevant classes are located in the <em>org.apache.flink.spargel.java</em> package.</p> |
| |
| <p>Add the following dependency to your <code>pom.xml</code> to use the Spargel.</p> |
| |
| <div class="highlight"><pre><code class="language-xml"><span class="nt"><dependency></span> |
| <span class="nt"><groupId></span>org.apache.flink<span class="nt"></groupId></span> |
| <span class="nt"><artifactId></span>flink-spargel<span class="nt"></artifactId></span> |
| <span class="nt"><version></span>0.10-SNAPSHOT<span class="nt"></version></span> |
| <span class="nt"></dependency></span></code></pre></div> |
| |
| <p>Extend <strong>VertexUpdateFunction<</strong><em>VertexKeyType</em>, <em>VertexValueType</em>, <em>MessageType</em><strong>></strong> to implement your <em>custom vertex update logic</em>.</p> |
| |
| <p>Extend <strong>MessagingFunction<</strong><em>VertexKeyType</em>, <em>VertexValueType</em>, <em>MessageType</em>, <em>EdgeValueType</em><strong>></strong> to implement your <em>custom message logic</em>.</p> |
| |
| <p>Create a <strong>SpargelIteration</strong> operator to include Spargel in your data flow.</p> |
| |
| <h2 id="example-propagate-minimum-vertex-id-in-graph">Example: Propagate Minimum Vertex ID in Graph</h2> |
| |
| <p>The Spargel operator <strong>SpargelIteration</strong> includes Spargel graph processing into your data flow. As usual, it can be combined with other operators like <em>map</em>, <em>reduce</em>, <em>join</em>, etc.</p> |
| |
| <div class="highlight"><pre><code class="language-java"><span class="n">FileDataSource</span> <span class="n">vertices</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">FileDataSource</span><span class="o">(...);</span> |
| <span class="n">FileDataSource</span> <span class="n">edges</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">FileDataSource</span><span class="o">(...);</span> |
| |
| <span class="n">SpargelIteration</span> <span class="n">iteration</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">SpargelIteration</span><span class="o">(</span><span class="k">new</span> <span class="nf">MinMessager</span><span class="o">(),</span> <span class="k">new</span> <span class="nf">MinNeighborUpdater</span><span class="o">());</span> |
| <span class="n">iteration</span><span class="o">.</span><span class="na">setVertexInput</span><span class="o">(</span><span class="n">vertices</span><span class="o">);</span> |
| <span class="n">iteration</span><span class="o">.</span><span class="na">setEdgesInput</span><span class="o">(</span><span class="n">edges</span><span class="o">);</span> |
| <span class="n">iteration</span><span class="o">.</span><span class="na">setNumberOfIterations</span><span class="o">(</span><span class="n">maxIterations</span><span class="o">);</span> |
| |
| <span class="n">FileDataSink</span> <span class="n">result</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">FileDataSink</span><span class="o">(...);</span> |
| <span class="n">result</span><span class="o">.</span><span class="na">setInput</span><span class="o">(</span><span class="n">iteration</span><span class="o">.</span><span class="na">getOutput</span><span class="o">());</span> |
| |
| <span class="k">new</span> <span class="nf">Plan</span><span class="o">(</span><span class="n">result</span><span class="o">);</span></code></pre></div> |
| |
| <p>Besides the <strong>program logic</strong> of vertex updates in <em>MinNeighborUpdater</em> and messages in <em>MinMessager</em>, you have to specify the <strong>initial vertex</strong> and <strong>edge input</strong>. Every vertex has a <strong>key</strong> and <strong>value</strong>. In each superstep, it <strong>receives messages</strong> from other vertices and updates its state:</p> |
| |
| <ul> |
| <li><strong>Vertex</strong> input: <strong>(id</strong>: <em>VertexKeyType</em>, <strong>value</strong>: <em>VertexValueType</em><strong>)</strong></li> |
| <li><strong>Edge</strong> input: <strong>(source</strong>: <em>VertexKeyType</em>, <strong>target</strong>: <em>VertexKeyType</em>[, <strong>value</strong>: <em>EdgeValueType</em>])</li> |
| </ul> |
| |
| <p>For our example, we set the vertex ID as both <em>id and value</em> (initial minimum) and <em>leave out the edge values</em> as we don’t need them:</p> |
| |
| <p class="text-center"> |
| <img alt="Spargel Example Input" width="75%" src="fig/spargel_example_input.png" /> |
| </p> |
| |
| <p>In order to <strong>propagate the minimum vertex ID</strong>, we iterate over all received messages (which contain the neighboring IDs) and update our value, if we found a new minimum:</p> |
| |
| <div class="highlight"><pre><code class="language-java"><span class="kd">public</span> <span class="kd">class</span> <span class="nc">MinNeighborUpdater</span> <span class="kd">extends</span> <span class="n">VertexUpdateFunction</span><span class="o"><</span><span class="n">IntValue</span><span class="o">,</span> <span class="n">IntValue</span><span class="o">,</span> <span class="n">IntValue</span><span class="o">></span> <span class="o">{</span> |
| |
| <span class="nd">@Override</span> |
| <span class="kd">public</span> <span class="kt">void</span> <span class="nf">updateVertex</span><span class="o">(</span><span class="n">IntValue</span> <span class="n">id</span><span class="o">,</span> <span class="n">IntValue</span> <span class="n">currentMin</span><span class="o">,</span> <span class="n">Iterator</span><span class="o"><</span><span class="n">IntValue</span><span class="o">></span> <span class="n">messages</span><span class="o">)</span> <span class="o">{</span> |
| <span class="kt">int</span> <span class="n">min</span> <span class="o">=</span> <span class="n">Integer</span><span class="o">.</span><span class="na">MAX_VALUE</span><span class="o">;</span> |
| |
| <span class="c1">// iterate over all received messages</span> |
| <span class="k">while</span> <span class="o">(</span><span class="n">messages</span><span class="o">.</span><span class="na">hasNext</span><span class="o">())</span> <span class="o">{</span> |
| <span class="kt">int</span> <span class="n">next</span> <span class="o">=</span> <span class="n">messages</span><span class="o">.</span><span class="na">next</span><span class="o">().</span><span class="na">getValue</span><span class="o">();</span> |
| <span class="n">min</span> <span class="o">=</span> <span class="n">next</span> <span class="o"><</span> <span class="n">min</span> <span class="o">?</span> <span class="n">next</span> <span class="o">:</span> <span class="n">min</span><span class="o">;</span> |
| <span class="o">}</span> |
| |
| <span class="c1">// update vertex value, if new minimum</span> |
| <span class="k">if</span> <span class="o">(</span><span class="n">min</span> <span class="o"><</span> <span class="n">currentMin</span><span class="o">.</span><span class="na">getValue</span><span class="o">())</span> <span class="o">{</span> |
| <span class="n">setNewVertexValue</span><span class="o">(</span><span class="k">new</span> <span class="nf">IntValue</span><span class="o">(</span><span class="n">min</span><span class="o">));</span> |
| <span class="o">}</span> |
| <span class="o">}</span> |
| <span class="o">}</span></code></pre></div> |
| |
| <p>The <strong>messages in each superstep</strong> consist of the <strong>current minimum ID</strong> seen by the vertex:</p> |
| |
| <div class="highlight"><pre><code class="language-java"><span class="kd">public</span> <span class="kd">class</span> <span class="nc">MinMessager</span> <span class="kd">extends</span> <span class="n">MessagingFunction</span><span class="o"><</span><span class="n">IntValue</span><span class="o">,</span> <span class="n">IntValue</span><span class="o">,</span> <span class="n">IntValue</span><span class="o">,</span> <span class="n">NullValue</span><span class="o">></span> <span class="o">{</span> |
| |
| <span class="nd">@Override</span> |
| <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sendMessages</span><span class="o">(</span><span class="n">IntValue</span> <span class="n">id</span><span class="o">,</span> <span class="n">IntValue</span> <span class="n">currentMin</span><span class="o">)</span> <span class="o">{</span> |
| <span class="c1">// send current minimum to neighbors</span> |
| <span class="n">sendMessageToAllNeighbors</span><span class="o">(</span><span class="n">currentMin</span><span class="o">);</span> |
| <span class="o">}</span> |
| <span class="o">}</span></code></pre></div> |
| |
| <p>The <strong>API-provided method</strong> <code>sendMessageToAllNeighbors(MessageType)</code> sends the message to all neighboring vertices. It is also possible to address specific vertices with <code>sendMessageTo(VertexKeyType, MessageType)</code>.</p> |
| |
| <p>If the value of a vertex does not change during a superstep, it will <strong>not send</strong> any messages in the superstep. This allows to do incremental updates to the <strong>hot (changing) parts</strong> of the graph, while leaving <strong>cold (steady) parts</strong> untouched.</p> |
| |
| <p>The computation <strong>terminates</strong> after a specified <em>maximum number of supersteps</em> <strong>-OR-</strong> the <em>vertex states stop changing</em>.</p> |
| |
| <p class="text-center"> |
| <img alt="Spargel Example" width="75%" src="fig/spargel_example.png" /> |
| </p> |
| |
| </div> |
| |
| <div class="col-sm-10 col-sm-offset-1"> |
| <!-- Disqus thread and some vertical offset --> |
| <div style="margin-top: 75px; margin-bottom: 50px" id="disqus_thread"></div> |
| </div> |
| </div> |
| |
| </div><!-- /.container --> |
| |
| <!-- jQuery (necessary for Bootstrap's JavaScript plugins) --> |
| <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> |
| <!-- Include all compiled plugins (below), or include individual files as needed --> |
| <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script> |
| <script src="http://flink.apache.org/docs/master/page/js/codetabs.js"></script> |
| |
| <!-- Google Analytics --> |
| <script> |
| (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ |
| (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), |
| m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) |
| })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); |
| |
| ga('create', 'UA-52545728-1', 'auto'); |
| ga('send', 'pageview'); |
| </script> |
| |
| <!-- Disqus --> |
| <script type="text/javascript"> |
| var disqus_shortname = 'stratosphere-eu'; |
| (function() { |
| var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; |
| dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; |
| (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); |
| })(); |
| </script> |
| </body> |
| </html> |