features/collocatedprocessing.html - ignite-website - Git at Google

 <!--
 ▄▄▄       ██▓███   ▄▄▄       ▄████▄   ██░ ██ ▓█████     ██▓  ▄████  ███▄    █  ██▓▄▄▄█████▓▓█████
 ▒████▄    ▓██░  ██▒▒████▄    ▒██▀ ▀█  ▓██░ ██▒▓█   ▀    ▓██▒ ██▒ ▀█▒ ██ ▀█   █ ▓██▒▓  ██▒ ▓▒▓█   ▀
 ▒██  ▀█▄  ▓██░ ██▓▒▒██  ▀█▄  ▒▓█    ▄ ▒██▀▀██░▒███      ▒██▒▒██░▄▄▄░▓██  ▀█ ██▒▒██▒▒ ▓██░ ▒░▒███
 ░██▄▄▄▄██ ▒██▄█▓▒ ▒░██▄▄▄▄██ ▒▓▓▄ ▄██▒░▓█ ░██ ▒▓█  ▄    ░██░░▓█  ██▓▓██▒  ▐▌██▒░██░░ ▓██▓ ░ ▒▓█  ▄
 ▓█   ▓██▒▒██▒ ░  ░ ▓█   ▓██▒▒ ▓███▀ ░░▓█▒░██▓░▒████▒   ░██░░▒▓███▀▒▒██░   ▓██░░██░  ▒██▒ ░ ░▒████▒
 ▒▒   ▓▒█░▒▓▒░ ░  ░ ▒▒   ▓▒█░░ ░▒ ▒  ░ ▒ ░░▒░▒░░ ▒░ ░   ░▓   ░▒   ▒ ░ ▒░   ▒ ▒ ░▓    ▒ ░░   ░░ ▒░ ░
  ▒   ▒▒ ░░▒ ░       ▒   ▒▒ ░  ░  ▒    ▒ ░▒░ ░ ░ ░  ░    ▒ ░  ░   ░ ░ ░░   ░ ▒░ ▒ ░    ░     ░ ░  ░
  ░   ▒   ░░         ░   ▒   ░         ░  ░░ ░   ░       ▒ ░░ ░   ░    ░   ░ ░  ▒ ░  ░         ░
      ░  ░               ░  ░░ ░       ░  ░  ░   ░  ░    ░        ░          ░  ░              ░  ░
 -->

 <!--
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements.  See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership.  The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License.  You may obtain a copy of the License at

 http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing,
 software distributed under the License is distributed on an
 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 KIND, either express or implied.  See the License for the
 specific language governing permissions and limitations
 under the License.
 -->

 <!DOCTYPE html>
 <html lang="en">
 <head>
     <link rel="canonical" href="https://ignite.apache.org/features/collocatedprocessing.html" />
     <meta charset="utf-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <meta http-equiv="Cache-Control" content="no-cache, no-store, must-revalidate" />
     <meta http-equiv="Pragma" content="no-cache" />
     <meta http-equiv="Expires" content="0" />

     <title>Co-located Processing - Apache Ignite</title>

     <meta name="description"
           content="Apache Ignite supports co-located processing technique for compute- and data-intensive calculations
           as well as machine learning algorithms. The technique increases performance by eliminating the impact of
           network latency."/>


     <!--#include virtual="/includes/styles.html" -->

     <!--#include virtual="/includes/sh.html" -->
 </head>
 <body>
 <div id="wrapper">
     <!--#include virtual="/includes/header.html" -->

     <main id="main" role="main" class="container">
         <section id="memory-centric" class="page-section">
             <h1 class="first">Minimizing Network Utilization With Co-located Processing</h1>
             <div class="col-sm-12 col-md-12 col-xs-12" style="padding:0 0 20px 0;">
                 <div class="col-sm-6 col-md-6 col-xs-12" style="padding-left:0; padding-right:0">
                     <p>
                         By working with disk-based systems such as relational or NoSQL databases, many of us have grown accustomed
                         to using a classic client-server approach for data processing. Client applications usually bring
                         data from servers, use the records for local calculations, and discard the data as soon as the
                         business tasks complete. This approach does not scale well if a significant volume of data gets
                         transferred over the network.
                     </p>
                     <p>
                         In addition to the client-server approach, Apache Ignite supports a co-located processing
                         technique. The primary aim of the technique is to increase the performance of your complex
                         calculations or SQL with JOINs by running them straight on Ignite cluster nodes. In such a case,
                         the calculations work with local data sets of the cluster nodes, thus avoiding record shuffling
                         over the network and eliminating the impact of network latency on the performance of your
                         applications.
                     </p>
                 </div>
                 <div class="col-sm-6 col-md-6 col-xs-12" style="padding-right:0">
                     <img class="img-responsive" src="/images/collocated_processing.png" alt="Co-located processing illustration" width="440" style="float: right; margin-top: -25px;"/>
                 </div>
             </div>

             <div class="page-heading">Data Co-location</div>
             <p>
                 To exploit the co-located processing in practice, first, you need to co-locate data sets by storing
                 related records on the same cluster node. That is also known as affinity co-location in Ignite.
             </p>
             <p>
                 For example, let's introduce <code>Country</code> and <code>City</code> tables and co-locate
                 all <code>City</code> records that have the same <code>Country</code> identifier on a single node. To
                 achieve this, you need to set <code>CountryCode</code> as the <code>affinityKey</code> of the
                 <code>City</code> table:
             </p>
             <div class="tab-content">

                 <div class="tab-pane active" id="sql-tables">
                         <pre class="brush:sql">
                             CREATE TABLE Country (
                                 Code CHAR(3),
                                 Name CHAR(52),
                                 Continent CHAR(50),
                                 Region CHAR(26),
                                 SurfaceArea DECIMAL(10,2),
                                 Population INT(11),
                                 Capital INT(11),
                                 PRIMARY KEY (Code)
                             ) WITH "template=partitioned, backups=1";

                             CREATE TABLE City (
                                 ID INT(11),
                                 Name CHAR(35),
                                 CountryCode CHAR(3),
                                 District CHAR(20),
                                 Population INT(11),
                                 PRIMARY KEY (ID, CountryCode)
                             ) WITH "template=partitioned, backups=1, affinityKey=CountryCode";
                         </pre>
                 </div>
             </div>
             <p>
                 This way, you instruct Ignite to store all the <code>City</code> records with the same
                 <code>CountryCode</code> on a single cluster node. As soon as the data is co-located, Ignite can execute
                 compute- and data-intensive logic, as well as SQL with JOINs, straight on the cluster nodes, minimizing or
                 eliminating network utilization.
             </p>

             <div class="page-heading">SQL and Distributed JOINs</div>
             <p>
                 Ignite SQL engine performs much faster if a query gets executed against co-located records. That's
                 especially crucial for SQL with JOINs that can span many cluster nodes.
             </p>
             <p>
                 Taking the previous example with <code>Country</code> and <code>City</code> tables,
                 let's join those tables returning the most populated cities across several countries:
             </p>
             <div class="tab-content">
                 <div class="tab-pane active" id="sql-joins-query">
                         <pre class="brush:sql">
                             SELECT country.name, city.name, MAX(city.population) as max_pop
                             FROM country
                             JOIN city ON city.countrycode = country.code
                             WHERE country.code IN ('USA','RUS','CHN')
                             GROUP BY country.name, city.name
                             ORDER BY max_pop DESC;
                         </pre>
                 </div>
             </div>
             <p>
                 This query is executed only on the nodes that store records of China, Russia, and the USA. Plus, during
                 the JOIN, the records are not shuffled between the nodes as long as all the <code>Cities
                 </code> with the same <code>city.countrycode</code> are stored on a single node.
             </p>

             <div class="page-heading">Distributed Collocated Computations</div>
             <p>
                 Apache Ignite compute and machine learning APIs allow you to perform computations and execute
                 machine learning algorithms in parallel to achieve high performance, low latency, and linear scalability.
                 Furthermore, both components work best with co-located data sets.
             </p>
             <p>
                 Let's take another example by imagining that a winter storm is about to hit a highly-populated city. As
                 a telecommunication company, you have to send a text message to 20 million residents notifying them about the
                 blizzard. With the client-server approach, the company would read all 20 million records from a database
                 to an application that needs to execute some logic and send a message to the residents eventually.
             </p>
             <p>
                 A much more efficient approach would be to run the logic on and send text messages from the cluster nodes
                 that store the records of the residents. With this technique, instead of pulling 20 million records via
                 the network, you execute the logic in place and eliminate the network's impact on the performance of the calculation.
             </p>

             <p>
                 Here is an example of what this logic might look like:
             </p>
             <!--<ul id="compute-example" class="nav nav-tabs">-->
                 <!--<li  class="active"><a href="#compute-task" aria-controls="profile" data-toggle="tab">Message Broadcasting Logic</a></li>-->
             <!--</ul>-->

             <div class="tab-content">

                 <div class="tab-pane active" id="compute-task">
                         <pre class="brush:java">

 Ignite ignite = ...

 // NewYork ID.
 long newYorkId = 2;

 // Sending the logic to a cluster node that stores NewYork and all its inhabitants.
 ignite.compute().affinityRun("City", newYorkId, new IgniteRunnable() {

   @IgniteInstanceResource
   Ignite ignite;

   @Override
   public void run() {
     // Getting an access to Persons cache.
     IgniteCache&#60;BinaryObject, BinaryObject&#62; people = ignite.cache("Person").withKeepBinary();


     ScanQuery&#60;BinaryObject, BinaryObject&#62; query = new ScanQuery &#60;BinaryObject, BinaryObject&#62;();

     try (QueryCursor&#60;Cache.Entry&#60;BinaryObject, BinaryObject&#62;&#62; cursor = people.query(query)) {
       // Iteration over the local cluster node data using the scan query.
       for (Cache.Entry&#60;BinaryObject, BinaryObject&#62; entry : cursor) {
         BinaryObject personKey = entry.getKey();

         // Picking NewYorker's only.
         if (personKey.&#60;Long&#62;field("CITY_ID") == newYorkId) {
             BinaryObject person = entry.getValue();

             // Sending the warning message to the person.
         }
       }
     }
   }
 });

                         </pre>
                 </div>
             </div>

             <div class="page-heading">Learn More</div>
             <p>
                 <a href="https://apacheignite.readme.io/docs/compute-grid" target="docs">
                     <b>Compute APIs <i class="fa fa-angle-double-right"></i></b>
                 </a>
             </p>
             <p>
                 <a href="/features/machinelearning.html">
                     <b>Machine and Deep Learning <i class="fa fa-angle-double-right"></i></b>
                 </a>
             </p>
             <p>
                 <a href="/use-cases/hpc.html">
                     <b>High Performance Computing with Apache Ignite <i class="fa fa-angle-double-right"></i></b>
                 </a>
             </p>

         </section>
     </main>

     <!--#include virtual="/includes/footer.html" -->
 </div>
 <!--#include virtual="/includes/scripts.html" -->
 </body>
 </html>
	<!--
	▄▄▄ ██▓███ ▄▄▄ ▄████▄ ██░ ██ ▓█████ ██▓ ▄████ ███▄ █ ██▓▄▄▄█████▓▓█████
	▒████▄ ▓██░ ██▒▒████▄ ▒██▀ ▀█ ▓██░ ██▒▓█ ▀ ▓██▒ ██▒ ▀█▒ ██ ▀█ █ ▓██▒▓ ██▒ ▓▒▓█ ▀
	▒██ ▀█▄ ▓██░ ██▓▒▒██ ▀█▄ ▒▓█ ▄ ▒██▀▀██░▒███ ▒██▒▒██░▄▄▄░▓██ ▀█ ██▒▒██▒▒ ▓██░ ▒░▒███
	░██▄▄▄▄██ ▒██▄█▓▒ ▒░██▄▄▄▄██ ▒▓▓▄ ▄██▒░▓█ ░██ ▒▓█ ▄ ░██░░▓█ ██▓▓██▒ ▐▌██▒░██░░ ▓██▓ ░ ▒▓█ ▄
	▓█ ▓██▒▒██▒ ░ ░ ▓█ ▓██▒▒ ▓███▀ ░░▓█▒░██▓░▒████▒ ░██░░▒▓███▀▒▒██░ ▓██░░██░ ▒██▒ ░ ░▒████▒
	▒▒ ▓▒█░▒▓▒░ ░ ░ ▒▒ ▓▒█░░ ░▒ ▒ ░ ▒ ░░▒░▒░░ ▒░ ░ ░▓ ░▒ ▒ ░ ▒░ ▒ ▒ ░▓ ▒ ░░ ░░ ▒░ ░
	▒ ▒▒ ░░▒ ░ ▒ ▒▒ ░ ░ ▒ ▒ ░▒░ ░ ░ ░ ░ ▒ ░ ░ ░ ░ ░░ ░ ▒░ ▒ ░ ░ ░ ░ ░
	░ ▒ ░░ ░ ▒ ░ ░ ░░ ░ ░ ▒ ░░ ░ ░ ░ ░ ░ ▒ ░ ░ ░
	░ ░ ░ ░░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░
	-->

	<!--
	Licensed to the Apache Software Foundation (ASF) under one
	or more contributor license agreements. See the NOTICE file
	distributed with this work for additional information
	regarding copyright ownership. The ASF licenses this file
	to you under the Apache License, Version 2.0 (the
	"License"); you may not use this file except in compliance
	with the License. You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

	Unless required by applicable law or agreed to in writing,
	software distributed under the License is distributed on an
	"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	KIND, either express or implied. See the License for the
	specific language governing permissions and limitations
	under the License.
	-->

	<!DOCTYPE html>
	<html lang="en">
	<head>
	<link rel="canonical" href="https://ignite.apache.org/features/collocatedprocessing.html" />
	<meta charset="utf-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<meta http-equiv="Cache-Control" content="no-cache, no-store, must-revalidate" />
	<meta http-equiv="Pragma" content="no-cache" />
	<meta http-equiv="Expires" content="0" />

	<title>Co-located Processing - Apache Ignite</title>

	<meta name="description"
	content="Apache Ignite supports co-located processing technique for compute- and data-intensive calculations
	as well as machine learning algorithms. The technique increases performance by eliminating the impact of
	network latency."/>


	<!--#include virtual="/includes/styles.html" -->

	<!--#include virtual="/includes/sh.html" -->
	</head>
	<body>
	<div id="wrapper">
	<!--#include virtual="/includes/header.html" -->

	<main id="main" role="main" class="container">
	<section id="memory-centric" class="page-section">
	<h1 class="first">Minimizing Network Utilization With Co-located Processing</h1>
	<div class="col-sm-12 col-md-12 col-xs-12" style="padding:0 0 20px 0;">
	<div class="col-sm-6 col-md-6 col-xs-12" style="padding-left:0; padding-right:0">
	<p>
	By working with disk-based systems such as relational or NoSQL databases, many of us have grown accustomed
	to using a classic client-server approach for data processing. Client applications usually bring
	data from servers, use the records for local calculations, and discard the data as soon as the
	business tasks complete. This approach does not scale well if a significant volume of data gets
	transferred over the network.
	</p>
	<p>
	In addition to the client-server approach, Apache Ignite supports a co-located processing
	technique. The primary aim of the technique is to increase the performance of your complex
	calculations or SQL with JOINs by running them straight on Ignite cluster nodes. In such a case,
	the calculations work with local data sets of the cluster nodes, thus avoiding record shuffling
	over the network and eliminating the impact of network latency on the performance of your
	applications.
	</p>
	</div>
	<div class="col-sm-6 col-md-6 col-xs-12" style="padding-right:0">
	<img class="img-responsive" src="/images/collocated_processing.png" alt="Co-located processing illustration" width="440" style="float: right; margin-top: -25px;"/>
	</div>
	</div>

	<div class="page-heading">Data Co-location</div>
	<p>
	To exploit the co-located processing in practice, first, you need to co-locate data sets by storing
	related records on the same cluster node. That is also known as affinity co-location in Ignite.
	</p>
	<p>
	For example, let's introduce <code>Country</code> and <code>City</code> tables and co-locate
	all <code>City</code> records that have the same <code>Country</code> identifier on a single node. To
	achieve this, you need to set <code>CountryCode</code> as the <code>affinityKey</code> of the
	<code>City</code> table:
	</p>
	<div class="tab-content">

	<div class="tab-pane active" id="sql-tables">
	<pre class="brush:sql">
	CREATE TABLE Country (
	Code CHAR(3),
	Name CHAR(52),
	Continent CHAR(50),
	Region CHAR(26),
	SurfaceArea DECIMAL(10,2),
	Population INT(11),
	Capital INT(11),
	PRIMARY KEY (Code)
	) WITH "template=partitioned, backups=1";

	CREATE TABLE City (
	ID INT(11),
	Name CHAR(35),
	CountryCode CHAR(3),
	District CHAR(20),
	Population INT(11),
	PRIMARY KEY (ID, CountryCode)
	) WITH "template=partitioned, backups=1, affinityKey=CountryCode";
	</pre>
	</div>
	</div>
	<p>
	This way, you instruct Ignite to store all the <code>City</code> records with the same
	<code>CountryCode</code> on a single cluster node. As soon as the data is co-located, Ignite can execute
	compute- and data-intensive logic, as well as SQL with JOINs, straight on the cluster nodes, minimizing or
	eliminating network utilization.
	</p>

	<div class="page-heading">SQL and Distributed JOINs</div>
	<p>
	Ignite SQL engine performs much faster if a query gets executed against co-located records. That's
	especially crucial for SQL with JOINs that can span many cluster nodes.
	</p>
	<p>
	Taking the previous example with <code>Country</code> and <code>City</code> tables,
	let's join those tables returning the most populated cities across several countries:
	</p>
	<div class="tab-content">
	<div class="tab-pane active" id="sql-joins-query">
	<pre class="brush:sql">
	SELECT country.name, city.name, MAX(city.population) as max_pop
	FROM country
	JOIN city ON city.countrycode = country.code
	WHERE country.code IN ('USA','RUS','CHN')
	GROUP BY country.name, city.name
	ORDER BY max_pop DESC;
	</pre>
	</div>
	</div>
	<p>
	This query is executed only on the nodes that store records of China, Russia, and the USA. Plus, during
	the JOIN, the records are not shuffled between the nodes as long as all the <code>Cities
	</code> with the same <code>city.countrycode</code> are stored on a single node.
	</p>

	<div class="page-heading">Distributed Collocated Computations</div>
	<p>
	Apache Ignite compute and machine learning APIs allow you to perform computations and execute
	machine learning algorithms in parallel to achieve high performance, low latency, and linear scalability.
	Furthermore, both components work best with co-located data sets.
	</p>
	<p>
	Let's take another example by imagining that a winter storm is about to hit a highly-populated city. As
	a telecommunication company, you have to send a text message to 20 million residents notifying them about the
	blizzard. With the client-server approach, the company would read all 20 million records from a database
	to an application that needs to execute some logic and send a message to the residents eventually.
	</p>
	<p>
	A much more efficient approach would be to run the logic on and send text messages from the cluster nodes
	that store the records of the residents. With this technique, instead of pulling 20 million records via
	the network, you execute the logic in place and eliminate the network's impact on the performance of the calculation.
	</p>

	<p>
	Here is an example of what this logic might look like:
	</p>
	<!--<ul id="compute-example" class="nav nav-tabs">-->
	<!--<li class="active"><a href="#compute-task" aria-controls="profile" data-toggle="tab">Message Broadcasting Logic</a></li>-->
	<!--</ul>-->

	<div class="tab-content">

	<div class="tab-pane active" id="compute-task">
	<pre class="brush:java">

	Ignite ignite = ...

	// NewYork ID.
	long newYorkId = 2;

	// Sending the logic to a cluster node that stores NewYork and all its inhabitants.
	ignite.compute().affinityRun("City", newYorkId, new IgniteRunnable() {

	@IgniteInstanceResource
	Ignite ignite;

	@Override
	public void run() {
	// Getting an access to Persons cache.
	IgniteCache&lt;BinaryObject, BinaryObject&gt; people = ignite.cache("Person").withKeepBinary();


	ScanQuery&lt;BinaryObject, BinaryObject&gt; query = new ScanQuery &lt;BinaryObject, BinaryObject&gt;();

	try (QueryCursor&lt;Cache.Entry&lt;BinaryObject, BinaryObject&gt;&gt; cursor = people.query(query)) {
	// Iteration over the local cluster node data using the scan query.
	for (Cache.Entry&lt;BinaryObject, BinaryObject&gt; entry : cursor) {
	BinaryObject personKey = entry.getKey();

	// Picking NewYorker's only.
	if (personKey.&lt;Long&gt;field("CITY_ID") == newYorkId) {
	BinaryObject person = entry.getValue();

	// Sending the warning message to the person.
	}
	}
	}
	}
	});

	</pre>
	</div>
	</div>

	<div class="page-heading">Learn More</div>
	<p>
	<a href="https://apacheignite.readme.io/docs/compute-grid" target="docs">
	<b>Compute APIs <i class="fa fa-angle-double-right"></i></b>
	</a>
	</p>
	<p>
	<a href="/features/machinelearning.html">
	<b>Machine and Deep Learning <i class="fa fa-angle-double-right"></i></b>
	</a>
	</p>
	<p>
	<a href="/use-cases/hpc.html">
	<b>High Performance Computing with Apache Ignite <i class="fa fa-angle-double-right"></i></b>
	</a>
	</p>

	</section>
	</main>

	<!--#include virtual="/includes/footer.html" -->
	</div>
	<!--#include virtual="/includes/scripts.html" -->
	</body>
	</html>