<!--
▄▄▄       ██▓███   ▄▄▄       ▄████▄   ██░ ██ ▓█████     ██▓  ▄████  ███▄    █  ██▓▄▄▄█████▓▓█████
▒████▄    ▓██░  ██▒▒████▄    ▒██▀ ▀█  ▓██░ ██▒▓█   ▀    ▓██▒ ██▒ ▀█▒ ██ ▀█   █ ▓██▒▓  ██▒ ▓▒▓█   ▀
▒██  ▀█▄  ▓██░ ██▓▒▒██  ▀█▄  ▒▓█    ▄ ▒██▀▀██░▒███      ▒██▒▒██░▄▄▄░▓██  ▀█ ██▒▒██▒▒ ▓██░ ▒░▒███
░██▄▄▄▄██ ▒██▄█▓▒ ▒░██▄▄▄▄██ ▒▓▓▄ ▄██▒░▓█ ░██ ▒▓█  ▄    ░██░░▓█  ██▓▓██▒  ▐▌██▒░██░░ ▓██▓ ░ ▒▓█  ▄
▓█   ▓██▒▒██▒ ░  ░ ▓█   ▓██▒▒ ▓███▀ ░░▓█▒░██▓░▒████▒   ░██░░▒▓███▀▒▒██░   ▓██░░██░  ▒██▒ ░ ░▒████▒
▒▒   ▓▒█░▒▓▒░ ░  ░ ▒▒   ▓▒█░░ ░▒ ▒  ░ ▒ ░░▒░▒░░ ▒░ ░   ░▓   ░▒   ▒ ░ ▒░   ▒ ▒ ░▓    ▒ ░░   ░░ ▒░ ░
 ▒   ▒▒ ░░▒ ░       ▒   ▒▒ ░  ░  ▒    ▒ ░▒░ ░ ░ ░  ░    ▒ ░  ░   ░ ░ ░░   ░ ▒░ ▒ ░    ░     ░ ░  ░
 ░   ▒   ░░         ░   ▒   ░         ░  ░░ ░   ░       ▒ ░░ ░   ░    ░   ░ ░  ▒ ░  ░         ░
     ░  ░               ░  ░░ ░       ░  ░  ░   ░  ░    ░        ░          ░  ░              ░  ░
-->

<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

<!DOCTYPE html>
<html lang="en">
<head>
    <link rel="canonical" href="https://ignite.apache.org/features/collocatedprocessing.html" />
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta http-equiv="Cache-Control" content="no-cache, no-store, must-revalidate" />
    <meta http-equiv="Pragma" content="no-cache" />
    <meta http-equiv="Expires" content="0" />

    <title>Co-located Processing - Apache Ignite</title>

    <meta name="description"
          content="Apache Ignite supports co-located processing technique for compute-intensive and data-intensive calculations
          as well as machine learning algorithms. This technique increases performance by eliminating the impact of
          network latency."/>


    <!--#include virtual="/includes/styles.html" -->

    <!--#include virtual="/includes/sh.html" -->
</head>
<body>

<!--#include virtual="/includes/header.html" -->
<article>
    <header>
        <div class="container">
        
        <h1>Minimizing Network Utilization <strong>With Co-located Processing</strong></h1>
        </div>
    </header>
    <div class="container">
        <p>
            Apache Ignite® supports a co-located processing technique for compute-intensive and data-intensive calculations
            as well as machine learning algorithms. This technique increases performance by eliminating the impact of
            network latency.
        </p>
        <img class="diagram-right img-responsive" src="/images/svg-diagrams/high_performance_compute.svg" alt="Co-located Processing diagram" />
        <p>
            In traditional disk-based systems, such as relational or NoSQL databases, client applications
            usually bring data from servers, use the records for local calculations, and discard the data as
            soon as the business task is complete. This approach does not scale well if a significant volume
            of data gets transferred over the network.
        </p>
        <p>
            To overcome this issue, Apache Ignite supports a co-located processing technique. The primary
            aim of the technique is to increase the performance of your complex calculations or SQL with
            JOINs by running them straight on Ignite cluster nodes. In co-located processing, calculations
            are done on local data sets of the cluster nodes. This avoids records shuffling over the
            network and eliminates the impact of network latency on the performance of your applications.
        </p>
        
                    

        <h2>Data Co-location</h2>
        <p>
            To use co-located processing in practice, first, you need to co-locate data sets by storing
            related records on the same cluster node. This process is also known as affinity co-location in Ignite.
        </p>
        <p>
            For example, let's introduce <code>Country</code> and <code>City</code> tables and co-locate
            all <code>City</code> records that have the same <code>Country</code> identifier on a single node. To
            achieve this, you need to set <code>CountryCode</code> as an <code>affinityKey</code> in the <code>City</code>
            table:
        </p>
            
        <div class="tab-content">

                <div class="tab-pane active" id="sql-tables">
                        <pre class="brush:sql">
                            -- Country records, keyed by the country code.
                            CREATE TABLE Country (
                                Code CHAR(3),
                                Name CHAR(52),
                                Continent CHAR(50),
                                Region CHAR(26),
                                SurfaceArea DECIMAL(10,2),
                                Population INT(11),
                                Capital INT(11),
                                PRIMARY KEY (Code)
                            ) WITH "template=partitioned, backups=1";

                            -- City records. CountryCode is part of the primary key and is declared
                            -- as the affinity key, so that all cities sharing a CountryCode are
                            -- stored on the same cluster node.
                            CREATE TABLE City (
                                ID INT(11),
                                Name CHAR(35),
                                CountryCode CHAR(3),
                                District CHAR(20),
                                Population INT(11),
                                PRIMARY KEY (ID, CountryCode)
                            ) WITH "template=partitioned, backups=1, affinityKey=CountryCode";
                        </pre>
                </div>
            </div>
            
            <p>
                This way, you instruct Ignite to store all the <code>City</code> records with the same
                <code>CountryCode</code> on a single cluster node. As soon as the data is co-located, Ignite can execute compute and
                data-intensive logic, and SQL with JOINs straight on the cluster nodes minimizing or even eliminating network
                utilization.
            </p>

            <h2>SQL and Distributed JOINs</h2>
            <p>
                Ignite SQL engine performs much faster if a query gets executed against co-located records. This is
                especially crucial for SQL with JOINs that can span many cluster nodes.
            </p>
            <p>
                Using the previous example with <code>Country</code> and <code>City</code> tables,
                let's join the two tables returning the most populated cities in the given countries:
            </p>
            <div class="tab-content">
                <div class="tab-pane active" id="sql-joins-query">
                        <pre class="brush:sql">
                            -- Largest population per (country, city) pair for the selected
                            -- countries, biggest first.
                            SELECT
                                country.name,
                                city.name,
                                MAX(city.population) AS max_pop
                            FROM country
                            INNER JOIN city
                                ON country.code = city.countrycode
                            WHERE country.code IN ('USA', 'RUS', 'CHN')
                            GROUP BY
                                country.name,
                                city.name
                            ORDER BY max_pop DESC;
                        </pre>
                </div>
            </div>
            <p>
                This query is executed only on the nodes that store records of China, Russia, and the USA. Also, during
                the JOIN, the records are not shuffled between the nodes since all the <code>City</code>
                records with the same <code>city.countrycode</code> are stored on a single node.
            </p>

            <h2>Distributed Co-located Computations</h2>
            <p>
                Apache Ignite compute and machine learning APIs allow you to perform computations and execute
                machine learning algorithms in parallel to achieve high performance, low latency, and linear scalability.
                Furthermore, both components work best with co-located data sets.
            </p>
            <p>
                Let's take another example by imagining that a winter storm is about to hit a highly-populated city. As
                a telecommunication company, you have to send a text message to 20 million residents notifying them about the
                blizzard. With the client-server approach, the company would read all 20 million records from a database
                to an application that needs to execute some logic and send a message to the residents eventually.
            </p>
            <p>
                A much more efficient approach would be to run the logic on and send text messages from the cluster nodes
                that store the records of the residents. With this technique, instead of pulling 20 million records via
                the network, you execute the logic in place and eliminate the network impact on the performance of the
                calculation.
            </p>

            <p>
                Here is an example of what this logic might look like:
            </p>
            <!--<ul id="compute-example" class="nav nav-tabs">-->
                <!--<li  class="active"><a href="#compute-task" aria-controls="profile" data-toggle="tab">Message Broadcasting Logic</a></li>-->
            <!--</ul>-->

            <div class="tab-content">

                <div class="tab-pane active" id="compute-task">
                        <pre class="brush:java">

Ignite ignite = ...

// NewYork ID.
long newYorkId = 2;

// Send the logic to the cluster node that stores NewYork and all its inhabitants.
ignite.compute().affinityRun("City", newYorkId, new IgniteRunnable() {

  @IgniteInstanceResource
  Ignite ignite;

  @Override
  public void run() {
    // Get access to the Person cache, keeping keys and values in binary form.
    IgniteCache&#60;BinaryObject, BinaryObject&#62; people = ignite.cache("Person").withKeepBinary();

    // Execute the scan on this node only, so that no records are pulled over the network.
    ScanQuery&#60;BinaryObject, BinaryObject&#62; query = new ScanQuery&#60;&#62;();
    query.setLocal(true);

    // The cursor is closed automatically when the try block exits.
    try (QueryCursor&#60;Cache.Entry&#60;BinaryObject, BinaryObject&#62;&#62; cursor = people.query(query)) {
      // Iteration over the local cluster node data using the scan query.
      for (Cache.Entry&#60;BinaryObject, BinaryObject&#62; entry : cursor) {
        BinaryObject personKey = entry.getKey();

        // Pick NewYorkers only.
        if (personKey.&#60;Long&#62;field("CITY_ID") == newYorkId) {
            BinaryObject person = entry.getValue();

            // Send the warning message to the person.
        }
      }
    }
  }
});

                        </pre>
                </div>
            </div>

            <div class="jumbotron jumbotron-fluid">
                <div class="container">
                  <div class="title display-6">Learn More</div>
                  <hr class="my-4">
                  <div class="row">
                    <div class="col-sm-6">
                        <ul>
                            <li><a href="https://apacheignite.readme.io/docs/compute-grid" target="docs">Compute APIs <i class="fa fa-angle-double-right"></i></a></li>
                            <li><a href="/features/machinelearning.html">Machine and Deep Learning <i class="fa fa-angle-double-right"></i></a></li>
                        </ul>
                    </div>
                    <div class="col-sm-6">
                        <ul>
                            <li><a href="/use-cases/hpc.html">High Performance Computing with Apache Ignite <i class="fa fa-angle-double-right"></i></a></li>
                        </ul>
                    </div>
                  </div>
                </div>
            </div>
    </div>
</article>        
    <!--#include virtual="/includes/footer.html" -->

<!--#include virtual="/includes/scripts.html" -->
</body>
</html>
