Updated use cases menu and rewrote Spark acceleration page
git-svn-id: https://svn.apache.org/repos/asf/ignite/site/branches/ignite-redisign@1873344 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/.htaccess b/.htaccess
index bf09eb2..e7a1f1b 100644
--- a/.htaccess
+++ b/.htaccess
@@ -12,6 +12,7 @@
Redirect 301 /features/persistence.html /arch/persistence.html
Redirect 301 /features/deploy.html /arch/clustering.html
Redirect 301 /features/igniterdd.html /use-cases/spark/shared-memory-layer.html
+Redirect 301 /use-cases/spark/shared-memory-layer.html /use-cases/spark/spark-acceleration.html
RewriteEngine On
diff --git a/includes/header.html b/includes/header.html
index 14d8ee0..5761e5a 100644
--- a/includes/header.html
+++ b/includes/header.html
@@ -98,78 +98,45 @@
</li>
</ul>
</li>
+
<li class="dropdown" style="cursor: pointer;">
<a class="dropdown-toggle" data-toggle="dropdown" aria-label="Use Cases">Use Cases<span class="caret"></span></a>
<ul class="dropdown-menu" role="menu">
- <li role="presentation" class="submenu-header">Proven by</li>
- <li><a href="/provenusecases.html" aria-label="Proven Use Cases"
- onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'proven_usecases');">
- Ignite in Production</a>
- </li>
- <li class="divider">
-
- <!--Database-->
- <li role="presentation" class="submenu-header">Data Store</li>
- <li><a href="/use-cases/database/distributed-database.html" aria-label="Data Store"
- onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'distributed-database');">
- Distributed Database</a>
- </li>
- <li><a href="/use-cases/database/in-memory-database.html" aria-label="In-Memory Database"
- onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'distributed-database');">
- In-Memory Database</a>
+ <li><a href="/use-cases/caching/database-caching.html"
+ aria-label="In-Memory Cache"
+ onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'in_memory_cache');">
+ In-Memory Cache</a>
</li>
<li><a href="/features/datagrid.html" aria-label="In-Memory Data Grid"
onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'data_grid');">
In-Memory Data Grid</a>
</li>
+ <li><a href="/use-cases/database/in-memory-database.html" aria-label="In-Memory Database"
+ onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'distributed-database');">
+ In-Memory Database</a>
+ </li>
+ <li><a href="#" aria-label="Digital Integration Hub"
+ onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'digital_integration_hub');">
+ Digital Integration Hub</a>
+ </li>
<li><a href="/use-cases/database/key-value-store.html" aria-label="Key-Value Store"
onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'key_value_store');">
Key-Value Store</a>
</li>
- <li class="divider">
-
- <!--In-Memory Caching-->
- <li role="presentation" class="submenu-header">In-Memory Cache</li>
- <li><a href="/use-cases/caching/database-caching.html" aria-label="Database Caching"
- onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'database-caching');">
- Database Caching</a>
+ <li><a href="/use-cases/spark/spark-acceleration.html"
+ aria-label="Apache Spark Acceleration"
+ onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'apache_spark_acceleration');">
+ Apache Spark Acceleration</a>
</li>
- <li><a href="/use-cases/caching/jcache-provider.html" aria-label="JCache Provider"
- onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'jcache-provider');">
- JCache Provider</a>
+ <li><a href="#"
+ aria-label="Apache Hadoop Acceleration"
+ onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'hadoop_acceleration');">
+ Apache Hadoop Acceleration</a>
</li>
- <li><a href="/use-cases/caching/hibernate-l2-cache.html" aria-label="Hibernate L2 Cache"
- onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'hibernate-l2-cache');">
- Hibernate L2 Cache</a>
- </li>
- <li><a href="/use-cases/caching/web-session-clustering.html" aria-label="Web Session Clustering"
- onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'web-session-clustering');">
- Web Session Clustering</a>
- </li>
- <li class="divider">
-
- <!-- Comparison -->
- <li role="presentation" class="submenu-header">Comparison</li>
- <li><a href="/use-cases/comparison/ignite-for-nosql.html" aria-label="Ignite for NoSQL Users"
- onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'ignite-for-nosql');">
- Ignite for NoSQL Users</a>
- </li>
- <li><a href="/use-cases/comparison/ignite-for-rdbms.html" aria-label="Ignite for RDBMS Users"
- onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'ignite-for-rdbms');">
- Ignite for RDBMS Users</a>
- </li>
- <li class="divider">
-
- <!--Ignite with Spark-->
- <li role="presentation" class="submenu-header">Ignite with Spark</li>
- <li><a href="/use-cases/spark/shared-memory-layer.html" aria-label="In-Memory Store for Spark"
- onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'shared-memory-layer');">
- In-Memory Store for Spark</a>
- </li>
- <li><a href="/use-cases/spark/sql-queries.html" aria-label="Faster SQL for Spark"
- onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'sql-queries');">
- Faster Sql for Spark</a>
+ <li><a href="/provenusecases.html" aria-label="Proven Use Cases"
+ onclick="ga('send', 'event', 'apache_ignite_usecases', 'menu_click', 'proven_usecases');">
+ Powered by Ignite</a>
</li>
</ul>
</li>
diff --git a/use-cases/spark/shared-memory-layer.html b/use-cases/spark/spark-acceleration.html
similarity index 67%
rename from use-cases/spark/shared-memory-layer.html
rename to use-cases/spark/spark-acceleration.html
index 4619fbf..a9186a5 100644
--- a/use-cases/spark/shared-memory-layer.html
+++ b/use-cases/spark/spark-acceleration.html
@@ -33,10 +33,10 @@
<!DOCTYPE html>
<html lang="en">
<head>
-<link rel="canonical" href="https://ignite.apache.org/use-cases/spark/shared-memory-layer.html" />
+<link rel="canonical" href="https://ignite.apache.org/use-cases/spark/spark-acceleration.html"/>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
- <title>Apache Spark Shared Memory Layer - Apache Ignite</title>
+ <title>Apache Spark Performance Acceleration With Apache Ignite</title>
<!--#include virtual="/includes/styles.html" -->
<!--#include virtual="/includes/sh.html" -->
@@ -47,38 +47,47 @@
<main id="main" role="main" class="container">
<section id="shared-memory-layer" class="page-section">
- <h1 class="first">Shared Memory Layer for Apache Spark</h1>
+ <h1 class="first">Apache Spark Performance Acceleration With Apache Ignite</h1>
<div class="col-sm-12 col-md-12 col-xs-12" style="padding:0 0 10px 0;">
<div class="col-sm-6 col-md-6 col-xs-12" style="padding-left:0; padding-right:0">
<p>
- Apache Ignite is a distributed memory-centric database and caching platform that is
- used by Apache Spark users to:
- <ul class="page-list" style="margin-bottom: 20px;">
- <li>
- Achieve true in-memory performance at scale and avoid data movement from a data source
- to Spark workers and applications.
- </li>
- <li>
- Boost DataFrame and SQL performance.
- </li>
- <li>
- More easily share state and data among Spark jobs.
- </li>
- </ul>
+ Apache Ignite integrates with Apache Spark to accelerate the performance of Spark applications
+ and APIs by keeping data in a shared in-memory cluster. Spark users can use Ignite as a data
+ source in a way similar to Hadoop or a relational database. Just start an Ignite cluster, set
+ it as a data source for Spark workers, and keep using Spark RDDs or DataFrames APIs or gain
+ even more speed by running Ignite SQL or compute APIs directly.
</p>
+
+ <p>
+ In addition to the performance acceleration of Spark applications, Ignite is used as a shared
+ in-memory layer by those Spark workers that need to share both data and state.
+ </p>
+
</div>
<div class="col-sm-6 col-md-6 col-xs-12" style="padding-right:0">
<img class="img-responsive" src="/images/spark_integration.png" width="440px" style="float:right;"/>
</div>
+
</div>
+ <p>
+ The performance increase is achievable for several reasons. First, Ignite is designed to store data sets
in memory across a cluster of nodes, reducing the latency of Spark operations that usually need to pull data
+ from disk-based systems. Second, Ignite tries to minimize data shuffling over the network between its
+ store and Spark applications by running certain Spark tasks, produced by RDDs or DataFrames APIs,
+ in-place on Ignite nodes. This optimization helps to reduce the effect of the network latency on
performance of Spark calls. Finally, the network impact can be minimized even further if native
+ Ignite APIs such as SQL are called from Spark applications directly. By doing that, you will completely
eliminate data shuffling between Spark and Ignite, because Ignite SQL queries are always executed on
Ignite nodes, returning a much smaller final result set to the application layer.
+ </p>
+
<div class="page-heading">Ignite Shared RDDs</div>
<p>
Apache Ignite provides an implementation of the Spark RDD which allows any data and state to be shared
in memory as RDDs across Spark jobs. The Ignite RDD provides a shared, mutable view of the same data
- in-memory in Ignite across different Spark jobs, workers, or applications. Native Spark RDDs cannot be
- shared across Spark jobs or applications.
+ in-memory in Ignite across different Spark jobs, workers, or applications.
</p>
<p>
@@ -88,18 +97,6 @@
state may either exist only during the lifespan of a Spark application (embedded mode), or it may
out-survive the Spark application (standalone mode).
</p>
- <p>
- While Apache SparkSQL supports a fairly rich SQL syntax, it doesn't implement any indexing. As a result,
- Spark queries may take minutes even on moderately small data sets because they have to do full data
- scans. With Ignite, Spark users can configure primary and secondary indexes that can bring up to 1000x
- performance gains.
- </p>
-
- <p>
- <a href="https://apacheignite-fs.readme.io/docs/ignitecontext-igniterdd" target="docs">
- <b>Ignite RDDs in Details <i class="fa fa-angle-double-right"></i></b>
- </a>
- </p>
<div class="page-heading">Ignite DataFrames</div>
<p>
@@ -108,26 +105,32 @@
a DataFrame is a distributed collection of data organized into named columns. It is conceptually
equivalent to a table in a relational database and allows Spark to leverage the Catalyst query
optimizer to produce much more efficient query execution plans in comparison to RDDs, which are
- just collections of elements partitioned across the nodes of the cluster.
+ collections of elements partitioned across the nodes of the cluster.
</p>
<p>
- Ignite expands DataFrame, simplifying development and improving data access times whenever
- Ignite is used as memory-centric storage for Spark. Benefits include:
- <ul class="page-list" style="margin-bottom: 20px;">
- <li>
- Ability to share data and state across Spark jobs by writing and reading DataFrames to/from Ignite.
- </li>
- <li>
- Faster SparkSQL queries by optimizing Spark query execution plans with Ignite SQL engine which
- include advanced indexing and avoid data movement across the network from Ignite to Spark.
- </li>
- </ul>
+ Ignite supports DataFrame APIs, letting Spark write to and read from Ignite through that interface.
+ Moreover, Ignite analyses execution plans produced by Spark's Catalyst engine and can execute
+ parts of the plan on Ignite nodes directly, reducing data shuffling. All that will make your SparkSQL
+ more performant.
+ </p>
+
+ <div class="page-heading">Learn More</div>
+ <p>
+ <a href="https://apacheignite-fs.readme.io/docs/installation-deployment" target="docs">
+ <b>Ignite and Spark Installation and Deployment <i class="fa fa-angle-double-right"></i></b>
+ </a>
+ </p>
+ <p>
+ <a href="https://apacheignite-fs.readme.io/docs/ignitecontext-igniterdd" target="docs">
+ <b>Ignite RDDs in Details <i class="fa fa-angle-double-right"></i></b>
+ </a>
</p>
<p>
<a href="https://apacheignite-fs.readme.io/docs/ignite-data-frame" target="docs">
<b>Ignite DataFrames in Details <i class="fa fa-angle-double-right"></i></b>
</a>
</p>
+
</section>
</main>
diff --git a/use-cases/spark/sql-queries.html b/use-cases/spark/sql-queries.html
deleted file mode 100644
index 135ec7d..0000000
--- a/use-cases/spark/sql-queries.html
+++ /dev/null
@@ -1,95 +0,0 @@
-<!--
- ▄▄▄ ██▓███ ▄▄▄ ▄████▄ ██░ ██ ▓█████ ██▓ ▄████ ███▄ █ ██▓▄▄▄█████▓▓█████
-▒████▄ ▓██░ ██▒▒████▄ ▒██▀ ▀█ ▓██░ ██▒▓█ ▀ ▓██▒ ██▒ ▀█▒ ██ ▀█ █ ▓██▒▓ ██▒ ▓▒▓█ ▀
-▒██ ▀█▄ ▓██░ ██▓▒▒██ ▀█▄ ▒▓█ ▄ ▒██▀▀██░▒███ ▒██▒▒██░▄▄▄░▓██ ▀█ ██▒▒██▒▒ ▓██░ ▒░▒███
-░██▄▄▄▄██ ▒██▄█▓▒ ▒░██▄▄▄▄██ ▒▓▓▄ ▄██▒░▓█ ░██ ▒▓█ ▄ ░██░░▓█ ██▓▓██▒ ▐▌██▒░██░░ ▓██▓ ░ ▒▓█ ▄
- ▓█ ▓██▒▒██▒ ░ ░ ▓█ ▓██▒▒ ▓███▀ ░░▓█▒░██▓░▒████▒ ░██░░▒▓███▀▒▒██░ ▓██░░██░ ▒██▒ ░ ░▒████▒
- ▒▒ ▓▒█░▒▓▒░ ░ ░ ▒▒ ▓▒█░░ ░▒ ▒ ░ ▒ ░░▒░▒░░ ▒░ ░ ░▓ ░▒ ▒ ░ ▒░ ▒ ▒ ░▓ ▒ ░░ ░░ ▒░ ░
- ▒ ▒▒ ░░▒ ░ ▒ ▒▒ ░ ░ ▒ ▒ ░▒░ ░ ░ ░ ░ ▒ ░ ░ ░ ░ ░░ ░ ▒░ ▒ ░ ░ ░ ░ ░
- ░ ▒ ░░ ░ ▒ ░ ░ ░░ ░ ░ ▒ ░░ ░ ░ ░ ░ ░ ▒ ░ ░ ░
- ░ ░ ░ ░░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░
--->
-
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied. See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-
-<!DOCTYPE html>
-<html lang="en">
-<head>
-<link rel="canonical" href="https://ignite.apache.org/use-cases/spark/sql-queries.html" />
- <meta charset="utf-8">
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
- <title>Fast Apache Spark SQL Queries - Apache Ignite</title>
- <!--#include virtual="/includes/styles.html" -->
-
- <!--#include virtual="/includes/sh.html" -->
-</head>
-<body>
-<div id="wrapper">
- <!--#include virtual="/includes/header.html" -->
-
- <main id="main" role="main" class="container">
- <section id="spark-sqlqueries" class="page-section">
- <h1 class="first">Accelerate Apache Spark SQL Queries</h1>
- <div class="col-sm-12 col-md-12 col-xs-12" style="padding:0 0 10px 0;">
- <div class="col-sm-6 col-md-6 col-xs-12" style="padding-left:0; padding-right:0">
- <p>
- Running SQL queries using Ignite shared RDDs or DataFrames is much faster than running Spark SQL
- via native RDDs or Data Frame implementations.
- </p>
- <div class="page-heading">In-Memory Indexes</div>
- <p>
- Spark does not support SQL indexes, resulting in slow SQL queries due to full scans across
- the whole data set. Such full-scan queries in spark can take minutes and introduce significant
- wait times, especially when running many queries within the same Spark application.
- <p>
- Apache Ignite, on the other hand, supports SQL with <span style="white-space: nowrap;"><code>in-memory indexing</code>.</span>
- Because of advanced in-memory indexing capabilities, IgniteRDD executes SQL
- queries 100s of times faster than Spark native RDDs or Data Frames.
- </p>
- </div>
-
- <div class="col-sm-6 col-md-6 col-xs-12" style="padding-right:0">
- <img class="img-responsive" src="/images/spark_integration.png" width="440px" style="float:right;"/>
- </div>
- </div>
- <div class="page-heading">Off-Heap Memory</div>
- <p>
- Ignite stores data and indexes in an off-heap memory that allows holding petabytes of data in Ignite
- and process them from Spark without worrying about JVM garbage collection overhead.
- </p>
- <p>
- <a href="https://apacheignite-fs.readme.io/docs/ignitecontext-igniterdd#section-running-sql-queries-against-ignite-cache" target="docs">
- <b>Run SQL Queries against Ignite cluster <i class="fa fa-angle-double-right"></i></b>
- </a>
- </p>
- <p>
- <a href="https://apacheignite-fs.readme.io/docs/ignite-data-frame" target="docs">
- <b>Speeding up DataFrames access with Ignite <i class="fa fa-angle-double-right"></i></b>
- </a>
- </p>
- </section>
- </main>
-
- <!--#include virtual="/includes/footer.html" -->
-</div>
-<!--#include virtual="/includes/scripts.html" -->
-</body>
-</html>