blob: a07ac63604dcca0fec01876616ee85502f49ee53 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-61232409-1"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'UA-61232409-1');
</script>
<meta charset="UTF-8">
<meta name="ignite-version" content="2.9.0" />
<title>Ignite for Spark | Ignite Documentation</title>
<link rel="canonical" href="/docs/2.9.0/extensions-and-integrations/ignite-for-spark/overview" />
<META NAME="ROBOTS" CONTENT="NOINDEX" />
<link rel="stylesheet" href="/assets/css/styles.css?1609302780">
<link rel="stylesheet" href="/assets/css/asciidoc-pygments.css">
<link rel="shortcut icon" href="/favicon.ico">
<meta name='viewport' content='width=device-width, height=device-height, initial-scale=1.0, minimum-scale=1.0'>
<script type="text/javascript" src="/assets/js/anchor.min.js?1609302780"></script>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.css" />
</head>
<body>
<header>
<!--#include virtual="/includes/promotion_banner.html" -->
<div class="container">
<button type='button' class='menu' title='Docs menu'>
<img src="/assets/images/menu-icon.svg"/>
</button>
<div class='home'>
<a href="/" class='home' title='Apache Ignite home'>
<img src="/assets/images/apache_ignite_logo.svg" alt="Apache Ignite logo" width="103" >
</a>
</div>
<select id="version-selector">
<option value="2.9.0">2.9.0</option>
</select>
<nav id="api-docs">
<li><a href="#">APIs</a>
<nav class='dropdown'>
<li class="dropdown-item"><a href="/releases/latest/javadoc/index.html">Java</a></li>
<li class="dropdown-item"><a href="/releases/latest/dotnetdoc/api/">C#/.NET</a></li>
<li class="dropdown-item"><a href="/releases/latest/cppdoc/index.html">C++</a></li>
<li class="dropdown-item"><a href="/releases/latest/scaladoc/scalar/index.html#org.apache.ignite.scalar.scalar$">Scala</a></li>
</nav>
</li>
<li><a href="#">Examples</a>
<nav class="dropdown">
<li class="dropdown-item"><a href="https://github.com/apache/ignite/tree/master/examples" target="_blank" rel="noopener" title="Apache Ignite Java examples">Java</a></li>
<li class="dropdown-item"><a href="https://github.com/apache/ignite/tree/master/modules/platforms/dotnet/examples" target="_blank" rel="noopener" title="Apache Ignite C#/.NET examples">C#/.NET</a></li>
<li class="dropdown-item"><a href="https://github.com/apache/ignite/tree/master/modules/platforms/cpp/examples" target="_blank" rel="noopener" title="Apache Ignite C++ examples">C++</a></li>
<li class="dropdown-item"><a href="https://github.com/apache/ignite/tree/master/modules/platforms/python/examples" target="_blank" rel="noopener" title="Apache Ignite Python examples">Python</a></li>
<li class="dropdown-item"><a href="https://github.com/apache/ignite/tree/master/modules/platforms/nodejs/examples" target="_blank" rel="noopener" title="Apache Ignite NodeJS examples">NodeJS</a></li>
<li class="dropdown-item"><a href="https://github.com/apache/ignite/tree/master/modules/platforms/php/examples" target="_blank" rel="noopener" title="Apache Ignite PHP examples">PHP</a></li>
</nav>
</li>
</nav>
<form class='search'>
<button class="search-close" type='button'><img src='/assets/images/cancel.svg'></button>
<input type="search" placeholder="Search…" id="search-input">
</form>
<button type='button' class='search-toggle'><img src='/assets/images/search.svg'></button>
<nav id="lang-selector">
<li><a href="#"><img src="/assets/images/icon_lang_en.png" alt="English language icon" /><span></span></a>
<nav class="dropdown">
<li class="dropdown-item"><a href="/docs/latest/" ><img src="/assets/images/icon_lang_en.png" alt="English language icon" /><span>English</span></a></li>
<li class="dropdown-item"><a href="https://www.ignite-service.cn/doc/java/" target="_blank" rel="noopener"><img src="/assets/images/icon_lang_cn.png" alt="Chinese language icon" /><span>Chinese</span></a></li>
</nav>
</li>
</nav>
<button type='button' class='top-nav-toggle'></button>
</div>
</header>
<link rel="stylesheet" href="/assets/css/docs.css">
<section class='page-docs'>
<nav class='left-nav' data-swiftype-index='false'>
<li>
<a href="/docs/2.9.0/index" class='' >Documentation Overview</a>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Quick Start Guides<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/quick-start/java"
class=''
>Java</a>
</li>
<li>
<a href="/docs/2.9.0/quick-start/dotnet"
class=''
>.NET/C#</a>
</li>
<li>
<a href="/docs/2.9.0/quick-start/cpp"
class=''
>C++</a>
</li>
<li>
<a href="/docs/2.9.0/quick-start/python"
class=''
>Python</a>
</li>
<li>
<a href="/docs/2.9.0/quick-start/nodejs"
class=''
>Node.JS</a>
</li>
<li>
<a href="/docs/2.9.0/quick-start/sql"
class=''
>SQL</a>
</li>
<li>
<a href="/docs/2.9.0/quick-start/php"
class=''
>PHP</a>
</li>
<li>
<a href="/docs/2.9.0/quick-start/restapi"
class=''
>REST API</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Installation<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/installation/installing-using-zip"
class=''
>Installing Using ZIP Archive</a>
</li>
<li>
<a href="/docs/2.9.0/installation/installing-using-docker"
class=''
>Installing Using Docker</a>
</li>
<li>
<a href="/docs/2.9.0/installation/deb-rpm"
class=''
>Installing DEB or RPM package</a>
</li>
<li>
<button
type='button'
class='collapsed '>Kubernetes<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0/installation/kubernetes/amazon-eks-deployment" class=''>Amazon EKS</a></li>
<li><a href="/docs/2.9.0/installation/kubernetes/azure-deployment" class=''>Azure Kubernetes Service</a></li>
<li><a href="/docs/2.9.0/installation/kubernetes/gke-deployment" class=''>Google Kubernetes Engine</a></li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/installation/vmware-installation"
class=''
>VMWare</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Setting Up<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/understanding-configuration"
class=''
>Understanding Configuration</a>
</li>
<li>
<a href="/docs/2.9.0/setup"
class=''
>Setting Up</a>
</li>
<li>
<a href="/docs/2.9.0/logging"
class=''
>Configuring Logging</a>
</li>
<li>
<a href="/docs/2.9.0/resources-injection"
class=''
>Resources Injection</a>
</li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/starting-nodes" class='' >Starting and Stopping Nodes</a>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Clustering<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/clustering/clustering"
class=''
>Overview</a>
</li>
<li>
<a href="/docs/2.9.0/clustering/tcp-ip-discovery"
class=''
>TCP/IP Discovery</a>
</li>
<li>
<a href="/docs/2.9.0/clustering/zookeeper-discovery"
class=''
>ZooKeeper Discovery</a>
</li>
<li>
<a href="/docs/2.9.0/clustering/discovery-in-the-cloud"
class=''
>Discovery in the Cloud</a>
</li>
<li>
<a href="/docs/2.9.0/clustering/network-configuration"
class=''
>Network Configuration</a>
</li>
<li>
<a href="/docs/2.9.0/clustering/connect-client-nodes"
class=''
>Connecting Client Nodes</a>
</li>
<li>
<a href="/docs/2.9.0/clustering/baseline-topology"
class=''
>Baseline Topology</a>
</li>
<li>
<a href="/docs/2.9.0/clustering/running-client-nodes-behind-nat"
class=''
>Running Client Nodes Behind NAT</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Thin Clients<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/thin-clients/getting-started-with-thin-clients"
class=''
>Thin Clients Overview</a>
</li>
<li>
<a href="/docs/2.9.0/thin-clients/java-thin-client"
class=''
>Java Thin Client</a>
</li>
<li>
<a href="/docs/2.9.0/thin-clients/dotnet-thin-client"
class=''
>.NET Thin Client</a>
</li>
<li>
<a href="/docs/2.9.0/thin-clients/cpp-thin-client"
class=''
>C++ Thin Client</a>
</li>
<li>
<a href="/docs/2.9.0/thin-clients/python-thin-client"
class=''
>Python Thin Client</a>
</li>
<li>
<a href="/docs/2.9.0/thin-clients/php-thin-client"
class=''
>PHP Thin Client</a>
</li>
<li>
<a href="/docs/2.9.0/thin-clients/nodejs-thin-client"
class=''
>Node.js Thin Client</a>
</li>
<li>
<button
type='button'
class='collapsed '>Binary Client Protocol<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0/binary-client-protocol/binary-client-protocol" class=''>Binary Client Protocol</a></li>
<li><a href="/docs/2.9.0/binary-client-protocol/data-format" class=''>Data Format</a></li>
<li><a href="/docs/2.9.0/binary-client-protocol/key-value-queries" class=''>Key-Value Queries</a></li>
<li><a href="/docs/2.9.0/binary-client-protocol/sql-and-scan-queries" class=''>SQL and Scan Queries</a></li>
<li><a href="/docs/2.9.0/binary-client-protocol/binary-type-metadata" class=''>Binary Types Metadata</a></li>
<li><a href="/docs/2.9.0/binary-client-protocol/cache-configuration" class=''>Cache Configuration</a></li>
</nav>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Data Modeling<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/data-modeling/data-modeling"
class=''
>Introduction</a>
</li>
<li>
<a href="/docs/2.9.0/data-modeling/data-partitioning"
class=''
>Data Partitioning</a>
</li>
<li>
<a href="/docs/2.9.0/data-modeling/affinity-collocation"
class=''
>Affinity Colocation</a>
</li>
<li>
<a href="/docs/2.9.0/data-modeling/binary-marshaller"
class=''
>Binary Marshaller</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Configuring Memory<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/memory-architecture"
class=''
>Memory Architecture</a>
</li>
<li>
<a href="/docs/2.9.0/memory-configuration/data-regions"
class=''
>Configuring Data Regions</a>
</li>
<li>
<a href="/docs/2.9.0/memory-configuration/eviction-policies"
class=''
>Eviction Policies</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Configuring Persistence<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/persistence/native-persistence"
class=''
>Ignite Persistence</a>
</li>
<li>
<a href="/docs/2.9.0/persistence/external-storage"
class=''
>External Storage</a>
</li>
<li>
<a href="/docs/2.9.0/persistence/swap"
class=''
>Swapping</a>
</li>
<li>
<a href="/docs/2.9.0/persistence/custom-cache-store"
class=''
>Implementing Custom Cache Store</a>
</li>
<li>
<a href="/docs/2.9.0/persistence/snapshots"
class=''
>Cluster Snapshots</a>
</li>
<li>
<a href="/docs/2.9.0/persistence/disk-compression"
class=''
>Disk Compression</a>
</li>
<li>
<a href="/docs/2.9.0/persistence/persistence-tuning"
class=''
>Tuning Persistence</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Configuring Caches<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/configuring-caches/configuration-overview"
class=''
>Cache Configuration</a>
</li>
<li>
<a href="/docs/2.9.0/configuring-caches/configuring-backups"
class=''
>Configuring Partition Backups</a>
</li>
<li>
<a href="/docs/2.9.0/configuring-caches/partition-loss-policy"
class=''
>Partition Loss Policy</a>
</li>
<li>
<a href="/docs/2.9.0/configuring-caches/atomicity-modes"
class=''
>Atomicity Modes</a>
</li>
<li>
<a href="/docs/2.9.0/configuring-caches/expiry-policies"
class=''
>Expiry Policy</a>
</li>
<li>
<a href="/docs/2.9.0/configuring-caches/on-heap-caching"
class=''
>On-Heap Caching</a>
</li>
<li>
<a href="/docs/2.9.0/configuring-caches/cache-groups"
class=''
>Cache Groups</a>
</li>
<li>
<a href="/docs/2.9.0/configuring-caches/near-cache"
class=''
>Near Caches</a>
</li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/data-rebalancing" class='' >Data Rebalancing</a>
</li>
<li>
<a href="/docs/2.9.0/data-streaming" class='' >Data Streaming</a>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Using Key-Value API<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/key-value-api/basic-cache-operations"
class=''
>Basic Cache Operations</a>
</li>
<li>
<a href="/docs/2.9.0/key-value-api/binary-objects"
class=''
>Working with Binary Objects</a>
</li>
<li>
<a href="/docs/2.9.0/key-value-api/using-scan-queries"
class=''
>Using Scan Queries</a>
</li>
<li>
<a href="/docs/2.9.0/read-repair"
class=''
>Read Repair</a>
</li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/key-value-api/transactions" class='' >Performing Transactions</a>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Working with SQL<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/SQL/sql-introduction"
class=''
>Introduction</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/schemas"
class=''
>Understanding Schemas</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/indexes"
class=''
>Defining Indexes</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/sql-api"
class=''
>Using SQL API</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/distributed-joins"
class=''
>Distributed Joins</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/sql-transactions"
class=''
>SQL Transactions</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/custom-sql-func"
class=''
>Custom SQL Functions</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/JDBC/jdbc-driver"
class=''
>JDBC Driver</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/JDBC/jdbc-client-driver"
class=''
>JDBC Client Driver</a>
</li>
<li>
<button
type='button'
class='collapsed '>ODBC Driver<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0/SQL/ODBC/odbc-driver" class=''>ODBC Driver</a></li>
<li><a href="/docs/2.9.0//SQL/ODBC/connection-string-dsn" class=''>Connection String and DSN</a></li>
<li><a href="/docs/2.9.0/SQL/ODBC/querying-modifying-data" class=''>Querying and Modifying Data</a></li>
<li><a href="/docs/2.9.0/SQL/ODBC/specification" class=''>Specification</a></li>
<li><a href="/docs/2.9.0/SQL/ODBC/data-types" class=''>Data Types</a></li>
<li><a href="/docs/2.9.0/SQL/ODBC/error-codes" class=''>Error Codes</a></li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/transactions/mvcc"
class=''
>Multiversion Concurrency Control</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>SQL Reference<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/sql-reference/sql-conformance"
class=''
>SQL Conformance</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/ddl"
class=''
>Data Definition Language (DDL)</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/dml"
class=''
>Data Manipulation Language (DML)</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/transactions"
class=''
>Transactions</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/operational-commands"
class=''
>Operational Commands</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/aggregate-functions"
class=''
>Aggregate functions</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/numeric-functions"
class=''
>Numeric Functions</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/string-functions"
class=''
>String Functions</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/date-time-functions"
class=''
>Data and Time Functions</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/system-functions"
class=''
>System Functions</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/data-types"
class=''
>Data Types</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Distributed Computing<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/distributed-computing/distributed-computing"
class=''
>Distributed Computing API</a>
</li>
<li>
<a href="/docs/2.9.0/distributed-computing/cluster-groups"
class=''
>Cluster Groups</a>
</li>
<li>
<a href="/docs/2.9.0/distributed-computing/executor-service"
class=''
>Executor Service</a>
</li>
<li>
<a href="/docs/2.9.0/distributed-computing/map-reduce"
class=''
>MapReduce API</a>
</li>
<li>
<a href="/docs/2.9.0/distributed-computing/load-balancing"
class=''
>Load Balancing</a>
</li>
<li>
<a href="/docs/2.9.0/distributed-computing/fault-tolerance"
class=''
>Fault Tolerance</a>
</li>
<li>
<a href="/docs/2.9.0/distributed-computing/job-scheduling"
class=''
>Job Scheduling</a>
</li>
<li>
<a href="/docs/2.9.0/distributed-computing/collocated-computations"
class=''
>Colocating Computations with Data</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Code Deployment<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/code-deployment/deploying-user-code"
class=''
>Deploying User Code</a>
</li>
<li>
<a href="/docs/2.9.0/code-deployment/peer-class-loading"
class=''
>Peer Class Loading</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Machine Learning<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/machine-learning/machine-learning"
class=''
>Machine Learning</a>
</li>
<li>
<a href="/docs/2.9.0/machine-learning/partition-based-dataset"
class=''
>Partition Based Dataset</a>
</li>
<li>
<a href="/docs/2.9.0/machine-learning/updating-trained-models"
class=''
>Updating Trained Models</a>
</li>
<li>
<button
type='button'
class='collapsed '>Binary Classification<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0/machine-learning/binary-classification/introduction" class=''>Introduction</a></li>
<li><a href="/docs/2.9.0/machine-learning/binary-classification/linear-svm" class=''>Linear SVM (Support Vector Machine)</a></li>
<li><a href="/docs/2.9.0/machine-learning/binary-classification/decision-trees" class=''>Decision Trees</a></li>
<li><a href="/docs/2.9.0/machine-learning/binary-classification/multilayer-perceptron" class=''>Multilayer Perceptron</a></li>
<li><a href="/docs/2.9.0/machine-learning/binary-classification/logistic-regression" class=''>Logistic Regression</a></li>
<li><a href="/docs/2.9.0/machine-learning/binary-classification/knn-classification" class=''>k-NN Classification</a></li>
<li><a href="/docs/2.9.0/machine-learning/binary-classification/ann" class=''>ANN (Approximate Nearest Neighbor)</a></li>
<li><a href="/docs/2.9.0/machine-learning/binary-classification/naive-bayes" class=''>Naive Bayes</a></li>
</nav>
</li>
<li>
<button
type='button'
class='collapsed '>Regression<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0/machine-learning/regression/introduction" class=''>Introduction</a></li>
<li><a href="/docs/2.9.0/machine-learning/regression/linear-regression" class=''>Linear Regression</a></li>
<li><a href="/docs/2.9.0/machine-learning/regression/decision-trees-regression" class=''>Decision Trees Regression</a></li>
<li><a href="/docs/2.9.0/machine-learning/regression/knn-regression" class=''>k-NN Regression</a></li>
</nav>
</li>
<li>
<button
type='button'
class='collapsed '>Clustering<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0/machine-learning/clustering/introduction" class=''>Introduction</a></li>
<li><a href="/docs/2.9.0/machine-learning/clustering/k-means-clustering" class=''>K-Means Clustering</a></li>
<li><a href="/docs/2.9.0/machine-learning/clustering/gaussian-mixture" class=''>Gaussian mixture (GMM)</a></li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/machine-learning/preprocessing"
class=''
>Preprocessing</a>
</li>
<li>
<button
type='button'
class='collapsed '>Model Selection<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0/machine-learning/model-selection/introduction" class=''>Introduction</a></li>
<li><a href="/docs/2.9.0/machine-learning/model-selection/evaluator" class=''>Evaluator</a></li>
<li><a href="/docs/2.9.0/machine-learning/model-selection/split-the-dataset-on-test-and-train-datasets" class=''>Split the dataset on test and train datasets</a></li>
<li><a href="/docs/2.9.0/machine-learning/model-selection/hyper-parameter-tuning" class=''>Hyper-parameter tuning</a></li>
<li><a href="/docs/2.9.0/machine-learning/model-selection/pipeline-api" class=''>Pipeline API</a></li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/machine-learning/multiclass-classification"
class=''
>Multiclass Classification</a>
</li>
<li>
<button
type='button'
class='collapsed '>Ensemble Methods<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0/machine-learning/ensemble-methods/introduction" class=''></a></li>
<li><a href="/docs/2.9.0/machine-learning/ensemble-methods/stacking" class=''>Stacking</a></li>
<li><a href="/docs/2.9.0/machine-learning/ensemble-methods/baggin" class=''>Bagging</a></li>
<li><a href="/docs/2.9.0/machine-learning/ensemble-methods/random-forest" class=''>Random Forest</a></li>
<li><a href="/docs/2.9.0/machine-learning/ensemble-methods/gradient-boosting" class=''>Gradient Boosting</a></li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/machine-learning/recommendation-systems"
class=''
>Recommendation Systems</a>
</li>
<li>
<button
type='button'
class='collapsed '>Importing Model<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0/machine-learning/importing-model/introduction" class=''>Introduction</a></li>
<li><a href="/docs/2.9.0/machine-learning/importing-model/model-import-from-gxboost" class=''>Import Model from XGBoost</a></li>
<li><a href="/docs/2.9.0/machine-learning/importing-model/model-import-from-apache-spark" class=''>Import Model from Apache Spark</a></li>
</nav>
</li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/key-value-api/continuous-queries" class='' >Using Continuous Queries</a>
</li>
<li>
<a href="/docs/2.9.0/services/services" class='' >Using Ignite Services</a>
</li>
<li>
<a href="/docs/2.9.0/messaging" class='' >Using Ignite Messaging</a>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Distributed Data Structures<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/data-structures/queue-and-set"
class=''
>Queue and Set</a>
</li>
<li>
<a href="/docs/2.9.0/data-structures/atomic-types"
class=''
>Atomic Types</a>
</li>
<li>
<a href="/docs/2.9.0/data-structures/countdownlatch"
class=''
>CountDownLatch</a>
</li>
<li>
<a href="/docs/2.9.0/data-structures/atomic-sequence"
class=''
>Atomic Sequence</a>
</li>
<li>
<a href="/docs/2.9.0/data-structures/semaphore"
class=''
>Semaphore</a>
</li>
<li>
<a href="/docs/2.9.0/data-structures/id-generator"
class=''
>ID Generator</a>
</li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/distributed-locks" class='' >Distributed Locks</a>
</li>
<li>
<a href="/docs/2.9.0/restapi" class='' >REST API</a>
</li>
<li>
<button type='button' class='group-toggle collapsed '>.NET Specific<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/net-specific/net-configuration-options"
class=''
>Configuration Options</a>
</li>
<li>
<a href="/docs/2.9.0/net-specific/net-deployment-options"
class=''
>Deployment Options</a>
</li>
<li>
<a href="/docs/2.9.0/net-specific/net-standalone-nodes"
class=''
>Standalone Nodes</a>
</li>
<li>
<a href="/docs/2.9.0/net-specific/net-logging"
class=''
>Logging</a>
</li>
<li>
<a href="/docs/2.9.0/net-specific/net-linq"
class=''
>LINQ</a>
</li>
<li>
<a href="/docs/2.9.0/net-specific/net-java-services-execution"
class=''
>Java Services Execution</a>
</li>
<li>
<a href="/docs/2.9.0/net-specific/net-platform-cache"
class=''
>.NET Platform Cache</a>
</li>
<li>
<a href="/docs/2.9.0/net-specific/net-plugins"
class=''
>Plugins</a>
</li>
<li>
<a href="/docs/2.9.0/net-specific/net-serialization"
class=''
>Serialization</a>
</li>
<li>
<a href="/docs/2.9.0/net-specific/net-cross-platform-support"
class=''
>Cross-Platform Support</a>
</li>
<li>
<a href="/docs/2.9.0/net-specific/net-platform-interoperability"
class=''
>Platform Interoperability</a>
</li>
<li>
<a href="/docs/2.9.0/net-specific/net-remote-assembly-loading"
class=''
>Remote Assembly Loading</a>
</li>
<li>
<a href="/docs/2.9.0/net-specific/net-troubleshooting"
class=''
>Troubleshooting</a>
</li>
<li>
<button
type='button'
class='collapsed '>Integrations<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0/net-specific/asp-net-output-caching" class=''>ASP.NET Output Caching</a></li>
<li><a href="/docs/2.9.0/net-specific/asp-net-session-state-caching" class=''>ASP.NET Session State Caching</a></li>
<li><a href="/docs/2.9.0/net-specific/net-entity-framework-cache" class=''>Entity Framework 2nd Level Cache</a></li>
</nav>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>C++ Specific<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/cpp-specific/cpp-serialization"
class=''
>Serialization</a>
</li>
<li>
<a href="/docs/2.9.0/cpp-specific/cpp-platform-interoperability"
class=''
>Platform Interoperability</a>
</li>
<li>
<a href="/docs/2.9.0/cpp-specific/cpp-objects-lifetime"
class=''
>Objects Lifetime</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Monitoring<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/monitoring-metrics/intro"
class=''
>Introduction</a>
</li>
<li>
<a href="/docs/2.9.0/monitoring-metrics/cluster-id"
class=''
>Cluster ID and Tag</a>
</li>
<li>
<a href="/docs/2.9.0/monitoring-metrics/cluster-states"
class=''
>Cluster States</a>
</li>
<li>
<button
type='button'
class='collapsed '>Metrics<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0/monitoring-metrics/configuring-metrics" class=''>Configuring Metrics</a></li>
<li><a href="/docs/2.9.0/monitoring-metrics/metrics" class=''>JMX Metrics</a></li>
</nav>
</li>
<li>
<button
type='button'
class='collapsed '>New Metrics System<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0/monitoring-metrics/new-metrics-system" class=''>Introduction</a></li>
<li><a href="/docs/2.9.0/monitoring-metrics/new-metrics" class=''>Metrics</a></li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/monitoring-metrics/system-views"
class=''
>System Views</a>
</li>
<li>
<a href="/docs/2.9.0/monitoring-metrics/tracing"
class=''
>Tracing</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Working with Events<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/events/listening-to-events"
class=''
>Enabling and Listenting to Events</a>
</li>
<li>
<a href="/docs/2.9.0/events/events"
class=''
>Events</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Tools<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/tools/control-script"
class=''
>Control Script</a>
</li>
<li>
<a href="/docs/2.9.0/tools/visor-cmd"
class=''
>Visor CMD</a>
</li>
<li>
<a href="/docs/2.9.0/tools/gg-control-center"
class=''
>GridGain Control Center</a>
</li>
<li>
<a href="/docs/2.9.0/tools/sqlline"
class=''
>SQLLine</a>
</li>
<li>
<a href="/docs/2.9.0/tools/tableau"
class=''
>Tableau</a>
</li>
<li>
<a href="/docs/2.9.0/tools/informatica"
class=''
>Informatica</a>
</li>
<li>
<a href="/docs/2.9.0/tools/pentaho"
class=''
>Pentaho</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Security<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/security/authentication"
class=''
>Authentication</a>
</li>
<li>
<a href="/docs/2.9.0/security/ssl-tls"
class=''
>SSL/TLS</a>
</li>
<li>
<button
type='button'
class='collapsed '>Transparent Data Encryption<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0/security/tde" class=''>Introduction</a></li>
<li><a href="/docs/2.9.0/security/master-key-rotation" class=''>Master key rotation</a></li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/security/sandbox"
class=''
>Sandbox</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle expanded '>Extensions and Integrations<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group expanded'>
<li>
<button
type='button'
class='collapsed '>Spring<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0/extensions-and-integrations/spring/spring-boot" class=''>Spring Boot</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/spring/spring-data" class=''>Spring Data</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/spring/spring-caching" class=''>Spring Caching</a></li>
</nav>
</li>
<li>
<button
type='button'
class='expanded parent '>Ignite for Spark<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages expanded parent">
<li><a href="/docs/2.9.0/extensions-and-integrations/ignite-for-spark/overview" class='active'>Overview</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/ignite-for-spark/ignitecontext-and-rdd" class=''>IgniteContext and IgniteRDD</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/ignite-for-spark/ignite-dataframe" class=''>Ignite DataFrame</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/ignite-for-spark/installation" class=''>Installation</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/ignite-for-spark/spark-shell" class=''>Test Ignite with Spark-shell</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/ignite-for-spark/troubleshooting" class=''>Troubleshooting</a></li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/extensions-and-integrations/hibernate-l2-cache"
class=''
>Hibernate L2 Cache</a>
</li>
<li>
<a href="/docs/2.9.0/extensions-and-integrations/mybatis-l2-cache"
class=''
>MyBatis L2 Cache</a>
</li>
<li>
<button
type='button'
class='collapsed '>Streaming<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0/extensions-and-integrations/streaming/kafka-streamer" class=''>Kafka Streamer</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/streaming/camel-streamer" class=''>Camel Streamer</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/streaming/flink-streamer" class=''>Flink Streamer</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/streaming/flume-sink" class=''>Flume Sink</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/streaming/jms-streamer" class=''>JMS Streamer</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/streaming/mqtt-streamer" class=''>MQTT Streamer</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/streaming/rocketmq-streamer" class=''>RocketMQ Streamer</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/streaming/storm-streamer" class=''>Storm Streamer</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/streaming/zeromq-streamer" class=''>ZeroMQ Streamer</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/streaming/twitter-streamer" class=''>Twitter Streamer</a></li>
</nav>
</li>
<li>
<button
type='button'
class='collapsed '>Cassandra Integration<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0/extensions-and-integrations/cassandra/overview" class=''>Overview</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/cassandra/configuration" class=''>Configuration</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/cassandra/usage-examples" class=''>Usage Examples</a></li>
<li><a href="/docs/2.9.0/extensions-and-integrations/cassandra/ddl-generator" class=''>DDL Generator</a></li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/extensions-and-integrations/php-pdo"
class=''
>PHP PDO</a>
</li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/plugins" class='' >Plugins</a>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Performance and Troubleshooting<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/perf-and-troubleshooting/general-perf-tips"
class=''
>General Performance Tips</a>
</li>
<li>
<a href="/docs/2.9.0/perf-and-troubleshooting/memory-tuning"
class=''
>Memory and JVM Tuning</a>
</li>
<li>
<a href="/docs/2.9.0/perf-and-troubleshooting/persistence-tuning"
class=''
>Persistence Tuning</a>
</li>
<li>
<a href="/docs/2.9.0/perf-and-troubleshooting/sql-tuning"
class=''
>SQL Tuning</a>
</li>
<li>
<a href="/docs/2.9.0/perf-and-troubleshooting/thread-pools-tuning"
class=''
>Thread Pools Tuning</a>
</li>
<li>
<a href="/docs/2.9.0/perf-and-troubleshooting/troubleshooting"
class=''
>Troubleshooting and Debugging</a>
</li>
<li>
<a href="/docs/2.9.0/perf-and-troubleshooting/handling-exceptions"
class=''
>Handling Exceptions</a>
</li>
<li>
<a href="/docs/2.9.0/perf-and-troubleshooting/yardstick-benchmarking"
class=''
>Benchmarking With Yardstick</a>
</li>
</nav>
</li>
</nav>
<div class="left-nav__overlay"></div>
<article data-swiftype-index='true'>
<a class='edit-link' href="https://github.com/apache/ignite/tree/IGNITE-7595/docs/_docs/extensions-and-integrations/ignite-for-spark/overview.adoc" target="_blank">Edit</a>
<h1>Ignite for Spark</h1>
<div id="preamble">
<div class="sectionbody">
<div class="paragraph">
<p>Apache Ignite is a distributed memory-centric database and caching platform that is used by Apache Spark users to:</p>
</div>
<div class="ulist">
<ul>
<li>
<p>Achieve true in-memory performance at scale and avoid data movement from a data source to Spark workers and applications.</p>
</li>
<li>
<p>Boost DataFrame and SQL performance.</p>
</li>
<li>
<p>More easily share state and data among Spark jobs.</p>
</li>
</ul>
</div>
<div class="imageblock">
<div class="content">
<img src="/docs/2.9.0/images/spark_integration.png" alt="Spark Integration">
</div>
</div>
</div>
</div>
<div class="sect1">
<h2 id="ignite-rdds">Ignite RDDs</h2>
<div class="sectionbody">
<div class="paragraph">
<p>Apache Ignite provides an implementation of the Spark RDD which allows any data and state to be shared in memory as RDDs across Spark jobs. The Ignite RDD provides a shared, mutable view of the same data in-memory in Ignite across different Spark jobs, workers, or applications. Native Spark RDDs cannot be shared across Spark jobs or applications.</p>
</div>
<div class="paragraph">
<p>The way an <a href="/docs/2.9.0/ignite-for-spark/ignitecontext-and-rdd">IgniteRDD</a> is implemented is as a view over a distributed Ignite table (aka. cache). It can be deployed with an Ignite node either within the Spark job executing process, on a Spark worker, or in a separate Ignite cluster. It means that depending on the chosen deployment mode the shared state may either exist only during the lifespan of a Spark application (embedded mode), or it may out-survive the Spark application (standalone mode).</p>
</div>
<div class="paragraph">
<p>While Apache SparkSQL supports a fairly rich SQL syntax, it doesn&#8217;t implement any indexing. As a result, Spark queries may take minutes even on moderately small data sets because they have to do full data scans. With Ignite, Spark users can configure primary and secondary indexes that can bring up to 1000x performance gains.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="ignite-dataframes">Ignite DataFrames</h2>
<div class="sectionbody">
<div class="paragraph">
<p>The Apache Spark DataFrame API introduced the concept of a schema to describe the data, allowing Spark to manage the schema and organize the data into a tabular format. To put it simply, a DataFrame is a distributed collection of data organized into named columns. It is conceptually equivalent to a table in a relational database and allows Spark to leverage the Catalyst query optimizer to produce much more efficient query execution plans in comparison to RDDs, which are just collections of elements partitioned across the nodes of the cluster.</p>
</div>
<div class="paragraph">
<p>Ignite expands <a href="/docs/2.9.0/ignite-for-spark/ignite-dataframe">DataFrame</a>, simplifying development and improving data access times whenever Ignite is used as memory-centric storage for Spark. Benefits include:</p>
</div>
<div class="ulist">
<ul>
<li>
<p>Ability to share data and state across Spark jobs by writing and reading DataFrames to/from Ignite.</p>
</li>
<li>
<p>Faster SparkSQL queries by optimizing Spark query execution plans with Ignite SQL engine which include​ advanced indexing and avoid data movement across the network from Ignite to Spark.</p>
</li>
</ul>
</div>
</div>
</div>
<div class="sect1">
<h2 id="supported-spark-version">Supported Spark Version</h2>
<div class="sectionbody">
<div class="paragraph">
<p>Apache Ignite comes with two modules that support different versions of Apache Spark:</p>
</div>
<div class="ulist">
<ul>
<li>
<p>ignite-spark — integration with Spark 2.3</p>
</li>
<li>
<p>ignite-spark-2.4 — integration with Spark 2.4</p>
</li>
</ul>
</div>
</div>
</div>
<div class="copyright">
© 2020 The Apache Software Foundation.<br/>
Apache, Apache Ignite, the Apache feather and the Apache Ignite logo are either registered trademarks or trademarks of The Apache Software Foundation.
</div>
</article>
<nav class="right-nav" data-swiftype-index='false'>
<ul class="sectlevel1">
<li><a href="#ignite-rdds">Ignite RDDs</a></li>
<li><a href="#ignite-dataframes">Ignite DataFrames</a></li>
<li><a href="#supported-spark-version">Supported Spark Version</a></li>
</ul>
<footer>
</footer>
</nav>
</section>
<script type='module' src='/assets/js/code-copy-to-clipboard.js' async crossorigin></script>
<script>
// inits deep anchors -- needs to be done here because of https://www.bryanbraun.com/anchorjs/#dont-run-it-too-late
anchors.add('.page-docs h1, .page-docs h2, .page-docs h3:not(.discrete), .page-docs h4, .page-docs h5');
anchors.options = {
placement: 'right',
visible: 'always'
};
</script>
<!-- load google fonts async -->
<script type="text/javascript">
WebFontConfig = {
google: { families: [ 'Open+Sans:300,400,600,700&display=swap' ] }
};
(function() {
var wf = document.createElement('script');
wf.src = 'https://ajax.googleapis.com/ajax/libs/webfont/1/webfont.js';
wf.type = 'text/javascript';
wf.async = 'true';
var s = document.getElementsByTagName('script')[0];
s.parentNode.insertBefore(wf, s);
})(); </script>
<script src="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.js"></script>
<script>
docsearch({
// Your apiKey and indexName will be given to you once
// we create your config
apiKey: '3eee686c0ebe39eff3baeb18c56fa5f8',
indexName: 'apache_ignite',
// Replace inputSelector with a CSS selector
// matching your search input
inputSelector: '#search-input',
// algoliaOptions: { 'facetFilters': ["version:$VERSION"] },
// Set debug to true to inspect the dropdown
debug: false,
});
</script>
<script type='module' src='/assets/js/index.js?1609302780' async crossorigin></script>
<script type='module' src='/assets/js/versioning.js?1609302780' async crossorigin></script>
</body>
</html>