blob: a8e293af22d9bebed4c0b529e7bad398a24db43a [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-61232409-1"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'UA-61232409-1');
</script>
<meta charset="UTF-8">
<meta name="ignite-version" content="2.9.0" />
<title>Testing Ignite with Spark-shell | Ignite Documentation</title>
<link rel="canonical" href="/docs/2.9.0/ignite-for-spark/spark-shell" />
<link rel="stylesheet" href="/assets/css/styles.css?1600286557">
<link rel="stylesheet" href="/assets/css/asciidoc-pygments.css">
<link rel="shortcut icon" href="/favicon.ico">
<meta name='viewport' content='width=device-width, height=device-height, initial-scale=1.0, minimum-scale=1.0'>
<script type="text/javascript" src="/assets/js/anchor.min.js?1600286557"></script>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.css" />
</head>
<body>
<header>
<!--#include virtual="/includes/promotion_banner.html" -->
<div class="container">
<button type='button' class='menu' title='Docs menu'>
<img src="/assets/images/menu-icon.svg"/>
</button>
<div class='home'>
<a href="/" class='home' title='Apache Ignite home'>
<img src="/assets/images/apache_ignite_logo.svg" alt="Apache Ignite logo" width="103" >
</a>
</div>
<nav>
</nav>
<select id="version-selector">
<option value="2.9.0">2.9.0</option>
</select>
<a href="https://github.com/apache/ignite" title='GitHub' class='github' target="_blank">
<img src="/assets/images/github-gray.svg" alt="GitHub logo">
</a>
<form class='search'>
<button class="search-close" type='button'><img src='/assets/images/cancel.svg'></button>
<input type="search" placeholder="Search…" id="search-input">
</form>
<button type='button' class='search-toggle'><img src='/assets/images/search.svg'></button>
<button type='button' class='top-nav-toggle'></button>
</div>
</header>
<link rel="stylesheet" href="/assets/css/docs.css">
<section class='page-docs'>
<nav class='left-nav' data-swiftype-index='false'>
<li>
<a href="/docs/2.9.0/preface" class='' >Preface</a>
</li>
<li>
<button type='button' data-guide-url="" class='group-toggle collapsed '>Quick Start Guides<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/quick-start/java"
class=''
>Java</a>
</li>
<li>
<a href="/docs/2.9.0/quick-start/dotnet"
class=''
>.NET/C#</a>
</li>
<li>
<a href="/docs/2.9.0/quick-start/cpp"
class=''
>C++</a>
</li>
<li>
<a href="/docs/2.9.0/quick-start/python"
class=''
>Python</a>
</li>
<li>
<a href="/docs/2.9.0/quick-start/nodejs"
class=''
>Node.JS</a>
</li>
<li>
<a href="/docs/2.9.0/quick-start/sql"
class=''
>SQL</a>
</li>
<li>
<a href="/docs/2.9.0/quick-start/php"
class=''
>PHP</a>
</li>
<li>
<a href="/docs/2.9.0/quick-start/restapi"
class=''
>REST API</a>
</li>
</nav>
</li>
<li>
<button type='button' data-guide-url="/installation" class='group-toggle collapsed '>Installation<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/installation/installing-using-zip"
class=''
>Installing Using ZIP Archive</a>
</li>
<li>
<a href="/docs/2.9.0/installation/installing-using-docker"
class=''
>Installing Using Docker</a>
</li>
<li>
<a href="/docs/2.9.0/installation/deb-rpm"
class=''
>Installing DEB or RPM package</a>
</li>
<li>
<button
type='button'
class='collapsed '>Kubernetes<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0//installation/kubernetes/amazon-eks-deployment" class=''>Amazon EKS</a></li>
<li><a href="/docs/2.9.0//installation/kubernetes/azure-deployment" class=''>Azure Kubernetes Service</a></li>
<li><a href="/docs/2.9.0//installation/kubernetes/gke-deployment" class=''>Google Kubernetes Engine</a></li>
</nav>
</li>
</nav>
</li>
<li>
<button type='button' data-guide-url="" class='group-toggle collapsed '>Setting Up<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/setup"
class=''
>Setting Up Ignite for Java</a>
</li>
<li>
<a href="/docs/2.9.0/setup-dotnet"
class=''
>Setting Up Ignite for .NET/C#</a>
</li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/understanding-configuration" class='' >Understanding Configuration</a>
</li>
<li>
<a href="/docs/2.9.0/logging" class='' >Configuring Logging</a>
</li>
<li>
<a href="/docs/2.9.0/starting-nodes" class='' >Starting and Stopping Nodes</a>
</li>
<li>
<button type='button' data-guide-url="" class='group-toggle collapsed '>Clustering<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/clustering/clustering"
class=''
>Overview</a>
</li>
<li>
<a href="/docs/2.9.0/clustering/tcp-ip-discovery"
class=''
>TCP/IP Discovery</a>
</li>
<li>
<a href="/docs/2.9.0/clustering/zookeeper-discovery"
class=''
>ZooKeeper Discovery</a>
</li>
<li>
<a href="/docs/2.9.0/clustering/discovery-in-the-cloud"
class=''
>Discovery in the Cloud</a>
</li>
<li>
<a href="/docs/2.9.0/clustering/network-configuration"
class=''
>Network Configuration</a>
</li>
<li>
<a href="/docs/2.9.0/clustering/connect-client-nodes"
class=''
>Connecting Client Nodes</a>
</li>
<li>
<a href="/docs/2.9.0/clustering/running-client-nodes-behind-nat"
class=''
>Running Client Nodes Behind NAT</a>
</li>
</nav>
</li>
<li>
<button type='button' data-guide-url="" class='group-toggle collapsed '>Data Modeling<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/data-modeling/data-modeling"
class=''
>Introduction</a>
</li>
<li>
<a href="/docs/2.9.0/data-modeling/data-partitioning"
class=''
>Data Partitioning</a>
</li>
<li>
<a href="/docs/2.9.0/data-modeling/affinity-collocation"
class=''
>Affinity Colocation</a>
</li>
</nav>
</li>
<li>
<button type='button' data-guide-url="" class='group-toggle collapsed '>Configuring Memory<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/memory-architecture"
class=''
>Memory Architecture</a>
</li>
<li>
<a href="/docs/2.9.0/memory-configuration/data-regions"
class=''
>Configuring Data Regions</a>
</li>
<li>
<a href="/docs/2.9.0/memory-configuration/eviction-policies"
class=''
>Eviction Policies</a>
</li>
</nav>
</li>
<li>
<button type='button' data-guide-url="" class='group-toggle collapsed '>Configuring Caches<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/configuring-caches/configuration-overview"
class=''
>Cache Configuration</a>
</li>
<li>
<a href="/docs/2.9.0/configuring-caches/configuring-backups"
class=''
>Configuring Partition Backups</a>
</li>
<li>
<a href="/docs/2.9.0/configuring-caches/atomicity-modes"
class=''
>Atomicity Modes</a>
</li>
<li>
<a href="/docs/2.9.0/configuring-caches/expiry-policies"
class=''
>Expiry Policy</a>
</li>
<li>
<a href="/docs/2.9.0/configuring-caches/on-heap-caching"
class=''
>On-Heap Caching</a>
</li>
<li>
<a href="/docs/2.9.0/configuring-caches/cache-groups"
class=''
>Cache Groups</a>
</li>
</nav>
</li>
<li>
<button type='button' data-guide-url="" class='group-toggle collapsed '>Persistence<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/persistence/native-persistence"
class=''
>Ignite Persistence</a>
</li>
<li>
<a href="/docs/2.9.0/persistence/external-storage"
class=''
>External Storage</a>
</li>
<li>
<a href="/docs/2.9.0/persistence/swap"
class=''
>Swapping</a>
</li>
<li>
<a href="/docs/2.9.0/persistence/custom-cache-store"
class=''
>Implementing Custom Cache Store</a>
</li>
<li>
<a href="/docs/2.9.0/persistence/disk-compression"
class=''
>Disk Compression</a>
</li>
<li>
<a href="/docs/2.9.0/persistence/persistence-tuning"
class=''
>Tuning Persistence</a>
</li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/baseline-topology" class='' >Baseline Topology</a>
</li>
<li>
<a href="/docs/2.9.0/cluster-states" class='' >Cluster States</a>
</li>
<li>
<a href="/docs/2.9.0/data-rebalancing" class='' >Data Rebalancing</a>
</li>
<li>
<a href="/docs/2.9.0/partition-loss-policy" class='' >Partition Loss Policy</a>
</li>
<li>
<a href="/docs/2.9.0/deploying-user-code" class='' >Deploying User Code</a>
</li>
<li>
<a href="/docs/2.9.0/peer-class-loading" class='' >Peer Class Loading</a>
</li>
<li>
<a href="/docs/2.9.0/data-streaming" class='' >Data Streaming</a>
</li>
<li>
<button type='button' data-guide-url="" class='group-toggle collapsed '>Using Key-Value Cache API<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/key-value-api/basic-cache-operations"
class=''
>Basic Cache Operations</a>
</li>
<li>
<a href="/docs/2.9.0/key-value-api/binary-objects"
class=''
>Working with Binary Objects</a>
</li>
<li>
<a href="/docs/2.9.0/key-value-api/using-scan-queries"
class=''
>Using Scan Queries</a>
</li>
<li>
<a href="/docs/2.9.0/read-repair"
class=''
>Read Repair</a>
</li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/key-value-api/continuous-queries" class='' >Using Continuous Queries</a>
</li>
<li>
<a href="/docs/2.9.0/key-value-api/transactions" class='' >Performing Transactions</a>
</li>
<li>
<button type='button' data-guide-url="" class='group-toggle collapsed '>Working with SQL<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/SQL/sql-introduction"
class=''
>Introduction</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/schemas"
class=''
>Understanding Schemas</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/indexes"
class=''
>Defining Indexes</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/sql-api"
class=''
>Using SQL API</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/distributed-joins"
class=''
>Distributed Joins</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/sql-transactions"
class=''
>SQL Transactions</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/custom-sql-func"
class=''
>Custom SQL Functions</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/JDBC/jdbc-driver"
class=''
>JDBC Driver</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/JDBC/jdbc-client-driver"
class=''
>JDBC Client Driver</a>
</li>
<li>
<a href="/docs/2.9.0/transactions/mvcc"
class=''
>Multiversion Concurrency Control</a>
</li>
</nav>
</li>
<li>
<button type='button' data-guide-url="" class='group-toggle collapsed '>Distributed Computing<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/distributed-computing/distributed-computing"
class=''
>Distributed Computing API</a>
</li>
<li>
<a href="/docs/2.9.0/distributed-computing/cluster-groups"
class=''
>Cluster Groups</a>
</li>
<li>
<a href="/docs/2.9.0/distributed-computing/executor-service"
class=''
>Executor Service</a>
</li>
<li>
<a href="/docs/2.9.0/distributed-computing/map-reduce"
class=''
>MapReduce API</a>
</li>
<li>
<a href="/docs/2.9.0/distributed-computing/load-balancing"
class=''
>Load Balancing</a>
</li>
<li>
<a href="/docs/2.9.0/distributed-computing/fault-tolerance"
class=''
>Fault Tolerance</a>
</li>
<li>
<a href="/docs/2.9.0/distributed-computing/job-scheduling"
class=''
>Job Scheduling</a>
</li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/collocated-computations" class='' >Colocating Computations with Data</a>
</li>
<li>
<button type='button' data-guide-url="" class='group-toggle collapsed '>Working with Events<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/events/listening-to-events"
class=''
>Enabling and Listenting to Events</a>
</li>
<li>
<a href="/docs/2.9.0/events/events"
class=''
>Events</a>
</li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/near-cache" class='' >Near Caches</a>
</li>
<li>
<a href="/docs/2.9.0/platform-cache" class='' >.NET Platform Cache</a>
</li>
<li>
<a href="/docs/2.9.0/services/services" class='' >Services</a>
</li>
<li>
<button type='button' data-guide-url="" class='group-toggle collapsed '>Distributed Data Structures<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/data-structures/queue-and-set"
class=''
>Queue and Set</a>
</li>
<li>
<a href="/docs/2.9.0/data-structures/atomic-types"
class=''
>Atomic Types</a>
</li>
<li>
<a href="/docs/2.9.0/data-structures/countdownlatch"
class=''
>CountDownLatch</a>
</li>
<li>
<a href="/docs/2.9.0/data-structures/atomic-sequence"
class=''
>Atomic Sequence</a>
</li>
<li>
<a href="/docs/2.9.0/data-structures/semaphore"
class=''
>Semaphore</a>
</li>
</nav>
</li>
<li>
<button type='button' data-guide-url="" class='group-toggle collapsed '>Machine Learning<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/machine-learning/machine-learning"
class=''
>Machine Learning</a>
</li>
<li>
<a href="/docs/2.9.0/machine-learning/partition-based-dataset"
class=''
>Partition Based Dataset</a>
</li>
<li>
<a href="/docs/2.9.0/machine-learning/updating-trained-models"
class=''
>Updating Trained Models</a>
</li>
<li>
<button
type='button'
class='collapsed '>Binary Classification<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0//machine-learning/binary-classification/introduction" class=''>Introduction</a></li>
<li><a href="/docs/2.9.0//machine-learning/binary-classification/linear-svm" class=''>Linear SVM (Support Vector Machine)</a></li>
<li><a href="/docs/2.9.0//machine-learning/binary-classification/decision-trees" class=''>Decision Trees</a></li>
<li><a href="/docs/2.9.0//machine-learning/binary-classification/multilayer-perceptron" class=''>Multilayer Perceptron</a></li>
<li><a href="/docs/2.9.0//machine-learning/binary-classification/logistic-regression" class=''>Logistic Regression</a></li>
<li><a href="/docs/2.9.0//machine-learning/binary-classification/knn-classification" class=''>k-NN Classification</a></li>
<li><a href="/docs/2.9.0//machine-learning/binary-classification/ann" class=''>ANN (Approximate Nearest Neighbor)</a></li>
<li><a href="/docs/2.9.0//machine-learning/binary-classification/naive-bayes" class=''>Naive Bayes</a></li>
</nav>
</li>
<li>
<button
type='button'
class='collapsed '>Regression<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0//machine-learning/regression/introduction" class=''>Introduction</a></li>
<li><a href="/docs/2.9.0//machine-learning/regression/linear-regression" class=''>Linear Regression</a></li>
<li><a href="/docs/2.9.0//machine-learning/regression/decision-trees-regression" class=''>Decision Trees Regression</a></li>
<li><a href="/docs/2.9.0//machine-learning/regression/knn-regression" class=''>k-NN Regression</a></li>
</nav>
</li>
<li>
<button
type='button'
class='collapsed '>Clustering<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0//machine-learning/clustering/introduction" class=''>Introduction</a></li>
<li><a href="/docs/2.9.0//machine-learning/clustering/k-means-clustering" class=''>K-Means Clustering</a></li>
<li><a href="/docs/2.9.0//machine-learning/clustering/gaussian-mixture" class=''>Gaussian mixture (GMM)</a></li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/machine-learning/preprocessing"
class=''
>Preprocessing</a>
</li>
<li>
<button
type='button'
class='collapsed '>Model Selection<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0//machine-learning/model-selection/introduction" class=''>Introduction</a></li>
<li><a href="/docs/2.9.0//machine-learning/model-selection/evaluator" class=''>Evaluator</a></li>
<li><a href="/docs/2.9.0//machine-learning/model-selection/split-the-dataset-on-test-and-train-datasets" class=''>Split the dataset on test and train datasets</a></li>
<li><a href="/docs/2.9.0//machine-learning/model-selection/hyper-parameter-tuning" class=''>Hyper-parameter tuning</a></li>
<li><a href="/docs/2.9.0//machine-learning/model-selection/pipeline-api" class=''>Pipeline API</a></li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/machine-learning/multiclass-classification"
class=''
>Multiclass Classification</a>
</li>
<li>
<button
type='button'
class='collapsed '>Ensemble Methods<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0//machine-learning/ensemble-methods/introduction" class=''></a></li>
<li><a href="/docs/2.9.0//machine-learning/ensemble-methods/stacking" class=''>Stacking</a></li>
<li><a href="/docs/2.9.0//machine-learning/ensemble-methods/baggin" class=''>Bagging</a></li>
<li><a href="/docs/2.9.0//machine-learning/ensemble-methods/random-forest" class=''>Random Forest</a></li>
<li><a href="/docs/2.9.0//machine-learning/ensemble-methods/gradient-boosting" class=''>Gradient Boosting</a></li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/machine-learning/recommendation-systems"
class=''
>Recommendation Systems</a>
</li>
<li>
<button
type='button'
class='collapsed '>Importing Model<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0//machine-learning/importing-model/introduction" class=''>Introduction</a></li>
<li><a href="/docs/2.9.0//machine-learning/importing-model/model-import-from-gxboost" class=''>Import Model from XGBoost</a></li>
<li><a href="/docs/2.9.0//machine-learning/importing-model/model-import-from-apache-spark" class=''>Import Model from Apache Spark</a></li>
</nav>
</li>
</nav>
</li>
<li>
<button type='button' data-guide-url="" class='group-toggle collapsed '>Monitoring<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/monitoring-metrics/intro"
class=''
>Introduction</a>
</li>
<li>
<a href="/docs/2.9.0/monitoring-metrics/cluster-id"
class=''
>Cluster ID and Tag</a>
</li>
<li>
<button
type='button'
class='collapsed '>Metrics<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0//monitoring-metrics/configuring-metrics" class=''>Configuring Metrics</a></li>
<li><a href="/docs/2.9.0//monitoring-metrics/metrics" class=''>JMX Metrics</a></li>
</nav>
</li>
<li>
<button
type='button'
class='collapsed '>New Metrics System<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0//monitoring-metrics/new-metrics-system" class=''>Introduction</a></li>
<li><a href="/docs/2.9.0//monitoring-metrics/new-metrics" class=''>Metrics</a></li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/monitoring-metrics/system-views"
class=''
>System Views</a>
</li>
<li>
<a href="/docs/2.9.0/monitoring-metrics/tracing"
class=''
>Tracing</a>
</li>
</nav>
</li>
<li>
<button type='button' data-guide-url="/security" class='group-toggle collapsed '>Security<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/security/authentication"
class=''
>Authentication</a>
</li>
<li>
<a href="/docs/2.9.0/security/ssl-tls"
class=''
>SSL/TLS</a>
</li>
<li>
<button
type='button'
class='collapsed '>Transparent Data Encryption<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0//security/tde" class=''>Introduction</a></li>
<li><a href="/docs/2.9.0//security/master-key-rotation" class=''>Master key rotation</a></li>
</nav>
</li>
</nav>
</li>
<li>
<button type='button' data-guide-url="" class='group-toggle collapsed '>Thin Clients<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/thin-clients/getting-started-with-thin-clients"
class=''
>Thin Clients Overview</a>
</li>
<li>
<a href="/docs/2.9.0/thin-clients/java-thin-client"
class=''
>Java Thin Client</a>
</li>
<li>
<a href="/docs/2.9.0/thin-clients/dotnet-thin-client"
class=''
>.NET Thin Client</a>
</li>
<li>
<a href="/docs/2.9.0/thin-clients/cpp-thin-client"
class=''
>C++ Thin Client</a>
</li>
<li>
<a href="/docs/2.9.0/thin-clients/python-thin-client"
class=''
>Python Thin Client</a>
</li>
<li>
<a href="/docs/2.9.0/thin-clients/php-thin-client"
class=''
>PHP Thin Client</a>
</li>
<li>
<a href="/docs/2.9.0/thin-clients/nodejs-thin-client"
class=''
>Node.js Thin Client</a>
</li>
<li>
<button
type='button'
class='collapsed '>Binary Client Protocol<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/2.9.0//binary-client-protocol/binary-client-protocol" class=''>Binary Client Protocol</a></li>
<li><a href="/docs/2.9.0//binary-client-protocol/data-format" class=''>Data Format</a></li>
<li><a href="/docs/2.9.0//binary-client-protocol/key-value-queries" class=''>Key-Value Queries</a></li>
<li><a href="/docs/2.9.0//binary-client-protocol/sql-and-scan-queries" class=''>SQL and Scan Queries</a></li>
<li><a href="/docs/2.9.0//binary-client-protocol/binary-type-metadata" class=''>Binary Types Metadata</a></li>
<li><a href="/docs/2.9.0//binary-client-protocol/cache-configuration" class=''>Cache Configuration</a></li>
</nav>
</li>
</nav>
</li>
<li>
<button type='button' data-guide-url="" class='group-toggle collapsed '>ODBC Driver<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/SQL/ODBC/odbc-driver"
class=''
>ODBC Driver</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/ODBC/connection-string-dsn"
class=''
>Connection String and DSN</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/ODBC/querying-modifying-data"
class=''
>Querying and Modifying Data</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/ODBC/specification"
class=''
>Specification</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/ODBC/data-types"
class=''
>Data Types</a>
</li>
<li>
<a href="/docs/2.9.0/SQL/ODBC/error-codes"
class=''
>Error Codes</a>
</li>
</nav>
</li>
<li>
<a href="/docs/2.9.0/restapi" class='' >REST API</a>
</li>
<li>
<a href="/docs/2.9.0/control-script" class='' >Control Script</a>
</li>
<li>
<a href="/docs/2.9.0/plugins" class='' >Plugins</a>
</li>
<li>
<a href="/docs/2.9.0/sqlline" class='' >SQLLine</a>
</li>
<li>
<button type='button' data-guide-url="" class='group-toggle expanded '>Ignite for Spark<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group expanded'>
<li>
<a href="/docs/2.9.0/ignite-for-spark/overview"
class=''
>Overview</a>
</li>
<li>
<a href="/docs/2.9.0/ignite-for-spark/ignitecontext-and-rdd"
class=''
>IgniteContext and IgniteRDD</a>
</li>
<li>
<a href="/docs/2.9.0/ignite-for-spark/ignite-dataframe"
class=''
>Ignite DataFrame</a>
</li>
<li>
<a href="/docs/2.9.0/ignite-for-spark/installation"
class=''
>Installation</a>
</li>
<li>
<a href="/docs/2.9.0/ignite-for-spark/spark-shell"
class='active'
>Test Ignite with Spark-shell</a>
</li>
<li>
<a href="/docs/2.9.0/ignite-for-spark/troubleshooting"
class=''
>Troubleshooting</a>
</li>
</nav>
</li>
<li>
<button type='button' data-guide-url="/sql-reference/sql-reference-overview" class='group-toggle collapsed '>SQL Reference<img class="state-indicator" src="/assets/images/left-nav-arrow.svg"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/2.9.0/sql-reference/sql-conformance"
class=''
>SQL Conformance</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/ddl"
class=''
>Data Definition Language (DDL)</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/dml"
class=''
>Data Manipulation Language (DML)</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/transactions"
class=''
>Transactions</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/operational-commands"
class=''
>Operational Commands</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/aggregate-functions"
class=''
>Aggregate functions</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/numeric-functions"
class=''
>Numeric Functions</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/string-functions"
class=''
>String Functions</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/date-time-functions"
class=''
>Data and Time Functions</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/system-functions"
class=''
>System Functions</a>
</li>
<li>
<a href="/docs/2.9.0/sql-reference/data-types"
class=''
>Data Types</a>
</li>
</nav>
</li>
</nav>
<div class="left-nav__overlay"></div>
<article data-swiftype-index='true'>
<a class='edit-link' href="https://github.com/apache/ignite/tree/IGNITE-7595/docs/_docs/ignite-for-spark/spark-shell.adoc" target="_blank">Edit</a>
<h1>Testing Ignite with Spark-shell</h1>
<div class="sect1">
<h2 id="starting-up-the-cluster">Starting up the cluster</h2>
<div class="sectionbody">
<div class="paragraph">
<p>Here we will briefly cover the process of Spark and Ignite cluster startup. Refer to <a href="https://spark.apache.org/docs/latest/">Spark documentation</a> for more details.</p>
</div>
<div class="paragraph">
<p>For the testing you will need a Spark master process and at least one Spark worker. Usually Spark master and workers are separate machines, but for the test purposes you can start worker on the same machine where master starts.</p>
</div>
<div class="olist arabic">
<ol class="arabic">
<li>
<p>Download and unpack Spark binary distribution to the same location (let it be <code>SPARK_HOME</code>) on all nodes.</p>
</li>
<li>
<p>Download and unpack Ignite binary distribution to the same location (let it be <code>IGNITE_HOME</code>) on all nodes.</p>
</li>
<li>
<p>On master node <code>cd</code> to <code>$SPARK_HOME</code> and run the following command:</p>
<div class="openblock">
<div class="content">
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="shell">sbin/start-master.sh</code></pre>
</div>
</div>
<div class="paragraph">
<p>The script should output the path to log file of the started process. Check the log file for the master URL which has the following format: <code>spark://master_host:master_port</code> Also check the log file for the Web UI url (usually it is <code><a href="http://master_host:8080" class="bare">http://master_host:8080</a></code>).</p>
</div>
</div>
</div>
</li>
<li>
<p>On each of the worker nodes <code>cd</code> to <code>$SPARK_HOME</code> and run the following command:</p>
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="shell">bin/spark-class org.apache.spark.deploy.worker.Worker spark://master_host:master_port</code></pre>
</div>
</div>
<div class="paragraph">
<p>where <code>spark://master_host:master_port</code> is the master URL you grabbed from the master log file. After workers has started check the master Web UI interface, it should show all of your workers registered in status <code>ALIVE</code>.</p>
</div>
</li>
<li>
<p>On each of the worker nodes cd to <code>$IGNITE_HOME</code> and start an Ignite node by running the following command:</p>
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="shell">bin/ignite.sh</code></pre>
</div>
</div>
</li>
</ol>
</div>
<div class="paragraph">
<p>You should see Ignite nodes discover each other with default configuration. If your network does not allow multicast traffic, you will need to change the default configuration file and configure TCP discovery.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="working-with-spark-shell">Working with Spark-Shell</h2>
<div class="sectionbody">
<div class="paragraph">
<p>Now that you have your cluster up and running, you can run <code>spark-shell</code> and check the integration.</p>
</div>
<div class="olist arabic">
<ol class="arabic">
<li>
<p>Start spark shell:</p>
<div class="openblock">
<div class="content">
<div class="ulist">
<ul>
<li>
<p>Either by providing Maven coordinates to Ignite artifacts (you can use <code>--repositories</code> if you need, but it may be omitted):</p>
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="shell">./bin/spark-shell
<span class="nt">--packages</span> org.apache.ignite:ignite-spark:1.8.0
<span class="nt">--master</span> spark://master_host:master_port
<span class="nt">--repositories</span> http://repo.maven.apache.org/maven2/org/apache/ignite</code></pre>
</div>
</div>
</li>
<li>
<p>Or by providing paths to Ignite jar file paths using <code>--jars</code> parameter</p>
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="shell">./bin/spark-shell <span class="nt">--jars</span> path/to/ignite-core.jar,path/to/ignite-spark.jar,path/to/cache-api.jar,path/to/ignite-log4j.jar,path/to/log4j.jar <span class="nt">--master</span> spark://master_host:master_port</code></pre>
</div>
</div>
</li>
</ul>
</div>
<div class="paragraph">
<p>You should see Spark shell started up.</p>
</div>
<div class="paragraph">
<p>Note that if you are planning to use spring configuration loading, you will need to add the <code>ignite-spring</code> dependency as well:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="shell">./bin/spark-shell
<span class="nt">--packages</span> org.apache.ignite:ignite-spark:1.8.0,org.apache.ignite:ignite-spring:1.8.0
<span class="nt">--master</span> spark://master_host:master_port</code></pre>
</div>
</div>
</div>
</div>
</li>
<li>
<p>Let&#8217;s create an instance of Ignite context using default configuration:</p>
<div class="openblock">
<div class="content">
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="scala"><span class="k">import</span> <span class="nn">org.apache.ignite.spark._</span>
<span class="k">import</span> <span class="nn">org.apache.ignite.configuration._</span>
<span class="k">val</span> <span class="nv">ic</span> <span class="k">=</span> <span class="k">new</span> <span class="nc">IgniteContext</span><span class="o">(</span><span class="n">sc</span><span class="o">,</span> <span class="o">()</span> <span class="k">=&gt;</span> <span class="k">new</span> <span class="nc">IgniteConfiguration</span><span class="o">())</span></code></pre>
</div>
</div>
<div class="paragraph">
<p>You should see something like</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="text">ic: org.apache.ignite.spark.IgniteContext = org.apache.ignite.spark.IgniteContext@62be2836</code></pre>
</div>
</div>
<div class="paragraph">
<p>An alternative way to create an instance of IgniteContext is to use a configuration file. Note that if path to configuration is specified in a relative form, then the <code>IGNITE_HOME</code> environment variable should be globally set in the system as the path is resolved relative to <code>IGNITE_HOME</code></p>
</div>
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="scala"><span class="k">import</span> <span class="nn">org.apache.ignite.spark._</span>
<span class="k">import</span> <span class="nn">org.apache.ignite.configuration._</span>
<span class="k">val</span> <span class="nv">ic</span> <span class="k">=</span> <span class="k">new</span> <span class="nc">IgniteContext</span><span class="o">(</span><span class="n">sc</span><span class="o">,</span> <span class="s">"examples/config/spark/example-shared-rdd.xml"</span><span class="o">)</span></code></pre>
</div>
</div>
</div>
</div>
</li>
<li>
<p>Let&#8217;s now create an instance of <code>IgniteRDD</code> using "partitioned" cache in default configuration:</p>
<div class="openblock">
<div class="content">
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="scala"><span class="k">val</span> <span class="nv">sharedRDD</span> <span class="k">=</span> <span class="nv">ic</span><span class="o">.</span><span class="py">fromCache</span><span class="o">[</span><span class="kt">Integer</span>, <span class="kt">Integer</span><span class="o">](</span><span class="s">"partitioned"</span><span class="o">)</span></code></pre>
</div>
</div>
<div class="paragraph">
<p>You should see an instance of RDD created for partitioned cache:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="text">shareRDD: org.apache.ignite.spark.IgniteRDD[Integer,Integer] = IgniteRDD[0] at RDD at IgniteAbstractRDD.scala:27</code></pre>
</div>
</div>
<div class="paragraph">
<p>Note that creation of RDD is a local operation and will not create a cache in Ignite cluster.</p>
</div>
</div>
</div>
</li>
<li>
<p>Let&#8217;s now actually ask Spark to do something with our RDD, for example, get all pairs where value is less than 10:</p>
<div class="openblock">
<div class="content">
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="scala"><span class="nv">sharedRDD</span><span class="o">.</span><span class="py">filter</span><span class="o">(</span><span class="nv">_</span><span class="o">.</span><span class="py">_2</span> <span class="o">&lt;</span> <span class="mi">10</span><span class="o">).</span><span class="py">collect</span><span class="o">()</span></code></pre>
</div>
</div>
<div class="paragraph">
<p>As our cache has not been filled yet, the result will be an empty array:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="text">res0: Array[(Integer, Integer)] = Array()</code></pre>
</div>
</div>
<div class="paragraph">
<p>Check the logs of remote spark workers and see how Ignite context will start clients on all remote workers in the cluster. You can also start command-line Visor and check that "partitioned" cache has been created.</p>
</div>
</div>
</div>
</li>
<li>
<p>Let&#8217;s now save some values into Ignite:</p>
<div class="openblock">
<div class="content">
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="scala"><span class="nv">sharedRDD</span><span class="o">.</span><span class="py">savePairs</span><span class="o">(</span><span class="nv">sc</span><span class="o">.</span><span class="py">parallelize</span><span class="o">(</span><span class="mi">1</span> <span class="n">to</span> <span class="mi">100000</span><span class="o">,</span> <span class="mi">10</span><span class="o">).</span><span class="py">map</span><span class="o">(</span><span class="n">i</span> <span class="k">=&gt;</span> <span class="o">(</span><span class="n">i</span><span class="o">,</span> <span class="n">i</span><span class="o">)))</span></code></pre>
</div>
</div>
<div class="paragraph">
<p>After running this command you can check with command-line Visor that cache size is 100000 elements.</p>
</div>
</div>
</div>
</li>
<li>
<p>We can now check how the state we created will survive job restart. Shut down the spark shell and repeat steps 1-3. You should again have an instance of Ignite context and RDD for "partitioned" cache. We can now check how many keys there are in our RDD which value is greater than 50000:</p>
<div class="openblock">
<div class="content">
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="scala"><span class="nv">sharedRDD</span><span class="o">.</span><span class="py">filter</span><span class="o">(</span><span class="nv">_</span><span class="o">.</span><span class="py">_2</span> <span class="o">&gt;</span> <span class="mi">50000</span><span class="o">).</span><span class="py">count</span></code></pre>
</div>
</div>
<div class="paragraph">
<p>Since we filled up cache with a sequence of number from 1 to 100000 inclusive, we should see <code>50000</code> as a result:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="text">res0: Long = 50000</code></pre>
</div>
</div>
</div>
</div>
</li>
</ol>
</div>
</div>
</div>
<div class="copyright">
© 2020 The Apache Software Foundation.<br/>
Apache, Apache Ignite, the Apache feather and the Apache Ignite logo are either registered trademarks or trademarks of The Apache Software Foundation.
</div>
</article>
<nav class="right-nav" data-swiftype-index='false'>
<ul class="sectlevel1">
<li><a href="#starting-up-the-cluster">Starting up the cluster</a></li>
<li><a href="#working-with-spark-shell">Working with Spark-Shell</a></li>
</ul>
<footer>
</footer>
</nav>
</section>
<script type='module' src='/assets/js/code-copy-to-clipboard.js' async crossorigin></script>
<script>
// inits deep anchors -- needs to be done here because of https://www.bryanbraun.com/anchorjs/#dont-run-it-too-late
anchors.add('.page-docs h1, .page-docs h2, .page-docs h3:not(.discrete), .page-docs h4, .page-docs h5');
anchors.options = {
placement: 'right',
visible: 'always'
};
</script>
<!-- load google fonts async -->
<script type="text/javascript">
WebFontConfig = {
google: { families: [ 'Open+Sans:300,400,600,700&display=swap' ] }
};
(function() {
var wf = document.createElement('script');
wf.src = 'https://ajax.googleapis.com/ajax/libs/webfont/1/webfont.js';
wf.type = 'text/javascript';
wf.async = 'true';
var s = document.getElementsByTagName('script')[0];
s.parentNode.insertBefore(wf, s);
})(); </script>
<script src="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.js"></script>
<script>
docsearch({
// Your apiKey and indexName will be given to you once
// we create your config
apiKey: '3eee686c0ebe39eff3baeb18c56fa5f8',
indexName: 'apache_ignite',
// Replace inputSelector with a CSS selector
// matching your search input
inputSelector: '#search-input',
// algoliaOptions: { 'facetFilters': ["version:$VERSION"] },
// Set debug to true to inspect the dropdown
debug: false,
});
</script>
<script type='module' src='/assets/js/index.js?1600286557' async crossorigin></script>
<script type='module' src='/assets/js/versioning.js?1600286557' async crossorigin></script>
</body>
</html>