IGNITE-17470: Add initial support of Spark 3.2 (#173)
diff --git a/docs/_docs/spark-3.2/spark-3.2.adoc b/docs/_docs/spark-3.2/spark-3.2.adoc
new file mode 100644
index 0000000..9d5b852
--- /dev/null
+++ b/docs/_docs/spark-3.2/spark-3.2.adoc
@@ -0,0 +1,132 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+= Using Spark with Apache Ignite
+
+This Spark module provides an implementation of the Spark RDD abstraction that enables easy access to Ignite caches. A minimal usage sketch is shown at the end of this page.
+
+To use the Ignite Spark-3.2 module:
+
+. Import it into your Maven project. If you are using Maven to manage the dependencies of your project, you can add the Ignite
+Spark-3.2 module dependency like this (replace `${ignite-spark-3.2-ext.version}` with the actual Ignite Spark-3.2 Extension version you are interested in):
++
+[tabs]
+--
+tab:pom.xml[]
+[source,xml]
+----
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
+ http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ ...
+ <dependencies>
+ ...
+ <dependency>
+ <groupId>org.apache.ignite</groupId>
+ <artifactId>ignite-spark-3.2-ext</artifactId>
+ <version>${ignite-spark-3.2-ext.version}</version>
+ </dependency>
+ ...
+ </dependencies>
+ ...
+</project>
+----
+--
+
+. Make sure that all required libraries are present in the classpath
+when you run the application using the spark-submit command, or add them to your POM file.
+
+Versions:
+
+* `${scala-version}` -- 2.12
+* `${spark.version}` -- 3.2
+* `${ignite-version}` -- the version of Apache Ignite that you use
+
+[tabs]
+--
+tab:pom.xml[]
+[source,xml]
+----
+ ...
+ <dependencies>
+ ...
+ <dependency>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-library</artifactId>
+ <version>${scala-version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-reflect</artifactId>
+ <version>${scala-version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.ignite</groupId>
+ <artifactId>ignite-core</artifactId>
+ <version>${ignite-version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.ignite</groupId>
+ <artifactId>ignite-indexing</artifactId>
+ <version>${ignite-version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.ignite</groupId>
+ <artifactId>ignite-spring</artifactId>
+ <version>${ignite-version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core_2.12</artifactId>
+ <version>${spark.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-catalyst_2.12</artifactId>
+ <version>${spark.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-sql_2.12</artifactId>
+ <version>${spark.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-network-common_2.12</artifactId>
+ <version>${spark.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-network-shuffle_2.12</artifactId>
+ <version>${spark.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-tags_2.12</artifactId>
+ <version>${spark.version}</version>
+ </dependency>
+ ...
+ </dependencies>
+ ...
+</project>
+----
+--
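+
+Once the dependencies are in place, the module is used from a regular `SparkSession` through the Ignite data source.
+The snippet below is a minimal sketch based on the examples shipped with this module; the object name is illustrative,
+and it assumes an Ignite node is already running with the `config/example-ignite.xml` configuration and exposes a SQL table called `person`:
+
+[source,scala]
+----
+import org.apache.ignite.spark.IgniteDataFrameSettings._
+import org.apache.spark.sql.SparkSession
+
+// Illustrative object name; not part of the shipped examples.
+object IgniteSparkQuickStart extends App {
+    val spark = SparkSession.builder()
+        .appName("Ignite Spark quick start")
+        .master("local")
+        .getOrCreate()
+
+    // Read the Ignite SQL table "person" through the Ignite data source.
+    val persons = spark.read
+        .format(FORMAT_IGNITE)                                    // Data source type.
+        .option(OPTION_TABLE, "person")                           // Table to read.
+        .option(OPTION_CONFIG_FILE, "config/example-ignite.xml")  // Ignite configuration file.
+        .load()
+
+    persons.printSchema()
+    persons.show()
+
+    spark.close()
+}
+----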
diff --git a/modules/spark-3.2-ext/examples/config/example-default.xml b/modules/spark-3.2-ext/examples/config/example-default.xml
new file mode 100644
index 0000000..e6c359d
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/config/example-default.xml
@@ -0,0 +1,76 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+ Ignite configuration with all defaults and enabled p2p deployment and enabled events.
+-->
+<beans xmlns="http://www.springframework.org/schema/beans"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:util="http://www.springframework.org/schema/util"
+ xsi:schemaLocation="
+ http://www.springframework.org/schema/beans
+ http://www.springframework.org/schema/beans/spring-beans.xsd
+ http://www.springframework.org/schema/util
+ http://www.springframework.org/schema/util/spring-util.xsd">
+ <bean abstract="true" id="ignite.cfg" class="org.apache.ignite.configuration.IgniteConfiguration">
+ <!-- Set to true to enable distributed class loading for examples, default is false. -->
+ <property name="peerClassLoadingEnabled" value="true"/>
+
+ <!-- Enable task execution events for examples. -->
+ <property name="includeEventTypes">
+ <list>
+ <!--Task execution events-->
+ <util:constant static-field="org.apache.ignite.events.EventType.EVT_TASK_STARTED"/>
+ <util:constant static-field="org.apache.ignite.events.EventType.EVT_TASK_FINISHED"/>
+ <util:constant static-field="org.apache.ignite.events.EventType.EVT_TASK_FAILED"/>
+ <util:constant static-field="org.apache.ignite.events.EventType.EVT_TASK_TIMEDOUT"/>
+ <util:constant static-field="org.apache.ignite.events.EventType.EVT_TASK_SESSION_ATTR_SET"/>
+ <util:constant static-field="org.apache.ignite.events.EventType.EVT_TASK_REDUCED"/>
+
+ <!--Cache events-->
+ <util:constant static-field="org.apache.ignite.events.EventType.EVT_CACHE_OBJECT_PUT"/>
+ <util:constant static-field="org.apache.ignite.events.EventType.EVT_CACHE_OBJECT_READ"/>
+ <util:constant static-field="org.apache.ignite.events.EventType.EVT_CACHE_OBJECT_REMOVED"/>
+ </list>
+ </property>
+
+ <!-- Explicitly configure TCP discovery SPI to provide list of initial nodes. -->
+ <property name="discoverySpi">
+ <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
+ <property name="ipFinder">
+ <!--
+ Ignite provides several options for automatic discovery that can be used
+ instead of static IP-based discovery. For information on all options refer
+ to our documentation: http://apacheignite.readme.io/docs/cluster-config
+ -->
+ <!-- Uncomment static IP finder to enable static-based discovery of initial nodes. -->
+ <!--<bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">-->
+ <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.multicast.TcpDiscoveryMulticastIpFinder">
+ <property name="addresses">
+ <list>
+ <!-- In distributed environment, replace with actual host IP address. -->
+ <value>127.0.0.1:47500..47509</value>
+ </list>
+ </property>
+ </bean>
+ </property>
+ </bean>
+ </property>
+ </bean>
+</beans>
diff --git a/modules/spark-3.2-ext/examples/config/example-ignite.xml b/modules/spark-3.2-ext/examples/config/example-ignite.xml
new file mode 100644
index 0000000..a3e7e22
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/config/example-ignite.xml
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+ Ignite configuration with all defaults and enabled p2p deployment and enabled events.
+-->
+<beans xmlns="http://www.springframework.org/schema/beans"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.springframework.org/schema/beans
+ http://www.springframework.org/schema/beans/spring-beans.xsd">
+ <!-- Imports default Ignite configuration -->
+ <import resource="example-default.xml"/>
+
+ <bean parent="ignite.cfg"/>
+</beans>
diff --git a/modules/spark-3.2-ext/examples/modules/core/src/test/config/log4j2-test.xml b/modules/spark-3.2-ext/examples/modules/core/src/test/config/log4j2-test.xml
new file mode 100644
index 0000000..0bf9cf5
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/modules/core/src/test/config/log4j2-test.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<Configuration>
+ <Appenders>
+ <Console name="CONSOLE" target="SYSTEM_OUT">
+ <PatternLayout pattern="[%d{ISO8601}][%-5p][%t][%c{1}]%notEmpty{[%markerSimpleName]} %m%n"/>
+ <LevelRangeFilter minLevel="INFO" maxLevel="DEBUG"/>
+ </Console>
+
+ <Console name="CONSOLE_ERR" target="SYSTEM_ERR">
+ <PatternLayout pattern="[%d{ISO8601}][%-5p][%t][%c{1}] %m%n"/>
+ </Console>
+ </Appenders>
+
+ <Loggers>
+ <Logger name="org" level="INFO"/>
+
+ <Root level="INFO">
+ <AppenderRef ref="CONSOLE" level="DEBUG"/>
+ <AppenderRef ref="CONSOLE_ERR" level="WARN"/>
+ </Root>
+ </Loggers>
+</Configuration>
diff --git a/modules/spark-3.2-ext/examples/modules/core/src/test/config/tests.properties b/modules/spark-3.2-ext/examples/modules/core/src/test/config/tests.properties
new file mode 100644
index 0000000..86094c8
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/modules/core/src/test/config/tests.properties
@@ -0,0 +1,16 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/modules/spark-3.2-ext/examples/pom.xml b/modules/spark-3.2-ext/examples/pom.xml
new file mode 100644
index 0000000..0ad89da
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/pom.xml
@@ -0,0 +1,150 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.ignite</groupId>
+ <artifactId>ignite-spark-3.2-parent-ext</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ <relativePath>../../spark-3.2-ext/pom.xml</relativePath>
+ </parent>
+
+ <artifactId>ignite-spark-3.2-examples</artifactId>
+
+ <dependencies>
+
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>ignite-spark-3.2-ext</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>ignite-core</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>ignite-indexing</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>ignite-spring</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core_2.12</artifactId>
+ <version>${spark.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-catalyst_2.12</artifactId>
+ <version>${spark.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-sql_2.12</artifactId>
+ <version>${spark.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-network-common_2.12</artifactId>
+ <version>${spark.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-network-shuffle_2.12</artifactId>
+ <version>${spark.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-tags_2.12</artifactId>
+ <version>${spark.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-core</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-slf4j-impl</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-core</artifactId>
+ <version>${spark.jackson.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ <version>${spark.jackson.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ <version>${spark.jackson.version}</version>
+ </dependency>
+
+ <!-- Test dependencies -->
+
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>ignite-core</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest_2.12</artifactId>
+ <version>${scala.test.version}</version>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-library</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>net.alchim31.maven</groupId>
+ <artifactId>scala-maven-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/IgniteCatalogExample.scala b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/IgniteCatalogExample.scala
new file mode 100644
index 0000000..a707d0a
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/IgniteCatalogExample.scala
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.examples.spark
+
+import java.lang.{Long ⇒ JLong}
+
+import org.apache.ignite.cache.query.SqlFieldsQuery
+import org.apache.ignite.configuration.CacheConfiguration
+import org.apache.ignite.{Ignite, Ignition}
+import org.apache.logging.log4j.core.config.Configurator
+import org.apache.logging.log4j.Level
+import org.apache.spark.sql.ignite.IgniteSparkSession
+
+/**
+ * Example application to show a use-case for the Ignite implementation of the Spark SQL [[org.apache.spark.sql.catalog.Catalog]].
+ * The catalog provides the ability to automatically resolve SQL tables created in Ignite.
+ */
+object IgniteCatalogExample extends App {
+ /**
+ * Ignite config file.
+ */
+ private val CONFIG = "config/example-ignite.xml"
+
+ /**
+ * Test cache name.
+ */
+ private val CACHE_NAME = "testCache"
+
+ //Starting Ignite server node.
+ val ignite = setupServerAndData
+
+ closeAfter(ignite) { ignite ⇒
+ //Creating Ignite-specific implementation of Spark session.
+ val igniteSession = IgniteSparkSession.builder()
+ .appName("Spark Ignite catalog example")
+ .master("local")
+ .config("spark.executor.instances", "2")
+ .igniteConfig(CONFIG)
+ .getOrCreate()
+
+ //Adjust the logger to exclude the logs of no interest.
+ Configurator.setRootLevel(Level.ERROR)
+ Configurator.setLevel("org.apache.ignite", Level.INFO)
+
+ println("List of available tables:")
+
+ //Showing existing tables.
+ igniteSession.catalog.listTables().show()
+
+ println("PERSON table description:")
+
+ //Showing `person` schema.
+ igniteSession.catalog.listColumns("person").show()
+
+ println("CITY table description:")
+
+ //Showing `city` schema.
+ igniteSession.catalog.listColumns("city").show()
+
+ println("Querying all persons from city with ID=2.")
+ println
+
+ //Selecting data through the Spark SQL engine.
+ val df = igniteSession.sql("SELECT * FROM person WHERE CITY_ID = 2")
+
+ println("Result schema:")
+
+ df.printSchema()
+
+ println("Result content:")
+
+ df.show()
+
+ println("Querying all persons living in Denver.")
+ println
+
+ //Selecting data through the Spark SQL engine.
+ val df2 = igniteSession.sql("SELECT * FROM person p JOIN city c ON c.ID = p.CITY_ID WHERE c.NAME = 'Denver'")
+
+ println("Result schema:")
+
+ df2.printSchema()
+
+ println("Result content:")
+
+ df2.show()
+ }
+
+ /**
+ * Starting an Ignite server node and creating sample data.
+ *
+ * @return Ignite server node.
+ */
+ def setupServerAndData: Ignite = {
+ //Starting Ignite.
+ val ignite = Ignition.start(CONFIG)
+
+ //Creating cache.
+ val ccfg = new CacheConfiguration[Int, Int](CACHE_NAME).setSqlSchema("PUBLIC")
+
+ val cache = ignite.getOrCreateCache(ccfg)
+
+ //Create tables.
+ cache.query(new SqlFieldsQuery(
+ "CREATE TABLE city (id LONG PRIMARY KEY, name VARCHAR) WITH \"template=replicated\"")).getAll
+
+ cache.query(new SqlFieldsQuery(
+ "CREATE TABLE person (id LONG, name VARCHAR, city_id LONG, PRIMARY KEY (id, city_id)) " +
+ "WITH \"backups=1, affinityKey=city_id\"")).getAll
+
+ cache.query(new SqlFieldsQuery("CREATE INDEX on Person (city_id)")).getAll
+
+ //Inserting some data into table.
+ var qry = new SqlFieldsQuery("INSERT INTO city (id, name) VALUES (?, ?)")
+
+ cache.query(qry.setArgs(1L.asInstanceOf[JLong], "Forest Hill")).getAll
+ cache.query(qry.setArgs(2L.asInstanceOf[JLong], "Denver")).getAll
+ cache.query(qry.setArgs(3L.asInstanceOf[JLong], "St. Petersburg")).getAll
+
+ qry = new SqlFieldsQuery("INSERT INTO person (id, name, city_id) values (?, ?, ?)")
+
+ cache.query(qry.setArgs(1L.asInstanceOf[JLong], "John Doe", 3L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(2L.asInstanceOf[JLong], "Jane Roe", 2L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(3L.asInstanceOf[JLong], "Mary Major", 1L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(4L.asInstanceOf[JLong], "Richard Miles", 2L.asInstanceOf[JLong])).getAll
+
+ ignite
+ }
+}
diff --git a/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/IgniteDataFrameExample.scala b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/IgniteDataFrameExample.scala
new file mode 100644
index 0000000..21bfe90
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/IgniteDataFrameExample.scala
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.examples.spark
+
+import java.lang.{Long ⇒ JLong, String ⇒ JString}
+
+import org.apache.ignite.cache.query.SqlFieldsQuery
+import org.apache.ignite.configuration.CacheConfiguration
+import org.apache.ignite.{Ignite, Ignition}
+import org.apache.logging.log4j.Level
+import org.apache.logging.log4j.core.config.Configurator
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.functions._
+import org.apache.ignite.spark.IgniteDataFrameSettings._
+
+/**
+ * Example application showing use-cases for the Ignite implementation of the Spark DataFrame API.
+ */
+object IgniteDataFrameExample extends App {
+ /**
+ * Ignite config file.
+ */
+ private val CONFIG = "config/example-ignite.xml"
+
+ /**
+ * Test cache name.
+ */
+ private val CACHE_NAME = "testCache"
+
+ //Starting Ignite server node.
+ val ignite = setupServerAndData
+
+ closeAfter(ignite) { ignite ⇒
+ //Creating spark session.
+ implicit val spark = SparkSession.builder()
+ .appName("Spark Ignite data sources example")
+ .master("local")
+ .config("spark.executor.instances", "2")
+ .getOrCreate()
+
+ // Adjust the logger to exclude the logs of no interest.
+ Configurator.setRootLevel(Level.ERROR)
+ Configurator.setLevel("org.apache.ignite", Level.INFO)
+
+ // Executing examples.
+
+ sparkDSLExample
+
+ nativeSparkSqlExample
+ }
+
+ /**
+ * Example of usage of the Ignite DataFrame implementation.
+ * Selects data through the Spark DSL.
+ *
+ * @param spark SparkSession.
+ */
+ def sparkDSLExample(implicit spark: SparkSession): Unit = {
+ println("Querying using Spark DSL.")
+ println
+
+ val igniteDF = spark.read
+ .format(FORMAT_IGNITE) //Data source type.
+ .option(OPTION_TABLE, "person") //Table to read.
+ .option(OPTION_CONFIG_FILE, CONFIG) //Ignite config.
+ .load()
+ .filter(col("id") >= 2) //Filter clause.
+ .filter(col("name") like "%M%") //Another filter clause.
+
+ println("Data frame schema:")
+
+ igniteDF.printSchema() //Printing query schema to console.
+
+ println("Data frame content:")
+
+ igniteDF.show() //Printing query results to console.
+ }
+
+ /**
+ * Example of usage of the Ignite DataFrame implementation.
+ * Registers an Ignite DataFrame as a view for subsequent usage.
+ * Selects data with a Spark SQL query.
+ *
+ * @param spark SparkSession.
+ */
+ def nativeSparkSqlExample(implicit spark: SparkSession): Unit = {
+ println("Querying using Spark SQL.")
+ println
+
+ val df = spark.read
+ .format(FORMAT_IGNITE) //Data source type.
+ .option(OPTION_TABLE, "person") //Table to read.
+ .option(OPTION_CONFIG_FILE, CONFIG) //Ignite config.
+ .load()
+
+ //Registering DataFrame as Spark view.
+ df.createOrReplaceTempView("person")
+
+ //Selecting data from Ignite through the Spark SQL engine.
+ val igniteDF = spark.sql("SELECT * FROM person WHERE id >= 2 AND name = 'Mary Major'")
+
+ println("Result schema:")
+
+ igniteDF.printSchema() //Printing query schema to console.
+
+ println("Result content:")
+
+ igniteDF.show() //Printing query results to console.
+ }
+
+ def setupServerAndData: Ignite = {
+ //Starting Ignite.
+ val ignite = Ignition.start(CONFIG)
+
+ //Creating first test cache.
+ val ccfg = new CacheConfiguration[JLong, JString](CACHE_NAME).setSqlSchema("PUBLIC")
+
+ val cache = ignite.getOrCreateCache(ccfg)
+
+ //Creating SQL tables.
+ cache.query(new SqlFieldsQuery(
+ "CREATE TABLE city (id LONG PRIMARY KEY, name VARCHAR) WITH \"template=replicated\"")).getAll
+
+ cache.query(new SqlFieldsQuery(
+ "CREATE TABLE person (id LONG, name VARCHAR, city_id LONG, PRIMARY KEY (id, city_id)) " +
+ "WITH \"backups=1, affinityKey=city_id\"")).getAll
+
+ cache.query(new SqlFieldsQuery("CREATE INDEX on Person (city_id)")).getAll
+
+ //Inserting some data to tables.
+ var qry = new SqlFieldsQuery("INSERT INTO city (id, name) VALUES (?, ?)")
+
+ cache.query(qry.setArgs(1L.asInstanceOf[JLong], "Forest Hill")).getAll
+ cache.query(qry.setArgs(2L.asInstanceOf[JLong], "Denver")).getAll
+ cache.query(qry.setArgs(3L.asInstanceOf[JLong], "St. Petersburg")).getAll
+
+ qry = new SqlFieldsQuery("INSERT INTO person (id, name, city_id) values (?, ?, ?)")
+
+ cache.query(qry.setArgs(1L.asInstanceOf[JLong], "John Doe", 3L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(2L.asInstanceOf[JLong], "Jane Roe", 2L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(3L.asInstanceOf[JLong], "Mary Major", 1L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(4L.asInstanceOf[JLong], "Richard Miles", 2L.asInstanceOf[JLong])).getAll
+
+ ignite
+ }
+}
diff --git a/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/IgniteDataFrameJoinExample.scala b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/IgniteDataFrameJoinExample.scala
new file mode 100644
index 0000000..0d3d780
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/IgniteDataFrameJoinExample.scala
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.examples.spark
+
+import java.lang.{Integer => JInt, Long => JLong, String => JString}
+
+import org.apache.ignite.cache.query.SqlFieldsQuery
+import org.apache.ignite.configuration.CacheConfiguration
+import org.apache.ignite.spark.IgniteDataFrameSettings._
+import org.apache.ignite.{Ignite, Ignition}
+import org.apache.logging.log4j.Level
+import org.apache.logging.log4j.core.config.Configurator
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Example application that demonstrates join operations between two DataFrames, or Spark tables, with data saved in Ignite caches.
+ */
+object IgniteDataFrameJoinExample extends App {
+ /** Ignite config file. */
+ private val CONFIG = "config/example-ignite.xml"
+
+ /** Test cache name. */
+ private val CACHE_NAME = "testCache"
+
+ // Starting Ignite server node.
+ val ignite = setupServerAndData
+
+ closeAfter(ignite) { ignite ⇒
+ //Creating spark session.
+ implicit val spark = SparkSession.builder()
+ .appName("IgniteDataFrameJoinExample")
+ .master("local")
+ .config("spark.executor.instances", "2")
+ .getOrCreate()
+
+ // Adjust the logger to exclude the logs of no interest.
+ Configurator.setRootLevel(Level.ERROR)
+ Configurator.setLevel("org.apache.ignite", Level.INFO)
+
+ // Executing examples.
+ sparkDSLJoinExample
+ nativeSparkSqlJoinExample
+ }
+
+ /**
+ * Example of usage of the Ignite DataFrame implementation.
+ * Selects data through the Spark DSL.
+ *
+ * @param spark SparkSession.
+ */
+ def sparkDSLJoinExample(implicit spark: SparkSession): Unit = {
+ println("Querying using Spark DSL.")
+ println
+
+ val persons = spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_TABLE, "person")
+ .option(OPTION_CONFIG_FILE, CONFIG)
+ .load()
+
+ persons.printSchema()
+ persons.show()
+
+ val cities = spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_TABLE, "city")
+ .option(OPTION_CONFIG_FILE, CONFIG)
+ .load()
+
+ cities.printSchema()
+ cities.show()
+
+ val joinResult = persons.join(cities, persons("city_id") === cities("id"))
+ .select(persons("name").as("person"), persons("age"), cities("name").as("city"), cities("country"))
+
+ joinResult.explain(true)
+ joinResult.printSchema()
+ joinResult.show()
+ }
+
+ /**
+ * Example of usage of the Ignite DataFrame implementation.
+ * Registers Ignite DataFrames as views for subsequent usage.
+ * Selects data with a Spark SQL query.
+ *
+ * @param spark SparkSession.
+ */
+ def nativeSparkSqlJoinExample(implicit spark: SparkSession): Unit = {
+ println("Querying using Spark SQL.")
+ println
+
+ val persons = spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_TABLE, "person")
+ .option(OPTION_CONFIG_FILE, CONFIG)
+ .load()
+
+ persons.printSchema()
+ persons.show()
+
+ val cities = spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_TABLE, "city")
+ .option(OPTION_CONFIG_FILE, CONFIG)
+ .load()
+
+ cities.printSchema()
+ cities.show()
+
+ // Registering DataFrame as Spark view.
+ persons.createOrReplaceTempView("person")
+ cities.createOrReplaceTempView("city")
+
+ // Selecting data from Ignite through the Spark SQL engine.
+ val joinResult = spark.sql("""
+ | SELECT
+ | person.name AS person,
+ | age,
+ | city.name AS city,
+ | country
+ | FROM
+ | person JOIN
+ | city ON person.city_id = city.id
+ """.stripMargin);
+
+ joinResult.explain(true)
+ joinResult.printSchema()
+ joinResult.show()
+ }
+
+ def setupServerAndData: Ignite = {
+ // Starting Ignite.
+ val ignite = Ignition.start(CONFIG)
+
+ // Creating first test cache.
+ val ccfg = new CacheConfiguration[JLong, JString](CACHE_NAME).setSqlSchema("PUBLIC")
+
+ val cache = ignite.getOrCreateCache(ccfg)
+
+ // Creating SQL tables.
+ cache.query(new SqlFieldsQuery(
+ "CREATE TABLE city (id LONG PRIMARY KEY, name VARCHAR, country VARCHAR) WITH \"template=replicated\"")).getAll
+
+ cache.query(new SqlFieldsQuery(
+ "CREATE TABLE person (id LONG, name VARCHAR, age INT, city_id LONG, PRIMARY KEY (id, city_id)) " +
+ "WITH \"backups=1, affinityKey=city_id\"")).getAll
+
+ cache.query(new SqlFieldsQuery("CREATE INDEX on Person (city_id)")).getAll
+
+ // Inserting some data to tables.
+ var qry = new SqlFieldsQuery("INSERT INTO city (id, name, country) VALUES (?, ?, ?)")
+
+ cache.query(qry.setArgs(1L.asInstanceOf[JLong], "Forest Hill", "USA")).getAll
+ cache.query(qry.setArgs(2L.asInstanceOf[JLong], "Denver", "USA")).getAll
+ cache.query(qry.setArgs(3L.asInstanceOf[JLong], "St. Petersburg", "Russia")).getAll
+
+ qry = new SqlFieldsQuery("INSERT INTO person (id, name, age, city_id) values (?, ?, ?, ?)")
+
+ cache.query(qry.setArgs(1L.asInstanceOf[JLong], "John Doe", 31.asInstanceOf[JInt], 3L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(2L.asInstanceOf[JLong], "Jane Roe", 27.asInstanceOf[JInt], 2L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(3L.asInstanceOf[JLong], "Mary Major", 86.asInstanceOf[JInt], 1L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(4L.asInstanceOf[JLong], "Richard Miles", 19.asInstanceOf[JInt], 2L.asInstanceOf[JLong])).getAll
+
+ ignite
+ }
+}
diff --git a/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/IgniteDataFrameWriteExample.scala b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/IgniteDataFrameWriteExample.scala
new file mode 100644
index 0000000..bc3e536
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/IgniteDataFrameWriteExample.scala
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.examples.spark
+
+import java.lang.{Long => JLong, String => JString}
+import org.apache.ignite.cache.query.SqlFieldsQuery
+import org.apache.ignite.configuration.CacheConfiguration
+import org.apache.ignite.internal.util.IgniteUtils.gridClassLoader
+import org.apache.ignite.{Ignite, Ignition}
+import org.apache.spark.sql.{SaveMode, SparkSession}
+import org.apache.ignite.spark.IgniteDataFrameSettings._
+import org.apache.logging.log4j.Level
+import org.apache.logging.log4j.core.config.Configurator
+import org.apache.spark.sql.functions._
+
+import scala.collection.JavaConversions._
+
+/**
+ * Example application showing a use-case for writing data to Ignite through the Spark DataFrame API.
+ */
+object IgniteDataFrameWriteExample extends App {
+ /**
+ * Ignite config file.
+ */
+ private val CONFIG = "config/example-ignite.xml"
+
+ /**
+ * Test cache name.
+ */
+ private val CACHE_NAME = "testCache"
+
+ //Starting Ignite server node.
+ val ignite = setupServerAndData
+
+ closeAfter(ignite) { _ ⇒
+ //Creating spark session.
+ implicit val spark: SparkSession = SparkSession.builder()
+ .appName("Spark Ignite data sources write example")
+ .master("local")
+ .config("spark.executor.instances", "2")
+ .getOrCreate()
+
+ // Adjust the logger to exclude the logs of no interest.
+ Configurator.setRootLevel(Level.INFO)
+ Configurator.setLevel("org.apache.ignite", Level.INFO)
+
+ // Executing examples.
+ println("Example of writing json file to Ignite:")
+
+ writeJSonToIgnite
+
+ println("Example of modifying existing Ignite table data through Data Fram API:")
+
+ editDataAndSaveToNewTable
+ }
+
+ def writeJSonToIgnite(implicit spark: SparkSession): Unit = {
+ //Load content of json file to data frame.
+ val personsDataFrame = spark.read.json(
+ gridClassLoader.getResource("person.json").getFile)
+
+ println()
+ println("Json file content:")
+ println()
+
+ //Printing content of json file to console.
+ personsDataFrame.show()
+
+ println()
+ println("Writing Data Frame to Ignite:")
+ println()
+
+ //Writing content of data frame to Ignite.
+ personsDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, CONFIG)
+ .option(OPTION_TABLE, "json_person")
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id")
+ .option(OPTION_CREATE_TABLE_PARAMETERS, "template=replicated")
+ .save()
+
+ println("Done!")
+
+ println()
+ println("Reading data from Ignite table:")
+ println()
+
+ val cache = ignite.cache[Any, Any](CACHE_NAME)
+
+ //Reading saved data from Ignite.
+ val data = cache.query(new SqlFieldsQuery("SELECT id, name, department FROM json_person")).getAll
+
+ data.foreach { row ⇒ println(row.mkString("[", ", ", "]")) }
+ }
+
+ def editDataAndSaveToNewTable(implicit spark: SparkSession): Unit = {
+ //Load content of Ignite table to data frame.
+ val personDataFrame = spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, CONFIG)
+ .option(OPTION_TABLE, "person")
+ .load()
+
+ println()
+ println("Data frame content:")
+ println()
+
+ //Printing content of data frame to console.
+ personDataFrame.show()
+
+ println()
+ println("Modifying Data Frame and write it to Ignite:")
+ println()
+
+ personDataFrame
+ .withColumn("id", col("id") + 42) //Edit id column
+ .withColumn("name", reverse(col("name"))) //Edit name column
+ .write.format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, CONFIG)
+ .option(OPTION_TABLE, "new_persons")
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id, city_id")
+ .option(OPTION_CREATE_TABLE_PARAMETERS, "backups=1")
+ .mode(SaveMode.Overwrite) //Overwriting entire table.
+ .save()
+
+ println("Done!")
+
+ println()
+ println("Reading data from Ignite table:")
+ println()
+
+ val cache = ignite.cache[Any, Any](CACHE_NAME)
+
+ //Reading saved data from Ignite.
+ val data = cache.query(new SqlFieldsQuery("SELECT id, name, city_id FROM new_persons")).getAll
+
+ data.foreach { row ⇒ println(row.mkString("[", ", ", "]")) }
+ }
+
+ def setupServerAndData: Ignite = {
+ //Starting Ignite.
+ val ignite = Ignition.start(CONFIG)
+
+ //Creating first test cache.
+ val ccfg = new CacheConfiguration[JLong, JString](CACHE_NAME).setSqlSchema("PUBLIC")
+
+ val cache = ignite.getOrCreateCache(ccfg)
+
+ //Creating SQL table.
+ cache.query(new SqlFieldsQuery(
+ "CREATE TABLE person (id LONG, name VARCHAR, city_id LONG, PRIMARY KEY (id)) " +
+ "WITH \"backups=1\"")).getAll
+
+ cache.query(new SqlFieldsQuery("CREATE INDEX on Person (city_id)")).getAll
+
+ //Inserting some data to tables.
+ val qry = new SqlFieldsQuery("INSERT INTO person (id, name, city_id) values (?, ?, ?)")
+
+ cache.query(qry.setArgs(1L.asInstanceOf[JLong], "John Doe", 3L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(2L.asInstanceOf[JLong], "Jane Roe", 2L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(3L.asInstanceOf[JLong], "Mary Major", 1L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(4L.asInstanceOf[JLong], "Richard Miles", 2L.asInstanceOf[JLong])).getAll
+
+ ignite
+ }
+}
diff --git a/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/JavaIgniteCatalogExample.java b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/JavaIgniteCatalogExample.java
new file mode 100644
index 0000000..e04ea0a
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/JavaIgniteCatalogExample.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.examples.spark;
+
+import org.apache.ignite.Ignite;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.Ignition;
+import org.apache.ignite.cache.query.SqlFieldsQuery;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.core.config.Configurator;
+import org.apache.spark.sql.AnalysisException;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.ignite.IgniteSparkSession;
+
+import static org.apache.ignite.internal.util.typedef.X.println;
+
+/**
+ * Example application to show a use-case for the Ignite implementation of the Spark SQL catalog from Java.
+ */
+public class JavaIgniteCatalogExample {
+ /**
+ * Ignite config file.
+ */
+ private static final String CONFIG = "config/example-ignite.xml";
+
+ /**
+ * Test cache name.
+ */
+ private static final String CACHE_NAME = "testCache";
+
+ /** @param args Command line arguments. */
+ public static void main(String args[]) throws AnalysisException {
+
+ setupServerAndData();
+
+ //Creating Ignite-specific implementation of Spark session.
+ IgniteSparkSession igniteSession = IgniteSparkSession.builder()
+ .appName("Spark Ignite catalog example")
+ .master("local")
+ .config("spark.executor.instances", "2")
+ .igniteConfig(CONFIG)
+ .getOrCreate();
+
+ //Adjust the logger to exclude the logs of no interest.
+ Configurator.setRootLevel(Level.ERROR);
+ Configurator.setLevel("org.apache.ignite", Level.INFO);
+
+ System.out.println("List of available tables:");
+
+ //Showing existing tables.
+ igniteSession.catalog().listTables().show();
+
+ System.out.println("PERSON table description:");
+
+ //Showing `person` schema.
+ igniteSession.catalog().listColumns("person").show();
+
+ System.out.println("CITY table description:");
+
+ //Showing `city` schema.
+ igniteSession.catalog().listColumns("city").show();
+
+ println("Querying all persons from city with ID=2.");
+
+ //Selecting data through Spark SQL engine.
+ Dataset<Row> df = igniteSession.sql("SELECT * FROM person WHERE CITY_ID = 2");
+
+ System.out.println("Result schema:");
+
+ df.printSchema();
+
+ System.out.println("Result content:");
+
+ df.show();
+
+ System.out.println("Querying all persons living in Denver.");
+
+ //Selecting data through Spark SQL engine.
+ Dataset<Row> df2 = igniteSession.sql("SELECT * FROM person p JOIN city c ON c.ID = p.CITY_ID WHERE c.NAME = 'Denver'");
+
+ System.out.println("Result schema:");
+
+ df2.printSchema();
+
+ System.out.println("Result content:");
+
+ df2.show();
+
+ Ignition.stop(false);
+ }
+
+ /** */
+ private static void setupServerAndData() {
+ //Starting Ignite.
+ Ignite ignite = Ignition.start(CONFIG);
+
+ //Creating cache.
+ CacheConfiguration<?, ?> ccfg = new CacheConfiguration<>(CACHE_NAME).setSqlSchema("PUBLIC");
+
+ IgniteCache<?, ?> cache = ignite.getOrCreateCache(ccfg);
+
+ //Create tables.
+ cache.query(new SqlFieldsQuery(
+ "CREATE TABLE city (id LONG PRIMARY KEY, name VARCHAR) WITH \"template=replicated\"")).getAll();
+
+ cache.query(new SqlFieldsQuery(
+ "CREATE TABLE person (id LONG, name VARCHAR, city_id LONG, PRIMARY KEY (id, city_id)) " +
+ "WITH \"backups=1, affinityKey=city_id\"")).getAll();
+
+ cache.query(new SqlFieldsQuery("CREATE INDEX on Person (city_id)")).getAll();
+
+ //Inserting some data into table.
+ SqlFieldsQuery qry = new SqlFieldsQuery("INSERT INTO city (id, name) VALUES (?, ?)");
+
+ cache.query(qry.setArgs(1L, "Forest Hill")).getAll();
+ cache.query(qry.setArgs(2L, "Denver")).getAll();
+ cache.query(qry.setArgs(3L, "St. Petersburg")).getAll();
+
+ qry = new SqlFieldsQuery("INSERT INTO person (id, name, city_id) values (?, ?, ?)");
+
+ cache.query(qry.setArgs(1L, "John Doe", 3L)).getAll();
+ cache.query(qry.setArgs(2L, "Jane Roe", 2L)).getAll();
+ cache.query(qry.setArgs(3L, "Mary Major", 1L)).getAll();
+ cache.query(qry.setArgs(4L, "Richard Miles", 2L)).getAll();
+ }
+}
diff --git a/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/JavaIgniteDataFrameExample.java b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/JavaIgniteDataFrameExample.java
new file mode 100644
index 0000000..93fec36
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/JavaIgniteDataFrameExample.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.examples.spark;
+
+import org.apache.ignite.Ignite;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.Ignition;
+import org.apache.ignite.cache.query.SqlFieldsQuery;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.spark.IgniteDataFrameSettings;
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.core.config.Configurator;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+
+import static org.apache.spark.sql.functions.col;
+
+/**
+ * Example application showing use-cases for the Ignite implementation of the Spark DataFrame API from Java.
+ */
+public class JavaIgniteDataFrameExample {
+ /**
+ * Ignite config file.
+ */
+ private static final String CONFIG = "config/example-ignite.xml";
+
+ /**
+ * Test cache name.
+ */
+ private static final String CACHE_NAME = "testCache";
+
+ /** @param args Command line arguments. */
+ public static void main(String args[]) {
+
+ setupServerAndData();
+
+ //Creating spark session.
+ SparkSession spark = SparkSession
+ .builder()
+ .appName("JavaIgniteDataFrameExample")
+ .master("local")
+ .config("spark.executor.instances", "2")
+ .getOrCreate();
+
+ // Adjust the logger to exclude the logs of no interest.
+ Configurator.setRootLevel(Level.ERROR);
+ Configurator.setLevel("org.apache.ignite", Level.INFO);
+
+ // Executing examples.
+
+ sparkDSLExample(spark);
+
+ nativeSparkSqlExample(spark);
+
+ Ignition.stop(false);
+ }
+
+ /** */
+ private static void sparkDSLExample(SparkSession spark) {
+ System.out.println("Querying using Spark DSL.");
+
+ Dataset<Row> igniteDF = spark.read()
+ .format(IgniteDataFrameSettings.FORMAT_IGNITE()) //Data source type.
+ .option(IgniteDataFrameSettings.OPTION_TABLE(), "person") //Table to read.
+ .option(IgniteDataFrameSettings.OPTION_CONFIG_FILE(), CONFIG) //Ignite config.
+ .load()
+ .filter(col("id").geq(2)) //Filter clause.
+ .filter(col("name").like("%M%")); //Another filter clause.
+
+ System.out.println("Data frame schema:");
+
+ igniteDF.printSchema(); //Printing query schema to console.
+
+ System.out.println("Data frame content:");
+
+ igniteDF.show(); //Printing query results to console.
+ }
+
+ /** */
+ private static void nativeSparkSqlExample(SparkSession spark) {
+ System.out.println("Querying using Spark SQL.");
+
+ Dataset<Row> df = spark.read()
+ .format(IgniteDataFrameSettings.FORMAT_IGNITE()) //Data source type.
+ .option(IgniteDataFrameSettings.OPTION_TABLE(), "person") //Table to read.
+ .option(IgniteDataFrameSettings.OPTION_CONFIG_FILE(), CONFIG) //Ignite config.
+ .load();
+
+ //Registering DataFrame as Spark view.
+ df.createOrReplaceTempView("person");
+
+ //Selecting data from Ignite through Spark SQL Engine.
+ Dataset<Row> igniteDF = spark.sql("SELECT * FROM person WHERE id >= 2 AND name = 'Mary Major'");
+
+ System.out.println("Result schema:");
+
+ igniteDF.printSchema(); //Printing query schema to console.
+
+ System.out.println("Result content:");
+
+ igniteDF.show(); //Printing query results to console.
+ }
+
+ /** */
+ private static void setupServerAndData() {
+ //Starting Ignite.
+ Ignite ignite = Ignition.start(CONFIG);
+
+ //Creating first test cache.
+ CacheConfiguration<?, ?> ccfg = new CacheConfiguration<>(CACHE_NAME).setSqlSchema("PUBLIC");
+
+ IgniteCache<?, ?> cache = ignite.getOrCreateCache(ccfg);
+
+ //Creating SQL tables.
+ cache.query(new SqlFieldsQuery(
+ "CREATE TABLE city (id LONG PRIMARY KEY, name VARCHAR) WITH \"template=replicated\"")).getAll();
+
+ cache.query(new SqlFieldsQuery(
+ "CREATE TABLE person (id LONG, name VARCHAR, city_id LONG, PRIMARY KEY (id, city_id)) " +
+ "WITH \"backups=1, affinity_key=city_id\"")).getAll();
+
+ cache.query(new SqlFieldsQuery("CREATE INDEX on Person (city_id)")).getAll();
+
+ SqlFieldsQuery qry = new SqlFieldsQuery("INSERT INTO city (id, name) VALUES (?, ?)");
+
+ //Inserting some data to tables.
+ cache.query(qry.setArgs(1L, "Forest Hill")).getAll();
+ cache.query(qry.setArgs(2L, "Denver")).getAll();
+ cache.query(qry.setArgs(3L, "St. Petersburg")).getAll();
+
+ qry = new SqlFieldsQuery("INSERT INTO person (id, name, city_id) values (?, ?, ?)");
+
+ cache.query(qry.setArgs(1L, "John Doe", 3L)).getAll();
+ cache.query(qry.setArgs(2L, "Jane Roe", 2L)).getAll();
+ cache.query(qry.setArgs(3L, "Mary Major", 1L)).getAll();
+ cache.query(qry.setArgs(4L, "Richard Miles", 2L)).getAll();
+ }
+}
diff --git a/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/JavaIgniteDataFrameJoinExample.java b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/JavaIgniteDataFrameJoinExample.java
new file mode 100644
index 0000000..8f82eda
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/JavaIgniteDataFrameJoinExample.java
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.examples.spark;
+
+import org.apache.ignite.Ignite;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.Ignition;
+import org.apache.ignite.cache.query.SqlFieldsQuery;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.spark.IgniteDataFrameSettings;
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.core.config.Configurator;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+
+/**
+ * Example application that demonstrates join operations between two DataFrames, or Spark tables, with data saved in Ignite caches.
+ */
+public class JavaIgniteDataFrameJoinExample {
+ /** Ignite config file. */
+ private static final String CONFIG = "config/example-ignite.xml";
+
+ /** Test cache name. */
+ private static final String CACHE_NAME = "testCache";
+
+ /** @param args Command line arguments. */
+ public static void main(String args[]) {
+
+ setupServerAndData();
+
+ // Creating spark session.
+ SparkSession spark = SparkSession
+ .builder()
+ .appName("JavaIgniteDataFrameJoinExample")
+ .master("local")
+ .config("spark.executor.instances", "2")
+ .getOrCreate();
+
+ // Adjust the logger to exclude the logs of no interest.
+ Configurator.setRootLevel(Level.ERROR);
+ Configurator.setLevel("org.apache.ignite", Level.INFO);
+
+ // Executing examples.
+ sparkDSLJoinExample(spark);
+ nativeSparkSqlJoinExample(spark);
+
+ Ignition.stop(false);
+ }
+
+ /** */
+ private static void sparkDSLJoinExample(SparkSession spark) {
+ System.out.println("Querying using Spark DSL.");
+
+ Dataset<Row> persons = spark.read()
+ .format(IgniteDataFrameSettings.FORMAT_IGNITE())
+ .option(IgniteDataFrameSettings.OPTION_TABLE(), "person")
+ .option(IgniteDataFrameSettings.OPTION_CONFIG_FILE(), CONFIG)
+ .load();
+
+ persons.printSchema();
+ persons.show();
+
+ Dataset<Row> cities = spark.read()
+ .format(IgniteDataFrameSettings.FORMAT_IGNITE())
+ .option(IgniteDataFrameSettings.OPTION_TABLE(), "city")
+ .option(IgniteDataFrameSettings.OPTION_CONFIG_FILE(), CONFIG)
+ .load();
+
+ cities.printSchema();
+ cities.show();
+
+ Dataset<Row> joinResult = persons.join(cities, persons.col("city_id").equalTo(cities.col("id")))
+ .select(persons.col("name").as("person"), persons.col("age"), cities.col("name").as("city"), cities.col("country"));
+
+ joinResult.explain(true);
+ joinResult.printSchema();
+ joinResult.show();
+ }
+
+ /** */
+ private static void nativeSparkSqlJoinExample(SparkSession spark) {
+ System.out.println("Querying using Spark SQL.");
+
+ Dataset<Row> persons = spark.read()
+ .format(IgniteDataFrameSettings.FORMAT_IGNITE())
+ .option(IgniteDataFrameSettings.OPTION_TABLE(), "person")
+ .option(IgniteDataFrameSettings.OPTION_CONFIG_FILE(), CONFIG)
+ .load();
+
+ persons.printSchema();
+ persons.show();
+
+ Dataset<Row> cities = spark.read()
+ .format(IgniteDataFrameSettings.FORMAT_IGNITE())
+ .option(IgniteDataFrameSettings.OPTION_TABLE(), "city")
+ .option(IgniteDataFrameSettings.OPTION_CONFIG_FILE(), CONFIG)
+ .load();
+
+ cities.printSchema();
+ cities.show();
+
+ // Registering DataFrame as Spark view.
+ persons.createOrReplaceTempView("person");
+ cities.createOrReplaceTempView("city");
+
+ // Selecting data from Ignite through Spark SQL Engine.
+ Dataset<Row> joinResult = spark.sql(
+ "SELECT person.name AS person, age, city.name AS city, country FROM person JOIN city ON person.city_id = city.id"
+ );
+
+ joinResult.explain(true);
+ joinResult.printSchema();
+ joinResult.show();
+ }
+
+ /** */
+ private static void setupServerAndData() {
+ // Starting Ignite.
+ Ignite ignite = Ignition.start(CONFIG);
+
+ // Creating first test cache.
+ CacheConfiguration<?, ?> ccfg = new CacheConfiguration<>(CACHE_NAME).setSqlSchema("PUBLIC");
+
+ IgniteCache<?, ?> cache = ignite.getOrCreateCache(ccfg);
+
+ // Creating SQL tables.
+ cache.query(new SqlFieldsQuery(
+ "CREATE TABLE city (id LONG PRIMARY KEY, name VARCHAR, country VARCHAR) WITH \"template=replicated\"")).getAll();
+
+ cache.query(new SqlFieldsQuery(
+ "CREATE TABLE person (id LONG, name VARCHAR, age INT, city_id LONG, PRIMARY KEY (id, city_id)) " +
+ "WITH \"backups=1, affinity_key=city_id\"")).getAll();
+
+ cache.query(new SqlFieldsQuery("CREATE INDEX on Person (city_id)")).getAll();
+
+ SqlFieldsQuery qry = new SqlFieldsQuery("INSERT INTO city (id, name, country) VALUES (?, ?, ?)");
+
+ // Inserting some data to tables.
+ cache.query(qry.setArgs(1L, "Forest Hill", "USA")).getAll();
+ cache.query(qry.setArgs(2L, "Denver", "USA")).getAll();
+ cache.query(qry.setArgs(3L, "St. Petersburg", "Russia")).getAll();
+
+ qry = new SqlFieldsQuery("INSERT INTO person (id, name, age, city_id) values (?, ?, ?, ?)");
+
+ cache.query(qry.setArgs(1L, "Alexey Zinoviev", 31, 3L)).getAll();
+ cache.query(qry.setArgs(2L, "Jane Roe", 27, 2L)).getAll();
+ cache.query(qry.setArgs(3L, "Mary Major", 86, 1L)).getAll();
+ cache.query(qry.setArgs(4L, "Richard Miles", 19, 2L)).getAll();
+ }
+}
diff --git a/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/JavaIgniteDataFrameWriteExample.java b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/JavaIgniteDataFrameWriteExample.java
new file mode 100644
index 0000000..4cf77a9
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/JavaIgniteDataFrameWriteExample.java
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.examples.spark;
+
+import java.util.List;
+import org.apache.ignite.Ignite;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.Ignition;
+import org.apache.ignite.cache.query.SqlFieldsQuery;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.spark.IgniteDataFrameSettings;
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.core.config.Configurator;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+
+import static org.apache.ignite.internal.util.IgniteUtils.gridClassLoader;
+import static org.apache.spark.sql.functions.col;
+import static org.apache.spark.sql.functions.reverse;
+
+/**
+ *
+ */
+public class JavaIgniteDataFrameWriteExample {
+ /**
+ * Ignite config file.
+ */
+ private static final String CONFIG = "config/example-ignite.xml";
+
+ /**
+ * Test cache name.
+ */
+ private static final String CACHE_NAME = "testCache";
+
+ /** @param args Command line arguments. */
+ public static void main(String args[]) {
+ //Starting Ignite.
+ Ignite ignite = Ignition.start(CONFIG);
+
+ //Creating test cache, table and sample data.
+ setupServerAndData(ignite);
+
+ //Creating spark session.
+ SparkSession spark = SparkSession
+ .builder()
+ .appName("Spark Ignite data sources write example")
+ .master("local")
+ .config("spark.executor.instances", "2")
+ .getOrCreate();
+
+ // Adjust the logger to exclude the logs of no interest.
+ Configurator.setRootLevel(Level.ERROR);
+ Configurator.setLevel("org.apache.ignite", Level.INFO);
+
+ // Executing examples.
+ System.out.println("Example of writing json file to Ignite:");
+
+ writeJSonToIgnite(ignite, spark);
+
+ System.out.println("Example of modifying existing Ignite table data through Data Fram API:");
+
+ editDataAndSaveToNewTable(ignite, spark);
+
+ Ignition.stop(false);
+ }
+
+ /** */
+ private static void writeJSonToIgnite(Ignite ignite, SparkSession spark) {
+ //Load content of json file to data frame.
+ Dataset<Row> personsDataFrame = spark.read().json(
+ gridClassLoader().getResource("person.json").getFile());
+
+ System.out.println("Json file content:");
+
+ //Printing content of json file to console.
+ personsDataFrame.show();
+
+ System.out.println("Writing Data Frame to Ignite:");
+
+ //Writing content of data frame to Ignite.
+ personsDataFrame.write()
+ .format(IgniteDataFrameSettings.FORMAT_IGNITE())
+ .option(IgniteDataFrameSettings.OPTION_CONFIG_FILE(), CONFIG)
+ .option(IgniteDataFrameSettings.OPTION_TABLE(), "json_person")
+ .option(IgniteDataFrameSettings.OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS(), "id")
+ .option(IgniteDataFrameSettings.OPTION_CREATE_TABLE_PARAMETERS(), "template=replicated")
+ .save();
+
+ System.out.println("Done!");
+
+ System.out.println("Reading data from Ignite table:");
+
+ CacheConfiguration<?, ?> ccfg = new CacheConfiguration<>(CACHE_NAME);
+
+ IgniteCache<?, ?> cache = ignite.getOrCreateCache(ccfg);
+
+ //Reading saved data from Ignite.
+ List<List<?>> data = cache.query(new SqlFieldsQuery("SELECT id, name, department FROM json_person")).getAll();
+
+ System.out.println(data);
+ }
+
+ /** */
+ private static void editDataAndSaveToNewTable(Ignite ignite, SparkSession spark) {
+ //Load content of Ignite table to data frame.
+ Dataset<Row> personDataFrame = spark.read()
+ .format(IgniteDataFrameSettings.FORMAT_IGNITE())
+ .option(IgniteDataFrameSettings.OPTION_CONFIG_FILE(), CONFIG)
+ .option(IgniteDataFrameSettings.OPTION_TABLE(), "person")
+ .load();
+
+ System.out.println("Data frame content:");
+
+ //Printing content of data frame to console.
+ personDataFrame.show();
+
+ System.out.println("Modifying Data Frame and write it to Ignite:");
+
+ personDataFrame
+ .withColumn("id", col("id").plus(42)) //Edit id column
+ .withColumn("name", reverse(col("name"))) //Edit name column
+ .write().format(IgniteDataFrameSettings.FORMAT_IGNITE())
+ .option(IgniteDataFrameSettings.OPTION_CONFIG_FILE(), CONFIG)
+ .option(IgniteDataFrameSettings.OPTION_TABLE(), "new_persons")
+ .option(IgniteDataFrameSettings.OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS(), "id, city_id")
+ .option(IgniteDataFrameSettings.OPTION_CREATE_TABLE_PARAMETERS(), "backups=1")
+ .mode(SaveMode.Overwrite) //Overwriting entire table.
+ .save();
+
+ System.out.println("Done!");
+
+ System.out.println("Reading data from Ignite table:");
+
+ CacheConfiguration<?, ?> ccfg = new CacheConfiguration<>(CACHE_NAME);
+
+ IgniteCache<?, ?> cache = ignite.getOrCreateCache(ccfg);
+
+ //Reading saved data from Ignite.
+ List<List<?>> data = cache.query(new SqlFieldsQuery("SELECT id, name, city_id FROM new_persons")).getAll();
+
+ System.out.println(data);
+ }
+
+ /** */
+ private static void setupServerAndData(Ignite ignite) {
+ //Creating first test cache.
+ CacheConfiguration<?, ?> ccfg = new CacheConfiguration<>(CACHE_NAME).setSqlSchema("PUBLIC");
+
+ IgniteCache<?, ?> cache = ignite.getOrCreateCache(ccfg);
+
+ //Creating SQL table.
+ cache.query(new SqlFieldsQuery(
+ "CREATE TABLE person (id LONG, name VARCHAR, city_id LONG, PRIMARY KEY (id)) " +
+ "WITH \"backups=1\"")).getAll();
+
+ cache.query(new SqlFieldsQuery("CREATE INDEX on Person (city_id)")).getAll();
+
+ //Inserting some data to tables.
+ SqlFieldsQuery qry = new SqlFieldsQuery("INSERT INTO person (id, name, city_id) values (?, ?, ?)");
+
+ cache.query(qry.setArgs(1L, "John Doe", 3L)).getAll();
+ cache.query(qry.setArgs(2L, "Jane Roe", 2L)).getAll();
+ cache.query(qry.setArgs(3L, "Mary Major", 1L)).getAll();
+ cache.query(qry.setArgs(4L, "Richard Miles", 2L)).getAll();
+ }
+}
diff --git a/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/SharedRDDExample.java b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/SharedRDDExample.java
new file mode 100644
index 0000000..d7db573
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/SharedRDDExample.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.examples.spark;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.ignite.spark.JavaIgniteContext;
+import org.apache.ignite.spark.JavaIgniteRDD;
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.core.config.Configurator;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.PairFunction;
+import org.apache.spark.api.java.function.VoidFunction;
+import org.apache.spark.sql.Dataset;
+import scala.Tuple2;
+
+/**
+ * This example demonstrates how to create a JavaIgniteRDD and share it with multiple Spark workers. The goal of this
+ * particular example is to provide the simplest code example of this logic.
+ * <p>
+ * This example will start Ignite in the embedded mode and will start a JavaIgniteContext on each Spark worker node.
+ * <p>
+ * The example can also work in the standalone mode, which can be enabled by setting the JavaIgniteContext's
+ * {@code standalone} property to {@code true} and running an Ignite node separately with the
+ * `examples/config/spark/example-shared-rdd.xml` config.
+ */
+public class SharedRDDExample {
+ /**
+ * Executes the example.
+ * @param args Command line arguments, none required.
+ */
+ public static void main(String args[]) {
+ // Spark Configuration.
+ SparkConf sparkConf = new SparkConf()
+ .setAppName("JavaIgniteRDDExample")
+ .setMaster("local")
+ .set("spark.executor.instances", "2");
+
+ // Spark context.
+ JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
+
+ // Adjust the logger to exclude the logs of no interest.
+ Configurator.setRootLevel(Level.ERROR);
+ Configurator.setLevel("org.apache.ignite", Level.INFO);
+
+ // Creates Ignite context with specific configuration and runs Ignite in the embedded mode.
+ JavaIgniteContext<Integer, Integer> igniteContext = new JavaIgniteContext<Integer, Integer>(
+ sparkContext, "config/spark/example-shared-rdd.xml", false);
+
+ // Create a JavaIgniteRDD of (Integer, Integer) pairs.
+ JavaIgniteRDD<Integer, Integer> sharedRDD = igniteContext.<Integer, Integer>fromCache("sharedRDD");
+
+ // Define data to be stored in the Ignite RDD (cache).
+ List<Integer> data = new ArrayList<>(20);
+
+ for (int i = 0; i < 20; i++) {
+ data.add(i);
+ }
+
+ // Preparing a Java RDD.
+ JavaRDD<Integer> javaRDD = sparkContext.<Integer>parallelize(data);
+
+ // Fill the Ignite RDD with Integer pairs. Here pairs are represented as Scala Tuple2.
+ sharedRDD.savePairs(javaRDD.<Integer, Integer>mapToPair(new PairFunction<Integer, Integer, Integer>() {
+ @Override public Tuple2<Integer, Integer> call(Integer val) throws Exception {
+ return new Tuple2<Integer, Integer>(val, val);
+ }
+ }));
+
+ System.out.println(">>> Iterating over Ignite Shared RDD...");
+
+ // Iterate over the Ignite RDD.
+ sharedRDD.foreach(new VoidFunction<Tuple2<Integer, Integer>>() {
+ @Override public void call(Tuple2<Integer, Integer> tuple) throws Exception {
+ System.out.println("(" + tuple._1 + "," + tuple._2 + ")");
+ }
+ });
+
+ System.out.println(">>> Transforming values stored in Ignite Shared RDD...");
+
+ // Keep only the even values as a transformed RDD.
+ JavaPairRDD<Integer, Integer> transformedValues =
+ sharedRDD.filter(new Function<Tuple2<Integer, Integer>, Boolean>() {
+ @Override public Boolean call(Tuple2<Integer, Integer> tuple) throws Exception {
+ return tuple._2() % 2 == 0;
+ }
+ });
+
+ // Print out the transformed values.
+ transformedValues.foreach(new VoidFunction<Tuple2<Integer, Integer>>() {
+ @Override public void call(Tuple2<Integer, Integer> tuple) throws Exception {
+ System.out.println("(" + tuple._1 + "," + tuple._2 + ")");
+ }
+ });
+
+ System.out.println(">>> Executing SQL query over Ignite Shared RDD...");
+
+ // Execute SQL query over the Ignite RDD.
+ Dataset df = sharedRDD.sql("select _val from Integer where _key < 9");
+
+ // Show the result of the execution.
+ df.show();
+
+ // Close IgniteContext on all the workers.
+ igniteContext.close(true);
+ }
+}
diff --git a/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/package-info.java b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/package-info.java
new file mode 100644
index 0000000..9a16d23
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/package-info.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <!-- Package description. -->
+ * Basic examples of Ignite functionality with Spark.
+ */
+
+package org.apache.ignite.examples.spark;
diff --git a/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/package.scala b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/package.scala
new file mode 100644
index 0000000..a877d82
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/src/main/java/org/apache/ignite/examples/spark/package.scala
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.ignite.examples
+
+/**
+ */
+package object spark {
+ /**
+ * Utility object.
+ * Takes an `AutoCloseable` resource and a closure to work with it.
+ * After the work is done, closes the resource.
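+ *
+ * A minimal usage sketch showing how the resource is closed once the closure completes (the started Ignite
+ * instance and the `cacheNames` call are illustrative only, not part of this module):
+ * {{{
+ * closeAfter(Ignition.start(cfg)) { ignite ⇒
+ *     ignite.cacheNames()
+ * }
+ * }}}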
+ */
+ object closeAfter {
+ def apply[R <: AutoCloseable, T](r: R)(c: (R) ⇒ T) = {
+ try {
+ c(r)
+ }
+ finally {
+ r.close
+ }
+ }
+ }
+}
diff --git a/modules/spark-3.2-ext/examples/src/main/resources/config/spark/example-shared-rdd.xml b/modules/spark-3.2-ext/examples/src/main/resources/config/spark/example-shared-rdd.xml
new file mode 100644
index 0000000..83de6a3
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/src/main/resources/config/spark/example-shared-rdd.xml
@@ -0,0 +1,83 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+ Ignite Spring configuration file to startup Ignite cache.
+
+ This file demonstrates how to configure cache using Spring. Provided cache
+ will be created on node startup.
+
+ When starting a standalone node, you need to execute the following command:
+ {IGNITE_HOME}/bin/ignite.{bat|sh} examples/config/example-shared-rdd.xml
+
+ When starting Ignite from Java IDE, pass path to this file to Ignition:
+ Ignition.start("examples/config/example-shared-rdd.xml");
+-->
+<beans xmlns="http://www.springframework.org/schema/beans"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="
+ http://www.springframework.org/schema/beans
+ http://www.springframework.org/schema/beans/spring-beans.xsd">
+
+ <bean id="ignite.cfg" class="org.apache.ignite.configuration.IgniteConfiguration">
+ <property name="cacheConfiguration">
+ <!-- SharedRDD cache example configuration (Atomic mode). -->
+ <bean class="org.apache.ignite.configuration.CacheConfiguration">
+ <!-- Set a cache name. -->
+ <property name="name" value="sharedRDD"/>
+ <!-- Set a cache mode. -->
+ <property name="cacheMode" value="PARTITIONED"/>
+ <!-- Index Integer pairs used in the example. -->
+ <property name="indexedTypes">
+ <list>
+ <value>java.lang.Integer</value>
+ <value>java.lang.Integer</value>
+ </list>
+ </property>
+ <!-- Set atomicity mode. -->
+ <property name="atomicityMode" value="ATOMIC"/>
+ <!-- Configure a number of backups. -->
+ <property name="backups" value="1"/>
+ </bean>
+ </property>
+
+ <!-- Explicitly configure TCP discovery SPI to provide list of initial nodes. -->
+ <property name="discoverySpi">
+ <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
+ <property name="ipFinder">
+ <!--
+ Ignite provides several options for automatic discovery that can be used
+ instead of static IP based discovery. For information on all options refer
+ to our documentation: http://apacheignite.readme.io/docs/cluster-config
+ -->
+ <!-- Uncomment static IP finder to enable static-based discovery of initial nodes. -->
+ <!--<bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">-->
+ <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.multicast.TcpDiscoveryMulticastIpFinder">
+ <property name="addresses">
+ <list>
+ <!-- In distributed environment, replace with actual host IP address. -->
+ <value>127.0.0.1:47500..47509</value>
+ </list>
+ </property>
+ </bean>
+ </property>
+ </bean>
+ </property>
+ </bean>
+</beans>
diff --git a/modules/spark-3.2-ext/examples/src/main/resources/person.json b/modules/spark-3.2-ext/examples/src/main/resources/person.json
new file mode 100644
index 0000000..d651b0d
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/src/main/resources/person.json
@@ -0,0 +1,10 @@
+{ "id": 1, "name": "Ivan Ivanov", "department": "Executive commitee" }
+{ "id": 2, "name": "Petr Petrov", "department": "Executive commitee" }
+{ "id": 3, "name": "Jonh Doe", "department": "Production" }
+{ "id": 4, "name": "Smith Ann", "department": "Production" }
+{ "id": 5, "name": "Sergey Smirnov", "department": "Accounting" }
+{ "id": 6, "name": "Alexandra Sergeeva", "department": "Accounting" }
+{ "id": 7, "name": "Adam West", "department": "IT" }
+{ "id": 8, "name": "Beverley Chase", "department": "Head Office" }
+{ "id": 9, "name": "Igor Rozhkov", "department": "Head Office" }
+{ "id": 10, "name": "Anastasia Borisova", "department": "IT" }
diff --git a/modules/spark-3.2-ext/examples/src/test/java/org/apache/ignite/spark/examples/IgniteDataFrameSelfTest.java b/modules/spark-3.2-ext/examples/src/test/java/org/apache/ignite/spark/examples/IgniteDataFrameSelfTest.java
new file mode 100644
index 0000000..fb58627
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/src/test/java/org/apache/ignite/spark/examples/IgniteDataFrameSelfTest.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.examples;
+
+import org.apache.ignite.examples.spark.IgniteCatalogExample;
+import org.apache.ignite.examples.spark.IgniteDataFrameExample;
+import org.apache.ignite.examples.spark.IgniteDataFrameJoinExample;
+import org.apache.ignite.examples.spark.IgniteDataFrameWriteExample;
+import org.apache.ignite.testframework.junits.common.GridAbstractExamplesTest;
+import org.junit.Test;
+
+/**
+ */
+public class IgniteDataFrameSelfTest extends GridAbstractExamplesTest {
+ /** */
+ static final String[] EMPTY_ARGS = new String[0];
+
+ /**
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testCatalogExample() throws Exception {
+ IgniteCatalogExample.main(EMPTY_ARGS);
+ }
+
+ /**
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testDataFrameExample() throws Exception {
+ IgniteDataFrameExample.main(EMPTY_ARGS);
+ }
+
+ /**
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testDataFrameWriteExample() throws Exception {
+ IgniteDataFrameWriteExample.main(EMPTY_ARGS);
+ }
+
+ /**
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testDataFrameJoinExample() throws Exception {
+ IgniteDataFrameJoinExample.main(EMPTY_ARGS);
+ }
+}
diff --git a/modules/spark-3.2-ext/examples/src/test/java/org/apache/ignite/spark/examples/JavaIgniteDataFrameSelfTest.java b/modules/spark-3.2-ext/examples/src/test/java/org/apache/ignite/spark/examples/JavaIgniteDataFrameSelfTest.java
new file mode 100644
index 0000000..0e0f6b2
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/src/test/java/org/apache/ignite/spark/examples/JavaIgniteDataFrameSelfTest.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.examples;
+
+import org.apache.ignite.examples.spark.JavaIgniteCatalogExample;
+import org.apache.ignite.examples.spark.JavaIgniteDataFrameExample;
+import org.apache.ignite.examples.spark.JavaIgniteDataFrameJoinExample;
+import org.apache.ignite.examples.spark.JavaIgniteDataFrameWriteExample;
+import org.apache.ignite.testframework.junits.common.GridAbstractExamplesTest;
+import org.junit.Test;
+
+/**
+ */
+public class JavaIgniteDataFrameSelfTest extends GridAbstractExamplesTest {
+ /** */
+ static final String[] EMPTY_ARGS = new String[0];
+
+ /**
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testCatalogExample() throws Exception {
+ JavaIgniteCatalogExample.main(EMPTY_ARGS);
+ }
+
+ /**
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testDataFrameExample() throws Exception {
+ JavaIgniteDataFrameExample.main(EMPTY_ARGS);
+ }
+
+ /**
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testDataFrameWriteExample() throws Exception {
+ JavaIgniteDataFrameWriteExample.main(EMPTY_ARGS);
+ }
+
+ /**
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testDataFrameJoinExample() throws Exception {
+ JavaIgniteDataFrameJoinExample.main(EMPTY_ARGS);
+ }
+}
diff --git a/modules/spark-3.2-ext/examples/src/test/java/org/apache/ignite/spark/examples/SharedRDDExampleSelfTest.java b/modules/spark-3.2-ext/examples/src/test/java/org/apache/ignite/spark/examples/SharedRDDExampleSelfTest.java
new file mode 100644
index 0000000..300dfa8
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/src/test/java/org/apache/ignite/spark/examples/SharedRDDExampleSelfTest.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.examples;
+
+import org.apache.ignite.examples.spark.SharedRDDExample;
+import org.apache.ignite.testframework.junits.common.GridAbstractExamplesTest;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * SharedRDD examples self test.
+ */
+public class SharedRDDExampleSelfTest extends GridAbstractExamplesTest {
+ /** */
+ static final String[] EMPTY_ARGS = new String[0];
+
+ /**
+ * TODO: IGNITE-12054 Only one SparkContext may be running in this JVM (see SPARK-2243).
+ * @throws Exception If failed.
+ */
+ @Ignore("https://issues.apache.org/jira/browse/IGNITE-12054")
+ @Test
+ public void testSharedRDDExample() throws Exception {
+ SharedRDDExample.main(EMPTY_ARGS);
+ }
+
+}
diff --git a/modules/spark-3.2-ext/examples/src/test/java/org/apache/ignite/spark/testsuites/IgniteExamplesSparkSelfTestSuite.java b/modules/spark-3.2-ext/examples/src/test/java/org/apache/ignite/spark/testsuites/IgniteExamplesSparkSelfTestSuite.java
new file mode 100644
index 0000000..5b75bb1
--- /dev/null
+++ b/modules/spark-3.2-ext/examples/src/test/java/org/apache/ignite/spark/testsuites/IgniteExamplesSparkSelfTestSuite.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.testsuites;
+
+import org.apache.ignite.spark.examples.IgniteDataFrameSelfTest;
+import org.apache.ignite.spark.examples.JavaIgniteDataFrameSelfTest;
+import org.apache.ignite.spark.examples.SharedRDDExampleSelfTest;
+import org.apache.ignite.testframework.GridTestUtils;
+import org.junit.BeforeClass;
+import org.junit.runner.RunWith;
+import org.junit.runners.Suite;
+
+import static org.apache.ignite.IgniteSystemProperties.IGNITE_OVERRIDE_MCAST_GRP;
+
+/**
+ * Examples test suite.
+ * <p>
+ * Contains only Spark Ignite examples tests.
+ */
+@RunWith(Suite.class)
+@Suite.SuiteClasses({
+ SharedRDDExampleSelfTest.class,
+ IgniteDataFrameSelfTest.class,
+ JavaIgniteDataFrameSelfTest.class,
+})
+public class IgniteExamplesSparkSelfTestSuite {
+ /** */
+ @BeforeClass
+ public static void init() {
+ System.setProperty(IGNITE_OVERRIDE_MCAST_GRP,
+ GridTestUtils.getNextMulticastGroup(IgniteExamplesSparkSelfTestSuite.class));
+ }
+}
diff --git a/modules/spark-3.2-ext/pom.xml b/modules/spark-3.2-ext/pom.xml
new file mode 100644
index 0000000..a99dca1
--- /dev/null
+++ b/modules/spark-3.2-ext/pom.xml
@@ -0,0 +1,70 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.ignite</groupId>
+ <artifactId>ignite-parent-ext-internal</artifactId>
+ <version>1</version>
+ <relativePath>../../parent-internal/pom.xml</relativePath>
+ </parent>
+
+ <artifactId>ignite-spark-3.2-parent-ext</artifactId>
+ <packaging>pom</packaging>
+ <version>1.0.0-SNAPSHOT</version>
+
+ <properties>
+ <scala.library.version>2.12.16</scala.library.version>
+ <scala.test.version>3.2.12</scala.test.version>
+ <spark.version>3.2.2</spark.version>
+ <spark.jackson.version>2.12.7</spark.jackson.version>
+ </properties>
+
+ <modules>
+ <module>spark-3.2</module>
+ <module>examples</module>
+ </modules>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-deploy-plugin</artifactId>
+ <configuration>
+ <skip>true</skip>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>flatten-maven-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>flatten</id>
+ <inherited>false</inherited>
+ <phase/>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/modules/spark-3.2-ext/spark-3.2/README.txt b/modules/spark-3.2-ext/spark-3.2/README.txt
new file mode 100644
index 0000000..a3d2661
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/README.txt
@@ -0,0 +1,11 @@
+Apache Ignite Spark 3.2 Module
+------------------------------
+
+Apache Ignite provides an implementation of Spark RDD abstraction which enables easy access to Ignite caches.
+Ignite RDD does not keep its state in the memory of the Spark application and provides a view of the corresponding
+Ignite cache. Depending on the chosen deployment mode this state may exist only during the lifespan of the Spark
+application (embedded mode) or may exist outside of the Spark application (standalone mode), allowing seamless
+sharing of the state between multiple Spark jobs.
+------------------------------
+
+Activate 'spark-3.2-dev-profile' profile to let IDEA load the module.
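+
+A minimal standalone-mode sketch (assuming a cache named "sharedRDD" is defined in the referenced Spring
+configuration, as in the bundled example-shared-rdd.xml):
+
+    val igniteContext = new IgniteContext(sparkContext, "config/spark/example-shared-rdd.xml")
+    val cacheRdd = igniteContext.fromCache[Int, Int]("sharedRDD")
+
+    cacheRdd.savePairs(sparkContext.parallelize(1 to 20).map(i ⇒ (i, i)))
+
+    igniteContext.close()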
diff --git a/modules/spark-3.2-ext/spark-3.2/licenses/apache-2.0.txt b/modules/spark-3.2-ext/spark-3.2/licenses/apache-2.0.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/licenses/apache-2.0.txt
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/modules/spark-3.2-ext/spark-3.2/modules/core/src/test/config/log4j2-test.xml b/modules/spark-3.2-ext/spark-3.2/modules/core/src/test/config/log4j2-test.xml
new file mode 100644
index 0000000..0bf9cf5
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/modules/core/src/test/config/log4j2-test.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<Configuration>
+ <Appenders>
+ <Console name="CONSOLE" target="SYSTEM_OUT">
+ <PatternLayout pattern="[%d{ISO8601}][%-5p][%t][%c{1}]%notEmpty{[%markerSimpleName]} %m%n"/>
+ <LevelRangeFilter minLevel="INFO" maxLevel="DEBUG"/>
+ </Console>
+
+ <Console name="CONSOLE_ERR" target="SYSTEM_ERR">
+ <PatternLayout pattern="[%d{ISO8601}][%-5p][%t][%c{1}] %m%n"/>
+ </Console>
+ </Appenders>
+
+ <Loggers>
+ <Logger name="org" level="INFO"/>
+
+ <Root level="INFO">
+ <AppenderRef ref="CONSOLE" level="DEBUG"/>
+ <AppenderRef ref="CONSOLE_ERR" level="WARN"/>
+ </Root>
+ </Loggers>
+</Configuration>
diff --git a/modules/spark-3.2-ext/spark-3.2/modules/core/src/test/config/tests.properties b/modules/spark-3.2-ext/spark-3.2/modules/core/src/test/config/tests.properties
new file mode 100644
index 0000000..86094c8
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/modules/core/src/test/config/tests.properties
@@ -0,0 +1,16 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/modules/spark-3.2-ext/spark-3.2/pom.xml b/modules/spark-3.2-ext/spark-3.2/pom.xml
new file mode 100644
index 0000000..dad17c0
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/pom.xml
@@ -0,0 +1,229 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+ POM file.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.ignite</groupId>
+ <artifactId>ignite-spark-3.2-parent-ext</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ <relativePath>../../spark-3.2-ext/pom.xml</relativePath>
+ </parent>
+
+ <artifactId>ignite-spark-3.2-ext</artifactId>
+
+ <url>http://ignite.apache.org</url>
+
+ <dependencies>
+
+ <dependency>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-library</artifactId>
+ <version>${scala.library.version}</version>
+ <scope>provided</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-reflect</artifactId>
+ <version>${scala.library.version}</version>
+ <scope>provided</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>ignite-core</artifactId>
+ <scope>provided</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>ignite-indexing</artifactId>
+ <scope>provided</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>ignite-spring</artifactId>
+ <scope>provided</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core_2.12</artifactId>
+ <version>${spark.version}</version>
+ <scope>provided</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-catalyst_2.12</artifactId>
+ <version>${spark.version}</version>
+ <scope>provided</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-sql_2.12</artifactId>
+ <version>${spark.version}</version>
+ <scope>provided</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-network-common_2.12</artifactId>
+ <version>${spark.version}</version>
+ <scope>provided</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-network-shuffle_2.12</artifactId>
+ <version>${spark.version}</version>
+ <scope>provided</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-tags_2.12</artifactId>
+ <version>${spark.version}</version>
+ <scope>provided</scope>
+ </dependency>
+
+ <!-- Test dependencies -->
+
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>ignite-core</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest_2.12</artifactId>
+ <version>${scala.test.version}</version>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-library</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest-funspec_2.12</artifactId>
+ <version>${scala.test.version}</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.scalactic</groupId>
+ <artifactId>scalactic_2.12</artifactId>
+ <version>${scala.test.version}</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.scalatestplus</groupId>
+ <artifactId>scalatestplus-junit_2.12</artifactId>
+ <version>1.0.0-M2</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-core</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>net.alchim31.maven</groupId>
+ <artifactId>scala-maven-plugin</artifactId>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-deploy-plugin</artifactId>
+ <version>2.8.2</version>
+ <configuration>
+ <skip>false</skip>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <profiles>
+
+ <!-- IDEA can't load the module if the Scala dependencies have 'provided' scope -->
+ <profile>
+ <id>spark-3.2-dev-profile</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-library</artifactId>
+ <version>${scala.library.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-reflect</artifactId>
+ <version>${scala.library.version}</version>
+ </dependency>
+ </dependencies>
+ </profile>
+
+ <profile>
+ <id>scala-test</id>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest-maven-plugin</artifactId>
+ <version>2.0.0</version>
+ <configuration>
+ <reportsDirectory>target/surefire-reports</reportsDirectory>
+ <junitxml>.</junitxml>
+ <filereports>WDF IgniteScalaTestSuites.txt</filereports>
+ <skipTests>true</skipTests>
+ </configuration>
+ <executions>
+ <execution>
+ <id>test</id>
+ <goals>
+ <goal>test</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ </profiles>
+</project>
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/modules/spark-3.2-ext/spark-3.2/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
new file mode 100644
index 0000000..8304662
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
@@ -0,0 +1 @@
+org.apache.ignite.spark.impl.IgniteRelationProvider
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/IgniteContext.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/IgniteContext.scala
new file mode 100644
index 0000000..4445df9
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/IgniteContext.scala
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import org.apache.ignite._
+import org.apache.ignite.configuration.{CacheConfiguration, IgniteConfiguration}
+import org.apache.ignite.internal.IgnitionEx
+import org.apache.ignite.internal.util.IgniteUtils
+import org.apache.ignite.spark.IgniteContext.setIgniteHome
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.SparkContext
+import org.apache.log4j.Logger
+import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd}
+
+/**
+ * Ignite context.
+ *
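+ * A construction sketch (the inline configuration factory is an assumption for illustration; any closure
+ * returning an `IgniteConfiguration` can be passed):
+ * {{{
+ * val ic = new IgniteContext(sc, () ⇒ new IgniteConfiguration())
+ * }}}
+ *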
+ * @param sparkContext Spark context.
+ * @param cfgF Configuration factory.
+ */
+class IgniteContext(
+ @transient val sparkContext: SparkContext,
+ cfgF: () ⇒ IgniteConfiguration,
+ @deprecated("Embedded mode is deprecated and will be discontinued. Consider using standalone mode instead.")
+ standalone: Boolean = true
+ ) extends Serializable {
+ private val cfgClo = new Once(cfgF)
+
+ private val igniteHome = IgniteUtils.getIgniteHome
+
+ if (!standalone) {
+ Logging.log.warn("Embedded mode is deprecated and will be discontinued. Consider using standalone mode instead.")
+
+ // Get required number of executors with default equals to number of available executors.
+ val workers = sparkContext.getConf.getInt("spark.executor.instances",
+ sparkContext.statusTracker.getExecutorInfos.size)
+
+ if (workers <= 0)
+ throw new IllegalStateException("No Spark executors found to start Ignite nodes.")
+
+ Logging.log.info("Will start Ignite nodes on " + workers + " workers")
+
+ // Start ignite server node on each worker in server mode.
+ sparkContext.parallelize(1 to workers, workers).foreachPartition(it ⇒ ignite())
+ }
+
+ // Make sure to start Ignite on context creation.
+ ignite()
+
+ //Stop local ignite instance on application end.
+ //Instances on workers will be stopped with executor stop(jvm exit).
+ sparkContext.addSparkListener(new SparkListener {
+ override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = {
+ close()
+ }
+ })
+
+ /**
+ * Creates an instance of IgniteContext with the given spring configuration.
+ *
+ * @param sc Spark context.
+ * @param springUrl Spring configuration path.
+ * @param standalone Standalone or embedded mode.
+ */
+ @deprecated("Embedded mode is deprecated and will be discontinued. Consider using standalone mode instead.")
+ def this(
+ sc: SparkContext,
+ springUrl: String,
+ standalone: Boolean
+ ) {
+ this(sc, () ⇒ IgnitionEx.loadConfiguration(springUrl).get1(), standalone)
+ }
+
+ /**
+ * Creates an instance of IgniteContext with the given spring configuration.
+ *
+ * @param sc Spark context.
+ * @param springUrl Spring configuration path.
+ */
+ def this(
+ sc: SparkContext,
+ springUrl: String
+ ) {
+ this(sc, () ⇒ IgnitionEx.loadConfiguration(springUrl).get1())
+ }
+
+ /**
+ * Creates an instance of IgniteContext with default Ignite configuration.
+ * By default this method will use the grid configuration defined in the
+ * `IGNITE_HOME/config/default-config.xml` file.
+ *
+ * @param sc Spark context.
+ */
+ def this(sc: SparkContext) {
+ this(sc, IgnitionEx.DFLT_CFG)
+ }
+
+ val sqlContext = new SQLContext(sparkContext)
+
+ /**
+ * Creates an `IgniteRDD` instance from the given cache name. If the cache does not exist, it will be
+ * automatically started from a template on the first invoked RDD action.
+ *
+ * @param cacheName Cache name.
+ * @return `IgniteRDD` instance.
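+ *
+ * @example A minimal sketch; the cache name and key/value types are illustrative:
+ * {{{
+ * val rdd: IgniteRDD[Int, String] = ic.fromCache[Int, String]("myCache")
+ * rdd.savePairs(sparkContext.parallelize(1 to 100).map(i => (i, i.toString)))
+ * }}}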
+ */
+ def fromCache[K, V](cacheName: String): IgniteRDD[K, V] = {
+ new IgniteRDD[K, V](this, cacheName, null, false)
+ }
+
+ /**
+ * Creates an `IgniteRDD` instance from the given cache configuration. If the cache does not exist, it will be
+ * automatically started using the configuration provided on the first invoked RDD action.
+ *
+ * @param cacheCfg Cache configuration to use.
+ * @return `IgniteRDD` instance.
+ */
+ def fromCache[K, V](cacheCfg: CacheConfiguration[K, V]) = {
+ new IgniteRDD[K, V](this, cacheCfg.getName, cacheCfg, false)
+ }
+
+ /**
+ * Gets the existing Ignite instance or starts a new one if it's not started yet.
+ * @return Ignite instance.
+ */
+ def ignite(): Ignite = {
+ setIgniteHome(igniteHome)
+
+ val igniteCfg = cfgClo()
+
+ // check if called from driver
+ if (standalone || sparkContext != null) igniteCfg.setClientMode(true)
+
+ try {
+ Ignition.getOrStart(igniteCfg)
+ }
+ catch {
+ case e: IgniteException ⇒
+ Logging.log.error("Failed to start Ignite.", e)
+
+ throw e
+ }
+ }
+
+ /**
+ * Stops the supporting Ignite instance. If the Ignite instance has already been stopped, this operation will be
+ * a no-op.
+ */
+ def close(shutdownIgniteOnWorkers: Boolean = false): Unit = {
+ // additional check if called from driver
+ if (sparkContext != null && shutdownIgniteOnWorkers) {
+ // Get the required number of executors, with the default equal to the number of available executors.
+ val workers = sparkContext.getConf.getInt("spark.executor.instances",
+ sparkContext.statusTracker.getExecutorInfos.size)
+
+ if (workers > 0) {
+ Logging.log.info("Will stop Ignite nodes on " + workers + " workers")
+
+ // Stop the Ignite node on each worker.
+ sparkContext.parallelize(1 to workers, workers).foreachPartition(it ⇒ doClose())
+ }
+ }
+
+ doClose()
+ }
+
+ private def doClose() = {
+ val igniteCfg = cfgClo()
+
+ if (Ignition.state(igniteCfg.getIgniteInstanceName) == IgniteState.STARTED)
+ Ignition.stop(igniteCfg.getIgniteInstanceName, false)
+ }
+}
+
+object IgniteContext {
+ def apply(sparkContext: SparkContext, cfgF: () ⇒ IgniteConfiguration, standalone: Boolean = true): IgniteContext =
+ new IgniteContext(sparkContext, cfgF, standalone)
+
+ def setIgniteHome(igniteHome: String): Unit = {
+ val home = IgniteUtils.getIgniteHome
+
+ if (home == null && igniteHome != null) {
+ Logging.log.info("Setting IGNITE_HOME from driver as it is not available on this worker: " + igniteHome)
+
+ IgniteUtils.nullifyHomeDirectory()
+
+ System.setProperty(IgniteSystemProperties.IGNITE_HOME, igniteHome)
+ }
+ }
+}
+
+/**
+ * Auxiliary closure that ensures that the passed-in closure is executed only once.
+ *
+ * @param clo Closure to wrap.
+ */
+class Once(clo: () ⇒ IgniteConfiguration) extends Serializable {
+ @transient @volatile var res: IgniteConfiguration = null
+
+ def apply(): IgniteConfiguration = {
+ if (res == null) {
+ this.synchronized {
+ if (res == null)
+ res = clo()
+ }
+ }
+
+ res
+ }
+}
+
+/**
+ * Spark uses log4j by default, so the same logger is used in IgniteContext as well.
+ *
+ * This object is used to avoid problems with log4j serialization.
+ */
+object Logging extends Serializable {
+ @transient lazy val log = Logger.getLogger(classOf[IgniteContext])
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/IgniteDataFrameSettings.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/IgniteDataFrameSettings.scala
new file mode 100644
index 0000000..4e0abf4
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/IgniteDataFrameSettings.scala
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+/**
+ * Constants for the options of the Ignite DataFrame read/write support.
+ */
+object IgniteDataFrameSettings {
+ /**
+ * Name of DataSource format for loading data from Apache Ignite.
+ */
+ val FORMAT_IGNITE = "ignite"
+
+ /**
+ * Config option to specify the path to the Ignite config file.
+ * The configuration from this file will be used to connect to an existing Ignite cluster.
+ *
+ * @note All Ignite nodes used for executing Spark tasks will be forcibly started in client mode.
+ *
+ * @example {{{
+ * val igniteDF = spark.read.format(IGNITE)
+ * .option(OPTION_CONFIG_FILE, CONFIG_FILE)
+ * // other options ...
+ * .load()
+ * }}}
+ */
+ val OPTION_CONFIG_FILE = "config"
+
+ /**
+ * Config option to specify Ignite SQL table name to load data from.
+ *
+ * @example {{{
+ * val igniteDF = spark.read.format(IGNITE)
+ * // other options ...
+ * .option(OPTION_TABLE, "mytable")
+ * .load()
+ * }}}
+ *
+ * @see [[org.apache.ignite.cache.QueryEntity#tableName]]
+ */
+ val OPTION_TABLE = "table"
+
+ /**
+ * Config option to specify the Ignite SQL schema name in which the specified table is present.
+ * If this is not specified, all schemata will be scanned for a table name which matches the given table
+ * name and the first matching table will be used. This option can be used when there are multiple tables in
+ * different schemata with the same table name to disambiguate the tables.
+ *
+ * @example {{{
+ * val igniteDF = spark.read.format(IGNITE)
+ * .option(OPTION_TABLE, "myTable")
+ * .option(OPTION_SCHEMA, "mySchema")
+ * .load()
+ * }}}
+ */
+ val OPTION_SCHEMA = "schema"
+
+ /**
+ * Config option to specify newly created Ignite SQL table parameters.
+ * The value of this option will be used in `CREATE TABLE ... WITH "option value goes here"`.
+ *
+ * @example {{{
+ * val igniteDF = spark.write.format(IGNITE)
+ * // other options ...
+ * .option( OPTION_CREATE_TABLE_PARAMETERS, "backups=1, template=replicated")
+ * .save()
+ * }}}
+ *
+ * @see [[https://apacheignite-sql.readme.io/docs/create-table]]
+ */
+ val OPTION_CREATE_TABLE_PARAMETERS = "createTableParameters"
+
+ /**
+ * Config option to specify comma separated list of primary key fields for a newly created Ignite SQL table.
+ *
+ * @example {{{
+ * val igniteDF = spark.write.format(IGNITE)
+ * // other options ...
+ * .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id")
+ * .save()
+ * }}}
+ *
+ * @see [[https://apacheignite-sql.readme.io/docs/create-table]]
+ */
+ val OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS = "primaryKeyFields"
+
+ /**
+ * Config option for saving data frame.
+ * Internally all SQL inserts are done through `IgniteDataStreamer`.
+ * This option sets the `allowOverwrite` property of the streamer.
+ * If `true`, a row with the same primary key value overwrites the existing row in the table.
+ * If `false`, a row with the same primary key value is skipped and the existing row is left in the table.
+ * The default value is `false`.
+ *
+ * @example {{{
+ * val igniteDF = spark.write.format(IGNITE)
+ * // other options ...
+ * .option(OPTION_STREAMER_ALLOW_OVERWRITE, true)
+ * .save()
+ * }}}
+ *
+ * @see [[org.apache.ignite.IgniteDataStreamer]]
+ * @see [[org.apache.ignite.IgniteDataStreamer#allowOverwrite(boolean)]]
+ */
+ val OPTION_STREAMER_ALLOW_OVERWRITE = "streamerAllowOverwrite"
+
+ /**
+ * Config option for saving data frame.
+ * Internally all SQL inserts are done through `IgniteDataStreamer`.
+ * This option sets the `skipStore` property of the streamer.
+ * If `true`, write-through behavior will be disabled for data streaming.
+ * If `false`, write-through behavior will be enabled for data streaming.
+ * The default value is `false`.
+ *
+ * @example {{{
+ * val igniteDF = spark.write.format(IGNITE)
+ * // other options ...
+ * .option(OPTION_STREAMER_SKIP_STORE, true)
+ * .save()
+ * }}}
+ * @see [[org.apache.ignite.IgniteDataStreamer]]
+ * @see [[org.apache.ignite.IgniteDataStreamer#skipStore(boolean)]]
+ */
+ val OPTION_STREAMER_SKIP_STORE = "streamerSkipStore"
+
+ /**
+ * Config option for saving data frame.
+ * Internally all SQL inserts are done through `IgniteDataStreamer`.
+ * This option sets the `autoFlushFrequency` property of the streamer.
+ *
+ * @example {{{
+ * val igniteDF = spark.write.format(IGNITE)
+ * // other options ...
+ * .option(OPTION_STREAMER_FLUSH_FREQUENCY, 10000)
+ * .save()
+ * }}}
+ *
+ * @see [[org.apache.ignite.IgniteDataStreamer]]
+ * @see [[org.apache.ignite.IgniteDataStreamer#autoFlushFrequency(long)]]
+ */
+ val OPTION_STREAMER_FLUSH_FREQUENCY = "streamerFlushFrequency"
+
+ /**
+ * Config option for saving data frame.
+ * Internally all SQL inserts are done through `IgniteDataStreamer`.
+ * This option sets the `perNodeBufferSize` property of the streamer.
+ *
+ * @example {{{
+ * val igniteDF = spark.write.format(IGNITE)
+ * // other options ...
+ * .option(OPTION_STREAMER_PER_NODE_BUFFER_SIZE, 1024)
+ * .save()
+ * }}}
+ *
+ * @see [[org.apache.ignite.IgniteDataStreamer]]
+ * @see [[org.apache.ignite.IgniteDataStreamer#perNodeBufferSize(int)]]
+ */
+ val OPTION_STREAMER_PER_NODE_BUFFER_SIZE = "streamerPerNodeBufferSize"
+
+ /**
+ * Config option for saving data frame.
+ * Internally all SQL inserts are done through `IgniteDataStreamer`.
+ * This option sets the `perNodeParallelOperations` property of the streamer.
+ *
+ * @example {{{
+ * val igniteDF = spark.write.format(IGNITE)
+ * // other options ...
+ * .option(OPTION_STREAMER_PER_NODE_PARALLEL_OPERATIONS, 42)
+ * .save()
+ * }}}
+ *
+ * @see [[org.apache.ignite.IgniteDataStreamer]]
+ * @see [[org.apache.ignite.IgniteDataStreamer#perNodeParallelOperations(int)]]
+ */
+ val OPTION_STREAMER_PER_NODE_PARALLEL_OPERATIONS = "streamerPerNodeParallelOperations"
+
+ /**
+ * Option for a [[org.apache.spark.sql.SparkSession]] configuration.
+ * If `true` then all Ignite optimizations of Spark SQL statements will be disabled.
+ * Default value is `false`.
+ *
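+ * @example A minimal sketch of disabling the optimization for the whole session (the application name is a placeholder):
+ * {{{
+ * val spark = SparkSession.builder()
+ * .appName("ignite-example")
+ * .config(OPTION_DISABLE_SPARK_SQL_OPTIMIZATION, "true")
+ * .getOrCreate()
+ * }}}
+ *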
+ * @see [[org.apache.spark.sql.ignite.IgniteOptimization]]
+ */
+ val OPTION_DISABLE_SPARK_SQL_OPTIMIZATION = "ignite.disableSparkSQLOptimization"
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/IgniteRDD.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/IgniteRDD.scala
new file mode 100644
index 0000000..a244728
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/IgniteRDD.scala
@@ -0,0 +1,399 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.ignite.spark
+
+import org.apache.ignite.cache.query._
+import org.apache.ignite.cluster.ClusterNode
+import org.apache.ignite.configuration.CacheConfiguration
+import org.apache.ignite.internal.processors.cache.query.QueryCursorEx
+import org.apache.ignite.internal.processors.query.GridQueryFieldMetadata
+import org.apache.ignite.lang.IgniteUuid
+import org.apache.ignite.spark.impl._
+import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi
+import org.apache.ignite.spi.discovery.tcp.internal.TcpDiscoveryNode
+import org.apache.spark._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql._
+import org.apache.spark.sql.types._
+
+import javax.cache.Cache
+import scala.collection.JavaConversions._
+
+/**
+ * Ignite RDD. Represents Ignite cache as Spark RDD abstraction.
+ *
+ * @param ic Ignite context to use.
+ * @param cacheName Cache name.
+ * @param cacheCfg Cache configuration.
+ * @param keepBinary Whether values should be kept in Ignite binary form.
+ * @tparam K Key type.
+ * @tparam V Value type.
+ */
+class IgniteRDD[K, V] (
+ val ic: IgniteContext,
+ val cacheName: String,
+ val cacheCfg: CacheConfiguration[K, V],
+ val keepBinary: Boolean
+) extends IgniteAbstractRDD[(K, V), K, V] (ic, cacheName, cacheCfg, keepBinary) {
+ /**
+ * Computes iterator based on given partition.
+ *
+ * @param part Partition to use.
+ * @param context Task context.
+ * @return Partition iterator.
+ */
+ override def compute(part: Partition, context: TaskContext): Iterator[(K, V)] = {
+ val cache = ensureCache()
+
+ val qry: ScanQuery[K, V] = new ScanQuery[K, V](part.index)
+
+ val cur = cache.query(qry)
+
+ TaskContext.get().addTaskCompletionListener[Unit]((_) ⇒ cur.close())
+
+ new IgniteQueryIterator[Cache.Entry[K, V], (K, V)](cur.iterator(), entry ⇒ {
+ (entry.getKey, entry.getValue)
+ })
+ }
+
+ /**
+ * Gets partitions for the given cache RDD.
+ *
+ * @return Partitions.
+ */
+ override protected[spark] def getPartitions: Array[Partition] = {
+ ensureCache()
+
+ val parts = ic.ignite().affinity(cacheName).partitions()
+
+ (0 until parts).map(new IgnitePartition(_)).toArray
+ }
+
+ /**
+ * Gets preferred locations for the given partition.
+ *
+ * @param split Split partition.
+ * @return Preferred locations (host names) for the given partition.
+ */
+ override protected[spark] def getPreferredLocations(split: Partition): Seq[String] = {
+ ensureCache()
+
+ if (ic.ignite().configuration().getDiscoverySpi().isInstanceOf[TcpDiscoverySpi]) {
+ ic.ignite().affinity(cacheName).mapPartitionToPrimaryAndBackups(split.index)
+ .map(_.asInstanceOf[TcpDiscoveryNode].socketAddresses()).flatten.map(_.getHostName).toList
+ }
+ else {
+ ic.ignite().affinity(cacheName).mapPartitionToPrimaryAndBackups(split.index)
+ .flatten(_.hostNames).toSeq
+ }
+ }
+
+ /**
+ * Tells whether this IgniteRDD is empty or not.
+ *
+ * @return Whether this IgniteRDD is empty or not.
+ */
+ override def isEmpty(): Boolean = {
+ count() == 0
+ }
+
+ /**
+ * Gets number of tuples in this IgniteRDD.
+ *
+ * @return Number of tuples in this IgniteRDD.
+ */
+ override def count(): Long = {
+ val cache = ensureCache()
+
+ cache.size()
+ }
+
+ /**
+ * Runs an object SQL query on the corresponding Ignite cache.
+ *
+ * @param typeName Type name to run SQL against.
+ * @param sql SQL query to run.
+ * @param args Optional SQL query arguments.
+ * @return RDD with query results.
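+ *
+ * @example A minimal sketch, assuming a cache that stores SQL-indexed `Person` values:
+ * {{{
+ * val adults = igniteRdd.objectSql("Person", "age >= ?", 18)
+ * }}}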
+ */
+ def objectSql(typeName: String, sql: String, args: Any*): RDD[(K, V)] = {
+ val qry: SqlQuery[K, V] = new SqlQuery[K, V](typeName, sql)
+
+ qry.setArgs(args.map(_.asInstanceOf[Object]):_*)
+
+ new IgniteSqlRDD[(K, V), Cache.Entry[K, V], K, V](ic, cacheName, cacheCfg, qry,
+ entry ⇒ (entry.getKey, entry.getValue), keepBinary)
+ }
+
+ /**
+ * Runs an SQL fields query.
+ *
+ * @param sql SQL statement to run.
+ * @param args Optional SQL query arguments.
+ * @return `DataFrame` instance with the query results.
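+ *
+ * @example A minimal sketch, assuming a SQL-enabled cache with a `Person` table:
+ * {{{
+ * val df = igniteRdd.sql("SELECT name, age FROM Person WHERE age >= ?", 18)
+ * }}}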
+ */
+ def sql(sql: String, args: Any*): DataFrame = {
+ val qry = new SqlFieldsQuery(sql)
+
+ qry.setArgs(args.map(_.asInstanceOf[Object]):_*)
+
+ val schema = buildSchema(ensureCache().query(qry).asInstanceOf[QueryCursorEx[java.util.List[_]]].fieldsMeta())
+
+ val rowRdd = new IgniteSqlRDD[Row, java.util.List[_], K, V](
+ ic, cacheName, cacheCfg, qry, list ⇒ Row.fromSeq(list), keepBinary)
+
+ ic.sqlContext.createDataFrame(rowRdd, schema)
+ }
+
+ /**
+ * Saves values from given RDD into Ignite. A unique key will be generated for each value of the given RDD.
+ *
+ * @param rdd RDD instance to save values from.
+ */
+ def saveValues(rdd: RDD[V]) = {
+ rdd.foreachPartition(it ⇒ {
+ val ig = ic.ignite()
+
+ ensureCache()
+
+ val locNode = ig.cluster().localNode()
+
+ val node: Option[ClusterNode] = ig.cluster().forHost(locNode).nodes().find(!_.eq(locNode))
+
+ val streamer = ig.dataStreamer[Object, V](cacheName)
+
+ try {
+ it.foreach(value ⇒ {
+ val key = affinityKeyFunc(value, node.orNull)
+
+ streamer.addData(key, value)
+ })
+ }
+ finally {
+ streamer.close()
+ }
+ })
+ }
+
+ /**
+ * Saves values from given RDD into Ignite. A unique key will be generated for each value of the given RDD.
+ *
+ * @param rdd RDD instance to save values from.
+ * @param f Transformation function.
+ */
+ def saveValues[T](rdd: RDD[T], f: (T, IgniteContext) ⇒ V) = {
+ rdd.foreachPartition(it ⇒ {
+ val ig = ic.ignite()
+
+ ensureCache()
+
+ val locNode = ig.cluster().localNode()
+
+ val node: Option[ClusterNode] = ig.cluster().forHost(locNode).nodes().find(!_.eq(locNode))
+
+ val streamer = ig.dataStreamer[Object, V](cacheName)
+
+ try {
+ it.foreach(t ⇒ {
+ val value = f(t, ic)
+
+ val key = affinityKeyFunc(value, node.orNull)
+
+ streamer.addData(key, value)
+ })
+ }
+ finally {
+ streamer.close()
+ }
+ })
+ }
+
+ /**
+ * Saves values from the given key-value RDD into Ignite.
+ *
+ * @param rdd RDD instance to save values from.
+ * @param overwrite Boolean flag indicating whether the call on this method should overwrite existing
+ * values in Ignite cache.
+ * @param skipStore Sets flag indicating that write-through behavior should be disabled for data streaming.
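+ *
+ * @example A minimal sketch; the key/value data is illustrative:
+ * {{{
+ * val pairs = sparkContext.parallelize(1 to 1000).map(i => (i, "value-" + i))
+ * igniteRdd.savePairs(pairs, overwrite = true)
+ * }}}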
+ */
+ def savePairs(rdd: RDD[(K, V)], overwrite: Boolean = false, skipStore: Boolean = false) = {
+ rdd.foreachPartition(it ⇒ {
+ val ig = ic.ignite()
+
+ // Make sure to deploy the cache
+ ensureCache()
+
+ val streamer = ig.dataStreamer[K, V](cacheName)
+
+ try {
+ streamer.allowOverwrite(overwrite)
+ streamer.skipStore(skipStore)
+
+ it.foreach(tup ⇒ {
+ streamer.addData(tup._1, tup._2)
+ })
+ }
+ finally {
+ streamer.close()
+ }
+ })
+ }
+
+ /**
+ * Saves values from the given RDD into Ignite.
+ *
+ * @param rdd RDD instance to save values from.
+ * @param f Transformation function.
+ * @param overwrite Boolean flag indicating whether the call on this method should overwrite existing
+ * values in Ignite cache.
+ * @param skipStore Sets flag indicating that write-through behavior should be disabled for data streaming.
+ */
+ def savePairs[T](rdd: RDD[T], f: (T, IgniteContext) ⇒ (K, V), overwrite: Boolean, skipStore: Boolean) = {
+ rdd.foreachPartition(it ⇒ {
+ val ig = ic.ignite()
+
+ // Make sure to deploy the cache
+ ensureCache()
+
+ val streamer = ig.dataStreamer[K, V](cacheName)
+
+ try {
+ streamer.allowOverwrite(overwrite)
+ streamer.skipStore(skipStore)
+
+ it.foreach(t ⇒ {
+ val tup = f(t, ic)
+
+ streamer.addData(tup._1, tup._2)
+ })
+ }
+ finally {
+ streamer.close()
+ }
+ })
+ }
+
+ /**
+ * Saves values from the given RDD into Ignite.
+ *
+ * @param rdd RDD instance to save values from.
+ * @param f Transformation function.
+ */
+ def savePairs[T](rdd: RDD[T], f: (T, IgniteContext) ⇒ (K, V)): Unit = {
+ savePairs(rdd, f, overwrite = false, skipStore = false)
+ }
+
+ /**
+ * Removes all values from the underlying Ignite cache.
+ */
+ def clear(): Unit = {
+ ensureCache().removeAll()
+ }
+
+ /**
+ * Returns `IgniteRDD` that will operate with binary objects. This method
+ * behaves similarly to [[org.apache.ignite.IgniteCache#withKeepBinary]].
+ *
+ * @return New `IgniteRDD` instance for binary objects.
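+ *
+ * @example A minimal sketch of switching the RDD to binary objects:
+ * {{{
+ * val binRdd = igniteRdd.withKeepBinary[Int, org.apache.ignite.binary.BinaryObject]()
+ * }}}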
+ */
+ def withKeepBinary[K1, V1](): IgniteRDD[K1, V1] = {
+ new IgniteRDD[K1, V1](
+ ic,
+ cacheName,
+ cacheCfg.asInstanceOf[CacheConfiguration[K1, V1]],
+ true)
+ }
+
+ /**
+ * Builds spark schema from query metadata.
+ *
+ * @param fieldsMeta Fields metadata.
+ * @return Spark schema.
+ */
+ private def buildSchema(fieldsMeta: java.util.List[GridQueryFieldMetadata]): StructType = {
+ new StructType(fieldsMeta.map(i ⇒
+ new StructField(i.fieldName(), IgniteRDD.dataType(i.fieldTypeName(), i.fieldName()), nullable = true))
+ .toArray)
+ }
+
+ /**
+ * Generates affinity key for given cluster node.
+ *
+ * @param value Value to generate key for.
+ * @param node Node to generate key for.
+ * @return Affinity key.
+ */
+ private def affinityKeyFunc(value: V, node: ClusterNode): IgniteUuid = {
+ val aff = ic.ignite().affinity[IgniteUuid](cacheName)
+
+ Stream.from(1, Math.max(1000, aff.partitions() * 2))
+ .map(_ ⇒ IgniteUuid.randomUuid()).find(node == null || aff.mapKeyToNode(_).eq(node))
+ .getOrElse(IgniteUuid.randomUuid())
+ }
+}
+
+object IgniteRDD {
+ /**
+ * Default decimal type.
+ */
+ private[spark] val DECIMAL = DecimalType(DecimalType.MAX_PRECISION, 3)
+
+ /**
+ * Gets Spark data type based on type name.
+ *
+ * @param typeName Type name.
+ * @return Spark data type.
+ */
+ def dataType(typeName: String, fieldName: String): DataType = typeName match {
+ case "java.lang.Boolean" ⇒ BooleanType
+ case "java.lang.Byte" ⇒ ByteType
+ case "java.lang.Short" ⇒ ShortType
+ case "java.lang.Integer" ⇒ IntegerType
+ case "java.lang.Long" ⇒ LongType
+ case "java.lang.Float" ⇒ FloatType
+ case "java.lang.Double" ⇒ DoubleType
+ case "java.math.BigDecimal" ⇒ DECIMAL
+ case "java.lang.String" ⇒ StringType
+ case "java.util.Date" ⇒ DateType
+ case "java.sql.Date" ⇒ DateType
+ case "java.sql.Timestamp" ⇒ TimestampType
+ case "[B" ⇒ BinaryType
+
+ case _ ⇒ StructType(new Array[StructField](0))
+ }
+
+ /**
+ * Converts java.util.Date to java.sql.Date, as java.util.Date is not supported by Spark SQL.
+ *
+ * @param input Any value.
+ * @return If input is java.util.Date returns java.sql.Date representation of given value, otherwise returns unchanged value.
+ */
+ def convertIfNeeded(input: Any): Any =
+ if (input == null)
+ input
+ else {
+ input match {
+ case timestamp: java.sql.Timestamp ⇒
+ timestamp
+
+ //Spark SQL doesn't support java.util.Date see - https://spark.apache.org/docs/latest/sql-programming-guide.html#data-types
+ case date: java.util.Date ⇒
+ new java.sql.Date(date.getTime)
+
+ case _ ⇒ input
+ }
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/JavaIgniteContext.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/JavaIgniteContext.scala
new file mode 100644
index 0000000..fa38631
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/JavaIgniteContext.scala
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import org.apache.ignite.Ignite
+import org.apache.ignite.configuration.{CacheConfiguration, IgniteConfiguration}
+import org.apache.ignite.internal.IgnitionEx
+import org.apache.ignite.lang.IgniteOutClosure
+import org.apache.spark.api.java.JavaSparkContext
+
+import scala.reflect.ClassTag
+
+/**
+ * Java-friendly Ignite context wrapper.
+ *
+ * @param sc Java Spark context.
+ * @param cfgF Configuration factory.
+ * @tparam K Key type.
+ * @tparam V Value type.
+ */
+class JavaIgniteContext[K, V](
+ @transient val sc: JavaSparkContext,
+ val cfgF: IgniteOutClosure[IgniteConfiguration],
+ @deprecated("Embedded mode is deprecated and will be discontinued. Consider using standalone mode instead.")
+ standalone: Boolean = true
+ ) extends Serializable {
+
+ @transient val ic: IgniteContext = new IgniteContext(sc.sc, () => cfgF.apply(), standalone)
+
+ def this(sc: JavaSparkContext, cfgF: IgniteOutClosure[IgniteConfiguration]) {
+ this(sc, cfgF, true)
+ }
+
+ def this(sc: JavaSparkContext, springUrl: String) {
+ this(sc, new IgniteOutClosure[IgniteConfiguration] {
+ override def apply() = IgnitionEx.loadConfiguration(springUrl).get1()
+ })
+ }
+
+ @deprecated("Embedded mode is deprecated and will be discontinued. Consider using standalone mode instead.")
+ def this(sc: JavaSparkContext, springUrl: String, standalone: Boolean) {
+ this(sc, new IgniteOutClosure[IgniteConfiguration] {
+ override def apply() = IgnitionEx.loadConfiguration(springUrl).get1()
+ }, standalone)
+ }
+
+ def fromCache(cacheName: String): JavaIgniteRDD[K, V] =
+ JavaIgniteRDD.fromIgniteRDD(new IgniteRDD[K, V](ic, cacheName, null, false))
+
+ def fromCache(cacheCfg: CacheConfiguration[K, V]) =
+ JavaIgniteRDD.fromIgniteRDD(new IgniteRDD[K, V](ic, cacheCfg.getName, cacheCfg, false))
+
+ def ignite(): Ignite = ic.ignite()
+
+ def close(shutdownIgniteOnWorkers: Boolean = false) = ic.close(shutdownIgniteOnWorkers)
+
+ private[spark] def fakeClassTag[T]: ClassTag[T] = ClassTag.AnyRef.asInstanceOf[ClassTag[T]]
+
+ implicit val ktag: ClassTag[K] = fakeClassTag
+
+ implicit val vtag: ClassTag[V] = fakeClassTag
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/JavaIgniteRDD.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/JavaIgniteRDD.scala
new file mode 100644
index 0000000..1937483
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/JavaIgniteRDD.scala
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import java.util
+
+import org.apache.spark.api.java.{JavaPairRDD, JavaRDD}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.DataFrame
+import org.apache.spark.{Partition, TaskContext}
+
+import scala.annotation.varargs
+import scala.collection.JavaConversions._
+import scala.language.implicitConversions
+import scala.reflect.ClassTag
+
+/**
+ * Java-friendly Ignite RDD wrapper. Represents Ignite cache as Java Spark RDD abstraction.
+ *
+ * @param rdd Ignite RDD instance.
+ * @tparam K Key type.
+ * @tparam V Value type.
+ */
+class JavaIgniteRDD[K, V](override val rdd: IgniteRDD[K, V])
+ extends JavaPairRDD[K, V](rdd)(JavaIgniteRDD.fakeClassTag, JavaIgniteRDD.fakeClassTag) {
+
+ override def wrapRDD(rdd: RDD[(K, V)]): JavaPairRDD[K, V] = JavaPairRDD.fromRDD(rdd)
+
+ override val classTag: ClassTag[(K, V)] = JavaIgniteRDD.fakeClassTag
+
+ /**
+ * Computes iterator based on given partition.
+ *
+ * @param part Partition to use.
+ * @param context Task context.
+ * @return Partition iterator.
+ */
+ def compute(part: Partition, context: TaskContext): Iterator[(K, V)] = {
+ rdd.compute(part, context)
+ }
+
+ /**
+ * Gets partitions for the given cache RDD.
+ *
+ * @return Partitions.
+ */
+ protected def getPartitions: java.util.List[Partition] = {
+ new util.ArrayList[Partition](rdd.getPartitions.toSeq)
+ }
+
+ /**
+ * Gets preferred locations for the given partition.
+ *
+ * @param split Split partition.
+ * @return Preferred locations for the given partition.
+ */
+ protected def getPreferredLocations(split: Partition): Seq[String] = {
+ rdd.getPreferredLocations(split)
+ }
+
+ @varargs def objectSql(typeName: String, sql: String, args: Any*): JavaPairRDD[K, V] =
+ JavaPairRDD.fromRDD(rdd.objectSql(typeName, sql, args:_*))
+
+ @varargs def sql(sql: String, args: Any*): DataFrame = rdd.sql(sql, args:_*)
+
+ def saveValues(jrdd: JavaRDD[V]) = rdd.saveValues(JavaRDD.toRDD(jrdd))
+
+ def saveValues[T](jrdd: JavaRDD[T], f: (T, IgniteContext) ⇒ V) = rdd.saveValues(JavaRDD.toRDD(jrdd), f)
+
+ def savePairs(jrdd: JavaPairRDD[K, V], overwrite: Boolean, skipStore: Boolean) = {
+ val rrdd: RDD[(K, V)] = JavaPairRDD.toRDD(jrdd)
+
+ rdd.savePairs(rrdd, overwrite, skipStore)
+ }
+
+ def savePairs(jrdd: JavaPairRDD[K, V]): Unit = savePairs(jrdd, overwrite = false, skipStore = false)
+
+ def savePairs[T](jrdd: JavaRDD[T], f: (T, IgniteContext) ⇒ (K, V), overwrite: Boolean = false,
+ skipStore: Boolean = false) = {
+ rdd.savePairs(JavaRDD.toRDD(jrdd), f, overwrite, skipStore)
+ }
+
+ def savePairs[T](jrdd: JavaRDD[T], f: (T, IgniteContext) ⇒ (K, V)): Unit =
+ savePairs(jrdd, f, overwrite = false, skipStore = false)
+
+ def clear(): Unit = rdd.clear()
+
+ def withKeepBinary[K1, V1](): JavaIgniteRDD[K1, V1] = new JavaIgniteRDD[K1, V1](rdd.withKeepBinary[K1, V1]())
+}
+
+object JavaIgniteRDD {
+ implicit def fromIgniteRDD[K: ClassTag, V: ClassTag](rdd: IgniteRDD[K, V]): JavaIgniteRDD[K, V] =
+ new JavaIgniteRDD[K, V](rdd)
+
+ implicit def toIgniteRDD[K, V](rdd: JavaIgniteRDD[K, V]): IgniteRDD[K, V] = rdd.rdd
+
+ def fakeClassTag[T]: ClassTag[T] = ClassTag.AnyRef.asInstanceOf[ClassTag[T]]
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteAbstractRDD.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteAbstractRDD.scala
new file mode 100644
index 0000000..fd43a33
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteAbstractRDD.scala
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl
+
+import org.apache.ignite.IgniteCache
+import org.apache.ignite.configuration.CacheConfiguration
+import org.apache.ignite.spark.IgniteContext
+import org.apache.spark.rdd.RDD
+
+import scala.reflect.ClassTag
+
+abstract class IgniteAbstractRDD[R:ClassTag, K, V] (
+ ic: IgniteContext,
+ cacheName: String,
+ cacheCfg: CacheConfiguration[K, V],
+ keepBinary: Boolean
+) extends RDD[R] (ic.sparkContext, deps = Nil) {
+ protected def ensureCache(): IgniteCache[K, V] = {
+ // Make sure to deploy the cache
+ val cache =
+ if (cacheCfg != null)
+ ic.ignite().getOrCreateCache(cacheCfg)
+ else
+ ic.ignite().getOrCreateCache(cacheName)
+
+ if (keepBinary)
+ cache.withKeepBinary()
+ else
+ cache.asInstanceOf[IgniteCache[K, V]]
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteDataFramePartition.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteDataFramePartition.scala
new file mode 100644
index 0000000..4c9c72e
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteDataFramePartition.scala
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl
+
+import org.apache.ignite.cluster.ClusterNode
+import org.apache.spark.Partition
+
+/**
+ * DataFrame partition.
+ *
+ * @param sparkPartIdx Index of the Spark partition.
+ * @param primary Primary node for the list of Ignite partitions.
+ * @param igniteParts Ignite partitions backing this Spark partition.
+ */
+case class IgniteDataFramePartition(sparkPartIdx: Int, primary: ClusterNode, igniteParts: List[Int]) extends Partition {
+ override def index: Int = sparkPartIdx
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgnitePartition.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgnitePartition.scala
new file mode 100644
index 0000000..2107a5f
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgnitePartition.scala
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl
+
+import org.apache.spark.Partition
+
+case class IgnitePartition(idx: Int) extends Partition {
+ override def index: Int = idx
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteQueryIterator.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteQueryIterator.scala
new file mode 100644
index 0000000..4165fd3
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteQueryIterator.scala
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl
+
+class IgniteQueryIterator[T, R] (
+ cur: java.util.Iterator[T],
+ conv: (T) ⇒ R
+) extends Iterator[R] {
+ override def hasNext: Boolean = cur.hasNext
+
+ override def next(): R = conv(cur.next())
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteRelationProvider.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteRelationProvider.scala
new file mode 100644
index 0000000..a4f6da1
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteRelationProvider.scala
@@ -0,0 +1,271 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl
+
+import org.apache.ignite.IgniteException
+import org.apache.ignite.configuration.IgniteConfiguration
+import org.apache.ignite.internal.IgnitionEx
+import org.apache.ignite.internal.util.IgniteUtils
+import org.apache.ignite.spark.IgniteContext
+import org.apache.ignite.spark.IgniteDataFrameSettings._
+import org.apache.ignite.spark.impl.QueryHelper.{createTable, dropTable, ensureCreateTableOptions, saveTable}
+import org.apache.spark.sql.SaveMode.{Append, Overwrite}
+import org.apache.spark.sql.ignite.IgniteExternalCatalog.{IGNITE_PROTOCOL, OPTION_GRID}
+import org.apache.spark.sql.ignite.IgniteOptimization
+import org.apache.spark.sql.sources._
+import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
+
+/**
+ * Apache Ignite relation provider.
+ */
+class IgniteRelationProvider extends RelationProvider
+ with CreatableRelationProvider
+ with DataSourceRegister {
+ /**
+ * @return "ignite" - name of relation provider.
+ */
+ override def shortName(): String = FORMAT_IGNITE
+
+ /**
+ * To create an IgniteRelation we need a link to an Ignite cluster and a table name.
+ * To refer to the cluster, the user has to specify one of the config parameters:
+ * <ul>
+ * <li><code>config</code> - path to the Ignite configuration file.</li>
+ * </ul>
+ * An existing table inside Apache Ignite should be referred to via the <code>table</code> parameter.
+ *
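+ * A minimal read sketch (the configuration path and table name below are placeholders):
+ * {{{
+ * val df = spark.read
+ * .format(FORMAT_IGNITE)
+ * .option(OPTION_CONFIG_FILE, "config/ignite-config.xml")
+ * .option(OPTION_TABLE, "person")
+ * .load()
+ * }}}
+ *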
+ * @param sqlCtx SQLContext.
+ * @param params Parameters for relation creation.
+ * @return IgniteRelation.
+ * @see IgniteRelation
+ * @see IgnitionEx#grid(String)
+ * @see org.apache.ignite.spark.IgniteDataFrameSettings.OPTION_TABLE
+ * @see org.apache.ignite.spark.IgniteDataFrameSettings.OPTION_SCHEMA
+ * @see org.apache.ignite.spark.IgniteDataFrameSettings.OPTION_CONFIG_FILE
+ */
+ override def createRelation(sqlCtx: SQLContext, params: Map[String, String]): BaseRelation =
+ createRelation(
+ igniteContext(params, sqlCtx),
+ params.getOrElse(OPTION_TABLE, throw new IgniteException("'table' must be specified.")),
+ params.get(OPTION_SCHEMA),
+ sqlCtx)
+
+ /**
+ * Saves `data` to the corresponding Ignite table and returns a relation for the saved data.
+ *
+ * To save data or create an IgniteRelation we need a link to an Ignite cluster and a table name.
+ * To refer to the cluster, the user has to specify one of the config parameters:
+ * <ul>
+ * <li><code>config</code> - path to the Ignite configuration file.</li>
+ * </ul>
+ * An existing table inside Apache Ignite should be referred to via the <code>table</code> or <code>path</code> parameter.
+ *
+ * If the table doesn't exist, it will be created.
+ * If `mode` is Overwrite and the table already exists, it will be recreated (DROP TABLE, CREATE TABLE).
+ *
+ * If a table has to be created, the user can set the following options:
+ *
+ * <ul>
+ * <li>`OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS` - required option. Comma separated list of fields for the primary key.</li>
+ * <li>`OPTION_CACHE_FOR_DDL` - required option. Existing cache name for executing SQL DDL statements.</li>
+ * <li>`OPTION_CREATE_TABLE_PARAMETERS` - Ignite specific parameters for a new table. See WITH [https://apacheignite-sql.readme.io/docs/create-table].</li>
+ * </ul>
+ *
+ * Data is written 'by partition'. The user can set `OPTION_WRITE_PARTITIONS_NUM` - the number of partitions for the data.
+ *
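+ * A minimal write sketch (the configuration path, table name and key field below are placeholders):
+ * {{{
+ * df.write
+ * .format(FORMAT_IGNITE)
+ * .option(OPTION_CONFIG_FILE, "config/ignite-config.xml")
+ * .option(OPTION_TABLE, "person")
+ * .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id")
+ * .mode(SaveMode.Append)
+ * .save()
+ * }}}
+ *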
+ * @param sqlCtx SQLContext.
+ * @param mode Save mode.
+ * @param params Additional parameters.
+ * @param data Data to save.
+ * @return IgniteRelation.
+ */
+ override def createRelation(sqlCtx: SQLContext,
+ mode: SaveMode,
+ params: Map[String, String],
+ data: DataFrame): BaseRelation = {
+
+ val ctx = igniteContext(params, sqlCtx)
+
+ val tblName = tableName(params)
+
+ val tblInfoOption = sqlTableInfo(ctx.ignite(), tblName, params.get(OPTION_SCHEMA))
+
+ if (tblInfoOption.isDefined) {
+ mode match {
+ case Overwrite ⇒
+ ensureCreateTableOptions(data.schema, params, ctx)
+
+ dropTable(tblName, ctx.ignite())
+
+ val createTblOpts = params.get(OPTION_CREATE_TABLE_PARAMETERS)
+
+ createTable(data.schema,
+ tblName,
+ primaryKeyFields(params),
+ createTblOpts,
+ ctx.ignite())
+
+ saveTable(data,
+ tblName,
+ params.get(OPTION_SCHEMA),
+ ctx,
+ params.get(OPTION_STREAMER_ALLOW_OVERWRITE).map(_.toBoolean),
+ params.get(OPTION_STREAMER_SKIP_STORE).map(_.toBoolean),
+ params.get(OPTION_STREAMER_FLUSH_FREQUENCY).map(_.toLong),
+ params.get(OPTION_STREAMER_PER_NODE_BUFFER_SIZE).map(_.toInt),
+ params.get(OPTION_STREAMER_PER_NODE_PARALLEL_OPERATIONS).map(_.toInt))
+
+ case Append ⇒
+ saveTable(data,
+ tblName,
+ params.get(OPTION_SCHEMA),
+ ctx,
+ params.get(OPTION_STREAMER_ALLOW_OVERWRITE).map(_.toBoolean),
+ params.get(OPTION_STREAMER_SKIP_STORE).map(_.toBoolean),
+ params.get(OPTION_STREAMER_FLUSH_FREQUENCY).map(_.toLong),
+ params.get(OPTION_STREAMER_PER_NODE_BUFFER_SIZE).map(_.toInt),
+ params.get(OPTION_STREAMER_PER_NODE_PARALLEL_OPERATIONS).map(_.toInt))
+
+ case SaveMode.ErrorIfExists =>
+ throw new IgniteException(s"Table or view '$tblName' already exists. SaveMode: ErrorIfExists.")
+
+ case SaveMode.Ignore =>
+ // With `SaveMode.Ignore` mode, if table already exists, the save operation is expected
+ // to not save the contents of the DataFrame and to not change the existing data.
+ // Therefore, it is okay to do nothing here and then just return the relation below.
+ }
+ }
+ else {
+ ensureCreateTableOptions(data.schema, params, ctx)
+
+ val primaryKeyFields = params(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS).split(",")
+
+ val createTblOpts = params.get(OPTION_CREATE_TABLE_PARAMETERS)
+
+ createTable(data.schema,
+ tblName,
+ primaryKeyFields,
+ createTblOpts,
+ ctx.ignite())
+
+ saveTable(data,
+ tblName,
+ params.get(OPTION_SCHEMA),
+ ctx,
+ params.get(OPTION_STREAMER_ALLOW_OVERWRITE).map(_.toBoolean),
+ params.get(OPTION_STREAMER_SKIP_STORE).map(_.toBoolean),
+ params.get(OPTION_STREAMER_FLUSH_FREQUENCY).map(_.toLong),
+ params.get(OPTION_STREAMER_PER_NODE_BUFFER_SIZE).map(_.toInt),
+ params.get(OPTION_STREAMER_PER_NODE_PARALLEL_OPERATIONS).map(_.toInt))
+ }
+
+ createRelation(ctx,
+ tblName,
+ params.get(OPTION_SCHEMA),
+ sqlCtx)
+ }
+
+ /**
+ * @param igniteCtx Ignite context.
+ * @param tblName Table name.
+ * @param schema Optional schema name.
+ * @param sqlCtx SQL context.
+ * @return Ignite SQL relation.
+ */
+ private def createRelation(igniteCtx: IgniteContext, tblName: String, schema: Option[String], sqlCtx: SQLContext):
+ BaseRelation = {
+ val optimizationDisabled =
+ sqlCtx.sparkSession.conf.get(OPTION_DISABLE_SPARK_SQL_OPTIMIZATION, "false").toBoolean
+
+ val experimentalMethods = sqlCtx.sparkSession.sessionState.experimentalMethods
+
+ if (optimizationDisabled) {
+ experimentalMethods.extraOptimizations =
+ experimentalMethods.extraOptimizations.filter(_ != IgniteOptimization)
+ }
+ else {
+ val optimizationExists = experimentalMethods.extraOptimizations.contains(IgniteOptimization)
+
+ if (!optimizationExists)
+ experimentalMethods.extraOptimizations = experimentalMethods.extraOptimizations :+ IgniteOptimization
+ }
+
+ IgniteSQLRelation(
+ igniteCtx,
+ tblName,
+ schema,
+ sqlCtx)
+ }
+
+ /**
+ * @param params Params.
+ * @param sqlCtx SQL Context.
+ * @return IgniteContext.
+ */
+ private def igniteContext(params: Map[String, String], sqlCtx: SQLContext): IgniteContext = {
+ val igniteHome = IgniteUtils.getIgniteHome
+
+ def configProvider: () ⇒ IgniteConfiguration = {
+ if (params.contains(OPTION_CONFIG_FILE))
+ () ⇒ {
+ IgniteContext.setIgniteHome(igniteHome)
+
+ val cfg = IgnitionEx.loadConfiguration(params(OPTION_CONFIG_FILE)).get1()
+
+ cfg.setClientMode(true)
+
+ cfg
+ }
+ else if (params.contains(OPTION_GRID))
+ () ⇒ {
+ IgniteContext.setIgniteHome(igniteHome)
+
+ val cfg = ignite(params(OPTION_GRID)).configuration()
+
+ cfg.setClientMode(true)
+
+ cfg
+ }
+ else
+ throw new IgniteException("'config' must be specified to connect to ignite cluster.")
+ }
+
+ IgniteContext(sqlCtx.sparkContext, configProvider)
+ }
+
+ /**
+ * @param params Params.
+ * @return Table name.
+ */
+ private def tableName(params: Map[String, String]): String = {
+ val tblName = params.getOrElse(OPTION_TABLE,
+ params.getOrElse("path", throw new IgniteException("'table' or 'path' must be specified.")))
+
+ if (tblName.startsWith(IGNITE_PROTOCOL))
+ tblName.replace(IGNITE_PROTOCOL, "").toUpperCase()
+ else
+ tblName.toUpperCase
+ }
+
+ /**
+ * @param params Params.
+ * @return Sequence of primary key fields.
+ */
+ private def primaryKeyFields(params: Map[String, String]): Seq[String] =
+ params(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS).split(",")
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteSQLAccumulatorRelation.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteSQLAccumulatorRelation.scala
new file mode 100644
index 0000000..6eb600a
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteSQLAccumulatorRelation.scala
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl
+
+import org.apache.ignite.spark.impl
+import org.apache.ignite.spark.impl.optimization.accumulator.{JoinSQLAccumulator, QueryAccumulator}
+import org.apache.ignite.spark.impl.optimization.isSimpleTableAcc
+import org.apache.spark.Partition
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.{Row, SQLContext}
+import org.apache.spark.sql.sources.{BaseRelation, TableScan}
+import org.apache.spark.sql.types.{Metadata, StructField, StructType}
+
+/**
+ * Relation to query data using the query generated by <code>QueryAccumulator</code>.
+ * <code>QueryAccumulator</code> is generated by <code>IgniteOptimization</code>.
+ *
+ * @see IgniteOptimization
+ */
+class IgniteSQLAccumulatorRelation[K, V](val acc: QueryAccumulator)
+ (@transient val sqlContext: SQLContext) extends BaseRelation with TableScan {
+
+ /** @inheritdoc */
+ override def schema: StructType =
+ StructType(acc.output.map { c ⇒
+ StructField(
+ name = c.name,
+ dataType = c.dataType,
+ nullable = c.nullable,
+ metadata = Metadata.empty)
+ })
+
+ /** @inheritdoc */
+ override def buildScan(): RDD[Row] =
+ IgniteSQLDataFrameRDD[K, V](
+ acc.igniteQueryContext.igniteContext,
+ acc.igniteQueryContext.cacheName,
+ schema,
+ acc.compileQuery(),
+ List.empty,
+ calcPartitions,
+ isDistributeJoin(acc)
+ )
+
+ /** @inheritdoc */
+ override def toString: String =
+ s"IgniteSQLAccumulatorRelation(columns=[${acc.output.map(_.name).mkString(", ")}], qry=${acc.compileQuery()})"
+
+ /**
+ * @return Collection of Spark partitions.
+ */
+ private def calcPartitions: Array[Partition] =
+ // If the accumulator stores a complex query (join, aggregation, limit, order, etc.),
+ // we have to load data from Ignite as a single Spark partition.
+ if (!isSimpleTableAcc(acc)) {
+ val aff = acc.igniteQueryContext.igniteContext.ignite().affinity(acc.igniteQueryContext.cacheName)
+
+ val parts = aff.partitions()
+
+ Array(IgniteDataFramePartition(0, primary = null, igniteParts = (0 until parts).toList))
+ }
+ else
+ impl.calcPartitions(acc.igniteQueryContext.igniteContext, acc.igniteQueryContext.cacheName)
+
+ /**
+ * @param acc Plan.
+ * @return True if the plan or one of its children is a `JoinSQLAccumulator`, false otherwise.
+ */
+ private def isDistributeJoin(acc: LogicalPlan): Boolean =
+ acc match {
+ case _: JoinSQLAccumulator ⇒
+ true
+
+ case _ ⇒
+ acc.children.exists(isDistributeJoin)
+ }
+}
+
+object IgniteSQLAccumulatorRelation {
+ def apply[K, V](acc: QueryAccumulator): IgniteSQLAccumulatorRelation[K, V] =
+ new IgniteSQLAccumulatorRelation[K, V](acc)(acc.igniteQueryContext.sqlContext)
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteSQLDataFrameRDD.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteSQLDataFrameRDD.scala
new file mode 100644
index 0000000..ec502fc
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteSQLDataFrameRDD.scala
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl
+
+import org.apache.ignite.cache.CacheMode
+import org.apache.ignite.cache.query.SqlFieldsQuery
+import org.apache.ignite.configuration.CacheConfiguration
+import org.apache.ignite.spark.{IgniteContext, IgniteRDD}
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.{Partition, TaskContext}
+import java.util.{List ⇒ JList}
+
+/**
+ * Implementation of Spark RDD for Apache Ignite to support Data Frame API.
+ */
+class IgniteSQLDataFrameRDD[K, V](
+ ic: IgniteContext,
+ cacheName: String,
+ schema: StructType,
+ qryStr: String,
+ args: List[_],
+ parts: Array[Partition],
+ distributedJoin: Boolean) extends
+ IgniteSqlRDD[Row, JList[_], K, V](
+ ic,
+ cacheName,
+ cacheCfg = null,
+ qry = null,
+ r ⇒ new GenericRowWithSchema(r.toArray.map(IgniteRDD.convertIfNeeded), schema),
+ keepBinary = true,
+ parts) {
+
+ /**
+ * Executes an Ignite query for this RDD and returns an iterator over the results.
+ *
+ * @param partition Partition.
+ * @param context TaskContext.
+ * @return Results of query for specific partition.
+ */
+ override def compute(partition: Partition, context: TaskContext): Iterator[Row] = {
+ val qry0 = new SqlFieldsQuery(qryStr)
+
+ qry0.setDistributedJoins(distributedJoin)
+
+ if (args.nonEmpty)
+ qry0.setArgs(args.map(_.asInstanceOf[Object]): _*)
+
+ val ccfg = ic.ignite().cache[K, V](cacheName).getConfiguration(classOf[CacheConfiguration[K, V]])
+
+ val ignitePartition = partition.asInstanceOf[IgniteDataFramePartition]
+
+ if (ccfg.getCacheMode != CacheMode.REPLICATED && ignitePartition.igniteParts.nonEmpty && !distributedJoin)
+ qry0.setPartitions(ignitePartition.igniteParts: _*)
+
+ qry = qry0
+
+ super.compute(partition, context)
+ }
+}
+
+object IgniteSQLDataFrameRDD {
+ def apply[K, V](ic: IgniteContext,
+ cacheName: String,
+ schema: StructType,
+ qryStr: String,
+ args: List[_],
+ parts: Array[Partition] = Array(IgnitePartition(0)),
+ distributedJoin: Boolean = false): IgniteSQLDataFrameRDD[K, V] = {
+ new IgniteSQLDataFrameRDD[K, V](ic, cacheName, schema, qryStr, args, parts, distributedJoin)
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteSQLRelation.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteSQLRelation.scala
new file mode 100644
index 0000000..c8d5122
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteSQLRelation.scala
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl
+
+import org.apache.ignite.IgniteException
+import org.apache.ignite.internal.processors.query.{GridQueryTypeDescriptor, QueryTypeDescriptorImpl}
+import org.apache.ignite.spark.{IgniteContext, IgniteRDD, impl}
+import org.apache.spark.Partition
+import org.apache.spark.internal.Logging
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.sources._
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.{Row, SQLContext}
+
+import scala.collection.JavaConversions._
+
+/**
+ * Apache Ignite implementation of Spark BaseRelation with PrunedFilteredScan for Ignite SQL Tables
+ */
+class IgniteSQLRelation[K, V](
+ private[apache] val ic: IgniteContext,
+ private[apache] val tableName: String,
+ private[apache] val schemaName: Option[String])
+ (@transient val sqlContext: SQLContext) extends BaseRelation with PrunedFilteredScan with Logging {
+
+ /**
+ * @return Schema of Ignite SQL table.
+ */
+ override def schema: StructType =
+ sqlTableInfo(ic.ignite(), tableName, schemaName)
+ .map(IgniteSQLRelation.schema)
+ .getOrElse(throw new IgniteException(s"Unknown table $tableName"))
+
+ /**
+ * Builds Apache Ignite SQL Query for given table, columns and filters.
+ *
+ * @param columns Columns to select.
+ * @param filters Filters to apply.
+ * @return Apache Ignite RDD implementation.
+ */
+ override def buildScan(columns: Array[String], filters: Array[Filter]): RDD[Row] = {
+ val qryAndArgs = queryAndArgs(columns, filters)
+
+ IgniteSQLDataFrameRDD[K, V](ic, cacheName, schema, qryAndArgs._1, qryAndArgs._2, calcPartitions(filters))
+ }
+
+ override def toString = s"IgniteSQLRelation[table=$tableName]"
+
+ /**
+ * @param columns Columns to select.
+ * @param filters Filters to apply.
+ * @return SQL query string and arguments for it.
+ */
+ private def queryAndArgs(columns: Array[String], filters: Array[Filter]): (String, List[Any]) = {
+ val columnsStr =
+ if (columns.isEmpty)
+ "*"
+ else
+ columns.mkString(",")
+
+ //Create the corresponding Ignite SQL query.
+ //The query will be executed by the Ignite SQL engine.
+ val qryAndArgs = filters match {
+ case Array(_, _*) ⇒
+ val where = QueryUtils.compileWhere(filters)
+
+ (s"SELECT $columnsStr FROM $tableName WHERE ${where._1}", where._2)
+
+ case _ ⇒
+ (s"SELECT $columnsStr FROM $tableName", List.empty)
+ }
+
+ logInfo(qryAndArgs._1)
+
+ qryAndArgs
+ }
+
+ /**
+ * Computes Spark partitions for this relation.
+ *
+ * @return Array of IgniteDataFramePartition.
+ */
+ private def calcPartitions(filters: Array[Filter]): Array[Partition] =
+ impl.calcPartitions(ic, cacheName)
+
+ /**
+ * Cache name for a table name.
+ */
+ private lazy val cacheName: String =
+ sqlCacheName(ic.ignite(), tableName, schemaName)
+ .getOrElse(throw new IgniteException(s"Unknown table $tableName"))
+}
+
+object IgniteSQLRelation {
+ /**
+ * Converts an Apache Ignite table descriptor (<code>GridQueryTypeDescriptor</code>) to a Spark <code>StructType</code>.
+ *
+ * @param table Ignite table description.
+ * @return Spark table description.
+ */
+ def schema(table: GridQueryTypeDescriptor): StructType = {
+ //Partition columns have to be at the end of the list.
+ //See `org.apache.spark.sql.catalyst.catalog.CatalogTable#partitionSchema`
+ val columns = table.fields.toList.sortBy(c ⇒ isKeyColumn(table, c._1))
+
+ StructType(columns.map { case (name, dataType) ⇒
+ StructField(
+ name = table.asInstanceOf[QueryTypeDescriptorImpl].aliases.getOrDefault(name, name),
+ dataType = IgniteRDD.dataType(dataType.getName, name),
+ nullable = !isKeyColumn(table, name),
+ metadata = Metadata.empty)
+ })
+ }
+
+ def apply[K, V](ic: IgniteContext, tableName: String, schemaName: Option[String],
+ sqlContext: SQLContext): IgniteSQLRelation[K, V] =
+ new IgniteSQLRelation[K, V](ic, tableName, schemaName)(sqlContext)
+}
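
In normal use this relation is instantiated by the Ignite data source rather than by hand, but a direct, hypothetical sketch (reusing `spark` and `ic` from the earlier sketch, and assuming a `PERSON` table in the default `PUBLIC` schema) shows how the pieces fit together:

[source,scala]
----
import org.apache.ignite.spark.impl.IgniteSQLRelation
import org.apache.spark.sql.sources.EqualTo

// The schema is resolved from the Ignite table descriptor; an unknown table throws IgniteException.
val relation = IgniteSQLRelation[Long, AnyRef](ic, "PERSON", Some("PUBLIC"), spark.sqlContext)
relation.schema.printTreeString()

// Column pruning and filter push-down go through PrunedFilteredScan.buildScan.
val scanned = relation.buildScan(Array("ID", "NAME"), Array(EqualTo("NAME", "Ivan")))
println(scanned.count())
----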
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteSqlRDD.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteSqlRDD.scala
new file mode 100644
index 0000000..9c4378c
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/IgniteSqlRDD.scala
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl
+
+import org.apache.ignite.cache.query.Query
+import org.apache.ignite.configuration.CacheConfiguration
+import org.apache.ignite.spark.IgniteContext
+import org.apache.spark.{Partition, TaskContext}
+
+import scala.reflect.ClassTag
+
+class IgniteSqlRDD[R: ClassTag, T, K, V](
+ ic: IgniteContext,
+ cacheName: String,
+ cacheCfg: CacheConfiguration[K, V],
+ var qry: Query[T],
+ conv: (T) ⇒ R,
+ keepBinary: Boolean,
+ partitions: Array[Partition] = Array(IgnitePartition(0))
+) extends IgniteAbstractRDD[R, K, V](ic, cacheName, cacheCfg, keepBinary) {
+ override def compute(split: Partition, context: TaskContext): Iterator[R] = {
+ val cur = ensureCache().query(qry)
+
+ TaskContext.get().addTaskCompletionListener[Unit]((_) ⇒ cur.close())
+
+ new IgniteQueryIterator[T, R](cur.iterator(), conv)
+ }
+
+ override protected def getPartitions: Array[Partition] = partitions
+}
+
+object IgniteSqlRDD {
+ def apply[R: ClassTag, T, K, V](ic: IgniteContext, cacheName: String, cacheCfg: CacheConfiguration[K, V],
+ qry: Query[T], conv: (T) ⇒ R, keepBinary: Boolean,
+ partitions: Array[Partition] = Array(IgnitePartition(0))): IgniteSqlRDD[R, T, K, V] =
+ new IgniteSqlRDD(ic, cacheName, cacheCfg, qry, conv, keepBinary, partitions)
+}
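
A hypothetical direct use of this RDD with an `SqlFieldsQuery`, again reusing `ic` from the sketch above; the cache configuration is passed as `null` on the assumption that `personCache` already exists:

[source,scala]
----
import org.apache.ignite.cache.query.SqlFieldsQuery
import org.apache.ignite.spark.impl.IgniteSqlRDD

val qry = new SqlFieldsQuery("SELECT NAME FROM PERSON WHERE ID > ?").setArgs(Long.box(10L))

// Each result row is a java.util.List of columns; `conv` extracts the single NAME column.
val names = IgniteSqlRDD[String, java.util.List[_], Long, AnyRef](
    ic, "personCache", null, qry,
    (row: java.util.List[_]) => row.get(0).asInstanceOf[String],
    false)

names.collect().foreach(println)
----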
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/QueryHelper.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/QueryHelper.scala
new file mode 100644
index 0000000..625f449
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/QueryHelper.scala
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl
+
+import org.apache.ignite.cache.query.SqlFieldsQuery
+import org.apache.ignite.internal.IgniteEx
+import org.apache.ignite.internal.processors.query.QueryTypeDescriptorImpl
+import org.apache.ignite.internal.processors.query.QueryUtils.DFLT_SCHEMA
+import org.apache.ignite.spark.IgniteContext
+import org.apache.ignite.spark.IgniteDataFrameSettings._
+import org.apache.ignite.spark.impl.QueryUtils.{compileCreateTable, compileDropTable, compileInsert}
+import org.apache.ignite.{Ignite, IgniteException}
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.{DataFrame, Row}
+
+/**
+ * Helper object for executing DDL queries and saving DataFrames into Ignite tables.
+ */
+private[apache] object QueryHelper {
+ /**
+ * Drops provided table.
+ *
+ * @param tableName Table name.
+ * @param ignite Ignite.
+ */
+ def dropTable(tableName: String, ignite: Ignite): Unit = {
+ val qryProcessor = ignite.asInstanceOf[IgniteEx].context().query()
+
+ val qry = compileDropTable(tableName)
+
+ qryProcessor.querySqlFields(new SqlFieldsQuery(qry), true).getAll
+ }
+
+ /**
+ * Creates table.
+ *
+ * @param schema Schema.
+ * @param tblName Table name.
+ * @param primaryKeyFields Primary key fields.
+ * @param createTblOpts Ignite specific options.
+ * @param ignite Ignite.
+ */
+ def createTable(schema: StructType, tblName: String, primaryKeyFields: Seq[String], createTblOpts: Option[String],
+ ignite: Ignite): Unit = {
+ val qryProcessor = ignite.asInstanceOf[IgniteEx].context().query()
+
+ val qry = compileCreateTable(schema, tblName, primaryKeyFields, createTblOpts)
+
+ qryProcessor.querySqlFields(new SqlFieldsQuery(qry), true).getAll
+ }
+
+ /**
+ * Ensures all options are specified correctly to create a table based on the provided `schema`.
+ *
+ * @param schema Schema of the new table.
+ * @param params Parameters.
+ * @param ctx Ignite context.
+ */
+ def ensureCreateTableOptions(schema: StructType, params: Map[String, String], ctx: IgniteContext): Unit = {
+ if (!params.contains(OPTION_TABLE) && !params.contains("path"))
+ throw new IgniteException("'table' must be specified.")
+
+ if (params.contains(OPTION_SCHEMA) && !params(OPTION_SCHEMA).equalsIgnoreCase(DFLT_SCHEMA)) {
+ throw new IgniteException("Creating new tables in schema " + params(OPTION_SCHEMA) + " is not valid, tables"
+ + " must only be created in " + DFLT_SCHEMA)
+ }
+
+ params.get(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS)
+ .map(_.split(','))
+ .getOrElse(throw new IgniteException("Can't create table! Primary key fields have to be specified."))
+ .map(_.trim)
+ .foreach { pkField ⇒
+ if (pkField == "")
+ throw new IgniteException("PK field can't be empty.")
+
+ if (!schema.exists(_.name.equalsIgnoreCase(pkField)))
+ throw new IgniteException(s"'$pkField' doesn't exists in DataFrame schema.")
+
+ }
+ }
+
+ /**
+ * Saves data to the table.
+ *
+ * @param data Data.
+ * @param tblName Table name.
+ * @param schemaName Optional schema name.
+ * @param ctx Ignite context.
+ * @param streamerAllowOverwrite Flag enabling overwriting existing values in cache.
+ * @param streamerSkipStore Flag to skip the configured cache store for streamed updates.
+ * @param streamerFlushFrequency Insert query streamer automatic flush frequency.
+ * @param streamerPerNodeBufferSize Insert query streamer size of per node query buffer.
+ * @param streamerPerNodeParallelOperations Insert query streamer maximum number of parallel operations for a single node.
+ *
+ * @see [[org.apache.ignite.IgniteDataStreamer]]
+ * @see [[org.apache.ignite.IgniteDataStreamer#allowOverwrite(boolean)]]
+ * @see [[org.apache.ignite.IgniteDataStreamer#autoFlushFrequency(long)]]
+ * @see [[org.apache.ignite.IgniteDataStreamer#perNodeBufferSize(int)]]
+ * @see [[org.apache.ignite.IgniteDataStreamer#perNodeParallelOperations(int)]]
+ */
+ def saveTable(data: DataFrame,
+ tblName: String,
+ schemaName: Option[String],
+ ctx: IgniteContext,
+ streamerAllowOverwrite: Option[Boolean],
+ streamerSkipStore: Option[Boolean],
+ streamerFlushFrequency: Option[Long],
+ streamerPerNodeBufferSize: Option[Int],
+ streamerPerNodeParallelOperations: Option[Int]
+ ): Unit = {
+ val insertQry = compileInsert(tblName, data.schema)
+
+ data.rdd.foreachPartition(iterator =>
+ savePartition(iterator,
+ insertQry,
+ tblName,
+ schemaName,
+ ctx,
+ streamerAllowOverwrite,
+ streamerSkipStore,
+ streamerFlushFrequency,
+ streamerPerNodeBufferSize,
+ streamerPerNodeParallelOperations
+ ))
+ }
+
+ /**
+ * Saves partition data to the Ignite table.
+ *
+ * @param iterator Data iterator.
+ * @param insertQry Insert query.
+ * @param tblName Table name.
+ * @param schemaName Optional schema name.
+ * @param ctx Ignite context.
+ * @param streamerAllowOverwrite Flag enabling overwriting existing values in cache.
+ * @param streamerSkipStore Flag to skip the configured cache store for streamed updates.
+ * @param streamerFlushFrequency Insert query streamer automatic flush frequency.
+ * @param streamerPerNodeBufferSize Insert query streamer size of per node query buffer.
+ * @param streamerPerNodeParallelOperations Insert query streamer maximum number of parallel operations for a single node.
+ *
+ * @see [[org.apache.ignite.IgniteDataStreamer]]
+ * @see [[org.apache.ignite.IgniteDataStreamer#allowOverwrite(boolean)]]
+ * @see [[org.apache.ignite.IgniteDataStreamer#autoFlushFrequency(long)]]
+ * @see [[org.apache.ignite.IgniteDataStreamer#perNodeBufferSize(int)]]
+ * @see [[org.apache.ignite.IgniteDataStreamer#perNodeParallelOperations(int)]]
+ */
+ private def savePartition(iterator: Iterator[Row],
+ insertQry: String,
+ tblName: String,
+ schemaName: Option[String],
+ ctx: IgniteContext,
+ streamerAllowOverwrite: Option[Boolean],
+ streamerSkipStore: Option[Boolean],
+ streamerFlushFrequency: Option[Long],
+ streamerPerNodeBufferSize: Option[Int],
+ streamerPerNodeParallelOperations: Option[Int]
+ ): Unit = {
+ val tblInfo = sqlTableInfo(ctx.ignite(), tblName, schemaName).get.asInstanceOf[QueryTypeDescriptorImpl]
+
+ val streamer = ctx.ignite().dataStreamer(tblInfo.cacheName)
+
+ streamerAllowOverwrite.foreach(v ⇒ streamer.allowOverwrite(v))
+
+ streamerSkipStore.foreach(v ⇒ streamer.skipStore(v))
+
+ streamerFlushFrequency.foreach(v ⇒ streamer.autoFlushFrequency(v))
+
+ streamerPerNodeBufferSize.foreach(v ⇒ streamer.perNodeBufferSize(v))
+
+ streamerPerNodeParallelOperations.foreach(v ⇒ streamer.perNodeParallelOperations(v))
+
+ try {
+ val qryProcessor = ctx.ignite().asInstanceOf[IgniteEx].context().query()
+
+ iterator.foreach { row ⇒
+ val schema = row.schema
+
+ val args = schema.map { f ⇒
+ row.get(row.fieldIndex(f.name)).asInstanceOf[Object]
+ }
+
+ qryProcessor.streamUpdateQuery(tblInfo.cacheName,
+ tblInfo.schemaName, streamer, insertQry, args.toArray, "spark")
+ }
+ }
+ finally {
+ streamer.close()
+ }
+
+ }
+}
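
Because `QueryHelper` is `private[apache]`, it is normally driven by the Ignite data source when a DataFrame is written in the Ignite format. The hypothetical sketch below, callable only from code under the `org.apache` package (as the module's own tests are), reuses `ic` and assumes a DataFrame `personDf` with ID and NAME columns; it shows the create-then-stream flow implemented above:

[source,scala]
----
import org.apache.ignite.spark.impl.QueryHelper

// CREATE TABLE ... WITH "template=replicated" is executed through the Ignite query processor.
QueryHelper.createTable(personDf.schema, "PERSON", Seq("ID"), Some("template=replicated"), ic.ignite())

// Rows are streamed per partition through IgniteDataStreamer; unset options keep streamer defaults.
QueryHelper.saveTable(personDf, "PERSON", None, ic,
    streamerAllowOverwrite = Some(true),
    streamerSkipStore = None,
    streamerFlushFrequency = None,
    streamerPerNodeBufferSize = None,
    streamerPerNodeParallelOperations = None)
----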
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/QueryUtils.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/QueryUtils.scala
new file mode 100644
index 0000000..79aa523
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/QueryUtils.scala
@@ -0,0 +1,225 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl
+
+import org.apache.ignite.IgniteException
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.sources._
+import org.apache.spark.sql.types._
+
+/**
+ * Utility class for building SQL queries.
+ */
+private[impl] object QueryUtils extends Logging {
+ /**
+ * Builds `where` part of SQL query.
+ *
+ * @param filters Filters to apply.
+ * @return Tuple containing the `where` string and a `List[Any]` of query parameters.
+ */
+ def compileWhere(filters: Seq[Filter]): (String, List[Any]) =
+ filters.foldLeft(("", List[Any]()))(buildSingleClause)
+
+ /**
+ * Builds `insert` query for provided table and schema.
+ *
+ * @param tblName Table name.
+ * @param tblSchema Schema.
+ * @return SQL query to insert data into table.
+ */
+ def compileInsert(tblName: String, tblSchema: StructType): String = {
+ val columns = tblSchema.fields.map(_.name).mkString(",")
+ val placeholder = tblSchema.fields.map(_ ⇒ "?").mkString(",")
+
+ val qry = s"INSERT INTO $tblName($columns) VALUES($placeholder)"
+
+ logInfo(qry)
+
+ qry
+ }
+
+ /**
+ * Builds `drop table` query.
+ *
+ * @param tblName Table name.
+ * @return SQL query to drop table.
+ */
+ def compileDropTable(tblName: String): String = {
+ val qry = s"DROP TABLE ${tblName}"
+
+ logInfo(qry)
+
+ qry
+ }
+
+ /**
+ * Builds `create table` query.
+ *
+ * @param schema Schema.
+ * @param tblName Table name.
+ * @param primaryKeyFields Primary key fields.
+ * @param createTblOpts Ignite specific options for table.
+ * @return SQL query to create table.
+ */
+ def compileCreateTable(schema: StructType, tblName: String, primaryKeyFields: Seq[String], createTblOpts: Option[String]): String = {
+ val pk = s", PRIMARY KEY (${primaryKeyFields.mkString(",")})"
+
+ val withParams = createTblOpts.map(w ⇒ s"""WITH \"$w\"""").getOrElse("")
+
+ val qry = s"CREATE TABLE $tblName (${schema.map(compileColumn).mkString(", ")} $pk) $withParams"
+
+ logInfo(qry)
+
+ qry
+ }
+
+ /**
+ * @param field Column.
+ * @return SQL query part for column.
+ */
+ private def compileColumn(field: StructField): String = {
+ val col = s"${field.name} ${dataType(field)}"
+
+ if (!field.nullable)
+ col + " NOT NULL"
+ else
+ col
+ }
+
+ /**
+ * Maps a Spark column type to the corresponding Ignite SQL data type.
+ *
+ * @param field Field.
+ * @return SQL data type.
+ */
+ private def dataType(field: StructField): String = field.dataType match {
+ case BooleanType ⇒
+ "BOOLEAN"
+
+ case ByteType ⇒
+ "TINYINT"
+
+ case ShortType ⇒
+ "SMALLINT"
+
+ case IntegerType ⇒
+ "INT"
+
+ case LongType ⇒
+ "BIGINT"
+
+ case FloatType ⇒
+ "FLOAT"
+
+ case DoubleType ⇒
+ "DOUBLE"
+
+ //For now Ignite doesn't provide correct information about DECIMAL column precision and scale.
+ //All we have is default scale and precision.
+ //Just replace it with some "common sense" values.
+ case decimal: DecimalType if decimal.precision == 10 && decimal.scale == 0 ⇒
+ s"DECIMAL(10, 5)"
+
+ case decimal: DecimalType ⇒
+ s"DECIMAL(${decimal.precision}, ${decimal.scale})"
+
+ case StringType ⇒
+ "VARCHAR"
+
+ case DateType ⇒
+ "DATE"
+
+ case TimestampType ⇒
+ "TIMESTAMP"
+
+ case _ ⇒
+ throw new IgniteException(s"Unsupported data type ${field.dataType}")
+ }
+
+ /**
+ * Adds a single where clause to `state` and returns the new state.
+ *
+ * @param state Current `where` state.
+ * @param clause Clause to add.
+ * @return `where` with given clause.
+ */
+ private def buildSingleClause(state: (String, List[Any]), clause: Filter): (String, List[Any]) = {
+ val filterStr = state._1
+
+ val params = state._2
+
+ clause match {
+ case EqualTo(attr, value) ⇒ (addStrClause(filterStr, s"$attr = ?"), params :+ value)
+
+ case EqualNullSafe(attr, value) ⇒ (addStrClause(filterStr, s"($attr IS NULL OR $attr = ?)"), params :+ value)
+
+ case GreaterThan(attr, value) ⇒ (addStrClause(filterStr, s"$attr > ?"), params :+ value)
+
+ case GreaterThanOrEqual(attr, value) ⇒ (addStrClause(filterStr, s"$attr >= ?"), params :+ value)
+
+ case LessThan(attr, value) ⇒ (addStrClause(filterStr, s"$attr < ?"), params :+ value)
+
+ case LessThanOrEqual(attr, value) ⇒ (addStrClause(filterStr, s"$attr <= ?"), params :+ value)
+
+ case In(attr, values) ⇒ (addStrClause(filterStr, s"$attr IN (${values.map(_ ⇒ "?").mkString(",")})"), params ++ values)
+
+ case IsNull(attr) ⇒ (addStrClause(filterStr, s"$attr IS NULL"), params)
+
+ case IsNotNull(attr) ⇒ (addStrClause(filterStr, s"$attr IS NOT NULL"), params)
+
+ case And(left, right) ⇒
+ val leftClause = buildSingleClause(("", params), left)
+ val rightClause = buildSingleClause(("", leftClause._2), right)
+
+ (addStrClause(filterStr, s"${leftClause._1} AND ${rightClause._1}"), rightClause._2)
+
+ case Or(left, right) ⇒
+ val leftClause = buildSingleClause(("", params), left)
+ val rightClause = buildSingleClause(("", leftClause._2), right)
+
+ (addStrClause(filterStr, s"${leftClause._1} OR ${rightClause._1}"), rightClause._2)
+
+ case Not(child) ⇒
+ val innerClause = buildSingleClause(("", params), child)
+
+ (addStrClause(filterStr, s"NOT ${innerClause._1}"), innerClause._2)
+
+ case StringStartsWith(attr, value) ⇒
+ (addStrClause(filterStr, s"$attr LIKE ?"), params :+ (value + "%"))
+
+ case StringEndsWith(attr, value) ⇒
+ (addStrClause(filterStr, s"$attr LIKE ?"), params :+ ("%" + value))
+
+ case StringContains(attr, value) ⇒
+ (addStrClause(filterStr, s"$attr LIKE ?"), params :+ ("%" + value + "%"))
+ }
+ }
+
+ /**
+ * Utility method to add a clause to the SQL WHERE string.
+ *
+ * @param filterStr Current filter string.
+ * @param clause Clause to add.
+ * @return Filter string.
+ */
+ private def addStrClause(filterStr: String, clause: String) =
+ if (filterStr.isEmpty)
+ clause
+ else
+ filterStr + " AND " + clause
+}
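
A worked example of the WHERE compilation above, as it could be exercised from code inside the `org.apache.ignite.spark.impl` package (the object is `private[impl]`); the column names are illustrative:

[source,scala]
----
import org.apache.spark.sql.sources.{EqualTo, GreaterThan, StringStartsWith}

val (where, params) = QueryUtils.compileWhere(Seq(
    EqualTo("NAME", "Ivan"),
    GreaterThan("AGE", 18),
    StringStartsWith("CITY", "New")))

// where  == "NAME = ? AND AGE > ? AND CITY LIKE ?"
// params == List("Ivan", 18, "New%")
----

For a two-column schema, `compileInsert` likewise produces a parameterized statement such as `INSERT INTO PERSON(ID,NAME) VALUES(?,?)`.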
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/AggregateExpressions.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/AggregateExpressions.scala
new file mode 100644
index 0000000..3e6b6b5
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/AggregateExpressions.scala
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl.optimization
+
+import org.apache.spark.sql.catalyst.expressions.aggregate._
+import org.apache.spark.sql.catalyst.expressions.Expression
+import org.apache.spark.sql.types._
+
+/**
+ * Object to support aggregate expressions like `sum` or `avg`.
+ */
+private[optimization] object AggregateExpressions extends SupportedExpressions {
+ /** @inheritdoc */
+ def apply(expr: Expression, checkChild: (Expression) ⇒ Boolean): Boolean = expr match {
+ case AggregateExpression(aggregateFunction, _, _, _, _) ⇒
+ checkChild(aggregateFunction)
+
+ case Average(child, _) ⇒
+ checkChild(child)
+
+ case Count(children) ⇒
+ children.forall(checkChild)
+
+ case Max(child) ⇒
+ checkChild(child)
+
+ case Min(child) ⇒
+ checkChild(child)
+
+ case Sum(child, _) ⇒
+ checkChild(child)
+
+ case _ ⇒
+ false
+ }
+
+ /** @inheritdoc */
+ override def toString(expr: Expression, childToString: Expression ⇒ String, useQualifier: Boolean,
+ useAlias: Boolean): Option[String] = expr match {
+ case AggregateExpression(aggregateFunction, _, isDistinct, _, _) ⇒
+ aggregateFunction match {
+ case Count(children) ⇒
+ if (isDistinct)
+ Some(s"COUNT(DISTINCT ${children.map(childToString(_)).mkString(" ")})")
+ else
+ Some(s"COUNT(${children.map(childToString(_)).mkString(" ")})")
+
+ case sum: Sum ⇒
+ if (isDistinct)
+ Some(castSum(
+ s"SUM(DISTINCT ${sum.children.map(childToString(_)).mkString(" ")})", sum.dataType))
+ else
+ Some(castSum(s"SUM(${sum.children.map(childToString(_)).mkString(" ")})", sum.dataType))
+
+ case _ ⇒
+ Some(childToString(aggregateFunction))
+ }
+
+ case Average(child, _) ⇒
+ child.dataType match {
+ case DecimalType() | DoubleType ⇒
+ Some(s"AVG(${childToString(child)})")
+
+ case _ ⇒
+ //Spark `AVG` return type is always a double or a decimal.
+ //See [[org.apache.spark.sql.catalyst.expressions.aggregate.Average]]
+ //But Ignite `AVG` return type for integral types is integral.
+ //To preserve correct query results, the column has to be cast to double.
+ Some(s"AVG(CAST(${childToString(child)} AS DOUBLE))")
+ }
+
+
+ case Count(children) ⇒
+ Some(s"COUNT(${children.map(childToString(_)).mkString(" ")})")
+
+ case Max(child) ⇒
+ Some(s"MAX(${childToString(child)})")
+
+ case Min(child) ⇒
+ Some(s"MIN(${childToString(child)})")
+
+ case sum: Sum ⇒
+ Some(castSum(s"SUM(${childToString(sum.child)})", sum.dataType))
+
+ case _ ⇒
+ None
+ }
+
+ /**
+ * Ignite returns BigDecimal but Spark expects BIGINT.
+ */
+ private def castSum(sumSql: String, dataType: DataType): String = dataType match {
+ case LongType ⇒
+ s"CAST($sumSql AS BIGINT)"
+
+ case _ ⇒
+ s"$sumSql"
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/ConditionExpressions.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/ConditionExpressions.scala
new file mode 100644
index 0000000..fbfbd64
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/ConditionExpressions.scala
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.ignite.spark.impl.optimization
+
+import org.apache.spark.sql.catalyst.expressions.{Expression, _}
+
+/**
+ * Object to support condition expressions like `and` or `in` operators.
+ */
+private[optimization] object ConditionExpressions extends SupportedExpressions {
+ /** @inheritdoc */
+ def apply(expr: Expression, checkChild: (Expression) ⇒ Boolean): Boolean = expr match {
+ case EqualTo(left, right) ⇒
+ checkChild(left) && checkChild(right)
+
+ case EqualNullSafe(left, right) ⇒
+ checkChild(left) && checkChild(right)
+
+ case GreaterThan(left, right) ⇒
+ checkChild(left) && checkChild(right)
+
+ case GreaterThanOrEqual(left, right) ⇒
+ checkChild(left) && checkChild(right)
+
+ case LessThan(left, right) ⇒
+ checkChild(left) && checkChild(right)
+
+ case LessThanOrEqual(left, right) ⇒
+ checkChild(left) && checkChild(right)
+
+ case InSet(child, set) if set.forall(_.isInstanceOf[Literal]) ⇒
+ checkChild(child)
+
+ case In(child, list) if list.forall(_.isInstanceOf[Literal]) ⇒
+ checkChild(child)
+
+ case IsNull(child) ⇒
+ checkChild(child)
+
+ case IsNotNull(child) ⇒
+ checkChild(child)
+
+ case And(left, right) ⇒
+ checkChild(left) && checkChild(right)
+
+ case Or(left, right) ⇒
+ checkChild(left) && checkChild(right)
+
+ case Not(child) ⇒
+ checkChild(child)
+
+ case StartsWith(left, right) ⇒
+ checkChild(left) && checkChild(right)
+
+ case EndsWith(left, right) ⇒
+ checkChild(left) && checkChild(right)
+
+ case Contains(left, right) ⇒
+ checkChild(left) && checkChild(right)
+
+ case _ ⇒
+ false
+ }
+
+ /** @inheritdoc */
+ override def toString(expr: Expression, childToString: Expression ⇒ String, useQualifier: Boolean,
+ useAlias: Boolean): Option[String] = expr match {
+ case EqualTo(left, right) ⇒
+ Some(s"${childToString(left)} = ${childToString(right)}")
+
+ case EqualNullSafe(left, right) ⇒
+ Some(s"(${childToString(left)} IS NULL OR ${childToString(left)} = ${childToString(right)})")
+
+ case GreaterThan(left, right) ⇒
+ Some(s"${childToString(left)} > ${childToString(right)}")
+
+ case GreaterThanOrEqual(left, right) ⇒
+ Some(s"${childToString(left)} >= ${childToString(right)}")
+
+ case LessThan(left, right) ⇒
+ Some(s"${childToString(left)} < ${childToString(right)}")
+
+ case LessThanOrEqual(left, right) ⇒
+ Some(s"${childToString(left)} <= ${childToString(right)}")
+
+ case In(attr, values) ⇒
+ Some(s"${childToString(attr)} IN (${values.map(childToString(_)).mkString(", ")})")
+
+ case IsNull(child) ⇒
+ Some(s"${childToString(child)} IS NULL")
+
+ case IsNotNull(child) ⇒
+ Some(s"${childToString(child)} IS NOT NULL")
+
+ case And(left, right) ⇒
+ Some(s"${childToString(left)} AND ${childToString(right)}")
+
+ case Or(left, right) ⇒
+ Some(s"${childToString(left)} OR ${childToString(right)}")
+
+ case Not(child) ⇒
+ Some(s"NOT ${childToString(child)}")
+
+ case StartsWith(attr, value) ⇒ {
+ //Expecting string literal here.
+ //To add % sign it's required to remove quotes.
+ val valStr = removeQuotes(childToString(value))
+
+ Some(s"${childToString(attr)} LIKE '$valStr%'")
+ }
+
+ case EndsWith(attr, value) ⇒ {
+ //Expecting string literal here.
+ //To add % sign it's required to remove quotes.
+ val valStr = removeQuotes(childToString(value))
+
+ Some(s"${childToString(attr)} LIKE '%$valStr'")
+ }
+
+ case Contains(attr, value) ⇒ {
+ //Expecting string literal here.
+ //To add % signs it's required to remove quotes.
+ val valStr = removeQuotes(childToString(value))
+
+ Some(s"${childToString(attr)} LIKE '%$valStr%'")
+ }
+
+ case _ ⇒
+ None
+ }
+
+ /**
+ * @param str String to process.
+ * @return String without surrounding quotes.
+ */
+ private def removeQuotes(str: String): String =
+ if (str.length < 2)
+ str
+ else
+ str match {
+ case quoted if quoted.startsWith("'") && quoted.endsWith("'") ⇒
+ quoted.substring(1, quoted.length-1)
+
+ case _ ⇒ str
+ }
+}
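
To illustrate the translation above, a small, hypothetical snippet (run from within the `org.apache.ignite.spark.impl.optimization` package, since the object is package-private) converts a `StartsWith` predicate using a toy `childToString` closure standing in for the full expression converter:

[source,scala]
----
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, Literal, StartsWith}
import org.apache.spark.sql.types.StringType

// Toy converter: attributes become their name, string literals become quoted values.
val toSql: Expression => String = {
    case ar: AttributeReference => ar.name
    case Literal(v, StringType) => s"'$v'"
    case other                  => other.toString
}

val name = AttributeReference("NAME", StringType)()

ConditionExpressions.toString(StartsWith(name, Literal("Iv")), toSql, useQualifier = false, useAlias = false)
// => Some("NAME LIKE 'Iv%'")
----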
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/DateExpressions.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/DateExpressions.scala
new file mode 100644
index 0000000..d075bf0
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/DateExpressions.scala
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl.optimization
+
+import org.apache.spark.sql.catalyst.expressions.{Expression, _}
+
+/**
+ * Object to support expressions to work with date/timestamp.
+ */
+private[optimization] object DateExpressions extends SupportedExpressions {
+ /** @inheritdoc */
+ def apply(expr: Expression, checkChild: (Expression) ⇒ Boolean): Boolean = expr match {
+ case CurrentDate(None) ⇒
+ true
+
+ case CurrentTimestamp() ⇒
+ true
+
+ case DateAdd(startDate, days) ⇒
+ checkChild(startDate) && checkChild(days)
+
+ case DateDiff(date1, date2) ⇒
+ checkChild(date1) && checkChild(date2)
+
+ case DayOfMonth(date) ⇒
+ checkChild(date)
+
+ case DayOfYear(date) ⇒
+ checkChild(date)
+
+ case Hour(date, _) ⇒
+ checkChild(date)
+
+ case Minute(date, _) ⇒
+ checkChild(date)
+
+ case Month(date) ⇒
+ checkChild(date)
+
+ case ParseToDate(left, format, child) ⇒
+ checkChild(left) && (format.isEmpty || checkChild(format.get)) && checkChild(child)
+
+ case Quarter(date) ⇒
+ checkChild(date)
+
+ case Second(date, _) ⇒
+ checkChild(date)
+
+ case WeekOfYear(date) ⇒
+ checkChild(date)
+
+ case Year(date) ⇒
+ checkChild(date)
+
+ case _ ⇒
+ false
+ }
+
+ /** @inheritdoc */
+ override def toString(expr: Expression, childToString: Expression ⇒ String, useQualifier: Boolean,
+ useAlias: Boolean): Option[String] = expr match {
+ case CurrentDate(_) ⇒
+ Some(s"CURRENT_DATE()")
+
+ case CurrentTimestamp() ⇒
+ Some(s"CURRENT_TIMESTAMP()")
+
+ case DateAdd(startDate, days) ⇒
+ Some(s"CAST(DATEADD('DAY', ${childToString(days)}, ${childToString(startDate)}) AS DATE)")
+
+ case DateDiff(date1, date2) ⇒
+ Some(s"CAST(DATEDIFF('DAY', ${childToString(date1)}, ${childToString(date2)}) AS INT)")
+
+ case DayOfMonth(date) ⇒
+ Some(s"DAY_OF_MONTH(${childToString(date)})")
+
+ case DayOfYear(date) ⇒
+ Some(s"DAY_OF_YEAR(${childToString(date)})")
+
+ case Hour(date, _) ⇒
+ Some(s"HOUR(${childToString(date)})")
+
+ case Minute(date, _) ⇒
+ Some(s"MINUTE(${childToString(date)})")
+
+ case Month(date) ⇒
+ Some(s"MINUTE(${childToString(date)})")
+
+ case ParseToDate(left, formatOption, _) ⇒
+ formatOption match {
+ case Some(format) ⇒
+ Some(s"PARSEDATETIME(${childToString(left)}, ${childToString(format)})")
+ case None ⇒
+ Some(s"PARSEDATETIME(${childToString(left)})")
+ }
+
+ case Quarter(date) ⇒
+ Some(s"QUARTER(${childToString(date)})")
+
+ case Second(date, _) ⇒
+ Some(s"SECOND(${childToString(date)})")
+
+ case WeekOfYear(date) ⇒
+ Some(s"WEEK(${childToString(date)})")
+
+ case Year(date) ⇒
+ Some(s"YEAR(${childToString(date)})")
+
+ case _ ⇒
+ None
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/IgniteQueryContext.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/IgniteQueryContext.scala
new file mode 100644
index 0000000..c5a7f34
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/IgniteQueryContext.scala
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl.optimization
+
+import org.apache.ignite.spark.IgniteContext
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.sql.catalyst.catalog.CatalogTable
+import org.apache.spark.sql.catalyst.expressions.NamedExpression
+
+/**
+ * Class to store Ignite query info during optimization process.
+ *
+ * @param igniteContext IgniteContext.
+ * @param sqlContext SQLContext.
+ * @param cacheName Cache name.
+ * @param aliasIndex Iterator to generate indexes for auto-generated aliases.
+ * @param catalogTable CatalogTable from source relation.
+ */
+case class IgniteQueryContext(
+ igniteContext: IgniteContext,
+ sqlContext: SQLContext,
+ cacheName: String,
+ aliasIndex: Iterator[Int],
+ catalogTable: Option[CatalogTable] = None,
+ distributeJoin: Boolean = false
+) {
+ /**
+ * @return Unique table alias.
+ */
+ def uniqueTableAlias: String = "table" + aliasIndex.next
+
+ /**
+ * @param col Column
+ * @return Unique column alias.
+ */
+ def uniqueColumnAlias(col: NamedExpression): String = col.name + "_" + aliasIndex.next
+}
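
For example, a context created with a fresh counter (reusing `ic` and `spark` from the earlier sketch; the cache name is illustrative) hands out deterministic aliases:

[source,scala]
----
import org.apache.ignite.spark.impl.optimization.IgniteQueryContext

val qryCtx = IgniteQueryContext(ic, spark.sqlContext, "personCache", Iterator.from(0))

println(qryCtx.uniqueTableAlias)   // table0
println(qryCtx.uniqueTableAlias)   // table1
// uniqueColumnAlias appends the same counter to the column name, e.g. NAME_2.
----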
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/MathExpressions.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/MathExpressions.scala
new file mode 100644
index 0000000..256cd78
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/MathExpressions.scala
@@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl.optimization
+
+import org.apache.spark.sql.catalyst.expressions.{Expression, _}
+
+/**
+ * Object to support math expressions.
+ */
+private[optimization] object MathExpressions extends SupportedExpressions {
+ /** @inheritdoc */
+ def apply(expr: Expression, checkChild: (Expression) ⇒ Boolean): Boolean = expr match {
+ case Abs(child, _) ⇒
+ checkChild(child)
+
+ case Acos(child) ⇒
+ checkChild(child)
+
+ case Asin(child) ⇒
+ checkChild(child)
+
+ case Atan(child) ⇒
+ checkChild(child)
+
+ case Cos(child) ⇒
+ checkChild(child)
+
+ case Cosh(child) ⇒
+ checkChild(child)
+
+ case Sin(child) ⇒
+ checkChild(child)
+
+ case Sinh(child) ⇒
+ checkChild(child)
+
+ case Tan(child) ⇒
+ checkChild(child)
+
+ case Tanh(child) ⇒
+ checkChild(child)
+
+ case Atan2(left, right) ⇒
+ checkChild(left) && checkChild(right)
+
+ case BitwiseAnd(left, right) ⇒
+ checkChild(left) && checkChild(right)
+
+ case BitwiseOr(left, right) ⇒
+ checkChild(left) && checkChild(right)
+
+ case BitwiseXor(left, right) ⇒
+ checkChild(left) && checkChild(right)
+
+ case Ceil(child) ⇒
+ checkChild(child)
+
+ case ToDegrees(child) ⇒
+ checkChild(child)
+
+ case Exp(child) ⇒
+ checkChild(child)
+
+ case Floor(child) ⇒
+ checkChild(child)
+
+ case Log(child) ⇒
+ checkChild(child)
+
+ case Log10(child) ⇒
+ checkChild(child)
+
+ case Logarithm(left, right) ⇒
+ checkChild(left) && checkChild(right)
+
+ case ToRadians(child) ⇒
+ checkChild(child)
+
+ case Sqrt(child) ⇒
+ checkChild(child)
+
+ case _: Pi ⇒
+ true
+
+ case _: EulerNumber ⇒
+ true
+
+ case Pow(left, right) ⇒
+ checkChild(left) && checkChild(right)
+
+ case Rand(child, _) ⇒
+ checkChild(child)
+
+ case Round(child, scale) ⇒
+ checkChild(child) && checkChild(scale)
+
+ case Signum(child) ⇒
+ checkChild(child)
+
+ case Remainder(left, right, _) ⇒
+ checkChild(left) && checkChild(right)
+
+ case Divide(left, right, _) ⇒
+ checkChild(left) && checkChild(right)
+
+ case Multiply(left, right, _) ⇒
+ checkChild(left) && checkChild(right)
+
+ case Subtract(left, right, _) ⇒
+ checkChild(left) && checkChild(right)
+
+ case Add(left, right, _) ⇒
+ checkChild(left) && checkChild(right)
+
+ case UnaryMinus(child, _) ⇒
+ checkChild(child)
+
+ case UnaryPositive(child) ⇒
+ checkChild(child)
+
+ case _ ⇒ false
+ }
+
+ /** @inheritdoc */
+ override def toString(expr: Expression, childToString: Expression ⇒ String, useQualifier: Boolean,
+ useAlias: Boolean): Option[String] = expr match {
+ case Abs(child, _) ⇒
+ Some(s"ABS(${childToString(child)})")
+
+ case Acos(child) ⇒
+ Some(s"ACOS(${childToString(child)})")
+
+ case Asin(child) ⇒
+ Some(s"ASIN(${childToString(child)})")
+
+ case Atan(child) ⇒
+ Some(s"ATAN(${childToString(child)})")
+
+ case Cos(child) ⇒
+ Some(s"COS(${childToString(child)})")
+
+ case Cosh(child) ⇒
+ Some(s"COSH(${childToString(child)})")
+
+ case Sin(child) ⇒
+ Some(s"SIN(${childToString(child)})")
+
+ case Sinh(child) ⇒
+ Some(s"SINH(${childToString(child)})")
+
+ case Tan(child) ⇒
+ Some(s"TAN(${childToString(child)})")
+
+ case Tanh(child) ⇒
+ Some(s"TANH(${childToString(child)})")
+
+ case Atan2(left, right) ⇒
+ Some(s"ATAN2(${childToString(left)}, ${childToString(right)})")
+
+ case BitwiseAnd(left, right) ⇒
+ Some(s"BITAND(${childToString(left)}, ${childToString(right)})")
+
+ case BitwiseOr(left, right) ⇒
+ Some(s"BITOR(${childToString(left)}, ${childToString(right)})")
+
+ case BitwiseXor(left, right) ⇒
+ Some(s"BITXOR(${childToString(left)}, ${childToString(right)})")
+
+ case Ceil(child) ⇒
+ Some(s"CAST(CEIL(${childToString(child)}) AS LONG)")
+
+ case ToDegrees(child) ⇒
+ Some(s"DEGREES(${childToString(child)})")
+
+ case Exp(child) ⇒
+ Some(s"EXP(${childToString(child)})")
+
+ case Floor(child) ⇒
+ Some(s"CAST(FLOOR(${childToString(child)}) AS LONG)")
+
+ case Log(child) ⇒
+ Some(s"LOG(${childToString(child)})")
+
+ case Log10(child) ⇒
+ Some(s"LOG10(${childToString(child)})")
+
+ case Logarithm(base, arg) ⇒
+ childToString(base) match {
+ //Spark internally converts LN(XXX) to LOG(2.718281828459045, XXX).
+ //Because H2 doesn't have a built-in function for a logarithm with an arbitrary base,
+ //we avoid the log(base, x) = ln(x)/ln(base) rewrite when a direct function exists.
+ case "2.718281828459045" ⇒
+ Some(s"LOG(${childToString(arg)})")
+ case "10" ⇒
+ Some(s"LOG10(${childToString(arg)})")
+ case baseStr ⇒
+ Some(s"(LOG(${childToString(arg)})/LOG($baseStr))")
+ }
+
+ case ToRadians(child) ⇒
+ Some(s"RADIANS(${childToString(child)})")
+
+ case Sqrt(child) ⇒
+ Some(s"SQRT(${childToString(child)})")
+
+ case _: Pi ⇒
+ Some("PI()")
+
+ case _: EulerNumber ⇒
+ Some("E()")
+
+ case Pow(left, right) ⇒
+ Some(s"POWER(${childToString(left)}, ${childToString(right)})")
+
+ case Rand(child, _) ⇒
+ Some(s"RAND(${childToString(child)})")
+
+ case Round(child, scale) ⇒
+ Some(s"ROUND(${childToString(child)}, ${childToString(scale)})")
+
+ case Signum(child) ⇒
+ Some(s"SIGN(${childToString(child)})")
+
+ case Remainder(left, right, _) ⇒
+ Some(s"${childToString(left)} % ${childToString(right)}")
+
+ case Divide(left, right, _) ⇒
+ Some(s"${childToString(left)} / ${childToString(right)}")
+
+ case Multiply(left, right, _) ⇒
+ Some(s"${childToString(left)} * ${childToString(right)}")
+
+ case Subtract(left, right, _) ⇒
+ Some(s"${childToString(left)} - ${childToString(right)}")
+
+ case Add(left, right, _) ⇒
+ Some(s"${childToString(left)} + ${childToString(right)}")
+
+ case UnaryMinus(child, _) ⇒
+ Some(s"-${childToString(child)}")
+
+ case UnaryPositive(child) ⇒
+ Some(s"+${childToString(child)}")
+
+ case _ ⇒
+ None
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/SimpleExpressions.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/SimpleExpressions.scala
new file mode 100644
index 0000000..37cb9e1
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/SimpleExpressions.scala
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl.optimization
+
+import java.text.SimpleDateFormat
+import org.apache.spark.sql.catalyst.expressions.{Expression, _}
+import org.apache.spark.sql.catalyst.util.DateTimeUtils
+import org.apache.spark.sql.types._
+
+import java.time.ZoneOffset
+import java.util.TimeZone
+
+/**
+ * Object to support some 'simple' expressions like aliases.
+ */
+private[optimization] object SimpleExpressions extends SupportedExpressions {
+ /** @inheritdoc */
+ override def apply(expr: Expression, checkChild: Expression ⇒ Boolean): Boolean = expr match {
+ case Literal(_, _) ⇒
+ true
+
+ case _: Attribute ⇒
+ true
+
+ case Alias(child, _) ⇒
+ checkChild(child)
+
+ case Cast(child, dataType, _, _) ⇒
+ checkChild(child) && castSupported(from = child.dataType, to = dataType)
+
+ case _ ⇒
+ false
+ }
+
+ /** @inheritdoc */
+ override def toString(expr: Expression, childToString: Expression ⇒ String, useQualifier: Boolean,
+ useAlias: Boolean): Option[String] = expr match {
+ case l: Literal ⇒
+ if (l.value == null)
+ Some("null")
+ else {
+ l.dataType match {
+ case StringType ⇒
+ Some("'" + l.value.toString + "'")
+
+ case TimestampType ⇒
+ l.value match {
+ //Internal representation of TimestampType is Long.
+ //So we convert the internal Spark representation into a CAST call.
+ case date: Long ⇒
+ Some(s"CAST('${timestampFormat.get.format(DateTimeUtils.toJavaTimestamp(date))}' " +
+ s"AS TIMESTAMP)")
+
+ case _ ⇒
+ Some(l.value.toString)
+ }
+
+ case DateType ⇒
+ l.value match {
+ //Internal representation of DateType is Int.
+ //So we convert the internal Spark representation into a CAST call.
+ case days: Integer ⇒
+ val date = new java.util.Date(DateTimeUtils.microsToMillis(DateTimeUtils.daysToMicros(days, ZoneOffset
+ .UTC))) // FIXME: default id
+
+ Some(s"CAST('${dateFormat.get.format(date)}' AS DATE)")
+
+ case _ ⇒
+ Some(l.value.toString)
+ }
+
+ case _ ⇒
+ Some(l.value.toString)
+ }
+ }
+ case ar: AttributeReference ⇒
+ val name =
+ if (useQualifier)
+ // TODO: add ticket to handle seq with two elements with qualifier for database name: related to the [SPARK-19602][SQL] ticket
+ ar.qualifier.map(_ + "." + ar.name).find(_ => true).getOrElse(ar.name)
+ else
+ ar.name
+
+ if (ar.metadata.contains(ALIAS) &&
+ !isAliasEqualColumnName(ar.metadata.getString(ALIAS), ar.name) &&
+ useAlias) {
+ Some(aliasToString(name, ar.metadata.getString(ALIAS)))
+ } else
+ Some(name)
+
+ case Alias(child, name) ⇒
+ if (useAlias)
+ Some(childToString(child)).map(aliasToString(_, name))
+ else
+ Some(childToString(child))
+
+ case Cast(child, dataType, _, _) ⇒ // FIXME: Timezone
+ Some(s"CAST(${childToString(child)} AS ${toSqlType(dataType)})")
+
+ case SortOrder(child, direction, _, _) ⇒
+ Some(s"${childToString(child)}${if(direction==Descending) " DESC" else ""}")
+
+ case _ ⇒
+ None
+ }
+
+ /**
+ * @param column Column name.
+ * @param alias Alias.
+ * @return SQL String for column with alias.
+ */
+ private def aliasToString(column: String, alias: String): String =
+ if (isAliasEqualColumnName(alias, column))
+ column
+ else if (alias.matches("[A-Za-z_][0-9A-Za-z_]*"))
+ s"$column AS $alias"
+ else
+ s"""$column AS "$alias""""
+
+ /**
+ * @param alias Alias.
+ * @param column Column.
+ * @return True if name equals to alias, false otherwise.
+ */
+ private def isAliasEqualColumnName(alias: String, column: String): Boolean =
+ alias.compareToIgnoreCase(column.replaceAll("'", "")) == 0
+
+ /**
+ * @param from Source type of the conversion.
+ * @param to Target type of the conversion.
+ * @return True if the cast between the types is supported, false otherwise.
+ */
+ private def castSupported(from: DataType, to: DataType): Boolean = from match {
+ case BooleanType ⇒
+ Set[DataType](BooleanType, StringType)(to)
+
+ case ByteType ⇒
+ Set(ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType, StringType, DecimalType(_, _))(to)
+
+ case ShortType ⇒
+ Set(ShortType, IntegerType, LongType, FloatType, DoubleType, StringType, DecimalType(_, _))(to)
+
+ case IntegerType ⇒
+ Set(IntegerType, LongType, FloatType, DoubleType, StringType, DecimalType(_, _))(to)
+
+ case LongType ⇒
+ Set(LongType, FloatType, DoubleType, StringType, DecimalType(_, _))(to)
+
+ case FloatType ⇒
+ Set(FloatType, DoubleType, StringType, DecimalType(_, _))(to)
+
+ case DoubleType ⇒
+ Set(DoubleType, StringType, DecimalType(_, _))(to)
+
+ case DecimalType() ⇒
+ Set(StringType, DecimalType(_, _))(to)
+
+ case DateType ⇒
+ Set[DataType](DateType, StringType, LongType, TimestampType)(to)
+
+ case TimestampType ⇒
+ Set[DataType](TimestampType, DateType, StringType, LongType)(to)
+
+ case StringType ⇒
+ Set(BooleanType, ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType,
+ DecimalType(_, _), DateType, TimestampType, StringType)(to)
+
+ case BinaryType ⇒
+ false
+
+ case ArrayType(_, _) ⇒
+ false
+ }
+
+ /**
+ * Date format built into Ignite.
+ */
+ private val dateFormat: ThreadLocal[SimpleDateFormat] = new ThreadLocal[SimpleDateFormat] {
+ override def initialValue(): SimpleDateFormat =
+ new SimpleDateFormat("yyyy-MM-dd")
+ }
+
+ /**
+ * Timestamp format built into Ignite.
+ */
+ private val timestampFormat: ThreadLocal[SimpleDateFormat] = new ThreadLocal[SimpleDateFormat] {
+ override def initialValue(): SimpleDateFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/StringExpressions.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/StringExpressions.scala
new file mode 100644
index 0000000..733fe80
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/StringExpressions.scala
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl.optimization
+
+import org.apache.spark.sql.catalyst.expressions.{Expression, _}
+
+/**
+ * Object to support expressions to work with strings like `length` or `trim`.
+ */
+private[optimization] object StringExpressions extends SupportedExpressions {
+ /** @inheritdoc */
+ def apply(expr: Expression, checkChild: (Expression) ⇒ Boolean): Boolean = expr match {
+ case Ascii(child) ⇒
+ checkChild(child)
+
+ case Length(child) ⇒
+ checkChild(child)
+
+ case Concat(children) ⇒
+ children.forall(checkChild)
+
+ case ConcatWs(children) ⇒
+ children.forall(checkChild)
+
+ case StringInstr(str, substr) ⇒
+ checkChild(str) && checkChild(substr)
+
+ case Lower(child) ⇒
+ checkChild(child)
+
+ case Upper(child) ⇒
+ checkChild(child)
+
+ case StringLocate(substr, str, start) ⇒
+ checkChild(substr) && checkChild(str) && checkChild(start)
+
+ case StringLPad(str, len, pad) ⇒
+ checkChild(str) && checkChild(len) && checkChild(pad)
+
+ case StringRPad(str, len, pad) ⇒
+ checkChild(str) && checkChild(len) && checkChild(pad)
+
+ case StringTrimLeft(child, None) ⇒
+ checkChild(child)
+
+ case StringTrimRight(child, None) ⇒
+ checkChild(child)
+
+ case StringTrim(child, None) ⇒
+ checkChild(child)
+
+ case StringTrimLeft(child, Some(trimStr)) ⇒
+ checkChild(child) && checkChild(trimStr)
+
+ case StringTrimRight(child, Some(trimStr)) ⇒
+ checkChild(child) && checkChild(trimStr)
+
+ case StringTrim(child, Some(trimStr)) ⇒
+ checkChild(child) && checkChild(trimStr)
+
+ case RegExpReplace(subject, regexp, rep, pos) ⇒
+ checkChild(subject) && checkChild(regexp) && checkChild(rep) && checkChild(pos)
+
+ case StringRepeat(str, times) ⇒
+ checkChild(str) && checkChild(times)
+
+ case SoundEx(child) ⇒
+ checkChild(child)
+
+ case StringSpace(child) ⇒
+ checkChild(child)
+
+ case Substring(str, pos, len) ⇒
+ checkChild(str) && checkChild(pos) && checkChild(len)
+
+ case StringTranslate(str, strMatch, strReplace) ⇒
+ checkChild(str) && checkChild(strMatch) && checkChild(strReplace)
+
+ case _ ⇒ false
+ }
+
+ /** @inheritdoc */
+ override def toString(expr: Expression, childToString: Expression ⇒ String, useQualifier: Boolean,
+ useAlias: Boolean): Option[String] = expr match {
+ case Ascii(child) ⇒
+ Some(s"ASCII(${childToString(child)})")
+
+ case Length(child) ⇒
+ Some(s"CAST(LENGTH(${childToString(child)}) AS INTEGER)")
+
+ case Concat(children) ⇒
+ Some(s"CONCAT(${children.map(childToString(_)).mkString(", ")})")
+
+ case ConcatWs(children) ⇒
+ Some(s"CONCAT_WS(${children.map(childToString(_)).mkString(", ")})")
+
+ case StringInstr(str, substr) ⇒
+ Some(s"POSITION(${childToString(substr)}, ${childToString(str)})")
+
+ case Lower(child) ⇒
+ Some(s"LOWER(${childToString(child)})")
+
+ case Upper(child) ⇒
+ Some(s"UPPER(${childToString(child)})")
+
+ case StringLocate(substr, str, start) ⇒
+ Some(s"LOCATE(${childToString(substr)}, ${childToString(str)}, ${childToString(start)})")
+
+ case StringLPad(str, len, pad) ⇒
+ Some(s"LPAD(${childToString(str)}, ${childToString(len)}, ${childToString(pad)})")
+
+ case StringRPad(str, len, pad) ⇒
+ Some(s"RPAD(${childToString(str)}, ${childToString(len)}, ${childToString(pad)})")
+
+ case StringTrimLeft(child, None) ⇒
+ Some(s"LTRIM(${childToString(child)})")
+
+ case StringTrimRight(child, None) ⇒
+ Some(s"RTRIM(${childToString(child)})")
+
+ case StringTrim(child, None) ⇒
+ Some(s"TRIM(${childToString(child)})")
+
+ case StringTrimLeft(child, Some(trimStr)) ⇒
+ Some(s"LTRIM(${childToString(child)}, ${childToString(trimStr)})")
+
+ case StringTrimRight(child, Some(trimStr)) ⇒
+ Some(s"RTRIM(${childToString(child)}, ${childToString(trimStr)})")
+
+ case StringTrim(child, Some(trimStr)) ⇒
+ Some(s"TRIM(${childToString(child)}, ${childToString(trimStr)})")
+
+ case RegExpReplace(subject, regexp, rep, pos) ⇒
+ Some(s"REGEXP_REPLACE(${childToString(subject)}, ${childToString(regexp)}, ${childToString(rep)})")
+
+ case StringRepeat(str, times) ⇒
+ Some(s"REPEAT(${childToString(str)}, ${childToString(times)})")
+
+ case SoundEx(child) ⇒
+ Some(s"SOUND_EX(${childToString(child)})")
+
+ case StringSpace(child) ⇒
+ Some(s"SPACE(${childToString(child)})")
+
+ case Substring(str, pos, len) ⇒
+ Some(s"SUBSTR(${childToString(str)}, ${childToString(pos)}, ${childToString(len)})")
+
+ case StringTranslate(str, strMatch, strReplace) ⇒
+ Some(s"TRANSLATE(${childToString(str)}, ${childToString(strMatch)}, ${childToString(strReplace)})")
+
+ case _ ⇒
+ None
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/SupportedExpressions.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/SupportedExpressions.scala
new file mode 100644
index 0000000..f46eb72
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/SupportedExpressions.scala
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl.optimization
+
+import org.apache.spark.sql.catalyst.expressions.Expression
+
+/**
+ * Provides methods to work with Spark SQL expressions that are supported by Ignite SQL syntax.
+ */
+private[optimization] trait SupportedExpressions {
+ /**
+ * @param expr Expression to check.
+ * @param checkChild Closure to check child expression.
+ * @return True if `expr` is supported, false otherwise.
+ */
+ def apply(expr: Expression, checkChild: (Expression) ⇒ Boolean): Boolean
+
+ /**
+ * @param expr Expression to convert to string.
+ * @param childToString Closure to convert children expressions.
+ * @param useQualifier If true, `expr` should be printed with a qualifier, e.g. `Table1.id`.
+ * @param useAlias If true, `expr` should be printed with an alias, e.g. `name as person_name`.
+ * @return SQL representation of `expr` if it supported. `None` otherwise.
+ */
+ def toString(expr: Expression, childToString: (Expression) ⇒ String, useQualifier: Boolean,
+ useAlias: Boolean): Option[String]
+}
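
A hypothetical implementation sketch of this trait: a push-down rule for Spark's `md5` function, assuming, purely for illustration, that the target SQL dialect exposes a compatible `MD5` function. It is not part of the module, but it shows how the `apply`/`toString` contract is meant to be filled in:

[source,scala]
----
package org.apache.ignite.spark.impl.optimization

import org.apache.spark.sql.catalyst.expressions.{Expression, Md5}

private[optimization] object HashExpressions extends SupportedExpressions {
    /** Accept the expression only when its child can also be pushed down. */
    override def apply(expr: Expression, checkChild: Expression => Boolean): Boolean = expr match {
        case Md5(child) => checkChild(child)
        case _ => false
    }

    /** Render the expression as SQL, delegating child rendering to `childToString`. */
    override def toString(expr: Expression, childToString: Expression => String, useQualifier: Boolean,
        useAlias: Boolean): Option[String] = expr match {
        case Md5(child) => Some(s"MD5(${childToString(child)})")
        case _ => None
    }
}
----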
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/SystemExpressions.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/SystemExpressions.scala
new file mode 100644
index 0000000..40e4e29
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/SystemExpressions.scala
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl.optimization
+
+import org.apache.ignite.IgniteException
+import org.apache.spark.sql.catalyst.expressions.{Coalesce, EqualTo, Expression, Greatest, If, IfNull, IsNotNull, IsNull, Least, Literal, NullIf, Nvl2}
+
+/**
+ * Object to support some built-in expressions like `nvl2` or `coalesce`.
+ */
+private[optimization] object SystemExpressions extends SupportedExpressions {
+ /** @inheritdoc */
+ override def apply(expr: Expression, checkChild: Expression ⇒ Boolean): Boolean = expr match {
+ case Coalesce(children) ⇒
+ children.forall(checkChild)
+
+ case Greatest(children) ⇒
+ children.forall(checkChild)
+
+ case IfNull(left, right, _) ⇒
+ checkChild(left) && checkChild(right)
+
+ case Least(children) ⇒
+ children.forall(checkChild)
+
+ case NullIf(left, right, _) ⇒
+ checkChild(left) && checkChild(right)
+
+ case Nvl2(expr1, expr2, expr3, _) ⇒
+ checkChild(expr1) && checkChild(expr2) && checkChild(expr3)
+
+ case If(predicate, trueValue, falseValue) ⇒
+ predicate match {
+ case IsNotNull(child) ⇒
+ checkChild(child) && checkChild(trueValue) && checkChild(falseValue)
+
+ case IsNull(child) ⇒
+ checkChild(child) && checkChild(trueValue) && checkChild(falseValue)
+
+ case EqualTo(left, right) ⇒
+ trueValue match {
+ case Literal(null, _) ⇒
+ (left == falseValue || right == falseValue) && checkChild(left) && checkChild(right)
+
+ case _ ⇒
+ false
+ }
+
+ case _ ⇒
+ false
+ }
+
+ case _ ⇒
+ false
+ }
+
+ /** @inheritdoc */
+ override def toString(expr: Expression, childToString: Expression ⇒ String, useQualifier: Boolean,
+ useAlias: Boolean): Option[String] = expr match {
+ case Coalesce(children) ⇒
+ Some(s"COALESCE(${children.map(childToString(_)).mkString(", ")})")
+
+ case Greatest(children) ⇒
+ Some(s"GREATEST(${children.map(childToString(_)).mkString(", ")})")
+
+ case IfNull(left, right, _) ⇒
+ Some(s"IFNULL(${childToString(left)}, ${childToString(right)})")
+
+ case Least(children) ⇒
+ Some(s"LEAST(${children.map(childToString(_)).mkString(", ")})")
+
+ case NullIf(left, right, _) ⇒
+ Some(s"NULLIF(${childToString(left)}, ${childToString(right)})")
+
+ case Nvl2(expr1, expr2, expr3, _) ⇒
+ Some(s"NVL2(${childToString(expr1)}, ${childToString(expr2)}, ${childToString(expr3)})")
+
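+        // These `If` patterns handle the case where Spark has already rewritten an NVL2- or
+        // NULLIF-style call into its `If`-based equivalent, so it can be mapped back to NVL2/NULLIF.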
+ case If(predicate, trueValue, falseValue) ⇒
+ predicate match {
+ case IsNotNull(child) ⇒
+ Some(s"NVL2(${childToString(child)}, ${childToString(trueValue)}, ${childToString(falseValue)})")
+
+ case IsNull(child) ⇒
+ Some(s"NVL2(${childToString(child)}, ${childToString(falseValue)}, ${childToString(trueValue)})")
+
+ case EqualTo(left, right) ⇒
+ trueValue match {
+ case Literal(null, _) ⇒
+ if (left == falseValue)
+ Some(s"NULLIF(${childToString(left)}, ${childToString(right)})")
+ else if (right == falseValue)
+ Some(s"NULLIF(${childToString(right)}, ${childToString(left)})")
+ else
+ throw new IgniteException(s"Expression not supported. $expr")
+
+ case _ ⇒
+ throw new IgniteException(s"Expression not supported. $expr")
+ }
+
+ case _ ⇒
+ throw new IgniteException(s"Expression not supported. $expr")
+ }
+
+ case _ ⇒
+ None
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/accumulator/JoinSQLAccumulator.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/accumulator/JoinSQLAccumulator.scala
new file mode 100644
index 0000000..05e5aeb
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/accumulator/JoinSQLAccumulator.scala
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl.optimization.accumulator
+
+import org.apache.ignite.IgniteException
+import org.apache.ignite.spark.impl.optimization._
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, NamedExpression, SortOrder}
+import org.apache.spark.sql.catalyst.plans.logical.{BinaryNode, LogicalPlan}
+import org.apache.spark.sql.catalyst.plans.{Inner, JoinType, LeftOuter, RightOuter}
+
+/**
+ * Accumulator to store information about a join query.
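+ *
+ * Schematically, a two-table join compiles into a query like the one below (table and column
+ * names are only illustrative):
+ * {{{
+ *   SELECT person.name, city.name
+ *   FROM person JOIN city ON person.city_id = city.id
+ * }}}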
+ */
+private[apache] case class JoinSQLAccumulator(
+ igniteQueryContext: IgniteQueryContext,
+ left: QueryAccumulator,
+ right: QueryAccumulator,
+ joinType: JoinType,
+ outputExpressions: Seq[NamedExpression],
+ condition: Option[Expression],
+ leftAlias: Option[String],
+ rightAlias: Option[String],
+ distinct: Boolean = false,
+ where: Option[Seq[Expression]] = None,
+ groupBy: Option[Seq[Expression]] = None,
+ having: Option[Seq[Expression]] = None,
+ limit: Option[Expression] = None,
+ localLimit: Option[Expression] = None,
+ orderBy: Option[Seq[SortOrder]] = None
+) extends BinaryNode with SelectAccumulator {
+ /** @inheritdoc */
+ override def compileQuery(prettyPrint: Boolean = false, nestedQuery: Boolean = false): String = {
+ val delim = if (prettyPrint) "\n" else " "
+ val tab = if (prettyPrint) " " else ""
+
+ var sql = s"SELECT$delim$tab" +
+ s"${fixQualifier(outputExpressions).map(exprToString(_, useQualifier = true)).mkString(", ")}$delim" +
+ s"FROM$delim$tab$compileJoinExpr"
+
+ if (allFilters.nonEmpty)
+ sql += s"${delim}WHERE$delim$tab" +
+ s"${fixQualifier(allFilters).map(exprToString(_, useQualifier = true)).mkString(s" AND$delim$tab")}"
+
+ if (groupBy.exists(_.nonEmpty))
+ sql += s"${delim}GROUP BY " +
+ s"${fixQualifier(groupBy.get).map(exprToString(_, useQualifier = true)).mkString(s",$delim$tab")}"
+
+ if (having.exists(_.nonEmpty))
+ sql += s"${delim}HAVING " +
+ s"${fixQualifier(having.get).map(exprToString(_, useQualifier = true)).mkString(s" AND$delim$tab")}"
+
+ if (orderBy.exists(_.nonEmpty))
+ sql += s"${delim}ORDER BY " +
+ s"${fixQualifier(orderBy.get).map(exprToString(_, useQualifier = true)).mkString(s",$delim$tab")}"
+
+ if (limit.isDefined) {
+ sql += s" LIMIT ${exprToString(fixQualifier0(limit.get), useQualifier = true)}"
+
+ if (nestedQuery)
+ sql = s"SELECT * FROM ($sql)"
+ }
+
+ sql
+ }
+
+ /**
+ * @return Filters for this query.
+ */
+ private def allFilters: Seq[Expression] = {
+ val leftFilters =
+ if (isSimpleTableAcc(left))
+ left.asInstanceOf[SingleTableSQLAccumulator].where.getOrElse(Seq.empty)
+ else
+ Seq.empty
+
+ val rightFilters =
+ if (isSimpleTableAcc(right))
+ right.asInstanceOf[SingleTableSQLAccumulator].where.getOrElse(Seq.empty)
+ else Seq.empty
+
+ where.getOrElse(Seq.empty) ++ leftFilters ++ rightFilters
+ }
+
+ /**
+ * @return `table1 LEFT JOIN (SELECT....FROM...) table2` part of join query.
+ */
+ private def compileJoinExpr: String = {
+ val leftJoinSql =
+ if (isSimpleTableAcc(left))
+ left.asInstanceOf[SingleTableSQLAccumulator].table.get
+ else
+ s"(${left.compileQuery()}) ${leftAlias.get}"
+
+ val rightJoinSql = {
+ val leftTableName =
+ if (isSimpleTableAcc(left))
+ left.qualifier
+ else
+ leftAlias.get
+
+ if (isSimpleTableAcc(right)) {
+ val rightTableName = right.asInstanceOf[SingleTableSQLAccumulator].table.get
+
+ if (leftTableName == rightTableName)
+ s"$rightTableName as ${rightAlias.get}"
+ else
+ rightTableName
+ } else
+ s"(${right.compileQuery()}) ${rightAlias.get}"
+ }
+
+ s"$leftJoinSql $joinTypeSQL $rightJoinSql" +
+ s"${condition.map(expr ⇒ s" ON ${exprToString(fixQualifier0(expr), useQualifier = true)}").getOrElse("")}"
+ }
+
+ /**
+ * @return SQL string representing specific join type.
+ */
+ private def joinTypeSQL = joinType match {
+ case Inner ⇒
+ "JOIN"
+ case LeftOuter ⇒
+ "LEFT JOIN"
+
+ case RightOuter ⇒
+ "RIGHT JOIN"
+
+ case _ ⇒
+ throw new IgniteException(s"Unsupported join type $joinType")
+ }
+
+ /**
+ * Changes table qualifier in case of embedded query.
+ *
+ * @param exprs Expressions to fix.
+ * @tparam T type of input expression.
+ * @return copy of `exprs` with fixed qualifier.
+ */
+ private def fixQualifier[T <: Expression](exprs: Seq[T]): Seq[T] =
+ exprs.map(fixQualifier0)
+
+ /**
+ * Changes table qualifier for single expression.
+ *
+ * @param expr Expression to fix.
+ * @tparam T type of input expression.
+ * @return copy of `expr` with fixed qualifier.
+ */
+ private def fixQualifier0[T <: Expression](expr: T): T = expr match {
+ case attr: AttributeReference ⇒
+ attr.withQualifier(Seq(findQualifier(attr))).asInstanceOf[T]
+
+ case _ ⇒
+ expr.withNewChildren(fixQualifier(expr.children)).asInstanceOf[T]
+ }
+
+ /**
+      * Finds the right qualifier for `attr`.
+      *
+      * @param attr Attribute to fix the qualifier for.
+      * @return Right qualifier for `attr`.
+ */
+ private def findQualifier(attr: AttributeReference): String = {
+ val leftTableName =
+ if (isSimpleTableAcc(left))
+ left.qualifier
+ else
+ leftAlias.get
+
+ if (left.outputExpressions.exists(_.exprId == attr.exprId))
+ leftTableName
+ else if (isSimpleTableAcc(right) && right.qualifier != leftTableName)
+ right.qualifier
+ else
+ rightAlias.get
+ }
+
+ /** @inheritdoc */
+ override def simpleString(maxFields: Int): String =
+ s"JoinSQLAccumulator(joinType: $joinType, columns: $outputExpressions, condition: $condition)"
+
+ /** @inheritdoc */
+    override def withOutputExpressions(outputExpressions: Seq[NamedExpression]): SelectAccumulator = copy(outputExpressions = outputExpressions)
+
+ /** @inheritdoc */
+ override def withDistinct(distinct: Boolean): JoinSQLAccumulator = copy(distinct = distinct)
+
+ /** @inheritdoc */
+ override def withWhere(where: Seq[Expression]): JoinSQLAccumulator = copy(where = Some(where))
+
+ /** @inheritdoc */
+ override def withGroupBy(groupBy: Seq[Expression]): JoinSQLAccumulator = copy(groupBy = Some(groupBy))
+
+ /** @inheritdoc */
+ override def withHaving(having: Seq[Expression]): JoinSQLAccumulator = copy(having = Some(having))
+
+ /** @inheritdoc */
+ override def withLimit(limit: Expression): JoinSQLAccumulator = copy(limit = Some(limit))
+
+ /** @inheritdoc */
+ override def withLocalLimit(localLimit: Expression): JoinSQLAccumulator = copy(localLimit = Some(localLimit))
+
+ /** @inheritdoc */
+ override def withOrderBy(orderBy: Seq[SortOrder]): JoinSQLAccumulator = copy(orderBy = Some(orderBy))
+
+ /** @inheritdoc */
+ override def withNewChildrenInternal(newLeft: LogicalPlan, newRight: LogicalPlan): JoinSQLAccumulator = copy(
+ left = newLeft.asInstanceOf[QueryAccumulator], right = newRight.asInstanceOf[QueryAccumulator])
+
+ /** @inheritdoc */
+ override def output: Seq[Attribute] = outputExpressions.map(toAttributeReference(_, Seq.empty))
+
+ /** @inheritdoc */
+ override lazy val qualifier: String = igniteQueryContext.uniqueTableAlias
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/accumulator/QueryAccumulator.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/accumulator/QueryAccumulator.scala
new file mode 100644
index 0000000..9570a66
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/accumulator/QueryAccumulator.scala
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl.optimization.accumulator
+
+import org.apache.ignite.spark.impl.optimization.IgniteQueryContext
+import org.apache.spark.sql.catalyst.expressions.{Expression, NamedExpression, SortOrder}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+
+/**
+ * Generic query info accumulator interface.
+ */
+private[apache] trait QueryAccumulator extends LogicalPlan {
+ /**
+ * @return Ignite query context.
+ */
+ def igniteQueryContext: IgniteQueryContext
+
+ /**
+ * @return Generated output.
+ */
+ def outputExpressions: Seq[NamedExpression]
+
+ /**
+ * @return Ordering info.
+ */
+ def orderBy: Option[Seq[SortOrder]]
+
+ /**
+ * @param outputExpressions New output expressions.
+ * @return Copy of this accumulator with new output.
+ */
+ def withOutputExpressions(outputExpressions: Seq[NamedExpression]): QueryAccumulator
+
+ /**
+ * @param orderBy New ordering.
+ * @return Copy of this accumulator with new order.
+ */
+ def withOrderBy(orderBy: Seq[SortOrder]): QueryAccumulator
+
+ /**
+ * @return Copy of this accumulator with `limit` expression.
+ */
+ def withLimit(limit: Expression): QueryAccumulator
+
+ /**
+ * @return Copy of this accumulator with `localLimit` expression.
+ */
+ def withLocalLimit(localLimit: Expression): QueryAccumulator
+
+ /**
+     * @param prettyPrint If true, a human-readable query will be generated.
+     * @param nestedQuery If true, the query is compiled so that it can be used as a nested sub-query.
+     * @return SQL query.
+ */
+ def compileQuery(prettyPrint: Boolean = false, nestedQuery: Boolean = false): String
+
+ /**
+     * @return Qualifier that should be used to select data from this accumulator.
+ */
+ def qualifier: String
+
+ /**
+     * All expressions are resolved when the extra optimization is executed.
+ */
+ override lazy val resolved = true
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/accumulator/SelectAccumulator.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/accumulator/SelectAccumulator.scala
new file mode 100644
index 0000000..c1db6f9
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/accumulator/SelectAccumulator.scala
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl.optimization.accumulator
+
+import org.apache.spark.sql.catalyst.expressions.Expression
+
+/**
+ * Generic interface for a SELECT query.
+ */
+private[apache] trait SelectAccumulator extends QueryAccumulator {
+ /**
+      * @return Expressions for the HAVING part of the query.
+ */
+ def having: Option[Seq[Expression]]
+
+ /**
+      * @return Expressions for the WHERE part of the query.
+ */
+ def where: Option[Seq[Expression]]
+
+ /**
+      * @return Expressions for the GROUP BY part of the query.
+ */
+ def groupBy: Option[Seq[Expression]]
+
+ /**
+ * @return Copy of this accumulator with `distinct` flag.
+ */
+ def withDistinct(distinct: Boolean): SelectAccumulator
+
+ /**
+ * @return Copy of this accumulator with `where` expressions.
+ */
+ def withWhere(where: Seq[Expression]): SelectAccumulator
+
+ /**
+ * @return Copy of this accumulator with `groupBy` expressions.
+ */
+ def withGroupBy(groupBy: Seq[Expression]): SelectAccumulator
+
+ /**
+ * @return Copy of this accumulator with `having` expressions.
+ */
+ def withHaving(having: Seq[Expression]): SelectAccumulator
+
+ /**
+ * @return Copy of this accumulator with `limit` expression.
+ */
+ def withLimit(limit: Expression): SelectAccumulator
+
+ /**
+ * @return Copy of this accumulator with `localLimit` expression.
+ */
+ def withLocalLimit(localLimit: Expression): SelectAccumulator
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/accumulator/SingleTableSQLAccumulator.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/accumulator/SingleTableSQLAccumulator.scala
new file mode 100644
index 0000000..2f56d9e
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/accumulator/SingleTableSQLAccumulator.scala
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl.optimization.accumulator
+
+import org.apache.ignite.IgniteException
+import org.apache.ignite.spark.impl.optimization._
+import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, NamedExpression, SortOrder}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+
+/**
+ * Class for accumulating parts of a SQL query to a single Ignite table.
+ *
+ * See <a href="http://www.h2database.com/html/grammar.html#select">select syntax of H2</a>.
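+ *
+ * Schematically, the accumulated parts compile into a query like the one below (table and column
+ * names are only illustrative):
+ * {{{
+ *   SELECT name, age FROM person WHERE age > 18 ORDER BY age LIMIT 10
+ * }}}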
+ */
+private[apache] case class SingleTableSQLAccumulator(
+ igniteQueryContext: IgniteQueryContext,
+ table: Option[String],
+ tableExpression: Option[(QueryAccumulator, String)],
+ outputExpressions: Seq[NamedExpression],
+ distinct: Boolean = false,
+ all: Boolean = false,
+ where: Option[Seq[Expression]] = None,
+ groupBy: Option[Seq[Expression]] = None,
+ having: Option[Seq[Expression]] = None,
+ limit: Option[Expression] = None,
+ localLimit: Option[Expression] = None,
+ orderBy: Option[Seq[SortOrder]] = None
+) extends SelectAccumulator {
+ /** @inheritdoc */
+ override def compileQuery(prettyPrint: Boolean = false, nestedQuery: Boolean = false): String = {
+ val delim = if (prettyPrint) "\n" else " "
+ val tab = if (prettyPrint) " " else ""
+
+ var sql = s"SELECT$delim$tab${outputExpressions.map(exprToString(_)).mkString(", ")}${delim}" +
+ s"FROM$delim$tab$compiledTableExpression"
+
+ if (where.exists(_.nonEmpty))
+ sql += s"${delim}WHERE$delim$tab${where.get.map(exprToString(_)).mkString(s" AND$delim$tab")}"
+
+ if (groupBy.exists(_.nonEmpty))
+ sql += s"${delim}GROUP BY ${groupBy.get.map(exprToString(_)).mkString(s",$delim$tab")}"
+
+ if (having.exists(_.nonEmpty))
+ sql += s"${delim}HAVING ${having.get.map(exprToString(_)).mkString(s" AND$delim$tab")}"
+
+ if (orderBy.exists(_.nonEmpty))
+ sql += s"${delim}ORDER BY ${orderBy.get.map(exprToString(_)).mkString(s",$delim$tab")}"
+
+ if (limit.isDefined) {
+ sql += s" LIMIT ${limit.map(exprToString(_)).get}"
+
+ if (nestedQuery)
+ sql = s"SELECT * FROM ($sql)"
+ }
+
+ sql
+ }
+
+ /**
+      * @return The FROM part of the SQL query.
+ */
+ private def compiledTableExpression: String = table match {
+ case Some(tableName) ⇒
+ tableName
+
+ case None ⇒ tableExpression match {
+ case Some((acc, alias)) ⇒
+ s"(${acc.compileQuery()}) $alias"
+
+ case None ⇒
+ throw new IgniteException("Unknown table.")
+ }
+ }
+
+ /** @inheritdoc */
+ override def simpleString(maxFields: Int): String =
+ s"IgniteSQLAccumulator(table: $table, columns: $outputExpressions, distinct: $distinct, all: $all, " +
+ s"where: $where, groupBy: $groupBy, having: $having, limit: $limit, orderBy: $orderBy)"
+
+ /** @inheritdoc */
+ override def withOutputExpressions(outputExpressions: Seq[NamedExpression]): SelectAccumulator =
+        copy(outputExpressions = outputExpressions)
+
+ /** @inheritdoc */
+ override def withDistinct(distinct: Boolean): SingleTableSQLAccumulator = copy(distinct = distinct)
+
+ /** @inheritdoc */
+ override def withWhere(where: Seq[Expression]): SingleTableSQLAccumulator = copy(where = Some(where))
+
+ /** @inheritdoc */
+ override def withGroupBy(groupBy: Seq[Expression]): SingleTableSQLAccumulator = copy(groupBy = Some(groupBy))
+
+ /** @inheritdoc */
+ override def withHaving(having: Seq[Expression]): SingleTableSQLAccumulator = copy(having = Some(having))
+
+ /** @inheritdoc */
+ override def withLimit(limit: Expression): SingleTableSQLAccumulator = copy(limit = Some(limit))
+
+ /** @inheritdoc */
+ override def withLocalLimit(localLimit: Expression): SingleTableSQLAccumulator = copy(localLimit = Some(localLimit))
+
+ /** @inheritdoc */
+ override def withOrderBy(orderBy: Seq[SortOrder]): SingleTableSQLAccumulator = copy(orderBy = Some(orderBy))
+
+ /** @inheritdoc */
+ override def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = copy() // FIXME
+
+ /** @inheritdoc */
+ override def output: Seq[Attribute] = outputExpressions.map(toAttributeReference(_, Seq.empty))
+
+ /** @inheritdoc */
+ override def qualifier: String = table.getOrElse(tableExpression.get._2)
+
+ /** @inheritdoc */
+ override def children: Seq[LogicalPlan] = tableExpression.map(te ⇒ Seq(te._1)).getOrElse(Nil)
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/accumulator/UnionSQLAccumulator.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/accumulator/UnionSQLAccumulator.scala
new file mode 100644
index 0000000..29bfcda
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/accumulator/UnionSQLAccumulator.scala
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl.optimization.accumulator
+
+import org.apache.ignite.spark.impl.optimization.{IgniteQueryContext, exprToString, toAttributeReference}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, NamedExpression, SortOrder}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+
+/**
+ * Accumulator to store info about a UNION query.
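+ *
+ * Each child accumulator is compiled as a sub-query and the results are concatenated with UNION,
+ * schematically (table and column names are only illustrative):
+ * {{{
+ *   SELECT id, name FROM person UNION SELECT id, name FROM employee
+ * }}}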
+ */
+private[apache] case class UnionSQLAccumulator(
+ igniteQueryContext: IgniteQueryContext,
+ children: Seq[QueryAccumulator],
+ outputExpressions: Seq[NamedExpression],
+ limit: Option[Expression] = None,
+ localLimit: Option[Expression] = None,
+ orderBy: Option[Seq[SortOrder]] = None
+) extends QueryAccumulator {
+ /** @inheritdoc */
+ override def compileQuery(prettyPrint: Boolean = false, nestedQuery: Boolean = false): String = {
+ val delim = if (prettyPrint) "\n" else " "
+ val tab = if (prettyPrint) " " else ""
+
+ var query = children.map(_.compileQuery(prettyPrint, nestedQuery = true)).mkString(s"${delim}UNION$delim")
+
+ query = orderBy match {
+ case Some(sortOrders) ⇒
+ query + s"${delim}ORDER BY ${sortOrders.map(exprToString(_)).mkString(s",$delim$tab")}"
+
+ case None ⇒ query
+ }
+
+ if (limit.isDefined) {
+ query += s" LIMIT ${exprToString(limit.get)}"
+
+ if (nestedQuery)
+ query = s"SELECT * FROM ($query)"
+ }
+
+ query
+ }
+
+ /** @inheritdoc */
+ override def simpleString(maxFields: Int): String =
+ s"UnionSQLAccumulator(orderBy: ${orderBy.map(_.map(exprToString(_)).mkString(", ")).getOrElse("[]")})"
+
+ /** @inheritdoc */
+ override def withOutputExpressions(outputExpressions: Seq[NamedExpression]): QueryAccumulator =
+        copy(outputExpressions = outputExpressions)
+
+ /** @inheritdoc */
+ override def withOrderBy(orderBy: Seq[SortOrder]): QueryAccumulator = copy(orderBy = Some(orderBy))
+
+ /** @inheritdoc */
+ override def output: Seq[Attribute] = outputExpressions.map(toAttributeReference(_, Seq.empty))
+
+ /** @inheritdoc */
+ override lazy val qualifier: String = igniteQueryContext.uniqueTableAlias
+
+ /** @inheritdoc */
+ override def withLimit(limit: Expression): QueryAccumulator = copy(limit = Some(limit))
+
+ /** @inheritdoc */
+ override def withLocalLimit(localLimit: Expression): QueryAccumulator = copy(localLimit = Some(localLimit))
+
+ override protected def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = copy() // FIXME
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/package.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/package.scala
new file mode 100644
index 0000000..5526cad
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/optimization/package.scala
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark.impl
+
+import org.apache.ignite.IgniteException
+import org.apache.ignite.spark.impl.optimization.accumulator.{QueryAccumulator, SingleTableSQLAccumulator}
+import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Count}
+import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, ExprId, Expression, NamedExpression}
+import org.apache.spark.sql.types._
+
+import scala.annotation.tailrec
+
+/**
+ * Helper methods and constants used to translate Spark Catalyst expressions into Ignite SQL.
+ */
+package object optimization {
+ /**
+ * Constant to store alias in column metadata.
+ */
+ private[optimization] val ALIAS: String = "alias"
+
+ /**
+ * All `SupportedExpression` implementations.
+ */
+ private val SUPPORTED_EXPRESSIONS: List[SupportedExpressions] = List (
+ SimpleExpressions,
+ SystemExpressions,
+ AggregateExpressions,
+ ConditionExpressions,
+ DateExpressions,
+ MathExpressions,
+ StringExpressions
+ )
+
+ /**
+ * @param expr Expression.
+     * @param useQualifier If true, outputs attributes of `expr` with a qualifier.
+     * @param useAlias If true, outputs `expr` with its alias.
+ * @return String representation of expression.
+ */
+ def exprToString(expr: Expression, useQualifier: Boolean = false, useAlias: Boolean = true): String = {
+ @tailrec
+ def exprToString0(expr: Expression, supportedExpressions: List[SupportedExpressions]): Option[String] =
+ if (supportedExpressions.nonEmpty) {
+ val exprStr = supportedExpressions.head.toString(
+ expr,
+ exprToString(_, useQualifier, useAlias = false),
+ useQualifier,
+ useAlias)
+
+ exprStr match {
+ case res: Some[String] ⇒
+ res
+ case None ⇒
+ exprToString0(expr, supportedExpressions.tail)
+ }
+ }
+ else
+ None
+
+ exprToString0(expr, SUPPORTED_EXPRESSIONS) match {
+ case Some(str) ⇒ str
+
+ case None ⇒
+                throw new IgniteException("Unsupported expression " + expr)
+ }
+ }
+
+ /**
+ * @param exprs Expressions to check.
+     * @return True if `exprs` contains only allowed (i.e. can be pushed down to Ignite) expressions, false otherwise.
+ */
+ def exprsAllowed(exprs: Seq[Expression]): Boolean =
+ exprs.forall(exprsAllowed)
+
+ /**
+ * @param expr Expression to check.
+     * @return True if `expr` is allowed (i.e. can be pushed down to Ignite), false otherwise.
+ */
+ def exprsAllowed(expr: Expression): Boolean =
+ SUPPORTED_EXPRESSIONS.exists(_(expr, exprsAllowed))
+
+ /**
+ * Converts `input` into `AttributeReference`.
+ *
+ * @param input Expression to convert.
+ * @param existingOutput Existing output.
+ * @param exprId Optional expression ID to use.
+ * @param alias Optional alias for a result.
+ * @return Converted expression.
+ */
+ def toAttributeReference(input: Expression, existingOutput: Seq[NamedExpression], exprId: Option[ExprId] = None,
+ alias: Option[String] = None): AttributeReference = {
+
+ input match {
+ case attr: AttributeReference ⇒
+ val toCopy = existingOutput.find(_.exprId == attr.exprId).getOrElse(attr)
+
+ AttributeReference(
+ name = toCopy.name,
+ dataType = toCopy.dataType,
+ metadata = alias
+ .map(new MetadataBuilder().withMetadata(toCopy.metadata).putString(ALIAS, _).build())
+ .getOrElse(toCopy.metadata)
+ )(exprId = exprId.getOrElse(toCopy.exprId), qualifier = toCopy.qualifier)
+
+ case a: Alias ⇒
+ toAttributeReference(a.child, existingOutput, Some(a.exprId), Some(alias.getOrElse(a.name)))
+
+ case agg: AggregateExpression ⇒
+ agg.aggregateFunction match {
+ case c: Count ⇒
+ if (agg.isDistinct)
+ AttributeReference(
+ name = s"COUNT(DISTINCT ${c.children.map(exprToString(_)).mkString(" ")})",
+ dataType = LongType,
+ metadata = alias
+ .map(new MetadataBuilder().putString(ALIAS, _).build())
+ .getOrElse(Metadata.empty)
+ )(exprId = exprId.getOrElse(agg.resultId))
+ else
+ AttributeReference(
+ name = s"COUNT(${c.children.map(exprToString(_)).mkString(" ")})",
+ dataType = LongType,
+ metadata = alias
+ .map(new MetadataBuilder().putString(ALIAS, _).build())
+ .getOrElse(Metadata.empty)
+ )(exprId = exprId.getOrElse(agg.resultId))
+
+ case _ ⇒
+ toAttributeReference(agg.aggregateFunction, existingOutput, Some(exprId.getOrElse(agg.resultId)), alias)
+ }
+
+ case ne: NamedExpression ⇒
+ AttributeReference(
+ name = exprToString(input),
+ dataType = input.dataType,
+ metadata = alias
+ .map(new MetadataBuilder().withMetadata(ne.metadata).putString(ALIAS, _).build())
+ .getOrElse(Metadata.empty)
+ )(exprId = exprId.getOrElse(ne.exprId))
+
+ case _ if exprsAllowed(input) ⇒
+ AttributeReference(
+ name = exprToString(input),
+ dataType = input.dataType,
+ metadata = alias
+ .map(new MetadataBuilder().putString(ALIAS, _).build())
+ .getOrElse(Metadata.empty)
+ )(exprId = exprId.getOrElse(NamedExpression.newExprId))
+
+ case _ ⇒
+ throw new IgniteException(s"Unsupported column expression $input")
+ }
+ }
+
+ /**
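+     * Maps a Spark data type to the corresponding Ignite SQL type name; for example,
+     * `toSqlType(StringType)` returns `"VARCHAR"` and `toSqlType(LongType)` returns `"BIGINT"`.
+     *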
+ * @param dataType Spark data type.
+ * @return SQL data type.
+ */
+ def toSqlType(dataType: DataType): String = dataType match {
+ case BooleanType ⇒ "BOOLEAN"
+ case IntegerType ⇒ "INT"
+ case ByteType ⇒ "TINYINT"
+ case ShortType ⇒ "SMALLINT"
+ case LongType ⇒ "BIGINT"
+ case DecimalType() ⇒ "DECIMAL"
+ case DoubleType ⇒ "DOUBLE"
+ case FloatType ⇒ "REAL"
+ case DateType ⇒ "DATE"
+ case TimestampType ⇒ "TIMESTAMP"
+ case StringType ⇒ "VARCHAR"
+ case BinaryType ⇒ "BINARY"
+ case ArrayType(_, _) ⇒ "ARRAY"
+ case _ ⇒
+ throw new IgniteException(s"$dataType not supported!")
+ }
+
+ /**
+     * @param expr Expression.
+     * @return True if the expression or any of its children is an AggregateExpression, false otherwise.
+ */
+ def hasAggregateInside(expr: Expression): Boolean = {
+ def hasAggregateInside0(expr: Expression): Boolean = expr match {
+ case AggregateExpression(_, _, _, _, _) ⇒
+ true
+
+ case e: Expression ⇒
+ e.children.exists(hasAggregateInside0)
+ }
+
+ hasAggregateInside0(expr)
+ }
+
+ /**
+     * Checks whether `acc` represents a simple query, i.e. a `SELECT ... FROM table WHERE ...` style
+     * query without aggregation, limits, ordering or embedded select expressions.
+ *
+ * @param acc Accumulator to check.
+ * @return True if accumulator stores simple query info, false otherwise.
+ */
+ def isSimpleTableAcc(acc: QueryAccumulator): Boolean = acc match {
+ case acc: SingleTableSQLAccumulator if acc.table.isDefined ⇒
+ acc.groupBy.isEmpty &&
+ acc.localLimit.isEmpty &&
+ acc.orderBy.isEmpty &&
+ !acc.distinct &&
+ !acc.outputExpressions.exists(hasAggregateInside)
+
+ case _ ⇒
+ false
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/package.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/package.scala
new file mode 100644
index 0000000..c41937a
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/ignite/spark/impl/package.scala
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import org.apache.commons.lang.StringUtils.equalsIgnoreCase
+import org.apache.ignite.cache.CacheMode
+import org.apache.ignite.cluster.ClusterNode
+import org.apache.ignite.configuration.CacheConfiguration
+import org.apache.ignite.internal.IgniteEx
+import org.apache.ignite.internal.processors.query.{GridQueryTypeDescriptor, QueryTypeDescriptorImpl}
+import org.apache.ignite.internal.processors.query.QueryUtils.normalizeSchemaName
+import org.apache.ignite.internal.util.lang.GridFunc.contains
+import org.apache.ignite.{Ignite, Ignition}
+import org.apache.spark.Partition
+import org.apache.spark.sql.catalyst.catalog.SessionCatalog
+
+import scala.collection.JavaConversions._
+import scala.collection.mutable.ArrayBuffer
+
+package object impl {
+ /**
+ * @param g Ignite.
+     * @return Name of the Ignite instance. If the name is null, an empty string is returned.
+ */
+ def igniteName(g: Ignite): String =
+ if(g.name() != null)
+ g.name
+ else
+ ""
+
+ /**
+ * @param schema Name of schema.
+ * @param default Default schema.
+ * @return Schema to use.
+ */
+ def schemaOrDefault(schema: String, default: String): String =
+ if (schema == SessionCatalog.DEFAULT_DATABASE)
+ default
+ else
+ schema
+
+ /**
+ * @param gridName Name of grid.
+     * @return Named grid instance. If `gridName` is empty, the unnamed instance is returned.
+ */
+ def ignite(gridName: String): Ignite =
+ if (gridName == "")
+ Ignition.ignite()
+ else
+ Ignition.ignite(gridName)
+
+ /**
+ * @param ignite Ignite instance.
+ * @param tabName Table name.
+ * @param schemaName Optional schema name.
+     * @return True if the table exists, false otherwise.
+ */
+ def sqlTableExists(ignite: Ignite, tabName: String, schemaName: Option[String]): Boolean =
+ sqlTableInfo(ignite, tabName, schemaName).isDefined
+
+ /**
+ * @param ignite Ignite instance.
+ * @param tabName Table name.
+ * @param schemaName Optional schema name.
+ * @return Cache name for given table.
+ */
+ def sqlCacheName(ignite: Ignite, tabName: String, schemaName: Option[String]): Option[String] =
+ sqlTableInfo(ignite, tabName, schemaName).map(_.asInstanceOf[QueryTypeDescriptorImpl].cacheName)
+
+ /**
+ * @param ignite Ignite instance.
+ * @return All schemas in given Ignite instance.
+ */
+ def allSchemas(ignite: Ignite): Seq[String] = ignite.cacheNames
+ .map(name =>
+ normalizeSchemaName(name,
+ ignite.cache[Any,Any](name).getConfiguration(classOf[CacheConfiguration[Any,Any]]).getSqlSchema))
+ .toSeq
+ .distinct
+
+ /**
+ * @param ignite Ignite instance.
+ * @param schemaName Schema name.
+ * @return All cache configurations for the given schema.
+ */
+ def cachesForSchema[K,V](ignite: Ignite, schemaName: Option[String]): Seq[CacheConfiguration[K,V]] =
+ ignite.cacheNames
+ .map(ignite.cache[K,V](_).getConfiguration(classOf[CacheConfiguration[K,V]]))
+ .filter(ccfg =>
+ schemaName.forall(normalizeSchemaName(ccfg.getName, ccfg.getSqlSchema).equalsIgnoreCase(_)) ||
+ schemaName.contains(SessionCatalog.DEFAULT_DATABASE))
+ .toSeq
+
+ /**
+ * @param ignite Ignite instance.
+ * @param tabName Table name.
+ * @param schemaName Optional schema name.
+ * @return GridQueryTypeDescriptor for a given table.
+ */
+ def sqlTableInfo(ignite: Ignite, tabName: String, schemaName: Option[String]): Option[GridQueryTypeDescriptor] =
+ ignite.asInstanceOf[IgniteEx].context.cache.publicCacheNames
+ .flatMap(cacheName => ignite.asInstanceOf[IgniteEx].context.query.types(cacheName))
+ .find(table => table.tableName.equalsIgnoreCase(tabName) && isValidSchema(table, schemaName))
+
+ /**
+ * @param table GridQueryTypeDescriptor for a given table.
+ * @param schemaName Optional schema name.
+ * @return `True` if schema is valid.
+ */
+ def isValidSchema(table: GridQueryTypeDescriptor, schemaName: Option[String]): Boolean =
+ schemaName match {
+ case Some(schema) =>
+ schema.equalsIgnoreCase(table.schemaName) || schema.equals(SessionCatalog.DEFAULT_DATABASE)
+ case None =>
+ true
+ }
+
+ /**
+ * @param table Table.
+ * @param column Column name.
+     * @return `True` if the column is a key column.
+ */
+ def isKeyColumn(table: GridQueryTypeDescriptor, column: String): Boolean =
+ contains(allKeyFields(table), column) || equalsIgnoreCase(table.keyFieldName, column)
+
+ /**
+ * @param table Table.
+ * @return All the key fields in a Set.
+ */
+ def allKeyFields(table: GridQueryTypeDescriptor): scala.collection.Set[String] =
+ table.fields.filter(entry => table.property(entry._1).key).keySet
+
+ /**
+     * Computes Spark partitions for a given cache.
+     *
+     * @param ic Ignite context.
+     * @param cacheName Cache name.
+     * @return Array of `IgniteDataFramePartition`s.
+ */
+ def calcPartitions(ic: IgniteContext, cacheName: String): Array[Partition] = {
+ val cache = ic.ignite().cache[Any, Any](cacheName)
+
+ val ccfg = cache.getConfiguration(classOf[CacheConfiguration[Any, Any]])
+
+ if (ccfg.getCacheMode == CacheMode.REPLICATED) {
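+            // A replicated cache keeps a full copy of the data on every server node, so a single
+            // Spark partition reading all Ignite partitions (the default of 1024 is assumed here)
+            // from one server node is sufficient.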
+ val serverNodes = ic.ignite().cluster().forCacheNodes(cacheName).forServers().nodes()
+
+ Array(IgniteDataFramePartition(0, serverNodes.head, Stream.from(0).take(1024).toList))
+ }
+ else {
+ val aff = ic.ignite().affinity(cacheName)
+
+ val parts = aff.partitions()
+
+ val nodesToParts = (0 until parts).foldLeft(Map[ClusterNode, ArrayBuffer[Int]]()) {
+ case (nodeToParts, ignitePartIdx) ⇒
+ val primary = aff.mapPartitionToPrimaryAndBackups(ignitePartIdx).head
+
+ if (nodeToParts.contains(primary)) {
+ nodeToParts(primary) += ignitePartIdx
+
+ nodeToParts
+ }
+ else
+ nodeToParts + (primary → ArrayBuffer[Int](ignitePartIdx))
+ }
+
+ val partitions = nodesToParts.toIndexedSeq.zipWithIndex.map { case ((node, nodesParts), i) ⇒
+ IgniteDataFramePartition(i, node, nodesParts.toList)
+ }
+
+ partitions.toArray
+ }
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/spark/sql/ignite/IgniteExternalCatalog.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/spark/sql/ignite/IgniteExternalCatalog.scala
new file mode 100644
index 0000000..c5dcca3
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/spark/sql/ignite/IgniteExternalCatalog.scala
@@ -0,0 +1,347 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.ignite
+
+import java.net.URI
+
+import org.apache.ignite.internal.processors.query.QueryUtils.DFLT_SCHEMA
+import org.apache.ignite.spark.IgniteContext
+import org.apache.ignite.spark.IgniteDataFrameSettings._
+import org.apache.ignite.spark.impl.IgniteSQLRelation.schema
+import org.apache.ignite.{Ignite, IgniteException}
+import org.apache.spark.sql.{AnalysisException, SparkSession}
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
+import org.apache.spark.sql.catalyst.catalog.{ExternalCatalog, _}
+import org.apache.spark.sql.catalyst.expressions.Expression
+import org.apache.spark.sql.catalyst.util.StringUtils
+import org.apache.spark.sql.types.StructType
+import org.apache.ignite.spark.impl._
+import org.apache.spark.sql.catalyst.catalog.SessionCatalog.DEFAULT_DATABASE
+import org.apache.spark.sql.ignite.IgniteExternalCatalog.{IGNITE_PROTOCOL, IGNITE_URI, OPTION_GRID}
+
+import scala.collection.JavaConversions._
+
+/**
+ * External catalog implementation that provides transparent access to SQL tables existing in Ignite.
+ *
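+ * A minimal usage sketch, assuming the catalog has been installed by an Ignite-aware Spark session
+ * (table and column names are only illustrative):
+ * {{{
+ *   igniteSession.sql("SELECT id, name FROM person WHERE id > 10").show()
+ * }}}
+ *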
+ * @param igniteContext Ignite context to provide access to Ignite instance.
+ */
+private[ignite] class IgniteExternalCatalog(igniteContext: IgniteContext)
+ extends ExternalCatalog {
+ /**
+ * Default Ignite instance.
+ */
+ @transient private val ignite: Ignite = igniteContext.ignite()
+
+ @transient private var currentSchema = DEFAULT_DATABASE
+
+ /**
+ * @param db Ignite instance name.
+ * @return Description of Ignite instance.
+ */
+ override def getDatabase(db: String): CatalogDatabase =
+ CatalogDatabase(db, db, IGNITE_URI, Map.empty)
+
+ /**
+     * Checks that an Ignite schema with the provided name exists.
+     *
+     * @param schema Ignite schema name or <code>SessionCatalog.DEFAULT_DATABASE</code>.
+     * @return True if the Ignite schema exists.
+ */
+ override def databaseExists(schema: String): Boolean =
+ schema == DEFAULT_DATABASE || allSchemas(ignite).exists(schema.equalsIgnoreCase)
+
+ /**
+ * @return List of all known Ignite schemas.
+ */
+ override def listDatabases(): Seq[String] =
+ allSchemas(ignite)
+
+ /**
+ * @param pattern Pattern to filter databases names.
+ * @return List of all known Ignite schema names filtered by pattern.
+ */
+ override def listDatabases(pattern: String): Seq[String] =
+ StringUtils.filterPattern(listDatabases(), pattern)
+
+ /**
+ * Sets default Ignite schema.
+ *
+ * @param schema Name of Ignite schema.
+ */
+ override def setCurrentDatabase(schema: String): Unit =
+ currentSchema = schema
+
+ /** @inheritdoc */
+ override def getTable(db: String, table: String): CatalogTable = getTableOption(db, table).get
+
+ def getTableOption(db: String, tabName: String): Option[CatalogTable] = {
+ val gridName = igniteName(ignite)
+
+ val schemaName = schemaOrDefault(db, currentSchema)
+
+ sqlTableInfo(ignite, tabName, Some(db)) match {
+ case Some(table) ⇒
+ val tableName = table.tableName
+
+ Some(new CatalogTable(
+ identifier = new TableIdentifier(tableName, Some(schemaName)),
+ tableType = CatalogTableType.EXTERNAL,
+ storage = CatalogStorageFormat(
+ locationUri = Some(URI.create(IGNITE_PROTOCOL + schemaName + "/" + tableName)),
+ inputFormat = Some(FORMAT_IGNITE),
+ outputFormat = Some(FORMAT_IGNITE),
+ serde = None,
+ compressed = false,
+ properties = Map(
+ OPTION_GRID → gridName,
+ OPTION_TABLE → tableName)
+ ),
+ schema = schema(table),
+ provider = Some(FORMAT_IGNITE),
+ partitionColumnNames =
+ if (!allKeyFields(table).isEmpty)
+ allKeyFields(table).toSeq
+ else
+ Seq(table.keyFieldName),
+ bucketSpec = None))
+ case None ⇒ None
+ }
+ }
+
+ /** @inheritdoc */
+ override def tableExists(db: String, table: String): Boolean =
+ sqlTableExists(ignite, table, Some(schemaOrDefault(db, currentSchema)))
+
+ /** @inheritdoc */
+ override def listTables(db: String): Seq[String] = listTables(db, ".*")
+
+ /** @inheritdoc */
+ override def listTables(db: String, pattern: String): Seq[String] =
+ StringUtils.filterPattern(
+ cachesForSchema[Any,Any](ignite, Some(schemaOrDefault(db, currentSchema)))
+ .flatMap(_.getQueryEntities.map(_.getTableName)), pattern)
+
+ /** @inheritdoc */
+ override def loadTable(db: String, table: String,
+ loadPath: String, isOverwrite: Boolean, isSrcLocal: Boolean): Unit = { /* no-op */ }
+
+ /** @inheritdoc */
+ override def getPartition(db: String, table: String, spec: TablePartitionSpec): CatalogTablePartition = null
+
+ /** @inheritdoc */
+ override def getPartitionOption(db: String, table: String,
+ spec: TablePartitionSpec): Option[CatalogTablePartition] = None
+
+ /** @inheritdoc */
+ override def listPartitionNames(db: String, table: String, partialSpec: Option[TablePartitionSpec]): Seq[String] = {
+ sqlCacheName(ignite, table, Some(schemaOrDefault(db, currentSchema))).map { cacheName ⇒
+ val parts = ignite.affinity(cacheName).partitions()
+
+ (0 until parts).map(_.toString)
+ }.getOrElse(Seq.empty)
+ }
+
+ /** @inheritdoc */
+ override def listPartitions(db: String, table: String,
+ partialSpec: Option[TablePartitionSpec]): Seq[CatalogTablePartition] = {
+
+ val partitionNames = listPartitionNames(db, table, partialSpec)
+
+ if (partitionNames.isEmpty)
+ Seq.empty
+ else {
+ val cacheName = sqlCacheName(ignite, table, Some(schemaOrDefault(db, currentSchema))).get
+
+ val aff = ignite.affinity[Any](cacheName)
+
+ partitionNames.map { name ⇒
+ val nodes = aff.mapPartitionToPrimaryAndBackups(name.toInt)
+
+ if (nodes.isEmpty)
+                    throw new AnalysisException(s"Nodes for partition are empty [grid=${ignite.name},table=$table,partition=$name].")
+
+ CatalogTablePartition (
+ Map[String, String] (
+ "name" → name,
+ "primary" → nodes.head.id.toString,
+ "backups" → nodes.tail.map(_.id.toString).mkString(",")
+ ),
+ CatalogStorageFormat.empty
+ )
+ }
+ }
+ }
+
+ /** @inheritdoc */
+ override def listPartitionsByFilter(db: String,
+ table: String,
+ predicates: Seq[Expression],
+ defaultTimeZoneId: String): Seq[CatalogTablePartition] =
+ listPartitions(db, table, None)
+
+ /** @inheritdoc */
+ override def loadPartition(db: String,
+ table: String,
+ loadPath: String,
+ partition: TablePartitionSpec, isOverwrite: Boolean,
+ inheritTableSpecs: Boolean, isSrcLocal: Boolean): Unit = { /* no-op */ }
+
+ /** @inheritdoc */
+ override def loadDynamicPartitions(db: String, table: String,
+ loadPath: String,
+ partition: TablePartitionSpec, replace: Boolean,
+ numDP: Int): Unit = { /* no-op */ }
+
+ /** @inheritdoc */
+ override def getFunction(db: String, funcName: String): CatalogFunction =
+ throw new UnsupportedOperationException("unsupported")
+
+ /** @inheritdoc */
+ override def functionExists(db: String, funcName: String): Boolean = false
+
+ /** @inheritdoc */
+ override def listFunctions(db: String, pattern: String): Seq[String] = Seq.empty[String]
+
+ /** @inheritdoc */
+ override def alterDatabase(dbDefinition: CatalogDatabase): Unit =
+ throw new UnsupportedOperationException("unsupported")
+
+ /** @inheritdoc */
+ override def alterFunction(db: String, funcDefinition: CatalogFunction): Unit =
+ throw new UnsupportedOperationException("unsupported")
+
+ /** @inheritdoc */
+ override def alterTableStats(db: String, table: String, stats: Option[CatalogStatistics]): Unit =
+ throw new UnsupportedOperationException("unsupported")
+
+ /** @inheritdoc */
+ override def alterTable(tableDefinition: CatalogTable): Unit =
+ throw new UnsupportedOperationException("unsupported")
+
+ /** @inheritdoc */
+ override def alterTableDataSchema(db: String, table: String, schema: StructType): Unit =
+ throw new UnsupportedOperationException("unsupported")
+
+ /** @inheritdoc */
+ override def createFunction(db: String, funcDefinition: CatalogFunction): Unit = { /* no-op */ }
+
+ /** @inheritdoc */
+ override def dropFunction(db: String, funcName: String): Unit = { /* no-op */ }
+
+ /** @inheritdoc */
+ override def renameFunction(db: String, oldName: String, newName: String): Unit = { /* no-op */ }
+
+ /** @inheritdoc */
+ override def createDatabase(dbDefinition: CatalogDatabase, ignoreIfExists: Boolean): Unit =
+ throw new UnsupportedOperationException("unsupported")
+
+ /** @inheritdoc */
+ override def dropDatabase(db: String, ignoreIfNotExists: Boolean, cascade: Boolean): Unit =
+ throw new UnsupportedOperationException("unsupported")
+
+ /** @inheritdoc */
+ override def createTable(tableDefinition: CatalogTable, ignoreIfExists: Boolean): Unit = {
+ sqlTableInfo(ignite, tableDefinition.identifier.table, tableDefinition.identifier.database) match {
+ case Some(_) ⇒
+ /* no-op */
+
+ case None ⇒
+ val schema = tableDefinition.identifier.database
+
+ if(schema.isDefined && !schema.contains(DFLT_SCHEMA) && !schema.contains(DEFAULT_DATABASE))
+ throw new IgniteException("Can only create new tables in PUBLIC schema, not " + schema.get)
+
+ val props = tableDefinition.storage.properties
+
+ QueryHelper.createTable(tableDefinition.schema,
+ tableDefinition.identifier.table,
+ props(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS).split(","),
+ props.get(OPTION_CREATE_TABLE_PARAMETERS),
+ ignite)
+ }
+ }
+
+ /** @inheritdoc */
+ override def dropTable(db: String, tabName: String, ignoreIfNotExists: Boolean, purge: Boolean): Unit =
+ sqlTableInfo(ignite, tabName, Some(schemaOrDefault(db, currentSchema))) match {
+ case Some(table) ⇒
+ val tableName = table.tableName
+
+ QueryHelper.dropTable(tableName, ignite)
+
+ case None ⇒
+ if (!ignoreIfNotExists)
+                    throw new IgniteException(s"Table $tabName doesn't exist.")
+ }
+
+ /** @inheritdoc */
+ override def renameTable(db: String, oldName: String, newName: String): Unit =
+ throw new UnsupportedOperationException("unsupported")
+
+ /** @inheritdoc */
+ override def createPartitions(db: String, table: String,
+ parts: Seq[CatalogTablePartition],
+ ignoreIfExists: Boolean): Unit =
+ throw new UnsupportedOperationException("unsupported")
+
+ /** @inheritdoc */
+ override def dropPartitions(db: String, table: String,
+ parts: Seq[TablePartitionSpec],
+ ignoreIfNotExists: Boolean, purge: Boolean, retainData: Boolean): Unit =
+ throw new UnsupportedOperationException("unsupported")
+
+ /** @inheritdoc */
+ override def renamePartitions(db: String, table: String,
+ specs: Seq[TablePartitionSpec],
+ newSpecs: Seq[TablePartitionSpec]): Unit =
+ throw new UnsupportedOperationException("unsupported")
+
+ /** @inheritdoc */
+ override def alterPartitions(db: String, table: String,
+ parts: Seq[CatalogTablePartition]): Unit =
+ throw new UnsupportedOperationException("unsupported")
+
+ override def getTablesByName(db: String, tables: Seq[String]): Seq[CatalogTable] = tables.map(getTable(db, _))
+
+ override def listViews(db: String, pattern: String): Seq[String] = {
+ throw new UnsupportedOperationException("unsupported") // FIXME
+ }
+}
+
+object IgniteExternalCatalog {
+ /**
+ * Config option to specify named grid instance to connect when loading data.
+ * For internal use only.
+ *
+ * @see [[org.apache.ignite.Ignite#name()]]
+ */
+ private[apache] val OPTION_GRID = "grid"
+
+ /**
+ * Location of ignite tables.
+ */
+ private[apache] val IGNITE_PROTOCOL = "ignite:/"
+
+ /**
+ * URI location of ignite tables.
+ */
+ private val IGNITE_URI = new URI(IGNITE_PROTOCOL)
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/spark/sql/ignite/IgniteOptimization.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/spark/sql/ignite/IgniteOptimization.scala
new file mode 100644
index 0000000..5c5654f
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/spark/sql/ignite/IgniteOptimization.scala
@@ -0,0 +1,443 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.ignite
+
+import org.apache.ignite.IgniteException
+import org.apache.ignite.spark.impl.optimization._
+import org.apache.ignite.spark.impl.optimization.accumulator._
+import org.apache.ignite.spark.impl.{IgniteSQLAccumulatorRelation, IgniteSQLRelation, sqlCacheName}
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.execution.datasources.LogicalRelation
+
+/**
+ * Query plan optimization for Ignite-based queries.
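+ *
+ * A minimal sketch of installing the rule manually (it is normally registered by the Ignite-specific
+ * session state; shown only for illustration):
+ * {{{
+ *   spark.experimental.extraOptimizations ++= Seq(IgniteOptimization)
+ * }}}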
+ */
+object IgniteOptimization extends Rule[LogicalPlan] with Logging {
+ /** @inheritdoc */
+ override def apply(plan: LogicalPlan): LogicalPlan = {
+ logDebug("")
+ logDebug("== Plan Before Ignite Operator Push Down ==")
+ logDebug(plan.toString())
+
+ val transformed = fixAmbiguousOutput(pushDownOperators(plan))
+
+ logDebug("")
+ logDebug("== Plan After Ignite Operator Push Down ==")
+ logDebug(transformed.toString())
+
+ makeIgniteAccRelation(transformed)
+ }
+
+ /**
+ * Change query plan by accumulating query parts supported by Ignite into `QueryAccumulator`.
+ *
+ * @param plan Query plan.
+ * @return Transformed plan.
+ */
+ private def pushDownOperators(plan: LogicalPlan): LogicalPlan = {
+ val aliasIndexIterator = Stream.from(1).iterator
+
+        //Flag to indicate that some step was skipped due to an unsupported expression.
+        //When it is true, the entire transformation of higher-level nodes has to be skipped.
+ var stepSkipped = true
+
+        //Apply optimization rules to tree nodes from the bottom up.
+ plan.transformUp {
+            //We found a basic node to transform.
+            //We create a new accumulator and move on to the upper layers.
+ case LogicalRelation(igniteSqlRelation: IgniteSQLRelation[_, _], output, _catalogTable, _) ⇒
+                //Clear the flag to optimize each statement separately.
+ stepSkipped = false
+
+ val igniteQueryContext = IgniteQueryContext(
+ igniteContext = igniteSqlRelation.ic,
+ sqlContext = igniteSqlRelation.sqlContext,
+ catalogTable = _catalogTable,
+ aliasIndex = aliasIndexIterator,
+ cacheName =
+ sqlCacheName(igniteSqlRelation.ic.ignite(), igniteSqlRelation.tableName,
+ igniteSqlRelation.schemaName)
+ .getOrElse(throw new IgniteException("Unknown table")))
+
+                //LogicalRelation is the bottom-most TreeNode in a LogicalPlan.
+                //We replace it with an accumulator.
+                //We push all supported SQL operators into it on the higher tree levels.
+ SingleTableSQLAccumulator(
+ igniteQueryContext = igniteQueryContext,
+ table = Some(igniteSqlRelation.tableName),
+ tableExpression = None,
+ outputExpressions = output.map(attr ⇒ attr.withQualifier(Seq(igniteSqlRelation.tableName))))
+
+ case project: Project if !stepSkipped && exprsAllowed(project.projectList) ⇒
+                //The Project layer just changes the output of the current query.
+ project.child match {
+ case acc: SelectAccumulator ⇒
+ acc.withOutputExpressions(
+ substituteExpressions(project.projectList, acc.outputExpressions))
+
+ case _ ⇒
+                        throw new IgniteException("stepSkipped == false but child is not SelectAccumulator")
+ }
+
+ case sort: Sort if !stepSkipped && isSortPushDownAllowed(sort.order, sort.global) ⇒
+ sort.child match {
+ case acc: QueryAccumulator ⇒
+ acc.withOrderBy(sort.order)
+
+ case _ ⇒
+                        throw new IgniteException("stepSkipped == false but child is not QueryAccumulator")
+ }
+
+ case filter: Filter if !stepSkipped && exprsAllowed(filter.condition) ⇒
+
+ filter.child match {
+ case acc: SelectAccumulator ⇒
+ if (hasAggregateInside(filter.condition) || acc.groupBy.isDefined)
+ acc.withHaving(acc.having.getOrElse(Nil) :+ filter.condition)
+ else
+ acc.withWhere(acc.where.getOrElse(Nil) :+ filter.condition)
+
+ case _ ⇒
+                        throw new IgniteException("stepSkipped == false but child is not SelectAccumulator")
+ }
+
+ case agg: Aggregate
+ if !stepSkipped && exprsAllowed(agg.groupingExpressions) && exprsAllowed(agg.aggregateExpressions) ⇒
+
+ agg.child match {
+ case acc: SelectAccumulator ⇒
+ if (acc.groupBy.isDefined) {
+ val tableAlias = acc.igniteQueryContext.uniqueTableAlias
+
+ SingleTableSQLAccumulator(
+ igniteQueryContext = acc.igniteQueryContext,
+ table = None,
+ tableExpression = Some((acc, tableAlias)),
+ outputExpressions = agg.aggregateExpressions)
+ }
+ else
+ acc
+ .withGroupBy(agg.groupingExpressions)
+ .withOutputExpressions(
+ substituteExpressions(agg.aggregateExpressions, acc.outputExpressions))
+
+ case acc: QueryAccumulator ⇒
+ val tableAlias = acc.igniteQueryContext.uniqueTableAlias
+
+ SingleTableSQLAccumulator(
+ igniteQueryContext = acc.igniteQueryContext,
+ table = None,
+ tableExpression = Some((acc, tableAlias)),
+ outputExpressions = agg.aggregateExpressions)
+
+ case _ ⇒
+                        throw new IgniteException("stepSkipped == false but child is not QueryAccumulator")
+ }
+
+ case limit: LocalLimit if !stepSkipped && exprsAllowed(limit.limitExpr) ⇒
+ limit.child match {
+ case acc: SelectAccumulator ⇒
+ acc.withLocalLimit(limit.limitExpr)
+
+ case acc: QueryAccumulator ⇒
+ acc.withLocalLimit(limit.limitExpr)
+
+ case _ ⇒
+                        throw new IgniteException("stepSkipped == false but child is not QueryAccumulator")
+ }
+
+ case limit: GlobalLimit if !stepSkipped && exprsAllowed(limit.limitExpr) ⇒
+ limit.child.transformUp {
+ case acc: SelectAccumulator ⇒
+ acc.withLimit(limit.limitExpr)
+
+ case acc: QueryAccumulator ⇒
+ acc.withLimit(limit.limitExpr)
+
+ case _ ⇒
+                        throw new IgniteException("stepSkipped == false but child is not QueryAccumulator")
+ }
+
+ case union: Union if !stepSkipped && isAllChildrenOptimized(union.children) ⇒
+ val first = union.children.head.asInstanceOf[QueryAccumulator]
+
+ val subQueries = union.children.map(_.asInstanceOf[QueryAccumulator])
+
+ UnionSQLAccumulator(
+ first.igniteQueryContext,
+ subQueries,
+ subQueries.head.output)
+
+ case join: Join
+ if !stepSkipped && isAllChildrenOptimized(Seq(join.left, join.right)) &&
+ join.condition.forall(exprsAllowed) ⇒
+
+ val left = join.left.asInstanceOf[QueryAccumulator]
+
+ val (leftOutput, leftAlias) =
+ if (!isSimpleTableAcc(left)) {
+ val tableAlias = left.igniteQueryContext.uniqueTableAlias
+
+ (left.output, Some(tableAlias))
+ }
+ else
+ (left.output, None)
+
+ val right = join.right.asInstanceOf[QueryAccumulator]
+
+ val (rightOutput, rightAlias) =
+ if (!isSimpleTableAcc(right) ||
+ leftAlias.getOrElse(left.qualifier) == right.qualifier) {
+ val tableAlias = right.igniteQueryContext.uniqueTableAlias
+
+ (right.output, Some(tableAlias))
+ }
+ else
+ (right.output, None)
+
+ JoinSQLAccumulator(
+ left.igniteQueryContext,
+ left,
+ right,
+// newLeft,
+// newRight,
+ join.joinType,
+ leftOutput ++ rightOutput,
+ join.condition,
+ leftAlias,
+ rightAlias)
+
+ case unknown ⇒
+ stepSkipped = true
+
+ unknown
+ }
+ }
+
+ /**
+     * Changes qualifiers for ambiguous column names.
+ *
+ * @param plan Query plan.
+ * @return Transformed plan.
+ */
+ private def fixAmbiguousOutput(plan: LogicalPlan): LogicalPlan = plan.transformDown {
+ case acc: SingleTableSQLAccumulator if acc.children.exists(_.isInstanceOf[JoinSQLAccumulator]) ⇒
+ val fixedChildOutput =
+ fixAmbiguousOutput(acc.tableExpression.get._1.outputExpressions, acc.igniteQueryContext)
+
+ val newOutput = substituteExpressions(acc.outputExpressions, fixedChildOutput, changeOnlyName = true)
+
+ acc.copy(
+ outputExpressions = newOutput,
+ where = acc.where.map(
+ substituteExpressions(_, fixedChildOutput, changeOnlyName = true)),
+ groupBy = acc.groupBy.map(
+ substituteExpressions(_, fixedChildOutput, changeOnlyName = true)),
+ having = acc.having.map(
+ substituteExpressions(_, fixedChildOutput, changeOnlyName = true)),
+ limit = acc.limit.map(
+ substituteExpression(_, fixedChildOutput, changeOnlyName = true)),
+ localLimit = acc.localLimit.map(
+ substituteExpression(_, fixedChildOutput, changeOnlyName = true)),
+ orderBy = acc.orderBy.map(
+ substituteExpressions(_, fixedChildOutput, changeOnlyName = true)))
+
+ acc
+
+ case acc: JoinSQLAccumulator
+ if acc.left.isInstanceOf[JoinSQLAccumulator] || acc.right.isInstanceOf[JoinSQLAccumulator] ⇒
+ val leftFixed = acc.left match {
+ case leftJoin: JoinSQLAccumulator ⇒
+ val fixedChildOutput = fixAmbiguousOutput(acc.left.outputExpressions, acc.igniteQueryContext)
+
+ val newOutput =
+ substituteExpressions(acc.outputExpressions, fixedChildOutput, changeOnlyName = true)
+
+ acc.copy(
+ outputExpressions = newOutput,
+ left = leftJoin.copy(outputExpressions = fixedChildOutput),
+ condition = acc.condition.map(
+ substituteExpression(_, fixedChildOutput, changeOnlyName = true)),
+ where = acc.where.map(
+ substituteExpressions(_, fixedChildOutput, changeOnlyName = true)),
+ groupBy = acc.groupBy.map(
+ substituteExpressions(_, fixedChildOutput, changeOnlyName = true)),
+ having = acc.having.map(
+ substituteExpressions(_, fixedChildOutput, changeOnlyName = true)),
+ limit = acc.limit.map(
+ substituteExpression(_, fixedChildOutput, changeOnlyName = true)),
+ localLimit = acc.localLimit.map(
+ substituteExpression(_, fixedChildOutput, changeOnlyName = true)),
+ orderBy = acc.orderBy.map(
+ substituteExpressions(_, fixedChildOutput, changeOnlyName = true)))
+
+ case _ ⇒ acc
+ }
+
+ val fixed = leftFixed.right match {
+ case rightJoin: JoinSQLAccumulator ⇒
+ val fixedChildOutput =
+ fixAmbiguousOutput(leftFixed.outputExpressions, leftFixed.igniteQueryContext)
+
+ val newOutput = substituteExpressions(leftFixed.outputExpressions, fixedChildOutput)
+
+ leftFixed.copy(
+ outputExpressions = newOutput,
+ right = rightJoin.copy(outputExpressions = fixedChildOutput),
+ condition = acc.condition.map(
+ substituteExpression(_, fixedChildOutput, changeOnlyName = true)),
+ where = acc.where.map(
+ substituteExpressions(_, fixedChildOutput, changeOnlyName = true)),
+ groupBy = acc.groupBy.map(
+ substituteExpressions(_, fixedChildOutput, changeOnlyName = true)),
+ having = acc.having.map(
+ substituteExpressions(_, fixedChildOutput, changeOnlyName = true)),
+ limit = acc.limit.map(
+ substituteExpression(_, fixedChildOutput, changeOnlyName = true)),
+ localLimit = acc.localLimit.map(
+ substituteExpression(_, fixedChildOutput, changeOnlyName = true)),
+ orderBy = acc.orderBy.map(
+ substituteExpressions(_, fixedChildOutput, changeOnlyName = true)))
+
+ case _ ⇒ leftFixed
+ }
+
+ fixed.copy(
+ condition = acc.condition.map(
+ substituteExpression(_, acc.outputExpressions, changeOnlyName = true)),
+ where = acc.where.map(
+ substituteExpressions(_, acc.outputExpressions, changeOnlyName = true)),
+ groupBy = acc.groupBy.map(
+ substituteExpressions(_, acc.outputExpressions, changeOnlyName = true)),
+ having = acc.having.map(
+ substituteExpressions(_, acc.outputExpressions, changeOnlyName = true)),
+ limit = acc.limit.map(
+ substituteExpression(_, acc.outputExpressions, changeOnlyName = true)),
+ localLimit = acc.localLimit.map(
+ substituteExpression(_, acc.outputExpressions, changeOnlyName = true)),
+ orderBy = acc.orderBy.map(
+ substituteExpressions(_, acc.outputExpressions, changeOnlyName = true)))
+
+ case unknown ⇒
+ unknown
+ }
+
+ private def fixAmbiguousOutput(exprs: Seq[NamedExpression], ctx: IgniteQueryContext): Seq[NamedExpression] =
+ exprs.foldLeft((Set[String](), Set[NamedExpression]())) {
+ case ((uniqueNames, fixed), next) ⇒
+ if (uniqueNames(next.name))
+ (uniqueNames, fixed + Alias(next, ctx.uniqueColumnAlias(next))(exprId = next.exprId))
+ else
+ (uniqueNames + next.name, fixed + next)
+ }._2.toSeq
+
+ /**
+     * Substitutes each `QueryAccumulator` with a `LogicalRelation` containing an `IgniteSQLAccumulatorRelation`.
+ *
+ * @param plan Query plan.
+ * @return Transformed plan.
+ */
+ private def makeIgniteAccRelation(plan: LogicalPlan): LogicalPlan =
+ plan.transformDown {
+ case acc: QueryAccumulator ⇒
+ new LogicalRelation (
+ relation = IgniteSQLAccumulatorRelation(acc),
+ output = acc.outputExpressions.map(toAttributeReference(_, Seq.empty)),
+ catalogTable = acc.igniteQueryContext.catalogTable,
+ false)
+ }
+
+ /**
+ * @param order Order.
+     * @param global True if the order is applied to the entire result set, false if ordering is per-partition.
+ * @return True if sort can be pushed down to Ignite, false otherwise.
+ */
+ private def isSortPushDownAllowed(order: Seq[SortOrder], global: Boolean): Boolean =
+ global && order.map(_.child).forall(exprsAllowed)
+
+ /**
+ * @param children Plans to check.
+     * @return True if all plans are `QueryAccumulator`, false otherwise.
+ */
+ private def isAllChildrenOptimized(children: Seq[LogicalPlan]): Boolean =
+ children.forall {
+ case _: QueryAccumulator ⇒
+ true
+
+ case _ ⇒
+ false
+ }
+
+ /**
+     * Replaces each expression in the `exprs` collection with the expression that has the same `exprId` in `substitution`.
+ *
+ * @param exprs Expressions to substitute.
+     * @param substitution Expressions for substitution.
+     * @param changeOnlyName If true, substitute only the expression name.
+ * @tparam T Concrete expression type.
+ * @return Substituted expressions.
+ */
+ private def substituteExpressions[T <: Expression](exprs: Seq[T], substitution: Seq[NamedExpression],
+ changeOnlyName: Boolean = false): Seq[T] = {
+
+ exprs.map(substituteExpression(_, substitution, changeOnlyName))
+ }
+
+ private def substituteExpression[T <: Expression](expr: T, substitution: Seq[NamedExpression],
+ changeOnlyName: Boolean): T = expr match {
+ case ne: NamedExpression ⇒
+ substitution.find(_.exprId == ne.exprId) match {
+ case Some(found) ⇒
+ if (!changeOnlyName)
+ found.asInstanceOf[T]
+ else ne match {
+ case alias: Alias ⇒
+ Alias(
+ AttributeReference(
+ found.name,
+ found.dataType,
+ nullable = found.nullable,
+ metadata = found.metadata)(
+ exprId = found.exprId,
+ qualifier = found.qualifier),
+ alias.name) (
+ exprId = alias.exprId,
+ qualifier = alias.qualifier,
+ explicitMetadata = alias.explicitMetadata).asInstanceOf[T]
+
+ case attr: AttributeReference ⇒
+ attr.copy(name = found.name)(
+ exprId = found.exprId,
+ qualifier = found.qualifier).asInstanceOf[T]
+
+ case _ ⇒ ne.asInstanceOf[T]
+ }
+
+ case None ⇒
+ expr.withNewChildren(
+ substituteExpressions(expr.children, substitution, changeOnlyName)).asInstanceOf[T]
+ }
+
+ case _ ⇒
+ expr.withNewChildren(
+ substituteExpressions(expr.children, substitution, changeOnlyName)).asInstanceOf[T]
+ }
+}
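For context on how this rule is wired in: `IgniteSparkSession` (added later in this patch) appends `IgniteOptimization` to the session state's extra optimizations. A minimal, hedged sketch of the equivalent manual registration on a plain `SparkSession` via the public `experimental` hook is shown below; the app name and master are illustrative assumptions, not part of this change.

[source,scala]
----
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.ignite.IgniteOptimization

// Sketch only: IgniteSparkSession performs this registration itself;
// shown here to illustrate how the push-down rule plugs into Catalyst.
val spark = SparkSession.builder()
  .appName("ignite-optimization-sketch") // assumed name
  .master("local[*]")                    // assumed master for a local run
  .getOrCreate()

spark.experimental.extraOptimizations =
  spark.experimental.extraOptimizations :+ IgniteOptimization
----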
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/spark/sql/ignite/IgniteSharedState.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/spark/sql/ignite/IgniteSharedState.scala
new file mode 100644
index 0000000..e07672f
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/spark/sql/ignite/IgniteSharedState.scala
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.ignite
+
+import org.apache.ignite.spark.IgniteContext
+import org.apache.spark.SparkContext
+import org.apache.spark.sql.catalyst.catalog.{ExternalCatalogEvent, ExternalCatalogEventListener, ExternalCatalogWithListener}
+import org.apache.spark.sql.internal.SharedState
+
+/**
+ * Shared state that overrides the external catalog with IgniteExternalCatalog.
+ */
+private[ignite] class IgniteSharedState (
+ igniteContext: IgniteContext,
+ sparkContext: SparkContext) extends SharedState(sparkContext, initialConfigs = Map.empty[String, String]) {
+ /** @inheritdoc */
+ override lazy val externalCatalog: ExternalCatalogWithListener = {
+ val externalCatalog = new IgniteExternalCatalog(igniteContext)
+
+ val wrapped = new ExternalCatalogWithListener(externalCatalog)
+
+ wrapped.addListener(new ExternalCatalogEventListener {
+ override def onEvent(event: ExternalCatalogEvent): Unit = {
+ sparkContext.listenerBus.post(event)
+ }
+ })
+
+ wrapped
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/spark/sql/ignite/IgniteSparkSession.scala b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/spark/sql/ignite/IgniteSparkSession.scala
new file mode 100644
index 0000000..7582b20
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/main/scala/org/apache/spark/sql/ignite/IgniteSparkSession.scala
@@ -0,0 +1,355 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.ignite
+
+import org.apache.ignite.IgniteException
+import org.apache.ignite.configuration.IgniteConfiguration
+import org.apache.ignite.internal.IgnitionEx
+import org.apache.ignite.spark.IgniteContext
+
+import scala.collection.JavaConverters._
+import scala.reflect.runtime.universe.TypeTag
+import org.apache.spark.SparkConf
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.SparkSession.Builder
+import org.apache.spark.sql._
+import org.apache.spark.sql.catalyst._
+import org.apache.spark.sql.catalyst.encoders._
+import org.apache.spark.sql.catalyst.expressions.AttributeReference
+import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Range}
+import org.apache.spark.sql.execution._
+import org.apache.spark.sql.execution.datasources.LogicalRelation
+import org.apache.spark.sql.internal._
+import org.apache.spark.sql.sources.BaseRelation
+import org.apache.spark.sql.streaming._
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.util.Utils
+
+/**
+ * Implementation of Spark Session for Ignite.
+ */
+class IgniteSparkSession private(
+ ic: IgniteContext,
+ proxy: SparkSession,
+ existingSharedState: Option[SharedState],
+ parentSessionState: Option[SessionState]) extends SparkSession(proxy.sparkContext) {
+ self ⇒
+
+ private def this(ic: IgniteContext, proxy: SparkSession) =
+ this(ic, proxy, None, None)
+
+ private def this(proxy: SparkSession) =
+ this(new IgniteContext(proxy.sparkContext, IgnitionEx.DFLT_CFG), proxy)
+
+ private def this(proxy: SparkSession, configPath: String) =
+ this(new IgniteContext(proxy.sparkContext, configPath), proxy)
+
+ private def this(proxy: SparkSession, cfgF: () => IgniteConfiguration) =
+ this(new IgniteContext(proxy.sparkContext, cfgF), proxy)
+
+ /** @inheritdoc */
+ @transient override lazy val catalog = new CatalogImpl(self)
+
+ /** @inheritdoc */
+ @transient override val sqlContext: SQLContext = new SQLContext(self)
+
+ /** @inheritdoc */
+ @transient override lazy val sharedState: SharedState =
+ existingSharedState.getOrElse(new IgniteSharedState(ic, sparkContext))
+
+ /** @inheritdoc */
+ @transient override lazy val sessionState: SessionState = {
+ parentSessionState
+ .map(_.clone(this))
+ .getOrElse {
+ val sessionState = new SessionStateBuilder(self, None).build()
+
+ sessionState.experimentalMethods.extraOptimizations =
+ sessionState.experimentalMethods.extraOptimizations :+ IgniteOptimization
+
+ sessionState
+ }
+ }
+
+ /** @inheritdoc */
+ @transient override lazy val conf: RuntimeConfig = proxy.conf
+
+ /** @inheritdoc */
+ @transient override lazy val emptyDataFrame: DataFrame = proxy.emptyDataFrame
+
+ /** @inheritdoc */
+ override def newSession(): SparkSession = new IgniteSparkSession(ic, proxy.newSession())
+
+ /** @inheritdoc */
+ override def version: String = proxy.version
+
+ /** @inheritdoc */
+ override def emptyDataset[T: Encoder]: Dataset[T] = {
+ val encoder = implicitly[Encoder[T]]
+ new Dataset(self, LocalRelation(encoder.schema.toAttributes), encoder)
+ }
+
+ /** @inheritdoc */
+ override def createDataFrame(rows: java.util.List[Row], schema: StructType): DataFrame = {
+ Dataset.ofRows(self, LocalRelation.fromExternalRows(schema.toAttributes, rows.asScala))
+ }
+
+ /** @inheritdoc */
+ override def createDataFrame(rdd: RDD[_], beanClass: Class[_]): DataFrame = {
+ val attributeSeq: Seq[AttributeReference] = getSchema(beanClass)
+ val className = beanClass.getName
+ val rowRdd = rdd.mapPartitions { iter =>
+ SQLContext.beansToRows(iter, Utils.classForName(className), attributeSeq)
+ }
+ Dataset.ofRows(self, LogicalRDD(attributeSeq, rowRdd)(self))
+ }
+
+ /** @inheritdoc */
+ override def createDataFrame(data: java.util.List[_], beanClass: Class[_]): DataFrame = {
+ val attrSeq = getSchema(beanClass)
+ val rows = SQLContext.beansToRows(data.asScala.iterator, beanClass, attrSeq)
+ Dataset.ofRows(self, LocalRelation(attrSeq, rows.toSeq))
+ }
+
+ /** @inheritdoc */
+ override def createDataFrame[A <: Product : TypeTag](rdd: RDD[A]): DataFrame = {
+ SparkSession.setActiveSession(this)
+ val encoder = Encoders.product[A]
+ Dataset.ofRows(self, ExternalRDD(rdd, self)(encoder))
+ }
+
+ /** @inheritdoc */
+ override def baseRelationToDataFrame(baseRelation: BaseRelation): DataFrame = {
+ Dataset.ofRows(self, LogicalRelation(baseRelation))
+ }
+
+ /** @inheritdoc */
+ override def createDataset[T: Encoder](data: Seq[T]): Dataset[T] = {
+ val enc = encoderFor[T]
+ val attributes = enc.schema.toAttributes
+ val encoded = data.map(d => enc.createSerializer().apply(d)) // TODO .copy()?
+ val plan = new LocalRelation(attributes, encoded)
+ Dataset[T](self, plan)
+ }
+
+ /** @inheritdoc */
+ override def createDataset[T: Encoder](data: RDD[T]): Dataset[T] = {
+ Dataset[T](self, ExternalRDD(data, self))
+ }
+
+ /** @inheritdoc */
+ override def range(start: Long, end: Long, step: Long, numPartitions: Int): Dataset[java.lang.Long] = {
+ new Dataset(self, Range(start, end, step, numPartitions), Encoders.LONG)
+ }
+
+ /** @inheritdoc */
+ override def table(tableName: String): DataFrame = {
+ val tableIdent = sessionState.sqlParser.parseTableIdentifier(tableName)
+
+ Dataset.ofRows(self, sessionState.catalog.lookupRelation(tableIdent))
+ }
+
+ /** @inheritdoc */
+ override def sql(sqlText: String): DataFrame = Dataset.ofRows(self, sessionState.sqlParser.parsePlan(sqlText))
+
+ /** @inheritdoc */
+ override def read: DataFrameReader = new DataFrameReader(self)
+
+ /** @inheritdoc */
+ override def readStream: DataStreamReader = new DataStreamReader(self)
+
+ /** @inheritdoc */
+ override def stop(): Unit = proxy.stop()
+
+ /** @inheritdoc */
+ override private[sql] def applySchemaToPythonRDD(rdd: RDD[Array[Any]], schema: StructType) = {
+ val rowRdd = rdd.map(r => python.EvaluatePython.makeFromJava(schema).asInstanceOf[InternalRow])
+ Dataset.ofRows(self, LogicalRDD(schema.toAttributes, rowRdd)(self))
+ }
+
+ /** @inheritdoc */
+ override private[sql] def cloneSession(): IgniteSparkSession = {
+ val session = new IgniteSparkSession(ic, proxy.cloneSession(), Some(sharedState), Some(sessionState))
+
+ session.sessionState // Force copy of SessionState.
+
+ session
+ }
+
+ /** @inheritdoc */
+ @transient override private[sql] val extensions =
+ proxy.extensions
+
+ /** @inheritdoc */
+ override def createDataFrame(rowRDD: RDD[Row],
+ schema: StructType): DataFrame = {
+ val catalystRows = {
+ val encoder = RowEncoder(schema).createSerializer()
+ rowRDD.map(encoder.apply)
+ }
+ val logicalPlan = LogicalRDD(schema.toAttributes, catalystRows)(self)
+ Dataset.ofRows(self, logicalPlan)
+ }
+
+ /** @inheritdoc */
+ override private[sql] def table( tableIdent: TableIdentifier) =
+ Dataset.ofRows(self, sessionState.catalog.lookupRelation(tableIdent))
+
+ private def getSchema(beanClass: Class[_]): Seq[AttributeReference] = {
+ val (dataType, _) = JavaTypeInference.inferDataType(beanClass)
+ dataType.asInstanceOf[StructType].fields.map { f =>
+ AttributeReference(f.name, f.dataType, f.nullable)()
+ }
+ }
+}
+
+object IgniteSparkSession {
+ /**
+ * @return New instance of <code>IgniteBuilder</code>
+ */
+ def builder(): IgniteBuilder = {
+ new IgniteBuilder
+ }
+
+ /**
+ * Builder for <code>IgniteSparkSession</code>.
+     * Extends the Spark session builder with methods related to Ignite configuration.
+ */
+ class IgniteBuilder extends Builder {
+ /**
+ * Config provider.
+ */
+ private var cfgF: () ⇒ IgniteConfiguration = _
+
+ /**
+ * Path to config file.
+ */
+ private var config: String = _
+
+ /** @inheritdoc */
+ override def getOrCreate(): IgniteSparkSession = synchronized {
+ val sparkSession = super.getOrCreate()
+
+ val ic = if (cfgF != null)
+ new IgniteContext(sparkSession.sparkContext, cfgF)
+ else if (config != null)
+ new IgniteContext(sparkSession.sparkContext, config)
+ else {
+ logWarning("No `igniteConfig` or `igniteConfigProvider`. " +
+ "IgniteSparkSession will use DFLT_CFG for Ignite.")
+
+ new IgniteContext(sparkSession.sparkContext)
+ }
+
+ new IgniteSparkSession(ic, sparkSession)
+ }
+
+ /**
+ * Set path to Ignite config file.
+ * User should use only one of <code>igniteConfig</code> and <code>igniteConfigProvider</code>.
+ *
+ * @param cfg Path to Ignite config file.
+ * @return This for chaining.
+ */
+ def igniteConfig(cfg: String): IgniteBuilder = {
+ if (cfgF != null)
+ throw new IgniteException("only one of config or configProvider should be provided")
+
+ this.config = cfg
+
+ this
+ }
+
+ /**
+ * Set Ignite config provider.
+ * User should use only one of <code>igniteConfig</code> and <code>igniteConfigProvider</code>.
+ *
+ * @param cfgF Closure to provide <code>IgniteConfiguration</code>.
+ * @return This for chaining.
+ */
+ def igniteConfigProvider(cfgF: () ⇒ IgniteConfiguration): IgniteBuilder = {
+ if (config != null)
+ throw new IgniteException("only one of config or configProvider should be provided")
+
+ this.cfgF = cfgF
+
+ this
+ }
+
+ /** @inheritdoc */
+ override def appName(name: String): IgniteBuilder = {
+ super.appName(name)
+
+ this
+ }
+
+ /** @inheritdoc */
+ override def config(key: String, value: String): IgniteBuilder = {
+ super.config(key, value)
+
+ this
+ }
+
+ /** @inheritdoc */
+ override def config(key: String, value: Long): IgniteBuilder = {
+ super.config(key, value)
+
+ this
+ }
+
+ /** @inheritdoc */
+ override def config(key: String, value: Double): IgniteBuilder = {
+ super.config(key, value)
+
+ this
+ }
+
+ /** @inheritdoc */
+ override def config(key: String, value: Boolean): IgniteBuilder = {
+ super.config(key, value)
+
+ this
+ }
+
+ /** @inheritdoc */
+ override def config(conf: SparkConf): IgniteBuilder = {
+ super.config(conf)
+
+ this
+ }
+
+ /** @inheritdoc */
+ override def master(master: String): IgniteBuilder = {
+ super.master(master)
+
+ this
+ }
+
+ /**
+         * This method always throws an exception: Hive support is not available when building an '''IgniteSparkSession'''.
+ */
+ override def enableHiveSupport(): IgniteBuilder =
+            throw new IgniteException("This method isn't supported by IgniteSparkSession")
+
+ /** @inheritdoc */
+ override def withExtensions(f: (SparkSessionExtensions) ⇒ Unit): IgniteBuilder = {
+ super.withExtensions(f)
+ this
+ }
+ }
+}
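A hedged usage sketch of the builder defined above; the Spring XML path and the queried table are assumptions for illustration, not part of this change.

[source,scala]
----
import org.apache.spark.sql.ignite.IgniteSparkSession

// Assumed: an Ignite Spring XML configuration reachable at the given path.
val igniteSession = IgniteSparkSession.builder()
  .appName("Ignite SQL example")
  .master("local[*]")
  .igniteConfig("/path/to/ignite-config.xml")
  .getOrCreate()

// SQL over Ignite tables goes through IgniteOptimization and is pushed
// down to the cluster where the plan allows it.
igniteSession.sql("SELECT id, name FROM person WHERE id > 10").show()
----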
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/java/org/apache/ignite/spark/JavaEmbeddedIgniteRDDSelfTest.java b/modules/spark-3.2-ext/spark-3.2/src/test/java/org/apache/ignite/spark/JavaEmbeddedIgniteRDDSelfTest.java
new file mode 100644
index 0000000..fbd4363
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/java/org/apache/ignite/spark/JavaEmbeddedIgniteRDDSelfTest.java
@@ -0,0 +1,338 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark;
+
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.ignite.Ignite;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.configuration.IgniteConfiguration;
+import org.apache.ignite.internal.util.typedef.F;
+import org.apache.ignite.lang.IgniteOutClosure;
+import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFunction;
+import org.apache.spark.sql.Column;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.junit.Test;
+import scala.Tuple2;
+
+/**
+ * Tests for {@link JavaIgniteRDD} (embedded mode).
+ */
+public class JavaEmbeddedIgniteRDDSelfTest extends GridCommonAbstractTest {
+    /** For Ignite instance name generation. */
+ private static AtomicInteger cntr = new AtomicInteger(1);
+
+ /** Ignite instance names. */
+ private static ThreadLocal<Integer> igniteInstanceNames = new ThreadLocal<Integer>() {
+ @Override protected Integer initialValue() {
+ return cntr.getAndIncrement();
+ }
+ };
+
+ /** Grid count. */
+ private static final int GRID_CNT = 3;
+
+ /** Keys count. */
+ private static final int KEYS_CNT = 10000;
+
+ /** Cache name. */
+ private static final String PARTITIONED_CACHE_NAME = "partitioned";
+
+ /** Sum function. */
+ private static final Function2<Integer, Integer, Integer> SUM_F = new Function2<Integer, Integer, Integer>() {
+ @Override public Integer call(Integer x, Integer y) {
+ return x + y;
+ }
+ };
+
+ /** To pair function. */
+ private static final PairFunction<Integer, String, String> TO_PAIR_F = new PairFunction<Integer, String, String>() {
+ /** {@inheritDoc} */
+ @Override public Tuple2<String, String> call(Integer i) {
+ return new Tuple2<>(String.valueOf(i), "val" + i);
+ }
+ };
+
+    /** (String, Integer) pair to Integer value function. */
+ private static final Function<Tuple2<String, Integer>, Integer> STR_INT_PAIR_TO_INT_F = new PairToValueFunction<>();
+
+ /** (String, Entity) pair to Entity value function. */
+ private static final Function<Tuple2<String, Entity>, Entity> STR_ENTITY_PAIR_TO_ENTITY_F =
+ new PairToValueFunction<>();
+
+ /** Integer to entity function. */
+ private static final PairFunction<Integer, String, Entity> INT_TO_ENTITY_F =
+ new PairFunction<Integer, String, Entity>() {
+ @Override public Tuple2<String, Entity> call(Integer i) throws Exception {
+ return new Tuple2<>(String.valueOf(i), new Entity(i, "name" + i, i * 100));
+ }
+ };
+
+ /**
+ * Default constructor.
+ */
+ public JavaEmbeddedIgniteRDDSelfTest() {
+ super(false);
+ }
+
+ /** {@inheritDoc} */
+ @Override protected void afterTest() throws Exception {
+ stopAllGrids();
+ }
+
+ /**
+     * Creates a default Spark context.
+ *
+ * @return Context.
+ */
+ private JavaSparkContext createContext() {
+ SparkConf conf = new SparkConf();
+
+ conf.set("spark.executor.instances", String.valueOf(GRID_CNT));
+
+ return new JavaSparkContext("local[" + GRID_CNT + "]", "test", conf);
+ }
+
+ /**
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testStoreDataToIgnite() throws Exception {
+ JavaSparkContext sc = createContext();
+
+ JavaIgniteContext<String, String> ic = null;
+
+ try {
+ ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider(), false);
+
+ ic.fromCache(PARTITIONED_CACHE_NAME)
+ .savePairs(sc.parallelize(F.range(0, KEYS_CNT), GRID_CNT).mapToPair(TO_PAIR_F), true, false);
+
+ Ignite ignite = ic.ignite();
+
+ IgniteCache<String, String> cache = ignite.cache(PARTITIONED_CACHE_NAME);
+
+ for (int i = 0; i < KEYS_CNT; i++) {
+ String val = cache.get(String.valueOf(i));
+
+ assertNotNull("Value was not put to cache for key: " + i, val);
+ assertEquals("Invalid value stored for key: " + i, "val" + i, val);
+ }
+ }
+ finally {
+ if (ic != null)
+ ic.close(true);
+
+ sc.stop();
+ }
+ }
+
+ /**
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testReadDataFromIgnite() throws Exception {
+ JavaSparkContext sc = createContext();
+
+ JavaIgniteContext<String, Integer> ic = null;
+
+ try {
+ ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider(), false);
+
+ Ignite ignite = ic.ignite();
+
+ IgniteCache<String, Integer> cache = ignite.cache(PARTITIONED_CACHE_NAME);
+
+ for (int i = 0; i < KEYS_CNT; i++)
+ cache.put(String.valueOf(i), i);
+
+ JavaRDD<Integer> values = ic.fromCache(PARTITIONED_CACHE_NAME).map(STR_INT_PAIR_TO_INT_F);
+
+ int sum = values.fold(0, SUM_F);
+
+ int expSum = (KEYS_CNT * KEYS_CNT + KEYS_CNT) / 2 - KEYS_CNT;
+
+ assertEquals(expSum, sum);
+ }
+ finally {
+ if (ic != null)
+ ic.close(true);
+
+ sc.stop();
+ }
+ }
+
+ /**
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testQueryObjectsFromIgnite() throws Exception {
+ JavaSparkContext sc = createContext();
+
+ JavaIgniteContext<String, Entity> ic = null;
+
+ try {
+ ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider(), false);
+
+ JavaIgniteRDD<String, Entity> cache = ic.fromCache(PARTITIONED_CACHE_NAME);
+
+ int cnt = 1001;
+ cache.savePairs(sc.parallelize(F.range(0, cnt), GRID_CNT).mapToPair(INT_TO_ENTITY_F), true, false);
+
+ List<Entity> res = cache.objectSql("Entity", "name = ? and salary = ?", "name50", 5000)
+ .map(STR_ENTITY_PAIR_TO_ENTITY_F).collect();
+
+ assertEquals("Invalid result length", 1, res.size());
+ assertEquals("Invalid result", 50, res.get(0).id());
+ assertEquals("Invalid result", "name50", res.get(0).name());
+ assertEquals("Invalid result", 5000, res.get(0).salary());
+
+// Ignite ignite = ic.ignite();
+// IgniteCache<Object, Object> underCache = ignite.cache(PARTITIONED_CACHE_NAME);
+// assertEquals("Invalid total count", cnt, underCache.size());
+
+ assertEquals("Invalid count", 500, cache.objectSql("Entity", "id > 500").count());
+ }
+ finally {
+ if (ic != null)
+ ic.close(true);
+
+ sc.stop();
+ }
+ }
+
+ /**
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testQueryFieldsFromIgnite() throws Exception {
+ JavaSparkContext sc = createContext();
+
+ JavaIgniteContext<String, Entity> ic = null;
+
+ try {
+ ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider(), false);
+
+ JavaIgniteRDD<String, Entity> cache = ic.fromCache(PARTITIONED_CACHE_NAME);
+
+ cache.savePairs(sc.parallelize(F.range(0, 1001), GRID_CNT).mapToPair(INT_TO_ENTITY_F), true, false);
+
+ Dataset<Row> df =
+ cache.sql("select id, name, salary from Entity where name = ? and salary = ?", "name50", 5000);
+
+ df.printSchema();
+
+ Row[] res = (Row[])df.collect();
+
+ assertEquals("Invalid result length", 1, res.length);
+ assertEquals("Invalid result", 50, res[0].get(0));
+ assertEquals("Invalid result", "name50", res[0].get(1));
+ assertEquals("Invalid result", 5000, res[0].get(2));
+
+ Column exp = new Column("NAME").equalTo("name50").and(new Column("SALARY").equalTo(5000));
+
+ Dataset<Row> df0 = cache.sql("select id, name, salary from Entity").where(exp);
+
+ df.printSchema();
+
+ Row[] res0 = (Row[])df0.collect();
+
+ assertEquals("Invalid result length", 1, res0.length);
+ assertEquals("Invalid result", 50, res0[0].get(0));
+ assertEquals("Invalid result", "name50", res0[0].get(1));
+ assertEquals("Invalid result", 5000, res0[0].get(2));
+
+ assertEquals("Invalid count", 500, cache.sql("select id from Entity where id > 500").count());
+ }
+ finally {
+ if (ic != null)
+ ic.close(true);
+
+ sc.stop();
+ }
+ }
+
+ /**
+ * @param igniteInstanceName Ignite instance name.
+ * @param client Client.
+ * @throws Exception If failed.
+     * @return Configuration.
+ */
+ private static IgniteConfiguration getConfiguration(String igniteInstanceName, boolean client) throws Exception {
+ IgniteConfiguration cfg = new IgniteConfiguration();
+
+ cfg.setCacheConfiguration(cacheConfiguration());
+
+ cfg.setClientMode(client);
+
+ cfg.setIgniteInstanceName(igniteInstanceName);
+
+ return cfg;
+ }
+
+ /**
+ * Creates cache configuration.
+ *
+ * @return Cache configuration.
+ */
+ private static CacheConfiguration<Object, Object> cacheConfiguration() {
+ CacheConfiguration<Object, Object> ccfg = new CacheConfiguration<>(DEFAULT_CACHE_NAME);
+
+ ccfg.setBackups(1);
+
+ ccfg.setName(PARTITIONED_CACHE_NAME);
+
+ ccfg.setIndexedTypes(String.class, Entity.class);
+
+ return ccfg;
+ }
+
+ /**
+     * Ignite configuration provider.
+ */
+ static class IgniteConfigProvider implements IgniteOutClosure<IgniteConfiguration> {
+ /** {@inheritDoc} */
+ @Override public IgniteConfiguration apply() {
+ try {
+ return getConfiguration("worker-" + igniteInstanceNames.get(), false);
+ }
+ catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+
+ /**
+     * @param <K> Key type.
+     * @param <V> Value type.
+ */
+ static class PairToValueFunction<K, V> implements Function<Tuple2<K, V>, V> {
+ /** {@inheritDoc} */
+ @Override public V call(Tuple2<K, V> t) throws Exception {
+ return t._2();
+ }
+ }
+}
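The Java test above exercises the embedded-mode RDD API; a rough Scala equivalent of the store-and-read round trip, with an assumed config path and reusing the same cache name, might look like this.

[source,scala]
----
import org.apache.ignite.spark.IgniteContext
import org.apache.spark.{SparkConf, SparkContext}

// Assumptions: a local master and your own Ignite Spring XML config path.
val sc = new SparkContext(
  new SparkConf().setAppName("ignite-rdd-sketch").setMaster("local[3]"))

val ic = new IgniteContext(sc, "/path/to/ignite-config.xml")

// Save (key, value) pairs into the "partitioned" cache and read them back.
val rdd = ic.fromCache[String, String]("partitioned")
rdd.savePairs(sc.parallelize(1 to 1000).map(i => (i.toString, "val" + i)))
println(s"stored entries: ${rdd.count()}")

ic.close(true)
sc.stop()
----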
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/java/org/apache/ignite/spark/JavaEmbeddedIgniteRDDWithLocalStoreSelfTest.java b/modules/spark-3.2-ext/spark-3.2/src/test/java/org/apache/ignite/spark/JavaEmbeddedIgniteRDDWithLocalStoreSelfTest.java
new file mode 100644
index 0000000..2f13d25
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/java/org/apache/ignite/spark/JavaEmbeddedIgniteRDDWithLocalStoreSelfTest.java
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark;
+
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+import javax.cache.Cache;
+import javax.cache.configuration.FactoryBuilder;
+import org.apache.ignite.Ignite;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.cache.store.CacheStoreAdapter;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.configuration.IgniteConfiguration;
+import org.apache.ignite.internal.util.typedef.F;
+import org.apache.ignite.lang.IgniteOutClosure;
+import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.PairFunction;
+import org.jetbrains.annotations.Nullable;
+import org.junit.Test;
+import scala.Tuple2;
+
+/**
+ * Tests for {@link JavaIgniteRDD} (embedded mode).
+ */
+public class JavaEmbeddedIgniteRDDWithLocalStoreSelfTest extends GridCommonAbstractTest {
+ /** */
+ private static ConcurrentHashMap<Object, Object> storeMap;
+
+ /** */
+ private TestStore store;
+
+    /** For Ignite instance name generation. */
+ private static AtomicInteger cntr = new AtomicInteger(1);
+
+ /** Ignite instance names. */
+ private static ThreadLocal<Integer> igniteInstanceNames = new ThreadLocal<Integer>() {
+ @Override protected Integer initialValue() {
+ return cntr.getAndIncrement();
+ }
+ };
+
+ /** Grid count. */
+ private static final int GRID_CNT = 3;
+
+ /** Cache name. */
+ private static final String PARTITIONED_CACHE_NAME = "partitioned";
+
+ /** To pair function. */
+ private static final PairFunction<Integer, Integer, Integer> SIMPLE_FUNCTION = new PairFunction<Integer, Integer, Integer>() {
+ /** {@inheritDoc} */
+ @Override public Tuple2<Integer, Integer> call(Integer i) {
+ return new Tuple2<>(i, i);
+ }
+ };
+
+ /**
+ * Default constructor.
+ */
+ public JavaEmbeddedIgniteRDDWithLocalStoreSelfTest() {
+ super(false);
+ }
+
+ /** {@inheritDoc} */
+ @Override protected void afterTest() throws Exception {
+ stopAllGrids();
+ }
+
+ /**
+     * Creates a default Spark context.
+ *
+ * @return Context.
+ */
+ private JavaSparkContext createContext() {
+ SparkConf conf = new SparkConf();
+
+ conf.set("spark.executor.instances", String.valueOf(GRID_CNT));
+
+ return new JavaSparkContext("local[" + GRID_CNT + "]", "test", conf);
+ }
+
+ /**
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testStoreDataToIgniteWithOptionSkipStore() throws Exception {
+ storeMap = new ConcurrentHashMap<>();
+ store = new TestStore();
+
+ JavaSparkContext sc = createContext();
+
+ JavaIgniteContext<Integer, Integer> ic = null;
+
+ try {
+ ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider(), false);
+
+ Ignite ignite = ic.ignite();
+
+ IgniteCache<Integer, Integer> cache = ignite.cache(PARTITIONED_CACHE_NAME);
+
+ for (int i = 0; i < 1000; i++)
+ storeMap.put(i, i);
+
+ ic.fromCache(PARTITIONED_CACHE_NAME)
+ .savePairs(sc.parallelize(F.range(1000, 2000), GRID_CNT).mapToPair(SIMPLE_FUNCTION), true, false);
+
+ for (int i = 0; i < 2000; i++)
+ assertEquals(i, storeMap.get(i));
+
+ ic.fromCache(PARTITIONED_CACHE_NAME)
+ .savePairs(sc.parallelize(F.range(2000, 3000), GRID_CNT).mapToPair(SIMPLE_FUNCTION), true, true);
+
+ for (int i = 2000; i < 3000; i++)
+ assertNull(storeMap.get(i));
+
+ for (int i = 0; i < 3000; i++) {
+ Integer val = cache.get(i);
+
+ assertNotNull("Value was not put to cache for key: " + i, val);
+ assertEquals("Invalid value stored for key: " + i, Integer.valueOf(i), val);
+ }
+ }
+ finally {
+ if (ic != null)
+ ic.close(true);
+
+ sc.stop();
+ }
+ }
+
+ /**
+ * @param igniteInstanceName Ignite instance name.
+ * @param client Client.
+ * @throws Exception If failed.
+     * @return Configuration.
+ */
+ private static IgniteConfiguration getConfiguration(String igniteInstanceName, boolean client) throws Exception {
+ IgniteConfiguration cfg = new IgniteConfiguration();
+
+ cfg.setCacheConfiguration(cacheConfiguration());
+
+ cfg.setClientMode(client);
+
+ cfg.setIgniteInstanceName(igniteInstanceName);
+
+ return cfg;
+ }
+
+ /**
+ * Creates cache configuration.
+ *
+ * @return Cache configuration.
+ */
+ private static CacheConfiguration<Object, Object> cacheConfiguration() {
+ CacheConfiguration<Object, Object> ccfg = new CacheConfiguration<>(DEFAULT_CACHE_NAME);
+
+ ccfg.setBackups(1);
+
+ ccfg.setName(PARTITIONED_CACHE_NAME);
+
+ ccfg.setIndexedTypes(String.class, Entity.class);
+
+ ccfg.setCacheStoreFactory(FactoryBuilder.factoryOf(TestStore.class));
+
+ ccfg.setReadThrough(true);
+ ccfg.setWriteThrough(true);
+
+ return ccfg;
+ }
+
+ /**
+     * Ignite configuration provider.
+ */
+ static class IgniteConfigProvider implements IgniteOutClosure<IgniteConfiguration> {
+ /** {@inheritDoc} */
+ @Override public IgniteConfiguration apply() {
+ try {
+ return getConfiguration("worker-" + igniteInstanceNames.get(), false);
+ }
+ catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+
+ /** */
+ @SuppressWarnings("PublicInnerClass")
+ public static class TestStore extends CacheStoreAdapter<Object, Object> {
+ /** {@inheritDoc} */
+ @Nullable @Override public Object load(Object key) {
+ return storeMap.get(key);
+ }
+
+ /** {@inheritDoc} */
+ @Override public void write(Cache.Entry<?, ?> entry) {
+ storeMap.put(entry.getKey(), entry.getValue());
+ }
+
+ /** {@inheritDoc} */
+ @Override public void delete(Object key) {
+ storeMap.remove(key);
+ }
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/java/org/apache/ignite/spark/JavaStandaloneIgniteRDDSelfTest.java b/modules/spark-3.2-ext/spark-3.2/src/test/java/org/apache/ignite/spark/JavaStandaloneIgniteRDDSelfTest.java
new file mode 100644
index 0000000..828daf0
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/java/org/apache/ignite/spark/JavaStandaloneIgniteRDDSelfTest.java
@@ -0,0 +1,372 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark;
+
+import java.lang.reflect.Field;
+import java.math.BigDecimal;
+import java.util.List;
+import org.apache.ignite.Ignite;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.Ignition;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.configuration.IgniteConfiguration;
+import org.apache.ignite.internal.util.typedef.F;
+import org.apache.ignite.lang.IgniteOutClosure;
+import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi;
+import org.apache.ignite.spi.discovery.tcp.ipfinder.TcpDiscoveryIpFinder;
+import org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder;
+import org.apache.ignite.testframework.GridTestUtils;
+import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFunction;
+import org.apache.spark.sql.Column;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.junit.Test;
+import scala.Tuple2;
+
+/**
+ * Tests for {@link JavaIgniteRDD} (standalone mode).
+ */
+public class JavaStandaloneIgniteRDDSelfTest extends GridCommonAbstractTest {
+ /** Grid count. */
+ private static final int GRID_CNT = 3;
+
+ /** Keys count. */
+ private static final int KEYS_CNT = 10000;
+
+ /** Entity cache name. */
+ private static final String ENTITY_CACHE_NAME = "entity";
+
+    /** Name of the cache for entities with all field types. */
+ private static final String ENTITY_ALL_TYPES_CACHE_NAME = "entityAllTypes";
+
+ /** Ip finder. */
+ private static final TcpDiscoveryIpFinder IP_FINDER = new TcpDiscoveryVmIpFinder(true);
+
+ /** Sum function. */
+ private static final Function2<Integer, Integer, Integer> SUM_F = new Function2<Integer, Integer, Integer>() {
+ @Override public Integer call(Integer x, Integer y) {
+ return x + y;
+ }
+ };
+
+ /** To pair function. */
+ private static final PairFunction<Integer, String, String> TO_PAIR_F = new PairFunction<Integer, String, String>() {
+ /** {@inheritDoc} */
+ @Override public Tuple2<String, String> call(Integer i) {
+ return new Tuple2<>(String.valueOf(i), "val" + i);
+ }
+ };
+
+    /** (String, Integer) pair to Integer value function. */
+ private static final Function<Tuple2<String, Integer>, Integer> STR_INT_PAIR_TO_INT_F = new PairToValueFunction<>();
+
+ /** (String, Entity) pair to Entity value function. */
+ private static final Function<Tuple2<String, Entity>, Entity> STR_ENTITY_PAIR_TO_ENTITY_F =
+ new PairToValueFunction<>();
+
+ /** Integer to entity function. */
+ private static final PairFunction<Integer, String, Entity> INT_TO_ENTITY_F =
+ new PairFunction<Integer, String, Entity>() {
+ @Override public Tuple2<String, Entity> call(Integer i) throws Exception {
+ return new Tuple2<>(String.valueOf(i), new Entity(i, "name" + i, i * 100));
+ }
+ };
+
+ /** */
+ private static final PairFunction<Integer, String, EntityTestAllTypeFields> INT_TO_ENTITY_ALL_FIELDS_F =
+ new PairFunction<Integer, String, EntityTestAllTypeFields>() {
+ @Override public Tuple2<String, EntityTestAllTypeFields> call(Integer i) throws Exception {
+ return new Tuple2<>(String.valueOf(i), new EntityTestAllTypeFields(i));
+ }
+ };
+
+ /** {@inheritDoc} */
+ @Override protected void beforeTest() throws Exception {
+ Ignition.ignite("grid-0").cache(ENTITY_CACHE_NAME).clear();
+ Ignition.ignite("grid-0").cache(ENTITY_ALL_TYPES_CACHE_NAME).clear();
+ }
+
+ /** {@inheritDoc} */
+ @Override protected void afterTest() throws Exception {
+ Ignition.stop("client", false);
+ }
+
+ /** {@inheritDoc} */
+ @Override protected void beforeTestsStarted() throws Exception {
+ for (int i = 0; i < GRID_CNT; i++)
+ Ignition.start(getConfiguration("grid-" + i, false));
+ }
+
+ /**
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testStoreDataToIgnite() throws Exception {
+ JavaSparkContext sc = new JavaSparkContext("local[*]", "test");
+
+ try {
+ JavaIgniteContext<String, String> ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider());
+
+ ic.fromCache(ENTITY_CACHE_NAME)
+ .savePairs(sc.parallelize(F.range(0, KEYS_CNT), 2).mapToPair(TO_PAIR_F));
+
+ Ignite ignite = Ignition.ignite("grid-0");
+
+ IgniteCache<String, String> cache = ignite.cache(ENTITY_CACHE_NAME);
+
+ for (int i = 0; i < KEYS_CNT; i++) {
+ String val = cache.get(String.valueOf(i));
+
+ assertNotNull("Value was not put to cache for key: " + i, val);
+ assertEquals("Invalid value stored for key: " + i, "val" + i, val);
+ }
+ }
+ finally {
+ sc.stop();
+ }
+ }
+
+ /**
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testReadDataFromIgnite() throws Exception {
+ JavaSparkContext sc = new JavaSparkContext("local[*]", "test");
+
+ try {
+ JavaIgniteContext<String, Integer> ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider());
+
+ Ignite ignite = Ignition.ignite("grid-0");
+
+ IgniteCache<String, Integer> cache = ignite.cache(ENTITY_CACHE_NAME);
+
+ for (int i = 0; i < KEYS_CNT; i++)
+ cache.put(String.valueOf(i), i);
+
+ JavaRDD<Integer> values = ic.fromCache(ENTITY_CACHE_NAME).map(STR_INT_PAIR_TO_INT_F);
+
+ int sum = values.fold(0, SUM_F);
+
+ int expSum = (KEYS_CNT * KEYS_CNT + KEYS_CNT) / 2 - KEYS_CNT;
+
+ assertEquals(expSum, sum);
+ }
+ finally {
+ sc.stop();
+ }
+ }
+
+ /**
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testQueryObjectsFromIgnite() throws Exception {
+ JavaSparkContext sc = new JavaSparkContext("local[*]", "test");
+
+ try {
+ JavaIgniteContext<String, Entity> ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider());
+
+ JavaIgniteRDD<String, Entity> cache = ic.fromCache(ENTITY_CACHE_NAME);
+
+ cache.savePairs(sc.parallelize(F.range(0, 1001), 2).mapToPair(INT_TO_ENTITY_F));
+
+ List<Entity> res = cache.objectSql("Entity", "name = ? and salary = ?", "name50", 5000)
+ .map(STR_ENTITY_PAIR_TO_ENTITY_F).collect();
+
+ assertEquals("Invalid result length", 1, res.size());
+ assertEquals("Invalid result", 50, res.get(0).id());
+ assertEquals("Invalid result", "name50", res.get(0).name());
+ assertEquals("Invalid result", 5000, res.get(0).salary());
+ assertEquals("Invalid count", 500, cache.objectSql("Entity", "id > 500").count());
+ }
+ finally {
+ sc.stop();
+ }
+ }
+
+ /**
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testQueryFieldsFromIgnite() throws Exception {
+ JavaSparkContext sc = new JavaSparkContext("local[*]", "test");
+
+ try {
+ JavaIgniteContext<String, Entity> ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider());
+
+ JavaIgniteRDD<String, Entity> cache = ic.fromCache(ENTITY_CACHE_NAME);
+
+ cache.savePairs(sc.parallelize(F.range(0, 1001), 2).mapToPair(INT_TO_ENTITY_F));
+
+ Dataset<Row> df =
+ cache.sql("select id, name, salary from Entity where name = ? and salary = ?", "name50", 5000);
+
+ df.printSchema();
+
+ Row[] res = (Row[])df.collect();
+
+ assertEquals("Invalid result length", 1, res.length);
+ assertEquals("Invalid result", 50, res[0].get(0));
+ assertEquals("Invalid result", "name50", res[0].get(1));
+ assertEquals("Invalid result", 5000, res[0].get(2));
+
+ Column exp = new Column("NAME").equalTo("name50").and(new Column("SALARY").equalTo(5000));
+
+ Dataset<Row> df0 = cache.sql("select id, name, salary from Entity").where(exp);
+
+ df.printSchema();
+
+ Row[] res0 = (Row[])df0.collect();
+
+ assertEquals("Invalid result length", 1, res0.length);
+ assertEquals("Invalid result", 50, res0[0].get(0));
+ assertEquals("Invalid result", "name50", res0[0].get(1));
+ assertEquals("Invalid result", 5000, res0[0].get(2));
+
+ assertEquals("Invalid count", 500, cache.sql("select id from Entity where id > 500").count());
+ }
+ finally {
+ sc.stop();
+ }
+ }
+
+ /**
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testAllFieldsTypes() throws Exception {
+ JavaSparkContext sc = new JavaSparkContext("local[*]", "test");
+
+ final int cnt = 100;
+
+ try {
+ JavaIgniteContext<String, EntityTestAllTypeFields> ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider());
+
+ JavaIgniteRDD<String, EntityTestAllTypeFields> cache = ic.fromCache(ENTITY_ALL_TYPES_CACHE_NAME);
+
+ cache.savePairs(sc.parallelize(F.range(0, cnt), 2).mapToPair(INT_TO_ENTITY_ALL_FIELDS_F));
+
+ EntityTestAllTypeFields e = new EntityTestAllTypeFields(cnt / 2);
+ for (Field f : EntityTestAllTypeFields.class.getDeclaredFields()) {
+ String fieldName = f.getName();
+
+ Object val = GridTestUtils.getFieldValue(e, fieldName);
+
+ Dataset<Row> df = cache.sql(
+ String.format("select %s from EntityTestAllTypeFields where %s = ?", fieldName, fieldName),
+ val);
+
+ if (val instanceof BigDecimal) {
+ Object res = ((Row[])df.collect())[0].get(0);
+
+ assertTrue(String.format("+++ Fail on %s field", fieldName),
+ ((Comparable<BigDecimal>)val).compareTo((BigDecimal)res) == 0);
+ }
+ else if (val instanceof java.sql.Date)
+ assertEquals(String.format("+++ Fail on %s field", fieldName),
+ val.toString(), ((Row[])df.collect())[0].get(0).toString());
+ else if (val.getClass().isArray())
+ assertTrue(String.format("+++ Fail on %s field", fieldName), 1 <= df.count());
+ else {
+ assertTrue(String.format("+++ Fail on %s field", fieldName), ((Row[])df.collect()).length > 0);
+ assertTrue(String.format("+++ Fail on %s field", fieldName), ((Row[])df.collect())[0].size() > 0);
+ assertEquals(String.format("+++ Fail on %s field", fieldName), val, ((Row[])df.collect())[0].get(0));
+ }
+
+                info(String.format("+++ Query on the field: %s : %s passed", fieldName, f.getType().getSimpleName()));
+ }
+ }
+ finally {
+ sc.stop();
+ }
+ }
+
+ /**
+ * @param igniteInstanceName Ignite instance name.
+ * @param client Client.
+     * @return Ignite configuration.
+ */
+ private static IgniteConfiguration getConfiguration(String igniteInstanceName, boolean client) throws Exception {
+ IgniteConfiguration cfg = new IgniteConfiguration();
+
+ TcpDiscoverySpi discoSpi = new TcpDiscoverySpi();
+
+ discoSpi.setIpFinder(IP_FINDER);
+
+ cfg.setDiscoverySpi(discoSpi);
+
+ cfg.setCacheConfiguration(
+ cacheConfiguration(ENTITY_CACHE_NAME, String.class, Entity.class),
+ cacheConfiguration(ENTITY_ALL_TYPES_CACHE_NAME, String.class, EntityTestAllTypeFields.class));
+
+ cfg.setClientMode(client);
+
+ cfg.setIgniteInstanceName(igniteInstanceName);
+
+ return cfg;
+ }
+
+ /**
+ * @param name Name.
+     * @param clsK Key class.
+     * @param clsV Value class.
+     * @return Cache configuration.
+ */
+ private static CacheConfiguration<Object, Object> cacheConfiguration(String name, Class<?> clsK, Class<?> clsV) {
+ CacheConfiguration<Object, Object> ccfg = new CacheConfiguration<>(DEFAULT_CACHE_NAME);
+
+ ccfg.setBackups(1);
+
+ ccfg.setName(name);
+
+ ccfg.setIndexedTypes(clsK, clsV);
+
+ return ccfg;
+ }
+
+ /**
+     * Ignite configuration provider.
+ */
+ static class IgniteConfigProvider implements IgniteOutClosure<IgniteConfiguration> {
+ /** {@inheritDoc} */
+ @Override public IgniteConfiguration apply() {
+ try {
+ return getConfiguration("client", true);
+ }
+ catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+
+ /**
+     * @param <K> Key type.
+     * @param <V> Value type.
+ */
+ static class PairToValueFunction<K, V> implements Function<Tuple2<K, V>, V> {
+ /** {@inheritDoc} */
+ @Override public V call(Tuple2<K, V> t) throws Exception {
+ return t._2();
+ }
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/java/org/apache/ignite/testsuites/IgniteRDDTestSuite.java b/modules/spark-3.2-ext/spark-3.2/src/test/java/org/apache/ignite/testsuites/IgniteRDDTestSuite.java
new file mode 100644
index 0000000..bde086d
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/java/org/apache/ignite/testsuites/IgniteRDDTestSuite.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.testsuites;
+
+import org.apache.ignite.spark.JavaEmbeddedIgniteRDDSelfTest;
+import org.apache.ignite.spark.JavaEmbeddedIgniteRDDWithLocalStoreSelfTest;
+import org.apache.ignite.spark.JavaStandaloneIgniteRDDSelfTest;
+import org.junit.runner.RunWith;
+import org.junit.runners.Suite;
+
+/**
+ * Test suite for Ignite RDD.
+ */
+@RunWith(Suite.class)
+@Suite.SuiteClasses({
+ JavaEmbeddedIgniteRDDSelfTest.class,
+ JavaStandaloneIgniteRDDSelfTest.class,
+ JavaEmbeddedIgniteRDDWithLocalStoreSelfTest.class
+})
+public class IgniteRDDTestSuite {
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/resources/cities.json b/modules/spark-3.2-ext/spark-3.2/src/test/resources/cities.json
new file mode 100644
index 0000000..264bae0
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/resources/cities.json
@@ -0,0 +1,3 @@
+{ "id": 1, "name": "Forest Hill" }
+{ "id": 2, "name": "Denver" }
+{ "id": 3, "name": "St. Petersburg" }
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/resources/cities_non_unique.json b/modules/spark-3.2-ext/spark-3.2/src/test/resources/cities_non_unique.json
new file mode 100644
index 0000000..f971c86
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/resources/cities_non_unique.json
@@ -0,0 +1,6 @@
+{ "id": 1, "name": "Forest Hill" }
+{ "id": 2, "name": "Denver" }
+{ "id": 3, "name": "St. Petersburg" }
+{ "id": 1, "name": "Paris" }
+{ "id": 2, "name": "New York" }
+{ "id": 3, "name": "Moscow" }
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/resources/ignite-spark-config.xml b/modules/spark-3.2-ext/spark-3.2/src/test/resources/ignite-spark-config.xml
new file mode 100644
index 0000000..827fb09
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/resources/ignite-spark-config.xml
@@ -0,0 +1,64 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+ Ignite configuration with all defaults and enabled p2p deployment and enabled events.
+-->
+<beans xmlns="http://www.springframework.org/schema/beans"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:util="http://www.springframework.org/schema/util"
+ xsi:schemaLocation="
+ http://www.springframework.org/schema/beans
+ http://www.springframework.org/schema/beans/spring-beans.xsd
+ http://www.springframework.org/schema/util
+ http://www.springframework.org/schema/util/spring-util.xsd">
+ <bean id="ignite.cfg" class="org.apache.ignite.configuration.IgniteConfiguration">
+ <property name="localHost" value="127.0.0.1"/>
+
+ <!-- Explicitly configure TCP discovery SPI to provide list of initial nodes. -->
+ <property name="discoverySpi">
+ <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
+ <property name="ipFinder">
+ <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
+ <property name="addresses">
+ <list>
+ <value>127.0.0.1:47500</value>
+ </list>
+ </property>
+ </bean>
+ </property>
+ </bean>
+ </property>
+
+ <property name="cacheConfiguration">
+ <array>
+ <bean class="org.apache.ignite.configuration.CacheConfiguration">
+ <property name="backups" value="1" />
+ <property name="sqlSchema" value="PUBLIC" />
+ <property name="name" value="cache1" />
+ </bean>
+ <bean class="org.apache.ignite.configuration.CacheConfiguration">
+ <property name="backups" value="1" />
+ <property name="sqlSchema" value="PUBLIC" />
+ <property name="name" value="cache2" />
+ </bean>
+ </array>
+ </property>
+ </bean>
+</beans>
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/resources/spark/spark-config.xml b/modules/spark-3.2-ext/spark-3.2/src/test/resources/spark/spark-config.xml
new file mode 100644
index 0000000..2af4bef
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/resources/spark/spark-config.xml
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
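+<!--
+ Ignite configuration with client mode enabled, used by the Spark-side tests.
+-->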
+<beans xmlns="http://www.springframework.org/schema/beans"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="
+ http://www.springframework.org/schema/beans
+ http://www.springframework.org/schema/beans/spring-beans.xsd">
+ <bean id="grid.cfg" class="org.apache.ignite.configuration.IgniteConfiguration">
+ <property name="clientMode" value="true"/>
+
+ <property name="localHost" value="127.0.0.1"/>
+
+ <property name="discoverySpi">
+ <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
+ <property name="ipFinder">
+ <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
+ <property name="addresses">
+ <list>
+ <value>127.0.0.1:47500..47504</value>
+ </list>
+ </property>
+ </bean>
+ </property>
+ </bean>
+ </property>
+ </bean>
+</beans>
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/AbstractDataFrameSpec.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/AbstractDataFrameSpec.scala
new file mode 100644
index 0000000..288c7d6
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/AbstractDataFrameSpec.scala
@@ -0,0 +1,243 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import org.apache.ignite.{Ignite, Ignition}
+import org.apache.ignite.configuration.{CacheConfiguration, IgniteConfiguration}
+import org.apache.spark.sql.{DataFrame, Row, SparkSession}
+import org.scalatest._
+
+import java.lang.{Long => JLong}
+import org.apache.ignite.cache.query.SqlFieldsQuery
+import org.apache.ignite.cache.query.annotations.QuerySqlField
+import org.apache.ignite.internal.IgnitionEx.loadConfiguration
+import org.apache.ignite.spark.AbstractDataFrameSpec.configuration
+import org.apache.ignite.spark.impl.IgniteSQLAccumulatorRelation
+import org.apache.spark.sql.execution.datasources.LogicalRelation
+import org.apache.ignite.spark.AbstractDataFrameSpec._
+import org.scalatest.funspec.AnyFunSpec
+import org.scalatest.matchers.should.Matchers
+
+import scala.annotation.meta.field
+import scala.reflect.ClassTag
+
+/**
+ * Base class for Ignite DataFrame specs: starts Ignite server and client nodes and a local Spark session.
+ */
+abstract class AbstractDataFrameSpec extends AnyFunSpec with Matchers with BeforeAndAfterAll with BeforeAndAfter
+ with Assertions {
+ var spark: SparkSession = _
+
+ var client: Ignite = _
+
+ private val NUM_SERVERS = 5
+
+ override protected def beforeAll(): Unit = {
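+ // Note: "0 to NUM_SERVERS" is inclusive, so NUM_SERVERS + 1 server nodes are started.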
+ for (i ← 0 to NUM_SERVERS)
+ Ignition.start(configuration("grid-" + i, client = false))
+
+ client = Ignition.getOrStart(configuration("client", client = true))
+
+ createSparkSession()
+ }
+
+ override protected def afterAll(): Unit = {
+ Ignition.stop("client", false)
+
+ for (i ← 0 to NUM_SERVERS)
+ Ignition.stop("grid-" + i, false)
+
+ spark.close()
+ }
+
+ protected def createSparkSession(): Unit = {
+ spark = SparkSession.builder()
+ .appName("DataFrameSpec")
+ .master("local")
+ .config("spark.executor.instances", "2")
+ .getOrCreate()
+ }
+
+ def createPersonTable2(client: Ignite, cacheName: String): Unit =
+ createPersonTable0(client, cacheName, PERSON_TBL_NAME_2)
+
+ def createPersonTable(client: Ignite, cacheName: String): Unit =
+ createPersonTable0(client, cacheName, PERSON_TBL_NAME)
+
+ private def createPersonTable0(client: Ignite, cacheName: String, tblName: String): Unit = {
+ val cache = client.cache(cacheName)
+
+ cache.query(new SqlFieldsQuery(
+ s"""
+ | CREATE TABLE $tblName (
+ | id LONG,
+ | name VARCHAR,
+ | birth_date DATE,
+ | is_resident BOOLEAN,
+ | salary FLOAT,
+ | pension DOUBLE,
+ | account DECIMAL,
+ | age INT,
+ | city_id LONG,
+ | PRIMARY KEY (id, city_id)) WITH "backups=1, affinityKey=city_id"
+ """.stripMargin)).getAll
+
+ val qry = new SqlFieldsQuery(s"INSERT INTO $tblName (id, name, city_id) values (?, ?, ?)")
+
+ cache.query(qry.setArgs(1L.asInstanceOf[JLong], "John Doe", 3L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(2L.asInstanceOf[JLong], "Jane Roe", 2L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(3L.asInstanceOf[JLong], "Mary Major", 1L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(4L.asInstanceOf[JLong], "Richard Miles", 2L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(5L.asInstanceOf[JLong], null, 2L.asInstanceOf[JLong])).getAll
+ }
+
+ def createCityTable(client: Ignite, cacheName: String): Unit = {
+ val cache = client.cache(cacheName)
+
+ cache.query(new SqlFieldsQuery(
+ "CREATE TABLE city (id LONG PRIMARY KEY, name VARCHAR) WITH \"template=replicated\"")).getAll
+
+ val qry = new SqlFieldsQuery("INSERT INTO city (id, name) VALUES (?, ?)")
+
+ cache.query(qry.setArgs(1L.asInstanceOf[JLong], "Forest Hill")).getAll
+ cache.query(qry.setArgs(2L.asInstanceOf[JLong], "Denver")).getAll
+ cache.query(qry.setArgs(3L.asInstanceOf[JLong], "St. Petersburg")).getAll
+ cache.query(qry.setArgs(4L.asInstanceOf[JLong], "St. Petersburg")).getAll
+ }
+
+ def createEmployeeCache(client: Ignite, cacheName: String, schemaName: Option[String] = None): Unit = {
+ val ccfg = AbstractDataFrameSpec.cacheConfiguration[String, Employee](cacheName)
+
+ schemaName.foreach(ccfg.setSqlSchema)
+
+ val cache = client.getOrCreateCache(ccfg)
+
+ cache.put("key1", Employee(1, "John Connor", 15, 0))
+ cache.put("key2", Employee(2, "Sarah Connor", 32, 10000))
+ cache.put("key3", Employee(3, "Arnold Schwarzenegger", 27, 1000))
+ }
+
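+ /** Compares query results with the expected tuple, sorting the actual rows by the first column. */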
+ def checkQueryData[T](res: DataFrame, expectedRes: Product)
+ (implicit ord: T ⇒ Ordered[T]): Unit =
+ checkQueryData(res, expectedRes, _.getAs[T](0))
+
+ def checkQueryData[Ordered](res: DataFrame, expectedRes: Product, sorter: Row => Ordered)
+ (implicit ord: Ordering[Ordered]): Unit = {
+ val data = res.rdd.collect.sortBy(sorter)
+
+ for(i ← 0 until expectedRes.productArity) {
+ val row = data(i)
+
+ if (row.size == 1)
+ assert(row(0) == expectedRes.productElement(i), s"row[$i, 0] = ${row(0)} should be equal to ${expectedRes.productElement(i)}")
+ else {
+ val expectedRow: Product = expectedRes.productElement(i).asInstanceOf[Product]
+
+ assert(expectedRow.productArity == row.size, s"Row sizes should be equal, but expected.size=${expectedRow.productArity} " +
+ s"and row.size=${row.size}")
+
+ for (j ← 0 until expectedRow.productArity)
+ assert(row(j) == expectedRow.productElement(j), s"row[$i, $j] = ${row(j)} should be equal to ${expectedRow.productElement(j)}")
+ }
+ }
+ }
+}
+
+object AbstractDataFrameSpec {
+ val TEST_CONFIG_FILE = "ignite-spark-config.xml"
+
+ val DEFAULT_CACHE = "cache1"
+
+ val TEST_OBJ_TEST_OBJ_CACHE_NAME = "cache2"
+
+ val EMPLOYEE_CACHE_NAME = "cache3"
+
+ val PERSON_TBL_NAME = "person"
+
+ val PERSON_TBL_NAME_2 = "person2"
+
+ def configuration(igniteInstanceName: String, client: Boolean): IgniteConfiguration = {
+ val cfg = loadConfiguration(TEST_CONFIG_FILE).get1()
+
+ cfg.setClientMode(client)
+
+ cfg.setIgniteInstanceName(igniteInstanceName)
+
+ cfg
+ }
+
+ /**
+ * Gets cache configuration for the given cache name.
+ *
+ * @tparam K Class of cached keys.
+ * @tparam V Class of cached values.
+ * @param cacheName Cache name.
+ * @return Cache configuration.
+ */
+ def cacheConfiguration[K : ClassTag, V : ClassTag](cacheName : String): CacheConfiguration[Object, Object] = {
+ val ccfg = new CacheConfiguration[Object, Object]()
+
+ ccfg.setBackups(1)
+
+ ccfg.setName(cacheName)
+
+ ccfg.setIndexedTypes(
+ implicitly[reflect.ClassTag[K]].runtimeClass.asInstanceOf[Class[K]],
+ implicitly[reflect.ClassTag[V]].runtimeClass.asInstanceOf[Class[V]])
+
+ ccfg
+ }
+
+ /**
+ * Checks that the optimized plan reads from Ignite and, if `qry` is provided, that the generated SQL matches it.
+ *
+ * @param df Data frame.
+ * @param qry Expected SQL query ("" to skip the query text check).
+ */
+ def checkOptimizationResult(df: DataFrame, qry: String = ""): Unit = {
+ df.explain(true)
+
+ val plan = df.queryExecution.optimizedPlan
+
+ val cnt = plan.collectLeaves.count {
+ case LogicalRelation(relation: IgniteSQLAccumulatorRelation[_, _], _, _, _) ⇒
+ if (qry != "")
+ assert(qry.toLowerCase == relation.acc.compileQuery().toLowerCase,
+ s"Generated query should be equal to expected.\nexpected - ${qry.toLowerCase}\ngenerated - ${relation.acc.compileQuery().toLowerCase}")
+
+ true
+
+ case _ ⇒
+ false
+ }
+
+ assert(cnt != 0, "Plan should contain IgniteSQLAccumulatorRelation")
+ }
+
+ /**
+ * Encloses a closure so that it doesn't capture the outer object (default Scala behaviour) while serializing.
+ */
+ def enclose[E, R](enclosed: E)(func: E => R): R = func(enclosed)
+}
+
+case class Employee (
+ @(QuerySqlField @field)(index = true) id: Long,
+
+ @(QuerySqlField @field) name: String,
+
+ age: Int,
+
+ @(QuerySqlField @field)(index = true, descending = true) salary: Float
+) extends Serializable { }
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/Entity.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/Entity.scala
new file mode 100644
index 0000000..bef87d5
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/Entity.scala
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import org.apache.ignite.spark.IgniteRDDSpec.ScalarCacheQuerySqlField
+
+class Entity (
+ @ScalarCacheQuerySqlField(index = true) val id: Int,
+ @ScalarCacheQuerySqlField(index = true) val name: String,
+ @ScalarCacheQuerySqlField(index = true) val salary: Int
+) extends Serializable {
+
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/EntityTestAllTypeFields.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/EntityTestAllTypeFields.scala
new file mode 100644
index 0000000..36d8274
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/EntityTestAllTypeFields.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import java.lang.Boolean
+import java.sql.Timestamp
+import java.util.Date
+
+import org.apache.ignite.spark.IgniteRDDSpec.ScalarCacheQuerySqlField
+
+class EntityTestAllTypeFields(
+ @ScalarCacheQuerySqlField(index = true) val boolVal: Boolean,
+ @ScalarCacheQuerySqlField(index = true) val byteVal: Byte,
+ @ScalarCacheQuerySqlField(index = true) val shortVal: Short,
+ @ScalarCacheQuerySqlField(index = true) val intVal: Int,
+ @ScalarCacheQuerySqlField(index = true) val longVal: Long,
+ @ScalarCacheQuerySqlField(index = true) val floatVal: Float,
+ @ScalarCacheQuerySqlField(index = true) val doubleVal: Double,
+ @ScalarCacheQuerySqlField(index = true) val strVal: String,
+ @ScalarCacheQuerySqlField(index = true) val dateVal: Date,
+ @ScalarCacheQuerySqlField(index = true) val timestampVal: Timestamp,
+ @ScalarCacheQuerySqlField(index = true) val byteArrVal: Array[Byte],
+ @ScalarCacheQuerySqlField(index = true) val bigDecVal: java.math.BigDecimal,
+ @ScalarCacheQuerySqlField(index = true) val javaSqlDate: java.sql.Date
+
+) extends Serializable {
+ def this(
+ i: Int
+ ) {
+ this(
+ i % 2 == 0, // Boolean
+ i.toByte, // Byte
+ i.toShort, // Short
+ i, // Int
+ i.toLong, // Long
+ i, // Float
+ i, // Double
+ "name" + i, // String
+ new Date(i),
+ new Timestamp(i),
+ Array(i.toByte, i.toByte),
+ new java.math.BigDecimal(i.toString),
+ new java.sql.Date(i))
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteCatalogSpec.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteCatalogSpec.scala
new file mode 100644
index 0000000..373d26e
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteCatalogSpec.scala
@@ -0,0 +1,228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import java.lang.{Long => JLong}
+import org.apache.ignite.IgniteException
+import org.apache.ignite.cache.query.SqlFieldsQuery
+import org.apache.ignite.internal.IgnitionEx
+import org.apache.ignite.internal.util.IgniteUtils.{gridClassLoader, resolveIgnitePath}
+import org.apache.ignite.spark.AbstractDataFrameSpec.{DEFAULT_CACHE, EMPLOYEE_CACHE_NAME, TEST_CONFIG_FILE, enclose}
+import org.apache.spark.sql.catalyst.catalog.SessionCatalog
+import org.apache.spark.sql.ignite.IgniteSparkSession
+import org.apache.spark.sql.types.{LongType, StringType}
+import org.junit.Assert.assertEquals
+import org.junit.runner.RunWith
+import org.scalatestplus.junit.JUnitRunner
+
+/**
+ * Tests to check Spark Catalog implementation.
+ */
+@RunWith(classOf[JUnitRunner])
+class IgniteCatalogSpec extends AbstractDataFrameSpec {
+ var igniteSession: IgniteSparkSession = _
+
+ describe("Ignite Catalog Implementation") {
+ it("Should observe all available SQL tables") {
+ val tables = igniteSession.catalog.listTables.collect()
+
+ tables.length should equal(3)
+
+ tables.map(_.name).sorted should equal(Array("CITY", "EMPLOYEE", "PERSON"))
+ }
+
+ it("Should use the database context when providing tables") {
+ igniteSession.catalog.setCurrentDatabase("employeeSchema")
+
+ val employeeSchemaTables = igniteSession.catalog.listTables().collect()
+
+ employeeSchemaTables.map(_.name).sorted should equal(Array("EMPLOYEE"))
+
+ igniteSession.catalog.setCurrentDatabase("PUBLIC")
+
+ val publicSchemaTables = igniteSession.catalog.listTables().collect()
+
+ publicSchemaTables.map(_.name).sorted should equal(Array("CITY", "PERSON"))
+ }
+
+ it("Should provide table names given the PUBLIC schema") {
+ val tables = igniteSession.catalog.listTables("PUBLIC").collect()
+
+ tables.map(_.name).sorted should equal(Array("CITY", "PERSON"))
+ }
+
+ it("Should provide table names given a custom schema") {
+ val tables = igniteSession.catalog.listTables("employeeSchema").collect()
+
+ tables.map(_.name).sorted should equal(Array("EMPLOYEE"))
+ }
+
+ it("Should provide correct schema for SQL table") {
+ val columns = igniteSession.catalog.listColumns("city").collect()
+
+ columns.length should equal (2)
+
+ columns.map(c ⇒ (c.name, c.dataType, c.nullable)).sorted should equal (
+ Array(
+ ("ID", LongType.catalogString, false),
+ ("NAME", StringType.catalogString, true)))
+ }
+
+ it("Should provide the list of all schemas") {
+ val schemas = igniteSession.catalog.listDatabases().collect()
+
+ schemas.map(_.name).sorted should equal(Array("cache3", "employeeschema", "public"))
+ }
+
+ it("Should provide ability to query SQL table without explicit registration") {
+ val res = igniteSession.sql("SELECT id, name FROM city").rdd
+
+ res.count should equal(4)
+
+ val cities = res.collect.sortBy(_.getAs[JLong]("id"))
+
+ cities.map(c ⇒ (c.getAs[JLong]("id"), c.getAs[String]("name"))) should equal (
+ Array(
+ (1, "Forest Hill"),
+ (2, "Denver"),
+ (3, "St. Petersburg"),
+ (4, "St. Petersburg")
+ )
+ )
+ }
+
+ it("Should provide ability to query SQL table configured throw java annotations without explicit registration") {
+ val res = igniteSession.sql("SELECT id, name, salary FROM employee").rdd
+
+ res.count should equal(3)
+
+ val employees = res.collect.sortBy(_.getAs[JLong]("id"))
+
+ employees.map(c ⇒ (c.getAs[JLong]("id"), c.getAs[String]("name"), c.getAs[Float]("salary"))) should equal (
+ Array(
+ (1, "John Connor", 0f),
+ (2, "Sarah Connor", 10000f),
+ (3, "Arnold Schwarzenegger", 1000f)
+ )
+ )
+ }
+
+ it("Should provide newly created tables in tables list") {
+ val cache = client.cache(DEFAULT_CACHE)
+
+ cache.query(new SqlFieldsQuery(
+ "CREATE TABLE new_table(id LONG PRIMARY KEY, name VARCHAR) WITH \"template=replicated\"")).getAll
+
+ val tables = igniteSession.catalog.listTables.collect()
+
+ tables.find(_.name == "NEW_TABLE").map(_.name) should equal (Some("NEW_TABLE"))
+
+ val columns = igniteSession.catalog.listColumns("NEW_TABLE").collect()
+
+ columns.map(c ⇒ (c.name, c.dataType, c.nullable)).sorted should equal (
+ Array(
+ ("ID", LongType.catalogString, false),
+ ("NAME", StringType.catalogString, true)))
+ }
+
+ it("Should allow register tables based on other datasources") {
+ val citiesDataFrame = igniteSession.read.json(
+ gridClassLoader().getResource("cities.json").getFile)
+
+ citiesDataFrame.createOrReplaceTempView("JSON_CITIES")
+
+ val res = igniteSession.sql("SELECT id, name FROM json_cities").rdd
+
+ res.count should equal(3)
+
+ val cities = res.collect
+
+ cities.map(c ⇒ (c.getAs[JLong]("id"), c.getAs[String]("name"))) should equal (
+ Array(
+ (1, "Forest Hill"),
+ (2, "Denver"),
+ (3, "St. Petersburg")
+ )
+ )
+ }
+
+ it("Should allow schema specification in the table name for public schema") {
+ val res = igniteSession.sql("SELECT id, name FROM public.city").rdd
+
+ res.count should equal(4)
+ }
+
+ it("Should allow schema specification in the table name for non-public schema") {
+ val res = igniteSession.sql("SELECT id, name, salary FROM cache3.employee").rdd
+
+ res.count should equal(3)
+ }
+
+ // TODO: should be fixed in IGNITE-12246
+ ignore("Should allow Spark SQL to create a table") {
+ igniteSession.sql(
+ "CREATE TABLE NEW_SPARK_TABLE(id LONG, name STRING) USING JSON OPTIONS ('primaryKeyFields' = 'id')")
+
+ val tables = igniteSession.catalog.listTables.collect()
+
+ tables.find(_.name == "NEW_SPARK_TABLE").map(_.name) should equal (Some("NEW_SPARK_TABLE"))
+ }
+
+ // TODO: should be fixed in IGNITE-12246
+ ignore("Should disallow creation of tables in non-PUBLIC schemas") {
+ val ex = intercept[IgniteException] {
+ igniteSession.sql(
+ "CREATE TABLE cache3.NEW_SPARK_TABLE(id LONG, name STRING) " +
+ "USING JSON OPTIONS ('primaryKeyFields' = 'id')")
+ }
+
+ assertEquals(ex.getMessage, "Can only create new tables in PUBLIC schema, not cache3")
+ }
+ }
+
+ before {
+ igniteSession.catalog.setCurrentDatabase(SessionCatalog.DEFAULT_DATABASE)
+ }
+
+ override protected def beforeAll(): Unit = {
+ super.beforeAll()
+
+ createPersonTable(client, DEFAULT_CACHE)
+
+ createCityTable(client, DEFAULT_CACHE)
+
+ createEmployeeCache(client, EMPLOYEE_CACHE_NAME)
+
+ createEmployeeCache(client, "myEmployeeCache", Some("employeeSchema"))
+
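+ // Wrap the config factory with enclose() so the serialized closure doesn't capture the enclosing spec instance.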
+ val configProvider = enclose(null) (_ ⇒ () ⇒ {
+ val cfg = IgnitionEx.loadConfiguration(TEST_CONFIG_FILE).get1()
+
+ cfg.setClientMode(true)
+
+ cfg.setIgniteInstanceName("client-2")
+
+ cfg
+ })
+
+ igniteSession = IgniteSparkSession.builder()
+ .config(spark.sparkContext.getConf)
+ .igniteConfigProvider(configProvider)
+ .getOrCreate()
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteDataFrameSchemaSpec.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteDataFrameSchemaSpec.scala
new file mode 100644
index 0000000..78a78a8
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteDataFrameSchemaSpec.scala
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import org.apache.ignite.Ignite
+import org.apache.ignite.cache.query.SqlFieldsQuery
+import org.apache.ignite.cache.query.annotations.QuerySqlField
+import org.apache.ignite.configuration.CacheConfiguration
+import org.apache.ignite.spark.AbstractDataFrameSpec._
+import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.types._
+import org.junit.runner.RunWith
+import org.apache.ignite.spark.IgniteDataFrameSettings._
+import org.scalatestplus.junit.JUnitRunner
+
+import scala.annotation.meta.field
+
+/**
+ * Tests to check loading schema for Ignite data sources.
+ */
+@RunWith(classOf[JUnitRunner])
+class IgniteDataFrameSchemaSpec extends AbstractDataFrameSpec {
+ var personDataFrame: DataFrame = _
+
+ var employeeDataFrame: DataFrame = _
+
+ var personWithAliasesDataFrame: DataFrame = _
+
+ var columnMetaDataFrame: DataFrame = _
+
+ var addedColumnDataFrame: DataFrame = _
+
+ var droppedColumnDataFrame: DataFrame = _
+
+ describe("Loading DataFrame schema for Ignite tables") {
+ it("should successfully load DataFrame schema for a Ignite SQL Table") {
+ personDataFrame.schema.fields.map(f ⇒ (f.name, f.dataType, f.nullable)) should equal (
+ Array(
+ ("NAME", StringType, true),
+ ("BIRTH_DATE", DateType, true),
+ ("IS_RESIDENT", BooleanType, true),
+ ("SALARY", DoubleType, true),
+ ("PENSION", DoubleType, true),
+ ("ACCOUNT", IgniteRDD.DECIMAL, true),
+ ("AGE", IntegerType, true),
+ ("ID", LongType, false),
+ ("CITY_ID", LongType, false))
+ )
+ }
+
+ it("should show correct schema for a Ignite SQL Table with modified column") {
+ columnMetaDataFrame = spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "test")
+ .load()
+
+ columnMetaDataFrame.schema.fields.map(f ⇒ (f.name, f.dataType, f.nullable)) should equal (
+ Array(
+ ("A", IntegerType, true),
+ ("B", StringType, true),
+ ("ID", IntegerType, false))
+ )
+
+ addColumnForTable(client, DEFAULT_CACHE)
+
+ addedColumnDataFrame = spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "test")
+ .load()
+
+ addedColumnDataFrame.schema.fields.map(f ⇒ (f.name, f.dataType, f.nullable)) should equal (
+ Array(
+ ("A", IntegerType, true),
+ ("B", StringType, true),
+ ("C", IntegerType, true),
+ ("ID", IntegerType, false))
+ )
+
+ dropColumnFromTable(client, DEFAULT_CACHE)
+
+ droppedColumnDataFrame = spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "test")
+ .load()
+
+ droppedColumnDataFrame.schema.fields.map(f ⇒ (f.name, f.dataType, f.nullable)) should equal (
+ Array(
+ ("B", StringType, true),
+ ("C", IntegerType, true),
+ ("ID", IntegerType, false))
+ )
+ }
+
+ it("should successfully load DataFrame data for a Ignite table configured throw java annotation") {
+ employeeDataFrame.schema.fields.map(f ⇒ (f.name, f.dataType, f.nullable)) should equal (
+ Array(
+ ("ID", LongType, true),
+ ("NAME", StringType, true),
+ ("SALARY", FloatType, true))
+ )
+ }
+
+ it("should use GridQueryTypeDescriptor column aliases") {
+ personWithAliasesDataFrame.schema.fields.map(f ⇒ (f.name, f.dataType, f.nullable)) should equal (
+ Array(
+ ("ID", LongType, true),
+ ("PERSON_NAME", StringType, true))
+ )
+ }
+ }
+
+ override protected def beforeAll(): Unit = {
+ super.beforeAll()
+
+ client.getOrCreateCache(new CacheConfiguration[Long, JPersonWithAlias]()
+ .setName("P3")
+ .setIndexedTypes(classOf[Long], classOf[JPersonWithAlias]))
+
+ personWithAliasesDataFrame = spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, classOf[JPersonWithAlias].getSimpleName)
+ .load()
+
+ createPersonTable(client, DEFAULT_CACHE)
+
+ createMetaTestTable(client, DEFAULT_CACHE)
+
+ createEmployeeCache(client, EMPLOYEE_CACHE_NAME)
+
+ personDataFrame = spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "person")
+ .load()
+
+ personDataFrame.createOrReplaceTempView("person")
+
+ employeeDataFrame = spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "employee")
+ .load()
+
+ employeeDataFrame.createOrReplaceTempView("employee")
+ }
+
+ def createMetaTestTable(client: Ignite, cacheName: String): Unit = {
+ val cache = client.cache(cacheName)
+
+ cache.query(new SqlFieldsQuery(
+ "CREATE TABLE test (id INT PRIMARY KEY, a INT, b CHAR)")).getAll
+ }
+
+ def addColumnForTable(client: Ignite, cacheName: String): Unit = {
+ val cache = client.cache(cacheName)
+
+ cache.query(new SqlFieldsQuery(
+ "ALTER TABLE test ADD COLUMN c int")).getAll
+ }
+
+ def dropColumnFromTable(client: Ignite, cacheName: String): Unit = {
+ val cache = client.cache(cacheName)
+
+ cache.query(new SqlFieldsQuery(
+ "ALTER TABLE test DROP COLUMN a")).getAll
+ }
+
+ case class JPersonWithAlias(
+ @(QuerySqlField @field) id: Long,
+ @(QuerySqlField @field)(name = "person_name", index = true) name: String)
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteDataFrameSuite.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteDataFrameSuite.scala
new file mode 100644
index 0000000..903acb4
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteDataFrameSuite.scala
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import org.apache.spark.sql.ignite.IgniteSparkSessionSpec
+import org.junit.runner.RunWith
+import org.scalatest.Suites
+import org.scalatestplus.junit.JUnitRunner
+
+/**
+ * Test suite for Spark DataFrame API implementation.
+ */
+@RunWith(classOf[JUnitRunner])
+class IgniteDataFrameSuite extends Suites (
+ new IgniteDataFrameSchemaSpec,
+ new IgniteSQLDataFrameSpec,
+ new IgniteSQLDataFrameWriteSpec,
+ new IgniteSQLDataFrameIgniteSessionWriteSpec,
+ new IgniteDataFrameWrongConfigSpec,
+ new IgniteCatalogSpec,
+ new IgniteOptimizationSpec,
+ new IgniteOptimizationStringFuncSpec,
+ new IgniteOptimizationMathFuncSpec,
+ new IgniteOptimizationAggregationFuncSpec,
+ new IgniteOptimizationSystemFuncSpec,
+ new IgniteOptimizationJoinSpec,
+ new IgniteOptimizationDateFuncSpec,
+ new IgniteOptimizationDisableEnableSpec,
+ new IgniteSparkSessionSpec,
+ new IgniteRDDSpec
+)
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteDataFrameWrongConfigSpec.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteDataFrameWrongConfigSpec.scala
new file mode 100644
index 0000000..559fa4c
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteDataFrameWrongConfigSpec.scala
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import org.apache.ignite.spark.AbstractDataFrameSpec.TEST_CONFIG_FILE
+import org.apache.ignite.spark.IgniteDataFrameSettings._
+import org.apache.ignite.IgniteException
+import org.junit.runner.RunWith
+import org.scalatestplus.junit.JUnitRunner
+
+/**
+ * Negative tests to check errors in case of wrong configuration.
+ */
+@RunWith(classOf[JUnitRunner])
+class IgniteDataFrameWrongConfigSpec extends AbstractDataFrameSpec {
+ describe("DataFrame negative cases") {
+ it("Should throw exception when try load unknown table") {
+ intercept[IgniteException] {
+ spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "unknown_table")
+ .load()
+ }
+ }
+
+ it("Should throw exception when no cache and no table") {
+ intercept[IgniteException] {
+ spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .load()
+ }
+ }
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationAggregationFuncSpec.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationAggregationFuncSpec.scala
new file mode 100644
index 0000000..97f2e80
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationAggregationFuncSpec.scala
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import java.lang.{Double => JDouble, Long => JLong}
+import org.apache.ignite.Ignite
+import org.apache.ignite.cache.query.SqlFieldsQuery
+import org.apache.ignite.internal.IgnitionEx
+import org.apache.ignite.spark.AbstractDataFrameSpec.{DEFAULT_CACHE, TEST_CONFIG_FILE, checkOptimizationResult, enclose}
+import org.apache.spark.sql.ignite.IgniteSparkSession
+import org.junit.runner.RunWith
+import org.scalatestplus.junit.JUnitRunner
+
+/**
+ * Tests for optimization of aggregation functions.
+ */
+@RunWith(classOf[JUnitRunner])
+class IgniteOptimizationAggregationFuncSpec extends AbstractDataFrameSpec {
+ var igniteSession: IgniteSparkSession = _
+
+ describe("Supported optimized aggregation functions") {
+ it("COUNT") {
+ val df = igniteSession.sql("SELECT count(*) FROM numbers")
+
+ checkOptimizationResult(df, "SELECT count(1) FROM numbers")
+
+ val data = Tuple1(21)
+
+ checkQueryData(df, data)
+ }
+
+ it("AVG - DECIMAL") {
+ //TODO: add test for ticket IGNITE-12432
+ }
+
+ it("AVG - DOUBLE") {
+ val df = igniteSession.sql("SELECT AVG(val) FROM numbers WHERE id <= 3")
+
+ checkOptimizationResult(df, "SELECT AVG(val) FROM numbers WHERE id <= 3")
+
+ val data = Tuple1(.5)
+
+ checkQueryData(df, data)
+ }
+
+ it("MIN - DOUBLE") {
+ val df = igniteSession.sql("SELECT MIN(val) FROM numbers")
+
+ checkOptimizationResult(df, "SELECT MIN(val) FROM numbers")
+
+ val data = Tuple1(-1.0)
+
+ checkQueryData(df, data)
+ }
+
+ it("MAX - DOUBLE") {
+ val df = igniteSession.sql("SELECT MAX(val) FROM numbers")
+
+ checkOptimizationResult(df, "SELECT MAX(val) FROM numbers")
+
+ val data = Tuple1(180.0)
+
+ checkQueryData(df, data)
+ }
+
+ it("SUM - DOUBLE") {
+ val df = igniteSession.sql("SELECT SUM(val) FROM numbers WHERE id <= 3")
+
+ checkOptimizationResult(df, "SELECT SUM(val) FROM numbers WHERE id <= 3")
+
+ val data = Tuple1(1.5)
+
+ checkQueryData(df, data)
+ }
+
+ it("SUM - DECIMAL - 1") {
+ val df = igniteSession.sql("SELECT SUM(decimal_val) FROM numbers WHERE id IN (18, 19, 20)")
+
+ checkOptimizationResult(df, "SELECT SUM(decimal_val) FROM numbers WHERE id IN (18, 19, 20)")
+
+ df.printSchema()
+
+ val data = Tuple1(new java.math.BigDecimal(10.5).setScale(3))
+
+ checkQueryData(df, data)
+ }
+
+ it("SUM - DECIMAL - 2") {
+ val df = igniteSession.sql("SELECT SUM(decimal_val) FROM numbers WHERE id IN (18, 19, 20, 21)")
+
+ checkOptimizationResult(df, "SELECT SUM(decimal_val) FROM numbers WHERE id IN (18, 19, 20, 21)")
+
+ val data = Tuple1(new java.math.BigDecimal(15).setScale(3))
+
+ checkQueryData(df, data)
+ }
+
+ it("SUM - LONG") {
+ val df = igniteSession.sql("SELECT SUM(int_val) FROM numbers WHERE id in (15, 16, 17)")
+
+ checkOptimizationResult(df, "SELECT CAST(SUM(int_val) AS BIGINT) as \"SUM(int_val)\" " +
+ "FROM numbers WHERE id in (15, 16, 17)")
+
+ val data = Tuple1(6L)
+
+ checkQueryData(df, data)
+ }
+ }
+
+ def createNumberTable(client: Ignite, cacheName: String): Unit = {
+ val cache = client.cache(cacheName)
+
+ cache.query(new SqlFieldsQuery(
+ """
+ | CREATE TABLE numbers (
+ | id LONG,
+ | val DOUBLE,
+ | int_val LONG,
+ | decimal_val DECIMAL(38, 3),
+ | PRIMARY KEY (id)) WITH "backups=1"
+ """.stripMargin)).getAll
+
+ var qry = new SqlFieldsQuery("INSERT INTO numbers (id, val) values (?, ?)")
+
+ cache.query(qry.setArgs(1L.asInstanceOf[JLong], .0.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(2L.asInstanceOf[JLong], .5.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(3L.asInstanceOf[JLong], 1.0.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(4L.asInstanceOf[JLong], 2.0.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(5L.asInstanceOf[JLong], 4.0.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(6L.asInstanceOf[JLong], -0.5.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(7L.asInstanceOf[JLong], -1.0.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(8L.asInstanceOf[JLong], 42.0.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(9L.asInstanceOf[JLong], .51.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(10L.asInstanceOf[JLong], .49.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(11L.asInstanceOf[JLong], 100.0.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(12L.asInstanceOf[JLong], (Math.E*Math.E).asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(13L.asInstanceOf[JLong], Math.PI.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(14L.asInstanceOf[JLong], 180.0.asInstanceOf[JDouble])).getAll
+
+ qry = new SqlFieldsQuery("INSERT INTO numbers (id, int_val) values (?, ?)")
+
+ cache.query(qry.setArgs(15L.asInstanceOf[JLong], 1L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(16L.asInstanceOf[JLong], 2L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(17L.asInstanceOf[JLong], 3L.asInstanceOf[JLong])).getAll
+
+ qry = new SqlFieldsQuery("INSERT INTO numbers (id, decimal_val) values (?, ?)")
+
+ cache.query(qry.setArgs(18L.asInstanceOf[JLong], new java.math.BigDecimal(2.5))).getAll
+ cache.query(qry.setArgs(19L.asInstanceOf[JLong], new java.math.BigDecimal(3.5))).getAll
+ cache.query(qry.setArgs(20L.asInstanceOf[JLong], new java.math.BigDecimal(4.5))).getAll
+ cache.query(qry.setArgs(21L.asInstanceOf[JLong], new java.math.BigDecimal(4.5))).getAll
+ }
+
+ override protected def beforeAll(): Unit = {
+ super.beforeAll()
+
+ createNumberTable(client, DEFAULT_CACHE)
+
+ val configProvider = enclose(null) (x ⇒ () ⇒ {
+ val cfg = IgnitionEx.loadConfiguration(TEST_CONFIG_FILE).get1()
+
+ cfg.setClientMode(true)
+
+ cfg.setIgniteInstanceName("client-2")
+
+ cfg
+ })
+
+ igniteSession = IgniteSparkSession.builder()
+ .config(spark.sparkContext.getConf)
+ .igniteConfigProvider(configProvider)
+ .getOrCreate()
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationDateFuncSpec.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationDateFuncSpec.scala
new file mode 100644
index 0000000..8352bdf
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationDateFuncSpec.scala
@@ -0,0 +1,231 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import org.apache.ignite.Ignite
+import org.apache.ignite.cache.query.SqlFieldsQuery
+import org.apache.ignite.internal.IgnitionEx
+import org.apache.ignite.spark.AbstractDataFrameSpec.{DEFAULT_CACHE, TEST_CONFIG_FILE, checkOptimizationResult, enclose}
+import org.apache.spark.sql.ignite.IgniteSparkSession
+import org.junit.runner.RunWith
+import org.scalatestplus.junit.JUnitRunner
+
+import java.lang.{Long => JLong}
+import java.util.{Date => JDate}
+import java.text.SimpleDateFormat
+import java.util.concurrent.TimeUnit
+import java.util.concurrent.TimeUnit.DAYS
+
+/**
+ * Tests for optimization of date functions.
+ */
+@RunWith(classOf[JUnitRunner])
+class IgniteOptimizationDateFuncSpec extends AbstractDataFrameSpec {
+ var igniteSession: IgniteSparkSession = _
+
+ val format = new SimpleDateFormat("dd.MM.yyyy HH:mm:ss")
+
+ describe("Supported optimized date functions") {
+ it(" - CURRENT_TIMESTAMP") {
+ val df = igniteSession.sql("SELECT id, CURRENT_TIMESTAMP() FROM dates WHERE id = 1")
+
+ checkOptimizationResult(df)
+
+ val data = df.rdd.collect
+
+ assert(data(0).getAs[JLong]("id") == 1L)
+
+ val date: JDate = data(0).getAs[JDate]("current_timestamp()")
+ val millisDiff = new JDate().getTime - date.getTime
+
+ assert(millisDiff <= 30000)
+ }
+
+ it(" - CURRENT_DATE") {
+ val df = igniteSession.sql("SELECT id, CURRENT_DATE() FROM dates WHERE id = 1")
+
+ checkOptimizationResult(df)
+
+ val data = df.rdd.collect
+
+ assert(data(0).getAs[JLong]("id") == 1L)
+
+ val date: JDate = data(0).getAs[JDate]("current_date()")
+ val dayDiff = DAYS.convert(new JDate().getTime - date.getTime, TimeUnit.MILLISECONDS)
+
+ assert(dayDiff <= 1)
+ }
+
+ it(" - HOUR") {
+ val df = igniteSession.sql("SELECT HOUR(val) FROM dates WHERE id = 1")
+
+ checkOptimizationResult(df)
+
+ val data = Tuple1(0)
+
+ checkQueryData(df, data)
+ }
+
+ it(" - MINUTE") {
+ val df = igniteSession.sql("SELECT MINUTE(val) FROM dates WHERE id = 1")
+
+ checkOptimizationResult(df)
+
+ val data = Tuple1(0)
+
+ checkQueryData(df, data)
+ }
+
+ it(" - SECOND") {
+ val df = igniteSession.sql("SELECT SECOND(val) FROM dates WHERE id = 1")
+
+ checkOptimizationResult(df)
+
+ val data = Tuple1(0)
+
+ checkQueryData(df, data)
+ }
+
+ it(" - MONTH") {
+ val df = igniteSession.sql("SELECT MONTH(val) FROM dates WHERE id = 1")
+
+ checkOptimizationResult(df)
+
+ val data = Tuple1(0)
+
+ checkQueryData(df, data)
+ }
+
+ it(" - YEAR") {
+ val df = igniteSession.sql("SELECT YEAR(val) FROM dates WHERE id = 1")
+
+ checkOptimizationResult(df)
+
+ val data = Tuple1(2017)
+
+ checkQueryData(df, data)
+ }
+
+ it(" - QUARTER") {
+ val df = igniteSession.sql("SELECT QUARTER(val) FROM dates WHERE id = 1")
+
+ checkOptimizationResult(df)
+
+ val data = Tuple1(1)
+
+ checkQueryData(df, data)
+ }
+
+ it(" - WEEK") {
+ val df = igniteSession.sql("SELECT WEEKOFYEAR(val) FROM dates WHERE id = 1")
+
+ checkOptimizationResult(df)
+
+ val data = Tuple1(1)
+
+ checkQueryData(df, data)
+ }
+
+ it(" - DAY_OF_MONTH") {
+ val df = igniteSession.sql("SELECT DAYOFMONTH(val) FROM dates WHERE id = 1")
+
+ checkOptimizationResult(df)
+
+ val data = Tuple1(1)
+
+ checkQueryData(df, data)
+ }
+
+ it(" - DAY_OF_YEAR") {
+ val df = igniteSession.sql("SELECT DAYOFYEAR(val) FROM dates WHERE id = 1")
+
+ checkOptimizationResult(df)
+
+ val data = Tuple1(1)
+
+ checkQueryData(df, data)
+ }
+
+ it(" - DATE_ADD") {
+ val df = igniteSession.sql("SELECT DATE_ADD(val, 2) FROM dates WHERE id = 1")
+
+ checkOptimizationResult(df)
+
+ val data = Tuple1(format.parse("03.01.2017 00:00:00"))
+
+ checkQueryData(df, data)
+ }
+
+ it(" - DATEDIFF") {
+ val df = igniteSession.sql("SELECT " +
+ "DATEDIFF(val, TO_DATE('2017-01-02 00:00:00.000', 'yyyy-MM-dd HH:mm:ss.SSS')) FROM dates WHERE id = 1")
+
+ checkOptimizationResult(df)
+
+ val data = Tuple1(1)
+
+ checkQueryData(df, data)
+ }
+
+ it(" - FORMATDATETIME") {
+ val df = igniteSession.sql("SELECT DATE_FORMAT(val, 'yyyy-MM-dd HH:mm:ss.SSS') FROM dates WHERE id = 1")
+
+ checkOptimizationResult(df)
+
+ val data = Tuple1("2017-01-01 00:00:00.000")
+
+ checkQueryData(df, data)
+ }
+ }
+
+ def createDateTable(client: Ignite, cacheName: String): Unit = {
+ val cache = client.cache(cacheName)
+
+ cache.query(new SqlFieldsQuery(
+ """
+ | CREATE TABLE dates (
+ | id LONG,
+ | val DATE,
+ | PRIMARY KEY (id)) WITH "backups=1"
+ """.stripMargin)).getAll
+
+ val qry = new SqlFieldsQuery("INSERT INTO dates(id, val) values (?, ?)")
+
+ cache.query(qry.setArgs(1L.asInstanceOf[JLong], format.parse("01.01.2017 00:00:00"))).getAll
+ }
+
+ override protected def beforeAll(): Unit = {
+ super.beforeAll()
+
+ createDateTable(client, DEFAULT_CACHE)
+
+ val configProvider = enclose(null) (x ⇒ () ⇒ {
+ val cfg = IgnitionEx.loadConfiguration(TEST_CONFIG_FILE).get1()
+
+ cfg.setClientMode(true)
+
+ cfg.setIgniteInstanceName("client-2")
+
+ cfg
+ })
+
+ igniteSession = IgniteSparkSession.builder()
+ .config(spark.sparkContext.getConf)
+ .igniteConfigProvider(configProvider)
+ .getOrCreate()
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationDisableEnableSpec.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationDisableEnableSpec.scala
new file mode 100644
index 0000000..708d3cc
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationDisableEnableSpec.scala
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import org.apache.ignite.spark.AbstractDataFrameSpec.TEST_CONFIG_FILE
+import org.apache.ignite.spark.IgniteDataFrameSettings._
+import org.apache.spark.sql.{DataFrame, SparkSession}
+import org.apache.spark.sql.ignite.IgniteOptimization
+import org.junit.runner.RunWith
+import org.scalatestplus.junit.JUnitRunner
+
+/**
+ * Tests for disabling and enabling Ignite optimization at runtime.
+ */
+@RunWith(classOf[JUnitRunner])
+class IgniteOptimizationDisableEnableSpec extends AbstractDataFrameSpec {
+ var personDataFrame: DataFrame = _
+
+ describe("Ignite Optimization Disabling/Enabling") {
+ it("should add Ignite Optimization to a session on a first query") {
+ if (spark.sparkContext.isStopped)
+ createSparkSession()
+
+ assert(!igniteOptimizationExists(spark), "Session shouldn't contain IgniteOptimization")
+
+ personDataFrame = spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "person")
+ .load()
+
+ assert(igniteOptimizationExists(spark),
+ "Session should contains IgniteOptimization after executing query over Ignite Data Frame")
+
+ spark.stop()
+ }
+
+ it("should remove Ignite Optimization if it disabled at runtime") {
+ if (!spark.sparkContext.isStopped)
+ spark.stop()
+
+ val newSession = SparkSession.builder()
+ .appName("Ignite Optimization check")
+ .master("local")
+ .config("spark.executor.instances", "2")
+ .getOrCreate()
+
+ assert(!igniteOptimizationExists(newSession), "Session shouldn't contains IgniteOptimization")
+
+ var newPersonDataFrame = newSession.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "person")
+ .load()
+
+ assert(igniteOptimizationExists(newSession),
+ "Session should contains IgniteOptimization after executing query over Ignite Data Frame")
+
+
+ newSession.conf.set(OPTION_DISABLE_SPARK_SQL_OPTIMIZATION, "true")
+
+ newPersonDataFrame = newSession.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "person")
+ .load()
+
+ assert(!igniteOptimizationExists(newSession),
+ "Session shouldn't contains IgniteOptimization")
+
+ newSession.close()
+ }
+
+ it("shouldn't add Ignite Optimization to a session when it's disabled") {
+ if (!spark.sparkContext.isStopped)
+ spark.stop()
+
+ val newSession = SparkSession.builder()
+ .appName("Ignite Optimization check")
+ .master("local")
+ .config("spark.executor.instances", "2")
+ .config(OPTION_DISABLE_SPARK_SQL_OPTIMIZATION, "true")
+ .getOrCreate()
+
+ assert(!igniteOptimizationExists(newSession), "Session shouldn't contain IgniteOptimization")
+
+ val newPersonDataFrame = newSession.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "person")
+ .load()
+
+ newPersonDataFrame.createOrReplaceTempView("person")
+
+ val res = newSession.sqlContext.sql("SELECT name FROM person WHERE id = 2").rdd
+
+ res.count should equal(1)
+
+ assert(!igniteOptimizationExists(newSession), "Session shouldn't contain IgniteOptimization")
+
+ newSession.close()
+ }
+ }
+
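+ /** Checks whether IgniteOptimization is registered in the session's extra optimizations. */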
+ def igniteOptimizationExists(session: SparkSession): Boolean =
+ session.sessionState.experimentalMethods.extraOptimizations.contains(IgniteOptimization)
+
+ override protected def beforeAll(): Unit = {
+ super.beforeAll()
+
+ createPersonTable(client, "cache1")
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationJoinSpec.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationJoinSpec.scala
new file mode 100644
index 0000000..da22cb1
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationJoinSpec.scala
@@ -0,0 +1,538 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import java.lang.{Long => JLong}
+import org.apache.ignite.Ignite
+import org.apache.ignite.cache.query.SqlFieldsQuery
+import org.apache.ignite.internal.IgnitionEx
+import org.apache.ignite.spark.AbstractDataFrameSpec.{DEFAULT_CACHE, TEST_CONFIG_FILE, checkOptimizationResult, enclose}
+import org.apache.spark.sql.ignite.IgniteSparkSession
+import org.junit.runner.RunWith
+import org.scalatestplus.junit.JUnitRunner
+
+/**
+ * Tests for optimization of join queries.
+ */
+@RunWith(classOf[JUnitRunner])
+class IgniteOptimizationJoinSpec extends AbstractDataFrameSpec {
+ var igniteSession: IgniteSparkSession = _
+
+ describe("Optimized join queries") {
+ it("UNION") {
+ val qry =
+ """
+ | SELECT id, val1 as val FROM jt1 UNION
+ | SELECT id, val2 as val FROM jt2 UNION
+ | SELECT id, val3 as val FROM jt3
+ |""".stripMargin
+
+ val df = igniteSession.sql(qry)
+
+ checkOptimizationResult(df,
+ "SELECT id, val FROM (SELECT id, val1 as val FROM jt1 UNION " +
+ "SELECT id, val2 as val FROM jt2 UNION " +
+ "SELECT id, val3 as val FROM jt3) table1")
+
+ val data = (
+ (1L, "A"),
+ (1L, "B"),
+ (2L, "B"),
+ (2L, "C"),
+ (2L, "D"),
+ (3L, "C"),
+ (3L, "D"),
+ (3L, "E"))
+
+ checkQueryData(df, data, row ⇒ (row.getAs[JLong](0), row.getAs[String](1)))
+ }
+
+ it("UNION ALL") {
+ val qry =
+ """
+ | SELECT id, val1 as val FROM jt1 UNION ALL
+ | SELECT id, val2 as val FROM jt2 UNION ALL
+ | SELECT id, val3 as val FROM jt3
+ |""".stripMargin
+
+ val df = igniteSession.sql(qry)
+
+ checkOptimizationResult(df,
+ "SELECT id, val1 as val FROM jt1 UNION " +
+ "SELECT id, val2 as val FROM jt2 UNION " +
+ "SELECT id, val3 as val FROM jt3")
+
+ val data = (
+ (1L, "A"),
+ (1L, "B"),
+ (2L, "B"),
+ (2L, "C"),
+ (2L, "D"),
+ (3L, "C"),
+ (3L, "D"),
+ (3L, "E"))
+
+ checkQueryData(df, data, row ⇒ (row.getAs[JLong](0), row.getAs[String](1)))
+ }
+
+ it("UNION ALL ORDER") {
+ val qry =
+ """
+ | SELECT id, val1 as val FROM jt1 UNION ALL
+ | SELECT id, val2 as val FROM jt2 UNION ALL
+ | SELECT id, val3 as val FROM jt3
+ | ORDER BY id DESC, val
+ |""".stripMargin
+
+ val df = igniteSession.sql(qry)
+
+ checkOptimizationResult(df,
+ "SELECT id, val1 as val FROM jt1 UNION " +
+ "SELECT id, val2 as val FROM jt2 UNION " +
+ "SELECT id, val3 as val FROM jt3 " +
+ "ORDER BY id DESC, val")
+
+ val data = (
+ (3L, "C"),
+ (3L, "D"),
+ (3L, "E"),
+ (2L, "B"),
+ (2L, "C"),
+ (2L, "D"),
+ (1L, "A"),
+ (1L, "B")
+ )
+
+ checkQueryData(df, data, _ ⇒ 0)
+ }
+
+ it("UNION WITH AGGREGATE") {
+ val qry =
+ """
+ | SELECT VAL, COUNT(*) FROM (
+ | SELECT id, val1 as val FROM jt1 UNION
+ | SELECT id, val2 as val FROM jt2 UNION
+ | SELECT id, val3 as val FROM jt3 ) t1
+ | GROUP BY val HAVING COUNT(*) > 1
+ |""".stripMargin
+
+ val df = igniteSession.sql(qry)
+
+ checkOptimizationResult(df,
+ "SELECT VAL, count(1) FROM (" +
+ "SELECT id, val1 AS val FROM JT1 UNION " +
+ "SELECT id, val2 AS val FROM JT2 UNION " +
+ "SELECT id, val3 AS val FROM JT3" +
+ ") table1 GROUP BY val HAVING count(1) > 1")
+
+ val data = (
+ ("B", 2L),
+ ("C", 2L),
+ ("D", 2L)
+ )
+
+ checkQueryData(df, data)
+ }
+
+ it("AGGREGATE ON AGGREGATE RESULT") {
+ val qry =
+ """
+ | SELECT SUM(cnt) FROM (
+ | SELECT VAL, COUNT(*) as CNT FROM (
+ | SELECT id, val1 as val FROM jt1 UNION
+ | SELECT id, val2 as val FROM jt2 UNION
+ | SELECT id, val3 as val FROM jt3 ) t1
+ | GROUP BY val HAVING COUNT(*) > 1
+ | ) t1
+ |""".stripMargin
+
+ val df = igniteSession.sql(qry)
+
+ checkOptimizationResult(df,
+ "SELECT CAST(SUM(CNT) AS BIGINT) AS \"SUM(CNT)\" FROM (" +
+ "SELECT COUNT(1) AS CNT FROM (" +
+ "SELECT ID, VAL1 AS VAL FROM JT1 UNION " +
+ "SELECT ID, VAL2 AS VAL FROM JT2 UNION " +
+ "SELECT ID, VAL3 AS VAL FROM JT3" +
+ ") TABLE1 GROUP BY VAL HAVING CNT > 1) TABLE2")
+
+ val data = Tuple1(6.0)
+
+ checkQueryData(df, data)
+ }
+
+ it("SELF INNER JOIN") {
+ val qry =
+ """
+ |SELECT
+ | jt1.id,
+ | jt1.val1,
+ | jt2.id,
+ | jt2.val1
+ |FROM
+ | jt1 JOIN
+ | jt1 as jt2 ON jt1.val1 = jt2.val1
+ |""".stripMargin
+
+ val df = igniteSession.sql(qry)
+
+ checkOptimizationResult(df, "SELECT JT1.ID, JT1.VAL1, table1.ID, table1.VAL1 " +
+ "FROM JT1 JOIN JT1 AS table1 ON jt1.val1 = table1.val1 " +
+ "WHERE jt1.val1 IS NOT NULL AND table1.val1 IS NOT NULL")
+
+ val data = (
+ (1, "A", 1, "A"),
+ (2, "B", 2, "B"),
+ (3, "C", 3, "C")
+ )
+
+ checkQueryData(df, data)
+ }
+
+ it("SELF INNER JOIN WITH WHERE") {
+ val qry =
+ """
+ |SELECT
+ | jt1.id,
+ | jt1.val1,
+ | jt2.id,
+ | jt2.val1
+ |FROM
+ | jt1 JOIN
+ | jt1 as jt2 ON jt1.val1 = jt2.val1
+ |WHERE jt2.val1 = 'A'
+ |""".stripMargin
+
+ val df = igniteSession.sql(qry)
+
+ checkOptimizationResult(df, "SELECT JT1.ID, JT1.VAL1, table1.ID, table1.VAL1 " +
+ "FROM JT1 JOIN JT1 as table1 ON JT1.val1 = table1.val1 " +
+ "WHERE JT1.val1 = 'A' AND JT1.val1 IS NOT NULL AND table1.val1 IS NOT NULL AND table1.val1 = 'A'")
+
+ val data = Tuple1(
+ (1, "A", 1, "A")
+ )
+
+ checkQueryData(df, data)
+ }
+
+ it("INNER JOIN") {
+ val qry =
+ """
+ |SELECT
+ | jt1.id as id1,
+ | jt1.val1,
+ | jt2.id as id2,
+ | jt2.val2
+ |FROM
+ | jt1 JOIN
+ | jt2 ON jt1.val1 = jt2.val2
+ |""".stripMargin
+
+ val df = igniteSession.sql(qry)
+
+ checkOptimizationResult(df, "SELECT JT1.ID AS id1, JT1.VAL1, JT2.ID AS id2, JT2.VAL2 " +
+ "FROM JT1 JOIN JT2 ON jt1.val1 = jt2.val2 " +
+ "WHERE jt1.val1 IS NOT NULL AND jt2.val2 IS NOT NULL")
+
+ val data = (
+ (2, "B", 1, "B"),
+ (3, "C", 2, "C")
+ )
+
+ checkQueryData(df, data)
+ }
+
+ it("INNER JOIN WITH WHERE") {
+ val qry =
+ """
+ |SELECT
+ | jt1.id as id1,
+ | jt1.val1,
+ | jt2.id as id2,
+ | jt2.val2
+ |FROM
+ | jt1 JOIN
+ | jt2 ON jt1.val1 = jt2.val2
+ |WHERE
+ | jt1.id < 10
+ |""".stripMargin
+
+ val df = igniteSession.sql(qry)
+
+ checkOptimizationResult(df, "SELECT jt1.id as id1, jt1.val1, jt2.id as id2, jt2.val2 " +
+ "FROM jt1 JOIN jt2 ON jt1.val1 = jt2.val2 " +
+ "WHERE jt1.id < 10 AND jt1.val1 IS NOT NULL and jt2.val2 IS NOT NULL")
+
+ val data = (
+ (2, "B", 1, "B"),
+ (3, "C", 2, "C")
+ )
+
+ checkQueryData(df, data)
+ }
+
+ it("LEFT JOIN") {
+ val qry =
+ """
+ |SELECT
+ | jt1.id as id1,
+ | jt1.val1,
+ | jt2.id as id2,
+ | jt2.val2
+ |FROM
+ | jt1 LEFT JOIN
+ | jt2 ON jt1.val1 = jt2.val2
+ |""".stripMargin
+
+ val df = igniteSession.sql(qry)
+ df.show()
+
+ checkOptimizationResult(df, "SELECT jt1.id as id1, jt1.val1, jt2.id as id2, jt2.val2 " +
+ "FROM jt1 LEFT JOIN jt2 ON jt1.val1 = jt2.val2 WHERE jt2.val2 is not null")
+
+ val data = (
+ (2, "B", 1, "B"),
+ (3, "C", 2, "C")
+ )
+
+ checkQueryData(df, data)
+ }
+
+ it("RIGHT JOIN") {
+ val qry =
+ """
+ |SELECT
+ | jt1.id as id1,
+ | jt1.val1,
+ | jt2.id as id2,
+ | jt2.val2
+ |FROM
+ | jt1 RIGHT JOIN
+ | jt2 ON jt1.val1 = jt2.val2
+ |""".stripMargin
+
+ val df = igniteSession.sql(qry)
+
+ checkOptimizationResult(df, "SELECT jt1.id as id1, jt1.val1, jt2.id as id2, jt2.val2 " +
+ "FROM jt1 RIGHT JOIN jt2 ON jt1.val1 = jt2.val2 WHERE jt1.val1 is not null")
+
+ val data = (
+ (2, "B", 1, "B"),
+ (3, "C", 2, "C")
+ )
+
+ checkQueryData(df, data, r ⇒ if (r.get(0) == null) 100L else r.getAs[Long](0))
+ }
+
+ // TODO: Fix multiple joins in IGNITE-12244
+ ignore("JOIN 3 TABLE") {
+ val qry =
+ """
+ |SELECT
+ | jt1.id as id1,
+ | jt1.val1 as val1,
+ | jt2.id as id2,
+ | jt2.val2 as val2,
+ | jt3.id as id3,
+ | jt3.val3 as val3
+ |FROM
+ | jt1 LEFT JOIN
+ | jt2 ON jt1.val1 = jt2.val2 LEFT JOIN
+ | jt3 ON jt1.val1 = jt3.val3
+ |""".stripMargin
+
+ val df = igniteSession.sql(qry)
+
+ checkOptimizationResult(df,
+ "SELECT table1.id as id1, table1.val1, table1.id_2 as id2, table1.val2, jt3.id as id3, jt3.val3 " +
+ "FROM (" +
+ "SELECT jt1.val1, jt1.id, jt2.val2, jt2.id as id_2 " +
+ "FROM JT1 LEFT JOIN jt2 ON jt1.val1 = jt2.val2 WHERE jt2.val2 is not null) table1 LEFT JOIN " +
+ "jt3 ON table1.val1 = jt3.val3 WHERE jt3.val3 is not null")
+
+ val data = (
+ (2, "B", 1, "B", null, null),
+ (3, "C", 2, "C", null, null))
+
+ checkQueryData(df, data)
+ }
+
+ it("JOIN 3 TABLE AND AGGREGATE") {
+ val qry =
+ """
+ |SELECT SUM(id1) FROM (
+ | SELECT
+ | jt1.id as id1,
+ | jt1.val1 as val1,
+ | jt2.id as id2,
+ | jt2.val2 as val2,
+ | jt3.id as id3,
+ | jt3.val3 as val3
+ |FROM
+ | jt1 LEFT JOIN
+ | jt2 ON jt1.val1 = jt2.val2 LEFT JOIN
+ | jt3 ON jt1.val1 = jt3.val3
+ |) WHERE CONCAT(val1, val2) = 'BB' OR CONCAT(val1, val3) = 'AA'
+ |""".stripMargin
+
+ val df = igniteSession.sql(qry)
+
+ checkOptimizationResult(df,
+ "SELECT CAST(SUM(table1.ID) AS BIGINT) AS \"sum(id1)\" FROM " +
+ "(SELECT JT1.VAL1, JT1.ID, JT2.VAL2 FROM JT1 LEFT JOIN JT2 ON JT1.val1 = JT2.val2 WHERE JT2.val2 is not null) table1 LEFT JOIN " +
+ "JT3 ON table1.val1 = JT3.val3 " +
+ "WHERE CONCAT(table1.val1, table1.val2) = 'BB' OR CONCAT(table1.val1, JT3.val3) = 'AA' AND JT3.val3 is not null")
+
+ val data = Tuple1(2)
+
+ checkQueryData(df, data, _ ⇒ 0)
+ }
+
+ it("INNER JOIN SUBQUERY") {
+ val qry =
+ """
+ |SELECT sum_id, val1, val2 FROM (
+ | SELECT
+ | jt1.id + jt2.id as sum_id,
+ | jt1.val1 as val1,
+ | jt2.val2 as val2
+ | FROM
+ | jt1 JOIN
+ | jt2 ON jt1.val1 = jt2.val2
+ |) t1 WHERE sum_id != 15
+ |""".stripMargin
+
+ val df = igniteSession.sql(qry)
+
+ checkOptimizationResult(df,
+ "SELECT jt1.id + jt2.id as sum_id, jt1.val1, jt2.val2 FROM " +
+ "jt1 JOIN jt2 ON NOT jt1.id + jt2.id = 15 AND jt1.val1 = jt2.val2 " +
+ "WHERE " +
+ "jt1.val1 IS NOT NULL AND " +
+ "jt2.val2 IS NOT NULL"
+ )
+
+ val data = (
+ (3, "B", "B"),
+ (5, "C", "C")
+ )
+
+ checkQueryData(df, data)
+ }
+
+ it("INNER JOIN SUBQUERY - 2") {
+ val qry =
+ """
+ |SELECT SUM(sum_id) FROM (
+ | SELECT
+ | jt1.id + jt2.id as sum_id
+ | FROM
+ | jt1 JOIN
+ | jt2 ON jt1.val1 = jt2.val2
+ |) t1 WHERE sum_id != 15
+ |""".stripMargin
+
+ val df = igniteSession.sql(qry)
+
+ checkOptimizationResult(df,
+ "SELECT CAST(SUM(JT1.ID + JT2.ID) AS BIGINT) AS \"sum(sum_id)\" " +
+ "FROM JT1 JOIN JT2 ON NOT JT1.id + JT2.id = 15 AND JT1.val1 = JT2.val2 " +
+ "WHERE JT1.val1 IS NOT NULL AND JT2.val2 IS NOT NULL")
+
+ val data = Tuple1(8)
+
+ checkQueryData(df, data)
+ }
+ }
+
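+ /** Creates the `jt1`, `jt2` and `jt3` test tables, fills them with a few rows and indexes the value columns. */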
+ def createJoinedTables(client: Ignite, cacheName: String): Unit = {
+ val cache = client.cache(cacheName)
+
+ cache.query(new SqlFieldsQuery(
+ """
+ | CREATE TABLE jt1 (
+ | id LONG,
+ | val1 VARCHAR,
+ | PRIMARY KEY (id)) WITH "backups=1"
+ """.stripMargin)).getAll
+
+ cache.query(new SqlFieldsQuery(
+ """
+ | CREATE TABLE jt2 (
+ | id LONG,
+ | val2 VARCHAR,
+ | PRIMARY KEY (id)) WITH "backups=1"
+ """.stripMargin)).getAll
+
+ cache.query(new SqlFieldsQuery(
+ """
+ | CREATE TABLE jt3 (
+ | id LONG,
+ | val3 VARCHAR,
+ | PRIMARY KEY (id)) WITH "backups=1"
+ """.stripMargin)).getAll
+
+ var qry = new SqlFieldsQuery("INSERT INTO jt1 (id, val1) values (?, ?)")
+
+ cache.query(qry.setArgs(1L.asInstanceOf[JLong], "A")).getAll
+ cache.query(qry.setArgs(2L.asInstanceOf[JLong], "B")).getAll
+ cache.query(qry.setArgs(3L.asInstanceOf[JLong], "C")).getAll
+
+ qry = new SqlFieldsQuery("INSERT INTO jt2 (id, val2) values (?, ?)")
+
+ cache.query(qry.setArgs(1L.asInstanceOf[JLong], "B")).getAll
+ cache.query(qry.setArgs(2L.asInstanceOf[JLong], "C")).getAll
+ cache.query(qry.setArgs(3L.asInstanceOf[JLong], "D")).getAll
+
+ qry = new SqlFieldsQuery("INSERT INTO jt3 (id, val3) values (?, ?)")
+
+ cache.query(qry.setArgs(1L.asInstanceOf[JLong], "A")).getAll
+ cache.query(qry.setArgs(2L.asInstanceOf[JLong], "D")).getAll
+ cache.query(qry.setArgs(3L.asInstanceOf[JLong], "E")).getAll
+
+ cache.query(new SqlFieldsQuery("CREATE INDEX idx1 ON jt1(val1)")).getAll
+ cache.query(new SqlFieldsQuery("CREATE INDEX idx2 ON jt2(val2)")).getAll
+ cache.query(new SqlFieldsQuery("CREATE INDEX idx3 ON jt3(val3)")).getAll
+ }
+
+ override protected def beforeAll(): Unit = {
+ super.beforeAll()
+
+ createPersonTable(client, DEFAULT_CACHE)
+
+ createCityTable(client, DEFAULT_CACHE)
+
+ createJoinedTables(client, DEFAULT_CACHE)
+
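+ // Ignite config provider for the Spark session: loads the shared test config, switches to client mode,
+ // and uses a distinct instance name (presumably to avoid clashing with the server nodes started by
+ // AbstractDataFrameSpec).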
+ val configProvider = enclose(null) (x ⇒ () ⇒ {
+ val cfg = IgnitionEx.loadConfiguration(TEST_CONFIG_FILE).get1()
+
+ cfg.setClientMode(true)
+
+ cfg.setIgniteInstanceName("client-2")
+
+ cfg
+ })
+
+ igniteSession = IgniteSparkSession.builder()
+ .config(spark.sparkContext.getConf)
+ .igniteConfigProvider(configProvider)
+ .getOrCreate()
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationMathFuncSpec.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationMathFuncSpec.scala
new file mode 100644
index 0000000..04ffbd4
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationMathFuncSpec.scala
@@ -0,0 +1,353 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import org.apache.ignite.Ignite
+import org.apache.ignite.cache.query.SqlFieldsQuery
+import org.apache.ignite.internal.IgnitionEx
+import org.apache.ignite.spark.AbstractDataFrameSpec.{DEFAULT_CACHE, TEST_CONFIG_FILE, checkOptimizationResult, enclose}
+import org.apache.spark.sql.ignite.IgniteSparkSession
+import org.junit.runner.RunWith
+import org.scalatestplus.junit.JUnitRunner
+
+import java.lang.{Double => JDouble, Long => JLong}
+
+/**
+ * Tests push-down of math functions (ABS, trigonometry, ROUND, LOG, bitwise operators, etc.) to Ignite SQL.
+ */
+@RunWith(classOf[JUnitRunner])
+class IgniteOptimizationMathFuncSpec extends AbstractDataFrameSpec {
+ var igniteSession: IgniteSparkSession = _
+
+ describe("Supported optimized string functions") {
+ it("ABS") {
+ val df = igniteSession.sql("SELECT ABS(val) FROM numbers WHERE id = 6")
+
+ checkOptimizationResult(df, "SELECT ABS(val) FROM numbers WHERE id = 6")
+
+ val data = Tuple1(.5)
+
+ checkQueryData(df, data)
+ }
+
+ it("ACOS") {
+ val df = igniteSession.sql("SELECT ACOS(val) FROM numbers WHERE id = 7")
+
+ checkOptimizationResult(df, "SELECT ACOS(val) FROM numbers WHERE id = 7")
+
+ val data = Tuple1(Math.PI)
+
+ checkQueryData(df, data)
+ }
+
+ it("ASIN") {
+ val df = igniteSession.sql("SELECT ASIN(val) FROM numbers WHERE id = 7")
+
+ checkOptimizationResult(df, "SELECT ASIN(val) FROM numbers WHERE id = 7")
+
+ val data = Tuple1(-Math.PI/2)
+
+ checkQueryData(df, data)
+ }
+
+ it("ATAN") {
+ val df = igniteSession.sql("SELECT ATAN(val) FROM numbers WHERE id = 7")
+
+ checkOptimizationResult(df, "SELECT ATAN(val) FROM numbers WHERE id = 7")
+
+ val data = Tuple1(-Math.PI/4)
+
+ checkQueryData(df, data)
+ }
+
+ it("COS") {
+ val df = igniteSession.sql("SELECT COS(val) FROM numbers WHERE id = 1")
+
+ checkOptimizationResult(df, "SELECT COS(val) FROM numbers WHERE id = 1")
+
+ val data = Tuple1(1.0)
+
+ checkQueryData(df, data)
+ }
+
+ it("SIN") {
+ val df = igniteSession.sql("SELECT SIN(val) FROM numbers WHERE id = 1")
+
+ checkOptimizationResult(df, "SELECT SIN(val) FROM numbers WHERE id = 1")
+
+ val data = Tuple1(.0)
+
+ checkQueryData(df, data)
+ }
+
+ it("TAN") {
+ val df = igniteSession.sql("SELECT TAN(val) FROM numbers WHERE id = 1")
+
+ checkOptimizationResult(df, "SELECT TAN(val) FROM numbers WHERE id = 1")
+
+ val data = Tuple1(.0)
+
+ checkQueryData(df, data)
+ }
+
+ it("COSH") {
+ val df = igniteSession.sql("SELECT COSH(val) FROM numbers WHERE id = 1")
+
+ checkOptimizationResult(df, "SELECT COSH(val) FROM numbers WHERE id = 1")
+
+ val data = Tuple1(1.0)
+
+ checkQueryData(df, data)
+ }
+
+ it("SINH") {
+ val df = igniteSession.sql("SELECT SINH(val) FROM numbers WHERE id = 1")
+
+ checkOptimizationResult(df, "SELECT SINH(val) FROM numbers WHERE id = 1")
+
+ val data = Tuple1(.0)
+
+ checkQueryData(df, data)
+ }
+
+ it("TANH") {
+ val df = igniteSession.sql("SELECT TANH(val) FROM numbers WHERE id = 1")
+
+ checkOptimizationResult(df, "SELECT TANH(val) FROM numbers WHERE id = 1")
+
+ val data = Tuple1(.0)
+
+ checkQueryData(df, data)
+ }
+
+ it("ATAN2") {
+ val df = igniteSession.sql("SELECT ATAN2(val, 0.0) FROM numbers WHERE id = 1")
+
+ checkOptimizationResult(df, "SELECT ATAN2(VAL, 0.0) FROM NUMBERS WHERE ID = 1")
+
+ val data = Tuple1(.0)
+
+ checkQueryData(df, data)
+ }
+
+ it("MOD") {
+ val df = igniteSession.sql("SELECT val % 9 FROM numbers WHERE id = 8")
+
+ checkOptimizationResult(df, "SELECT VAL % 9.0 AS \"(VAL % 9)\" FROM NUMBERS WHERE ID = 8")
+
+ val data = Tuple1(6.0)
+
+ checkQueryData(df, data)
+ }
+
+ it("CEIL") {
+ val df = igniteSession.sql("SELECT CEIL(val) FROM numbers WHERE id = 2")
+
+ checkOptimizationResult(df, "SELECT CAST(CEIL(val) AS LONG) as \"CEIL(val)\" " +
+ "FROM numbers WHERE id = 2")
+
+ val data = Tuple1(1)
+
+ checkQueryData(df, data)
+ }
+
+ it("ROUND") {
+ val df = igniteSession.sql("SELECT id, ROUND(val) FROM numbers WHERE id IN (2, 9, 10)")
+
+ checkOptimizationResult(df, "SELECT id, ROUND(val, 0) FROM numbers WHERE id IN (2, 9, 10)")
+
+ val data = (
+ (2, 1.0),
+ (9, 1.0),
+ (10, 0.0))
+
+ checkQueryData(df, data)
+ }
+
+ it("FLOOR") {
+ val df = igniteSession.sql("SELECT FLOOR(val) FROM numbers WHERE id = 2")
+
+ checkOptimizationResult(df, "SELECT CAST(FLOOR(val) AS LONG) as \"FLOOR(val)\" FROM numbers " +
+ "WHERE id = 2")
+
+ val data = Tuple1(0)
+
+ checkQueryData(df, data)
+ }
+
+ it("POWER") {
+ val df = igniteSession.sql("SELECT POWER(val, 3) FROM numbers WHERE id = 4")
+
+ checkOptimizationResult(df, "SELECT POWER(VAL, 3.0) AS \"POWER(VAL, 3)\" FROM NUMBERS WHERE ID = 4")
+
+ val data = Tuple1(8.0)
+
+ checkQueryData(df, data)
+ }
+
+ it("EXP") {
+ val df = igniteSession.sql("SELECT id, EXP(val) FROM numbers WHERE id IN (1, 3)")
+
+ checkOptimizationResult(df, "SELECT id, EXP(val) FROM numbers WHERE id IN (1, 3)")
+
+ val data = (
+ (1, 1),
+ (3, Math.E))
+
+ checkQueryData(df, data)
+ }
+
+ it("LOG") {
+ val df = igniteSession.sql("SELECT LOG(val) FROM numbers WHERE id = 12")
+
+ checkOptimizationResult(df, "SELECT LOG(val) as \"LOG(E(), val)\" FROM numbers " +
+ "WHERE id = 12")
+
+ val data = Tuple1(2.0)
+
+ checkQueryData(df, data)
+ }
+
+ it("LOG10") {
+ val df = igniteSession.sql("SELECT LOG10(val) FROM numbers WHERE id = 11")
+
+ checkOptimizationResult(df, "SELECT LOG10(val) FROM numbers WHERE id = 11")
+
+ val data = Tuple1(2.0)
+
+ checkQueryData(df, data)
+ }
+
+ it("DEGREES") {
+ val df = igniteSession.sql("SELECT DEGREES(val) FROM numbers WHERE id = 13")
+
+ checkOptimizationResult(df, "SELECT DEGREES(val) FROM numbers WHERE id = 13")
+
+ val data = Tuple1(180.0)
+
+ checkQueryData(df, data)
+ }
+
+ it("RADIANS") {
+ val df = igniteSession.sql("SELECT RADIANS(val) FROM numbers WHERE id = 14")
+
+ checkOptimizationResult(df, "SELECT RADIANS(val) FROM numbers WHERE id = 14")
+
+ val data = Tuple1(Math.PI)
+
+ checkQueryData(df, data)
+ }
+
+ it("BITAND") {
+ val df = igniteSession.sql("SELECT int_val&1 FROM numbers WHERE id = 15")
+
+ checkOptimizationResult(df, "SELECT BITAND(INT_VAL, 1) AS \"(INT_VAL & 1)\" FROM NUMBERS WHERE ID = 15")
+
+ val data = Tuple1(1)
+
+ checkQueryData(df, data)
+ }
+
+ it("BITOR") {
+ val df = igniteSession.sql("SELECT int_val|1 FROM numbers WHERE id = 16")
+
+ checkOptimizationResult(df, "SELECT BITOR(INT_VAL, 1) AS \"(INT_VAL | 1)\" FROM NUMBERS WHERE ID = 16")
+
+ val data = Tuple1(3)
+
+ checkQueryData(df, data)
+ }
+
+ it("BITXOR") {
+ val df = igniteSession.sql("SELECT int_val^1 FROM numbers WHERE id = 17")
+
+ checkOptimizationResult(df, "SELECT BITXOR(INT_VAL, 1) AS \"(INT_VAL ^ 1)\" FROM NUMBERS WHERE ID = 17")
+
+ val data = Tuple1(2)
+
+ checkQueryData(df, data)
+ }
+
+ it("RAND") {
+ val df = igniteSession.sql("SELECT id, RAND(1) FROM numbers WHERE id = 17")
+
+ checkOptimizationResult(df, "SELECT id, RAND(1) FROM numbers WHERE id = 17")
+
+ val data = df.rdd.collect
+
+ assert(data(0).getAs[JLong]("id") == 17L)
+ assert(data(0).getAs[JDouble]("rand(1)") != null)
+ }
+ }
+
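+ /** Creates the `numbers` test table with double and long columns and inserts the values expected by the math-function tests. */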
+ def createNumberTable(client: Ignite, cacheName: String): Unit = {
+ val cache = client.cache(cacheName)
+
+ cache.query(new SqlFieldsQuery(
+ """
+ | CREATE TABLE numbers (
+ | id LONG,
+ | val DOUBLE,
+ | int_val LONG,
+ | PRIMARY KEY (id)) WITH "backups=1"
+ """.stripMargin)).getAll
+
+ var qry = new SqlFieldsQuery("INSERT INTO numbers (id, val) values (?, ?)")
+
+ cache.query(qry.setArgs(1L.asInstanceOf[JLong], .0.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(2L.asInstanceOf[JLong], .5.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(3L.asInstanceOf[JLong], 1.0.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(4L.asInstanceOf[JLong], 2.0.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(5L.asInstanceOf[JLong], 4.0.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(6L.asInstanceOf[JLong], -0.5.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(7L.asInstanceOf[JLong], -1.0.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(8L.asInstanceOf[JLong], 42.0.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(9L.asInstanceOf[JLong], .51.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(10L.asInstanceOf[JLong], .49.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(11L.asInstanceOf[JLong], 100.0.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(12L.asInstanceOf[JLong], (Math.E*Math.E).asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(13L.asInstanceOf[JLong], Math.PI.asInstanceOf[JDouble])).getAll
+ cache.query(qry.setArgs(14L.asInstanceOf[JLong], 180.0.asInstanceOf[JDouble])).getAll
+
+ qry = new SqlFieldsQuery("INSERT INTO numbers (id, int_val) values (?, ?)")
+
+ cache.query(qry.setArgs(15L.asInstanceOf[JLong], 1L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(16L.asInstanceOf[JLong], 2L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(17L.asInstanceOf[JLong], 3L.asInstanceOf[JLong])).getAll
+ }
+
+ override protected def beforeAll(): Unit = {
+ super.beforeAll()
+
+ createNumberTable(client, DEFAULT_CACHE)
+
+ val configProvider = enclose(null) (x ⇒ () ⇒ {
+ val cfg = IgnitionEx.loadConfiguration(TEST_CONFIG_FILE).get1()
+
+ cfg.setClientMode(true)
+
+ cfg.setIgniteInstanceName("client-2")
+
+ cfg
+ })
+
+ igniteSession = IgniteSparkSession.builder()
+ .config(spark.sparkContext.getConf)
+ .igniteConfigProvider(configProvider)
+ .getOrCreate()
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationSpec.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationSpec.scala
new file mode 100644
index 0000000..0165529
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationSpec.scala
@@ -0,0 +1,361 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import org.apache.ignite.cache.query.annotations.QuerySqlField
+import org.apache.ignite.configuration.CacheConfiguration
+import org.apache.ignite.internal.IgnitionEx
+import org.apache.ignite.internal.util.IgniteUtils.gridClassLoader
+import org.apache.ignite.spark.AbstractDataFrameSpec.{DEFAULT_CACHE, TEST_CONFIG_FILE, checkOptimizationResult, enclose}
+import org.apache.ignite.spark.IgniteDataFrameSettings.{FORMAT_IGNITE, OPTION_TABLE}
+import org.apache.spark.sql.functions.lit
+import org.apache.spark.sql.ignite.IgniteSparkSession
+import org.apache.spark.sql.types.DataTypes.StringType
+import org.apache.spark.sql.{Dataset, Row}
+import org.junit.runner.RunWith
+import org.scalatestplus.junit.JUnitRunner
+
+import scala.annotation.meta.field
+
+/**
+ * Tests general query optimization: projections, aggregates, ORDER BY, LIMIT, casts, and queries that must not be optimized.
+ */
+@RunWith(classOf[JUnitRunner])
+class IgniteOptimizationSpec extends AbstractDataFrameSpec {
+ var igniteSession: IgniteSparkSession = _
+
+ describe("Optimized queries") {
+ it("SELECT name as city_name FROM city") {
+ val df = igniteSession.sql("SELECT name as city_name FROM city")
+
+ checkOptimizationResult(df, "SELECT name as city_name FROM city")
+ }
+
+ it("SELECT count(*) as city_count FROM city") {
+ val df = igniteSession.sql("SELECT count(1) as city_count FROM city")
+
+ checkOptimizationResult(df, "SELECT count(1) as city_count FROM city")
+ }
+
+ it("SELECT count(*), city_id FROM person p GROUP BY city_id") {
+ val df = igniteSession.sql("SELECT city_id, count(*) FROM person GROUP BY city_id")
+
+ checkOptimizationResult(df, "SELECT city_id, count(1) FROM person GROUP BY city_id")
+
+ val data = (
+ (1, 1),
+ (2, 3),
+ (3, 1)
+ )
+
+ checkQueryData(df, data)
+ }
+
+ it("SELECT id, name FROM person WHERE id > 3 ORDER BY id") {
+ val df = igniteSession.sql("SELECT id, name FROM person WHERE id > 3 ORDER BY id")
+
+ checkOptimizationResult(df, "SELECT id, name FROM person WHERE id > 3 ORDER BY id")
+
+ val data = (
+ (4, "Richard Miles"),
+ (5, null))
+
+ checkQueryData(df, data)
+ }
+
+ it("SELECT id, name FROM person WHERE id > 3 ORDER BY id DESC") {
+ val df = igniteSession.sql("SELECT id, name FROM person WHERE id > 3 ORDER BY id DESC")
+
+ checkOptimizationResult(df, "SELECT id, name FROM person WHERE id > 3 ORDER BY id DESC")
+
+ val data = (
+ (5, null),
+ (4, "Richard Miles"))
+
+ checkQueryData(df, data, -_.getAs[Long]("id"))
+ }
+
+ it("SELECT id, test_reverse(name) FROM city ORDER BY id") {
+ igniteSession.udf.register("test_reverse", (str: String) ⇒ str.reverse)
+
+ val df = igniteSession.sql("SELECT id, test_reverse(name) FROM city ORDER BY id")
+
+ checkOptimizationResult(df, "SELECT name, id FROM city")
+
+ val data = (
+ (1, "Forest Hill".reverse),
+ (2, "Denver".reverse),
+ (3, "St. Petersburg".reverse),
+ (4, "St. Petersburg".reverse))
+
+ checkQueryData(df, data)
+ }
+
+ it("SELECT count(*), city_id FROM person p GROUP BY city_id HAVING count(*) > 1") {
+ val df = igniteSession.sql("SELECT city_id, count(*) FROM person p GROUP BY city_id HAVING count(*) > 1")
+
+ checkOptimizationResult(df, "SELECT city_id, count(1) FROM person GROUP BY city_id HAVING count(1) > 1")
+
+ val data = Tuple1(
+ (2, 3))
+
+ checkQueryData(df, data)
+ }
+
+ // TODO: fix it with IGNITE-12243
+ ignore("SELECT id FROM city HAVING id > 1") {
+ val df = igniteSession.sql("SELECT id FROM city HAVING id > 1")
+
+ checkOptimizationResult(df, "SELECT id FROM city WHERE id > 1")
+
+ val data = (2, 3, 4)
+
+ checkQueryData(df, data)
+ }
+
+ it("SELECT DISTINCT name FROM city ORDER BY name") {
+ val df = igniteSession.sql("SELECT DISTINCT name FROM city ORDER BY name")
+
+ checkOptimizationResult(df, "SELECT name FROM city GROUP BY name ORDER BY name")
+
+ val data = ("Denver", "Forest Hill", "St. Petersburg")
+
+ checkQueryData(df, data)
+ }
+
+ it("SELECT id, name FROM city ORDER BY id, name") {
+ val df = igniteSession.sql("SELECT id, name FROM city ORDER BY id, name")
+
+ checkOptimizationResult(df, "SELECT id, name FROM city ORDER BY id, name")
+
+ val data = (
+ (1, "Forest Hill"),
+ (2, "Denver"),
+ (3, "St. Petersburg"),
+ (4, "St. Petersburg"))
+
+ checkQueryData(df, data)
+ }
+
+ it("SELECT id, name FROM city WHERE id > 1 ORDER BY id") {
+ val df = igniteSession.sql("SELECT id, name FROM city WHERE id > 1 ORDER BY id")
+
+ checkOptimizationResult(df, "SELECT id, name FROM city WHERE id > 1 ORDER BY id")
+
+ val data = (
+ (2, "Denver"),
+ (3, "St. Petersburg"),
+ (4, "St. Petersburg"))
+
+ checkQueryData(df, data)
+ }
+
+ it("SELECT count(*) FROM city") {
+ val df = igniteSession.sql("SELECT count(*) FROM city")
+
+ checkOptimizationResult(df, "SELECT count(1) FROM city")
+
+ val data = Tuple1(4)
+
+ checkQueryData(df, data)
+ }
+
+ it("SELECT count(DISTINCT name) FROM city") {
+ val df = igniteSession.sql("SELECT count(DISTINCT name) FROM city")
+
+ checkOptimizationResult(df, "SELECT count(DISTINCT name) FROM city")
+
+ val data = Tuple1(3)
+
+ checkQueryData(df, data)
+ }
+
+ it("SELECT id FROM city LIMIT 2") {
+ val df = igniteSession.sql("SELECT id FROM city LIMIT 2")
+
+ checkOptimizationResult(df, "SELECT id FROM city LIMIT 2")
+
+ val data = (1, 2)
+
+ checkQueryData(df, data)
+ }
+
+ it("SELECT CAST(id AS STRING) FROM city") {
+ val df = igniteSession.sql("SELECT CAST(id AS STRING) FROM city")
+
+ checkOptimizationResult(df, "SELECT CAST(id AS varchar) as id FROM city")
+
+ val data = ("1", "2", "3", "4")
+
+ checkQueryData(df, data)
+ }
+
+ it("SELECT SQRT(id) FROM city WHERE id = 4 OR id = 1") {
+ val df = igniteSession.sql("SELECT SQRT(id) FROM city WHERE id = 4 OR id = 1")
+
+ checkOptimizationResult(df,
+ "SELECT SQRT(CAST(ID AS DOUBLE)) AS \"SQRT(ID)\" FROM CITY WHERE ID = 4 OR ID = 1")
+
+ val data = (1, 2)
+
+ checkQueryData(df, data)
+ }
+
+ it("SELECT CONCAT(id, \" - this is ID\") FROM city") {
+ val df = igniteSession.sql("SELECT CONCAT(id, \" - this is ID\") FROM city")
+
+ checkOptimizationResult(df,
+ "SELECT CONCAT(CAST(ID AS VARCHAR), ' - THIS IS ID') AS \"CONCAT(ID, - THIS IS ID)\" FROM CITY")
+
+ val data = (
+ "1 - this is ID",
+ "2 - this is ID",
+ "3 - this is ID",
+ "4 - this is ID")
+
+ checkQueryData(df, data)
+ }
+
+ it("SELECT id FROM city WHERE CONCAT(id, \" - this is ID\") = \"1 - this is ID\"") {
+ val df = igniteSession.sql("SELECT id FROM city WHERE CONCAT(id, \" - this is ID\") = \"1 - this is ID\"")
+
+ checkOptimizationResult(df,
+ "SELECT id FROM city WHERE CONCAT(CAST(id AS VARCHAR), ' - this is ID') = '1 - this is ID'")
+
+ val data = Tuple1(1)
+
+ checkQueryData(df, data)
+ }
+
+ it("Should optimize union") {
+ val union = readTable("JPerson").union(readTable("JPerson2"))
+
+ val data = (
+ (1, "JPerson-1"),
+ (2, "JPerson-2"))
+
+ checkQueryData(union, data)
+ }
+
+ it("Should optimize null column") {
+ val p = readTable("JPerson").withColumn("nullColumn", lit(null).cast(StringType))
+
+ val data = Tuple1(
+ (1, "JPerson-1", null))
+
+ checkQueryData(p, data)
+ }
+ }
+
+ describe("Not Optimized Queries") {
+ it("SELECT id, name FROM json_cities") {
+ val citiesDataFrame = igniteSession.read.json(
+ gridClassLoader().getResource("cities.json").getFile)
+
+ citiesDataFrame.createOrReplaceTempView("JSON_CITIES")
+
+ val df = igniteSession.sql("SELECT id, name FROM json_cities")
+
+ val data = (
+ (1, "Forest Hill"),
+ (2, "Denver"),
+ (3, "St. Petersburg"))
+
+ checkQueryData(df, data)
+ }
+
+ it("SELECT id, test_reverse(name) tr FROM city WHERE test_reverse(name) = 'revneD' ORDER BY id") {
+ val df = igniteSession.sql("SELECT id, test_reverse(name) tr " +
+ "FROM city WHERE test_reverse(name) = 'revneD' ORDER BY id")
+
+ checkOptimizationResult(df)
+ }
+
+ it("SELECT id, test_reverse(name) tr FROM city WHERE test_reverse(name) = 'revneD' and id > 0 ORDER BY id") {
+ val df = igniteSession.sql("SELECT id, test_reverse(name) tr " +
+ "FROM city WHERE test_reverse(name) = 'revneD' and id > 0 ORDER BY id")
+
+ checkOptimizationResult(df)
+ }
+
+ it("SELECT id, test_reverse(name) tr FROM city ORDER BY tr") {
+ val df = igniteSession.sql("SELECT id, test_reverse(name) tr FROM city ORDER BY tr")
+
+ checkOptimizationResult(df)
+ }
+
+ it("SELECT count(*), test_reverse(name) tr FROM city GROUP BY test_reverse(name)") {
+ val df = igniteSession.sql("SELECT count(*), test_reverse(name) tr FROM city GROUP BY test_reverse(name)")
+
+ checkOptimizationResult(df)
+ }
+ }
+
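+ /** Loads an Ignite SQL table as a DataFrame through the Ignite data source. */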
+ def readTable(tblName: String): Dataset[Row] =
+ igniteSession.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_TABLE, tblName)
+ .option(IgniteDataFrameSettings.OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .load
+
+ override protected def beforeAll(): Unit = {
+ super.beforeAll()
+
+ createPersonTable(client, DEFAULT_CACHE)
+
+ createCityTable(client, DEFAULT_CACHE)
+
+ val p = client.getOrCreateCache(new CacheConfiguration[Long, JPerson]()
+ .setName("P")
+ .setSqlSchema("SQL_PUBLIC")
+ .setIndexedTypes(classOf[Long], classOf[JPerson]))
+
+ p.put(1L, new JPerson(1L, "JPerson-1"))
+
+ val p2 = client.getOrCreateCache(new CacheConfiguration[Long, JPerson2]()
+ .setName("P2")
+ .setSqlSchema("SQL_PUBLIC")
+ .setIndexedTypes(classOf[Long], classOf[JPerson2]))
+
+ p2.put(1L, new JPerson2(2L, "JPerson-2"))
+
+ val configProvider = enclose(null) (x ⇒ () ⇒ {
+ val cfg = IgnitionEx.loadConfiguration(TEST_CONFIG_FILE).get1()
+
+ cfg.setClientMode(true)
+
+ cfg.setIgniteInstanceName("client-2")
+
+ cfg
+ })
+
+ igniteSession = IgniteSparkSession.builder()
+ .config(spark.sparkContext.getConf)
+ .igniteConfigProvider(configProvider)
+ .getOrCreate()
+
+ igniteSession.udf.register("test_reverse", (str: String) ⇒ str.reverse)
+ }
+
+ case class JPerson(
+ @(QuerySqlField @field) id: Long,
+ @(QuerySqlField @field)(index = true) name: String)
+
+ case class JPerson2(
+ @(QuerySqlField @field) id: Long,
+ @(QuerySqlField @field)(index = true) name: String)
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationStringFuncSpec.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationStringFuncSpec.scala
new file mode 100644
index 0000000..b2eb5f7
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationStringFuncSpec.scala
@@ -0,0 +1,379 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import org.apache.ignite.Ignite
+import org.apache.ignite.cache.query.SqlFieldsQuery
+import org.apache.ignite.internal.IgnitionEx
+import org.apache.ignite.spark.AbstractDataFrameSpec.{DEFAULT_CACHE, TEST_CONFIG_FILE, checkOptimizationResult, enclose}
+import org.apache.spark.sql.ignite.IgniteSparkSession
+import org.junit.runner.RunWith
+import org.scalatestplus.junit.JUnitRunner
+
+import java.lang.{Long => JLong}
+
+/**
+ * === Not supported by Spark ===
+ * CHAR
+ * DIFFERENCE
+ * HEXTORAW
+ * RAWTOHEX
+ * REGEXP_LIKE
+ * SOUNDEX
+ * STRINGDECODE
+ * STRINGENCODE
+ * STRINGTOUTF8
+ * UTF8TOSTRING
+ * XMLATTR
+ * XMLNODE
+ * XMLCOMMENT
+ * XMLCDATA
+ * XMLSTARTDOC
+ * XMLTEXT
+ * TO_CHAR - A function that formats a timestamp, a number, or text.
+ * ====== These functions are in Spark master but not yet in a release ======
+ * LEFT
+ * RIGHT
+ * INSERT
+ * REPLACE
+ */
+@RunWith(classOf[JUnitRunner])
+class IgniteOptimizationStringFuncSpec extends AbstractDataFrameSpec {
+ var igniteSession: IgniteSparkSession = _
+
+ describe("Supported optimized string functions") {
+ it("LENGTH") {
+ val df = igniteSession.sql("SELECT LENGTH(str) FROM strings WHERE id <= 3")
+
+ checkOptimizationResult(df, "SELECT CAST(LENGTH(str) AS INTEGER) as \"length(str)\" FROM strings " +
+ "WHERE id <= 3")
+
+ val data = (3, 3, 6)
+
+ checkQueryData(df, data)
+ }
+
+ it("RTRIM") {
+ val df = igniteSession.sql("SELECT RTRIM(str) FROM strings WHERE id = 3")
+
+ checkOptimizationResult(df, "SELECT RTRIM(str) FROM strings WHERE id = 3")
+
+ val data = Tuple1("AAA")
+
+ checkQueryData(df, data)
+ }
+
+ it("RTRIMWithTrimStr") {
+ val df = igniteSession.sql("SELECT RTRIM('B', str) FROM strings WHERE id = 9")
+
+ checkOptimizationResult(df, "SELECT RTRIM(STR, 'B') AS \"TRIM(TRAILING B FROM STR)\" " +
+ "FROM STRINGS WHERE ID = 9")
+
+ val data = Tuple1("BAAA")
+
+ checkQueryData(df, data)
+ }
+
+ it("LTRIM") {
+ val df = igniteSession.sql("SELECT LTRIM(str) FROM strings WHERE id = 4")
+
+ checkOptimizationResult(df, "SELECT LTRIM(str) FROM strings WHERE id = 4")
+
+ val data = Tuple1("AAA")
+
+ checkQueryData(df, data)
+ }
+
+ it("LTRIMWithTrimStr") {
+ val df = igniteSession.sql("SELECT LTRIM('B', str) FROM strings WHERE id = 9")
+
+ checkOptimizationResult(df, "SELECT LTRIM(STR, 'B') AS \"TRIM(LEADING B FROM STR)\" " +
+ "FROM STRINGS WHERE ID = 9")
+
+ val data = Tuple1("AAAB")
+
+ checkQueryData(df, data)
+ }
+
+ it("TRIM") {
+ val df = igniteSession.sql("SELECT TRIM(str) FROM strings WHERE id = 5")
+
+ checkOptimizationResult(df, "SELECT TRIM(str) FROM strings WHERE id = 5")
+
+ val data = Tuple1("AAA")
+
+ checkQueryData(df, data)
+ }
+
+ it("TRIMWithTrimStr") {
+ val df = igniteSession.sql("SELECT TRIM('B', str) FROM strings WHERE id = 9")
+
+ checkOptimizationResult(df, "SELECT TRIM(STR, 'B') AS \"TRIM(BOTH B FROM STR)\" FROM STRINGS WHERE ID = 9")
+
+ val data = Tuple1("AAA")
+
+ checkQueryData(df, data)
+ }
+
+ it("TRIMWithTrimStrBOTH") {
+ val df = igniteSession.sql("SELECT TRIM(BOTH 'B' FROM str) FROM strings WHERE id = 9")
+
+ checkOptimizationResult(df, "SELECT TRIM(STR, 'B') AS \"TRIM(BOTH B FROM STR)\" FROM STRINGS WHERE ID = 9")
+
+ val data = Tuple1("AAA")
+
+ checkQueryData(df, data)
+ }
+
+ it("TRIMWithTrimStrLEADING") {
+ val df = igniteSession.sql("SELECT TRIM(LEADING 'B' FROM str) FROM strings WHERE id = 9")
+
+ checkOptimizationResult(df, "SELECT LTRIM(STR, 'B') AS \"TRIM(LEADING B FROM STR)\" " +
+ "FROM STRINGS WHERE ID = 9")
+
+ val data = Tuple1("AAAB")
+
+ checkQueryData(df, data)
+ }
+
+ it("TRIMWithTrimStrTRAILING") {
+ val df = igniteSession.sql("SELECT TRIM(TRAILING 'B' FROM str) FROM strings WHERE id = 9")
+
+ checkOptimizationResult(df, "SELECT RTRIM(STR, 'B') AS \"TRIM(TRAILING B FROM STR)\" " +
+ "FROM STRINGS WHERE ID = 9")
+
+ val data = Tuple1("BAAA")
+
+ checkQueryData(df, data)
+ }
+
+ it("LOWER") {
+ val df = igniteSession.sql("SELECT LOWER(str) FROM strings WHERE id = 2")
+
+ checkOptimizationResult(df, "SELECT LOWER(str) FROM strings WHERE id = 2")
+
+ val data = Tuple1("aaa")
+
+ checkQueryData(df, data)
+ }
+
+ it("UPPER") {
+ val df = igniteSession.sql("SELECT UPPER(str) FROM strings WHERE id = 1")
+
+ checkOptimizationResult(df, "SELECT UPPER(str) FROM strings WHERE id = 1")
+
+ val data = Tuple1("AAA")
+
+ checkQueryData(df, data)
+ }
+
+ it("LOWER(RTRIM)") {
+ val df = igniteSession.sql("SELECT LOWER(RTRIM(str)) FROM strings WHERE id = 3")
+
+ checkOptimizationResult(df, "SELECT LOWER(RTRIM(str)) FROM strings WHERE id = 3")
+
+ val data = Tuple1("aaa")
+
+ checkQueryData(df, data)
+ }
+
+ it("LOCATE") {
+ val df = igniteSession.sql("SELECT LOCATE('D', str) FROM strings WHERE id = 6")
+
+ checkOptimizationResult(df, "SELECT LOCATE('D', str, 1) FROM strings WHERE id = 6")
+
+ val data = Tuple1(4)
+
+ checkQueryData(df, data)
+ }
+
+ it("LOCATE - 2") {
+ val df = igniteSession.sql("SELECT LOCATE('A', str) FROM strings WHERE id = 6")
+
+ checkOptimizationResult(df, "SELECT LOCATE('A', str, 1) FROM strings WHERE id = 6")
+
+ val data = Tuple1(1)
+
+ checkQueryData(df, data)
+ }
+
+ it("POSITION") {
+ val df = igniteSession.sql("SELECT instr(str, 'BCD') FROM strings WHERE id = 6")
+
+ checkOptimizationResult(df, "SELECT POSITION('BCD', str) as \"instr(str, BCD)\" FROM strings " +
+ "WHERE id = 6")
+
+ val data = Tuple1(2)
+
+ checkQueryData(df, data)
+ }
+
+ it("CONCAT") {
+ val df = igniteSession.sql("SELECT concat(str, 'XXX') FROM strings WHERE id = 6")
+
+ checkOptimizationResult(df, "SELECT concat(str, 'XXX') FROM strings WHERE id = 6")
+
+ val data = Tuple1("ABCDEFXXX")
+
+ checkQueryData(df, data)
+ }
+
+ it("RPAD") {
+ val df = igniteSession.sql("SELECT RPAD(str, 10, 'X') FROM strings WHERE id = 6")
+
+ checkOptimizationResult(df, "SELECT RPAD(str, 10, 'X') FROM strings WHERE id = 6")
+
+ val data = Tuple1("ABCDEFXXXX")
+
+ checkQueryData(df, data)
+ }
+
+ it("LPAD") {
+ val df = igniteSession.sql("SELECT LPAD(str, 10, 'X') FROM strings WHERE id = 6")
+
+ checkOptimizationResult(df, "SELECT LPAD(str, 10, 'X') FROM strings WHERE id = 6")
+
+ val data = Tuple1("XXXXABCDEF")
+
+ checkQueryData(df, data)
+ }
+
+ it("REPEAT") {
+ val df = igniteSession.sql("SELECT REPEAT(str, 2) FROM strings WHERE id = 6")
+
+ checkOptimizationResult(df, "SELECT REPEAT(str, 2) FROM strings WHERE id = 6")
+
+ val data = Tuple1("ABCDEFABCDEF")
+
+ checkQueryData(df, data)
+ }
+
+ it("SUBSTRING") {
+ val df = igniteSession.sql("SELECT SUBSTRING(str, 4, 3) FROM strings WHERE id = 6")
+
+ checkOptimizationResult(df, "SELECT SUBSTR(str, 4, 3) as \"SUBSTRING(str, 4, 3)\" FROM strings " +
+ "WHERE id = 6")
+
+ val data = Tuple1("DEF")
+
+ checkQueryData(df, data)
+ }
+
+ it("SPACE") {
+ val df = igniteSession.sql("SELECT SPACE(LENGTH(str)) FROM strings WHERE id = 1")
+
+ checkOptimizationResult(df, "SELECT SPACE(CAST(LENGTH(str) AS INTEGER)) as \"SPACE(LENGTH(str))\" " +
+ "FROM strings WHERE id = 1")
+
+ val data = Tuple1(" ")
+
+ checkQueryData(df, data)
+ }
+
+ it("ASCII") {
+ val df = igniteSession.sql("SELECT ASCII(str) FROM strings WHERE id = 7")
+
+ checkOptimizationResult(df, "SELECT ASCII(str) FROM strings WHERE id = 7")
+
+ val data = Tuple1(50)
+
+ checkQueryData(df, data)
+ }
+
+ ignore("REGEXP_REPLACE") {
+ val df = igniteSession.sql("SELECT REGEXP_REPLACE(str, '(\\\\d+)', 'num') FROM strings WHERE id = 7")
+
+ checkOptimizationResult(df, "SELECT REGEXP_REPLACE(str, '(\\d+)', 'num') " +
+ "AS \"regexp_replace(str, (\\d+), num, 1)\" FROM STRINGS WHERE ID = 7")
+
+ val data = Tuple1("num")
+
+ checkQueryData(df, data)
+ }
+
+ it("CONCAT_WS") {
+ val df = igniteSession.sql("SELECT id, CONCAT_WS(', ', str, 'after') FROM strings " +
+ "WHERE id >= 7 AND id <= 8")
+
+ checkOptimizationResult(df, "SELECT id, CONCAT_WS(', ', str, 'after') FROM strings " +
+ "WHERE id >= 7 AND id <= 8")
+
+ val data = (
+ (7, "222, after"),
+ (8, "after"))
+
+ checkQueryData(df, data)
+ }
+
+ it("TRANSLATE") {
+ val df = igniteSession.sql("SELECT id, TRANSLATE(str, 'DEF', 'ABC') FROM strings WHERE id = 6")
+
+ checkOptimizationResult(df, "SELECT id, TRANSLATE(str, 'DEF', 'ABC') FROM strings " +
+ "WHERE id = 6")
+
+ val data = Tuple1((6, "ABCABC"))
+
+ checkQueryData(df, data)
+ }
+ }
+
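+ /** Creates the `strings` test table and inserts sample values (padded strings, digits and a null) for the string-function tests. */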
+ def createStringTable(client: Ignite, cacheName: String): Unit = {
+ val cache = client.cache(cacheName)
+
+ cache.query(new SqlFieldsQuery(
+ """
+ | CREATE TABLE strings (
+ | id LONG,
+ | str VARCHAR,
+ | PRIMARY KEY (id)) WITH "backups=1"
+ """.stripMargin)).getAll
+
+ val qry = new SqlFieldsQuery("INSERT INTO strings (id, str) values (?, ?)")
+
+ cache.query(qry.setArgs(1L.asInstanceOf[JLong], "aaa")).getAll
+ cache.query(qry.setArgs(2L.asInstanceOf[JLong], "AAA")).getAll
+ cache.query(qry.setArgs(3L.asInstanceOf[JLong], "AAA ")).getAll
+ cache.query(qry.setArgs(4L.asInstanceOf[JLong], " AAA")).getAll
+ cache.query(qry.setArgs(5L.asInstanceOf[JLong], " AAA ")).getAll
+ cache.query(qry.setArgs(6L.asInstanceOf[JLong], "ABCDEF")).getAll
+ cache.query(qry.setArgs(7L.asInstanceOf[JLong], "222")).getAll
+ cache.query(qry.setArgs(8L.asInstanceOf[JLong], null)).getAll
+ cache.query(qry.setArgs(9L.asInstanceOf[JLong], "BAAAB")).getAll
+ }
+
+ override protected def beforeAll(): Unit = {
+ super.beforeAll()
+
+ createStringTable(client, DEFAULT_CACHE)
+
+ val configProvider = enclose(null) (x ⇒ () ⇒ {
+ val cfg = IgnitionEx.loadConfiguration(TEST_CONFIG_FILE).get1()
+
+ cfg.setClientMode(true)
+
+ cfg.setIgniteInstanceName("client-2")
+
+ cfg
+ })
+
+ igniteSession = IgniteSparkSession.builder()
+ .config(spark.sparkContext.getConf)
+ .igniteConfigProvider(configProvider)
+ .getOrCreate()
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationSystemFuncSpec.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationSystemFuncSpec.scala
new file mode 100644
index 0000000..490cb39
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteOptimizationSystemFuncSpec.scala
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import org.apache.ignite.Ignite
+import org.apache.ignite.cache.query.SqlFieldsQuery
+import org.apache.spark.sql.ignite.IgniteSparkSession
+import org.junit.runner.RunWith
+
+import java.lang.{Double => JDouble, Long => JLong}
+import org.apache.ignite.internal.IgnitionEx
+import org.apache.ignite.spark.AbstractDataFrameSpec.{DEFAULT_CACHE, TEST_CONFIG_FILE, checkOptimizationResult, enclose}
+import org.scalatestplus.junit.JUnitRunner
+
+/**
+ * Tests push-down of system functions (COALESCE, GREATEST, LEAST, IFNULL, NULLIF, NVL2) to Ignite SQL.
+ */
+@RunWith(classOf[JUnitRunner])
+class IgniteOptimizationSystemFuncSpec extends AbstractDataFrameSpec {
+ var igniteSession: IgniteSparkSession = _
+
+ describe("Supported optimized system functions") {
+ it("COALESCE") {
+ val df = igniteSession.sql("SELECT COALESCE(int_val1, int_val2) FROM numbers WHERE id IN (1, 2, 3)")
+
+ checkOptimizationResult(df, "SELECT COALESCE(int_val1, int_val2) FROM numbers WHERE id IN (1, 2, 3)")
+
+ val data = (1, 2, 3)
+
+ checkQueryData(df, data)
+ }
+
+ it("GREATEST") {
+ val df = igniteSession.sql("SELECT GREATEST(int_val1, int_val2) FROM numbers WHERE id IN (4, 5)")
+
+ checkOptimizationResult(df, "SELECT GREATEST(int_val1, int_val2) FROM numbers WHERE id IN (4, 5)")
+
+ val data = (4, 6)
+
+ checkQueryData(df, data)
+ }
+
+ it("LEAST") {
+ val df = igniteSession.sql("SELECT LEAST(int_val1, int_val2) FROM numbers WHERE id IN (4, 5)")
+
+ checkOptimizationResult(df, "SELECT LEAST(int_val1, int_val2) FROM numbers WHERE id IN (4, 5)")
+
+ val data = (3, 5)
+
+ checkQueryData(df, data)
+ }
+
+ it("IFNULL") {
+ val df = igniteSession.sql("SELECT IFNULL(int_val1, int_val2) FROM numbers WHERE id IN (1, 2, 3)")
+
+ checkOptimizationResult(df, "SELECT COALESCE(INT_VAL1, INT_VAL2) AS \"IFNULL(INT_VAL1, INT_VAL2)\" FROM NUMBERS WHERE ID IN (1, 2, 3)")
+
+ val data = (1, 2, 3)
+
+ checkQueryData(df, data)
+ }
+
+ it("NULLIF") {
+ val df = igniteSession.sql("SELECT id, NULLIF(int_val1, int_val2) FROM numbers WHERE id IN (6, 7)")
+
+ checkOptimizationResult(df)
+
+ val data = (
+ (6, null),
+ (7, 8))
+
+ checkQueryData(df, data)
+ }
+
+ it("NVL2") {
+ val df = igniteSession.sql("SELECT id, NVL2(int_val1, 'not null', 'null') FROM numbers WHERE id IN (1, 2, 3)")
+
+ checkOptimizationResult(df)
+
+ val data = (
+ (1, "not null"),
+ (2, "null"),
+ (3, "not null"))
+
+ checkQueryData(df, data)
+ }
+ }
+
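+ /** Creates the `numbers` test table with two nullable long columns used by the COALESCE/GREATEST/LEAST/IFNULL/NULLIF/NVL2 tests. */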
+ def createNumberTable(client: Ignite, cacheName: String): Unit = {
+ val cache = client.cache(cacheName)
+
+ cache.query(new SqlFieldsQuery(
+ """
+ | CREATE TABLE numbers (
+ | id LONG,
+ | int_val1 LONG,
+ | int_val2 LONG,
+ | PRIMARY KEY (id)) WITH "backups=1"
+ """.stripMargin)).getAll
+
+ val qry = new SqlFieldsQuery("INSERT INTO numbers (id, int_val1, int_val2) values (?, ?, ?)")
+
+ cache.query(qry.setArgs(1L.asInstanceOf[JLong], 1L.asInstanceOf[JLong], null)).getAll
+ cache.query(qry.setArgs(2L.asInstanceOf[JLong], null, 2L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(3L.asInstanceOf[JLong], 3L.asInstanceOf[JLong], null)).getAll
+ cache.query(qry.setArgs(4L.asInstanceOf[JLong], 3L.asInstanceOf[JLong], 4L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(5L.asInstanceOf[JLong], 6L.asInstanceOf[JLong], 5L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(6L.asInstanceOf[JLong], 7L.asInstanceOf[JLong], 7L.asInstanceOf[JLong])).getAll
+ cache.query(qry.setArgs(7L.asInstanceOf[JLong], 8L.asInstanceOf[JLong], 9L.asInstanceOf[JLong])).getAll
+ }
+
+ override protected def beforeAll(): Unit = {
+ super.beforeAll()
+
+ createNumberTable(client, DEFAULT_CACHE)
+
+ val configProvider = enclose(null) (x ⇒ () ⇒ {
+ val cfg = IgnitionEx.loadConfiguration(TEST_CONFIG_FILE).get1()
+
+ cfg.setClientMode(true)
+
+ cfg.setIgniteInstanceName("client-2")
+
+ cfg
+ })
+
+ igniteSession = IgniteSparkSession.builder()
+ .config(spark.sparkContext.getConf)
+ .igniteConfigProvider(configProvider)
+ .getOrCreate()
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteRDDSpec.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteRDDSpec.scala
new file mode 100644
index 0000000..8b91794
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteRDDSpec.scala
@@ -0,0 +1,430 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import org.apache.ignite.Ignition
+import org.apache.ignite.cache.query.annotations.{QuerySqlField, QueryTextField}
+import org.apache.ignite.configuration.{CacheConfiguration, IgniteConfiguration}
+import org.apache.ignite.lang.IgniteUuid
+import org.apache.ignite.spark.IgniteRDDSpec._
+import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi
+import org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder
+import org.apache.spark.SparkContext
+import org.junit.runner.RunWith
+import org.scalatest._
+
+import org.apache.ignite.binary.BinaryObject
+import org.scalatest.funspec.AnyFunSpec
+import org.scalatest.matchers.should.Matchers
+import org.scalatestplus.junit.JUnitRunner
+
+import scala.annotation.meta.field
+import scala.collection.JavaConversions._
+import scala.reflect.ClassTag
+
+@RunWith(classOf[JUnitRunner])
+class IgniteRDDSpec extends AnyFunSpec with Matchers with BeforeAndAfterAll with BeforeAndAfterEach {
+ describe("IgniteRDD") {
+ it("should successfully store data to ignite using savePairs") {
+ val sc = new SparkContext("local[*]", "test")
+
+ try {
+ val ic = new IgniteContext(sc,
+ () ⇒ configuration("client", client = true))
+
+ // Save pairs ("0", "val0"), ("1", "val1"), ... to Ignite cache.
+ ic.fromCache[String, String](STR_STR_CACHE_NAME).savePairs(sc.parallelize(0 to 10000, 2).map(i ⇒ (String.valueOf(i), "val" + i)))
+
+ // Check cache contents.
+ val ignite = Ignition.ignite("grid-0")
+
+ for (i ← 0 to 10000) {
+ val res = ignite.cache[String, String](STR_STR_CACHE_NAME).get(String.valueOf(i))
+
+ assert(res != null, "Value was not put to cache for key: " + i)
+ assert("val" + i == res, "Invalid value stored for key: " + i)
+ }
+ }
+ finally {
+ sc.stop()
+ }
+ }
+
+ it("should successfully store data to ignite using savePairs with inline transformation") {
+ val sc = new SparkContext("local[*]", "test")
+
+ try {
+ val ic = new IgniteContext(sc, () ⇒ configuration("client", client = true))
+
+ // Save pairs ("0", "val0"), ("1", "val1"), ... to Ignite cache.
+ ic.fromCache(STR_STR_CACHE_NAME).savePairs(
+ sc.parallelize(0 to 10000, 2), (i: Int, ic) ⇒ (String.valueOf(i), "val" + i))
+
+ // Check cache contents.
+ val ignite = Ignition.ignite("grid-0")
+
+ for (i ← 0 to 10000) {
+ val res = ignite.cache[String, String](STR_STR_CACHE_NAME).get(String.valueOf(i))
+
+ assert(res != null, "Value was not put to cache for key: " + i)
+ assert("val" + i == res, "Invalid value stored for key: " + i)
+ }
+ }
+ finally {
+ sc.stop()
+ }
+ }
+
+ it("should successfully store data to ignite using saveValues") {
+ val sc = new SparkContext("local[*]", "test")
+
+ try {
+ val ic = new IgniteContext(sc, () ⇒ configuration("client", client = true))
+
+ // Save pairs ("0", "val0"), ("1", "val1"), ... to Ignite cache.
+ ic.fromCache(UUID_STR_CACHE_NAME).saveValues(
+ sc.parallelize(0 to 10000, 2).map(i ⇒ "val" + i))
+
+ // Check cache contents.
+ val ignite = Ignition.ignite("grid-0")
+
+ val values = ignite.cache[IgniteUuid, String](UUID_STR_CACHE_NAME).toList.map(e ⇒ e.getValue)
+
+ for (i ← 0 to 10000)
+ assert(values.contains("val" + i), "Value not found for index: " + i)
+ }
+ finally {
+ sc.stop()
+ }
+ }
+
+ it("should successfully store data to ignite using saveValues with inline transformation") {
+ val sc = new SparkContext("local[*]", "test")
+
+ try {
+ val ic = new IgniteContext(sc, () ⇒ configuration("client", client = true))
+
+ // Save pairs ("0", "val0"), ("1", "val1"), ... to Ignite cache.
+ ic.fromCache(UUID_STR_CACHE_NAME).saveValues(
+ sc.parallelize(0 to 10000, 2), (i: Int, ic) ⇒ "val" + i)
+
+ // Check cache contents.
+ val ignite = Ignition.ignite("grid-0")
+
+ val values = ignite.cache[IgniteUuid, String](UUID_STR_CACHE_NAME).toList.map(e ⇒ e.getValue)
+
+ for (i ← 0 to 10000)
+ assert(values.contains("val" + i), "Value not found for index: " + i)
+ }
+ finally {
+ sc.stop()
+ }
+ }
+
+ it("should successfully read data from ignite") {
+ val sc = new SparkContext("local[*]", "test")
+
+ try {
+ val cache = Ignition.ignite("grid-0").cache[String, Int](STR_INT_CACHE_NAME)
+
+ val num = 10000
+
+ for (i ← 0 to num) {
+ cache.put(String.valueOf(i), i)
+ }
+
+ val ic = new IgniteContext(sc,
+ () ⇒ configuration("client", client = true))
+
+ val res = ic.fromCache[String, Int](STR_INT_CACHE_NAME).map(_._2).sum()
+
+ assert(res == (0 to num).sum)
+ }
+ finally {
+ sc.stop()
+ }
+ }
+
+ it("should successfully query objects from ignite") {
+ val sc = new SparkContext("local[*]", "test")
+
+ try {
+ val ic = new IgniteContext(sc,
+ () ⇒ configuration("client", client = true))
+
+ val cache: IgniteRDD[String, Entity] = ic.fromCache[String, Entity](ENTITY_CACHE_NAME)
+
+ cache.savePairs(sc.parallelize(0 to 1000, 2).map(i ⇒ (String.valueOf(i), new Entity(i, "name" + i, i * 100))))
+
+ val res: Array[Entity] = cache.objectSql("Entity", "name = ? and salary = ?", "name50", 5000).map(_._2).collect()
+
+ assert(res.length == 1, "Invalid result length")
+ assert(50 == res(0).id, "Invalid result")
+ assert("name50" == res(0).name, "Invalid result")
+ assert(5000 == res(0).salary)
+
+ assert(500 == cache.objectSql("Entity", "id > 500").count(), "Invalid count")
+ }
+ finally {
+ sc.stop()
+ }
+ }
+
+ it("should successfully query fields from ignite") {
+ val sc = new SparkContext("local[*]", "test")
+
+ try {
+ val ic = new IgniteContext(sc,
+ () ⇒ configuration("client", client = true))
+
+ val cache: IgniteRDD[String, Entity] = ic.fromCache(ENTITY_CACHE_NAME)
+
+ import ic.sqlContext.implicits._
+
+ cache.savePairs(sc.parallelize(0 to 1000, 2).map(i ⇒ (String.valueOf(i), new Entity(i, "name" + i, i * 100))))
+
+ val df = cache.sql("select id, name, salary from Entity where name = ? and salary = ?", "name50", 5000)
+
+ df.printSchema()
+
+ val res = df.collect()
+
+ assert(res.length == 1, "Invalid result length")
+ assert(50 == res(0)(0), "Invalid result")
+ assert("name50" == res(0)(1), "Invalid result")
+ assert(5000 == res(0)(2), "Invalid result")
+
+ val df0 = cache.sql("select id, name, salary from Entity").where('NAME === "name50" and 'SALARY === 5000)
+
+ val res0 = df0.collect()
+
+ assert(res0.length == 1, "Invalid result length")
+ assert(50 == res0(0)(0), "Invalid result")
+ assert("name50" == res0(0)(1), "Invalid result")
+ assert(5000 == res0(0)(2), "Invalid result")
+
+ assert(500 == cache.sql("select id from Entity where id > 500").count(), "Invalid count")
+ }
+ finally {
+ sc.stop()
+ }
+ }
+
+ it("should successfully start spark context with XML configuration") {
+ val sc = new SparkContext("local[*]", "test")
+
+ try {
+ val ic = new IgniteContext(sc, "spark/spark-config.xml")
+
+ val cache: IgniteRDD[String, String] = ic.fromCache[String, String](STR_STR_CACHE_NAME)
+
+ cache.savePairs(sc.parallelize(1 to 1000, 2).map(i ⇒ (String.valueOf(i), "val" + i)))
+
+ assert(1000 == cache.count())
+ }
+ finally {
+ sc.stop()
+ }
+ }
+
+ it("should successfully query complex object fields") {
+ val sc = new SparkContext("local[*]", "test")
+
+ try {
+ val ic = new IgniteContext(sc,
+ () ⇒ configuration("client", client = true))
+
+ val cache: IgniteRDD[Integer, WithObjectField] = ic.fromCache[Integer, WithObjectField](WITH_OBJECT_FIELD_CACHE_NAME)
+
+ cache.savePairs(sc.parallelize(0 to 1000, 2).map(i ⇒ (i:java.lang.Integer, new WithObjectField(i, new Entity(i, "", i)))))
+
+ val df = cache.sql(s"select i, ts from $WITH_OBJECT_FIELD_CACHE_NAME where i = ?", 50)
+
+ df.printSchema()
+
+ val res = df.collect()
+
+ assert(res.length == 1, "Invalid result length")
+ assert(50 == res(0)(0), "Invalid result")
+ }
+ finally {
+ sc.stop()
+ }
+ }
+
+ it("should properly count RDD size") {
+ val sc = new SparkContext("local[*]", "test")
+
+ try {
+ val ic = new IgniteContext(sc, () ⇒ configuration("client", client = true))
+
+ val cache: IgniteRDD[Integer, WithObjectField] = ic.fromCache(WITH_OBJECT_FIELD_CACHE_NAME)
+
+ assert(cache.count() == 0)
+ assert(cache.isEmpty())
+
+ cache.savePairs(sc.parallelize(0 until 1000, 2).map(i ⇒ (i:java.lang.Integer, new WithObjectField(i, new Entity(i, "", i)))))
+
+ assert(cache.count() == 1000)
+ assert(!cache.isEmpty())
+
+ cache.clear()
+
+ assert(cache.count() == 0)
+ assert(cache.isEmpty())
+ }
+ finally {
+ sc.stop()
+ }
+ }
+
+ it("should properly work with binary objects") {
+ val sc = new SparkContext("local[*]", "test")
+
+ try {
+ val ic = new IgniteContext(sc, () ⇒ configuration("client", client = true))
+
+ val cache = ic.fromCache[String, Entity](ENTITY_CACHE_NAME)
+
+ cache.savePairs(sc.parallelize(0 until 10, 2).map(i ⇒ (String.valueOf(i),
+ new Entity(i, "name" + i, i * 100))))
+
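+            // withKeepBinary() returns values as BinaryObject instances,
+            // so single fields can be read without deserializing the whole Entity.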
+ val res = cache.withKeepBinary[String, BinaryObject]().map(t ⇒ t._2.field[Int]("salary")).collect()
+
+            println(res.mkString(", "))
+ }
+ finally {
+ sc.stop()
+ }
+ }
+ }
+
+ override protected def beforeEach() = {
+ for (cacheName <- Ignition.ignite("grid-0").cacheNames()) {
+ Ignition.ignite("grid-0").cache(cacheName).clear()
+ }
+ }
+
+ override protected def afterEach() = {
+ Ignition.stop("client", false)
+ }
+
+ override protected def beforeAll() = {
+ for (i ← 0 to 3) {
+ Ignition.start(configuration("grid-" + i, client = false))
+ }
+ }
+
+ override protected def afterAll() = {
+ for (i ← 0 to 3) {
+ Ignition.stop("grid-" + i, false)
+ }
+ }
+}
+
+case class WithObjectField(
+ @(QuerySqlField @field)(index = true) val i : Int,
+ @(QuerySqlField @field)(index = false) val ts : Object
+) {
+}
+
+/**
+ * Constants and utility methods.
+ */
+object IgniteRDDSpec {
+ /** Cache name for the pairs (String, Entity). */
+ val ENTITY_CACHE_NAME = "entity"
+
+ /** Cache name for the pairs (String, WithObjectField). */
+ val WITH_OBJECT_FIELD_CACHE_NAME = "withObjectField"
+
+ /** Cache name for the pairs (String, String). */
+ val STR_STR_CACHE_NAME = "StrStr"
+
+    /** Cache name for the pairs (IgniteUuid, String). */
+ val UUID_STR_CACHE_NAME = "UuidStr"
+
+ /** Cache name for the pairs (String, Int). */
+ val STR_INT_CACHE_NAME = "StrInt"
+
+ /** Type alias for `QuerySqlField`. */
+ type ScalarCacheQuerySqlField = QuerySqlField @field
+
+ /** Type alias for `QueryTextField`. */
+ type ScalarCacheQueryTextField = QueryTextField @field
+
+ /**
+ * Gets ignite configuration.
+ *
+ * @param igniteInstanceName Ignite instance name.
+ * @param client Client mode flag.
+ * @return Ignite configuration.
+ */
+ def configuration(igniteInstanceName: String, client: Boolean): IgniteConfiguration = {
+ val cfg = new IgniteConfiguration
+
+ cfg.setLocalHost("127.0.0.1")
+
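+        // Static IP finder limits discovery to 127.0.0.1:47500..47504 so test nodes only see each other.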
+ val discoSpi = new TcpDiscoverySpi
+
+ val ipFinder = new TcpDiscoveryVmIpFinder()
+
+ ipFinder.setAddresses(List("127.0.0.1:47500..47504"))
+
+ discoSpi.setIpFinder(ipFinder)
+
+ cfg.setDiscoverySpi(discoSpi)
+
+ cfg.setCacheConfiguration(
+ cacheConfiguration[String, String](STR_STR_CACHE_NAME),
+ cacheConfiguration[IgniteUuid, String](UUID_STR_CACHE_NAME),
+ cacheConfiguration[String, Integer](STR_INT_CACHE_NAME),
+ cacheConfiguration[String, Entity](ENTITY_CACHE_NAME),
+ cacheConfiguration[Integer, WithObjectField](WITH_OBJECT_FIELD_CACHE_NAME))
+
+ cfg.setClientMode(client)
+
+ cfg.setIgniteInstanceName(igniteInstanceName)
+
+ cfg
+ }
+
+ /**
+     * Gets cache configuration for the given cache name.
+ *
+ * @tparam K class of cached keys
+ * @tparam V class of cached values
+ * @param cacheName cache name.
+ * @return Cache configuration.
+ */
+ def cacheConfiguration[K : ClassTag, V : ClassTag](cacheName : String): CacheConfiguration[Object, Object] = {
+ val ccfg = new CacheConfiguration[Object, Object]()
+
+ ccfg.setBackups(1)
+
+ ccfg.setName(cacheName)
+
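+        // Register key and value classes for SQL indexing so objectSql()/sql() queries can run against this cache.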
+ ccfg.setIndexedTypes(
+ implicitly[reflect.ClassTag[K]].runtimeClass.asInstanceOf[Class[K]],
+ implicitly[reflect.ClassTag[V]].runtimeClass.asInstanceOf[Class[V]])
+
+ ccfg
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteSQLDataFrameIgniteSessionWriteSpec.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteSQLDataFrameIgniteSessionWriteSpec.scala
new file mode 100644
index 0000000..29becf8
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteSQLDataFrameIgniteSessionWriteSpec.scala
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import org.apache.ignite.internal.IgnitionEx
+import org.apache.ignite.internal.util.IgniteUtils.gridClassLoader
+import org.apache.ignite.spark.AbstractDataFrameSpec.{TEST_CONFIG_FILE, enclose}
+import org.apache.ignite.spark.IgniteDataFrameSettings._
+import org.apache.ignite.testframework.GridTestUtils.resolveIgnitePath
+import org.apache.spark.sql.SaveMode
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.ignite.IgniteSparkSession
+import org.junit.runner.RunWith
+import org.scalatestplus.junit.JUnitRunner
+
+/**
+ * Tests for DataFrame write features that are specific to IgniteSparkSession, such as saveAsTable support.
+ */
+@RunWith(classOf[JUnitRunner])
+class IgniteSQLDataFrameIgniteSessionWriteSpec extends IgniteSQLDataFrameWriteSpec {
+ describe("Additional features for IgniteSparkSession") {
+ // TODO: fix it IGNITE-12435
+ ignore("Save data frame as a existing table with saveAsTable('table_name') - Overwrite") {
+ val citiesDataFrame = spark.read.json(
+ gridClassLoader().getResource("cities.json").getFile)
+
+ citiesDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id")
+ .option(OPTION_CREATE_TABLE_PARAMETERS, "template=replicated")
+ .mode(SaveMode.Overwrite)
+ .saveAsTable("city")
+
+ assert(rowsCount("city") == citiesDataFrame.count(),
+ s"Table json_city should contain data from json file.")
+ }
+
+ // TODO: fix it IGNITE-12435
+ ignore("Save data frame as a existing table with saveAsTable('table_name') - Append") {
+ val citiesDataFrame = spark.read.json(
+ gridClassLoader().getResource("cities.json").getFile)
+
+ val rowCnt = citiesDataFrame.count()
+
+ citiesDataFrame
+ .withColumn("id", col("id") + rowCnt) //Edit id column to prevent duplication
+ .write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id")
+ .option(OPTION_CREATE_TABLE_PARAMETERS, "template=replicated")
+ .mode(SaveMode.Append)
+ .partitionBy("id")
+ .saveAsTable("city")
+
+ assert(rowsCount("city") == rowCnt*2,
+ s"Table json_city should contain data from json file.")
+ }
+
+ // TODO: fix it IGNITE-12435
+ ignore("Save data frame as a new table with saveAsTable('table_name')") {
+ val citiesDataFrame = spark.read.json(
+ gridClassLoader().getResource("cities.json").getFile)
+
+ citiesDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id")
+ .option(OPTION_CREATE_TABLE_PARAMETERS, "template=replicated")
+ .saveAsTable("new_cities")
+
+ assert(rowsCount("new_cities") == citiesDataFrame.count(),
+ s"Table json_city should contain data from json file.")
+ }
+ }
+
+ override protected def createSparkSession(): Unit = {
+ val configProvider = enclose(null) (x ⇒ () ⇒ {
+ val cfg = IgnitionEx.loadConfiguration(TEST_CONFIG_FILE).get1()
+
+ cfg.setClientMode(true)
+
+ cfg.setIgniteInstanceName("client-2")
+
+ cfg
+ })
+
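+        // Build an IgniteSparkSession backed by a client node ("client-2") configured from the test XML file.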
+ spark = IgniteSparkSession.builder()
+ .appName("DataFrameSpec")
+ .master("local")
+ .config("spark.executor.instances", "2")
+ .igniteConfigProvider(configProvider)
+ .getOrCreate()
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteSQLDataFrameSpec.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteSQLDataFrameSpec.scala
new file mode 100644
index 0000000..8b45dc9
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteSQLDataFrameSpec.scala
@@ -0,0 +1,327 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import com.google.common.collect.Iterators
+import org.apache.ignite.spark.AbstractDataFrameSpec.TEST_CONFIG_FILE
+import org.apache.ignite.spark.IgniteDataFrameSettings._
+import org.apache.spark.sql.DataFrame
+import org.junit.runner.RunWith
+import org.scalatestplus.junit.JUnitRunner
+
+/**
+ * Tests to check all kinds of SQL queries from Spark SQL engine to Ignite SQL table.
+ */
+@RunWith(classOf[JUnitRunner])
+class IgniteSQLDataFrameSpec extends AbstractDataFrameSpec {
+ var personDataFrame: DataFrame = _
+
+ describe("DataFrame for a Ignite SQL table") {
+ it("Should correct filter with EqualTo Clause") {
+ val res = spark.sqlContext.sql("SELECT name FROM person WHERE id = 2").rdd
+
+ res.count should equal(1)
+
+ val persons = res.collect
+
+ persons(0).getAs[String]("name") should equal("Jane Roe")
+ }
+
+ it("Should correct filter with EqualToNullSafe Clause") {
+ val res = spark.sqlContext.sql("SELECT id FROM person WHERE name = 'Jane Roe'").rdd
+
+ res.count should equal(1)
+
+ val persons = res.collect
+
+ persons(0).getAs[Long]("id") should equal(2)
+ }
+
+ it("Should correct filter with GreaterThen Clause") {
+ val res = spark.sqlContext.sql("SELECT id, name FROM person WHERE id > 3").rdd
+
+ res.count should equal(2)
+
+ val persons = res.collect.sortBy(_.getAs[Long]("id"))
+
+ persons(0).getAs[String]("name") should equal("Richard Miles")
+ persons(1).getAs[String]("name") should equal(null)
+ }
+
+ it("Should correct filter with GreaterThenOrEqual Clause") {
+ val res = spark.sqlContext.sql("SELECT id, name FROM person WHERE id >= 3").rdd
+
+ res.count should equal(3)
+
+ val persons = res.collect.sortBy(_.getAs[Long]("id"))
+
+ persons(0).getAs[String]("name") should equal("Mary Major")
+ persons(1).getAs[String]("name") should equal("Richard Miles")
+ persons(2).getAs[String]("name") should equal(null)
+ }
+
+ it("Should correct filter with LessThan Clause") {
+ val res = spark.sqlContext.sql("SELECT name FROM person WHERE id < 2").rdd
+
+ res.count should equal(1)
+
+ val persons = res.collect
+
+ persons(0).getAs[String]("name") should equal("John Doe")
+ }
+
+ it("Should correct filter with LessThanOrEqual Clause") {
+ val res = spark.sqlContext.sql("SELECT id, name FROM person WHERE id <= 2").rdd
+
+ res.count should equal(2)
+
+ val persons = res.collect.sortBy(_.getAs[Long]("id"))
+
+ persons(0).getAs[String]("name") should equal("John Doe")
+ persons(1).getAs[String]("name") should equal("Jane Roe")
+ }
+
+ it("Should correct filter with In Clause") {
+ val res = spark.sqlContext.sql(
+ "SELECT id FROM person WHERE name in ('Jane Roe', 'Richard Miles', 'Unknown Person')").rdd
+
+ res.count should equal(2)
+
+ val persons = res.collect.sortBy(_.getAs[Long]("id"))
+
+ persons(0).getAs[Long]("id") should equal(2L)
+ persons(1).getAs[Long]("id") should equal(4L)
+ }
+
+ it("Should correct filter with IsNull Clause") {
+ val res = spark.sqlContext.sql(
+ "SELECT id FROM person WHERE name IS NULL").rdd
+
+ res.count should equal(1)
+
+ val persons = res.collect
+
+ persons(0).getAs[Long]("id") should equal(5L)
+ }
+
+ it("Should correct filter with IsNotNull Clause") {
+ val res = spark.sqlContext.sql(
+ "SELECT id FROM person WHERE name IS NOT NULL").rdd
+
+ res.count should equal(4)
+
+ res.collect.map(r ⇒ r.getAs[Long]("id")).sorted should equal(Array(1, 2, 3, 4))
+
+ }
+
+ it("Should correct filter with And Clause") {
+ val res = spark.sqlContext.sql("SELECT id, name FROM person WHERE id <= 4 AND name = 'Jane Roe'").rdd
+
+ res.count should equal(1)
+
+ val persons = res.collect.sortBy(_.getAs[Long]("id"))
+
+ persons(0).getAs[Long]("id") should equal(2)
+ persons(0).getAs[String]("name") should equal("Jane Roe")
+ }
+
+ it("Should correct filter with Or Clause") {
+ val res = spark.sqlContext.sql("SELECT id, name FROM person WHERE id = 2 OR name = 'John Doe'").rdd
+
+ res.count should equal(2)
+
+ val persons = res.collect.sortBy(_.getAs[Long]("id"))
+
+ persons(0).getAs[Long]("id") should equal(1)
+ persons(0).getAs[String]("name") should equal("John Doe")
+
+ persons(1).getAs[Long]("id") should equal(2)
+ persons(1).getAs[String]("name") should equal("Jane Roe")
+ }
+
+ it("Should correct filter with Not Clause") {
+ val res = spark.sqlContext.sql("SELECT id FROM person WHERE NOT(name is null)").rdd
+
+ res.count should equal(4)
+
+ res.collect.map(r ⇒ r.getAs[Long]("id")).sorted should equal(Array(1, 2, 3, 4))
+ }
+
+ it("Should correct filter with StringStartsWith Clause") {
+ val res = spark.sqlContext.sql("SELECT id, name FROM person WHERE name LIKE 'J%'").rdd
+
+ res.count should equal(2)
+
+ val persons = res.collect.sortBy(_.getAs[Long]("id"))
+
+ persons(0).getAs[Long]("id") should equal(1)
+ persons(0).getAs[String]("name") should equal("John Doe")
+
+ persons(1).getAs[Long]("id") should equal(2)
+ persons(1).getAs[String]("name") should equal("Jane Roe")
+ }
+
+ it("Should correct filter with StringEndsWith Clause") {
+ val res = spark.sqlContext.sql("SELECT id, name FROM person WHERE name LIKE '%e'").rdd
+
+ res.count should equal(2)
+
+ val persons = res.collect.sortBy(_.getAs[Long]("id"))
+
+ persons(0).getAs[Long]("id") should equal(1)
+ persons(0).getAs[String]("name") should equal("John Doe")
+
+ persons(1).getAs[Long]("id") should equal(2)
+ persons(1).getAs[String]("name") should equal("Jane Roe")
+ }
+
+ it("Should correct filter with StringContains Clause") {
+ val res = spark.sqlContext.sql("SELECT id, name FROM person WHERE name LIKE '%M%'").rdd
+
+ res.count should equal(2)
+
+ val persons = res.collect.sortBy(_.getAs[Long]("id"))
+
+ persons(0).getAs[Long]("id") should equal(3)
+ persons(0).getAs[String]("name") should equal("Mary Major")
+
+ persons(1).getAs[Long]("id") should equal(4)
+ persons(1).getAs[String]("name") should equal("Richard Miles")
+ }
+
+ it("Should correct calculate MAX aggregate function") {
+ val res = spark.sqlContext.sql("SELECT max(id) FROM person").rdd
+
+ res.count should equal(1)
+
+ val persons = res.collect
+
+ persons(0).getAs[Long]("max(id)") should equal(5)
+ }
+
+ it("Should correct calculate MIN aggregate function") {
+ val res = spark.sqlContext.sql("SELECT min(id) FROM person").rdd
+
+ res.count should equal(1)
+
+ val persons = res.collect
+
+ persons(0).getAs[Long]("min(id)") should equal(1)
+ }
+
+ it("Should correct calculate AVG aggregate function") {
+ val res = spark.sqlContext.sql("SELECT avg(id) FROM person WHERE id = 1 OR id = 2").rdd
+
+ res.count should equal(1)
+
+ val persons = res.collect
+
+ persons(0).getAs[Double]("avg(id)") should equal(1.5D)
+ }
+
+ it("Should correct calculate COUNT(*) aggregate function") {
+ val res = spark.sqlContext.sql("SELECT count(*) FROM person").rdd
+
+ res.count should equal(1)
+
+ val persons = res.collect
+
+ persons(0).getAs[Long]("count(1)") should equal(5)
+ }
+
+ it("Should correct execute GROUP BY query") {
+ val res = spark.sqlContext.sql("SELECT count(1), city_id FROM person GROUP BY city_id").rdd
+
+ res.count should equal(3)
+
+ val persons = res.collect.sortBy(_.getAs[Long]("city_id"))
+
+ persons(0).getAs[Long]("city_id") should equal(1)
+ persons(0).getAs[Long]("count(1)") should equal(1)
+
+ persons(1).getAs[Long]("city_id") should equal(2)
+ persons(1).getAs[Long]("count(1)") should equal(3)
+
+ persons(2).getAs[Long]("city_id") should equal(3)
+ persons(2).getAs[Long]("count(1)") should equal(1)
+ }
+
+ it("Should correct execute GROUP BY with HAVING query") {
+ val res = spark.sqlContext.sql("SELECT count(1), city_id FROM person GROUP BY city_id HAVING count(1) > 1").rdd
+
+ res.count should equal(1)
+
+ val persons = res.collect.sortBy(_.getAs[Long]("city_id"))
+
+ persons(0).getAs[Long]("city_id") should equal(2)
+ persons(0).getAs[Long]("count(1)") should equal(3)
+ }
+
+ it("should use the schema name where one is specified") {
+            // `employeeCache1` is created in the schema matching the name of the cache, i.e. `employeeCache1`.
+ createEmployeeCache(client, "employeeCache1")
+
+ spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "employee")
+ .option(OPTION_SCHEMA, "employeeCache1")
+ .load()
+ .createOrReplaceTempView("employeeWithSchema")
+
+ // `employeeCache2` is created with a custom schema of `employeeSchema`.
+ createEmployeeCache(client, "employeeCache2", Some("employeeSchema"))
+
+ Iterators.size(client.cache("employeeCache2").iterator()) should equal(3)
+
+ // Remove a value from `employeeCache2` so that we know whether the select statement picks up the
+            // correct cache, i.e. it should now have 2 values compared to 3 in `employeeCache1`.
+ client.cache("employeeCache2").remove("key1") shouldBe true
+
+ spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "employee")
+ .option(OPTION_SCHEMA, "employeeSchema")
+ .load()
+ .createOrReplaceTempView("employeeWithSchema2")
+
+ val res = spark.sqlContext.sql("SELECT id FROM employeeWithSchema").rdd
+
+ res.count should equal(3)
+
+ val res2 = spark.sqlContext.sql("SELECT id FROM employeeWithSchema2").rdd
+
+ res2.count should equal(2)
+ }
+ }
+
+ override protected def beforeAll(): Unit = {
+ super.beforeAll()
+
+ createPersonTable(client, "cache1")
+
+ personDataFrame = spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "person")
+ .load()
+
+ personDataFrame.createOrReplaceTempView("person")
+ }
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteSQLDataFrameWriteSpec.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteSQLDataFrameWriteSpec.scala
new file mode 100644
index 0000000..a2e8b5f
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/ignite/spark/IgniteSQLDataFrameWriteSpec.scala
@@ -0,0 +1,388 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.spark
+
+import org.apache.ignite.IgniteException
+import org.apache.ignite.internal.util.IgniteUtils.gridClassLoader
+import org.apache.ignite.spark.AbstractDataFrameSpec.{PERSON_TBL_NAME, PERSON_TBL_NAME_2, TEST_CONFIG_FILE}
+import org.apache.ignite.spark.IgniteDataFrameSettings._
+import org.apache.ignite.spark.impl.sqlTableInfo
+import org.apache.spark.sql.SaveMode.{Append, Ignore, Overwrite}
+import org.apache.spark.sql.{DataFrame, SaveMode}
+import org.junit.Assert.assertEquals
+import org.junit.runner.RunWith
+import org.apache.spark.sql.functions._
+import org.scalatestplus.junit.JUnitRunner
+
+/**
+ * Tests for writing DataFrames into Ignite SQL tables.
+ */
+@RunWith(classOf[JUnitRunner])
+class IgniteSQLDataFrameWriteSpec extends AbstractDataFrameSpec {
+ var personDataFrame: DataFrame = _
+
+ describe("Write DataFrame into a Ignite SQL table") {
+ it("Save data frame as a new table") {
+ val rowsCnt = personDataFrame.count()
+
+ personDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "new_persons")
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id")
+ .save()
+
+ assert(rowsCnt == rowsCount("new_persons"), "Data should be saved into 'new_persons' table")
+ }
+
+ it("Save data frame to existing table") {
+ val rowsCnt = personDataFrame.count()
+
+ personDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, PERSON_TBL_NAME_2)
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id, city_id")
+ .option(OPTION_CREATE_TABLE_PARAMETERS, "backups=1, affinityKey=city_id")
+ .mode(Overwrite)
+ .save()
+
+ assert(rowsCnt == rowsCount(PERSON_TBL_NAME_2), s"Data should be saved into $PERSON_TBL_NAME_2 table")
+ }
+
+ it("Save data frame to existing table with streamer options") {
+ val rowsCnt = personDataFrame.count()
+
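+            // The streamer options below tune the IgniteDataStreamer used for the write:
+            // per-node parallel operations, per-node buffer size and flush frequency.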
+ personDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, PERSON_TBL_NAME_2)
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id, city_id")
+ .option(OPTION_CREATE_TABLE_PARAMETERS, "backups=1, affinityKey=city_id")
+ .option(OPTION_STREAMER_PER_NODE_PARALLEL_OPERATIONS, 3)
+ .option(OPTION_STREAMER_PER_NODE_BUFFER_SIZE, 1)
+ .option(OPTION_STREAMER_FLUSH_FREQUENCY, 10000)
+ .mode(Overwrite)
+ .save()
+
+ assert(rowsCnt == rowsCount(PERSON_TBL_NAME_2), s"Data should be saved into $PERSON_TBL_NAME_2 table")
+ }
+
+ it("Ignore save operation if table exists") {
+ //Count of records before saving
+ val person2RowsCntBeforeSave = rowsCount(PERSON_TBL_NAME_2)
+
+ personDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, PERSON_TBL_NAME_2)
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id, city_id")
+ .option(OPTION_CREATE_TABLE_PARAMETERS, "backups=1, affinityKey=city_id")
+ .mode(Ignore)
+ .save()
+
+ assert(rowsCount(PERSON_TBL_NAME_2) == person2RowsCntBeforeSave, "Save operation should be ignored.")
+ }
+
+ it("Append data frame data to existing table") {
+ //Count of records before appending
+ val person2RowsCnt = rowsCount(PERSON_TBL_NAME_2)
+
+ //Count of appended records
+ val personRowsCnt = personDataFrame.count()
+
+ personDataFrame
+ .withColumn("id", col("id") + person2RowsCnt) //Edit id column to prevent duplication
+ .write.format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, PERSON_TBL_NAME_2)
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id, city_id")
+ .option(OPTION_CREATE_TABLE_PARAMETERS, "backups=1, affinityKey=city_id")
+ .mode(Append)
+ .save()
+
+ assert(rowsCount(PERSON_TBL_NAME_2) == person2RowsCnt + personRowsCnt,
+ s"Table $PERSON_TBL_NAME_2 should contain data from $PERSON_TBL_NAME")
+ }
+
+ it("Save another data source data as a Ignite table") {
+ val citiesDataFrame = spark.read.json(
+ gridClassLoader().getResource("cities.json").getFile)
+
+ citiesDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "json_city")
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id")
+ .option(OPTION_CREATE_TABLE_PARAMETERS, "template=replicated")
+ .save()
+
+ assert(rowsCount("json_city") == citiesDataFrame.count(),
+ "Table json_city should contain data from json file.")
+ }
+
+ it("Save data frame as a new table with save('table_name')") {
+ val rowsCnt = personDataFrame.count()
+
+ personDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id")
+ .save("saved_persons")
+
+ assert(rowsCnt == rowsCount("saved_persons"), "Data should be saved into 'saved_persons' table")
+ }
+
+ it("Should keep first row if allowOverwrite is false") {
+ val nonUniqueCitiesDataFrame = spark.read.json(
+ gridClassLoader().getResource("cities_non_unique.json").getFile)
+
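+            // With allowOverwrite=false the streamer does not overwrite existing entries,
+            // so the first row written for each duplicate key is kept.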
+ nonUniqueCitiesDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "first_row_json_city")
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id")
+ .option(OPTION_CREATE_TABLE_PARAMETERS, "template=replicated")
+ .option(OPTION_STREAMER_ALLOW_OVERWRITE, false)
+ .save()
+
+ val cities = readTable("first_row_json_city").collect().sortBy(_.getAs[Long]("ID"))
+
+ assert(cities(0).getAs[String]("NAME") == "Forest Hill")
+ assert(cities(1).getAs[String]("NAME") == "Denver")
+ assert(cities(2).getAs[String]("NAME") == "St. Petersburg")
+ }
+
+ it("Should keep last row if allowOverwrite is true") {
+ val nonUniqueCitiesDataFrame = spark.read.json(
+ gridClassLoader().getResource("cities_non_unique.json").getFile)
+
+ nonUniqueCitiesDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "last_row_json_city")
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id")
+ .option(OPTION_CREATE_TABLE_PARAMETERS, "template=replicated")
+ .option(OPTION_STREAMER_ALLOW_OVERWRITE, true)
+ .save()
+
+ val cities = readTable("last_row_json_city").collect().sortBy(_.getAs[Long]("ID"))
+
+ assert(cities(0).getAs[String]("NAME") == "Paris")
+ assert(cities(1).getAs[String]("NAME") == "New York")
+ assert(cities(2).getAs[String]("NAME") == "Moscow")
+ }
+ }
+
+ describe("Wrong DataFrame Write Options") {
+ it("Should throw exception with ErrorIfExists for a existing table") {
+ intercept[IgniteException] {
+ personDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, PERSON_TBL_NAME)
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id")
+ .mode(SaveMode.ErrorIfExists)
+ .save()
+ }
+ }
+
+ it("Should throw exception if primary key fields not specified") {
+ intercept[IgniteException] {
+ personDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "persons_no_pk")
+ .save()
+ }
+ }
+
+ it("Should throw exception if primary key fields not specified for existing table") {
+ intercept[IgniteException] {
+ personDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, PERSON_TBL_NAME)
+ .mode(Overwrite)
+ .save()
+ }
+
+ val tblInfo = sqlTableInfo(client, PERSON_TBL_NAME, None)
+
+ assert(tblInfo.isDefined, s"Table $PERSON_TBL_NAME should exists.")
+ }
+
+ it("Should throw exception for wrong pk field") {
+ intercept[IgniteException] {
+ personDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, PERSON_TBL_NAME)
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "unknown_field")
+ .mode(Overwrite)
+ .save()
+ }
+
+ val tblInfo = sqlTableInfo(client, PERSON_TBL_NAME, None)
+
+ assert(tblInfo.isDefined, s"Table $PERSON_TBL_NAME should exists.")
+ }
+
+ it("Should throw exception for wrong pk field - 2") {
+ intercept[IgniteException] {
+ personDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, PERSON_TBL_NAME)
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id,unknown_field")
+ .mode(Overwrite)
+ .save()
+ }
+
+ val tblInfo = sqlTableInfo(client, PERSON_TBL_NAME, None)
+
+ assert(tblInfo.isDefined, s"Table $PERSON_TBL_NAME should exists.")
+ }
+
+ it("Should throw exception for wrong WITH clause") {
+ intercept[IgniteException] {
+ personDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "person_unsupported_with")
+ .option(OPTION_CREATE_TABLE_PARAMETERS, "unsupported_with_clause")
+ .mode(Overwrite)
+ .save()
+ }
+ }
+
+ it("Should throw exception for wrong table name") {
+ intercept[IgniteException] {
+ personDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "wrong-table-name")
+ .option(OPTION_CREATE_TABLE_PARAMETERS, "unsupported_with_clause")
+ .mode(Overwrite)
+ .save()
+ }
+ }
+
+ it("Should throw exception if streamingFlushFrequency is not a number") {
+ intercept[NumberFormatException] {
+ personDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, PERSON_TBL_NAME)
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id")
+ .option(OPTION_STREAMER_FLUSH_FREQUENCY, "not_a_number")
+ .mode(Overwrite)
+ .save()
+ }
+ }
+
+ it("Should throw exception if streamingPerNodeBufferSize is not a number") {
+ intercept[NumberFormatException] {
+ personDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, PERSON_TBL_NAME)
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id")
+ .option(OPTION_STREAMER_PER_NODE_BUFFER_SIZE, "not_a_number")
+ .mode(Overwrite)
+ .save()
+ }
+ }
+
+ it("Should throw exception if streamingPerNodeParallelOperations is not a number") {
+ intercept[NumberFormatException] {
+ personDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, PERSON_TBL_NAME)
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id")
+ .option(OPTION_STREAMER_PER_NODE_PARALLEL_OPERATIONS, "not_a_number")
+ .mode(Overwrite)
+ .save()
+ }
+ }
+
+ it("Should throw exception if streamerAllowOverwrite is not a boolean") {
+ intercept[IllegalArgumentException] {
+ personDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, PERSON_TBL_NAME)
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id")
+ .option(OPTION_STREAMER_ALLOW_OVERWRITE, "not_a_boolean")
+ .mode(Overwrite)
+ .save()
+ }
+ }
+
+ it("Should throw exception if saving data frame as a new table with non-PUBLIC schema") {
+ val ex = intercept[IgniteException] {
+ personDataFrame.write
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, "nonexistant-table-name")
+ .option(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS, "id")
+ .option(OPTION_SCHEMA, "mySchema")
+ .save()
+ }
+
+ assertEquals(ex.getMessage,
+ "Creating new tables in schema mySchema is not valid, tables must only be created in " +
+ org.apache.ignite.internal.processors.query.QueryUtils.DFLT_SCHEMA)
+ }
+ }
+
+ override protected def beforeAll(): Unit = {
+ super.beforeAll()
+
+ createPersonTable(client, "cache1")
+
+ createPersonTable2(client, "cache1")
+
+ createCityTable(client, "cache1")
+
+ personDataFrame = spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, PERSON_TBL_NAME)
+ .load()
+
+ personDataFrame.createOrReplaceTempView("person")
+ }
+
+ /**
+ * @param tbl Table name.
+ * @return Count of rows in table.
+ */
+ protected def rowsCount(tbl: String): Long = readTable(tbl).count()
+
+ /**
+ * @param tbl Table name.
+ * @return Ignite Table DataFrame.
+ */
+ protected def readTable(tbl: String): DataFrame =
+ spark.read
+ .format(FORMAT_IGNITE)
+ .option(OPTION_CONFIG_FILE, TEST_CONFIG_FILE)
+ .option(OPTION_TABLE, tbl)
+ .load()
+}
diff --git a/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/spark/sql/ignite/IgniteSparkSessionSpec.scala b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/spark/sql/ignite/IgniteSparkSessionSpec.scala
new file mode 100644
index 0000000..99e2318
--- /dev/null
+++ b/modules/spark-3.2-ext/spark-3.2/src/test/scala/org/apache/spark/sql/ignite/IgniteSparkSessionSpec.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.ignite
+
+import org.apache.ignite.internal.IgnitionEx
+import org.apache.ignite.internal.util.IgniteUtils.gridClassLoader
+import org.apache.ignite.spark.AbstractDataFrameSpec
+import org.apache.ignite.spark.AbstractDataFrameSpec.{DEFAULT_CACHE, TEST_CONFIG_FILE, enclose}
+import org.junit.runner.RunWith
+import org.scalatestplus.junit.JUnitRunner
+
+/**
+ * Tests to check Spark Session implementation.
+ */
+@RunWith(classOf[JUnitRunner])
+class IgniteSparkSessionSpec extends AbstractDataFrameSpec {
+ var igniteSession: IgniteSparkSession = _
+
+ describe("Ignite Spark Session Implementation") {
+ it("should keep session state after session clone") {
+ val dfProvider = (s: IgniteSparkSession) => {
+ s.read.json(gridClassLoader().getResource("cities.json").getFile)
+ .filter("name = 'Denver'")
+ }
+
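+            // Cache the DataFrame in the original session, then verify the cloned session resolves the same cached plan.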
+ var df = dfProvider(igniteSession).cache()
+
+ val cachedData = igniteSession.sharedState.cacheManager.lookupCachedData(df)
+
+ cachedData shouldBe defined
+
+ val otherSession = igniteSession.cloneSession()
+
+ df = dfProvider(otherSession)
+
+ val otherCachedData = otherSession.sharedState.cacheManager.lookupCachedData(df)
+
+ otherCachedData shouldBe defined
+
+ cachedData shouldEqual otherCachedData
+ }
+ }
+
+ override protected def beforeAll(): Unit = {
+ super.beforeAll()
+
+ createCityTable(client, DEFAULT_CACHE)
+
+ val configProvider = enclose(null)(_ ⇒ () ⇒ {
+ val cfg = IgnitionEx.loadConfiguration(TEST_CONFIG_FILE).get1()
+
+ cfg.setClientMode(true)
+
+ cfg.setIgniteInstanceName("client-2")
+
+ cfg
+ })
+
+ igniteSession = IgniteSparkSession.builder()
+ .config(spark.sparkContext.getConf)
+ .igniteConfigProvider(configProvider)
+ .getOrCreate()
+ }
+}
diff --git a/pom.xml b/pom.xml
index 2900b52..4b061e5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -58,6 +58,7 @@
<module>modules/geospatial-ext</module>
<module>modules/aop-ext</module>
<module>modules/spark-ext</module>
+ <module>modules/spark-3.2-ext</module>
<module>modules/mesos-ext</module>
<module>modules/yarn-ext</module>
<module>modules/cloud-ext</module>