Initial commit
diff --git a/phoenix-flume/pom.xml b/phoenix-flume/pom.xml
new file mode 100644
index 0000000..64d9bc7
--- /dev/null
+++ b/phoenix-flume/pom.xml
@@ -0,0 +1,199 @@
+<?xml version='1.0'?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-connectors</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ </parent>
+ <artifactId>phoenix-flume</artifactId>
+ <name>Phoenix - Flume</name>
+
+ <properties>
+ <top.dir>${project.basedir}/..</top.dir>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-core</artifactId>
+ </dependency>
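+    <!-- The phoenix-core test jar provides the shared test infrastructure (e.g. BaseHBaseManagedTimeIT) used by the ITs below. -->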
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-core</artifactId>
+ <classifier>tests</classifier>
+ <scope>test</scope>
+ </dependency>
+
+ <!-- Test Dependencies -->
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-all</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.github.stephenc.high-scale-lib</groupId>
+ <artifactId>high-scale-lib</artifactId>
+ <version>1.1.1</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yammer.metrics</groupId>
+ <artifactId>metrics-core</artifactId>
+ <version>2.1.2</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.htrace</groupId>
+ <artifactId>htrace-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-testing-util</artifactId>
+ <scope>test</scope>
+ <optional>true</optional>
+ <exclusions>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-it</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-protocol</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop2-compat</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop2-compat</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tephra</groupId>
+ <artifactId>tephra-core</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+
+    <!-- JSON parsing support for Flume event data -->
+ <dependency>
+ <groupId>com.tdunning</groupId>
+ <artifactId>json</artifactId>
+ <version>1.8</version>
+ </dependency>
+ <dependency>
+ <groupId>com.jayway.jsonpath</groupId>
+ <artifactId>json-path</artifactId>
+ <version>2.2.0</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-csv</artifactId>
+ <version>${commons-csv.version}</version>
+ </dependency>
+    <!-- Main Flume dependency. Declared last so its older commons-io does not take precedence in the integration tests. -->
+ <dependency>
+ <groupId>org.apache.flume</groupId>
+ <artifactId>flume-ng-core</artifactId>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ </plugin>
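+      <!-- Failsafe runs the *IT classes under src/it as integration tests. -->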
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <version>${maven-dependency-plugin.version}</version>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/phoenix-flume/src/it/java/org/apache/phoenix/flume/CsvEventSerializerIT.java b/phoenix-flume/src/it/java/org/apache/phoenix/flume/CsvEventSerializerIT.java
new file mode 100644
index 0000000..842db04
--- /dev/null
+++ b/phoenix-flume/src/it/java/org/apache/phoenix/flume/CsvEventSerializerIT.java
@@ -0,0 +1,416 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.flume;
+
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.flume.Channel;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.Sink;
+import org.apache.flume.Transaction;
+import org.apache.flume.channel.MemoryChannel;
+import org.apache.flume.conf.Configurables;
+import org.apache.flume.event.EventBuilder;
+import org.apache.flume.lifecycle.LifecycleState;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.phoenix.end2end.BaseHBaseManagedTimeIT;
+import org.apache.phoenix.flume.serializer.EventSerializers;
+import org.apache.phoenix.flume.sink.PhoenixSink;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.junit.Test;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+public class CsvEventSerializerIT extends BaseHBaseManagedTimeIT {
+
+ private Context sinkContext;
+ private PhoenixSink sink;
+
+ @Test
+ public void testWithDefaultDelimiters() throws EventDeliveryException, SQLException {
+
+ final String fullTableName = "FLUME_CSV_TEST";
+
+ String ddl = "CREATE TABLE IF NOT EXISTS " + fullTableName
+ + " (flume_time timestamp not null, col1 varchar , col2 double, col3 varchar[], col4 integer[]"
+ + " CONSTRAINT pk PRIMARY KEY (flume_time))\n";
+ String columns = "col1,col2,col3,col4";
+ String rowkeyType = DefaultKeyGenerator.TIMESTAMP.name();
+ initSinkContext(fullTableName, ddl, columns, null, null, null, null, rowkeyType, null);
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+
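+ // With default delimiters: unquoted fields map to the scalar columns, while the quoted,
+ // comma-separated fields populate the array columns col3 and col4.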
+ final String eventBody = "kalyan,10.5,\"abc,pqr,xyz\",\"1,2,3,4\"";
+ final Event event = EventBuilder.withBody(Bytes.toBytes(eventBody));
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ channel.put(event);
+ transaction.commit();
+ transaction.close();
+
+ sink.process();
+
+ int rowsInDb = countRows(fullTableName);
+ assertEquals(1, rowsInDb);
+
+ sink.stop();
+ assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+
+ dropTable(fullTableName);
+ }
+
+ @Test
+ public void testKeyGenerator() throws EventDeliveryException, SQLException {
+
+ final String fullTableName = "FLUME_CSV_TEST";
+ initSinkContextWithDefaults(fullTableName);
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+ final String eventBody = "kalyan,10.5,\"abc,pqr,xyz\",\"1,2,3,4\"";
+ final Event event = EventBuilder.withBody(Bytes.toBytes(eventBody));
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ channel.put(event);
+ transaction.commit();
+ transaction.close();
+
+ sink.process();
+
+ int rowsInDb = countRows(fullTableName);
+ assertEquals(1, rowsInDb);
+
+ sink.stop();
+ assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+
+ dropTable(fullTableName);
+ }
+
+ @Test
+ public void testMismatchKeyGenerator() throws EventDeliveryException, SQLException {
+
+ final String fullTableName = "FLUME_CSV_TEST";
+ initSinkContextWithDefaults(fullTableName);
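+ // Override the row key generator with UUID; the table's PK is a TIMESTAMP, so the upsert should fail.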
+ setConfig(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR,
+ DefaultKeyGenerator.UUID.name());
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+ final String eventBody = "kalyan,10.5,\"abc,pqr,xyz\",\"1,2,3,4\"";
+ final Event event = EventBuilder.withBody(Bytes.toBytes(eventBody));
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ channel.put(event);
+ transaction.commit();
+ transaction.close();
+
+ try {
+ sink.process();
+ fail("Expected process() to fail for a mismatched row key generator");
+ } catch (Exception ex) {
+ assertTrue(ex.getCause().getMessage().contains("java.lang.IllegalArgumentException: Invalid format:"));
+ }
+
+ dropTable(fullTableName);
+ }
+
+ @Test
+ public void testMissingColumnsInEvent() throws EventDeliveryException, SQLException {
+
+ final String fullTableName = "FLUME_CSV_TEST";
+ initSinkContextWithDefaults(fullTableName);
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
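+ // The event body omits col2, so the serializer should reject it and leave the table empty.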
+ final String eventBody = "kalyan,\"abc,pqr,xyz\",\"1,2,3,4\"";
+ final Event event = EventBuilder.withBody(Bytes.toBytes(eventBody));
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ channel.put(event);
+ transaction.commit();
+ transaction.close();
+
+ sink.process();
+
+ int rowsInDb = countRows(fullTableName);
+ assertEquals(0, rowsInDb);
+
+ sink.stop();
+ assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+
+ dropTable(fullTableName);
+ }
+
+ @Test
+ public void testBatchEvents() throws EventDeliveryException, SQLException {
+
+ final String fullTableName = "FLUME_CSV_TEST";
+ initSinkContextWithDefaults(fullTableName);
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+ int numEvents = 150;
+ String col1 = "val1";
+ String a1 = "\"aaa,bbb,ccc\"";
+ String a2 = "\"1,2,3,4\"";
+ String eventBody = null;
+ List<Event> eventList = Lists.newArrayListWithCapacity(numEvents);
+ for (int i = 0; i < numEvents; i++) {
+ eventBody = (col1 + i) + "," + i * 10.5 + "," + a1 + "," + a2;
+ Event event = EventBuilder.withBody(Bytes.toBytes(eventBody));
+ eventList.add(event);
+ }
+
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ for (Event event : eventList) {
+ channel.put(event);
+ }
+ transaction.commit();
+ transaction.close();
+
+ // A single process() call handles at most one batch, so keep processing
+ // until the sink stops returning READY (channel drained).
+ while (sink.process() == Sink.Status.READY) {
+ }
+
+ int rowsInDb = countRows(fullTableName);
+ assertEquals(eventList.size(), rowsInDb);
+
+ sink.stop();
+ assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+
+ dropTable(fullTableName);
+ }
+
+ @Test
+ public void testEventsWithHeaders() throws Exception {
+
+ sinkContext = new Context();
+ final String fullTableName = "FLUME_CSV_TEST";
+ final String ddl = "CREATE TABLE IF NOT EXISTS "
+ + fullTableName
+ + " (rowkey VARCHAR not null, col1 varchar , col2 double, col3 varchar[], col4 integer[], host varchar , source varchar \n"
+ + " CONSTRAINT pk PRIMARY KEY (rowkey))\n";
+ String columns = "col1,col2,col3,col4";
+ String rowkeyType = DefaultKeyGenerator.UUID.name();
+ String headers = "host,source";
+ initSinkContext(fullTableName, ddl, columns, null, null, null, null, rowkeyType, headers);
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+
+ int numEvents = 10;
+ String col1 = "val1";
+ String a1 = "\"aaa,bbb,ccc\"";
+ String a2 = "\"1,2,3,4\"";
+ String hostHeader = "host1";
+ String sourceHeader = "source1";
+ String eventBody = null;
+ List<Event> eventList = Lists.newArrayListWithCapacity(numEvents);
+ for (int i = 0; i < numEvents; i++) {
+ eventBody = (col1 + i) + "," + i * 10.5 + "," + a1 + "," + a2;
+ Map<String, String> headerMap = Maps.newHashMapWithExpectedSize(2);
+ headerMap.put("host", hostHeader);
+ headerMap.put("source", sourceHeader);
+ Event event = EventBuilder.withBody(Bytes.toBytes(eventBody), headerMap);
+ eventList.add(event);
+ }
+
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ for (Event event : eventList) {
+ channel.put(event);
+ }
+ transaction.commit();
+ transaction.close();
+
+ sink.process();
+
+ final String query = " SELECT * FROM \n " + fullTableName;
+ Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+ final ResultSet rs;
+ final Connection conn = DriverManager.getConnection(getUrl(), props);
+ try {
+ rs = conn.createStatement().executeQuery(query);
+ assertTrue(rs.next());
+ assertEquals("host1", rs.getString("host"));
+ assertEquals("source1", rs.getString("source"));
+
+ assertTrue(rs.next());
+ assertEquals("host1", rs.getString("host"));
+ assertEquals("source1", rs.getString("source"));
+ } finally {
+ if (conn != null) {
+ conn.close();
+ }
+ }
+ sink.stop();
+ assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+
+ dropTable(fullTableName);
+ }
+
+ private Channel initChannel() {
+ // Channel configuration
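+ // capacity = max events the MemoryChannel buffers; transactionCapacity = max events per put/take transaction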
+ Context channelContext = new Context();
+ channelContext.put("capacity", "10000");
+ channelContext.put("transactionCapacity", "200");
+
+ Channel channel = new MemoryChannel();
+ channel.setName("memorychannel");
+ Configurables.configure(channel, channelContext);
+ return channel;
+ }
+
+ private void initSinkContext(final String fullTableName, final String ddl, final String columns,
+ final String csvDelimiter, final String csvQuote, final String csvEscape, final String csvArrayDelimiter,
+ final String rowkeyType, final String headers) {
+ Preconditions.checkNotNull(fullTableName);
+ sinkContext = new Context();
+ sinkContext.put(FlumeConstants.CONFIG_TABLE, fullTableName);
+ sinkContext.put(FlumeConstants.CONFIG_JDBC_URL, getUrl());
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER, EventSerializers.CSV.name());
+ sinkContext.put(FlumeConstants.CONFIG_TABLE_DDL, ddl);
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMN_NAMES, columns);
+ if (null != csvDelimiter)
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CSV_DELIMITER, csvDelimiter);
+ if (null != csvQuote)
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CSV_QUOTE, csvQuote);
+ if (null != csvEscape)
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CSV_ESCAPE, csvEscape);
+ if (null != csvArrayDelimiter)
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CSV_ARRAY_DELIMITER,
+ csvArrayDelimiter);
+ if (null != rowkeyType)
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR,
+ rowkeyType);
+ if (null != headers)
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_HEADER_NAMES, headers);
+ }
+
+ private void initSinkContextWithDefaults(final String fullTableName) {
+ String ddl = "CREATE TABLE IF NOT EXISTS " + fullTableName
+ + " (flume_time timestamp not null, col1 varchar , col2 double, col3 varchar[], col4 integer[]"
+ + " CONSTRAINT pk PRIMARY KEY (flume_time))\n";
+ String columns = "col1,col2,col3,col4";
+ String rowkeyType = DefaultKeyGenerator.TIMESTAMP.name();
+ initSinkContext(fullTableName, ddl, columns, null, null, null, null, rowkeyType, null);
+ }
+
+ private void setConfig(final String configName, final String configValue) {
+ Preconditions.checkNotNull(sinkContext);
+ Preconditions.checkNotNull(configName);
+ Preconditions.checkNotNull(configValue);
+ sinkContext.put(configName, configValue);
+ }
+
+ private int countRows(final String fullTableName) throws SQLException {
+ Preconditions.checkNotNull(fullTableName);
+ Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+ final Connection conn = DriverManager.getConnection(getUrl(), props);
+ ResultSet rs = null;
+ try {
+ rs = conn.createStatement().executeQuery("select count(*) from " + fullTableName);
+ int rowsCount = 0;
+ while (rs.next()) {
+ rowsCount = rs.getInt(1);
+ }
+ return rowsCount;
+
+ } finally {
+ if (rs != null) {
+ rs.close();
+ }
+ if (conn != null) {
+ conn.close();
+ }
+ }
+
+ }
+
+ private void dropTable(final String fullTableName) throws SQLException {
+ Preconditions.checkNotNull(fullTableName);
+ Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+ final Connection conn = DriverManager.getConnection(getUrl(), props);
+ try {
+ conn.createStatement().execute("drop table if exists " + fullTableName);
+ } finally {
+ if (conn != null) {
+ conn.close();
+ }
+ }
+ }
+
+}
diff --git a/phoenix-flume/src/it/java/org/apache/phoenix/flume/JsonEventSerializerIT.java b/phoenix-flume/src/it/java/org/apache/phoenix/flume/JsonEventSerializerIT.java
new file mode 100644
index 0000000..0210bad
--- /dev/null
+++ b/phoenix-flume/src/it/java/org/apache/phoenix/flume/JsonEventSerializerIT.java
@@ -0,0 +1,541 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.flume;
+
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.flume.Channel;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.Sink;
+import org.apache.flume.Transaction;
+import org.apache.flume.channel.MemoryChannel;
+import org.apache.flume.conf.Configurables;
+import org.apache.flume.event.EventBuilder;
+import org.apache.flume.lifecycle.LifecycleState;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.phoenix.end2end.BaseHBaseManagedTimeIT;
+import org.apache.phoenix.flume.serializer.EventSerializers;
+import org.apache.phoenix.flume.sink.PhoenixSink;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.junit.Test;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+public class JsonEventSerializerIT extends BaseHBaseManagedTimeIT {
+
+ private Context sinkContext;
+ private PhoenixSink sink;
+
+ @Test
+ public void testWithOutColumnsMapping() throws EventDeliveryException, SQLException {
+
+ final String fullTableName = "FLUME_JSON_TEST";
+
+ String ddl = "CREATE TABLE IF NOT EXISTS " + fullTableName
+ + " (flume_time timestamp not null, col1 varchar , col2 double, col3 varchar[], col4 integer[]"
+ + " CONSTRAINT pk PRIMARY KEY (flume_time))\n";
+ String columns = "col1,col2,col3,col4";
+ String rowkeyType = DefaultKeyGenerator.TIMESTAMP.name();
+ initSinkContext(fullTableName, ddl, columns, null, rowkeyType, null);
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+ final String eventBody = "{\"col1\" : \"kalyan\", \"col2\" : 10.5, \"col3\" : [\"abc\",\"pqr\",\"xyz\"], \"col4\" : [1,2,3,4]}";
+ final Event event = EventBuilder.withBody(Bytes.toBytes(eventBody));
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ channel.put(event);
+ transaction.commit();
+ transaction.close();
+
+ sink.process();
+
+ int rowsInDb = countRows(fullTableName);
+ assertEquals(1, rowsInDb);
+
+ sink.stop();
+ assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+
+ dropTable(fullTableName);
+ }
+
+ @Test
+ public void testDifferentColumnNames() throws EventDeliveryException, SQLException {
+
+ final String fullTableName = "FLUME_JSON_TEST";
+
+ String ddl = "CREATE TABLE IF NOT EXISTS " + fullTableName
+ + " (flume_time timestamp not null, col1 varchar , col2 double, col3 varchar[], col4 integer[]"
+ + " CONSTRAINT pk PRIMARY KEY (flume_time))\n";
+ String columns = "col1,col2,col3,col4";
+ String rowkeyType = DefaultKeyGenerator.TIMESTAMP.name();
+ String columnsMapping = "{\"col1\":\"col1\",\"col2\":\"f2\",\"col3\":\"f3\",\"col4\":\"col4\"}";
+
+ initSinkContext(fullTableName, ddl, columns, columnsMapping, rowkeyType, null);
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+ final String eventBody = "{\"col1\" : \"kalyan\", \"f2\" : 10.5, \"f3\" : [\"abc\",\"pqr\",\"xyz\"], \"col4\" : [1,2,3,4]}";
+ final Event event = EventBuilder.withBody(Bytes.toBytes(eventBody));
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ channel.put(event);
+ transaction.commit();
+ transaction.close();
+
+ sink.process();
+
+ int rowsInDb = countRows(fullTableName);
+ assertEquals(1, rowsInDb);
+
+ sink.stop();
+ assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+
+ dropTable(fullTableName);
+ }
+
+ @Test
+ public void testInnerColumns() throws EventDeliveryException, SQLException {
+
+ final String fullTableName = "FLUME_JSON_TEST";
+
+ String ddl = "CREATE TABLE IF NOT EXISTS " + fullTableName
+ + " (flume_time timestamp not null, col1 varchar , col2 double, col3 varchar[], col4 integer[]"
+ + " CONSTRAINT pk PRIMARY KEY (flume_time))\n";
+ String columns = "col1,col2,col3,col4";
+ String rowkeyType = DefaultKeyGenerator.TIMESTAMP.name();
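+ // col2 and col3 are mapped to nested JSON paths (x.y and a.b1.c) rather than top-level field names.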
+ String columnsMapping = "{\"col1\":\"col1\",\"col2\":\"x.y\",\"col3\":\"a.b1.c\",\"col4\":\"col4\"}";
+
+ initSinkContext(fullTableName, ddl, columns, columnsMapping, rowkeyType, null);
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+ final String eventBody = "{\"col1\" : \"kalyan\", \"x\" : {\"y\" : 10.5}, \"a\" : {\"b1\" : {\"c\" : [\"abc\",\"pqr\",\"xyz\"] }, \"b2\" : 111}, \"col4\" : [1,2,3,4]}";
+ final Event event = EventBuilder.withBody(Bytes.toBytes(eventBody));
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ channel.put(event);
+ transaction.commit();
+ transaction.close();
+
+ sink.process();
+
+ int rowsInDb = countRows(fullTableName);
+ assertEquals(1, rowsInDb);
+
+ sink.stop();
+ assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+
+ dropTable(fullTableName);
+ }
+
+ @Test
+ public void testInnerColumnsWithArrayMapping() throws EventDeliveryException, SQLException {
+
+ final String fullTableName = "FLUME_JSON_TEST";
+
+ String ddl = "CREATE TABLE IF NOT EXISTS " + fullTableName
+ + " (flume_time timestamp not null, col1 varchar , col2 double, col3 varchar[], col4 integer[]"
+ + " CONSTRAINT pk PRIMARY KEY (flume_time))\n";
+ String columns = "col1,col2,col3,col4";
+ String rowkeyType = DefaultKeyGenerator.TIMESTAMP.name();
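+ // The a.b[*].c path collects the "c" value from each element of array "b" into the col3 varchar[].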
+ String columnsMapping = "{\"col1\":\"col1\",\"col2\":\"x.y\",\"col3\":\"a.b[*].c\",\"col4\":\"col4\"}";
+
+ initSinkContext(fullTableName, ddl, columns, columnsMapping, rowkeyType, null);
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+ final String eventBody = "{\"col1\" : \"kalyan\", \"x\" : {\"y\" : 10.5}, \"a\" : {\"b\" : [{\"c\" : \"abc\"}, {\"c\" : \"pqr\"}, {\"c\" : \"xyz\"}] , \"b2\" : 111}, \"col4\" : [1,2,3,4]}";
+ final Event event = EventBuilder.withBody(Bytes.toBytes(eventBody));
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ channel.put(event);
+ transaction.commit();
+ transaction.close();
+
+ sink.process();
+
+ int rowsInDb = countRows(fullTableName);
+ assertEquals(1, rowsInDb);
+
+ sink.stop();
+ assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+
+ dropTable(fullTableName);
+ }
+
+ @Test
+ public void testKeyGenerator() throws EventDeliveryException, SQLException {
+
+ final String fullTableName = "FLUME_JSON_TEST";
+ initSinkContextWithDefaults(fullTableName);
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+ final String eventBody = "{\"col1\" : \"kalyan\", \"col2\" : 10.5, \"col3\" : [\"abc\",\"pqr\",\"xyz\"], \"col4\" : [1,2,3,4]}";
+ final Event event = EventBuilder.withBody(Bytes.toBytes(eventBody));
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ channel.put(event);
+ transaction.commit();
+ transaction.close();
+
+ sink.process();
+
+ int rowsInDb = countRows(fullTableName);
+ assertEquals(1, rowsInDb);
+
+ sink.stop();
+ assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+
+ dropTable(fullTableName);
+ }
+
+ @Test
+ public void testMismatchKeyGenerator() throws EventDeliveryException, SQLException {
+
+ final String fullTableName = "FLUME_JSON_TEST";
+ initSinkContextWithDefaults(fullTableName);
+ setConfig(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR,
+ DefaultKeyGenerator.UUID.name());
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+ final String eventBody = "{\"col1\" : \"kalyan\", \"col2\" : 10.5, \"col3\" : [\"abc\",\"pqr\",\"xyz\"], \"col4\" : [1,2,3,4]}";
+ final Event event = EventBuilder.withBody(Bytes.toBytes(eventBody));
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ channel.put(event);
+ transaction.commit();
+ transaction.close();
+
+ try {
+ sink.process();
+ fail("Expected process() to fail for a mismatched row key generator");
+ } catch (Exception ex) {
+ assertTrue(ex.getCause().getMessage().contains("java.lang.IllegalArgumentException: Invalid format:"));
+ }
+
+ dropTable(fullTableName);
+ }
+
+ @Test
+ public void testMissingColumnsInEvent() throws EventDeliveryException, SQLException {
+
+ final String fullTableName = "FLUME_JSON_TEST";
+ initSinkContextWithDefaults(fullTableName);
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+ final String eventBody = "{\"col1\" : \"kalyan\", \"col3\" : [\"abc\",\"pqr\",\"xyz\"], \"col4\" : [1,2,3,4]}";
+ final Event event = EventBuilder.withBody(Bytes.toBytes(eventBody));
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ channel.put(event);
+ transaction.commit();
+ transaction.close();
+
+ sink.process();
+
+ int rowsInDb = countRows(fullTableName);
+ assertEquals(0, rowsInDb);
+
+ sink.stop();
+ assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+
+ dropTable(fullTableName);
+ }
+
+ @Test
+ public void testBatchEvents() throws EventDeliveryException, SQLException {
+
+ final String fullTableName = "FLUME_JSON_TEST";
+ initSinkContextWithDefaults(fullTableName);
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+ int numEvents = 150;
+ String col1 = "val1";
+ String a1 = "[aaa,bbb,ccc]";
+ String a2 = "[1,2,3,4]";
+ String eventBody = null;
+ List<Event> eventList = Lists.newArrayListWithCapacity(numEvents);
+ for (int i = 0; i < numEvents; i++) {
+ eventBody = "{\"col1\" : \"" + (col1 + i) + "\", \"col2\" : " + i * 10.5 + " , \"col3\" : " + a1
+ + " , \"col4\" : " + a2 + "}";
+ Event event = EventBuilder.withBody(Bytes.toBytes(eventBody));
+ eventList.add(event);
+ }
+
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ for (Event event : eventList) {
+ channel.put(event);
+ }
+ transaction.commit();
+ transaction.close();
+
+ // A single process() call handles at most one batch, so keep processing
+ // until the sink stops returning READY (channel drained).
+ while (sink.process() == Sink.Status.READY) {
+ }
+
+ int rowsInDb = countRows(fullTableName);
+ assertEquals(eventList.size(), rowsInDb);
+
+ sink.stop();
+ assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+
+ dropTable(fullTableName);
+ }
+
+ @Test
+ public void testEventsWithHeaders() throws Exception {
+
+ sinkContext = new Context();
+ final String fullTableName = "FLUME_JSON_TEST";
+ final String ddl = "CREATE TABLE IF NOT EXISTS "
+ + fullTableName
+ + " (rowkey VARCHAR not null, col1 varchar , col2 double, col3 varchar[], col4 integer[], host varchar , source varchar \n"
+ + " CONSTRAINT pk PRIMARY KEY (rowkey))\n";
+ String columns = "col1,col2,col3,col4";
+ String columnsMapping = "{\"col1\":\"col1\",\"col2\":\"col2\",\"col3\":\"col3\",\"col4\":\"col4\"}";
+ String rowkeyType = DefaultKeyGenerator.UUID.name();
+ String headers = "host,source";
+ initSinkContext(fullTableName, ddl, columns, columnsMapping, rowkeyType, headers);
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+
+ int numEvents = 10;
+ String col1 = "val1";
+ String a1 = "[aaa,bbb,ccc]";
+ String a2 = "[1,2,3,4]";
+ String hostHeader = "host1";
+ String sourceHeader = "source1";
+ String eventBody = null;
+ List<Event> eventList = Lists.newArrayListWithCapacity(numEvents);
+ for (int i = 0; i < numEvents; i++) {
+ eventBody = "{\"col1\" : \"" + (col1 + i) + "\", \"col2\" : " + i * 10.5 + " , \"col3\" : " + a1
+ + " , \"col4\" : " + a2 + "}";
+ Map<String, String> headerMap = Maps.newHashMapWithExpectedSize(2);
+ headerMap.put("host", hostHeader);
+ headerMap.put("source", sourceHeader);
+ Event event = EventBuilder.withBody(Bytes.toBytes(eventBody), headerMap);
+ eventList.add(event);
+ }
+
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ for (Event event : eventList) {
+ channel.put(event);
+ }
+ transaction.commit();
+ transaction.close();
+
+ sink.process();
+
+ final String query = " SELECT * FROM \n " + fullTableName;
+ Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+ final ResultSet rs;
+ final Connection conn = DriverManager.getConnection(getUrl(), props);
+ try {
+ rs = conn.createStatement().executeQuery(query);
+ assertTrue(rs.next());
+ assertEquals("host1", rs.getString("host"));
+ assertEquals("source1", rs.getString("source"));
+
+ assertTrue(rs.next());
+ assertEquals("host1", rs.getString("host"));
+ assertEquals("source1", rs.getString("source"));
+ } finally {
+ if (conn != null) {
+ conn.close();
+ }
+ }
+ sink.stop();
+ assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+
+ dropTable(fullTableName);
+ }
+
+ private Channel initChannel() {
+ // Channel configuration
+ Context channelContext = new Context();
+ channelContext.put("capacity", "10000");
+ channelContext.put("transactionCapacity", "200");
+
+ Channel channel = new MemoryChannel();
+ channel.setName("memorychannel");
+ Configurables.configure(channel, channelContext);
+ return channel;
+ }
+
+ private void initSinkContext(final String fullTableName, final String ddl, final String columns,
+ final String columnsMapping, final String rowkeyType, final String headers) {
+ Preconditions.checkNotNull(fullTableName);
+ sinkContext = new Context();
+ sinkContext.put(FlumeConstants.CONFIG_TABLE, fullTableName);
+ sinkContext.put(FlumeConstants.CONFIG_JDBC_URL, getUrl());
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER, EventSerializers.JSON.name());
+ sinkContext.put(FlumeConstants.CONFIG_TABLE_DDL, ddl);
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMN_NAMES, columns);
+ if (null != columnsMapping)
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMNS_MAPPING,
+ columnsMapping);
+ if (null != rowkeyType)
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR,
+ rowkeyType);
+ if (null != headers)
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_HEADER_NAMES, headers);
+ }
+
+ private void initSinkContextWithDefaults(final String fullTableName) {
+ String ddl = "CREATE TABLE IF NOT EXISTS " + fullTableName
+ + " (flume_time timestamp not null, col1 varchar , col2 double, col3 varchar[], col4 integer[]"
+ + " CONSTRAINT pk PRIMARY KEY (flume_time))\n";
+ String columns = "col1,col2,col3,col4";
+ String columnsMapping = "{\"col1\":\"col1\",\"col2\":\"col2\",\"col3\":\"col3\",\"col4\":\"col4\"}";
+ String rowkeyType = DefaultKeyGenerator.TIMESTAMP.name();
+ initSinkContext(fullTableName, ddl, columns, columnsMapping, rowkeyType, null);
+ }
+
+ private void setConfig(final String configName, final String configValue) {
+ Preconditions.checkNotNull(sinkContext);
+ Preconditions.checkNotNull(configName);
+ Preconditions.checkNotNull(configValue);
+ sinkContext.put(configName, configValue);
+ }
+
+ private int countRows(final String fullTableName) throws SQLException {
+ Preconditions.checkNotNull(fullTableName);
+ Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+ final Connection conn = DriverManager.getConnection(getUrl(), props);
+ ResultSet rs = null;
+ try {
+ rs = conn.createStatement().executeQuery("select count(*) from " + fullTableName);
+ int rowsCount = 0;
+ while (rs.next()) {
+ rowsCount = rs.getInt(1);
+ }
+ return rowsCount;
+
+ } finally {
+ if (rs != null) {
+ rs.close();
+ }
+ if (conn != null) {
+ conn.close();
+ }
+ }
+
+ }
+
+ private void dropTable(final String fullTableName) throws SQLException {
+ Preconditions.checkNotNull(fullTableName);
+ Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+ final Connection conn = DriverManager.getConnection(getUrl(), props);
+ try {
+ conn.createStatement().execute("drop table if exists " + fullTableName);
+ } finally {
+ if (conn != null) {
+ conn.close();
+ }
+ }
+ }
+
+}
diff --git a/phoenix-flume/src/it/java/org/apache/phoenix/flume/PhoenixSinkIT.java b/phoenix-flume/src/it/java/org/apache/phoenix/flume/PhoenixSinkIT.java
new file mode 100644
index 0000000..867d1ad
--- /dev/null
+++ b/phoenix-flume/src/it/java/org/apache/phoenix/flume/PhoenixSinkIT.java
@@ -0,0 +1,271 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.flume;
+
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.util.Properties;
+
+import org.apache.flume.Channel;
+import org.apache.flume.Context;
+import org.apache.flume.Sink;
+import org.apache.flume.SinkFactory;
+import org.apache.flume.Transaction;
+import org.apache.flume.channel.MemoryChannel;
+import org.apache.flume.conf.Configurables;
+import org.apache.flume.event.EventBuilder;
+import org.apache.flume.lifecycle.LifecycleState;
+import org.apache.flume.sink.DefaultSinkFactory;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.phoenix.end2end.BaseHBaseManagedTimeIT;
+import org.apache.phoenix.flume.serializer.CustomSerializer;
+import org.apache.phoenix.flume.serializer.EventSerializers;
+import org.apache.phoenix.flume.sink.NullPhoenixSink;
+import org.apache.phoenix.flume.sink.PhoenixSink;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.TestUtil;
+import org.junit.Assert;
+import org.junit.Test;
+
+
+public class PhoenixSinkIT extends BaseHBaseManagedTimeIT {
+
+ private Context sinkContext;
+ private PhoenixSink sink;
+
+
+ @Test
+ public void testSinkCreation() {
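+ // The sink is instantiated from its fully-qualified class name, just as a Flume agent configuration would reference it.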
+ SinkFactory factory = new DefaultSinkFactory();
+ Sink sink = factory.create("PhoenixSink__", "org.apache.phoenix.flume.sink.PhoenixSink");
+ Assert.assertNotNull(sink);
+ Assert.assertTrue(PhoenixSink.class.isInstance(sink));
+ }
+ @Test
+ public void testConfiguration () {
+
+ sinkContext = new Context ();
+ sinkContext.put(FlumeConstants.CONFIG_TABLE, "test");
+ sinkContext.put(FlumeConstants.CONFIG_JDBC_URL, getUrl());
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER,EventSerializers.REGEX.name());
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMN_NAMES,"col1,col2");
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR,DefaultKeyGenerator.TIMESTAMP.name());
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+ }
+
+
+
+ @Test(expected= NullPointerException.class)
+ public void testInvalidConfiguration () {
+
+ sinkContext = new Context ();
+ sinkContext.put(FlumeConstants.CONFIG_JDBC_URL, getUrl());
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER,EventSerializers.REGEX.name());
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMN_NAMES,"col1,col2");
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR,DefaultKeyGenerator.TIMESTAMP.name());
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+ }
+
+ @Test(expected=RuntimeException.class)
+ public void testInvalidConfigurationOfSerializer () {
+
+ sinkContext = new Context ();
+ sinkContext.put(FlumeConstants.CONFIG_TABLE, "test");
+ sinkContext.put(FlumeConstants.CONFIG_JDBC_URL, getUrl());
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER,"unknown");
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMN_NAMES,"col1,col2");
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR,DefaultKeyGenerator.TIMESTAMP.name());
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+ }
+
+ @Test
+ public void testInvalidTable() {
+ sinkContext = new Context ();
+ sinkContext.put(FlumeConstants.CONFIG_TABLE, "flume_test");
+ sinkContext.put(FlumeConstants.CONFIG_JDBC_URL, getUrl());
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER, EventSerializers.REGEX.name());
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMN_NAMES, "col1,col2");
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR,DefaultKeyGenerator.TIMESTAMP.name());
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+ try {
+ sink.start();
+ fail("Expected sink.start() to fail for an undefined table");
+ } catch (Exception e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("ERROR 1012 (42M03): Table undefined."));
+ }
+ }
+
+ @Test
+ public void testSinkLifecycle () {
+ String tableName = generateUniqueName();
+
+ String ddl = "CREATE TABLE " + tableName +
+ " (flume_time timestamp not null, col1 varchar , col2 varchar" +
+ " CONSTRAINT pk PRIMARY KEY (flume_time))\n";
+
+ sinkContext = new Context ();
+ sinkContext.put(FlumeConstants.CONFIG_TABLE, tableName);
+ sinkContext.put(FlumeConstants.CONFIG_JDBC_URL, getUrl());
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER,EventSerializers.REGEX.name());
+ sinkContext.put(FlumeConstants.CONFIG_TABLE_DDL, ddl);
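+ // Two tab-separated capture groups map positionally to col1 and col2.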
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_REGULAR_EXPRESSION,"^([^\t]+)\t([^\t]+)$");
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMN_NAMES,"col1,col2");
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR,DefaultKeyGenerator.TIMESTAMP.name());
+
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+ Assert.assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+ Assert.assertEquals(LifecycleState.START, sink.getLifecycleState());
+ sink.stop();
+ Assert.assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+ }
+
+ @Test
+ public void testCreateTable () throws Exception {
+ String tableName = generateUniqueName();
+ String ddl = "CREATE TABLE " + tableName + " " +
+ " (flume_time timestamp not null, col1 varchar , col2 varchar" +
+ " CONSTRAINT pk PRIMARY KEY (flume_time))\n";
+
+ final String fullTableName = tableName;
+ sinkContext = new Context ();
+ sinkContext.put(FlumeConstants.CONFIG_TABLE, fullTableName);
+ sinkContext.put(FlumeConstants.CONFIG_JDBC_URL, getUrl());
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER,EventSerializers.REGEX.name());
+ sinkContext.put(FlumeConstants.CONFIG_TABLE_DDL, ddl);
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_REGULAR_EXPRESSION,"^([^\t]+)\t([^\t]+)$");
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMN_NAMES,"col1,col2");
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR, DefaultKeyGenerator.TIMESTAMP.name());
+
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+ Assert.assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+ HBaseAdmin admin = driver.getConnectionQueryServices(getUrl(), TestUtil.TEST_PROPERTIES).getAdmin();
+ try {
+ boolean exists = admin.tableExists(fullTableName);
+ Assert.assertTrue(exists);
+ }finally {
+ admin.close();
+ }
+ }
+
+ @Test
+ public void testExtendedSink() throws Exception {
+ // Create a mock of NullPhoenixSink, which extends PhoenixSink, and verify that configure() is invoked
+
+ PhoenixSink sink = mock(NullPhoenixSink.class);
+ sinkContext = new Context();
+ sinkContext.put(FlumeConstants.CONFIG_TABLE, "FLUME_TEST_EXTENDED");
+ sinkContext.put(FlumeConstants.CONFIG_JDBC_URL, getUrl());
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER, CustomSerializer.class.getName());
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMN_NAMES, "ID, COUNTS");
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR, DefaultKeyGenerator.TIMESTAMP.name());
+
+ Configurables.configure(sink, sinkContext);
+ verify(sink).configure(sinkContext);
+ }
+
+ @Test
+ public void testExtendedSerializer() throws Exception {
+ /*
+ Sadly, we can't mock a serializer, as the PhoenixSink does a Class.forName() to instantiate
+ it. Instead, we'll set up a Flume channel and verify the data our custom serializer wrote.
+ */
+
+ final String fullTableName = "FLUME_TEST_EXTENDED";
+ final String ddl = "CREATE TABLE " + fullTableName + " (ID BIGINT NOT NULL PRIMARY KEY, COUNTS UNSIGNED_LONG)";
+ Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+ final Connection conn = DriverManager.getConnection(getUrl(), props);
+ conn.createStatement().execute(ddl);
+ conn.commit();
+
+ sinkContext = new Context();
+ sinkContext.put(FlumeConstants.CONFIG_TABLE, "FLUME_TEST_EXTENDED");
+ sinkContext.put(FlumeConstants.CONFIG_JDBC_URL, getUrl());
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER, CustomSerializer.class.getName());
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMN_NAMES, "ID, COUNTS");
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR, DefaultKeyGenerator.TIMESTAMP.name());
+
+ PhoenixSink sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+
+ // Send a test event through Flume, using our custom serializer
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+ sink.start();
+
+ final Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ channel.put(EventBuilder.withBody(Bytes.toBytes("test event")));
+ transaction.commit();
+ transaction.close();
+
+ sink.process();
+ sink.stop();
+
+ // Verify our serializer wrote out data
+ ResultSet rs = conn.createStatement().executeQuery("SELECT * FROM FLUME_TEST_EXTENDED");
+ assertTrue(rs.next());
+ assertEquals(1L, rs.getLong(1));
+ }
+
+ private Channel initChannel() {
+ //Channel configuration
+ Context channelContext = new Context();
+ channelContext.put("capacity", "10000");
+ channelContext.put("transactionCapacity", "200");
+
+ Channel channel = new MemoryChannel();
+ channel.setName("memorychannel");
+ Configurables.configure(channel, channelContext);
+ return channel;
+ }
+
+
+}
diff --git a/phoenix-flume/src/it/java/org/apache/phoenix/flume/RegexEventSerializerIT.java b/phoenix-flume/src/it/java/org/apache/phoenix/flume/RegexEventSerializerIT.java
new file mode 100644
index 0000000..9548e65
--- /dev/null
+++ b/phoenix-flume/src/it/java/org/apache/phoenix/flume/RegexEventSerializerIT.java
@@ -0,0 +1,417 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.flume;
+
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.flume.Channel;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.Transaction;
+import org.apache.flume.channel.MemoryChannel;
+import org.apache.flume.conf.Configurables;
+import org.apache.flume.event.EventBuilder;
+import org.apache.flume.lifecycle.LifecycleState;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.phoenix.end2end.BaseHBaseManagedTimeIT;
+import org.apache.phoenix.flume.serializer.EventSerializers;
+import org.apache.phoenix.flume.sink.PhoenixSink;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.junit.Test;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+
+public class RegexEventSerializerIT extends BaseHBaseManagedTimeIT {
+
+ private Context sinkContext;
+ private PhoenixSink sink;
+
+ @Test
+ public void testKeyGenerator() throws EventDeliveryException, SQLException {
+
+ final String fullTableName = generateUniqueName();
+ initSinkContextWithDefaults(fullTableName);
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+ final String eventBody = "val1\tval2";
+ final Event event = EventBuilder.withBody(Bytes.toBytes(eventBody));
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ channel.put(event);
+ transaction.commit();
+ transaction.close();
+
+ sink.process();
+
+ int rowsInDb = countRows(fullTableName);
+ assertEquals(1, rowsInDb);
+
+ sink.stop();
+ assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+
+ }
+
+
+ @Test
+ public void testMismatchKeyGenerator() throws EventDeliveryException, SQLException {
+
+ final String fullTableName = generateUniqueName();
+ initSinkContextWithDefaults(fullTableName);
+ setConfig(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR,DefaultKeyGenerator.UUID.name());
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+ final String eventBody = "val1\tval2";
+ final Event event = EventBuilder.withBody(Bytes.toBytes(eventBody));
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ channel.put(event);
+ transaction.commit();
+ transaction.close();
+
+ try {
+ sink.process();
+ fail();
+ } catch (Exception ex) {
+ assertTrue(ex.getCause().getMessage().contains("java.lang.IllegalArgumentException: Invalid format:"));
+ }
+ }
+
+ @Test
+ public void testMissingColumnsInEvent() throws EventDeliveryException, SQLException {
+
+ final String fullTableName = generateUniqueName();
+ initSinkContextWithDefaults(fullTableName);
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+ final String eventBody = "val1";
+ final Event event = EventBuilder.withBody(Bytes.toBytes(eventBody));
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ channel.put(event);
+ transaction.commit();
+ transaction.close();
+
+ sink.process();
+
+ int rowsInDb = countRows(fullTableName);
+ assertEquals(0, rowsInDb);
+
+ sink.stop();
+ assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+
+ }
+
+ @Test
+ public void testBatchEvents() throws EventDeliveryException, SQLException {
+
+ final String fullTableName = generateUniqueName();
+ initSinkContextWithDefaults(fullTableName);
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+ int numEvents = 150;
+ String col1 = "val1";
+ String col2 = "val2";
+ String eventBody = null;
+ List<Event> eventList = Lists.newArrayListWithCapacity(numEvents);
+ for(int i = 0 ; i < numEvents ; i++) {
+ eventBody = (col1 + i) + "\t" + (col2 + i);
+ Event event = EventBuilder.withBody(Bytes.toBytes(eventBody));
+ eventList.add(event);
+ }
+
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ for(Event event : eventList) {
+ channel.put(event);
+ }
+ transaction.commit();
+ transaction.close();
+
+ sink.process();
+
+ int rowsInDb = countRows(fullTableName);
+ assertEquals(eventList.size(), rowsInDb);
+
+ sink.stop();
+ assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+
+ }
+
+ @Test
+ public void testApacheLogRegex() throws Exception {
+
+ sinkContext = new Context();
+ final String fullTableName = generateUniqueName();
+ final String logRegex = "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) \"([^ ]+) ([^ ]+)" +
+ " ([^\"]+)\" (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\"[^\"]*\")" +
+ " ([^ \"]*|\"[^\"]*\"))?";
+
+ final String columns = "host,identity,user,time,method,request,protocol,status,size,referer,agent";
+
+ String ddl = "CREATE TABLE " + fullTableName +
+ " (uid VARCHAR NOT NULL, user VARCHAR, time varchar, host varchar , identity varchar, method varchar, request varchar , protocol varchar," +
+ " status integer , size integer , referer varchar , agent varchar CONSTRAINT pk PRIMARY KEY (uid))\n";
+
+ sinkContext.put(FlumeConstants.CONFIG_TABLE, fullTableName);
+ sinkContext.put(FlumeConstants.CONFIG_JDBC_URL, getUrl());
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER,EventSerializers.REGEX.name());
+ sinkContext.put(FlumeConstants.CONFIG_TABLE_DDL, ddl);
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_REGULAR_EXPRESSION,logRegex);
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMN_NAMES,columns);
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR,DefaultKeyGenerator.UUID.name());
+
+ String message1 = "33.22.11.00 - user1 [12/Dec/2013:07:01:19 +0000] " +
+ "\"GET /wp-admin/css/install.css HTTP/1.0\" 200 813 " +
+ "\"http://www.google.com\" \"Mozilla/5.0 (comp" +
+ "atible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)\"";
+
+ String message2 = "192.168.20.1 - user2 [13/Dec/2013:06:05:19 +0000] " +
+ "\"GET /wp-admin/css/install.css HTTP/1.0\" 400 363 " +
+ "\"http://www.salesforce.com/in/?ir=1\" \"Mozilla/5.0 (comp" +
+ "atible;)\"";
+
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+
+ final Event event1 = EventBuilder.withBody(Bytes.toBytes(message1));
+ final Event event2 = EventBuilder.withBody(Bytes.toBytes(message2));
+
+ final Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ channel.put(event1);
+ channel.put(event2);
+ transaction.commit();
+ transaction.close();
+
+ sink.process();
+
+ final String query = " SELECT * FROM \n " + fullTableName;
+ Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+ final ResultSet rs ;
+ final Connection conn = DriverManager.getConnection(getUrl(), props);
+ try {
+ rs = conn.createStatement().executeQuery(query);
+ assertTrue(rs.next());
+ assertTrue(rs.next());
+
+ } finally {
+ if(conn != null) {
+ conn.close();
+ }
+ }
+ sink.stop();
+ assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+
+ }
+
+
+ @Test
+ public void testEventsWithHeaders() throws Exception {
+
+ sinkContext = new Context();
+ final String fullTableName = generateUniqueName();
+ final String ddl = "CREATE TABLE " + fullTableName +
+ " (rowkey VARCHAR not null, col1 varchar , cf1.col2 varchar , host varchar , source varchar \n" +
+ " CONSTRAINT pk PRIMARY KEY (rowkey))\n";
+
+ sinkContext.put(FlumeConstants.CONFIG_TABLE, fullTableName);
+ sinkContext.put(FlumeConstants.CONFIG_JDBC_URL, getUrl());
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER,EventSerializers.REGEX.name());
+ sinkContext.put(FlumeConstants.CONFIG_TABLE_DDL, ddl);
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_REGULAR_EXPRESSION,"^([^\t]+)\t([^\t]+)$");
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMN_NAMES,"col1,cf1.col2");
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_HEADER_NAMES,"host,source");
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR,DefaultKeyGenerator.UUID.name());
+
+ sink = new PhoenixSink();
+ Configurables.configure(sink, sinkContext);
+ assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
+
+ final Channel channel = this.initChannel();
+ sink.setChannel(channel);
+
+ sink.start();
+
+ int numEvents = 10;
+ String col1 = "val1";
+ String col2 = "val2";
+ String hostHeader = "host1";
+ String sourceHeader = "source1";
+ String eventBody = null;
+ List<Event> eventList = Lists.newArrayListWithCapacity(numEvents);
+ for(int i = 0 ; i < numEvents ; i++) {
+ eventBody = (col1 + i) + "\t" + (col2 + i);
+ Map<String, String> headerMap = Maps.newHashMapWithExpectedSize(2);
+ headerMap.put("host",hostHeader);
+ headerMap.put("source",sourceHeader);
+ Event event = EventBuilder.withBody(Bytes.toBytes(eventBody),headerMap);
+ eventList.add(event);
+ }
+
+ // put event in channel
+ Transaction transaction = channel.getTransaction();
+ transaction.begin();
+ for(Event event : eventList) {
+ channel.put(event);
+ }
+ transaction.commit();
+ transaction.close();
+
+ sink.process();
+
+ final String query = " SELECT * FROM \n " + fullTableName;
+ Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+ final ResultSet rs ;
+ final Connection conn = DriverManager.getConnection(getUrl(), props);
+ try {
+ rs = conn.createStatement().executeQuery(query);
+ assertTrue(rs.next());
+ assertEquals("host1",rs.getString("host"));
+ assertEquals("source1",rs.getString("source"));
+
+ assertTrue(rs.next());
+ assertEquals("host1",rs.getString("host"));
+ assertEquals("source1",rs.getString("source"));
+ } finally {
+ if(conn != null) {
+ conn.close();
+ }
+ }
+ sink.stop();
+ assertEquals(LifecycleState.STOP, sink.getLifecycleState());
+
+ }
+
+ private Channel initChannel() {
+ //Channel configuration
+ Context channelContext = new Context();
+ channelContext.put("capacity", "10000");
+ channelContext.put("transactionCapacity", "200");
+
+ Channel channel = new MemoryChannel();
+ channel.setName("memorychannel");
+ Configurables.configure(channel, channelContext);
+ return channel;
+ }
+
+ private void initSinkContextWithDefaults(final String fullTableName) {
+ Preconditions.checkNotNull(fullTableName);
+ sinkContext = new Context();
+ String ddl = "CREATE TABLE " + fullTableName +
+ " (flume_time timestamp not null, col1 varchar, col2 varchar" +
+ " CONSTRAINT pk PRIMARY KEY (flume_time))\n";
+
+ sinkContext.put(FlumeConstants.CONFIG_TABLE, fullTableName);
+ sinkContext.put(FlumeConstants.CONFIG_JDBC_URL, getUrl());
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER,EventSerializers.REGEX.name());
+ sinkContext.put(FlumeConstants.CONFIG_TABLE_DDL, ddl);
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_REGULAR_EXPRESSION,"^([^\t]+)\t([^\t]+)$");
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMN_NAMES,"col1,col2");
+ sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR,DefaultKeyGenerator.TIMESTAMP.name());
+
+ }
+
+ private void setConfig(final String configName , final String configValue) {
+ Preconditions.checkNotNull(sinkContext);
+ Preconditions.checkNotNull(configName);
+ Preconditions.checkNotNull(configValue);
+ sinkContext.put(configName, configValue);
+ }
+
+ private int countRows(final String fullTableName) throws SQLException {
+ Preconditions.checkNotNull(fullTableName);
+ Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+ final Connection conn = DriverManager.getConnection(getUrl(), props);
+ ResultSet rs = null ;
+ try {
+ rs = conn.createStatement().executeQuery("select count(*) from " + fullTableName);
+ int rowsCount = 0;
+ while(rs.next()) {
+ rowsCount = rs.getInt(1);
+ }
+ return rowsCount;
+
+ } finally {
+ if(rs != null) {
+ rs.close();
+ }
+ if(conn != null) {
+ conn.close();
+ }
+ }
+ }
+
+}
diff --git a/phoenix-flume/src/it/java/org/apache/phoenix/flume/serializer/CustomSerializer.java b/phoenix-flume/src/it/java/org/apache/phoenix/flume/serializer/CustomSerializer.java
new file mode 100644
index 0000000..5db5fa6
--- /dev/null
+++ b/phoenix-flume/src/it/java/org/apache/phoenix/flume/serializer/CustomSerializer.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.flume.serializer;
+
+import java.sql.SQLException;
+import java.util.List;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+
+public class CustomSerializer extends BaseEventSerializer {
+ @Override
+ public void doConfigure(Context context) {
+
+ }
+
+ @Override
+ public void doInitialize() throws SQLException {
+
+ }
+
+ @Override
+ public void upsertEvents(List<Event> events) throws SQLException {
+ // Just execute a sample UPSERT
+ connection.createStatement().execute("UPSERT INTO FLUME_TEST_EXTENDED(ID, COUNTS) VALUES(1, 1)");
+ connection.commit();
+ }
+}
diff --git a/phoenix-flume/src/it/java/org/apache/phoenix/flume/sink/NullPhoenixSink.java b/phoenix-flume/src/it/java/org/apache/phoenix/flume/sink/NullPhoenixSink.java
new file mode 100644
index 0000000..1df52e1
--- /dev/null
+++ b/phoenix-flume/src/it/java/org/apache/phoenix/flume/sink/NullPhoenixSink.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.flume.sink;
+
+public class NullPhoenixSink extends PhoenixSink {
+}
diff --git a/phoenix-flume/src/main/java/org/apache/phoenix/flume/DefaultKeyGenerator.java b/phoenix-flume/src/main/java/org/apache/phoenix/flume/DefaultKeyGenerator.java
new file mode 100644
index 0000000..3820c2a
--- /dev/null
+++ b/phoenix-flume/src/main/java/org/apache/phoenix/flume/DefaultKeyGenerator.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.flume;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.Random;
+
+import org.apache.phoenix.util.DateUtil;
+
+public enum DefaultKeyGenerator implements KeyGenerator {
+
+ UUID {
+
+ @Override
+ public String generate() {
+ return String.valueOf(java.util.UUID.randomUUID());
+ }
+
+ },
+ TIMESTAMP {
+
+ @Override
+ public String generate() {
+ Timestamp ts = new Timestamp(System.currentTimeMillis());
+ return DateUtil.DEFAULT_DATE_FORMATTER.format(ts);
+ }
+
+ },
+ DATE {
+
+ @Override
+ public String generate() {
+ Date dt = new Date(System.currentTimeMillis());
+ return DateUtil.DEFAULT_DATE_FORMATTER.format(dt);
+ }
+ },
+ RANDOM {
+
+ @Override
+ public String generate() {
+ return String.valueOf(new Random().nextLong());
+ }
+
+ },
+ NANOTIMESTAMP {
+
+ @Override
+ public String generate() {
+ return String.valueOf(System.nanoTime());
+ }
+
+ };
+}
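
Each constant above doubles as a stateless KeyGenerator, so BaseEventSerializer can resolve one by name from the rowkeyType config value via DefaultKeyGenerator.valueOf(). A minimal sketch of that lookup (the demo class itself is illustrative):

import org.apache.phoenix.flume.DefaultKeyGenerator;
import org.apache.phoenix.flume.KeyGenerator;

public class KeyGeneratorDemo {
    public static void main(String[] args) {
        // Resolve by name, as BaseEventSerializer does with the rowkeyType value
        KeyGenerator generator = DefaultKeyGenerator.valueOf("UUID");
        System.out.println(generator.generate()); // prints a random UUID string
    }
}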
diff --git a/phoenix-flume/src/main/java/org/apache/phoenix/flume/FlumeConstants.java b/phoenix-flume/src/main/java/org/apache/phoenix/flume/FlumeConstants.java
new file mode 100644
index 0000000..a146bbe
--- /dev/null
+++ b/phoenix-flume/src/main/java/org/apache/phoenix/flume/FlumeConstants.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.flume;
+
+public final class FlumeConstants {
+
+ /**
+ * The HBase table to which the sink should write.
+ */
+ public static final String CONFIG_TABLE = "table";
+ /**
+ * The DDL statement for the HBase table into which events are ingested.
+ */
+ public static final String CONFIG_TABLE_DDL = "ddl";
+ /**
+ * Maximum number of events the sink should take from the channel per transaction, if available.
+ */
+ public static final String CONFIG_BATCHSIZE = "batchSize";
+ /**
+ * The fully qualified class name of the serializer the sink should use.
+ */
+ public static final String CONFIG_SERIALIZER = "serializer";
+ /**
+ * Configuration to pass to the serializer.
+ */
+ public static final String CONFIG_SERIALIZER_PREFIX = CONFIG_SERIALIZER + ".";
+
+ /**
+ * Configuration for the zookeeper quorum.
+ */
+ public static final String CONFIG_ZK_QUORUM = "zookeeperQuorum";
+
+ /**
+ * Configuration for the jdbc url.
+ */
+ public static final String CONFIG_JDBC_URL = "jdbcUrl";
+
+ /**
+ * Default batch size.
+ */
+ public static final Integer DEFAULT_BATCH_SIZE = 100;
+
+ /** Regular expression used to parse groups from event data. */
+ public static final String CONFIG_REGULAR_EXPRESSION = "regex";
+ public static final String REGEX_DEFAULT = "(.*)";
+
+ /** Whether to ignore case when performing regex matches. */
+ public static final String IGNORE_CASE_CONFIG = "regexIgnoreCase";
+ public static final boolean IGNORE_CASE_DEFAULT = false;
+
+ /** JSON mapping used to extract column values from event data. */
+ public static final String CONFIG_COLUMNS_MAPPING = "columnsMapping";
+ public static final String CONFIG_PARTIAL_SCHEMA = "partialSchema";
+ public static final String JSON_DEFAULT = "{}";
+
+ /** CSV settings used to parse fields from event data. */
+ public static final String CSV_DELIMITER = "csvDelimiter";
+ public static final String CSV_DELIMITER_DEFAULT = ",";
+ public static final String CSV_QUOTE = "csvQuote";
+ public static final String CSV_QUOTE_DEFAULT = "\"";
+ public static final String CSV_ESCAPE = "csvEscape";
+ public static final String CSV_ESCAPE_DEFAULT = "\\";
+ public static final String CSV_ARRAY_DELIMITER = "csvArrayDelimiter";
+ public static final String CSV_ARRAY_DELIMITER_DEFAULT = ",";
+
+ /** Comma-separated list of column names. */
+ public static final String CONFIG_COLUMN_NAMES = "columns";
+
+ /** The header columns to persist as columns into the default column family. */
+ public static final String CONFIG_HEADER_NAMES = "headers";
+
+ /** The row key generator type. */
+ public static final String CONFIG_ROWKEY_TYPE_GENERATOR = "rowkeyType";
+
+ /**
+ * The default delimiter for columns and headers.
+ */
+ public static final String DEFAULT_COLUMNS_DELIMITER = ",";
+}
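
Taken together, these keys fully describe a sink configuration. A hedged sketch of a programmatic configuration built from the constants above, mirroring the integration tests (the table name, columns, and JDBC URL are illustrative):

import org.apache.flume.Context;
import org.apache.phoenix.flume.FlumeConstants;

public class SinkContextSketch {
    public static Context buildContext() {
        Context ctx = new Context();
        ctx.put(FlumeConstants.CONFIG_TABLE, "EVENTS");                    // illustrative table
        ctx.put(FlumeConstants.CONFIG_JDBC_URL, "jdbc:phoenix:localhost"); // illustrative URL
        ctx.put(FlumeConstants.CONFIG_BATCHSIZE, "100");
        ctx.put(FlumeConstants.CONFIG_SERIALIZER, "REGEX");
        // serializer.* keys are consumed by the serializer implementation
        ctx.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_REGULAR_EXPRESSION, "^([^\\t]+)\\t([^\\t]+)$");
        ctx.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMN_NAMES, "col1,col2");
        ctx.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR, "UUID");
        return ctx;
    }
}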
diff --git a/phoenix-flume/src/main/java/org/apache/phoenix/flume/KeyGenerator.java b/phoenix-flume/src/main/java/org/apache/phoenix/flume/KeyGenerator.java
new file mode 100644
index 0000000..d823a56
--- /dev/null
+++ b/phoenix-flume/src/main/java/org/apache/phoenix/flume/KeyGenerator.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.flume;
+
+public interface KeyGenerator {
+
+ public String generate();
+}
+
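A custom generator only needs to implement generate(). Below is a sketch of a hypothetical epoch-millis generator, not part of this commit; note that BaseEventSerializer resolves only the DefaultKeyGenerator constants from configuration, so wiring in a custom generator would require a custom serializer:

import org.apache.phoenix.flume.KeyGenerator;

// Hypothetical example: emits the current epoch time in milliseconds as the row key.
public class EpochMillisKeyGenerator implements KeyGenerator {
    @Override
    public String generate() {
        return String.valueOf(System.currentTimeMillis());
    }
}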
diff --git a/phoenix-flume/src/main/java/org/apache/phoenix/flume/SchemaHandler.java b/phoenix-flume/src/main/java/org/apache/phoenix/flume/SchemaHandler.java
new file mode 100644
index 0000000..8b14b64
--- /dev/null
+++ b/phoenix-flume/src/main/java/org/apache/phoenix/flume/SchemaHandler.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.flume;
+
+import java.sql.Connection;
+import java.sql.SQLException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+import com.google.common.base.Throwables;
+
+public class SchemaHandler {
+
+ private static final Logger logger = LoggerFactory.getLogger(SchemaHandler.class);
+
+ public static boolean createTable(Connection connection, String createTableDdl) {
+ Preconditions.checkNotNull(connection);
+ Preconditions.checkNotNull(createTableDdl);
+ boolean status = true;
+ try {
+ status = connection.createStatement().execute(createTableDdl);
+ } catch (SQLException e) {
+ logger.error("An error occurred during executing the create table ddl {} ",createTableDdl);
+ Throwables.propagate(e);
+ }
+ return status;
+
+ }
+
+}
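
createTable() is invoked once from the serializer's initialize() when a ddl config value is present. A minimal standalone sketch of the same call (the JDBC URL and DDL are illustrative; IF NOT EXISTS keeps the call idempotent):

import java.sql.Connection;
import java.sql.DriverManager;

import org.apache.phoenix.flume.SchemaHandler;

public class SchemaHandlerSketch {
    public static void main(String[] args) throws Exception {
        // Illustrative URL; point this at a real Phoenix cluster
        try (Connection conn = DriverManager.getConnection("jdbc:phoenix:localhost")) {
            SchemaHandler.createTable(conn,
                "CREATE TABLE IF NOT EXISTS EVENTS (uid VARCHAR NOT NULL PRIMARY KEY, col1 VARCHAR)");
        }
    }
}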
diff --git a/phoenix-flume/src/main/java/org/apache/phoenix/flume/serializer/BaseEventSerializer.java b/phoenix-flume/src/main/java/org/apache/phoenix/flume/serializer/BaseEventSerializer.java
new file mode 100644
index 0000000..24527e3
--- /dev/null
+++ b/phoenix-flume/src/main/java/org/apache/phoenix/flume/serializer/BaseEventSerializer.java
@@ -0,0 +1,245 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.flume.serializer;
+
+import static org.apache.phoenix.flume.FlumeConstants.CONFIG_COLUMN_NAMES;
+import static org.apache.phoenix.flume.FlumeConstants.CONFIG_HEADER_NAMES;
+import static org.apache.phoenix.flume.FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR;
+import static org.apache.phoenix.flume.FlumeConstants.DEFAULT_COLUMNS_DELIMITER;
+import static org.apache.phoenix.util.PhoenixRuntime.UPSERT_BATCH_SIZE_ATTRIB;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.flume.Context;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.apache.phoenix.exception.SQLExceptionCode;
+import org.apache.phoenix.exception.SQLExceptionInfo;
+import org.apache.phoenix.flume.DefaultKeyGenerator;
+import org.apache.phoenix.flume.FlumeConstants;
+import org.apache.phoenix.flume.KeyGenerator;
+import org.apache.phoenix.flume.SchemaHandler;
+import org.apache.phoenix.util.ColumnInfo;
+import org.apache.phoenix.util.QueryUtil;
+import org.apache.phoenix.util.SchemaUtil;
+import org.apache.phoenix.util.StringUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+import com.google.common.base.Splitter;
+import com.google.common.base.Strings;
+import com.google.common.base.Throwables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+public abstract class BaseEventSerializer implements EventSerializer {
+
+ private static final Logger logger = LoggerFactory.getLogger(BaseEventSerializer.class);
+
+ protected Connection connection;
+ protected String fullTableName;
+ protected ColumnInfo[] columnMetadata;
+ protected boolean autoGenerateKey = false;
+ protected KeyGenerator keyGenerator;
+ protected List<String> colNames = Lists.newArrayListWithExpectedSize(10);
+ protected List<String> headers = Lists.newArrayListWithExpectedSize(5);
+ protected String upsertStatement;
+ private String jdbcUrl;
+ private Integer batchSize;
+ private String createTableDdl;
+
+ @Override
+ public void configure(Context context) {
+
+ this.createTableDdl = context.getString(FlumeConstants.CONFIG_TABLE_DDL);
+ this.fullTableName = context.getString(FlumeConstants.CONFIG_TABLE);
+ final String zookeeperQuorum = context.getString(FlumeConstants.CONFIG_ZK_QUORUM);
+ final String ipJdbcURL = context.getString(FlumeConstants.CONFIG_JDBC_URL);
+ this.batchSize = context.getInteger(FlumeConstants.CONFIG_BATCHSIZE, FlumeConstants.DEFAULT_BATCH_SIZE);
+ final String columnNames = context.getString(CONFIG_COLUMN_NAMES);
+ final String headersStr = context.getString(CONFIG_HEADER_NAMES);
+ final String keyGeneratorType = context.getString(CONFIG_ROWKEY_TYPE_GENERATOR);
+
+ Preconditions.checkNotNull(this.fullTableName,"Table name cannot be empty, please specify in the configuration file");
+ if(!Strings.isNullOrEmpty(zookeeperQuorum)) {
+ this.jdbcUrl = QueryUtil.getUrl(zookeeperQuorum);
+ }
+ if(!Strings.isNullOrEmpty(ipJdbcURL)) {
+ this.jdbcUrl = ipJdbcURL;
+ }
+ Preconditions.checkNotNull(this.jdbcUrl,"Please specify either the zookeeper quorum or the jdbc url in the configuration file");
+ Preconditions.checkNotNull(columnNames,"Column names cannot be empty, please specify in configuration file");
+ for(String s : Splitter.on(DEFAULT_COLUMNS_DELIMITER).split(columnNames)) {
+ colNames.add(s);
+ }
+
+ if(!Strings.isNullOrEmpty(headersStr)) {
+ for(String s : Splitter.on(DEFAULT_COLUMNS_DELIMITER).split(headersStr)) {
+ headers.add(s);
+ }
+ }
+
+ if(!Strings.isNullOrEmpty(keyGeneratorType)) {
+ try {
+ keyGenerator = DefaultKeyGenerator.valueOf(keyGeneratorType.toUpperCase());
+ this.autoGenerateKey = true;
+ } catch(IllegalArgumentException iae) {
+ logger.error("An invalid key generator {} was specified in configuration file. Specify one of {}",keyGeneratorType,DefaultKeyGenerator.values());
+ Throwables.propagate(iae);
+ }
+ }
+
+ logger.debug(" the jdbcUrl configured is {}",jdbcUrl);
+ logger.debug(" columns configured are {}",colNames.toString());
+ logger.debug(" headers configured are {}",headersStr);
+ logger.debug(" the keyGenerator configured is {} ",keyGeneratorType);
+
+ doConfigure(context);
+
+ }
+
+ @Override
+ public void configure(ComponentConfiguration conf) {
+ // NO-OP
+
+ }
+
+
+ @Override
+ public void initialize() throws SQLException {
+ final Properties props = new Properties();
+ props.setProperty(UPSERT_BATCH_SIZE_ATTRIB, String.valueOf(this.batchSize));
+ ResultSet rs = null;
+ try {
+ this.connection = DriverManager.getConnection(this.jdbcUrl, props);
+ this.connection.setAutoCommit(false);
+ if(this.createTableDdl != null) {
+ SchemaHandler.createTable(connection,createTableDdl);
+ }
+
+
+ final Map<String,Integer> qualifiedColumnMap = Maps.newLinkedHashMap();
+ final Map<String,Integer> unqualifiedColumnMap = Maps.newLinkedHashMap();
+ final String schemaName = SchemaUtil.getSchemaNameFromFullName(fullTableName);
+ final String tableName = SchemaUtil.getTableNameFromFullName(fullTableName);
+
+ String rowkey = null;
+ String cq = null;
+ String cf = null;
+ Integer dt = null;
+ rs = connection.getMetaData().getColumns("", StringUtil.escapeLike(SchemaUtil.normalizeIdentifier(schemaName)), StringUtil.escapeLike(SchemaUtil.normalizeIdentifier(tableName)), null);
+ while (rs.next()) {
+ cf = rs.getString(QueryUtil.COLUMN_FAMILY_POSITION);
+ cq = rs.getString(QueryUtil.COLUMN_NAME_POSITION);
+ // TODO: fix this; QueryUtil.DATA_TYPE_POSITION should be 26 here, not 5,
+ // so the data type is read from the hard-coded column position for now.
+ // dt = rs.getInt(QueryUtil.DATA_TYPE_POSITION);
+ dt = rs.getInt(26);
+ if(Strings.isNullOrEmpty(cf)) {
+ rowkey = cq; // this is required only when row key is auto generated
+ } else {
+ qualifiedColumnMap.put(SchemaUtil.getColumnDisplayName(cf, cq), dt);
+ }
+ unqualifiedColumnMap.put(SchemaUtil.getColumnDisplayName(null, cq), dt);
+ }
+
+ // can happen when the table is not found in HBase
+ if(unqualifiedColumnMap.isEmpty()) {
+ throw new SQLExceptionInfo.Builder(SQLExceptionCode.TABLE_UNDEFINED)
+ .setTableName(tableName).build().buildException();
+ }
+
+ int colSize = colNames.size();
+ int headersSize = headers.size();
+ int totalSize = colSize + headersSize + ( autoGenerateKey ? 1 : 0);
+ columnMetadata = new ColumnInfo[totalSize];
+
+ int position = 0;
+ position = this.addToColumnMetadataInfo(colNames, qualifiedColumnMap, unqualifiedColumnMap, position);
+ position = this.addToColumnMetadataInfo(headers, qualifiedColumnMap, unqualifiedColumnMap, position);
+
+ if(autoGenerateKey) {
+ Integer sqlType = unqualifiedColumnMap.get(rowkey);
+ if (sqlType == null) {
+ throw new SQLExceptionInfo.Builder(SQLExceptionCode.PRIMARY_KEY_MISSING)
+ .setColumnName(rowkey).setTableName(fullTableName).build().buildException();
+ }
+ columnMetadata[position] = new ColumnInfo(rowkey, sqlType);
+ position++;
+ }
+
+ this.upsertStatement = QueryUtil.constructUpsertStatement(fullTableName, Arrays.asList(columnMetadata));
+ logger.info(" the upsert statement is {} " ,this.upsertStatement);
+
+ } catch (SQLException e) {
+ logger.error("error {} occurred during initializing connection ",e.getMessage());
+ throw e;
+ } finally {
+ if(rs != null) {
+ rs.close();
+ }
+ }
+ doInitialize();
+ }
+
+ private int addToColumnMetadataInfo(final List<String> columns , final Map<String,Integer> qualifiedColumnsInfoMap, Map<String, Integer> unqualifiedColumnsInfoMap, int position) throws SQLException {
+ Preconditions.checkNotNull(columns);
+ Preconditions.checkNotNull(qualifiedColumnsInfoMap);
+ Preconditions.checkNotNull(unqualifiedColumnsInfoMap);
+ for (int i = 0 ; i < columns.size() ; i++) {
+ String columnName = SchemaUtil.normalizeIdentifier(columns.get(i).trim());
+ Integer sqlType = unqualifiedColumnsInfoMap.get(columnName);
+ if (sqlType == null) {
+ sqlType = qualifiedColumnsInfoMap.get(columnName);
+ if (sqlType == null) {
+ throw new SQLExceptionInfo.Builder(SQLExceptionCode.COLUMN_NOT_FOUND)
+ .setColumnName(columnName).setTableName(this.fullTableName).build().buildException();
+ }
+ }
+ columnMetadata[position] = new ColumnInfo(columnName, sqlType);
+ position++;
+ }
+ return position;
+ }
+
+ public abstract void doConfigure(Context context);
+
+ public abstract void doInitialize() throws SQLException;
+
+
+ @Override
+ public void close() {
+ if(connection != null) {
+ try {
+ connection.close();
+ } catch (SQLException e) {
+ logger.error(" Error while closing connection {} ");
+ }
+ }
+ }
+}
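
The column metadata is assembled in a fixed order: configured columns first, then header columns, then the auto-generated row key. For a table with columns COL1, COL2 and a FLUME_TIME row key, the prepared statement therefore takes roughly this shape (a sketch only; the exact text comes from QueryUtil.constructUpsertStatement and may quote identifiers differently):

// Bind order matches columnMetadata: columns, then headers, then the generated key.
String upsert = "UPSERT INTO EVENTS (COL1, COL2, FLUME_TIME) VALUES (?, ?, ?)";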
diff --git a/phoenix-flume/src/main/java/org/apache/phoenix/flume/serializer/CsvEventSerializer.java b/phoenix-flume/src/main/java/org/apache/phoenix/flume/serializer/CsvEventSerializer.java
new file mode 100644
index 0000000..a856c3e
--- /dev/null
+++ b/phoenix-flume/src/main/java/org/apache/phoenix/flume/serializer/CsvEventSerializer.java
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.flume.serializer;
+
+import static org.apache.phoenix.flume.FlumeConstants.CSV_DELIMITER;
+import static org.apache.phoenix.flume.FlumeConstants.CSV_DELIMITER_DEFAULT;
+import static org.apache.phoenix.flume.FlumeConstants.CSV_QUOTE;
+import static org.apache.phoenix.flume.FlumeConstants.CSV_QUOTE_DEFAULT;
+import static org.apache.phoenix.flume.FlumeConstants.CSV_ESCAPE;
+import static org.apache.phoenix.flume.FlumeConstants.CSV_ESCAPE_DEFAULT;
+import static org.apache.phoenix.flume.FlumeConstants.CSV_ARRAY_DELIMITER;
+import static org.apache.phoenix.flume.FlumeConstants.CSV_ARRAY_DELIMITER_DEFAULT;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.sql.Array;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.csv.CSVRecord;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.phoenix.schema.types.PDataType;
+import org.json.JSONArray;
+import org.json.JSONTokener;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Iterables;
+
+public class CsvEventSerializer extends BaseEventSerializer {
+
+ private static final Logger logger = LoggerFactory.getLogger(CsvEventSerializer.class);
+
+ private String csvDelimiter;
+ private String csvQuote;
+ private String csvEscape;
+ private String csvArrayDelimiter;
+ private CsvLineParser csvLineParser;
+
+ /** Reads the CSV delimiter, quote, escape, and array-delimiter settings from the serializer context. */
+ @Override
+ public void doConfigure(Context context) {
+ csvDelimiter = context.getString(CSV_DELIMITER, CSV_DELIMITER_DEFAULT);
+ csvQuote = context.getString(CSV_QUOTE, CSV_QUOTE_DEFAULT);
+ csvEscape = context.getString(CSV_ESCAPE, CSV_ESCAPE_DEFAULT);
+ csvArrayDelimiter = context.getString(CSV_ARRAY_DELIMITER, CSV_ARRAY_DELIMITER_DEFAULT);
+ csvLineParser = new CsvLineParser(csvDelimiter.charAt(0), csvQuote.charAt(0),
+ csvEscape.charAt(0));
+ }
+
+ /** No serializer-specific initialization is needed. */
+ @Override
+ public void doInitialize() throws SQLException {
+ // NO-OP
+ }
+
+ @Override
+ public void upsertEvents(List<Event> events) throws SQLException {
+ Preconditions.checkNotNull(events);
+ Preconditions.checkNotNull(connection);
+ Preconditions.checkNotNull(this.upsertStatement);
+
+ boolean wasAutoCommit = connection.getAutoCommit();
+ connection.setAutoCommit(false);
+ try (PreparedStatement colUpsert = connection.prepareStatement(upsertStatement)) {
+ String value = null;
+ Integer sqlType = null;
+ for (Event event : events) {
+ byte[] payloadBytes = event.getBody();
+ if (payloadBytes == null || payloadBytes.length == 0) {
+ continue;
+ }
+ String payload = new String(payloadBytes);
+ CSVRecord csvRecord = csvLineParser.parse(payload);
+ if (colNames.size() != csvRecord.size()) {
+ logger.debug("payload data {} doesn't match the fields mapping {} ", payload, colNames);
+ continue;
+ }
+ Map<String, String> data = new HashMap<String, String>();
+ for (int i = 0; i < csvRecord.size(); i++) {
+ data.put(colNames.get(i), csvRecord.get(i));
+ }
+ Collection<String> values = data.values();
+ if (values.contains(null)) {
+ logger.debug("payload data {} doesn't match the fields mapping {} ", payload, colNames);
+ continue;
+ }
+
+ int index = 1;
+ int offset = 0;
+ for (int i = 0; i < colNames.size(); i++, offset++) {
+ if (columnMetadata[offset] == null) {
+ continue;
+ }
+ String colName = colNames.get(i);
+ value = data.get(colName);
+ sqlType = columnMetadata[offset].getSqlType();
+ PDataType pDataType = PDataType.fromTypeId(sqlType);
+ Object upsertValue;
+ if (pDataType.isArrayType()) {
+ String arrayJson = Arrays.toString(value.split(csvArrayDelimiter));
+ JSONArray jsonArray = new JSONArray(new JSONTokener(arrayJson));
+ Object[] vals = new Object[jsonArray.length()];
+ for (int x = 0; x < jsonArray.length(); x++) {
+ vals[x] = jsonArray.get(x);
+ }
+ String baseTypeSqlName = PDataType.arrayBaseType(pDataType).getSqlTypeName();
+ Array array = connection.createArrayOf(baseTypeSqlName, vals);
+ upsertValue = pDataType.toObject(array, pDataType);
+ } else {
+ upsertValue = pDataType.toObject(value);
+ }
+ if (upsertValue != null) {
+ colUpsert.setObject(index++, upsertValue, sqlType);
+ } else {
+ colUpsert.setNull(index++, sqlType);
+ }
+ }
+
+ // add headers if necessary
+ Map<String, String> headerValues = event.getHeaders();
+ for (int i = 0; i < headers.size(); i++, offset++) {
+ String headerName = headers.get(i);
+ String headerValue = headerValues.get(headerName);
+ sqlType = columnMetadata[offset].getSqlType();
+ Object upsertValue = PDataType.fromTypeId(sqlType).toObject(headerValue);
+ if (upsertValue != null) {
+ colUpsert.setObject(index++, upsertValue, sqlType);
+ } else {
+ colUpsert.setNull(index++, sqlType);
+ }
+ }
+
+ if (autoGenerateKey) {
+ sqlType = columnMetadata[offset].getSqlType();
+ String generatedRowValue = this.keyGenerator.generate();
+ Object rowkeyValue = PDataType.fromTypeId(sqlType).toObject(generatedRowValue);
+ colUpsert.setObject(index++, rowkeyValue, sqlType);
+ }
+ colUpsert.execute();
+ }
+ connection.commit();
+ } catch (Exception ex) {
+ logger.error("An error {} occurred while persisting the event ", ex.getMessage());
+ throw new SQLException(ex.getMessage());
+ } finally {
+ if (wasAutoCommit) {
+ connection.setAutoCommit(true);
+ }
+ }
+
+ }
+
+ static class CsvLineParser {
+ private final CSVFormat csvFormat;
+
+ CsvLineParser(char fieldDelimiter, char quote, char escape) {
+ this.csvFormat = CSVFormat.DEFAULT.withIgnoreEmptyLines(true).withDelimiter(fieldDelimiter)
+ .withEscape(escape).withQuote(quote);
+ }
+
+ public CSVRecord parse(String input) throws IOException {
+ CSVParser csvParser = new CSVParser(new StringReader(input), this.csvFormat);
+ return Iterables.getFirst(csvParser, null);
+ }
+ }
+
+}
\ No newline at end of file
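
The parser above is plain commons-csv with the configured delimiter, quote, and escape characters; array-typed columns are additionally split on csvArrayDelimiter before binding. A small sketch of the parsing step under the default settings (the payload is illustrative):

import java.io.StringReader;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class CsvParseSketch {
    public static void main(String[] args) throws Exception {
        // Same format CsvLineParser builds from the csvDelimiter/csvQuote/csvEscape settings
        CSVFormat format = CSVFormat.DEFAULT.withIgnoreEmptyLines(true)
                .withDelimiter(',').withEscape('\\').withQuote('"');
        // The third field targets an array column; with csvArrayDelimiter "|" the
        // serializer would split it into ["a", "b", "c"] before the upsert.
        CSVRecord record = new CSVParser(new StringReader("v1,\"quoted, value\",a|b|c"), format)
                .iterator().next();
        System.out.println(record.get(2)); // a|b|c
    }
}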
diff --git a/phoenix-flume/src/main/java/org/apache/phoenix/flume/serializer/EventSerializer.java b/phoenix-flume/src/main/java/org/apache/phoenix/flume/serializer/EventSerializer.java
new file mode 100644
index 0000000..80959f5
--- /dev/null
+++ b/phoenix-flume/src/main/java/org/apache/phoenix/flume/serializer/EventSerializer.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.flume.serializer;
+
+import java.sql.SQLException;
+import java.util.List;
+
+import org.apache.flume.Event;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.conf.ConfigurableComponent;
+
+import org.apache.phoenix.util.SQLCloseable;
+
+public interface EventSerializer extends Configurable, ConfigurableComponent, SQLCloseable {
+
+ /**
+ * Called during start to initialize the connection and table column metadata.
+ */
+ public void initialize() throws SQLException;
+
+ /**
+ * @param events to be written to HBase.
+ * @throws SQLException
+ */
+ public void upsertEvents(List<Event> events) throws SQLException;
+
+}
diff --git a/phoenix-flume/src/main/java/org/apache/phoenix/flume/serializer/EventSerializers.java b/phoenix-flume/src/main/java/org/apache/phoenix/flume/serializer/EventSerializers.java
new file mode 100644
index 0000000..8c99d7d
--- /dev/null
+++ b/phoenix-flume/src/main/java/org/apache/phoenix/flume/serializer/EventSerializers.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.flume.serializer;
+
+public enum EventSerializers {
+
+ REGEX(RegexEventSerializer.class.getName()),
+ JSON(JsonEventSerializer.class.getName()),
+ CSV(CsvEventSerializer.class.getName());
+
+ private final String className;
+
+ private EventSerializers(String serializerClassName) {
+ this.className = serializerClassName;
+ }
+
+ /**
+ * @return Returns the serializer className.
+ */
+ public String getClassName() {
+ return className;
+ }
+}
\ No newline at end of file
diff --git a/phoenix-flume/src/main/java/org/apache/phoenix/flume/serializer/JsonEventSerializer.java b/phoenix-flume/src/main/java/org/apache/phoenix/flume/serializer/JsonEventSerializer.java
new file mode 100644
index 0000000..9226017
--- /dev/null
+++ b/phoenix-flume/src/main/java/org/apache/phoenix/flume/serializer/JsonEventSerializer.java
@@ -0,0 +1,226 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.flume.serializer;
+
+import static org.apache.phoenix.flume.FlumeConstants.JSON_DEFAULT;
+import static org.apache.phoenix.flume.FlumeConstants.CONFIG_COLUMNS_MAPPING;
+import static org.apache.phoenix.flume.FlumeConstants.CONFIG_PARTIAL_SCHEMA;
+
+import java.sql.Array;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.phoenix.schema.types.PDataType;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONTokener;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+import com.jayway.jsonpath.Configuration;
+import com.jayway.jsonpath.JsonPath;
+import com.jayway.jsonpath.spi.json.JsonOrgJsonProvider;
+import com.jayway.jsonpath.spi.mapper.JsonOrgMappingProvider;
+
+public class JsonEventSerializer extends BaseEventSerializer {
+
+ private static final Logger logger = LoggerFactory.getLogger(JsonEventSerializer.class);
+
+ private JSONObject jsonSchema;
+ private boolean isProperMapping;
+ private boolean partialSchema;
+
+ /** Reads the JSON columns mapping and the partial-schema flag from the serializer context. */
+ @Override
+ public void doConfigure(Context context) {
+ final String jsonData = context.getString(CONFIG_COLUMNS_MAPPING, JSON_DEFAULT);
+ try {
+ jsonSchema = new JSONObject(jsonData);
+ if (jsonSchema.length() == 0) {
+ for (String colName : colNames) {
+ jsonSchema.put(colName, colName);
+ }
+ isProperMapping = true;
+ } else {
+ Iterator<String> keys = jsonSchema.keys();
+ List<String> keylist = new ArrayList<String>();
+ while (keys.hasNext()) {
+ keylist.add(keys.next());
+ }
+ isProperMapping = CollectionUtils.isEqualCollection(keylist, colNames);
+ }
+ } catch (JSONException e) {
+ logger.debug("json mapping is not proper, verify the data {} ", jsonData, e);
+ }
+ partialSchema = context.getBoolean(CONFIG_PARTIAL_SCHEMA, false);
+ }
+
+ /** No serializer-specific initialization is needed. */
+ @Override
+ public void doInitialize() throws SQLException {
+ // NO-OP
+ }
+
+ @Override
+ public void upsertEvents(List<Event> events) throws SQLException {
+ Preconditions.checkNotNull(events);
+ Preconditions.checkNotNull(connection);
+ Preconditions.checkNotNull(this.upsertStatement);
+ Preconditions.checkArgument(isProperMapping, "Fields mapping is not properly done, please verify the columns mapping configuration.");
+
+ boolean wasAutoCommit = connection.getAutoCommit();
+ connection.setAutoCommit(false);
+ try (PreparedStatement colUpsert = connection.prepareStatement(upsertStatement)) {
+ String value = null;
+ Integer sqlType = null;
+ JSONObject inputJson = new JSONObject();
+ for (Event event : events) {
+ byte[] payloadBytes = event.getBody();
+ if (payloadBytes == null || payloadBytes.length == 0) {
+ continue;
+ }
+ String payload = new String(payloadBytes);
+
+ try {
+ inputJson = new JSONObject(payload);
+ } catch (Exception e) {
+ logger.debug("payload is not proper json");
+ continue;
+ }
+
+ Map<String, String> data = new HashMap<String, String>();
+ for (String colName : colNames) {
+ String pattern = colName;
+ if (jsonSchema.has(colName)) {
+ Object obj = jsonSchema.opt(colName);
+ if (null != obj) {
+ pattern = obj.toString();
+ }
+ }
+ pattern = "$." + pattern;
+ value = getPatternData(inputJson, pattern);
+
+ // if field mapping data is null then look for column data
+ if (null == value && partialSchema) {
+ pattern = "$." + colName;
+ value = getPatternData(inputJson, pattern);
+ }
+
+ data.put(colName, value);
+ }
+
+ Collection<String> values = data.values();
+ if (values.contains(null)) {
+ logger.debug("payload data {} doesn't match the fields mapping {} ", inputJson, jsonSchema);
+ continue;
+ }
+
+ int index = 1;
+ int offset = 0;
+ for (int i = 0; i < colNames.size(); i++, offset++) {
+ if (columnMetadata[offset] == null) {
+ continue;
+ }
+ String colName = colNames.get(i);
+ value = data.get(colName);
+ sqlType = columnMetadata[offset].getSqlType();
+ PDataType pDataType = PDataType.fromTypeId(sqlType);
+ Object upsertValue;
+ if (pDataType.isArrayType()) {
+ JSONArray jsonArray = new JSONArray(new JSONTokener(value));
+ Object[] vals = new Object[jsonArray.length()];
+ for (int x = 0; x < jsonArray.length(); x++) {
+ vals[x] = jsonArray.get(x);
+ }
+ String baseTypeSqlName = PDataType.arrayBaseType(pDataType).getSqlTypeName();
+ Array array = connection.createArrayOf(baseTypeSqlName, vals);
+ upsertValue = pDataType.toObject(array, pDataType);
+ } else {
+ upsertValue = pDataType.toObject(value);
+ }
+ if (upsertValue != null) {
+ colUpsert.setObject(index++, upsertValue, sqlType);
+ } else {
+ colUpsert.setNull(index++, sqlType);
+ }
+ }
+
+ // add headers if necessary
+ Map<String, String> headerValues = event.getHeaders();
+ for (int i = 0; i < headers.size(); i++, offset++) {
+ String headerName = headers.get(i);
+ String headerValue = headerValues.get(headerName);
+ sqlType = columnMetadata[offset].getSqlType();
+ Object upsertValue = PDataType.fromTypeId(sqlType).toObject(headerValue);
+ if (upsertValue != null) {
+ colUpsert.setObject(index++, upsertValue, sqlType);
+ } else {
+ colUpsert.setNull(index++, sqlType);
+ }
+ }
+
+ if (autoGenerateKey) {
+ sqlType = columnMetadata[offset].getSqlType();
+ String generatedRowValue = this.keyGenerator.generate();
+ Object rowkeyValue = PDataType.fromTypeId(sqlType).toObject(generatedRowValue);
+ colUpsert.setObject(index++, rowkeyValue, sqlType);
+ }
+ colUpsert.execute();
+ }
+ connection.commit();
+ } catch (Exception ex) {
+ logger.error("An error {} occurred during persisting the event ", ex.getMessage());
+ throw new SQLException(ex.getMessage());
+ } finally {
+ if (wasAutoCommit) {
+ connection.setAutoCommit(true);
+ }
+ }
+
+ }
+
+ private String getPatternData(JSONObject json, String pattern) {
+ Configuration jsonOrgConfiguration = Configuration.builder().mappingProvider(new JsonOrgMappingProvider())
+ .jsonProvider(new JsonOrgJsonProvider()).build();
+ String value;
+ try {
+ Object object = JsonPath.using(jsonOrgConfiguration).parse(json).read(pattern);
+ value = object.toString();
+ } catch (Exception e) {
+ value = null;
+ }
+ return value;
+ }
+
+}
\ No newline at end of file
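
Each columnsMapping value is a JsonPath fragment that gets prefixed with "$." before evaluation against the payload. A sketch of that lookup using the same JSON-org provider as getPatternData (the mapping and payload are illustrative):

import org.json.JSONObject;

import com.jayway.jsonpath.Configuration;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.spi.json.JsonOrgJsonProvider;
import com.jayway.jsonpath.spi.mapper.JsonOrgMappingProvider;

public class JsonPathSketch {
    public static void main(String[] args) {
        // A columnsMapping entry "col1" -> "data.f1" becomes the path "$.data.f1"
        Configuration conf = Configuration.builder()
                .mappingProvider(new JsonOrgMappingProvider())
                .jsonProvider(new JsonOrgJsonProvider()).build();
        JSONObject payload = new JSONObject("{\"data\":{\"f1\":\"kv1\"}}");
        Object value = JsonPath.using(conf).parse(payload).read("$.data.f1");
        System.out.println(value); // kv1
    }
}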
diff --git a/phoenix-flume/src/main/java/org/apache/phoenix/flume/serializer/RegexEventSerializer.java b/phoenix-flume/src/main/java/org/apache/phoenix/flume/serializer/RegexEventSerializer.java
new file mode 100644
index 0000000..b636481
--- /dev/null
+++ b/phoenix-flume/src/main/java/org/apache/phoenix/flume/serializer/RegexEventSerializer.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.flume.serializer;
+
+import static org.apache.phoenix.flume.FlumeConstants.CONFIG_REGULAR_EXPRESSION;
+import static org.apache.phoenix.flume.FlumeConstants.IGNORE_CASE_CONFIG;
+import static org.apache.phoenix.flume.FlumeConstants.IGNORE_CASE_DEFAULT;
+import static org.apache.phoenix.flume.FlumeConstants.REGEX_DEFAULT;
+
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.phoenix.schema.types.PDataType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+
+public class RegexEventSerializer extends BaseEventSerializer {
+
+ private static final Logger logger = LoggerFactory.getLogger(RegexEventSerializer.class);
+
+ private Pattern inputPattern;
+
+ /** Compiles the configured regular expression, optionally case-insensitive. */
+ @Override
+ public void doConfigure(Context context) {
+ final String regex = context.getString(CONFIG_REGULAR_EXPRESSION, REGEX_DEFAULT);
+ final boolean regexIgnoreCase = context.getBoolean(IGNORE_CASE_CONFIG,IGNORE_CASE_DEFAULT);
+ inputPattern = Pattern.compile(regex, Pattern.DOTALL | (regexIgnoreCase ? Pattern.CASE_INSENSITIVE : 0));
+ }
+
+
+ /** No serializer-specific initialization is needed. */
+ @Override
+ public void doInitialize() throws SQLException {
+ // NO-OP
+ }
+
+
+ @Override
+ public void upsertEvents(List<Event> events) throws SQLException {
+ Preconditions.checkNotNull(events);
+ Preconditions.checkNotNull(connection);
+ Preconditions.checkNotNull(this.upsertStatement);
+
+ boolean wasAutoCommit = connection.getAutoCommit();
+ connection.setAutoCommit(false);
+ try (PreparedStatement colUpsert = connection.prepareStatement(upsertStatement)) {
+ String value = null;
+ Integer sqlType = null;
+ for(Event event : events) {
+ byte [] payloadBytes = event.getBody();
+ if(payloadBytes == null || payloadBytes.length == 0) {
+ continue;
+ }
+ String payload = new String(payloadBytes);
+ Matcher m = inputPattern.matcher(payload.trim());
+
+ if (!m.matches()) {
+ logger.debug("payload {} doesn't match the pattern {} ", payload, inputPattern.toString());
+ continue;
+ }
+ if (m.groupCount() != colNames.size()) {
+ logger.debug("payload {} size doesn't match the pattern {} ", m.groupCount(), colNames.size());
+ continue;
+ }
+ int index = 1;
+ int offset = 0;
+ for (int i = 0; i < colNames.size(); i++, offset++) {
+ if (columnMetadata[offset] == null) {
+ continue;
+ }
+
+ value = m.group(i + 1);
+ sqlType = columnMetadata[offset].getSqlType();
+ Object upsertValue = PDataType.fromTypeId(sqlType).toObject(value);
+ if (upsertValue != null) {
+ colUpsert.setObject(index++, upsertValue, sqlType);
+ } else {
+ colUpsert.setNull(index++, sqlType);
+ }
+ }
+
+ //add headers if necessary
+ Map<String,String> headerValues = event.getHeaders();
+ for (int i = 0; i < headers.size(); i++, offset++) {
+
+ String headerName = headers.get(i);
+ String headerValue = headerValues.get(headerName);
+ sqlType = columnMetadata[offset].getSqlType();
+ Object upsertValue = PDataType.fromTypeId(sqlType).toObject(headerValue);
+ if (upsertValue != null) {
+ colUpsert.setObject(index++, upsertValue, sqlType);
+ } else {
+ colUpsert.setNull(index++, sqlType);
+ }
+ }
+
+ if(autoGenerateKey) {
+ sqlType = columnMetadata[offset].getSqlType();
+ String generatedRowValue = this.keyGenerator.generate();
+ Object rowkeyValue = PDataType.fromTypeId(sqlType).toObject(generatedRowValue);
+ colUpsert.setObject(index++, rowkeyValue ,sqlType);
+ }
+ colUpsert.execute();
+ }
+ connection.commit();
+ } catch (Exception ex) {
+ logger.error("An error occurred while persisting the events ", ex);
+ throw new SQLException(ex.getMessage(), ex);
+ } finally {
+ if(wasAutoCommit) {
+ connection.setAutoCommit(true);
+ }
+ }
+
+ }
+
+}
diff --git a/phoenix-flume/src/main/java/org/apache/phoenix/flume/sink/PhoenixSink.java b/phoenix-flume/src/main/java/org/apache/phoenix/flume/sink/PhoenixSink.java
new file mode 100644
index 0000000..2b102a2
--- /dev/null
+++ b/phoenix-flume/src/main/java/org/apache/phoenix/flume/sink/PhoenixSink.java
@@ -0,0 +1,212 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.flume.sink;
+
+import java.sql.SQLException;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.flume.Channel;
+import org.apache.flume.ChannelException;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.Transaction;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.instrumentation.SinkCounter;
+import org.apache.flume.sink.AbstractSink;
+import org.apache.phoenix.flume.FlumeConstants;
+import org.apache.phoenix.flume.serializer.EventSerializer;
+import org.apache.phoenix.flume.serializer.EventSerializers;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Throwables;
+import com.google.common.collect.Lists;
+
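+/**
+ * Flume sink that writes events to Phoenix through a pluggable {@link EventSerializer}.
+ *
+ * Illustrative agent configuration (a sketch only; the exact property keys are
+ * defined in FlumeConstants and the agent/channel names here are hypothetical):
+ *
+ *   agent.sinks.phoenixSink.type = org.apache.phoenix.flume.sink.PhoenixSink
+ *   agent.sinks.phoenixSink.batchSize = 100
+ *   agent.sinks.phoenixSink.serializer = regex
+ *   agent.sinks.phoenixSink.channel = memoryChannel
+ */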
+public class PhoenixSink extends AbstractSink implements Configurable {
+ private static final Logger logger = LoggerFactory.getLogger(PhoenixSink.class);
+ private static AtomicInteger counter = new AtomicInteger();
+ private static final String NAME = "Phoenix Sink__";
+
+ private SinkCounter sinkCounter;
+ private Integer batchSize;
+ private EventSerializer serializer;
+
+ public PhoenixSink(){
+ }
+
+ @Override
+ public void configure(Context context){
+ this.setName(NAME + counter.incrementAndGet());
+ this.batchSize = context.getInteger(FlumeConstants.CONFIG_BATCHSIZE, FlumeConstants.DEFAULT_BATCH_SIZE);
+ final String eventSerializerType = context.getString(FlumeConstants.CONFIG_SERIALIZER);
+
+ Preconditions.checkNotNull(eventSerializerType, "Event serializer cannot be empty; please specify it in the configuration file");
+ initializeSerializer(context, eventSerializerType);
+ this.sinkCounter = new SinkCounter(this.getName());
+ }
+
+ /**
+ * Initializes the serializer for Flume events. The type is first resolved as an
+ * {@link EventSerializers} enum name (e.g. "regex"); otherwise it is treated as a
+ * fully qualified class name to instantiate.
+ * @param context the sink context whose serializer sub-properties are copied through
+ * @param eventSerializerType the serializer alias or class name
+ */
+ private void initializeSerializer(final Context context, final String eventSerializerType) {
+ String serializerClazz = null;
+ EventSerializers eventSerializer = null;
+
+ try {
+ eventSerializer = EventSerializers.valueOf(eventSerializerType.toUpperCase());
+ } catch(IllegalArgumentException iae) {
+ serializerClazz = eventSerializerType;
+ }
+
+ final Context serializerContext = new Context();
+ serializerContext.putAll(context.getSubProperties(FlumeConstants.CONFIG_SERIALIZER_PREFIX));
+ copyPropertiesToSerializerContext(context,serializerContext);
+
+ try {
+ @SuppressWarnings("unchecked")
+ Class<? extends EventSerializer> clazz = null;
+ if(serializerClazz == null) {
+ clazz = (Class<? extends EventSerializer>) Class.forName(eventSerializer.getClassName());
+ }
+ else {
+ clazz = (Class<? extends EventSerializer>) Class.forName(serializerClazz);
+ }
+
+ serializer = clazz.newInstance();
+ serializer.configure(serializerContext);
+
+ } catch (Exception e) {
+ logger.error("Could not instantiate event serializer." , e);
+ Throwables.propagate(e);
+ }
+ }
+
+ private void copyPropertiesToSerializerContext(Context context, Context serializerContext) {
+
+ serializerContext.put(FlumeConstants.CONFIG_TABLE_DDL,context.getString(FlumeConstants.CONFIG_TABLE_DDL));
+ serializerContext.put(FlumeConstants.CONFIG_TABLE,context.getString(FlumeConstants.CONFIG_TABLE));
+ serializerContext.put(FlumeConstants.CONFIG_ZK_QUORUM,context.getString(FlumeConstants.CONFIG_ZK_QUORUM));
+ serializerContext.put(FlumeConstants.CONFIG_JDBC_URL,context.getString(FlumeConstants.CONFIG_JDBC_URL));
+ serializerContext.put(FlumeConstants.CONFIG_BATCHSIZE,context.getString(FlumeConstants.CONFIG_BATCHSIZE));
+ }
+
+ @Override
+ public void start() {
+ logger.info("Starting sink {} ",this.getName());
+ sinkCounter.start();
+ try {
+ serializer.initialize();
+ sinkCounter.incrementConnectionCreatedCount();
+ } catch (Exception ex) {
+ sinkCounter.incrementConnectionFailedCount();
+ logger.error("Error initializing the serializer.", ex);
+ Throwables.propagate(ex);
+ }
+ super.start();
+ }
+
+ @Override
+ public void stop(){
+ super.stop();
+ try {
+ serializer.close();
+ } catch (SQLException e) {
+ logger.error(" Error while closing connection {} for sink {} ",e.getMessage(),this.getName());
+ }
+ sinkCounter.incrementConnectionClosedCount();
+ sinkCounter.stop();
+ }
+
+ @Override
+ public Status process() throws EventDeliveryException {
+
+ Status status = Status.READY;
+ Channel channel = getChannel();
+ Transaction transaction = null;
+ List<Event> events = Lists.newArrayListWithExpectedSize(this.batchSize);
+ long startTime = System.nanoTime();
+ try {
+ transaction = channel.getTransaction();
+ transaction.begin();
+
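+ // Drain up to batchSize events from the channel; an empty take ends the
+ // batch early and signals BACKOFF to the runner.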
+ for(long i = 0; i < this.batchSize; i++) {
+ Event event = channel.take();
+ if(event == null){
+ status = Status.BACKOFF;
+ if (i == 0) {
+ sinkCounter.incrementBatchEmptyCount();
+ } else {
+ sinkCounter.incrementBatchUnderflowCount();
+ }
+ break;
+ } else {
+ events.add(event);
+ }
+ }
+ if (!events.isEmpty()) {
+ if (events.size() == this.batchSize) {
+ sinkCounter.incrementBatchCompleteCount();
+ }
+ else {
+ sinkCounter.incrementBatchUnderflowCount();
+ status = Status.BACKOFF;
+ }
+ // save to HBase via the serializer
+ serializer.upsertEvents(events);
+ sinkCounter.addToEventDrainSuccessCount(events.size());
+ }
+ else {
+ logger.debug("no events to process ");
+ sinkCounter.incrementBatchEmptyCount();
+ status = Status.BACKOFF;
+ }
+ transaction.commit();
+ } catch (ChannelException e) {
+ transaction.rollback();
+ status = Status.BACKOFF;
+ sinkCounter.incrementConnectionFailedCount();
+ }
+ catch (SQLException e) {
+ sinkCounter.incrementConnectionFailedCount();
+ transaction.rollback();
+ logger.error("exception while persisting to Hbase ", e);
+ throw new EventDeliveryException("Failed to persist message to Hbase", e);
+ }
+ catch (Throwable e) {
+ transaction.rollback();
+ logger.error("exception while processing in Phoenix Sink", e);
+ throw new EventDeliveryException("Failed to persist message", e);
+ }
+ finally {
+ logger.info(String.format("Time taken to process [%s] events was [%s] seconds",
+ events.size(),
+ TimeUnit.SECONDS.convert(System.nanoTime() - startTime, TimeUnit.NANOSECONDS)));
+ if( transaction != null ) {
+ transaction.close();
+ }
+ }
+ return status;
+ }
+
+}
diff --git a/phoenix-hive/pom.xml b/phoenix-hive/pom.xml
new file mode 100644
index 0000000..705ad34
--- /dev/null
+++ b/phoenix-hive/pom.xml
@@ -0,0 +1,192 @@
+<?xml version='1.0'?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-connectors</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ </parent>
+ <artifactId>phoenix-hive</artifactId>
+ <name>Phoenix - Hive</name>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-cli</artifactId>
+ <version>${hive.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-exec</artifactId>
+ <version>${hive.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-core</artifactId>
+ </dependency>
+
+ <!-- Test dependencies -->
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-core</artifactId>
+ <classifier>tests</classifier>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-testing-util</artifactId>
+ <scope>test</scope>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-it</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-auth</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-common</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tez</groupId>
+ <artifactId>tez-tests</artifactId>
+ <scope>test</scope>
+ <version>0.8.4</version>
+ <type>test-jar</type>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-api</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tez</groupId>
+ <artifactId>tez-dag</artifactId>
+ <scope>test</scope>
+ <version>0.8.4</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-api</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-all</artifactId>
+ <version>${mockito-all.version}</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <version>${maven-dependency-plugin.version}</version>
+ <executions>
+ <execution>
+ <id>copy-dependencies</id>
+ <phase>package</phase>
+ <goals>
+ <goal>copy-dependencies</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
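+ <!-- Bundles the connector and its dependencies into a single phoenix-<version>-hive jar -->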
+ <plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <configuration>
+ <descriptorRefs>
+ <descriptorRef>jar-with-dependencies</descriptorRef>
+ </descriptorRefs>
+ </configuration>
+ <executions>
+ <execution>
+ <id>make-jar-with-dependencies</id>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ <configuration>
+ <appendAssemblyId>false</appendAssemblyId>
+ <finalName>phoenix-${project.version}-hive</finalName>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/phoenix-hive/src/it/java/org/apache/phoenix/hive/BaseHivePhoenixStoreIT.java b/phoenix-hive/src/it/java/org/apache/phoenix/hive/BaseHivePhoenixStoreIT.java
new file mode 100644
index 0000000..c705e2d
--- /dev/null
+++ b/phoenix-hive/src/it/java/org/apache/phoenix/hive/BaseHivePhoenixStoreIT.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.phoenix.end2end.NeedsOwnMiniClusterTest;
+import org.apache.phoenix.jdbc.PhoenixDriver;
+import org.apache.phoenix.query.QueryServices;
+import org.apache.phoenix.util.PhoenixRuntime;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.TestUtil;
+import org.junit.AfterClass;
+import org.junit.experimental.categories.Category;
+
+import java.io.File;
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.Statement;
+import java.util.Properties;
+
+import static org.apache.phoenix.query.BaseTest.setUpConfigForMiniCluster;
+import static org.junit.Assert.fail;
+
+/**
+ * Base class for all Hive Phoenix integration tests, which may be run with a Tez or MR mini
+ * cluster. Subclasses call {@link #setup} with the desired {@link HiveTestUtil.MiniClusterType}.
+ */
+@Category(NeedsOwnMiniClusterTest.class)
+public class BaseHivePhoenixStoreIT {
+
+ private static final Log LOG = LogFactory.getLog(BaseHivePhoenixStoreIT.class);
+ protected static HBaseTestingUtility hbaseTestUtil;
+ protected static MiniHBaseCluster hbaseCluster;
+ private static String zkQuorum;
+ protected static Connection conn;
+ private static Configuration conf;
+ protected static HiveTestUtil qt;
+ protected static String hiveOutputDir;
+ protected static String hiveLogDir;
+
+
+ public static void setup(HiveTestUtil.MiniClusterType clusterType) throws Exception {
+ String hadoopConfDir = System.getenv("HADOOP_CONF_DIR");
+ if (null != hadoopConfDir && !hadoopConfDir.isEmpty()) {
+ LOG.warn("WARNING: HADOOP_CONF_DIR is set in the environment which may cause "
+ + "issues with test execution via MiniDFSCluster");
+ }
+ hbaseTestUtil = new HBaseTestingUtility();
+ conf = hbaseTestUtil.getConfiguration();
+ setUpConfigForMiniCluster(conf);
+ conf.set(QueryServices.DROP_METADATA_ATTRIB, Boolean.toString(true));
+ hiveOutputDir = new Path(hbaseTestUtil.getDataTestDir(), "hive_output").toString();
+ File outputDir = new File(hiveOutputDir);
+ outputDir.mkdirs();
+ hiveLogDir = new Path(hbaseTestUtil.getDataTestDir(), "hive_log").toString();
+ File logDir = new File(hiveLogDir);
+ logDir.mkdirs();
+ // Setup Hive mini Server
+ Path testRoot = hbaseTestUtil.getDataTestDir();
+ System.setProperty("test.tmp.dir", testRoot.toString());
+ System.setProperty("test.warehouse.dir", (new Path(testRoot, "warehouse")).toString());
+
+ try {
+ qt = new HiveTestUtil(hiveOutputDir, hiveLogDir, clusterType, null);
+ } catch (Exception e) {
+ LOG.error("Unexpected exception in setup", e);
+ fail("Unexpected exception in setup");
+ }
+
+ //Start HBase cluster
+ hbaseCluster = hbaseTestUtil.startMiniCluster(1);
+ Class.forName(PhoenixDriver.class.getName());
+ zkQuorum = "localhost:" + hbaseTestUtil.getZkCluster().getClientPort();
+ Properties props = PropertiesUtil.deepCopy(TestUtil.TEST_PROPERTIES);
+ props.put(QueryServices.DROP_METADATA_ATTRIB, Boolean.toString(true));
+ conn = DriverManager.getConnection(PhoenixRuntime.JDBC_PROTOCOL +
+ PhoenixRuntime.JDBC_PROTOCOL_SEPARATOR + zkQuorum, props);
+ // Create a simple Phoenix table for the tests to use
+ Statement stmt = conn.createStatement();
+ stmt.execute("create table t(a integer primary key, b varchar)");
+ }
+
+ protected void runTest(String fname, String fpath) throws Exception {
+ long startTime = System.currentTimeMillis();
+ try {
+ LOG.info("Begin query: " + fname);
+ qt.addFile(fpath);
+
+ if (qt.shouldBeSkipped(fname)) {
+ LOG.info("Test " + fname + " skipped");
+ return;
+ }
+
+ qt.cliInit(fname);
+ qt.clearTestSideEffects();
+ int ecode = qt.executeClient(fname);
+ if (ecode != 0) {
+ qt.failed(ecode, fname, null);
+ return;
+ }
+
+ ecode = qt.checkCliDriverResults(fname);
+ if (ecode != 0) {
+ qt.failedDiff(ecode, fname, null);
+ }
+ qt.clearPostTestEffects();
+
+ } catch (Throwable e) {
+ qt.failed(e, fname, null);
+ }
+
+ long elapsedTime = System.currentTimeMillis() - startTime;
+ LOG.info("Done query: " + fname + " elapsedTime=" + elapsedTime / 1000 + "s");
+ assertTrue("Test passed", true);
+ }
+
+ protected void createFile(String content, String fullName) throws IOException {
+ FileUtils.write(new File(fullName), content);
+ }
+
+ @AfterClass
+ public static void tearDownAfterClass() throws Exception {
+ if (qt != null) {
+ try {
+ qt.shutdown();
+ } catch (Exception e) {
+ LOG.error("Unexpected exception in setup", e);
+ fail("Unexpected exception in tearDown");
+ }
+ }
+ try {
+ conn.close();
+ } finally {
+ try {
+ PhoenixDriver.INSTANCE.close();
+ } finally {
+ try {
+ DriverManager.deregisterDriver(PhoenixDriver.INSTANCE);
+ } finally {
+ hbaseTestUtil.shutdownMiniCluster();
+ }
+ }
+ }
+ }
+}
diff --git a/phoenix-hive/src/it/java/org/apache/phoenix/hive/HiveMapReduceIT.java b/phoenix-hive/src/it/java/org/apache/phoenix/hive/HiveMapReduceIT.java
new file mode 100644
index 0000000..4bc5a7d
--- /dev/null
+++ b/phoenix-hive/src/it/java/org/apache/phoenix/hive/HiveMapReduceIT.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.phoenix.hive;
+
+import static org.junit.Assert.fail;
+
+import org.apache.phoenix.end2end.NeedsOwnMiniClusterTest;
+import org.junit.BeforeClass;
+import org.junit.experimental.categories.Category;
+
+@Category(NeedsOwnMiniClusterTest.class)
+public class HiveMapReduceIT extends HivePhoenixStoreIT {
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ final String hadoopConfDir = System.getenv("HADOOP_CONF_DIR");
+ if (hadoopConfDir != null && hadoopConfDir.length() != 0) {
+ fail("HADOOP_CONF_DIR is non-empty in the current shell environment which will very likely cause this test to fail.");
+ }
+ setup(HiveTestUtil.MiniClusterType.mr);
+ }
+}
diff --git a/phoenix-hive/src/it/java/org/apache/phoenix/hive/HivePhoenixStoreIT.java b/phoenix-hive/src/it/java/org/apache/phoenix/hive/HivePhoenixStoreIT.java
new file mode 100644
index 0000000..66f99ad
--- /dev/null
+++ b/phoenix-hive/src/it/java/org/apache/phoenix/hive/HivePhoenixStoreIT.java
@@ -0,0 +1,341 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.phoenix.end2end.NeedsOwnMiniClusterTest;
+import org.apache.phoenix.util.StringUtil;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Test methods only. All supporting methods should be placed in BaseHivePhoenixStoreIT.
+ */
+
+@Category(NeedsOwnMiniClusterTest.class)
+@Ignore("This class contains only test methods and should not be executed directly")
+public class HivePhoenixStoreIT extends BaseHivePhoenixStoreIT {
+
+ /**
+ * Create a table with two columns, insert one row, and check that the Phoenix table is
+ * created and the row is there.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void simpleTest() throws Exception {
+ String testName = "simpleTest";
+ hbaseTestUtil.getTestFileSystem().createNewFile(new Path(hiveLogDir, testName + ".out"));
+ createFile(StringUtil.EMPTY_STRING, new Path(hiveLogDir, testName + ".out").toString());
+ createFile(StringUtil.EMPTY_STRING, new Path(hiveOutputDir, testName + ".out").toString());
+ StringBuilder sb = new StringBuilder();
+ sb.append("CREATE TABLE phoenix_table(ID STRING, SALARY STRING)" + HiveTestUtil.CRLF +
+ " STORED BY \"org.apache.phoenix.hive.PhoenixStorageHandler\"" + HiveTestUtil
+ .CRLF + " TBLPROPERTIES(" + HiveTestUtil.CRLF +
+ " 'phoenix.table.name'='phoenix_table'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.znode.parent'='/hbase'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.quorum'='localhost'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.client.port'='" +
+ hbaseTestUtil.getZkCluster().getClientPort() + "'," + HiveTestUtil.CRLF +
+ " 'phoenix.rowkeys'='id');");
+ sb.append("INSERT INTO TABLE phoenix_table" + HiveTestUtil.CRLF +
+ "VALUES ('10', '1000');" + HiveTestUtil.CRLF);
+ String fullPath = new Path(hbaseTestUtil.getDataTestDir(), testName).toString();
+ createFile(sb.toString(), fullPath);
+ runTest(testName, fullPath);
+
+ String phoenixQuery = "SELECT * FROM phoenix_table";
+ PreparedStatement statement = conn.prepareStatement(phoenixQuery);
+ ResultSet rs = statement.executeQuery();
+ assertTrue(rs.getMetaData().getColumnCount() == 2);
+ assertTrue(rs.next());
+ assertTrue(rs.getString(1).equals("10"));
+ assertTrue(rs.getString(2).equals("1000"));
+ }
+
+ /**
+ * Create a Hive table with a custom column mapping.
+ * @throws Exception
+ */
+
+ @Test
+ public void simpleColumnMapTest() throws Exception {
+ String testName = "cmTest";
+ hbaseTestUtil.getTestFileSystem().createNewFile(new Path(hiveLogDir, testName + ".out"));
+ createFile(StringUtil.EMPTY_STRING, new Path(hiveLogDir, testName + ".out").toString());
+ createFile(StringUtil.EMPTY_STRING, new Path(hiveOutputDir, testName + ".out").toString());
+ StringBuilder sb = new StringBuilder();
+ sb.append("CREATE TABLE column_table(ID STRING, P1 STRING, p2 STRING)" + HiveTestUtil.CRLF +
+ " STORED BY \"org.apache.phoenix.hive.PhoenixStorageHandler\"" + HiveTestUtil
+ .CRLF + " TBLPROPERTIES(" + HiveTestUtil.CRLF +
+ " 'phoenix.table.name'='column_table'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.znode.parent'='/hbase'," + HiveTestUtil.CRLF +
+ " 'phoenix.column.mapping' = 'id:C1, p1:c2, p2:C3'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.quorum'='localhost'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.client.port'='" +
+ hbaseTestUtil.getZkCluster().getClientPort() + "'," + HiveTestUtil.CRLF +
+ " 'phoenix.rowkeys'='id');");
+ sb.append("INSERT INTO TABLE column_table" + HiveTestUtil.CRLF +
+ "VALUES ('1', '2', '3');" + HiveTestUtil.CRLF);
+ String fullPath = new Path(hbaseTestUtil.getDataTestDir(), testName).toString();
+ createFile(sb.toString(), fullPath);
+ runTest(testName, fullPath);
+
+ String phoenixQuery = "SELECT C1, \"c2\", C3 FROM column_table";
+ PreparedStatement statement = conn.prepareStatement(phoenixQuery);
+ ResultSet rs = statement.executeQuery();
+ assertTrue(rs.getMetaData().getColumnCount() == 3);
+ assertTrue(rs.next());
+ assertTrue(rs.getString(1).equals("1"));
+ assertTrue(rs.getString(2).equals("2"));
+ assertTrue(rs.getString(3).equals("3"));
+
+ }
+
+
+ /**
+ * Datatype Test
+ *
+ * @throws Exception
+ */
+ @Test
+ public void dataTypeTest() throws Exception {
+ String testName = "dataTypeTest";
+ hbaseTestUtil.getTestFileSystem().createNewFile(new Path(hiveLogDir, testName + ".out"));
+ createFile(StringUtil.EMPTY_STRING, new Path(hiveLogDir, testName + ".out").toString());
+ createFile(StringUtil.EMPTY_STRING, new Path(hiveOutputDir, testName + ".out").toString());
+ StringBuilder sb = new StringBuilder();
+ sb.append("CREATE TABLE phoenix_datatype(ID int, description STRING, ts TIMESTAMP, db " +
+ "DOUBLE,fl FLOAT, us INT)" + HiveTestUtil.CRLF +
+ " STORED BY \"org.apache.phoenix.hive.PhoenixStorageHandler\"" + HiveTestUtil
+ .CRLF + " TBLPROPERTIES(" + HiveTestUtil.CRLF +
+ " 'phoenix.hbase.table.name'='phoenix_datatype'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.znode.parent'='/hbase'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.quorum'='localhost'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.client.port'='" +
+ hbaseTestUtil.getZkCluster().getClientPort() + "'," + HiveTestUtil.CRLF +
+ " 'phoenix.rowkeys'='id');");
+ sb.append("INSERT INTO TABLE phoenix_datatype" + HiveTestUtil.CRLF +
+ "VALUES (10, \"foodesc\", \"2013-01-05 01:01:01\", 200,2.0,-1);" + HiveTestUtil.CRLF);
+ String fullPath = new Path(hbaseTestUtil.getDataTestDir(), testName).toString();
+ createFile(sb.toString(), fullPath);
+ runTest(testName, fullPath);
+
+ String phoenixQuery = "SELECT * FROM phoenix_datatype";
+ PreparedStatement statement = conn.prepareStatement(phoenixQuery);
+ ResultSet rs = statement.executeQuery();
+ assertTrue(rs.getMetaData().getColumnCount() == 6);
+ while (rs.next()) {
+ assertTrue(rs.getInt(1) == 10);
+ assertTrue(rs.getString(2).equalsIgnoreCase("foodesc"));
+ assertTrue(rs.getDouble(4) == 200);
+ assertTrue(rs.getFloat(5) == 2.0);
+ assertTrue(rs.getInt(6) == -1);
+ }
+ }
+
+ /**
+ * Composite row key test (table with a multi-column primary key)
+ *
+ * @throws Exception
+ */
+ @Test
+ public void MultiKey() throws Exception {
+ String testName = "MultiKey";
+ hbaseTestUtil.getTestFileSystem().createNewFile(new Path(hiveLogDir, testName + ".out"));
+ createFile(StringUtil.EMPTY_STRING, new Path(hiveLogDir, testName + ".out").toString());
+ createFile(StringUtil.EMPTY_STRING, new Path(hiveOutputDir, testName + ".out").toString());
+ StringBuilder sb = new StringBuilder();
+ sb.append("CREATE TABLE phoenix_MultiKey(ID int, ID2 String,description STRING," +
+ "db DOUBLE,fl FLOAT, us INT)" + HiveTestUtil.CRLF +
+ " STORED BY \"org.apache.phoenix.hive.PhoenixStorageHandler\"" + HiveTestUtil
+ .CRLF +
+ " TBLPROPERTIES(" + HiveTestUtil.CRLF +
+ " 'phoenix.hbase.table.name'='phoenix_MultiKey'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.znode.parent'='/hbase'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.quorum'='localhost'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.client.port'='" +
+ hbaseTestUtil.getZkCluster().getClientPort() + "'," + HiveTestUtil.CRLF +
+ " 'phoenix.rowkeys'='id,id2');" + HiveTestUtil.CRLF);
+ sb.append("INSERT INTO TABLE phoenix_MultiKey VALUES (10, \"part2\",\"foodesc\",200,2.0,-1);" +
+ HiveTestUtil.CRLF);
+ String fullPath = new Path(hbaseTestUtil.getDataTestDir(), testName).toString();
+ createFile(sb.toString(), fullPath);
+ runTest(testName, fullPath);
+
+ String phoenixQuery = "SELECT * FROM phoenix_MultiKey";
+ PreparedStatement statement = conn.prepareStatement(phoenixQuery);
+ ResultSet rs = statement.executeQuery();
+ assertTrue(rs.getMetaData().getColumnCount() == 6);
+ while (rs.next()) {
+ assertTrue(rs.getInt(1) == 10);
+ assertTrue(rs.getString(2).equalsIgnoreCase("part2"));
+ assertTrue(rs.getString(3).equalsIgnoreCase("foodesc"));
+ assertTrue(rs.getDouble(4) == 200);
+ assertTrue(rs.getFloat(5) == 2.0);
+ assertTrue(rs.getInt(6) == -1);
+ }
+ }
+
+ /**
+ * Test that Hive is able to access Phoenix data during an MR job (creating two tables and
+ * performing a join on them).
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testJoinNoColumnMaps() throws Exception {
+ String testName = "testJoin";
+ hbaseTestUtil.getTestFileSystem().createNewFile(new Path(hiveLogDir, testName + ".out"));
+ createFile(StringUtil.EMPTY_STRING, new Path(hiveLogDir, testName + ".out").toString());
+ createFile("10\tpart2\tfoodesc\t200.0\t2.0\t-1\t10\tpart2\tfoodesc\t200.0\t2.0\t-1\n",
+ new Path(hiveOutputDir, testName + ".out").toString());
+ StringBuilder sb = new StringBuilder();
+ sb.append("CREATE TABLE joinTable1(ID int, ID2 String,description STRING," +
+ "db DOUBLE,fl FLOAT, us INT)" + HiveTestUtil.CRLF +
+ " STORED BY \"org.apache.phoenix.hive.PhoenixStorageHandler\"" + HiveTestUtil
+ .CRLF +
+ " TBLPROPERTIES(" + HiveTestUtil.CRLF +
+ " 'phoenix.hbase.table.name'='joinTable1'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.znode.parent'='/hbase'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.quorum'='localhost'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.client.port'='" +
+ hbaseTestUtil.getZkCluster().getClientPort() + "'," + HiveTestUtil.CRLF +
+ " 'phoenix.rowkeys'='id,id2');" + HiveTestUtil.CRLF);
+ sb.append("CREATE TABLE joinTable2(ID int, ID2 String,description STRING," +
+ "db DOUBLE,fl FLOAT, us INT)" + HiveTestUtil.CRLF +
+ " STORED BY \"org.apache.phoenix.hive.PhoenixStorageHandler\"" + HiveTestUtil
+ .CRLF +
+ " TBLPROPERTIES(" + HiveTestUtil.CRLF +
+ " 'phoenix.hbase.table.name'='joinTable2'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.znode.parent'='/hbase'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.quorum'='localhost'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.client.port'='" +
+ hbaseTestUtil.getZkCluster().getClientPort() + "'," + HiveTestUtil.CRLF +
+ " 'phoenix.rowkeys'='id,id2');" + HiveTestUtil.CRLF);
+
+ sb.append("INSERT INTO TABLE joinTable1 VALUES (5, \"part2\",\"foodesc\",200,2.0,-1);" + HiveTestUtil.CRLF);
+ sb.append("INSERT INTO TABLE joinTable1 VALUES (10, \"part2\",\"foodesc\",200,2.0,-1);" + HiveTestUtil.CRLF);
+
+ sb.append("INSERT INTO TABLE joinTable2 VALUES (5, \"part2\",\"foodesc\",200,2.0,-1);" + HiveTestUtil.CRLF);
+ sb.append("INSERT INTO TABLE joinTable2 VALUES (10, \"part2\",\"foodesc\",200,2.0,-1);" + HiveTestUtil.CRLF);
+
+ sb.append("SELECT * from joinTable1 A join joinTable2 B on A.ID = B.ID WHERE A.ID=10;" +
+ HiveTestUtil.CRLF);
+
+ String fullPath = new Path(hbaseTestUtil.getDataTestDir(), testName).toString();
+ createFile(sb.toString(), fullPath);
+ runTest(testName, fullPath);
+ }
+
+ /**
+ * Test that Hive is able to access Phoenix data during an MR job (creating two tables and
+ * performing a join on them).
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testJoinColumnMaps() throws Exception {
+ String testName = "testJoin";
+ hbaseTestUtil.getTestFileSystem().createNewFile(new Path(hiveLogDir, testName + ".out"));
+ createFile("10\t200.0\tpart2\n", new Path(hiveOutputDir, testName + ".out").toString());
+ createFile(StringUtil.EMPTY_STRING, new Path(hiveLogDir, testName + ".out").toString());
+
+ StringBuilder sb = new StringBuilder();
+ sb.append("CREATE TABLE joinTable3(ID int, ID2 String,description STRING," +
+ "db DOUBLE,fl FLOAT, us INT)" + HiveTestUtil.CRLF +
+ " STORED BY \"org.apache.phoenix.hive.PhoenixStorageHandler\"" + HiveTestUtil
+ .CRLF +
+ " TBLPROPERTIES(" + HiveTestUtil.CRLF +
+ " 'phoenix.hbase.table.name'='joinTable3'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.znode.parent'='/hbase'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.quorum'='localhost'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.client.port'='" +
+ hbaseTestUtil.getZkCluster().getClientPort() + "'," + HiveTestUtil.CRLF +
+ " 'phoenix.column.mapping' = 'id:i1, id2:I2'," + HiveTestUtil.CRLF +
+ " 'phoenix.rowkeys'='id,id2');" + HiveTestUtil.CRLF);
+ sb.append("CREATE TABLE joinTable4(ID int, ID2 String,description STRING," +
+ "db DOUBLE,fl FLOAT, us INT)" + HiveTestUtil.CRLF +
+ " STORED BY \"org.apache.phoenix.hive.PhoenixStorageHandler\"" + HiveTestUtil
+ .CRLF +
+ " TBLPROPERTIES(" + HiveTestUtil.CRLF +
+ " 'phoenix.hbase.table.name'='joinTable4'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.znode.parent'='/hbase'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.quorum'='localhost'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.client.port'='" +
+ hbaseTestUtil.getZkCluster().getClientPort() + "'," + HiveTestUtil.CRLF +
+ " 'phoenix.column.mapping' = 'id:i1, id2:I2'," + HiveTestUtil.CRLF +
+ " 'phoenix.rowkeys'='id,id2');" + HiveTestUtil.CRLF);
+
+ sb.append("INSERT INTO TABLE joinTable3 VALUES (5, \"part1\",\"foodesc\",200,2.0,-1);" + HiveTestUtil.CRLF);
+ sb.append("INSERT INTO TABLE joinTable3 VALUES (10, \"part1\",\"foodesc\",200,2.0,-1);" + HiveTestUtil.CRLF);
+
+ sb.append("INSERT INTO TABLE joinTable4 VALUES (5, \"part2\",\"foodesc\",200,2.0,-1);" + HiveTestUtil.CRLF);
+ sb.append("INSERT INTO TABLE joinTable4 VALUES (10, \"part2\",\"foodesc\",200,2.0,-1);" + HiveTestUtil.CRLF);
+
+ sb.append("SELECT A.ID, a.db, B.ID2 from joinTable3 A join joinTable4 B on A.ID = B.ID WHERE A.ID=10;" +
+ HiveTestUtil.CRLF);
+
+ String fullPath = new Path(hbaseTestUtil.getDataTestDir(), testName).toString();
+ createFile(sb.toString(), fullPath);
+ runTest(testName, fullPath);
+ // Verify that Phoenix mapped the columns correctly. We check both mapped and unmapped
+ // columns, for the primary key as well as regular columns.
+ String phoenixQuery = "SELECT \"i1\", \"I2\", \"db\" FROM joinTable3 where \"i1\" = 10 AND \"I2\" = 'part1' AND \"db\" = 200";
+ PreparedStatement statement = conn.prepareStatement(phoenixQuery);
+ ResultSet rs = statement.executeQuery();
+ assertTrue(rs.getMetaData().getColumnCount() == 3);
+ while (rs.next()) {
+ assertTrue(rs.getInt(1) == 10);
+ assertTrue(rs.getString(2).equalsIgnoreCase("part1"));
+ assertTrue(rs.getDouble(3) == 200);
+ }
+ }
+
+ @Test
+ public void testTimestampPredicate() throws Exception {
+ String testName = "testTimeStampPredicate";
+ hbaseTestUtil.getTestFileSystem().createNewFile(new Path(hiveLogDir, testName + ".out"));
+ createFile("10\t2013-01-02 01:01:01.123456\n", new Path(hiveOutputDir, testName + ".out").toString());
+ createFile(StringUtil.EMPTY_STRING, new Path(hiveLogDir, testName + ".out").toString());
+
+ StringBuilder sb = new StringBuilder();
+ sb.append("CREATE TABLE timeStampTable(ID int,ts TIMESTAMP)" + HiveTestUtil.CRLF +
+ " STORED BY \"org.apache.phoenix.hive.PhoenixStorageHandler\"" + HiveTestUtil
+ .CRLF +
+ " TBLPROPERTIES(" + HiveTestUtil.CRLF +
+ " 'phoenix.hbase.table.name'='TIMESTAMPTABLE'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.znode.parent'='/hbase'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.quorum'='localhost'," + HiveTestUtil.CRLF +
+ " 'phoenix.zookeeper.client.port'='" +
+ hbaseTestUtil.getZkCluster().getClientPort() + "'," + HiveTestUtil.CRLF +
+ " 'phoenix.column.mapping' = 'id:ID, ts:TS'," + HiveTestUtil.CRLF +
+ " 'phoenix.rowkeys'='id');" + HiveTestUtil.CRLF);
+ sb.append("INSERT INTO TABLE timeStampTable VALUES (10, \"2013-01-02 01:01:01.123456\");" + HiveTestUtil.CRLF);
+ sb.append("SELECT * from timeStampTable WHERE ts between '2013-01-02 01:01:01.123455' and " +
+ " '2013-01-02 12:01:02.123457789' AND id = 10;" + HiveTestUtil.CRLF);
+
+ String fullPath = new Path(hbaseTestUtil.getDataTestDir(), testName).toString();
+ createFile(sb.toString(), fullPath);
+ runTest(testName, fullPath);
+ }
+}
diff --git a/phoenix-hive/src/it/java/org/apache/phoenix/hive/HiveTestUtil.java b/phoenix-hive/src/it/java/org/apache/phoenix/hive/HiveTestUtil.java
new file mode 100644
index 0000000..b4c4e46
--- /dev/null
+++ b/phoenix-hive/src/it/java/org/apache/phoenix/hive/HiveTestUtil.java
@@ -0,0 +1,1280 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive;
+
+import com.google.common.collect.ImmutableList;
+import junit.framework.Assert;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
+import org.apache.hadoop.hive.cli.CliDriver;
+import org.apache.hadoop.hive.cli.CliSessionState;
+import org.apache.hadoop.hive.common.io.CachingPrintStream;
+import org.apache.hadoop.hive.common.io.DigestPrintStream;
+import org.apache.hadoop.hive.common.io.SortAndDigestPrintStream;
+import org.apache.hadoop.hive.common.io.SortPrintStream;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.api.Index;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.ParseDriver;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.shims.HadoopShims;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.util.Shell;
+import org.apache.hive.common.util.StreamPrinter;
+import org.apache.tools.ant.BuildException;
+import org.apache.zookeeper.WatchedEvent;
+import org.apache.zookeeper.Watcher;
+import org.apache.zookeeper.ZooKeeper;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.PrintStream;
+import java.io.StringWriter;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Deque;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.UUID;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * HiveTestUtil, cloned from Hive's QTestUtil. It may be outdated and may require an update
+ * once a problem is found.
+ */
+public class HiveTestUtil {
+
+ public static final String UTF_8 = "UTF-8";
+ private static final Log LOG = LogFactory.getLog("HiveTestUtil");
+ private static final String QTEST_LEAVE_FILES = "QTEST_LEAVE_FILES";
+ public static final String DEFAULT_DATABASE_NAME = "default";
+
+ private String testWarehouse;
+ private final String testFiles;
+ protected final String outDir;
+ protected final String logDir;
+ private final TreeMap<String, String> qMap;
+ private final Set<String> qSkipSet;
+ private final Set<String> qSortSet;
+ private final Set<String> qSortQuerySet;
+ private final Set<String> qHashQuerySet;
+ private final Set<String> qSortNHashQuerySet;
+ private final Set<String> qJavaVersionSpecificOutput;
+ private static final String SORT_SUFFIX = ".sorted";
+ private static MiniClusterType clusterType = MiniClusterType.none;
+ private ParseDriver pd;
+ protected Hive db;
+ protected HiveConf conf;
+ private BaseSemanticAnalyzer sem;
+ protected final boolean overWrite;
+ private CliDriver cliDriver;
+ private HadoopShims.MiniMrShim mr = null;
+ private HadoopShims.MiniDFSShim dfs = null;
+ private String hadoopVer = null;
+ private HiveTestSetup setup = null;
+ private boolean isSessionStateStarted = false;
+ private static final String javaVersion = getJavaVersion();
+
+ private String initScript = "";
+ private String cleanupScript = "";
+
+ public HiveConf getConf() {
+ return conf;
+ }
+
+ public boolean deleteDirectory(File path) {
+ if (path.exists()) {
+ // listFiles() returns null on I/O error or if path is not a directory
+ File[] files = path.listFiles();
+ if (files != null) {
+ for (File file : files) {
+ if (file.isDirectory()) {
+ deleteDirectory(file);
+ } else {
+ file.delete();
+ }
+ }
+ }
+ }
+ return (path.delete());
+ }
+
+ public void copyDirectoryToLocal(Path src, Path dest) throws Exception {
+
+ FileSystem srcFs = src.getFileSystem(conf);
+ FileSystem destFs = dest.getFileSystem(conf);
+ if (srcFs.exists(src)) {
+ FileStatus[] files = srcFs.listStatus(src);
+ for (FileStatus file : files) {
+ String name = file.getPath().getName();
+ Path dfs_path = file.getPath();
+ Path local_path = new Path(dest, name);
+
+ if (file.isDir()) {
+ if (!destFs.exists(local_path)) {
+ destFs.mkdirs(local_path);
+ }
+ copyDirectoryToLocal(dfs_path, local_path);
+ } else {
+ srcFs.copyToLocalFile(dfs_path, local_path);
+ }
+ }
+ }
+ }
+
+ static Pattern mapTok = Pattern.compile("(\\.?)(.*)_map_(.*)");
+ static Pattern reduceTok = Pattern.compile("(.*)(reduce_[^\\.]*)((\\..*)?)");
+
+ public void normalizeNames(File path) throws Exception {
+ if (path.isDirectory()) {
+ File[] files = path.listFiles();
+ for (File file : files) {
+ normalizeNames(file);
+ }
+ } else {
+ Matcher m = reduceTok.matcher(path.getName());
+ if (m.matches()) {
+ String name = m.group(1) + "reduce" + m.group(3);
+ path.renameTo(new File(path.getParent(), name));
+ } else {
+ m = mapTok.matcher(path.getName());
+ if (m.matches()) {
+ String name = m.group(1) + "map_" + m.group(3);
+ path.renameTo(new File(path.getParent(), name));
+ }
+ }
+ }
+ }
+
+ public String getOutputDirectory() {
+ return outDir;
+ }
+
+ public String getLogDirectory() {
+ return logDir;
+ }
+
+ private String getHadoopMainVersion(String input) {
+ if (input == null) {
+ return null;
+ }
+ Pattern p = Pattern.compile("^(\\d+\\.\\d+).*");
+ Matcher m = p.matcher(input);
+ if (m.matches()) {
+ return m.group(1);
+ }
+ return null;
+ }
+
+ public void initConf() throws Exception {
+ // Plug verifying metastore in for testing.
+ conf.setVar(HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL,
+ "org.apache.hadoop.hive.metastore.VerifyingObjectStore");
+
+ if (mr != null) {
+ assert dfs != null;
+
+ mr.setupConfiguration(conf);
+
+ // set fs.default.name to the uri of mini-dfs
+ String dfsUriString = WindowsPathUtil.getHdfsUriString(dfs.getFileSystem().getUri()
+ .toString());
+ conf.setVar(HiveConf.ConfVars.HADOOPFS, dfsUriString);
+ // hive.metastore.warehouse.dir needs to be set relative to the mini-dfs
+ conf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE,
+ (new Path(dfsUriString,
+ "/build/ql/test/data/warehouse/")).toString());
+ }
+
+ // Windows paths should be converted after MiniMrShim.setupConfiguration()
+ // since setupConfiguration may overwrite configuration values.
+ if (Shell.WINDOWS) {
+ WindowsPathUtil.convertPathsFromWindowsToHdfs(conf);
+ }
+ }
+
+ public enum MiniClusterType {
+ mr,
+ tez,
+ none;
+
+ public static MiniClusterType valueForString(String type) {
+ if (type.equals("miniMR")) {
+ return mr;
+ } else if (type.equals("tez")) {
+ return tez;
+ } else {
+ return none;
+ }
+ }
+ }
+
+ public HiveTestUtil(String outDir, String logDir, MiniClusterType clusterType, String hadoopVer)
+ throws Exception {
+ this(outDir, logDir, clusterType, null, hadoopVer);
+ }
+
+ public HiveTestUtil(String outDir, String logDir, MiniClusterType clusterType, String confDir,
+ String hadoopVer)
+ throws Exception {
+ this.outDir = outDir;
+ this.logDir = logDir;
+ if (confDir != null && !confDir.isEmpty()) {
+ HiveConf.setHiveSiteLocation(new URL("file://" + new File(confDir).toURI().getPath()
+ + "/hive-site.xml"));
+ LOG.info("Setting hive-site: " + HiveConf.getHiveSiteLocation());
+ }
+ conf = new HiveConf();
+ String tmpBaseDir = System.getProperty("test.tmp.dir");
+ if (tmpBaseDir == null || tmpBaseDir.isEmpty()) {
+ tmpBaseDir = System.getProperty("java.io.tmpdir");
+ }
+ String metaStoreURL = "jdbc:derby:" + tmpBaseDir + File.separator + "metastore_dbtest;" +
+ "create=true";
+ conf.set(ConfVars.METASTORECONNECTURLKEY.varname, metaStoreURL);
+ System.setProperty(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, metaStoreURL);
+
+ //set where derby logs
+ File derbyLogFile = new File(tmpBaseDir + "/derby.log");
+ derbyLogFile.createNewFile();
+ System.setProperty("derby.stream.error.file", derbyLogFile.getPath());
+
+ this.hadoopVer = getHadoopMainVersion(hadoopVer);
+ qMap = new TreeMap<String, String>();
+ qSkipSet = new HashSet<String>();
+ qSortSet = new HashSet<String>();
+ qSortQuerySet = new HashSet<String>();
+ qHashQuerySet = new HashSet<String>();
+ qSortNHashQuerySet = new HashSet<String>();
+ qJavaVersionSpecificOutput = new HashSet<String>();
+ this.clusterType = clusterType;
+
+ // Using randomUUID for dfs cluster
+ System.setProperty("test.build.data", "target/test-data/hive-" + UUID.randomUUID().toString
+ ());
+
+ HadoopShims shims = ShimLoader.getHadoopShims();
+ int numberOfDataNodes = 1;
+
+ if (clusterType != MiniClusterType.none) {
+ dfs = shims.getMiniDfs(conf, numberOfDataNodes, true, null);
+ FileSystem fs = dfs.getFileSystem();
+ String uriString = WindowsPathUtil.getHdfsUriString(fs.getUri().toString());
+ if (clusterType == MiniClusterType.tez) {
+ conf.set("hive.execution.engine", "tez");
+ mr = shims.getMiniTezCluster(conf, 1, uriString, 1);
+ } else {
+ conf.set("hive.execution.engine", "mr");
+ mr = shims.getMiniMrCluster(conf, 1, uriString, 1);
+
+ }
+ }
+
+ initConf();
+
+ // Use the current directory if it is not specified
+ String dataDir = conf.get("test.data.files");
+ if (dataDir == null) {
+ dataDir = new File(".").getAbsolutePath() + "/data/files";
+ }
+
+ testFiles = dataDir;
+
+ // Use the current directory if it is not specified
+ String scriptsDir = conf.get("test.data.scripts");
+ if (scriptsDir == null) {
+ scriptsDir = new File(".").getAbsolutePath() + "/data/scripts";
+ }
+ if (!initScript.isEmpty()) {
+ this.initScript = scriptsDir + "/" + initScript;
+ }
+ if (!cleanupScript.isEmpty()) {
+ this.cleanupScript = scriptsDir + "/" + cleanupScript;
+ }
+
+ overWrite = "true".equalsIgnoreCase(System.getProperty("test.output.overwrite"));
+
+ setup = new HiveTestSetup();
+ setup.preTest(conf);
+ init();
+ }
+
+ public void shutdown() throws Exception {
+ cleanUp();
+ setup.tearDown();
+ if (mr != null) {
+ mr.shutdown();
+ mr = null;
+ }
+ FileSystem.closeAll();
+ if (dfs != null) {
+ dfs.shutdown();
+ dfs = null;
+ }
+ }
+
+ public String readEntireFileIntoString(File queryFile) throws IOException {
+ InputStreamReader isr = new InputStreamReader(
+ new BufferedInputStream(new FileInputStream(queryFile)), HiveTestUtil.UTF_8);
+ StringWriter sw = new StringWriter();
+ try {
+ IOUtils.copy(isr, sw);
+ } finally {
+ if (isr != null) {
+ isr.close();
+ }
+ }
+ return sw.toString();
+ }
+
+ public void addFile(String queryFile) throws IOException {
+ addFile(queryFile, false);
+ }
+
+    public void addFile(String queryFile, boolean partial) throws IOException {
+        addFile(new File(queryFile), partial);
+    }
+
+ public void addFile(File qf) throws IOException {
+ addFile(qf, false);
+ }
+
+ public void addFile(File qf, boolean partial) throws IOException {
+ String query = readEntireFileIntoString(qf);
+ qMap.put(qf.getName(), query);
+ if (partial) return;
+
+ if (matches(SORT_BEFORE_DIFF, query)) {
+ qSortSet.add(qf.getName());
+ } else if (matches(SORT_QUERY_RESULTS, query)) {
+ qSortQuerySet.add(qf.getName());
+ } else if (matches(HASH_QUERY_RESULTS, query)) {
+ qHashQuerySet.add(qf.getName());
+ } else if (matches(SORT_AND_HASH_QUERY_RESULTS, query)) {
+ qSortNHashQuerySet.add(qf.getName());
+ }
+ }
+
+ private static final Pattern SORT_BEFORE_DIFF = Pattern.compile("-- SORT_BEFORE_DIFF");
+ private static final Pattern SORT_QUERY_RESULTS = Pattern.compile("-- SORT_QUERY_RESULTS");
+ private static final Pattern HASH_QUERY_RESULTS = Pattern.compile("-- HASH_QUERY_RESULTS");
+ private static final Pattern SORT_AND_HASH_QUERY_RESULTS = Pattern.compile("-- " +
+ "SORT_AND_HASH_QUERY_RESULTS");
+
+    private boolean matches(Pattern pattern, String query) {
+        return pattern.matcher(query).find();
+    }
+
+ /**
+ * Get formatted Java version to include minor version, but
+ * exclude patch level.
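+     * For example, a "java.version" of "1.7.0_25" yields "1.7".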
+ *
+ * @return Java version formatted as major_version.minor_version
+ */
+ private static String getJavaVersion() {
+ String version = System.getProperty("java.version");
+ if (version == null) {
+ throw new NullPointerException("No java version could be determined " +
+ "from system properties");
+ }
+
+ // "java version" system property is formatted
+ // major_version.minor_version.patch_level.
+ // Find second dot, instead of last dot, to be safe
+ int pos = version.indexOf('.');
+ pos = version.indexOf('.', pos + 1);
+ return version.substring(0, pos);
+ }
+
+ /**
+ * Clear out any side effects of running tests
+ */
+ public void clearPostTestEffects() throws Exception {
+ setup.postTest(conf);
+ }
+
+ /**
+ * Clear out any side effects of running tests
+ */
+ public void clearTablesCreatedDuringTests() throws Exception {
+ if (System.getenv(QTEST_LEAVE_FILES) != null) {
+ return;
+ }
+
+ // Delete any tables other than the source tables
+ // and any databases other than the default database.
+ for (String dbName : db.getAllDatabases()) {
+ SessionState.get().setCurrentDatabase(dbName);
+ for (String tblName : db.getAllTables()) {
+ if (!DEFAULT_DATABASE_NAME.equals(dbName)) {
+ Table tblObj = db.getTable(tblName);
+                    // An index table cannot be dropped directly; dropping the base
+                    // table automatically drops all of its index tables.
+ if (tblObj.isIndexTable()) {
+ continue;
+ }
+ db.dropTable(dbName, tblName);
+ } else {
+ // this table is defined in srcTables, drop all indexes on it
+ List<Index> indexes = db.getIndexes(dbName, tblName, (short) -1);
+ if (indexes != null && indexes.size() > 0) {
+ for (Index index : indexes) {
+ db.dropIndex(dbName, tblName, index.getIndexName(), true, true);
+ }
+ }
+ }
+ }
+ if (!DEFAULT_DATABASE_NAME.equals(dbName)) {
+ // Drop cascade, may need to drop functions
+ db.dropDatabase(dbName, true, true, true);
+ }
+ }
+
+ // delete remaining directories for external tables (can affect stats for following tests)
+ try {
+ Path p = new Path(testWarehouse);
+ FileSystem fileSystem = p.getFileSystem(conf);
+ if (fileSystem.exists(p)) {
+ for (FileStatus status : fileSystem.listStatus(p)) {
+ if (status.isDir()) {
+ fileSystem.delete(status.getPath(), true);
+ }
+ }
+ }
+ } catch (IllegalArgumentException e) {
+            // ignore; the test intentionally provides an invalid URL sometimes
+ }
+ SessionState.get().setCurrentDatabase(DEFAULT_DATABASE_NAME);
+
+ List<String> roleNames = db.getAllRoleNames();
+ for (String roleName : roleNames) {
+ if (!"PUBLIC".equalsIgnoreCase(roleName) && !"ADMIN".equalsIgnoreCase(roleName)) {
+ db.dropRole(roleName);
+ }
+ }
+ }
+
+ /**
+ * Clear out any side effects of running tests
+ */
+ public void clearTestSideEffects() throws Exception {
+ if (System.getenv(QTEST_LEAVE_FILES) != null) {
+ return;
+ }
+
+ clearTablesCreatedDuringTests();
+ }
+
+ public void cleanUp() throws Exception {
+ if (!isSessionStateStarted) {
+ startSessionState();
+ }
+ if (System.getenv(QTEST_LEAVE_FILES) != null) {
+ return;
+ }
+
+ clearTablesCreatedDuringTests();
+
+ SessionState.get().getConf().setBoolean("hive.test.shutdown.phase", true);
+
+ if (cleanupScript != "") {
+ String cleanupCommands = readEntireFileIntoString(new File(cleanupScript));
+ LOG.info("Cleanup (" + cleanupScript + "):\n" + cleanupCommands);
+ if (cliDriver == null) {
+ cliDriver = new CliDriver();
+ }
+ cliDriver.processLine(cleanupCommands);
+ }
+
+ SessionState.get().getConf().setBoolean("hive.test.shutdown.phase", false);
+
+ // delete any contents in the warehouse dir
+ Path p = new Path(testWarehouse);
+ FileSystem fs = p.getFileSystem(conf);
+
+ try {
+ FileStatus[] ls = fs.listStatus(p);
+ for (int i = 0; (ls != null) && (i < ls.length); i++) {
+ fs.delete(ls[i].getPath(), true);
+ }
+ } catch (FileNotFoundException e) {
+ // Best effort
+ }
+
+ FunctionRegistry.unregisterTemporaryUDF("test_udaf");
+ FunctionRegistry.unregisterTemporaryUDF("test_error");
+ }
+
+ public void createSources() throws Exception {
+ if (!isSessionStateStarted) {
+ startSessionState();
+ }
+ conf.setBoolean("hive.test.init.phase", true);
+
+ if (cliDriver == null) {
+ cliDriver = new CliDriver();
+ }
+ cliDriver.processLine("set test.data.dir=" + testFiles + ";");
+
+ conf.setBoolean("hive.test.init.phase", false);
+ }
+
+ public void init() throws Exception {
+ testWarehouse = conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE);
+ conf.setBoolVar(HiveConf.ConfVars.SUBMITLOCALTASKVIACHILD, false);
+ String execEngine = conf.get("hive.execution.engine");
+ conf.set("hive.execution.engine", "mr");
+ SessionState.start(conf);
+ conf.set("hive.execution.engine", execEngine);
+ db = Hive.get(conf);
+ pd = new ParseDriver();
+ sem = new SemanticAnalyzer(conf);
+ }
+
+ public void init(String tname) throws Exception {
+ cleanUp();
+ createSources();
+ cliDriver.processCmd("set hive.cli.print.header=true;");
+ }
+
+ public void cliInit(String tname) throws Exception {
+ cliInit(tname, true);
+ }
+
+ public String cliInit(String tname, boolean recreate) throws Exception {
+ if (recreate) {
+ cleanUp();
+ createSources();
+ }
+
+ HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER,
+ "org.apache.hadoop.hive.ql.security.HadoopDefaultAuthenticator");
+ Utilities.clearWorkMap();
+ CliSessionState ss = new CliSessionState(conf);
+ assert ss != null;
+ ss.in = System.in;
+
+ String outFileExtension = getOutFileExtension(tname);
+ String stdoutName = null;
+ if (outDir != null) {
+ File qf = new File(outDir, tname);
+ stdoutName = qf.getName().concat(outFileExtension);
+ } else {
+ stdoutName = tname + outFileExtension;
+ }
+
+ File outf = new File(logDir, stdoutName);
+ OutputStream fo = new BufferedOutputStream(new FileOutputStream(outf));
+ if (qSortQuerySet.contains(tname)) {
+ ss.out = new SortPrintStream(fo, "UTF-8");
+ } else if (qHashQuerySet.contains(tname)) {
+ ss.out = new DigestPrintStream(fo, "UTF-8");
+ } else if (qSortNHashQuerySet.contains(tname)) {
+ ss.out = new SortAndDigestPrintStream(fo, "UTF-8");
+ } else {
+ ss.out = new PrintStream(fo, true, "UTF-8");
+ }
+ ss.err = new CachingPrintStream(fo, true, "UTF-8");
+ ss.setIsSilent(true);
+ SessionState oldSs = SessionState.get();
+
+ if (oldSs != null && clusterType == MiniClusterType.tez) {
+ oldSs.close();
+ }
+
+ if (oldSs != null && oldSs.out != null && oldSs.out != System.out) {
+ oldSs.out.close();
+ }
+ SessionState.start(ss);
+
+ cliDriver = new CliDriver();
+ cliDriver.processInitFiles(ss);
+
+ return outf.getAbsolutePath();
+ }
+
+ private CliSessionState startSessionState()
+ throws IOException {
+
+ HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER,
+ "org.apache.hadoop.hive.ql.security.HadoopDefaultAuthenticator");
+
+ String execEngine = conf.get("hive.execution.engine");
+ conf.set("hive.execution.engine", "mr");
+ CliSessionState ss = new CliSessionState(conf);
+ assert ss != null;
+ ss.in = System.in;
+ ss.out = System.out;
+ ss.err = System.out;
+
+ SessionState oldSs = SessionState.get();
+ if (oldSs != null && clusterType == MiniClusterType.tez) {
+ oldSs.close();
+ }
+ if (oldSs != null && oldSs.out != null && oldSs.out != System.out) {
+ oldSs.out.close();
+ }
+ SessionState.start(ss);
+
+ isSessionStateStarted = true;
+
+ conf.set("hive.execution.engine", execEngine);
+ return ss;
+ }
+
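+    /**
+     * Executes the next single statement (up to and including the first ';') from the
+     * cached query text for tname, keeping the remainder for subsequent calls.
+     */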
+ public int executeOne(String tname) {
+        String q = qMap.get(tname);
+
+        int semiPos = q.indexOf(';');
+        if (semiPos == -1) {
+            return -1;
+        }
+
+        String q1 = q.substring(0, semiPos + 1);
+        String qrest = q.substring(semiPos + 1);
+        qMap.put(tname, qrest);
+
+ LOG.info("Executing " + q1);
+ return cliDriver.processLine(q1);
+ }
+
+    // The platform line separator; not necessarily "\r\n", despite the constant's name.
+    public static final String CRLF = System.getProperty("line.separator");
+
+ public int executeClient(String tname1, String tname2) {
+ String commands = getCommands(tname1) + CRLF + getCommands(tname2);
+ return cliDriver.processLine(commands);
+ }
+
+ public int executeClient(String tname) {
+ conf.set("mapreduce.job.name", "test");
+ return cliDriver.processLine(getCommands(tname), false);
+ }
+
+ private String getCommands(String tname) {
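+        // Escape unescaped semicolons inside SQL comment lines so CliDriver does not
+        // split on them, e.g. "-- drop; just kidding" becomes "-- drop\; just kidding".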
+ String commands = qMap.get(tname);
+ StringBuilder newCommands = new StringBuilder(commands.length());
+ int lastMatchEnd = 0;
+ Matcher commentMatcher = Pattern.compile("^--.*$", Pattern.MULTILINE).matcher(commands);
+ while (commentMatcher.find()) {
+ newCommands.append(commands.substring(lastMatchEnd, commentMatcher.start()));
+ newCommands.append(commentMatcher.group().replaceAll("(?<!\\\\);", "\\\\;"));
+ lastMatchEnd = commentMatcher.end();
+ }
+ newCommands.append(commands.substring(lastMatchEnd, commands.length()));
+ commands = newCommands.toString();
+ return commands;
+ }
+
+ public boolean shouldBeSkipped(String tname) {
+ return qSkipSet.contains(tname);
+ }
+
+ private String getOutFileExtension(String fname) {
+ String outFileExtension = ".out";
+ if (qJavaVersionSpecificOutput.contains(fname)) {
+ outFileExtension = ".java" + javaVersion + ".out";
+ }
+
+ return outFileExtension;
+ }
+
+ /**
+ * Given the current configurations (e.g., hadoop version and execution mode), return
+ * the correct file name to compare with the current test run output.
+ *
+ * @param outDir The directory where the reference log files are stored.
+ * @param testName The test file name (terminated by ".out").
+ * @return The file name appended with the configuration values if it exists.
+ */
+ public String outPath(String outDir, String testName) {
+ String ret = (new File(outDir, testName)).getPath();
+ // List of configurations. Currently the list consists of hadoop version and execution
+ // mode only
+ List<String> configs = new ArrayList<String>();
+ configs.add(this.hadoopVer);
+
+ Deque<String> stack = new LinkedList<String>();
+ StringBuilder sb = new StringBuilder();
+ sb.append(testName);
+ stack.push(sb.toString());
+
+ // example file names are input1.q.out_0.20.0_minimr or input2.q.out_0.17
+ for (String s : configs) {
+ sb.append('_');
+ sb.append(s);
+ stack.push(sb.toString());
+ }
+        while (!stack.isEmpty()) {
+ String fileName = stack.pop();
+ File f = new File(outDir, fileName);
+ if (f.exists()) {
+ ret = f.getPath();
+ break;
+ }
+ }
+ return ret;
+ }
+
+ private Pattern[] toPattern(String[] patternStrs) {
+ Pattern[] patterns = new Pattern[patternStrs.length];
+ for (int i = 0; i < patternStrs.length; i++) {
+ patterns[i] = Pattern.compile(patternStrs[i]);
+ }
+ return patterns;
+ }
+
+ private void maskPatterns(Pattern[] patterns, String fname) throws Exception {
+ String maskPattern = "#### A masked pattern was here ####";
+
+ String line;
+ BufferedReader in;
+ BufferedWriter out;
+
+ File file = new File(fname);
+ File fileOrig = new File(fname + ".orig");
+ FileUtils.copyFile(file, fileOrig);
+
+ in = new BufferedReader(new InputStreamReader(new FileInputStream(fileOrig), "UTF-8"));
+ out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8"));
+
+ boolean lastWasMasked = false;
+ while (null != (line = in.readLine())) {
+ for (Pattern pattern : patterns) {
+ line = pattern.matcher(line).replaceAll(maskPattern);
+ }
+
+ if (line.equals(maskPattern)) {
+ // We're folding multiple masked lines into one.
+ if (!lastWasMasked) {
+ out.write(line);
+ out.write("\n");
+ lastWasMasked = true;
+ }
+ } else {
+ out.write(line);
+ out.write("\n");
+ lastWasMasked = false;
+ }
+ }
+
+ in.close();
+ out.close();
+ }
+
+ private final Pattern[] planMask = toPattern(new String[]{
+ ".*file:.*",
+ ".*pfile:.*",
+ ".*hdfs:.*",
+ ".*/tmp/.*",
+ ".*invalidscheme:.*",
+ ".*lastUpdateTime.*",
+ ".*lastAccessTime.*",
+ ".*lastModifiedTime.*",
+ ".*[Oo]wner.*",
+ ".*CreateTime.*",
+ ".*LastAccessTime.*",
+ ".*Location.*",
+ ".*LOCATION '.*",
+ ".*transient_lastDdlTime.*",
+ ".*last_modified_.*",
+ ".*at org.*",
+ ".*at sun.*",
+ ".*at java.*",
+ ".*at junit.*",
+ ".*Caused by:.*",
+ ".*LOCK_QUERYID:.*",
+ ".*LOCK_TIME:.*",
+ ".*grantTime.*",
+ ".*[.][.][.] [0-9]* more.*",
+ ".*job_[0-9_]*.*",
+ ".*job_local[0-9_]*.*",
+ ".*USING 'java -cp.*",
+ "^Deleted.*",
+ ".*DagName:.*",
+ ".*Input:.*/data/files/.*",
+ ".*Output:.*/data/files/.*",
+ ".*total number of created files now is.*"
+ });
+
+ public int checkCliDriverResults(String tname) throws Exception {
+ assert (qMap.containsKey(tname));
+
+ String outFileExtension = getOutFileExtension(tname);
+ String outFileName = outPath(outDir, tname + outFileExtension);
+
+ File f = new File(logDir, tname + outFileExtension);
+
+ maskPatterns(planMask, f.getPath());
+ int exitVal = executeDiffCommand(f.getPath(),
+ outFileName, false,
+ qSortSet.contains(tname));
+
+ if (exitVal != 0 && overWrite) {
+ exitVal = overwriteResults(f.getPath(), outFileName);
+ }
+
+ return exitVal;
+ }
+
+
+ public int checkCompareCliDriverResults(String tname, List<String> outputs) throws Exception {
+ assert outputs.size() > 1;
+ maskPatterns(planMask, outputs.get(0));
+ for (int i = 1; i < outputs.size(); ++i) {
+ maskPatterns(planMask, outputs.get(i));
+ int ecode = executeDiffCommand(
+ outputs.get(i - 1), outputs.get(i), false, qSortSet.contains(tname));
+ if (ecode != 0) {
+ LOG.info("Files don't match: " + outputs.get(i - 1) + " and " + outputs.get(i));
+ return ecode;
+ }
+ }
+ return 0;
+ }
+
+ private static int overwriteResults(String inFileName, String outFileName) throws Exception {
+        // This method can be replaced with Files.copy(source, target, REPLACE_EXISTING)
+        // once Hive moves to Java 7.
+ LOG.info("Overwriting results " + inFileName + " to " + outFileName);
+ return executeCmd(new String[]{
+ "cp",
+ getQuotedString(inFileName),
+ getQuotedString(outFileName)
+ });
+ }
+
+ private static int executeDiffCommand(String inFileName,
+ String outFileName,
+ boolean ignoreWhiteSpace,
+ boolean sortResults
+ ) throws Exception {
+
+ int result = 0;
+
+ if (sortResults) {
+            // sort will try to open the output file in write mode on Windows, so we need
+            // to close it first.
+ SessionState ss = SessionState.get();
+ if (ss != null && ss.out != null && ss.out != System.out) {
+ ss.out.close();
+ }
+
+ String inSorted = inFileName + SORT_SUFFIX;
+ String outSorted = outFileName + SORT_SUFFIX;
+
+ result = sortFiles(inFileName, inSorted);
+ result |= sortFiles(outFileName, outSorted);
+ if (result != 0) {
+ LOG.error("ERROR: Could not sort files before comparing");
+ return result;
+ }
+ inFileName = inSorted;
+ outFileName = outSorted;
+ }
+
+ ArrayList<String> diffCommandArgs = new ArrayList<String>();
+ diffCommandArgs.add("diff");
+
+ // Text file comparison
+ diffCommandArgs.add("-a");
+
+ // Ignore changes in the amount of white space
+ if (ignoreWhiteSpace || Shell.WINDOWS) {
+ diffCommandArgs.add("-b");
+ }
+
+ // Files created on Windows machines have different line endings
+ // than files created on Unix/Linux. Windows uses carriage return and line feed
+ // ("\r\n") as a line ending, whereas Unix uses just line feed ("\n").
+ // Also StringBuilder.toString(), Stream to String conversions adds extra
+ // spaces at the end of the line.
+ if (Shell.WINDOWS) {
+ diffCommandArgs.add("--strip-trailing-cr"); // Strip trailing carriage return on input
+ diffCommandArgs.add("-B"); // Ignore changes whose lines are all blank
+ }
+ // Add files to compare to the arguments list
+ diffCommandArgs.add(getQuotedString(inFileName));
+ diffCommandArgs.add(getQuotedString(outFileName));
+
+ result = executeCmd(diffCommandArgs);
+
+ if (sortResults) {
+ new File(inFileName).delete();
+ new File(outFileName).delete();
+ }
+
+ return result;
+ }
+
+ private static int sortFiles(String in, String out) throws Exception {
+ return executeCmd(new String[]{
+ "sort",
+ getQuotedString(in),
+ }, out, null);
+ }
+
+ private static int executeCmd(Collection<String> args) throws Exception {
+ return executeCmd(args, null, null);
+ }
+
+ private static int executeCmd(String[] args) throws Exception {
+ return executeCmd(args, null, null);
+ }
+
+ private static int executeCmd(Collection<String> args, String outFile, String errFile) throws
+ Exception {
+ String[] cmdArray = args.toArray(new String[args.size()]);
+ return executeCmd(cmdArray, outFile, errFile);
+ }
+
+ private static int executeCmd(String[] args, String outFile, String errFile) throws Exception {
+ LOG.info("Running: " + org.apache.commons.lang.StringUtils.join(args, ' '));
+
+ PrintStream out = outFile == null ?
+ SessionState.getConsole().getChildOutStream() :
+ new PrintStream(new FileOutputStream(outFile), true);
+ PrintStream err = errFile == null ?
+ SessionState.getConsole().getChildErrStream() :
+ new PrintStream(new FileOutputStream(errFile), true);
+
+ Process executor = Runtime.getRuntime().exec(args);
+
+ StreamPrinter errPrinter = new StreamPrinter(executor.getErrorStream(), null, err);
+ StreamPrinter outPrinter = new StreamPrinter(executor.getInputStream(), null, out);
+
+ outPrinter.start();
+ errPrinter.start();
+
+ int result = executor.waitFor();
+
+ outPrinter.join();
+ errPrinter.join();
+
+ if (outFile != null) {
+ out.close();
+ }
+
+ if (errFile != null) {
+ err.close();
+ }
+
+ return result;
+ }
+
+ private static String getQuotedString(String str) {
+ return Shell.WINDOWS ? String.format("\"%s\"", str) : str;
+ }
+
+ public ASTNode parseQuery(String tname) throws Exception {
+ return pd.parse(qMap.get(tname));
+ }
+
+ public void resetParser() throws SemanticException {
+ pd = new ParseDriver();
+ sem = new SemanticAnalyzer(conf);
+ }
+
+ public TreeMap<String, String> getQMap() {
+ return qMap;
+ }
+
+ /**
+ * HiveTestSetup defines test fixtures which are reused across testcases,
+ * and are needed before any test can be run
+ */
+ public static class HiveTestSetup {
+ private MiniZooKeeperCluster zooKeeperCluster = null;
+ private int zkPort;
+ private ZooKeeper zooKeeper;
+
+ public HiveTestSetup() {
+ }
+
+ public void preTest(HiveConf conf) throws Exception {
+
+ if (zooKeeperCluster == null) {
+ //create temp dir
+ String tmpBaseDir = System.getProperty("test.tmp.dir");
+ File tmpDir = Utilities.createTempDir(tmpBaseDir);
+
+ zooKeeperCluster = new MiniZooKeeperCluster();
+ zkPort = zooKeeperCluster.startup(tmpDir);
+ }
+
+ if (zooKeeper != null) {
+ zooKeeper.close();
+ }
+
+ int sessionTimeout = (int) conf.getTimeVar(HiveConf.ConfVars
+ .HIVE_ZOOKEEPER_SESSION_TIMEOUT, TimeUnit.MILLISECONDS);
+ zooKeeper = new ZooKeeper("localhost:" + zkPort, sessionTimeout, new Watcher() {
+ @Override
+ public void process(WatchedEvent arg0) {
+ }
+ });
+
+ String zkServer = "localhost";
+ conf.set("hive.zookeeper.quorum", zkServer);
+ conf.set("hive.zookeeper.client.port", "" + zkPort);
+ }
+
+ public void postTest(HiveConf conf) throws Exception {
+ if (zooKeeperCluster == null) {
+ return;
+ }
+
+ if (zooKeeper != null) {
+ zooKeeper.close();
+ }
+
+ ZooKeeperHiveLockManager.releaseAllLocks(conf);
+ }
+
+ public void tearDown() throws Exception {
+ if (zooKeeperCluster != null) {
+ zooKeeperCluster.shutdown();
+ zooKeeperCluster = null;
+ }
+ }
+ }
+
+    /**
+     * HiveTestRunner: Runnable class for running a single query file.
+     **/
+ public static class HiveTestRunner implements Runnable {
+ private final HiveTestUtil qt;
+ private final String fname;
+
+ public HiveTestRunner(HiveTestUtil qt, String fname) {
+ this.qt = qt;
+ this.fname = fname;
+ }
+
+ @Override
+ public void run() {
+ try {
+ // assumption is that environment has already been cleaned once globally
+ // hence each thread does not call cleanUp() and createSources() again
+ qt.cliInit(fname, false);
+ qt.executeClient(fname);
+ } catch (Throwable e) {
+ LOG.error("Query file " + fname + " failed with exception ", e);
+ e.printStackTrace();
+ outputTestFailureHelpMessage();
+ }
+ }
+ }
+
+ /**
+ * Executes a set of query files in sequence.
+ *
+ * @param qfiles array of input query files containing arbitrary number of hive
+ * queries
+ * @param qt array of HiveTestUtils, one per qfile
+     * @return true if all queries passed, false otherwise
+ */
+ public static boolean queryListRunnerSingleThreaded(File[] qfiles, HiveTestUtil[] qt)
+ throws Exception {
+ boolean failed = false;
+ qt[0].cleanUp();
+ qt[0].createSources();
+ for (int i = 0; i < qfiles.length && !failed; i++) {
+ qt[i].clearTestSideEffects();
+ qt[i].cliInit(qfiles[i].getName(), false);
+ qt[i].executeClient(qfiles[i].getName());
+ int ecode = qt[i].checkCliDriverResults(qfiles[i].getName());
+ if (ecode != 0) {
+ failed = true;
+ LOG.error("Test " + qfiles[i].getName()
+ + " results check failed with error code " + ecode);
+ outputTestFailureHelpMessage();
+ }
+ qt[i].clearPostTestEffects();
+ }
+ return (!failed);
+ }
+
+ public static void outputTestFailureHelpMessage() {
+ LOG.error("See ./ql/target/tmp/log/hive.log or ./itests/qtest/target/tmp/log/hive.log, "
+ + "or check ./ql/target/surefire-reports or " +
+ "./itests/qtest/target/surefire-reports/ for specific test cases logs.");
+ }
+
+ public static String ensurePathEndsInSlash(String path) {
+ if (path == null) {
+ throw new NullPointerException("Path cannot be null");
+ }
+ if (path.endsWith(File.separator)) {
+ return path;
+ } else {
+ return path + File.separator;
+ }
+ }
+
+ private static String[] cachedQvFileList = null;
+ private static ImmutableList<String> cachedDefaultQvFileList = null;
+ private static Pattern qvSuffix = Pattern.compile("_[0-9]+.qv$", Pattern.CASE_INSENSITIVE);
+
+ public static List<String> getVersionFiles(String queryDir, String tname) {
+ ensureQvFileList(queryDir);
+ List<String> result = getVersionFilesInternal(tname);
+ if (result == null) {
+ result = cachedDefaultQvFileList;
+ }
+ return result;
+ }
+
+ private static void ensureQvFileList(String queryDir) {
+ if (cachedQvFileList != null) return;
+ // Not thread-safe.
+ LOG.info("Getting versions from " + queryDir);
+ cachedQvFileList = (new File(queryDir)).list(new FilenameFilter() {
+ @Override
+ public boolean accept(File dir, String name) {
+ return name.toLowerCase().endsWith(".qv");
+ }
+ });
+ if (cachedQvFileList == null) return; // no files at all
+ Arrays.sort(cachedQvFileList, String.CASE_INSENSITIVE_ORDER);
+ List<String> defaults = getVersionFilesInternal("default");
+ cachedDefaultQvFileList = (defaults != null)
+ ? ImmutableList.copyOf(defaults) : ImmutableList.<String>of();
+ }
+
+ private static List<String> getVersionFilesInternal(String tname) {
+ if (cachedQvFileList == null) {
+ return new ArrayList<String>();
+ }
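+        // Version files always carry an "_N.qv" suffix, so an exact match on tname is
+        // unexpected; binarySearch returns a negative insertion point, and we scan
+        // forward from there to collect every "<tname>_N.qv" candidate.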
+ int pos = Arrays.binarySearch(cachedQvFileList, tname, String.CASE_INSENSITIVE_ORDER);
+ if (pos >= 0) {
+ throw new BuildException("Unexpected file list element: " + cachedQvFileList[pos]);
+ }
+ List<String> result = null;
+ for (pos = (-pos - 1); pos < cachedQvFileList.length; ++pos) {
+ String candidate = cachedQvFileList[pos];
+ if (candidate.length() <= tname.length()
+ || !tname.equalsIgnoreCase(candidate.substring(0, tname.length()))
+ || !qvSuffix.matcher(candidate.substring(tname.length())).matches()) {
+ break;
+ }
+ if (result == null) {
+ result = new ArrayList<String>();
+ }
+ result.add(candidate);
+ }
+ return result;
+ }
+
+ public void failed(int ecode, String fname, String debugHint) {
+ String command = SessionState.get() != null ? SessionState.get().getLastCommand() : null;
+ Assert.fail("Client Execution failed with error code = " + ecode +
+ (command != null ? " running " + command : "") + (debugHint != null ? debugHint :
+ ""));
+ }
+
+    // For negative tests, where execution unexpectedly succeeded; no need to print the query string.
+ public void failed(String fname, String debugHint) {
+ Assert.fail("Client Execution was expected to fail, but succeeded with error code 0 " +
+ (debugHint != null ? debugHint : ""));
+ }
+
+ public void failedDiff(int ecode, String fname, String debugHint) {
+ Assert.fail("Client Execution results failed with error code = " + ecode +
+ (debugHint != null ? debugHint : ""));
+ }
+
+ public void failed(Throwable e, String fname, String debugHint) {
+ String command = SessionState.get() != null ? SessionState.get().getLastCommand() : null;
+ LOG.error("Exception: ", e);
+ e.printStackTrace();
+ LOG.error("Failed query: " + fname);
+ Assert.fail("Unexpected exception " +
+ org.apache.hadoop.util.StringUtils.stringifyException(e) + "\n" +
+ (command != null ? " running " + command : "") +
+ (debugHint != null ? debugHint : ""));
+ }
+
+ public static class WindowsPathUtil {
+
+ public static void convertPathsFromWindowsToHdfs(HiveConf conf) {
+            // The following local paths are used as HDFS paths in unit tests. That works in
+            // Unix, where the path notation is more or less the same as in HDFS, but on
+            // Windows the drive-letter separator ':' and backslash '\' are invalid characters
+            // in HDFS, so these local paths must be converted to HDFS paths before being used
+            // in unit tests.
+
+ String orgWarehouseDir = conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE);
+ conf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, getHdfsUriString(orgWarehouseDir));
+
+ String orgTestTempDir = System.getProperty("test.tmp.dir");
+ System.setProperty("test.tmp.dir", getHdfsUriString(orgTestTempDir));
+
+ String orgTestWarehouseDir = System.getProperty("test.warehouse.dir");
+ System.setProperty("test.warehouse.dir", getHdfsUriString(orgTestWarehouseDir));
+
+ String orgScratchDir = conf.getVar(HiveConf.ConfVars.SCRATCHDIR);
+ conf.setVar(HiveConf.ConfVars.SCRATCHDIR, getHdfsUriString(orgScratchDir));
+ }
+
+ public static String getHdfsUriString(String uriStr) {
+ assert uriStr != null;
+ if (Shell.WINDOWS) {
+ // If the URI conversion is from Windows to HDFS then replace the '\' with '/'
+ // and remove the windows single drive letter & colon from absolute path.
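+                // e.g. "C:\tmp\warehouse" becomes "/tmp/warehouse".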
+ return uriStr.replace('\\', '/')
+ .replaceFirst("/[c-zC-Z]:", "/")
+ .replaceFirst("^[c-zC-Z]:", "");
+ }
+ return uriStr;
+ }
+ }
+}
diff --git a/phoenix-hive/src/it/java/org/apache/phoenix/hive/HiveTezIT.java b/phoenix-hive/src/it/java/org/apache/phoenix/hive/HiveTezIT.java
new file mode 100644
index 0000000..8dc3309
--- /dev/null
+++ b/phoenix-hive/src/it/java/org/apache/phoenix/hive/HiveTezIT.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.phoenix.hive;
+
+import org.apache.phoenix.end2end.NeedsOwnMiniClusterTest;
+import org.junit.BeforeClass;
+import org.junit.experimental.categories.Category;
+import org.junit.Ignore;
+
+@Category(NeedsOwnMiniClusterTest.class)
+public class HiveTezIT extends HivePhoenixStoreIT {
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ setup(HiveTestUtil.MiniClusterType.tez);
+ }
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixMetaHook.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixMetaHook.java
new file mode 100644
index 0000000..c35634a
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixMetaHook.java
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive;
+
+import com.google.common.base.CharMatcher;
+import com.google.common.base.Splitter;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.metastore.HiveMetaHook;
+import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.phoenix.hive.constants.PhoenixStorageHandlerConstants;
+import org.apache.phoenix.hive.util.PhoenixConnectionUtil;
+import org.apache.phoenix.hive.util.PhoenixStorageHandlerUtil;
+import org.apache.phoenix.hive.util.PhoenixUtil;
+
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.apache.phoenix.hive.util.ColumnMappingUtils.getColumnMappingMap;
+
+/**
+ * Implementation of the notification methods that are invoked as part of transactions
+ * against the Hive metastore, allowing Phoenix metadata to be kept in sync with Hive's
+ * metastore.
+ */
+public class PhoenixMetaHook implements HiveMetaHook {
+
+ private static final Log LOG = LogFactory.getLog(PhoenixMetaHook.class);
+
+ @Override
+ public void preCreateTable(Table table) throws MetaException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Precreate table : " + table.getTableName());
+ }
+
+ try (Connection conn = PhoenixConnectionUtil.getConnection(table)) {
+ String tableType = table.getTableType();
+ String tableName = PhoenixStorageHandlerUtil.getTargetTableName(table);
+
+ if (TableType.EXTERNAL_TABLE.name().equals(tableType)) {
+                // Check whether the Phoenix table exists.
+                if (!PhoenixUtil.existTable(conn, tableName)) {
+                    // Fail if the Phoenix table does not exist.
+                    throw new MetaException("Phoenix table " + tableName + " doesn't exist");
+ }
+ } else if (TableType.MANAGED_TABLE.name().equals(tableType)) {
+                // Check whether the Phoenix table exists.
+                if (PhoenixUtil.existTable(conn, tableName)) {
+                    // Fail if the Phoenix table already exists.
+                    throw new MetaException("Phoenix table " + tableName + " already exists.");
+ }
+
+ PhoenixUtil.createTable(conn, createTableStatement(table));
+ } else {
+ throw new MetaException("Unsupported table Type: " + table.getTableType());
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Phoenix table " + tableName + " was created");
+ }
+ } catch (SQLException e) {
+ throw new MetaException(e.getMessage());
+ }
+ }
+
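+    /**
+     * Builds the Phoenix CREATE TABLE DDL from the Hive table definition. As a rough,
+     * hypothetical example: with phoenix.rowkeys = "r1" and a single mapped column c1,
+     * the generated DDL resembles
+     *   create table t1 ( "r1" &lt;type&gt; not null, "c1" &lt;type&gt;,
+     *     constraint pk_T1 primary key("r1") )
+     * plus any PHOENIX_TABLE_OPTIONS appended at the end.
+     */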
+ private String createTableStatement(Table table) throws MetaException {
+ Map<String, String> tableParameterMap = table.getParameters();
+
+ String tableName = PhoenixStorageHandlerUtil.getTargetTableName(table);
+ StringBuilder ddl = new StringBuilder("create table ").append(tableName).append(" (\n");
+
+ String phoenixRowKeys = tableParameterMap.get(PhoenixStorageHandlerConstants
+ .PHOENIX_ROWKEYS);
+ StringBuilder realRowKeys = new StringBuilder();
+ List<String> phoenixRowKeyList = Lists.newArrayList(Splitter.on
+ (PhoenixStorageHandlerConstants.COMMA).trimResults().split(phoenixRowKeys));
+ Map<String, String> columnMappingMap = getColumnMappingMap(tableParameterMap.get
+ (PhoenixStorageHandlerConstants.PHOENIX_COLUMN_MAPPING));
+
+ List<FieldSchema> fieldSchemaList = table.getSd().getCols();
+ for (int i = 0, limit = fieldSchemaList.size(); i < limit; i++) {
+ FieldSchema fieldSchema = fieldSchemaList.get(i);
+ String fieldName = fieldSchema.getName();
+ String fieldType = fieldSchema.getType();
+ String columnType = PhoenixUtil.getPhoenixType(fieldType);
+
+ String rowKeyName = getRowKeyMapping(fieldName, phoenixRowKeyList);
+ if (rowKeyName != null) {
+ String columnName = columnMappingMap.get(fieldName);
+ if(columnName != null) {
+ rowKeyName = columnName;
+ }
+ // In case of RowKey
+ if ("binary".equals(columnType)) {
+                    // Phoenix requires a max length for binary columns in the type definition;
+                    // obtain it from the row-key mapping, e.g. phoenix.rowkeys = "r1, r2(100), ..."
+ List<String> tokenList = Lists.newArrayList(Splitter.on(CharMatcher.is('(')
+ .or(CharMatcher.is(')'))).trimResults().split(rowKeyName));
+ columnType = columnType + "(" + tokenList.get(1) + ")";
+ rowKeyName = tokenList.get(0);
+ }
+
+ ddl.append(" ").append("\"").append(rowKeyName).append("\"").append(" ").append(columnType).append(" not " +
+ "null,\n");
+ realRowKeys.append("\"").append(rowKeyName).append("\",");
+ } else {
+ // In case of Column
+ String columnName = columnMappingMap.get(fieldName);
+
+ if (columnName == null) {
+ // Use field definition.
+ columnName = fieldName;
+ }
+
+ if ("binary".equals(columnType)) {
+                    // Phoenix requires a max length for binary columns in the type definition;
+                    // obtain it from the column mapping, e.g. phoenix.column.mapping=c1:c1(100)
+ List<String> tokenList = Lists.newArrayList(Splitter.on(CharMatcher.is('(')
+ .or(CharMatcher.is(')'))).trimResults().split(columnName));
+ columnType = columnType + "(" + tokenList.get(1) + ")";
+ columnName = tokenList.get(0);
+ }
+
+ ddl.append(" ").append("\"").append(columnName).append("\"").append(" ").append(columnType).append(",\n");
+ }
+ }
+ ddl.append(" ").append("constraint pk_").append(PhoenixUtil.getTableSchema(tableName.toUpperCase())[1]).append(" primary key(")
+ .append(realRowKeys.deleteCharAt(realRowKeys.length() - 1)).append(")\n)\n");
+
+ String tableOptions = tableParameterMap.get(PhoenixStorageHandlerConstants
+ .PHOENIX_TABLE_OPTIONS);
+ if (tableOptions != null) {
+ ddl.append(tableOptions);
+ }
+
+ String statement = ddl.toString();
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("DDL : " + statement);
+ }
+
+ return statement;
+ }
+
+ private String getRowKeyMapping(String rowKeyName, List<String> phoenixRowKeyList) {
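+        // A row key may be declared either by exact name ("r1") or with a length
+        // suffix ("r1(100)"); both forms match here.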
+ String rowKeyMapping = null;
+
+ for (String phoenixRowKey : phoenixRowKeyList) {
+ if (phoenixRowKey.equals(rowKeyName)) {
+ rowKeyMapping = phoenixRowKey;
+ break;
+ } else if (phoenixRowKey.startsWith(rowKeyName + "(") && phoenixRowKey.endsWith(")")) {
+ rowKeyMapping = phoenixRowKey;
+ break;
+ }
+ }
+
+ return rowKeyMapping;
+ }
+
+ @Override
+ public void rollbackCreateTable(Table table) throws MetaException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Rollback for table : " + table.getTableName());
+ }
+
+ dropTableIfExist(table);
+ }
+
+ @Override
+ public void commitCreateTable(Table table) throws MetaException {
+
+ }
+
+ @Override
+ public void preDropTable(Table table) throws MetaException {
+ }
+
+ @Override
+ public void rollbackDropTable(Table table) throws MetaException {
+ }
+
+ @Override
+ public void commitDropTable(Table table, boolean deleteData) throws MetaException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Commit drop table : " + table.getTableName());
+ }
+
+ dropTableIfExist(table);
+ }
+
+ private void dropTableIfExist(Table table) throws MetaException {
+ try (Connection conn = PhoenixConnectionUtil.getConnection(table)) {
+ String tableType = table.getTableType();
+ String tableName = PhoenixStorageHandlerUtil.getTargetTableName(table);
+
+ if (TableType.MANAGED_TABLE.name().equals(tableType)) {
+ // Drop if phoenix table exist.
+ if (PhoenixUtil.existTable(conn, tableName)) {
+ PhoenixUtil.dropTable(conn, tableName);
+ }
+ }
+ } catch (SQLException e) {
+ throw new MetaException(e.getMessage());
+ }
+ }
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixRecordUpdater.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixRecordUpdater.java
new file mode 100644
index 0000000..089a299
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixRecordUpdater.java
@@ -0,0 +1,336 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
+import org.apache.hadoop.hive.ql.io.RecordUpdater;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.phoenix.hive.PhoenixSerializer.DmlType;
+import org.apache.phoenix.hive.constants.PhoenixStorageHandlerConstants;
+import org.apache.phoenix.hive.mapreduce.PhoenixResultWritable;
+import org.apache.phoenix.hive.util.PhoenixConnectionUtil;
+import org.apache.phoenix.hive.util.PhoenixStorageHandlerUtil;
+import org.apache.phoenix.hive.util.PhoenixUtil;
+import org.apache.phoenix.jdbc.PhoenixConnection;
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
+import org.apache.phoenix.schema.ConcurrentTableMutationException;
+import org.apache.phoenix.schema.MetaDataClient;
+import org.apache.phoenix.util.QueryUtil;
+
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.util.Properties;
+
+public class PhoenixRecordUpdater implements RecordUpdater {
+
+ private static final Log LOG = LogFactory.getLog(PhoenixRecordUpdater.class);
+
+ private final Connection conn;
+ private final PreparedStatement pstmt;
+ private final long batchSize;
+ private long numRecords = 0;
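+    // numRecords counts both upserts and deletes; a commit is issued every batchSize records.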
+
+ private Configuration config;
+ private String tableName;
+ private MetaDataClient metaDataClient;
+ private boolean restoreWalMode;
+
+ private long rowCountDelta = 0;
+
+ private PhoenixSerializer phoenixSerializer;
+ private ObjectInspector objInspector;
+ private PreparedStatement pstmtForDelete;
+
+ public PhoenixRecordUpdater(Path path, AcidOutputFormat.Options options) throws IOException {
+ this.config = options.getConfiguration();
+ tableName = config.get(PhoenixStorageHandlerConstants.PHOENIX_TABLE_NAME);
+
+ Properties props = new Properties();
+
+ try {
+ // Disable WAL
+ String walConfigName = tableName.toLowerCase() + PhoenixStorageHandlerConstants
+ .DISABLE_WAL;
+ boolean disableWal = config.getBoolean(walConfigName, false);
+ if (disableWal) {
+ if (LOG.isDebugEnabled()) {
+                    LOG.debug(walConfigName + " is true; batch.mode will be set to true.");
+ }
+
+ props.setProperty(PhoenixStorageHandlerConstants.BATCH_MODE, "true");
+ }
+
+ this.conn = PhoenixConnectionUtil.getInputConnection(config, props);
+
+ if (disableWal) {
+ metaDataClient = new MetaDataClient((PhoenixConnection) conn);
+
+ if (!PhoenixUtil.isDisabledWal(metaDataClient, tableName)) {
+                    // Execute an ALTER TABLE statement if disable_wal is not already true.
+ try {
+ PhoenixUtil.alterTableForWalDisable(conn, tableName, true);
+ } catch (ConcurrentTableMutationException e) {
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Concurrent modification of disableWAL");
+ }
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(tableName + "s wal disabled.");
+ }
+
+ // restore original value of disable_wal at the end.
+ restoreWalMode = true;
+ }
+ }
+
+ this.batchSize = PhoenixConfigurationUtil.getBatchSize(config);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Batch-size : " + batchSize);
+ }
+
+ String upsertQuery = QueryUtil.constructUpsertStatement(tableName, PhoenixUtil
+ .getColumnInfoList(conn, tableName));
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Upsert-query : " + upsertQuery);
+ }
+ this.pstmt = this.conn.prepareStatement(upsertQuery);
+ } catch (SQLException e) {
+ throw new IOException(e);
+ }
+
+ this.objInspector = options.getInspector();
+ try {
+ phoenixSerializer = new PhoenixSerializer(config, options.getTableProperties());
+ } catch (SerDeException e) {
+ throw new IOException(e);
+ }
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hive.ql.io.RecordUpdater#insert(long, java.lang.Object)
+ */
+ @Override
+ public void insert(long currentTransaction, Object row) throws IOException {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Insert - currentTranscation : " + currentTransaction + ", row : " +
+ PhoenixStorageHandlerUtil.toString(row));
+ }
+
+ PhoenixResultWritable pResultWritable = (PhoenixResultWritable) phoenixSerializer
+ .serialize(row, objInspector, DmlType.INSERT);
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Data : " + pResultWritable.getValueList());
+ }
+
+ write(pResultWritable);
+
+ rowCountDelta++;
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hive.ql.io.RecordUpdater#update(long, java.lang.Object)
+ */
+ @Override
+ public void update(long currentTransaction, Object row) throws IOException {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Update - currentTranscation : " + currentTransaction + ", row : " +
+ PhoenixStorageHandlerUtil.toString(row));
+ }
+
+ PhoenixResultWritable pResultWritable = (PhoenixResultWritable) phoenixSerializer
+ .serialize(row, objInspector, DmlType.UPDATE);
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Data : " + pResultWritable.getValueList());
+ }
+
+ write(pResultWritable);
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hive.ql.io.RecordUpdater#delete(long, java.lang.Object)
+ */
+ @Override
+ public void delete(long currentTransaction, Object row) throws IOException {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Delete - currentTranscation : " + currentTransaction + ", row : " +
+ PhoenixStorageHandlerUtil.toString(row));
+ }
+
+ PhoenixResultWritable pResultWritable = (PhoenixResultWritable) phoenixSerializer
+ .serialize(row, objInspector, DmlType.DELETE);
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Data : " + pResultWritable.getValueList());
+ }
+
+ if (pstmtForDelete == null) {
+ try {
+ String deleteQuery = PhoenixUtil.constructDeleteStatement(conn, tableName);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Delete query : " + deleteQuery);
+ }
+
+ pstmtForDelete = conn.prepareStatement(deleteQuery);
+ } catch (SQLException e) {
+ throw new IOException(e);
+ }
+ }
+
+ delete(pResultWritable);
+
+ rowCountDelta--;
+ }
+
+ private void delete(PhoenixResultWritable pResultWritable) throws IOException {
+ try {
+ pResultWritable.delete(pstmtForDelete);
+ numRecords++;
+ pstmtForDelete.executeUpdate();
+
+ if (numRecords % batchSize == 0) {
+ LOG.debug("Commit called on a batch of size : " + batchSize);
+ conn.commit();
+ }
+ } catch (SQLException e) {
+ throw new IOException("Exception while deleting to table.", e);
+ }
+ }
+
+ private void write(PhoenixResultWritable pResultWritable) throws IOException {
+ try {
+ pResultWritable.write(pstmt);
+ numRecords++;
+ pstmt.executeUpdate();
+
+ if (numRecords % batchSize == 0) {
+ LOG.debug("Commit called on a batch of size : " + batchSize);
+ conn.commit();
+ }
+ } catch (SQLException e) {
+ throw new IOException("Exception while writing to table.", e);
+ }
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hive.ql.io.RecordUpdater#flush()
+ */
+ @Override
+ public void flush() throws IOException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Flush called");
+ }
+
+ try {
+ conn.commit();
+
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Written row : " + numRecords);
+ }
+ } catch (SQLException e) {
+ LOG.error("SQLException while performing the commit for the task.");
+ throw new IOException(e);
+ }
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hive.ql.io.RecordUpdater#close(boolean)
+ */
+ @Override
+ public void close(boolean abort) throws IOException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("abort : " + abort);
+ }
+
+ try {
+ conn.commit();
+
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Written row : " + numRecords);
+ }
+ } catch (SQLException e) {
+ LOG.error("SQLException while performing the commit for the task.");
+ throw new IOException(e);
+ } finally {
+ try {
+ if (restoreWalMode && PhoenixUtil.isDisabledWal(metaDataClient, tableName)) {
+ try {
+ PhoenixUtil.alterTableForWalDisable(conn, tableName, false);
+ } catch (ConcurrentTableMutationException e) {
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Concurrent modification of disableWAL");
+ }
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(tableName + "s wal enabled.");
+ }
+ }
+
+ // flush when [table-name].auto.flush is true.
+ String autoFlushConfigName = tableName.toLowerCase() +
+ PhoenixStorageHandlerConstants.AUTO_FLUSH;
+ boolean autoFlush = config.getBoolean(autoFlushConfigName, false);
+ if (autoFlush) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("autoFlush is " + autoFlush);
+ }
+
+ PhoenixUtil.flush(conn, tableName);
+ }
+
+ PhoenixUtil.closeResource(pstmt);
+ PhoenixUtil.closeResource(pstmtForDelete);
+ PhoenixUtil.closeResource(conn);
+ } catch (SQLException ex) {
+ LOG.error("SQLException while closing the connection for the task.");
+ throw new IOException(ex);
+ }
+ }
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hive.ql.io.RecordUpdater#getStats()
+ */
+ @Override
+ public SerDeStats getStats() {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("getStats called");
+ }
+
+ SerDeStats stats = new SerDeStats();
+ stats.setRowCount(rowCountDelta);
+ // Don't worry about setting raw data size diff. There is no reasonable way to calculate
+ // that without finding the row we are updating or deleting, which would be a mess.
+ return stats;
+ }
+
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixRow.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixRow.java
new file mode 100644
index 0000000..fa307ce
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixRow.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive;
+
+import com.google.common.collect.Lists;
+import org.apache.hadoop.hive.serde2.StructObject;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Implementation for Hive SerDe StructObject
+ */
+public class PhoenixRow implements StructObject {
+
+ private List<String> columnList;
+ private Map<String, Object> resultRowMap;
+
+ public PhoenixRow(List<String> columnList) {
+ this.columnList = columnList;
+ }
+
+ public PhoenixRow setResultRowMap(Map<String, Object> resultRowMap) {
+ this.resultRowMap = resultRowMap;
+ return this;
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hive.serde2.StructObject#getField(int)
+ */
+ @Override
+ public Object getField(int fieldID) {
+ return resultRowMap.get(columnList.get(fieldID));
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.hive.serde2.StructObject#getFieldsAsList()
+ */
+ @Override
+ public List<Object> getFieldsAsList() {
+ return Lists.newArrayList(resultRowMap.values());
+ }
+
+
+ @Override
+ public String toString() {
+ return resultRowMap.toString();
+ }
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixRowKey.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixRowKey.java
new file mode 100644
index 0000000..a963fba
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixRowKey.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive;
+
+import org.apache.hadoop.hive.ql.io.RecordIdentifier;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Map;
+
+/**
+ * Hive's RecordIdentifier implementation.
+ */
+
+public class PhoenixRowKey extends RecordIdentifier {
+
+ private PrimaryKeyData rowKeyMap = PrimaryKeyData.EMPTY;
+
+ public PhoenixRowKey() {
+
+ }
+
+ public void setRowKeyMap(Map<String, Object> rowKeyMap) {
+ this.rowKeyMap = new PrimaryKeyData(rowKeyMap);
+ }
+
+ @Override
+ public void write(DataOutput dataOutput) throws IOException {
+ super.write(dataOutput);
+
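+        // Assumes the DataOutput supplied by Hive is also an OutputStream (as
+        // DataOutputStream is); the same assumption applies to readFields() below.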
+ rowKeyMap.serialize((OutputStream) dataOutput);
+ }
+
+ @Override
+ public void readFields(DataInput dataInput) throws IOException {
+ super.readFields(dataInput);
+
+ try {
+ rowKeyMap = PrimaryKeyData.deserialize((InputStream) dataInput);
+ } catch (ClassNotFoundException e) {
+            throw new IOException(e);
+ }
+ }
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixSerDe.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixSerDe.java
new file mode 100644
index 0000000..9ef0158
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixSerDe.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.Writable;
+import org.apache.phoenix.hive.PhoenixSerializer.DmlType;
+import org.apache.phoenix.hive.constants.PhoenixStorageHandlerConstants;
+import org.apache.phoenix.hive.mapreduce.PhoenixResultWritable;
+import org.apache.phoenix.hive.objectinspector.PhoenixObjectInspectorFactory;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ * SerDe implementation for Phoenix Hive Storage.
+ */
+public class PhoenixSerDe extends AbstractSerDe {
+
+ public static final Log LOG = LogFactory.getLog(PhoenixSerDe.class);
+
+ private PhoenixSerializer serializer;
+ private ObjectInspector objectInspector;
+
+ private LazySerDeParameters serdeParams;
+ private PhoenixRow row;
+
+ private Properties tableProperties;
+
+ /**
+ * @throws SerDeException
+ */
+ public PhoenixSerDe() throws SerDeException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("PhoenixSerDe created");
+ }
+ }
+
+ @Override
+ public void initialize(Configuration conf, Properties tbl) throws SerDeException {
+ tableProperties = tbl;
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("SerDe initialize : " + tbl.getProperty("name"));
+ }
+
+ serdeParams = new LazySerDeParameters(conf, tbl, getClass().getName());
+ objectInspector = createLazyPhoenixInspector(conf, tbl);
+
+ String inOutWork = tbl.getProperty(PhoenixStorageHandlerConstants.IN_OUT_WORK);
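+        // When no in/out work type is set, the SerDe is presumably only being used for
+        // metadata inspection, so the serializer and row template are not created.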
+ if (inOutWork == null) {
+ return;
+ }
+
+ serializer = new PhoenixSerializer(conf, tbl);
+ row = new PhoenixRow(serdeParams.getColumnNames());
+ }
+
+ @Override
+ public Object deserialize(Writable result) throws SerDeException {
+ if (!(result instanceof PhoenixResultWritable)) {
+ throw new SerDeException(result.getClass().getName() + ": expects " +
+ "PhoenixResultWritable!");
+ }
+
+ return row.setResultRowMap(((PhoenixResultWritable) result).getResultMap());
+ }
+
+ @Override
+ public Class<? extends Writable> getSerializedClass() {
+ return PhoenixResultWritable.class;
+ }
+
+ @Override
+ public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
+ try {
+ return serializer.serialize(obj, objInspector, DmlType.NONE);
+ } catch (Exception e) {
+ throw new SerDeException(e);
+ }
+ }
+
+ @Override
+ public SerDeStats getSerDeStats() {
+ // no support for statistics
+ return null;
+ }
+
+ public Properties getTableProperties() {
+ return tableProperties;
+ }
+
+ public LazySerDeParameters getSerdeParams() {
+ return serdeParams;
+ }
+
+ @Override
+ public ObjectInspector getObjectInspector() throws SerDeException {
+ return objectInspector;
+ }
+
+ private ObjectInspector createLazyPhoenixInspector(Configuration conf, Properties tbl) throws
+ SerDeException {
+ List<String> columnNameList = Arrays.asList(tbl.getProperty(serdeConstants.LIST_COLUMNS)
+ .split(PhoenixStorageHandlerConstants.COMMA));
+ List<TypeInfo> columnTypeList = TypeInfoUtils.getTypeInfosFromTypeString(tbl.getProperty
+ (serdeConstants.LIST_COLUMN_TYPES));
+
+ List<ObjectInspector> columnObjectInspectors = Lists.newArrayListWithExpectedSize
+ (columnTypeList.size());
+
+ for (TypeInfo typeInfo : columnTypeList) {
+ columnObjectInspectors.add(PhoenixObjectInspectorFactory.createObjectInspector
+ (typeInfo, serdeParams));
+ }
+
+ return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(columnNameList,
+ columnObjectInspectors, null, serdeParams.getSeparators()[0], serdeParams,
+ ObjectInspectorOptions.JAVA);
+ }
+}
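
The SerDe above can be exercised without a running cluster as long as the `in.out.work` property is absent, since `initialize` then returns before opening a Phoenix connection. A minimal sketch, assuming Hive's serde2 classes on the classpath and a hypothetical two-column layout:

```java
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.phoenix.hive.PhoenixSerDe;

public class PhoenixSerDeSketch {
    public static void main(String[] args) throws Exception {
        Properties tbl = new Properties();
        // Hypothetical column layout; in a real job Hive fills these in from the table DDL.
        tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
        tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");

        PhoenixSerDe serde = new PhoenixSerDe();
        // No "in.out.work" property is set, so no serializer (and no connection) is created.
        serde.initialize(new Configuration(), tbl);

        // Prints the lazy struct type, e.g. struct<id:int,name:string>.
        System.out.println(serde.getObjectInspector().getTypeName());
    }
}
```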
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixSerializer.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixSerializer.java
new file mode 100644
index 0000000..852407a
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixSerializer.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.phoenix.hive.constants.PhoenixStorageHandlerConstants;
+import org.apache.phoenix.hive.mapreduce.PhoenixResultWritable;
+import org.apache.phoenix.hive.util.PhoenixConnectionUtil;
+import org.apache.phoenix.hive.util.PhoenixStorageHandlerUtil;
+import org.apache.phoenix.hive.util.PhoenixUtil;
+import org.apache.phoenix.util.ColumnInfo;
+
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+/**
+ * Serializer used in PhoenixSerDe and PhoenixRecordUpdater to produce Writable.
+ */
+public class PhoenixSerializer {
+
+ private static final Log LOG = LogFactory.getLog(PhoenixSerializer.class);
+
+ public static enum DmlType {
+ NONE,
+ SELECT,
+ INSERT,
+ UPDATE,
+ DELETE
+ }
+
+ private int columnCount = 0;
+ private PhoenixResultWritable pResultWritable;
+
+ public PhoenixSerializer(Configuration config, Properties tbl) throws SerDeException {
+ String mapping = tbl.getProperty(PhoenixStorageHandlerConstants.PHOENIX_COLUMN_MAPPING, null);
+ if (mapping != null) {
+ config.set(PhoenixStorageHandlerConstants.PHOENIX_COLUMN_MAPPING, mapping);
+ }
+ try (Connection conn = PhoenixConnectionUtil.getInputConnection(config, tbl)) {
+ List<ColumnInfo> columnMetadata = PhoenixUtil.getColumnInfoList(conn, tbl.getProperty
+ (PhoenixStorageHandlerConstants.PHOENIX_TABLE_NAME));
+
+ columnCount = columnMetadata.size();
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Column-meta : " + columnMetadata);
+ }
+
+ pResultWritable = new PhoenixResultWritable(config, columnMetadata);
+ } catch (SQLException | IOException e) {
+ throw new SerDeException(e);
+ }
+ }
+
+ public Writable serialize(Object values, ObjectInspector objInspector, DmlType dmlType) {
+ pResultWritable.clear();
+
+ final StructObjectInspector structInspector = (StructObjectInspector) objInspector;
+ final List<? extends StructField> fieldList = structInspector.getAllStructFieldRefs();
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("FieldList : " + fieldList + " values(" + values.getClass() + ") : " +
+ values);
+ }
+
+ int fieldCount = columnCount;
+ if (dmlType == DmlType.UPDATE || dmlType == DmlType.DELETE) {
+ fieldCount++;
+ }
+
+ for (int i = 0; i < fieldCount; i++) {
+ if (fieldList.size() <= i) {
+ break;
+ }
+
+ StructField structField = fieldList.get(i);
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("structField[" + i + "] : " + structField);
+ }
+
+ if (structField != null) {
+ Object fieldValue = structInspector.getStructFieldData(values, structField);
+ ObjectInspector fieldOI = structField.getFieldObjectInspector();
+
+ String fieldName = structField.getFieldName();
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Field " + fieldName + "[" + i + "] : " + fieldValue + ", " +
+ fieldOI);
+ }
+
+ Object value = null;
+ switch (fieldOI.getCategory()) {
+ case PRIMITIVE:
+ value = ((PrimitiveObjectInspector) fieldOI).getPrimitiveJavaObject
+ (fieldValue);
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Field " + fieldName + "[" + i + "] : " + value + "(" + value
+ .getClass() + ")");
+ }
+
+ if (value instanceof HiveDecimal) {
+ value = ((HiveDecimal) value).bigDecimalValue();
+ } else if (value instanceof HiveChar) {
+ value = ((HiveChar) value).getValue().trim();
+ }
+
+ pResultWritable.add(value);
+ break;
+ case LIST:
+ // Arrays are not yet supported in INSERT statements.
+ break;
+ case STRUCT:
+ if (dmlType == DmlType.DELETE) {
+ // For UPDATE/DELETE, the first value is a
+ // struct<transactionid:bigint,bucketid:int,rowid:bigint,primaryKey:binary>
+ List<Object> fieldValueList = ((StandardStructObjectInspector)
+ fieldOI).getStructFieldsDataAsList(fieldValue);
+
+ // Convert the serialized primary key bytes back into a column/value map.
+ @SuppressWarnings("unchecked")
+ Map<String, Object> primaryKeyMap = (Map<String, Object>)
+ PhoenixStorageHandlerUtil.toMap(((BytesWritable)
+ fieldValueList.get(3)).getBytes());
+ for (Object pkValue : primaryKeyMap.values()) {
+ pResultWritable.add(pkValue);
+ }
+ }
+
+ break;
+ default:
+ // Fail fast instead of silently dropping the field.
+ throw new RuntimeException(new SerDeException(
+ "Phoenix unsupported column type: " + fieldOI.getCategory()));
+ }
+ }
+ }
+
+ return pResultWritable;
+ }
+}
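
Two of the conversions in `serialize` are easy to miss: Hive's decimal and fixed-length char wrappers are unwrapped into plain Java values before they reach the Phoenix upsert. A small sketch of just those conversions (the literal inputs are illustrative):

```java
import java.math.BigDecimal;

import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;

public class HiveValueConversionSketch {
    public static void main(String[] args) {
        // DECIMAL columns arrive as HiveDecimal and are converted to java.math.BigDecimal.
        BigDecimal decimal = HiveDecimal.create("123.45").bigDecimalValue();

        // CHAR(n) columns arrive as HiveChar; the right-padding is trimmed away.
        String chars = new HiveChar("abc", 10).getValue().trim();

        System.out.println(decimal + " / '" + chars + "'");   // 123.45 / 'abc'
    }
}
```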
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixStorageHandler.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixStorageHandler.java
new file mode 100644
index 0000000..4e9f465
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixStorageHandler.java
@@ -0,0 +1,276 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
+import org.apache.hadoop.hive.common.JavaUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaHook;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
+import org.apache.hadoop.hive.ql.metadata.InputEstimator;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.phoenix.hive.constants.PhoenixStorageHandlerConstants;
+import org.apache.phoenix.hive.mapreduce.PhoenixInputFormat;
+import org.apache.phoenix.hive.mapreduce.PhoenixOutputFormat;
+import org.apache.phoenix.hive.ppd.PhoenixPredicateDecomposer;
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.UUID;
+
+/**
+ * Manages the initial configuration of Phoenix-backed Hive tables and selects the
+ * SerDe, input format, and output format for them.
+ */
+@SuppressWarnings("deprecation")
+public class PhoenixStorageHandler extends DefaultStorageHandler implements
+ HiveStoragePredicateHandler, InputEstimator {
+
+ private Configuration jobConf;
+ private Configuration hbaseConf;
+
+ @Override
+ public void setConf(Configuration conf) {
+ jobConf = conf;
+ hbaseConf = HBaseConfiguration.create(conf);
+ }
+
+ @Override
+ public Configuration getConf() {
+ return hbaseConf;
+ }
+
+ private static final Log LOG = LogFactory.getLog(PhoenixStorageHandler.class);
+
+ public PhoenixStorageHandler() {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("PhoenixStorageHandler created");
+ }
+ }
+
+ @Override
+ public HiveMetaHook getMetaHook() {
+ return new PhoenixMetaHook();
+ }
+
+ @Override
+ public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
+ try {
+ TableMapReduceUtil.addDependencyJars(jobConf);
+ org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(jobConf,
+ PhoenixStorageHandler.class);
+ JobConf hbaseJobConf = new JobConf(getConf());
+ org.apache.hadoop.hbase.mapred.TableMapReduceUtil.initCredentials(hbaseJobConf);
+ ShimLoader.getHadoopShims().mergeCredentials(jobConf, hbaseJobConf);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @SuppressWarnings("rawtypes")
+ @Override
+ public Class<? extends OutputFormat> getOutputFormatClass() {
+ return PhoenixOutputFormat.class;
+ }
+
+ @SuppressWarnings("rawtypes")
+ @Override
+ public Class<? extends InputFormat> getInputFormatClass() {
+ return PhoenixInputFormat.class;
+ }
+
+ @Override
+ public void configureInputJobProperties(TableDesc tableDesc, Map<String, String>
+ jobProperties) {
+ configureJobProperties(tableDesc, jobProperties);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Configuring input job for table : " + tableDesc.getTableName());
+ }
+
+ // Inform the SerDe that real input work follows so it can finish initializing.
+ tableDesc.getProperties().setProperty(PhoenixStorageHandlerConstants.IN_OUT_WORK,
+ PhoenixStorageHandlerConstants.IN_WORK);
+ }
+
+ @Override
+ public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String>
+ jobProperties) {
+ configureJobProperties(tableDesc, jobProperties);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Configuring output job for table : " + tableDesc.getTableName());
+ }
+
+ // Inform the SerDe that real output work follows so it can finish initializing.
+ tableDesc.getProperties().setProperty(PhoenixStorageHandlerConstants.IN_OUT_WORK,
+ PhoenixStorageHandlerConstants.OUT_WORK);
+ }
+
+ @Override
+ public void configureTableJobProperties(TableDesc tableDesc, Map<String, String>
+ jobProperties) {
+ configureJobProperties(tableDesc, jobProperties);
+ }
+
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ protected void configureJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
+ Properties tableProperties = tableDesc.getProperties();
+
+ String inputFormatClassName =
+ tableProperties.getProperty(PhoenixStorageHandlerConstants
+ .HBASE_INPUT_FORMAT_CLASS);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(PhoenixStorageHandlerConstants.HBASE_INPUT_FORMAT_CLASS + " is " +
+ inputFormatClassName);
+ }
+
+ Class<?> inputFormatClass;
+ try {
+ if (inputFormatClassName != null) {
+ inputFormatClass = JavaUtils.loadClass(inputFormatClassName);
+ } else {
+ inputFormatClass = PhoenixInputFormat.class;
+ }
+ } catch (Exception e) {
+ LOG.error(e.getMessage(), e);
+ throw new RuntimeException(e);
+ }
+
+ if (inputFormatClass != null) {
+ tableDesc.setInputFileFormatClass((Class<? extends InputFormat>) inputFormatClass);
+ }
+
+ String tableName = tableProperties.getProperty(PhoenixStorageHandlerConstants
+ .PHOENIX_TABLE_NAME);
+ if (tableName == null) {
+ tableName = tableDesc.getTableName();
+ tableProperties.setProperty(PhoenixStorageHandlerConstants.PHOENIX_TABLE_NAME,
+ tableName);
+ }
+ SessionState sessionState = SessionState.get();
+
+ String sessionId;
+ if (sessionState != null) {
+ sessionId = sessionState.getSessionId();
+ } else {
+ sessionId = UUID.randomUUID().toString();
+ }
+ jobProperties.put(PhoenixConfigurationUtil.SESSION_ID, sessionId);
+ jobProperties.put(PhoenixConfigurationUtil.INPUT_TABLE_NAME, tableName);
+ jobProperties.put(PhoenixStorageHandlerConstants.ZOOKEEPER_QUORUM, tableProperties
+ .getProperty(PhoenixStorageHandlerConstants.ZOOKEEPER_QUORUM,
+ PhoenixStorageHandlerConstants.DEFAULT_ZOOKEEPER_QUORUM));
+ jobProperties.put(PhoenixStorageHandlerConstants.ZOOKEEPER_PORT, tableProperties
+ .getProperty(PhoenixStorageHandlerConstants.ZOOKEEPER_PORT, String.valueOf
+ (PhoenixStorageHandlerConstants.DEFAULT_ZOOKEEPER_PORT)));
+ jobProperties.put(PhoenixStorageHandlerConstants.ZOOKEEPER_PARENT, tableProperties
+ .getProperty(PhoenixStorageHandlerConstants.ZOOKEEPER_PARENT,
+ PhoenixStorageHandlerConstants.DEFAULT_ZOOKEEPER_PARENT));
+ String columnMapping = tableProperties
+ .getProperty(PhoenixStorageHandlerConstants.PHOENIX_COLUMN_MAPPING);
+ if (columnMapping != null) {
+ jobProperties.put(PhoenixStorageHandlerConstants.PHOENIX_COLUMN_MAPPING, columnMapping);
+ }
+
+ jobProperties.put(hive_metastoreConstants.META_TABLE_STORAGE, this.getClass().getName());
+
+ // Set ZooKeeper properties for jobs that work with HBase directly.
+ jobProperties.put(HConstants.ZOOKEEPER_QUORUM, jobProperties.get
+ (PhoenixStorageHandlerConstants.ZOOKEEPER_QUORUM));
+ jobProperties.put(HConstants.ZOOKEEPER_CLIENT_PORT, jobProperties.get
+ (PhoenixStorageHandlerConstants.ZOOKEEPER_PORT));
+ jobProperties.put(HConstants.ZOOKEEPER_ZNODE_PARENT, jobProperties.get
+ (PhoenixStorageHandlerConstants.ZOOKEEPER_PARENT));
+ addHBaseResources(jobConf, jobProperties);
+ }
+
+ /**
+ * Utility method to add hbase-default.xml and hbase-site.xml properties to a new map
+ * if they are not already present in the jobConf.
+ * @param jobConf Job configuration
+ * @param newJobProperties Map to which new properties should be added
+ */
+ private void addHBaseResources(Configuration jobConf,
+ Map<String, String> newJobProperties) {
+ Configuration conf = new Configuration(false);
+ HBaseConfiguration.addHbaseResources(conf);
+ for (Map.Entry<String, String> entry : conf) {
+ if (jobConf.get(entry.getKey()) == null) {
+ newJobProperties.put(entry.getKey(), entry.getValue());
+ }
+ }
+ }
+
+ @Override
+ public Class<? extends SerDe> getSerDeClass() {
+ return PhoenixSerDe.class;
+ }
+
+ @Override
+ public DecomposedPredicate decomposePredicate(JobConf jobConf, Deserializer deserializer,
+ ExprNodeDesc predicate) {
+ PhoenixSerDe phoenixSerDe = (PhoenixSerDe) deserializer;
+ List<String> columnNameList = phoenixSerDe.getSerdeParams().getColumnNames();
+
+ return PhoenixPredicateDecomposer.create(columnNameList).decomposePredicate(predicate);
+ }
+
+ @Override
+ public Estimation estimate(JobConf job, TableScanOperator ts, long remaining) throws
+ HiveException {
+ String hiveTableName = ts.getConf().getTableMetadata().getTableName();
+ int reducerCount = job.getInt(hiveTableName + PhoenixStorageHandlerConstants
+ .PHOENIX_REDUCER_NUMBER, 1);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Estimating input size for table: " + hiveTableName + " with reducer count " +
+ reducerCount + ". Remaining : " + remaining);
+ }
+
+ long bytesPerReducer = job.getLong(HiveConf.ConfVars.BYTESPERREDUCER.varname,
+ Long.parseLong(HiveConf.ConfVars.BYTESPERREDUCER.getDefaultValue()));
+ long totalLength = reducerCount * bytesPerReducer;
+
+ return new Estimation(0, totalLength);
+ }
+}
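
`estimate` does not inspect any data; it simply multiplies the per-table reducer count by Hive's bytes-per-reducer setting and reports the product as the input size. A worked sketch of the arithmetic (the 256 MB figure is an assumed default; the real value comes from `hive.exec.reducers.bytes.per.reducer`):

```java
public class EstimateSketch {
    public static void main(String[] args) {
        int reducerCount = 4;                          // "<tableName>.reducer.count", default 1
        long bytesPerReducer = 256L * 1024 * 1024;     // assumed Hive default
        long totalLength = reducerCount * bytesPerReducer;
        System.out.println(totalLength);               // 1073741824, i.e. 1 GiB reported to Hive
    }
}
```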
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/PrimaryKeyData.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/PrimaryKeyData.java
new file mode 100644
index 0000000..7773997
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/PrimaryKeyData.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InvalidClassException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.ObjectStreamClass;
+import java.io.OutputStream;
+import java.io.Serializable;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Wrapper around the primary key data for Hive.
+ */
+public class PrimaryKeyData implements Serializable{
+ public static final PrimaryKeyData EMPTY = new PrimaryKeyData(Collections.<String,Object> emptyMap());
+ private static final long serialVersionUID = 1L;
+
+ // Based on https://www.ibm.com/developerworks/library/se-lookahead/. Prevents
+ // deserialization of objects of an unexpected class.
+ private static class LookAheadObjectInputStream extends ObjectInputStream {
+ public LookAheadObjectInputStream(InputStream in) throws IOException {
+ super(in);
+ }
+
+ @Override
+ protected Class<?> resolveClass(ObjectStreamClass desc) throws IOException, ClassNotFoundException {
+ if (!desc.getName().equals(PrimaryKeyData.class.getName()) &&
+ !desc.getName().startsWith("java.lang.") &&
+ !desc.getName().startsWith("java.util.") &&
+ !desc.getName().startsWith("java.sql.")) {
+ throw new InvalidClassException(desc.getName(), "Expected an instance of PrimaryKeyData");
+ }
+ return super.resolveClass(desc);
+ }
+ }
+
+ private final HashMap<String,Object> data;
+
+ public PrimaryKeyData(Map<String,Object> data) {
+ if (data instanceof HashMap) {
+ this.data = (HashMap<String,Object>) data;
+ } else {
+ this.data = new HashMap<>(Objects.requireNonNull(data));
+ }
+ }
+
+ public HashMap<String,Object> getData() {
+ return data;
+ }
+
+ public void serialize(OutputStream output) throws IOException {
+ try (ObjectOutputStream oos = new ObjectOutputStream(output)) {
+ oos.writeObject(this);
+ oos.flush();
+ }
+ }
+
+ public static PrimaryKeyData deserialize(InputStream input) throws IOException, ClassNotFoundException {
+ try (LookAheadObjectInputStream ois = new LookAheadObjectInputStream(input)) {
+ Object obj = ois.readObject();
+ if (obj instanceof PrimaryKeyData) {
+ return (PrimaryKeyData) obj;
+ }
+ throw new InvalidClassException(obj == null ? "null" : obj.getClass().getName(), "Disallowed serialized class");
+ }
+ }
+}
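
A round trip through `serialize`/`deserialize` shows how the look-ahead stream is used; deserializing a payload whose top-level class is anything other than `PrimaryKeyData` fails with an `InvalidClassException`. A self-contained sketch (the key column is made up):

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.HashMap;
import java.util.Map;

import org.apache.phoenix.hive.PrimaryKeyData;

public class PrimaryKeyDataRoundTrip {
    public static void main(String[] args) throws Exception {
        Map<String, Object> pk = new HashMap<>();
        pk.put("ID", 42L);                             // hypothetical primary key column

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        new PrimaryKeyData(pk).serialize(out);

        // The look-ahead stream only resolves PrimaryKeyData itself plus
        // whitelisted java.lang/java.util/java.sql classes.
        PrimaryKeyData restored = PrimaryKeyData.deserialize(
                new ByteArrayInputStream(out.toByteArray()));
        System.out.println(restored.getData());        // {ID=42}
    }
}
```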
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/constants/PhoenixStorageHandlerConstants.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/constants/PhoenixStorageHandlerConstants.java
new file mode 100644
index 0000000..e3c7d84
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/constants/PhoenixStorageHandlerConstants.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.constants;
+
+import com.google.common.collect.Lists;
+import org.apache.hadoop.io.IntWritable;
+
+import java.util.List;
+
+/**
+ * Constants used by the Hive storage handler implementation.
+ */
+public class PhoenixStorageHandlerConstants {
+
+ public static final String HBASE_INPUT_FORMAT_CLASS = "phoenix.input.format.class";
+
+ public static final String PHOENIX_TABLE_NAME = "phoenix.table.name";
+
+ public static final String DEFAULT_PHOENIX_INPUT_CLASS = "org.apache.phoenix.hive.mapreduce" +
+ ".PhoenixResultWritable";
+
+ public static final String ZOOKEEPER_QUORUM = "phoenix.zookeeper.quorum";
+ public static final String ZOOKEEPER_PORT = "phoenix.zookeeper.client.port";
+ public static final String ZOOKEEPER_PARENT = "phoenix.zookeeper.znode.parent";
+ public static final String DEFAULT_ZOOKEEPER_QUORUM = "localhost";
+ public static final int DEFAULT_ZOOKEEPER_PORT = 2181;
+ public static final String DEFAULT_ZOOKEEPER_PARENT = "/hbase";
+
+ public static final String PHOENIX_ROWKEYS = "phoenix.rowkeys";
+ public static final String PHOENIX_COLUMN_MAPPING = "phoenix.column.mapping";
+ public static final String PHOENIX_TABLE_OPTIONS = "phoenix.table.options";
+
+ public static final String PHOENIX_TABLE_QUERY_HINT = ".query.hint";
+ public static final String PHOENIX_REDUCER_NUMBER = ".reducer.count";
+ public static final String DISABLE_WAL = ".disable.wal";
+ public static final String BATCH_MODE = "batch.mode";
+ public static final String AUTO_FLUSH = ".auto.flush";
+
+ public static final String COLON = ":";
+ public static final String COMMA = ",";
+ public static final String EMPTY_STRING = "";
+ public static final String SPACE = " ";
+ public static final String LEFT_ROUND_BRACKET = "(";
+ public static final String RIGHT_ROUND_BRACKET = ")";
+ public static final String QUOTATION_MARK = "'";
+ public static final String EQUAL = "=";
+ public static final String IS = "is";
+ public static final String QUESTION = "?";
+
+ public static final String SPLIT_BY_STATS = "split.by.stats";
+ public static final String HBASE_SCAN_CACHE = "hbase.scan.cache";
+ public static final String HBASE_SCAN_CACHEBLOCKS = "hbase.scan.cacheblock";
+ public static final String HBASE_DATE_FORMAT = "hbase.date.format";
+ public static final String HBASE_TIMESTAMP_FORMAT = "hbase.timestamp.format";
+ public static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd";
+ public static final String DEFAULT_TIMESTAMP_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS";
+
+ public static final String IN_OUT_WORK = "in.out.work";
+ public static final String IN_WORK = "input";
+ public static final String OUT_WORK = "output";
+
+ public static final String MR = "mr";
+ public static final String TEZ = "tez";
+ public static final String SPARK = "spark";
+
+ public static final String DATE_TYPE = "date";
+ public static final String TIMESTAMP_TYPE = "timestamp";
+ public static final String BETWEEN_COMPARATOR = "between";
+ public static final String IN_COMPARATOR = "in";
+ public static final List<String> COMMON_COMPARATOR = Lists.newArrayList("=", "<", ">", "<=",
+ ">=");
+
+ // Patterns for rewriting date/timestamp literals in pushed-down predicates.
+ public static final String COLUMNE_MARKER = "$columnName$";
+ public static final String PATERN_MARKER = "$targetPattern$";
+ public static final String DATE_PATTERN = "'?\\d{4}-\\d{2}-\\d{2}'?";
+ public static final String TIMESTAMP_PATTERN = "'?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\" +
+ ".?\\d{0,9}'?";
+ public static final String COMMON_OPERATOR_PATTERN = "(\\(?\"?" + COLUMNE_MARKER + "\"?\\)?\\s*" +
+ "(=|>|<|<=|>=)\\s*(" + PATERN_MARKER + "))";
+ public static final String BETWEEN_OPERATOR_PATTERN = "(\\(?\"?" + COLUMNE_MARKER + "\"?\\)?\\s*(" +
+ "(?i)not)?\\s*(?i)between\\s*(" + PATERN_MARKER + ")\\s*(?i)and\\s*(" + PATERN_MARKER
+ + "))";
+ public static final String IN_OPERATOR_PATTERN = "(\\(?\"?" + COLUMNE_MARKER + "\"?\\)?\\s*((?i)" +
+ "not)?\\s*(?i)in\\s*\\((" + PATERN_MARKER + ",?\\s*)+\\))";
+
+ public static final String FUNCTION_VALUE_MARKER = "$value$";
+ public static final String DATE_FUNCTION_TEMPLETE = "to_date(" + FUNCTION_VALUE_MARKER + ")";
+ public static final String TIMESTAMP_FUNCTION_TEMPLATE = "to_timestamp(" +
+ FUNCTION_VALUE_MARKER + ")";
+
+ public static final IntWritable INT_ZERO = new IntWritable(0);
+}
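
The `$columnName$` and `$targetPattern$` markers are placeholders that are replaced literally to produce a concrete regex over the predicate text. A sketch of how such a pattern is assembled and matched (the column name and predicate string are made up):

```java
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.phoenix.hive.constants.PhoenixStorageHandlerConstants;

public class PredicatePatternSketch {
    public static void main(String[] args) {
        String regex = PhoenixStorageHandlerConstants.COMMON_OPERATOR_PATTERN
                .replace(PhoenixStorageHandlerConstants.COLUMNE_MARKER, "created_date")
                .replace(PhoenixStorageHandlerConstants.PATERN_MARKER,
                        PhoenixStorageHandlerConstants.DATE_PATTERN);

        Matcher m = Pattern.compile(regex).matcher("created_date = '2018-01-01'");
        // true: the matched date literal can then be wrapped in the to_date(...) template.
        System.out.println(m.find());
    }
}
```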
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixInputFormat.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixInputFormat.java
new file mode 100644
index 0000000..b550e32
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixInputFormat.java
@@ -0,0 +1,270 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.mapreduce;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.Statement;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.HConnection;
+import org.apache.hadoop.hbase.client.HConnectionManager;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.RegionSizeCalculator;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.db.DBWritable;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.coprocessor.BaseScannerRegionObserver;
+import org.apache.phoenix.hive.constants.PhoenixStorageHandlerConstants;
+import org.apache.phoenix.hive.ppd.PhoenixPredicateDecomposer;
+import org.apache.phoenix.hive.ql.index.IndexSearchCondition;
+import org.apache.phoenix.hive.query.PhoenixQueryBuilder;
+import org.apache.phoenix.hive.util.PhoenixConnectionUtil;
+import org.apache.phoenix.hive.util.PhoenixStorageHandlerUtil;
+import org.apache.phoenix.iterate.MapReduceParallelScanGrouper;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
+import org.apache.phoenix.query.KeyRange;
+import org.apache.phoenix.util.PhoenixRuntime;
+
+/**
+ * Custom InputFormat to feed into Hive
+ */
+@SuppressWarnings({"deprecation", "rawtypes"})
+public class PhoenixInputFormat<T extends DBWritable> implements InputFormat<WritableComparable,
+ T> {
+
+ private static final Log LOG = LogFactory.getLog(PhoenixInputFormat.class);
+
+ public PhoenixInputFormat() {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("PhoenixInputFormat created");
+ }
+ }
+
+ @Override
+ public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
+ String tableName = jobConf.get(PhoenixStorageHandlerConstants.PHOENIX_TABLE_NAME);
+
+ String query;
+ String executionEngine = jobConf.get(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname,
+ HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.getDefaultValue());
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Target table name at split phase : " + tableName + "with whereCondition :" +
+ jobConf.get(TableScanDesc.FILTER_TEXT_CONF_STR) +
+ " and " + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " : " +
+ executionEngine);
+ }
+
+ if (PhoenixStorageHandlerConstants.MR.equals(executionEngine)) {
+ List<IndexSearchCondition> conditionList = null;
+ String filterExprSerialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
+ if (filterExprSerialized != null) {
+ ExprNodeGenericFuncDesc filterExpr =
+ Utilities.deserializeExpression(filterExprSerialized);
+ PhoenixPredicateDecomposer predicateDecomposer =
+ PhoenixPredicateDecomposer.create(Arrays.asList(jobConf.get(serdeConstants.LIST_COLUMNS).split(",")));
+ predicateDecomposer.decomposePredicate(filterExpr);
+ if (predicateDecomposer.isCalledPPD()) {
+ conditionList = predicateDecomposer.getSearchConditionList();
+ }
+ }
+
+ query = PhoenixQueryBuilder.getInstance().buildQuery(jobConf, tableName,
+ PhoenixStorageHandlerUtil.getReadColumnNames(jobConf), conditionList);
+ } else if (PhoenixStorageHandlerConstants.TEZ.equals(executionEngine)) {
+ Map<String, TypeInfo> columnTypeMap =
+ PhoenixStorageHandlerUtil.createColumnTypeMap(jobConf);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Column type map for TEZ : " + columnTypeMap);
+ }
+
+ String whereClause = jobConf.get(TableScanDesc.FILTER_TEXT_CONF_STR);
+ query = PhoenixQueryBuilder.getInstance().buildQuery(jobConf, tableName,
+ PhoenixStorageHandlerUtil.getReadColumnNames(jobConf), whereClause, columnTypeMap);
+ } else {
+ throw new IOException(executionEngine + " execution engine is not supported yet.");
+ }
+
+ final QueryPlan queryPlan = getQueryPlan(jobConf, query);
+ final List<KeyRange> allSplits = queryPlan.getSplits();
+ final List<InputSplit> splits = generateSplits(jobConf, queryPlan, allSplits, query);
+
+ return splits.toArray(new InputSplit[splits.size()]);
+ }
+
+ private List<InputSplit> generateSplits(final JobConf jobConf, final QueryPlan qplan,
+ final List<KeyRange> splits, String query) throws
+ IOException {
+ Preconditions.checkNotNull(qplan);
+ Preconditions.checkNotNull(splits);
+ final List<InputSplit> psplits = Lists.newArrayListWithExpectedSize(splits.size());
+
+ Path[] tablePaths = FileInputFormat.getInputPaths(ShimLoader.getHadoopShims()
+ .newJobContext(new Job(jobConf)));
+ boolean splitByStats = jobConf.getBoolean(PhoenixStorageHandlerConstants.SPLIT_BY_STATS,
+ false);
+
+ setScanCacheSize(jobConf);
+
+ // Attach region locality and size information to each split.
+ try (HConnection connection = HConnectionManager.createConnection(PhoenixConnectionUtil.getConfiguration(jobConf))) {
+ RegionLocator regionLocator = connection.getRegionLocator(TableName.valueOf(qplan
+ .getTableRef().getTable().getPhysicalName().toString()));
+ RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(regionLocator, connection
+ .getAdmin());
+
+ for (List<Scan> scans : qplan.getScans()) {
+ PhoenixInputSplit inputSplit;
+
+ HRegionLocation location = regionLocator.getRegionLocation(scans.get(0).getStartRow()
+ , false);
+ long regionSize = sizeCalculator.getRegionSize(location.getRegionInfo().getRegionName
+ ());
+ String regionLocation = PhoenixStorageHandlerUtil.getRegionLocation(location, LOG);
+
+ if (splitByStats) {
+ for (Scan aScan : scans) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Split for scan : " + aScan + "with scanAttribute : " + aScan
+ .getAttributesMap() + " [scanCache, cacheBlock, scanBatch] : [" +
+ aScan.getCaching() + ", " + aScan.getCacheBlocks() + ", " + aScan
+ .getBatch() + "] and regionLocation : " + regionLocation);
+ }
+
+ inputSplit = new PhoenixInputSplit(Lists.newArrayList(aScan), tablePaths[0],
+ regionLocation, regionSize);
+ inputSplit.setQuery(query);
+ psplits.add(inputSplit);
+ }
+ } else {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Scan count[" + scans.size() + "] : " + Bytes.toStringBinary(scans
+ .get(0).getStartRow()) + " ~ " + Bytes.toStringBinary(scans.get(scans
+ .size() - 1).getStopRow()));
+ LOG.debug("First scan : " + scans.get(0) + "with scanAttribute : " + scans
+ .get(0).getAttributesMap() + " [scanCache, cacheBlock, scanBatch] : " +
+ "[" + scans.get(0).getCaching() + ", " + scans.get(0).getCacheBlocks()
+ + ", " + scans.get(0).getBatch() + "] and regionLocation : " +
+ regionLocation);
+
+ for (int i = 0, limit = scans.size(); i < limit; i++) {
+ LOG.debug("EXPECTED_UPPER_REGION_KEY[" + i + "] : " + Bytes
+ .toStringBinary(scans.get(i).getAttribute
+ (BaseScannerRegionObserver.EXPECTED_UPPER_REGION_KEY)));
+ }
+ }
+
+ inputSplit = new PhoenixInputSplit(scans, tablePaths[0], regionLocation,
+ regionSize);
+ inputSplit.setQuery(query);
+ psplits.add(inputSplit);
+ }
+ }
+ }
+
+ return psplits;
+ }
+
+ private void setScanCacheSize(JobConf jobConf) {
+ int scanCacheSize = jobConf.getInt(PhoenixStorageHandlerConstants.HBASE_SCAN_CACHE, -1);
+ if (scanCacheSize > 0) {
+ jobConf.setInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, scanCacheSize);
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Generating splits with scanCacheSize : " + scanCacheSize);
+ }
+ }
+
+ @Override
+ public RecordReader<WritableComparable, T> getRecordReader(InputSplit split, JobConf job,
+ Reporter reporter) throws
+ IOException {
+ final QueryPlan queryPlan = getQueryPlan(job, ((PhoenixInputSplit) split).getQuery());
+ @SuppressWarnings("unchecked")
+ final Class<T> inputClass = (Class<T>) job.getClass(PhoenixConfigurationUtil.INPUT_CLASS,
+ PhoenixResultWritable.class);
+
+ PhoenixRecordReader<T> recordReader = new PhoenixRecordReader<T>(inputClass, job,
+ queryPlan);
+ recordReader.initialize(split);
+
+ return recordReader;
+ }
+
+ /**
+ * Returns the query plan associated with the select query.
+ */
+ private QueryPlan getQueryPlan(final Configuration configuration, String selectStatement)
+ throws IOException {
+ try {
+ final String currentScnValue = configuration.get(PhoenixConfigurationUtil
+ .CURRENT_SCN_VALUE);
+ final Properties overridingProps = new Properties();
+ if (currentScnValue != null) {
+ overridingProps.put(PhoenixRuntime.CURRENT_SCN_ATTRIB, currentScnValue);
+ }
+ final Connection connection = PhoenixConnectionUtil.getInputConnection(configuration,
+ overridingProps);
+ Preconditions.checkNotNull(selectStatement);
+ final Statement statement = connection.createStatement();
+ final PhoenixStatement pstmt = statement.unwrap(PhoenixStatement.class);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Compiled query : " + selectStatement);
+ }
+
+ // Optimize the query plan so that we potentially use secondary indexes
+ final QueryPlan queryPlan = pstmt.optimizeQuery(selectStatement);
+ // Initialize the query plan so it sets up the parallel scans
+ queryPlan.iterator(MapReduceParallelScanGrouper.getInstance());
+ return queryPlan;
+ } catch (Exception exception) {
+ LOG.error(String.format("Failed to get the query plan with error [%s]", exception.getMessage()));
+ throw new RuntimeException(exception);
+ }
+ }
+}
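
The split count therefore depends on the `split.by.stats` flag: when it is enabled every guide-post scan becomes its own split (and its own mapper), otherwise each batch of scans becomes a single split. A back-of-the-envelope sketch with assumed numbers:

```java
public class SplitCountSketch {
    public static void main(String[] args) {
        // Assume the query plan produced 3 scan batches with 4 guide-post scans each.
        int scanBatches = 3;
        int scansPerBatch = 4;

        boolean splitByStats = true;                   // "split.by.stats" table property
        int splitCount = splitByStats
                ? scanBatches * scansPerBatch          // one split per scan  -> 12 mappers
                : scanBatches;                         // one split per batch ->  3 mappers
        System.out.println(splitCount);
    }
}
```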
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixInputSplit.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixInputSplit.java
new file mode 100644
index 0000000..d76e863
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixInputSplit.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.mapreduce;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.phoenix.query.KeyRange;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * InputSplit implementation. Represents the data to be processed by an individual Mapper
+ */
+public class PhoenixInputSplit extends FileSplit implements InputSplit {
+
+ private List<Scan> scans;
+ private KeyRange keyRange;
+
+ private long regionSize;
+
+ // The query travels with the split because it is not delivered via the jobConf.
+ private String query;
+
+ public PhoenixInputSplit() {
+ }
+
+ public PhoenixInputSplit(final List<Scan> scans, Path dummyPath, String regionLocation, long
+ length) {
+ super(dummyPath, 0, 0, new String[]{regionLocation});
+
+ regionSize = length;
+
+ Preconditions.checkNotNull(scans);
+ Preconditions.checkState(!scans.isEmpty());
+ this.scans = scans;
+ init();
+ }
+
+ public List<Scan> getScans() {
+ return scans;
+ }
+
+ public KeyRange getKeyRange() {
+ return keyRange;
+ }
+
+ public String getQuery() {
+ return query;
+ }
+
+ public void setQuery(String query) {
+ this.query = query;
+ }
+
+ private void init() {
+ this.keyRange = KeyRange.getKeyRange(scans.get(0).getStartRow(), scans.get(scans.size() -
+ 1).getStopRow());
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ super.write(out);
+
+ Preconditions.checkNotNull(scans);
+ WritableUtils.writeVInt(out, scans.size());
+ for (Scan scan : scans) {
+ ClientProtos.Scan protoScan = ProtobufUtil.toScan(scan);
+ byte[] protoScanBytes = protoScan.toByteArray();
+ WritableUtils.writeVInt(out, protoScanBytes.length);
+ out.write(protoScanBytes);
+ }
+
+ WritableUtils.writeString(out, query);
+ WritableUtils.writeVLong(out, regionSize);
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ super.readFields(in);
+
+ int count = WritableUtils.readVInt(in);
+ scans = Lists.newArrayListWithExpectedSize(count);
+ for (int i = 0; i < count; i++) {
+ byte[] protoScanBytes = new byte[WritableUtils.readVInt(in)];
+ in.readFully(protoScanBytes);
+ ClientProtos.Scan protoScan = ClientProtos.Scan.parseFrom(protoScanBytes);
+ Scan scan = ProtobufUtil.toScan(protoScan);
+ scans.add(scan);
+ }
+ init();
+
+ query = WritableUtils.readString(in);
+ regionSize = WritableUtils.readVLong(in);
+ }
+
+ @Override
+ public long getLength() {
+ return regionSize;
+ }
+
+ @Override
+ public String[] getLocations() throws IOException {
+ return new String[]{};
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + ((keyRange == null) ? 0 : keyRange.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (!(obj instanceof PhoenixInputSplit)) {
+ return false;
+ }
+ PhoenixInputSplit other = (PhoenixInputSplit) obj;
+ if (keyRange == null) {
+ if (other.keyRange != null) {
+ return false;
+ }
+ } else if (!keyRange.equals(other.keyRange)) {
+ return false;
+ }
+ return true;
+ }
+}
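
Because the split implements `Writable`, it must survive a write/read cycle between the client and the tasks; the query string and region size ride along with the protobuf-encoded scans. A round-trip sketch using Hadoop's in-memory buffers (the path, host, and query below are placeholders):

```java
import java.util.Collections;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.phoenix.hive.mapreduce.PhoenixInputSplit;

public class SplitRoundTrip {
    public static void main(String[] args) throws Exception {
        PhoenixInputSplit split = new PhoenixInputSplit(
                Collections.singletonList(new Scan()),
                new Path("/tmp/dummy"),                // FileSplit requires some path
                "region-host-1",                       // hypothetical region location
                1024L);                                // region size, surfaced via getLength()
        split.setQuery("SELECT * FROM TEST_TABLE");    // hypothetical query

        DataOutputBuffer out = new DataOutputBuffer();
        split.write(out);

        PhoenixInputSplit copy = new PhoenixInputSplit();
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        copy.readFields(in);

        System.out.println(copy.getQuery() + " / " + copy.getLength());
    }
}
```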
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixOutputFormat.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixOutputFormat.java
new file mode 100644
index 0000000..ed47176
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixOutputFormat.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.mapreduce;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
+import org.apache.hadoop.hive.ql.io.RecordUpdater;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.mapreduce.lib.db.DBWritable;
+import org.apache.hadoop.util.Progressable;
+import org.apache.phoenix.hive.util.PhoenixStorageHandlerUtil;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.Properties;
+
+/**
+ * Custom OutputFormat to feed into Hive. Describes the output-specification for a Map-Reduce job.
+ */
+public class PhoenixOutputFormat<T extends DBWritable> implements OutputFormat<NullWritable, T>,
+ AcidOutputFormat<NullWritable, T> {
+
+ private static final Log LOG = LogFactory.getLog(PhoenixOutputFormat.class);
+
+ public PhoenixOutputFormat() {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("PhoenixOutputFormat created");
+ }
+ }
+
+ @Override
+ public RecordWriter<NullWritable, T> getRecordWriter(FileSystem ignored, JobConf jobConf,
+ String name, Progressable progress)
+ throws IOException {
+ return createRecordWriter(jobConf, new Properties());
+ }
+
+ @Override
+ public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
+
+ }
+
+ @Override
+ public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter
+ (JobConf jobConf, Path finalOutPath, Class<? extends Writable> valueClass, boolean
+ isCompressed, Properties tableProperties, Progressable progress) throws
+ IOException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Get RecordWriter for finalOutPath : " + finalOutPath + ", valueClass" +
+ " : " +
+ valueClass
+ .getName() + ", isCompressed : " + isCompressed + ", tableProperties " +
+ ": " + tableProperties + ", progress : " + progress);
+ }
+
+ return createRecordWriter(jobConf, new Properties());
+ }
+
+ @Override
+ public RecordUpdater getRecordUpdater(Path path, org.apache.hadoop.hive.ql.io
+ .AcidOutputFormat.Options options) throws IOException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Get RecordWriter for path : " + path + ", options : " +
+ PhoenixStorageHandlerUtil
+ .getOptionsValue(options));
+ }
+ return new PhoenixRecordWriter<T>(path, options);
+ }
+
+ @Override
+ public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getRawRecordWriter(Path path,
+ org.apache.hadoop.hive.ql.io.AcidOutputFormat.Options options) throws IOException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Get RawRecordWriter for path : " + path + ", options : " +
+ PhoenixStorageHandlerUtil.getOptionsValue(options));
+ }
+
+ return new PhoenixRecordWriter<T>(path, options);
+ }
+
+ private PhoenixRecordWriter<T> createRecordWriter(Configuration config, Properties properties) {
+ try {
+ return new PhoenixRecordWriter<T>(config, properties);
+ } catch (SQLException e) {
+ LOG.error("Error during PhoenixRecordWriter instantiation :" + e.getMessage());
+ throw new RuntimeException(e);
+ }
+ }
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixRecordReader.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixRecordReader.java
new file mode 100644
index 0000000..2ff8aca
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixRecordReader.java
@@ -0,0 +1,217 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.mapreduce;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapreduce.lib.db.DBWritable;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.compile.StatementContext;
+import org.apache.phoenix.coprocessor.BaseScannerRegionObserver;
+import org.apache.phoenix.hive.PhoenixRowKey;
+import org.apache.phoenix.hive.util.PhoenixStorageHandlerUtil;
+import org.apache.phoenix.iterate.ConcatResultIterator;
+import org.apache.phoenix.iterate.LookAheadResultIterator;
+import org.apache.phoenix.iterate.MapReduceParallelScanGrouper;
+import org.apache.phoenix.iterate.PeekingResultIterator;
+import org.apache.phoenix.iterate.ResultIterator;
+import org.apache.phoenix.iterate.RoundRobinResultIterator;
+import org.apache.phoenix.iterate.SequenceResultIterator;
+import org.apache.phoenix.iterate.TableResultIterator;
+import org.apache.phoenix.jdbc.PhoenixResultSet;
+import org.apache.phoenix.monitoring.ReadMetricQueue;
+import org.apache.phoenix.monitoring.ScanMetricsHolder;
+
+import com.google.common.base.Throwables;
+import com.google.common.collect.Lists;
+
+/**
+ * RecordReader implementation that iterates over the records.
+ */
+@SuppressWarnings("rawtypes")
+public class PhoenixRecordReader<T extends DBWritable> implements
+ RecordReader<WritableComparable, T> {
+
+ private static final Log LOG = LogFactory.getLog(PhoenixRecordReader.class);
+
+ private final Configuration configuration;
+ private final QueryPlan queryPlan;
+ private WritableComparable key;
+ private T value = null;
+ private Class<T> inputClass;
+ private ResultIterator resultIterator = null;
+ private PhoenixResultSet resultSet;
+ private long readCount;
+
+
+ private boolean isTransactional;
+
+ public PhoenixRecordReader(Class<T> inputClass, final Configuration configuration, final
+ QueryPlan queryPlan) throws IOException {
+ this.inputClass = inputClass;
+ this.configuration = configuration;
+ this.queryPlan = queryPlan;
+
+ isTransactional = PhoenixStorageHandlerUtil.isTransactionalTable(configuration);
+ }
+
+ public void initialize(InputSplit split) throws IOException {
+ final PhoenixInputSplit pSplit = (PhoenixInputSplit) split;
+ final List<Scan> scans = pSplit.getScans();
+
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Target table : " + queryPlan.getTableRef().getTable().getPhysicalName());
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Scan count[" + scans.size() + "] : " + Bytes.toStringBinary(scans.get(0)
+ .getStartRow()) + " ~ " + Bytes.toStringBinary(scans.get(scans.size() - 1)
+ .getStopRow()));
+ LOG.debug("First scan : " + scans.get(0) + " scanAttribute : " + scans.get(0)
+ .getAttributesMap());
+
+ for (int i = 0, limit = scans.size(); i < limit; i++) {
+ LOG.debug("EXPECTED_UPPER_REGION_KEY[" + i + "] : " +
+ Bytes.toStringBinary(scans.get(i).getAttribute(BaseScannerRegionObserver
+ .EXPECTED_UPPER_REGION_KEY)));
+ }
+ }
+
+ try {
+ List<PeekingResultIterator> iterators = Lists.newArrayListWithExpectedSize(scans.size
+ ());
+ StatementContext ctx = queryPlan.getContext();
+ ReadMetricQueue readMetrics = ctx.getReadMetricsQueue();
+ String tableName = queryPlan.getTableRef().getTable().getPhysicalName().getString();
+ long renewScannerLeaseThreshold = queryPlan.getContext().getConnection()
+ .getQueryServices().getRenewLeaseThresholdMilliSeconds();
+ for (Scan scan : scans) {
+ scan.setAttribute(BaseScannerRegionObserver.SKIP_REGION_BOUNDARY_CHECK, Bytes
+ .toBytes(true));
+ ScanMetricsHolder scanMetricsHolder = ScanMetricsHolder.getInstance(readMetrics, tableName, scan, ctx.getConnection().getLogLevel());
+ final TableResultIterator tableResultIterator = new TableResultIterator(
+ queryPlan.getContext().getConnection().getMutationState(), scan, scanMetricsHolder,
+ renewScannerLeaseThreshold, queryPlan, MapReduceParallelScanGrouper.getInstance());
+
+ PeekingResultIterator peekingResultIterator = LookAheadResultIterator.wrap
+ (tableResultIterator);
+ iterators.add(peekingResultIterator);
+ }
+ ResultIterator iterator = queryPlan.useRoundRobinIterator()
+ ? RoundRobinResultIterator.newIterator(iterators, queryPlan)
+ : ConcatResultIterator.newIterator(iterators);
+ if (queryPlan.getContext().getSequenceManager().getSequenceCount() > 0) {
+ iterator = new SequenceResultIterator(iterator, queryPlan.getContext()
+ .getSequenceManager());
+ }
+ this.resultIterator = iterator;
+ // Clone the row projector as it's not thread safe and would be used
+ // simultaneously by multiple threads otherwise.
+ this.resultSet = new PhoenixResultSet(this.resultIterator, queryPlan.getProjector()
+ .cloneIfNecessary(),
+ queryPlan.getContext());
+ } catch (SQLException e) {
+ LOG.error(String.format(" Error [%s] initializing PhoenixRecordReader. ", e
+ .getMessage()));
+ Throwables.propagate(e);
+ }
+ }
+
+ @Override
+ public boolean next(WritableComparable key, T value) throws IOException {
+ try {
+ if (!resultSet.next()) {
+ return false;
+ }
+ value.readFields(resultSet);
+
+ if (isTransactional) {
+ ((PhoenixResultWritable) value).readPrimaryKey((PhoenixRowKey) key);
+ }
+
+ ++readCount;
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Result[" + readCount + "] : " + ((PhoenixResultWritable) value)
+ .getResultMap());
+ }
+
+ return true;
+ } catch (SQLException e) {
+ LOG.error(String.format(" Error [%s] occurred while iterating over the resultset. ",
+ e.getMessage()));
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public WritableComparable createKey() {
+ if (isTransactional) {
+ key = new PhoenixRowKey();
+ } else {
+ key = NullWritable.get();
+ }
+
+ return key;
+ }
+
+ @Override
+ public T createValue() {
+ value = ReflectionUtils.newInstance(inputClass, this.configuration);
+ return value;
+ }
+
+ @Override
+ public long getPos() throws IOException {
+ return 0;
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Read Count : " + readCount);
+ }
+
+ if (resultIterator != null) {
+ try {
+ resultIterator.close();
+ } catch (SQLException e) {
+ LOG.error(" Error closing resultset.");
+ throw new RuntimeException(e);
+ }
+ }
+
+ }
+
+ @Override
+ public float getProgress() throws IOException {
+ return 0;
+ }
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixRecordWriter.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixRecordWriter.java
new file mode 100644
index 0000000..c6884df
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixRecordWriter.java
@@ -0,0 +1,355 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.mapreduce;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
+import org.apache.hadoop.hive.ql.io.RecordUpdater;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapreduce.lib.db.DBWritable;
+import org.apache.phoenix.hive.PhoenixSerializer;
+import org.apache.phoenix.hive.PhoenixSerializer.DmlType;
+import org.apache.phoenix.hive.constants.PhoenixStorageHandlerConstants;
+import org.apache.phoenix.hive.util.PhoenixConnectionUtil;
+import org.apache.phoenix.hive.util.PhoenixStorageHandlerUtil;
+import org.apache.phoenix.hive.util.PhoenixUtil;
+import org.apache.phoenix.jdbc.PhoenixConnection;
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
+import org.apache.phoenix.schema.ConcurrentTableMutationException;
+import org.apache.phoenix.schema.MetaDataClient;
+import org.apache.phoenix.util.QueryUtil;
+
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.util.Properties;
+
+/**
+ * RecordWriter implementation that writes records to a Phoenix table.
+ *
+ * WARNING: the WAL-disable setting may not take effect reliably when multiple
+ * tasks enable/disable the WAL on the same table concurrently.
+ */
+public class PhoenixRecordWriter<T extends DBWritable> implements RecordWriter<NullWritable, T>,
+ org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter, RecordUpdater {
+
+ private static final Log LOG = LogFactory.getLog(PhoenixRecordWriter.class);
+
+ private Connection conn;
+ private PreparedStatement pstmt;
+ private long batchSize;
+ private long numRecords = 0;
+
+ private Configuration config;
+ private String tableName;
+ private MetaDataClient metaDataClient;
+ private boolean restoreWalMode;
+
+ // For RecordUpdater
+ private long rowCountDelta = 0;
+ private PhoenixSerializer phoenixSerializer;
+ private ObjectInspector objInspector;
+ private PreparedStatement pstmtForDelete;
+
+ // For RecordUpdater
+ public PhoenixRecordWriter(Path path, AcidOutputFormat.Options options) throws IOException {
+ Configuration config = options.getConfiguration();
+ Properties props = new Properties();
+
+ try {
+ initialize(config, props);
+ } catch (SQLException e) {
+ throw new IOException(e);
+ }
+
+ this.objInspector = options.getInspector();
+ try {
+ phoenixSerializer = new PhoenixSerializer(config, options.getTableProperties());
+ } catch (SerDeException e) {
+ throw new IOException(e);
+ }
+ }
+
+ public PhoenixRecordWriter(final Configuration configuration, final Properties props) throws
+ SQLException {
+ initialize(configuration, props);
+ }
+
+ private void initialize(Configuration config, Properties properties) throws SQLException {
+ this.config = config;
+ tableName = config.get(PhoenixStorageHandlerConstants.PHOENIX_TABLE_NAME);
+
+ // Disable WAL
+ String walConfigName = tableName.toLowerCase() + PhoenixStorageHandlerConstants.DISABLE_WAL;
+ boolean disableWal = config.getBoolean(walConfigName, false);
+ if (disableWal) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Property " + walConfigName + " is true. batch.mode will be set true. ");
+ }
+
+ properties.setProperty(PhoenixStorageHandlerConstants.BATCH_MODE, "true");
+ }
+
+ this.conn = PhoenixConnectionUtil.getInputConnection(config, properties);
+
+ if (disableWal) {
+ metaDataClient = new MetaDataClient((PhoenixConnection) conn);
+
+ if (!PhoenixUtil.isDisabledWal(metaDataClient, tableName)) {
+ // execute an ALTER TABLE statement if disable_wal is not already true.
+ try {
+ PhoenixUtil.alterTableForWalDisable(conn, tableName, true);
+ } catch (ConcurrentTableMutationException e) {
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Another mapper or task processing wal disable");
+ }
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(tableName + "s wal disabled.");
+ }
+
+ // restore original value of disable_wal at the end.
+ restoreWalMode = true;
+ }
+ }
+
+ this.batchSize = PhoenixConfigurationUtil.getBatchSize(config);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Batch-size : " + batchSize);
+ }
+
+ String upsertQuery = QueryUtil.constructUpsertStatement(tableName, PhoenixUtil
+ .getColumnInfoList(conn, tableName));
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Upsert-query : " + upsertQuery);
+ }
+ this.pstmt = this.conn.prepareStatement(upsertQuery);
+ }
+
+ @Override
+ public void write(NullWritable key, T record) throws IOException {
+ try {
+ record.write(pstmt);
+ numRecords++;
+ pstmt.executeUpdate();
+
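+ // commit once every batchSize upserts so the uncommitted mutation batch stays bounded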
+ if (numRecords % batchSize == 0) {
+ LOG.debug("Commit called on a batch of size : " + batchSize);
+ conn.commit();
+ }
+ } catch (SQLException e) {
+ throw new IOException("Exception while writing to table.", e);
+ }
+ }
+
+ @Override
+ public void close(Reporter reporter) throws IOException {
+ try {
+ conn.commit();
+
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Wrote row : " + numRecords);
+ }
+ } catch (SQLException e) {
+ LOG.error("SQLException while performing the commit for the task.");
+ throw new IOException(e);
+ } finally {
+ try {
+ if (restoreWalMode && PhoenixUtil.isDisabledWal(metaDataClient, tableName)) {
+ try {
+ PhoenixUtil.alterTableForWalDisable(conn, tableName, false);
+ } catch (ConcurrentTableMutationException e) {
+ if (LOG.isWarnEnabled()) {
+ LOG.warn("Another mapper or task processing wal enable");
+ }
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(tableName + "s wal enabled.");
+ }
+ }
+
+ // flush if [table-name].auto.flush is true.
+ String autoFlushConfigName = tableName.toLowerCase() +
+ PhoenixStorageHandlerConstants.AUTO_FLUSH;
+ boolean autoFlush = config.getBoolean(autoFlushConfigName, false);
+ if (autoFlush) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("autoFlush is true.");
+ }
+
+ PhoenixUtil.flush(conn, tableName);
+ }
+
+ PhoenixUtil.closeResource(pstmt);
+ PhoenixUtil.closeResource(pstmtForDelete);
+ PhoenixUtil.closeResource(conn);
+ } catch (SQLException ex) {
+ LOG.error("SQLException while closing the connection for the task.");
+ throw new IOException(ex);
+ }
+ }
+ }
+
+ // For Testing
+ public boolean isRestoreWalMode() {
+ return restoreWalMode;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public void write(Writable w) throws IOException {
+ PhoenixResultWritable row = (PhoenixResultWritable) w;
+
+ write(NullWritable.get(), (T) row);
+ }
+
+ @Override
+ public void close(boolean abort) throws IOException {
+ close(Reporter.NULL);
+ }
+
+ @Override
+ public void insert(long currentTransaction, Object row) throws IOException {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("insert transaction : " + currentTransaction + ", row : " +
+ PhoenixStorageHandlerUtil.toString(row));
+ }
+
+ PhoenixResultWritable pResultWritable = (PhoenixResultWritable) phoenixSerializer
+ .serialize(row, objInspector, DmlType.INSERT);
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Data : " + pResultWritable.getValueList());
+ }
+
+ write(pResultWritable);
+ rowCountDelta++;
+ }
+
+ @Override
+ public void update(long currentTransaction, Object row) throws IOException {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("update transaction : " + currentTransaction + ", row : " +
+ PhoenixStorageHandlerUtil
+ .toString(row));
+ }
+
+ PhoenixResultWritable pResultWritable = (PhoenixResultWritable) phoenixSerializer
+ .serialize(row, objInspector, DmlType.UPDATE);
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Data : " + pResultWritable.getValueList());
+ }
+
+ write(pResultWritable);
+ }
+
+ @Override
+ public void delete(long currentTransaction, Object row) throws IOException {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("delete transaction : " + currentTransaction + ", row : " +
+ PhoenixStorageHandlerUtil.toString(row));
+ }
+
+ PhoenixResultWritable pResultWritable = (PhoenixResultWritable) phoenixSerializer
+ .serialize(row, objInspector, DmlType.DELETE);
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Data : " + pResultWritable.getValueList());
+ }
+
+ if (pstmtForDelete == null) {
+ try {
+ String deleteQuery = PhoenixUtil.constructDeleteStatement(conn, tableName);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Delete query : " + deleteQuery);
+ }
+
+ pstmtForDelete = conn.prepareStatement(deleteQuery);
+ } catch (SQLException e) {
+ throw new IOException(e);
+ }
+ }
+
+ delete(pResultWritable);
+
+ rowCountDelta--;
+ }
+
+ private void delete(PhoenixResultWritable pResultWritable) throws IOException {
+ try {
+ pResultWritable.delete(pstmtForDelete);
+ numRecords++;
+ pstmtForDelete.executeUpdate();
+
+ if (numRecords % batchSize == 0) {
+ LOG.debug("Commit called on a batch of size : " + batchSize);
+ conn.commit();
+ }
+ } catch (SQLException e) {
+ throw new IOException("Exception while deleting to table.", e);
+ }
+ }
+
+ @Override
+ public void flush() throws IOException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Flush called");
+ }
+
+ try {
+ conn.commit();
+
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Written row : " + numRecords);
+ }
+ } catch (SQLException e) {
+ LOG.error("SQLException while performing the commit for the task.");
+ throw new IOException(e);
+ }
+ }
+
+ @Override
+ public SerDeStats getStats() {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("getStats called");
+ }
+
+ SerDeStats stats = new SerDeStats();
+ stats.setRowCount(rowCountDelta);
+ // Don't worry about setting raw data size diff. There is no reasonable way to calculate
+ // that without finding the row we are updating or deleting, which would be a mess.
+ return stats;
+ }
+}
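+
+/*
+ * Editorial sketch (not part of the patch): standalone use of this writer. The
+ * table name "MY_TABLE" and columnInfoList are hypothetical; in a Hive job the
+ * storage handler performs this wiring.
+ *
+ *   Configuration conf = new Configuration();
+ *   conf.set(PhoenixStorageHandlerConstants.PHOENIX_TABLE_NAME, "MY_TABLE");
+ *   PhoenixRecordWriter<PhoenixResultWritable> writer =
+ *       new PhoenixRecordWriter<>(conf, new Properties());
+ *   PhoenixResultWritable row = new PhoenixResultWritable(conf, columnInfoList);
+ *   row.add("key-1");                       // values in column order
+ *   writer.write(NullWritable.get(), row);  // upsert; commits every batchSize rows
+ *   writer.close(Reporter.NULL);            // final commit, optional flush, cleanup
+ */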
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixResultWritable.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixResultWritable.java
new file mode 100644
index 0000000..2bdc7b2
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixResultWritable.java
@@ -0,0 +1,217 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.mapreduce;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.lib.db.DBWritable;
+import org.apache.phoenix.hive.PhoenixRowKey;
+import org.apache.phoenix.hive.constants.PhoenixStorageHandlerConstants;
+import org.apache.phoenix.hive.util.ColumnMappingUtils;
+import org.apache.phoenix.hive.util.PhoenixStorageHandlerUtil;
+import org.apache.phoenix.hive.util.PhoenixUtil;
+import org.apache.phoenix.util.ColumnInfo;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Writable/DBWritable implementation that carries a row between the Hive SerDe
+ * and Phoenix on both the read and write paths.
+ */
+public class PhoenixResultWritable implements Writable, DBWritable, Configurable {
+
+ private static final Log LOG = LogFactory.getLog(PhoenixResultWritable.class);
+
+ private List<ColumnInfo> columnMetadataList;
+ private List<Object> valueList; // for output
+ private Map<String, Object> rowMap = Maps.newHashMap(); // for input
+ private Map<String, String> columnMap;
+
+ private int columnCount = -1;
+
+ private Configuration config;
+ private boolean isTransactional;
+ private Map<String, Object> rowKeyMap = Maps.newLinkedHashMap();
+ private List<String> primaryKeyColumnList;
+
+ public PhoenixResultWritable() {
+ }
+
+ public PhoenixResultWritable(Configuration config) throws IOException {
+ setConf(config);
+ }
+
+ public PhoenixResultWritable(Configuration config, List<ColumnInfo> columnMetadataList)
+ throws IOException {
+ this(config);
+ this.columnMetadataList = columnMetadataList;
+ valueList = Lists.newArrayListWithExpectedSize(columnMetadataList.size());
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ // for write
+ public void clear() {
+ valueList.clear();
+ }
+
+ // for write
+ public void add(Object value) {
+ valueList.add(value);
+ }
+
+ @Override
+ public void write(PreparedStatement statement) throws SQLException {
+ ColumnInfo columnInfo = null;
+ Object value = null;
+
+ try {
+ for (int i = 0, limit = columnMetadataList.size(); i < limit; i++) {
+ columnInfo = columnMetadataList.get(i);
+
+ if (valueList.size() > i) {
+ value = valueList.get(i);
+ } else {
+ value = null;
+ }
+
+ if (value == null) {
+ statement.setNull(i + 1, columnInfo.getSqlType());
+ } else {
+ statement.setObject(i + 1, value, columnInfo.getSqlType());
+ }
+ }
+ } catch (SQLException | RuntimeException e) {
+ LOG.error("[column-info, value] : " + columnInfo + ", " + value);
+ throw e;
+ }
+ }
+
+ public void delete(PreparedStatement statement) throws SQLException {
+ ColumnInfo columnInfo = null;
+ Object value = null;
+
+ try {
+ for (int i = 0, limit = primaryKeyColumnList.size(); i < limit; i++) {
+ columnInfo = columnMetadataList.get(i);
+
+ if (valueList.size() > i) {
+ value = valueList.get(i);
+ } else {
+ value = null;
+ }
+
+ if (value == null) {
+ statement.setNull(i + 1, columnInfo.getSqlType());
+ } else {
+ statement.setObject(i + 1, value, columnInfo.getSqlType());
+ }
+ }
+ } catch (SQLException | RuntimeException e) {
+ LOG.error("[column-info, value] : " + columnInfo + ", " + value);
+ throw e;
+ }
+ }
+
+ @Override
+ public void readFields(ResultSet resultSet) throws SQLException {
+ ResultSetMetaData rsmd = resultSet.getMetaData();
+ if (columnCount == -1) {
+ this.columnCount = rsmd.getColumnCount();
+ }
+ rowMap.clear();
+
+ for (int i = 0; i < columnCount; i++) {
+ Object value = resultSet.getObject(i + 1);
+ String columnName = rsmd.getColumnName(i + 1);
+ String mapName = columnMap.get(columnName);
+ if(mapName != null) {
+ columnName = mapName;
+ }
+ rowMap.put(columnName, value);
+ }
+
+ // Collect the primary-key values that back the row__id column.
+ if (isTransactional) {
+ rowKeyMap.clear();
+
+ for (String pkColumn : primaryKeyColumnList) {
+ rowKeyMap.put(pkColumn, rowMap.get(pkColumn));
+ }
+ }
+ }
+
+ public void readPrimaryKey(PhoenixRowKey rowKey) {
+ rowKey.setRowKeyMap(rowKeyMap);
+ }
+
+ public List<ColumnInfo> getColumnMetadataList() {
+ return columnMetadataList;
+ }
+
+ public void setColumnMetadataList(List<ColumnInfo> columnMetadataList) {
+ this.columnMetadataList = columnMetadataList;
+ }
+
+ public Map<String, Object> getResultMap() {
+ return rowMap;
+ }
+
+ public List<Object> getValueList() {
+ return valueList;
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ config = conf;
+ this.columnMap = ColumnMappingUtils.getReverseColumnMapping(
+ config.get(PhoenixStorageHandlerConstants.PHOENIX_COLUMN_MAPPING, ""));
+
+ isTransactional = PhoenixStorageHandlerUtil.isTransactionalTable(config);
+
+ if (isTransactional) {
+ primaryKeyColumnList = PhoenixUtil.getPrimaryKeyColumnList(config, config.get
+ (PhoenixStorageHandlerConstants.PHOENIX_TABLE_NAME));
+ }
+ }
+
+ @Override
+ public Configuration getConf() {
+ return config;
+ }
+}
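+
+/*
+ * Editorial sketch (not part of the patch): the write path binds values
+ * positionally against columnMetadataList, so callers fill the value list in
+ * column order; conf, columnInfoList and stmt are assumed to exist.
+ *
+ *   PhoenixResultWritable w = new PhoenixResultWritable(conf, columnInfoList);
+ *   w.clear();
+ *   w.add(42);        // column 1
+ *   w.add("hello");   // column 2
+ *   w.write(stmt);    // setObject/setNull per ColumnInfo.getSqlType(); missing values become NULL
+ */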
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/AbstractPhoenixObjectInspector.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/AbstractPhoenixObjectInspector.java
new file mode 100644
index 0000000..1de1cc7
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/AbstractPhoenixObjectInspector.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.objectinspector;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive
+ .AbstractPrimitiveLazyObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * AbstractPhoenixObjectInspector for a LazyPrimitive object
+ */
+public abstract class AbstractPhoenixObjectInspector<T extends Writable>
+ extends AbstractPrimitiveLazyObjectInspector<T> {
+
+ private final Log log;
+
+ public AbstractPhoenixObjectInspector() {
+ super();
+
+ log = LogFactory.getLog(getClass());
+ }
+
+ protected AbstractPhoenixObjectInspector(PrimitiveTypeInfo typeInfo) {
+ super(typeInfo);
+
+ log = LogFactory.getLog(getClass());
+ }
+
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o;
+ }
+
+ public void logExceptionMessage(Object value, String dataType) {
+ if (log.isDebugEnabled()) {
+ log.debug("Data not in the " + dataType + " data type range so converted to null. " +
+ "Given data is :"
+ + value.toString(), new Exception("For debugging purposes"));
+ }
+ }
+}
\ No newline at end of file
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixBinaryObjectInspector.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixBinaryObjectInspector.java
new file mode 100644
index 0000000..2c642d2
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixBinaryObjectInspector.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.objectinspector;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.BytesWritable;
+
+/**
+ * ObjectInspector for Binary type
+ */
+
+public class PhoenixBinaryObjectInspector extends AbstractPhoenixObjectInspector<BytesWritable>
+ implements BinaryObjectInspector {
+
+ public PhoenixBinaryObjectInspector() {
+ super(TypeInfoFactory.binaryTypeInfo);
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ byte[] clone = null;
+
+ if (o != null) {
+ byte[] source = (byte[]) o;
+ clone = new byte[source.length];
+ System.arraycopy(source, 0, clone, 0, source.length);
+ }
+
+ return clone;
+ }
+
+ @Override
+ public byte[] getPrimitiveJavaObject(Object o) {
+ return (byte[]) o;
+ }
+
+ @Override
+ public BytesWritable getPrimitiveWritableObject(Object o) {
+ return new BytesWritable((byte[]) o);
+ }
+
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixBooleanObjectInspector.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixBooleanObjectInspector.java
new file mode 100644
index 0000000..a767ca0
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixBooleanObjectInspector.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.objectinspector;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.BooleanWritable;
+
+public class PhoenixBooleanObjectInspector extends AbstractPhoenixObjectInspector<BooleanWritable>
+ implements BooleanObjectInspector {
+
+ public PhoenixBooleanObjectInspector() {
+ super(TypeInfoFactory.booleanTypeInfo);
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : Boolean.valueOf((Boolean) o);
+ }
+
+ @Override
+ public BooleanWritable getPrimitiveWritableObject(Object o) {
+ return new BooleanWritable(get(o));
+ }
+
+ @Override
+ public boolean get(Object o) {
+ Boolean value = null;
+
+ if (o != null) {
+ try {
+ value = (Boolean) o;
+ } catch (Exception e) {
+ logExceptionMessage(o, "BOOLEAN");
+ }
+ }
+
+ return value;
+ }
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixByteObjectInspector.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixByteObjectInspector.java
new file mode 100644
index 0000000..a19342a
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixByteObjectInspector.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.objectinspector;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.ByteWritable;
+
+/**
+ * ObjectInspector for byte type
+ */
+public class PhoenixByteObjectInspector extends AbstractPhoenixObjectInspector<ByteWritable>
+ implements ByteObjectInspector {
+
+ public PhoenixByteObjectInspector() {
+ super(TypeInfoFactory.byteTypeInfo);
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : Byte.valueOf((Byte) o);
+ }
+
+ @Override
+ public ByteWritable getPrimitiveWritableObject(Object o) {
+ return new ByteWritable(get(o));
+ }
+
+ @Override
+ public byte get(Object o) {
+ Byte value = null;
+
+ if (o != null) {
+ try {
+ value = (Byte) o;
+ } catch (Exception e) {
+ logExceptionMessage(o, "BYTE");
+ }
+ }
+
+ return value;
+ }
+
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixCharObjectInspector.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixCharObjectInspector.java
new file mode 100644
index 0000000..17222a2
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixCharObjectInspector.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.objectinspector;
+
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+/**
+ * ObjectInspector for char type
+ */
+public class PhoenixCharObjectInspector extends AbstractPhoenixObjectInspector<HiveCharWritable>
+ implements HiveCharObjectInspector {
+
+ public PhoenixCharObjectInspector() {
+ this(TypeInfoFactory.charTypeInfo);
+ }
+
+ public PhoenixCharObjectInspector(PrimitiveTypeInfo type) {
+ super(type);
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new String((String) o);
+ }
+
+ @Override
+ public HiveCharWritable getPrimitiveWritableObject(Object o) {
+ return new HiveCharWritable(getPrimitiveJavaObject(o));
+ }
+
+ @Override
+ public HiveChar getPrimitiveJavaObject(Object o) {
+ String value = (String) o;
+ return new HiveChar(value, value.length());
+ }
+
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixDateObjectInspector.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixDateObjectInspector.java
new file mode 100644
index 0000000..d97993e
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixDateObjectInspector.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.objectinspector;
+
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import java.sql.Date;
+
+/**
+ * ObjectInspector for date type
+ */
+
+public class PhoenixDateObjectInspector extends AbstractPhoenixObjectInspector<DateWritable>
+ implements DateObjectInspector {
+
+ public PhoenixDateObjectInspector() {
+ super(TypeInfoFactory.dateTypeInfo);
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new Date(((Date) o).getTime());
+ }
+
+ @Override
+ public DateWritable getPrimitiveWritableObject(Object o) {
+ DateWritable value = null;
+
+ if (o != null) {
+ try {
+ value = new DateWritable((Date) o);
+ } catch (Exception e) {
+ logExceptionMessage(o, "DATE");
+ value = new DateWritable();
+ }
+ }
+
+ return value;
+ }
+
+ @Override
+ public Date getPrimitiveJavaObject(Object o) {
+ return (Date) o;
+ }
+
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixDecimalObjectInspector.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixDecimalObjectInspector.java
new file mode 100644
index 0000000..3853c18
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixDecimalObjectInspector.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.objectinspector;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import java.math.BigDecimal;
+
+public class PhoenixDecimalObjectInspector extends
+ AbstractPhoenixObjectInspector<HiveDecimalWritable>
+ implements HiveDecimalObjectInspector {
+
+ public PhoenixDecimalObjectInspector() {
+ this(TypeInfoFactory.decimalTypeInfo);
+ }
+
+ public PhoenixDecimalObjectInspector(PrimitiveTypeInfo typeInfo) {
+ super(typeInfo);
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new BigDecimal(o.toString());
+ }
+
+ @Override
+ public HiveDecimal getPrimitiveJavaObject(Object o) {
+ if (o == null) {
+ return null;
+ }
+
+ return HiveDecimalUtils.enforcePrecisionScale(
+ HiveDecimal.create((BigDecimal) o), (DecimalTypeInfo) typeInfo);
+ }
+
+ @Override
+ public HiveDecimalWritable getPrimitiveWritableObject(Object o) {
+ HiveDecimalWritable value = null;
+
+ if (o != null) {
+ try {
+ value = new HiveDecimalWritable(getPrimitiveJavaObject(o));
+ } catch (Exception e) {
+ logExceptionMessage(o, "DECIMAL");
+ }
+ }
+
+ return value;
+ }
+
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixDoubleObjectInspector.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixDoubleObjectInspector.java
new file mode 100644
index 0000000..9f440ed
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixDoubleObjectInspector.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.objectinspector;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.DoubleWritable;
+
+/**
+ * ObjectInspector for double type
+ */
+public class PhoenixDoubleObjectInspector extends AbstractPhoenixObjectInspector<DoubleWritable>
+ implements DoubleObjectInspector {
+
+ public PhoenixDoubleObjectInspector() {
+ super(TypeInfoFactory.doubleTypeInfo);
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : Double.valueOf((Double) o);
+ }
+
+ @Override
+ public DoubleWritable getPrimitiveWritableObject(Object o) {
+ return new DoubleWritable(get(o));
+ }
+
+ @Override
+ public double get(Object o) {
+ Double value = null;
+
+ if (o != null) {
+ try {
+ value = ((Double) o).doubleValue();
+ } catch (Exception e) {
+ logExceptionMessage(o, "LONG");
+ }
+ }
+
+ return value;
+ }
+
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixFloatObjectInspector.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixFloatObjectInspector.java
new file mode 100644
index 0000000..bf1badc
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixFloatObjectInspector.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.objectinspector;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.FloatWritable;
+
+/**
+ * ObjectInspector for float type
+ */
+
+public class PhoenixFloatObjectInspector extends AbstractPhoenixObjectInspector<FloatWritable>
+ implements FloatObjectInspector {
+
+ public PhoenixFloatObjectInspector() {
+ super(TypeInfoFactory.floatTypeInfo);
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : Float.valueOf((Float) o);
+ }
+
+ @Override
+ public FloatWritable getPrimitiveWritableObject(Object o) {
+ return new FloatWritable(get(o));
+ }
+
+ @Override
+ public float get(Object o) {
+ Float value = null;
+
+ if (o != null) {
+ try {
+ value = ((Float) o).floatValue();
+ } catch (Exception e) {
+ logExceptionMessage(o, "LONG");
+ }
+ }
+
+ return value;
+ }
+
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixIntObjectInspector.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixIntObjectInspector.java
new file mode 100644
index 0000000..3511ee3
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixIntObjectInspector.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.objectinspector;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.IntWritable;
+
+public class PhoenixIntObjectInspector extends AbstractPhoenixObjectInspector<IntWritable>
+ implements IntObjectInspector {
+
+ public PhoenixIntObjectInspector() {
+ super(TypeInfoFactory.intTypeInfo);
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : Integer.valueOf((Integer) o);
+ }
+
+ @Override
+ public Category getCategory() {
+ return Category.PRIMITIVE;
+ }
+
+ @Override
+ public IntWritable getPrimitiveWritableObject(Object o) {
+ return new IntWritable(get(o));
+ }
+
+ @Override
+ public int get(Object o) {
+ Integer value = null;
+
+ if (o != null) {
+ try {
+ value = ((Integer) o).intValue();
+ } catch (Exception e) {
+ logExceptionMessage(o, "INT");
+ }
+ }
+
+ return value;
+ }
+
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixListObjectInspector.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixListObjectInspector.java
new file mode 100644
index 0000000..c4f2d51
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixListObjectInspector.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.objectinspector;
+
+import com.google.common.collect.Lists;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.phoenix.schema.types.PhoenixArray;
+
+import java.util.List;
+
+/**
+ * ObjectInspector for list objects.
+ */
+public class PhoenixListObjectInspector implements ListObjectInspector {
+
+ private ObjectInspector listElementObjectInspector;
+ private byte separator;
+ private LazyObjectInspectorParameters lazyParams;
+
+ public PhoenixListObjectInspector(ObjectInspector listElementObjectInspector,
+ byte separator, LazyObjectInspectorParameters lazyParams) {
+ this.listElementObjectInspector = listElementObjectInspector;
+ this.separator = separator;
+ this.lazyParams = lazyParams;
+ }
+
+ @Override
+ public String getTypeName() {
+ return org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME + "<" +
+ listElementObjectInspector.getTypeName() + ">";
+ }
+
+ @Override
+ public Category getCategory() {
+ return Category.LIST;
+ }
+
+ @Override
+ public ObjectInspector getListElementObjectInspector() {
+ return listElementObjectInspector;
+ }
+
+ @Override
+ public Object getListElement(Object data, int index) {
+ if (data == null) {
+ return null;
+ }
+
+ PhoenixArray array = (PhoenixArray) data;
+
+ return array.getElement(index);
+ }
+
+ @Override
+ public int getListLength(Object data) {
+ if (data == null) {
+ return -1;
+ }
+
+ PhoenixArray array = (PhoenixArray) data;
+ return array.getDimensions();
+ }
+
+ @Override
+ public List<?> getList(Object data) {
+ if (data == null) {
+ return null;
+ }
+
+ PhoenixArray array = (PhoenixArray) data;
+ int valueLength = array.getDimensions();
+ List<Object> valueList = Lists.newArrayListWithExpectedSize(valueLength);
+
+ for (int i = 0; i < valueLength; i++) {
+ valueList.add(array.getElement(i));
+ }
+
+ return valueList;
+ }
+
+ public byte getSeparator() {
+ return separator;
+ }
+
+ public LazyObjectInspectorParameters getLazyParams() {
+ return lazyParams;
+ }
+}
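+
+/*
+ * Editorial sketch (not part of the patch): how this inspector exposes a
+ * Phoenix array to Hive; "inspector" and "array" are hypothetical.
+ *
+ *   PhoenixArray array = ...;                          // e.g. an INTEGER[] column value
+ *   int n = inspector.getListLength(array);            // delegates to array.getDimensions()
+ *   Object first = inspector.getListElement(array, 0);
+ *   List<?> all = inspector.getList(array);            // copies elements into a new list
+ *   // a null value yields length -1 and a null list, per Hive's contract
+ */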
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixLongObjectInspector.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixLongObjectInspector.java
new file mode 100644
index 0000000..554f2a4
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixLongObjectInspector.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.objectinspector;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.LongWritable;
+
+public class PhoenixLongObjectInspector extends AbstractPhoenixObjectInspector<LongWritable>
+ implements LongObjectInspector {
+
+ public PhoenixLongObjectInspector() {
+ super(TypeInfoFactory.longTypeInfo);
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : Long.valueOf((Long) o);
+ }
+
+ @Override
+ public LongWritable getPrimitiveWritableObject(Object o) {
+ return new LongWritable(get(o));
+ }
+
+ @Override
+ public long get(Object o) {
+ Long value = null;
+
+ if (o != null) {
+ try {
+ value = ((Long) o).longValue();
+ } catch (Exception e) {
+ logExceptionMessage(o, "LONG");
+ }
+ }
+
+ return value;
+ }
+
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixObjectInspectorFactory.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixObjectInspectorFactory.java
new file mode 100644
index 0000000..3a19ea7
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixObjectInspectorFactory.java
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.objectinspector;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Factory for object inspectors. Maps each Hive type to the corresponding
+ * Phoenix object inspector.
+ */
+
+public class PhoenixObjectInspectorFactory {
+
+ private static final Log LOG = LogFactory.getLog(PhoenixObjectInspectorFactory.class);
+
+ private PhoenixObjectInspectorFactory() {
+
+ }
+
+ public static LazySimpleStructObjectInspector createStructObjectInspector(TypeInfo type,
+ LazySerDeParameters
+ serdeParams) {
+ StructTypeInfo structTypeInfo = (StructTypeInfo) type;
+ List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
+ List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+ List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>
+ (fieldTypeInfos.size());
+
+ for (int i = 0; i < fieldTypeInfos.size(); i++) {
+ fieldObjectInspectors.add(createObjectInspector(fieldTypeInfos.get(i), serdeParams));
+ }
+
+ return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
+ fieldNames, fieldObjectInspectors, null,
+ serdeParams.getSeparators()[1],
+ serdeParams, ObjectInspectorOptions.JAVA);
+ }
+
+ public static ObjectInspector createObjectInspector(TypeInfo type, LazySerDeParameters
+ serdeParams) {
+ ObjectInspector oi = null;
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Type : " + type);
+ }
+
+ switch (type.getCategory()) {
+ case PRIMITIVE:
+ switch (((PrimitiveTypeInfo) type).getPrimitiveCategory()) {
+ case BOOLEAN:
+ oi = new PhoenixBooleanObjectInspector();
+ break;
+ case BYTE:
+ oi = new PhoenixByteObjectInspector();
+ break;
+ case SHORT:
+ oi = new PhoenixShortObjectInspector();
+ break;
+ case INT:
+ oi = new PhoenixIntObjectInspector();
+ break;
+ case LONG:
+ oi = new PhoenixLongObjectInspector();
+ break;
+ case FLOAT:
+ oi = new PhoenixFloatObjectInspector();
+ break;
+ case DOUBLE:
+ oi = new PhoenixDoubleObjectInspector();
+ break;
+ case VARCHAR:
+ // VARCHAR is handled the same way as STRING
+ case STRING:
+ oi = new PhoenixStringObjectInspector(serdeParams.isEscaped(),
+ serdeParams.getEscapeChar());
+ break;
+ case CHAR:
+ oi = new PhoenixCharObjectInspector((PrimitiveTypeInfo)type);
+ break;
+ case DATE:
+ oi = new PhoenixDateObjectInspector();
+ break;
+ case TIMESTAMP:
+ oi = new PhoenixTimestampObjectInspector();
+ break;
+ case DECIMAL:
+ oi = new PhoenixDecimalObjectInspector((PrimitiveTypeInfo) type);
+ break;
+ case BINARY:
+ oi = new PhoenixBinaryObjectInspector();
+ break;
+ default:
+ throw new RuntimeException("Hive internal error. not supported data type " +
+ ": " + type);
+ }
+
+ break;
+ case LIST:
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("List type started");
+ }
+
+ ObjectInspector listElementObjectInspector = createObjectInspector((
+ (ListTypeInfo) type).getListElementTypeInfo(), serdeParams);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("List type ended");
+ }
+
+ oi = new PhoenixListObjectInspector(listElementObjectInspector, serdeParams
+ .getSeparators()[0], serdeParams);
+
+ break;
+ default:
+ throw new RuntimeException("Hive internal error. not supported data type : " +
+ type);
+ }
+
+ return oi;
+ }
+}
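+
+/*
+ * Editorial sketch (not part of the patch): mapping a Hive TypeInfo to its
+ * Phoenix inspector; serdeParams is assumed to come from SerDe initialization.
+ *
+ *   ObjectInspector oi = PhoenixObjectInspectorFactory.createObjectInspector(
+ *       TypeInfoFactory.intTypeInfo, serdeParams);     // -> PhoenixIntObjectInspector
+ *   // LIST types recurse on the element type; unsupported types throw RuntimeException
+ */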
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixShortObjectInspector.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixShortObjectInspector.java
new file mode 100644
index 0000000..84529b0
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixShortObjectInspector.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.objectinspector;
+
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+public class PhoenixShortObjectInspector extends AbstractPhoenixObjectInspector<ShortWritable>
+ implements ShortObjectInspector {
+
+ public PhoenixShortObjectInspector() {
+ super(TypeInfoFactory.shortTypeInfo);
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : Short.valueOf((Short) o);
+ }
+
+ @Override
+ public ShortWritable getPrimitiveWritableObject(Object o) {
+ return new ShortWritable(get(o));
+ }
+
+ @Override
+ public short get(Object o) {
+ Short value = null;
+
+ if (o != null) {
+ try {
+ value = ((Short) o).shortValue();
+ } catch (Exception e) {
+ logExceptionMessage(o, "SHORT");
+ }
+ }
+
+ return value;
+ }
+
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixStringObjectInspector.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixStringObjectInspector.java
new file mode 100644
index 0000000..e409e1d
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixStringObjectInspector.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.objectinspector;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.Text;
+
+/**
+ * ObjectInspector for string type
+ */
+public class PhoenixStringObjectInspector extends AbstractPhoenixObjectInspector<Text>
+ implements StringObjectInspector {
+
+ private boolean escaped;
+ private byte escapeChar;
+
+ public PhoenixStringObjectInspector(boolean escaped, byte escapeChar) {
+ super(TypeInfoFactory.stringTypeInfo);
+ this.escaped = escaped;
+ this.escapeChar = escapeChar;
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new String((String) o);
+ }
+
+ @Override
+ public String getPrimitiveJavaObject(Object o) {
+ return (String) o;
+ }
+
+ @Override
+ public Text getPrimitiveWritableObject(Object o) {
+ Text value = null;
+
+ if (o != null) {
+ try {
+ value = new Text((String) o);
+ } catch (Exception e) {
+ logExceptionMessage(o, "STRING");
+ }
+ }
+
+ return value;
+ }
+
+ public boolean isEscaped() {
+ return escaped;
+ }
+
+ public byte getEscapeChar() {
+ return escapeChar;
+ }
+
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixTimestampObjectInspector.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixTimestampObjectInspector.java
new file mode 100644
index 0000000..7b13f2b
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/objectinspector/PhoenixTimestampObjectInspector.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.objectinspector;
+
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import java.sql.Timestamp;
+
+/**
+ * ObjectInspector for timestamp type
+ */
+public class PhoenixTimestampObjectInspector extends
+ AbstractPhoenixObjectInspector<TimestampWritable>
+ implements TimestampObjectInspector {
+
+ public PhoenixTimestampObjectInspector() {
+ super(TypeInfoFactory.timestampTypeInfo);
+ }
+
+ @Override
+ public Timestamp getPrimitiveJavaObject(Object o) {
+ return (Timestamp) o;
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new Timestamp(((Timestamp) o).getTime());
+ }
+
+ @Override
+ public TimestampWritable getPrimitiveWritableObject(Object o) {
+ TimestampWritable value = null;
+
+ if (o != null) {
+ try {
+ value = new TimestampWritable((Timestamp) o);
+ } catch (Exception e) {
+ logExceptionMessage(o, "TIMESTAMP");
+ }
+ }
+
+ return value;
+ }
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/ppd/PhoenixPredicateDecomposer.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/ppd/PhoenixPredicateDecomposer.java
new file mode 100644
index 0000000..1e65819
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/ppd/PhoenixPredicateDecomposer.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.ppd;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler.DecomposedPredicate;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.phoenix.hive.ql.index.IndexPredicateAnalyzer;
+import org.apache.phoenix.hive.ql.index.IndexSearchCondition;
+import org.apache.phoenix.hive.ql.index.PredicateAnalyzerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Supporting class that generates the DecomposedPredicate for PhoenixHiveStorageHandler
+ * based on search conditions.
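+ * <p>
+ * A minimal usage sketch (the column names and the {@code predicateNode} produced by Hive
+ * are illustrative):
+ * <pre>
+ * PhoenixPredicateDecomposer decomposer =
+ * PhoenixPredicateDecomposer.create(Arrays.asList("id", "name"));
+ * DecomposedPredicate decomposed = decomposer.decomposePredicate(predicateNode);
+ * List&lt;IndexSearchCondition&gt; pushed = decomposer.getSearchConditionList();
+ * </pre>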
+ */
+public class PhoenixPredicateDecomposer {
+
+ private static final Log LOG = LogFactory.getLog(PhoenixPredicateDecomposer.class);
+
+ private List<String> columnNameList;
+ private boolean calledPPD;
+
+ private List<IndexSearchCondition> searchConditionList;
+
+ public static PhoenixPredicateDecomposer create(List<String> columnNameList) {
+ return new PhoenixPredicateDecomposer(columnNameList);
+ }
+
+ private PhoenixPredicateDecomposer(List<String> columnNameList) {
+ this.columnNameList = columnNameList;
+ }
+
+ public DecomposedPredicate decomposePredicate(ExprNodeDesc predicate) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("predicate - " + predicate.toString());
+ }
+
+ IndexPredicateAnalyzer analyzer = PredicateAnalyzerFactory.createPredicateAnalyzer
+ (columnNameList, getFieldValidator());
+ DecomposedPredicate decomposed = new DecomposedPredicate();
+
+ List<IndexSearchCondition> conditions = new ArrayList<IndexSearchCondition>();
+ decomposed.residualPredicate = (ExprNodeGenericFuncDesc) analyzer.analyzePredicate
+ (predicate, conditions);
+ if (!conditions.isEmpty()) {
+ try {
+ decomposed.pushedPredicate = analyzer.translateSearchConditions(conditions);
+ searchConditionList = conditions;
+ calledPPD = true;
+ } catch (Exception e) {
+ LOG.warn("Failed to decompose predicates", e);
+ return null;
+ }
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("decomposed predicate - residualPredicate: " + decomposed.residualPredicate +
+ ", pushedPredicate: " + decomposed.pushedPredicate);
+ }
+
+ return decomposed;
+ }
+
+ public List<IndexSearchCondition> getSearchConditionList() {
+ return searchConditionList;
+ }
+
+ public boolean isCalledPPD() {
+ return calledPPD;
+ }
+
+ protected IndexPredicateAnalyzer.FieldValidator getFieldValidator() {
+ return null;
+ }
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/ql/index/IndexPredicateAnalyzer.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/ql/index/IndexPredicateAnalyzer.java
new file mode 100644
index 0000000..659983a
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/ql/index/IndexPredicateAnalyzer.java
@@ -0,0 +1,523 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.ql.index;
+
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToChar;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDate;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUtcTimestamp;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToVarchar;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Stack;
+
+/**
+ * Clone of org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer with a modified
+ * analyzePredicate method.
+ * <p>
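+ * A minimal usage sketch ({@code predicate} is the ExprNodeDesc handed over by Hive; the
+ * column name is illustrative):
+ * <pre>
+ * IndexPredicateAnalyzer analyzer = IndexPredicateAnalyzer.createAnalyzer(false);
+ * analyzer.allowColumnName("id");
+ * List&lt;IndexSearchCondition&gt; conditions = new ArrayList&lt;&gt;();
+ * ExprNodeDesc residual = analyzer.analyzePredicate(predicate, conditions);
+ * </pre>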
+ */
+public class IndexPredicateAnalyzer {
+
+ private static final Log LOG = LogFactory.getLog(IndexPredicateAnalyzer.class);
+
+ private final Set<String> udfNames;
+ private final Map<String, Set<String>> columnToUDFs;
+ private FieldValidator fieldValidator;
+
+ private boolean acceptsFields;
+
+ public IndexPredicateAnalyzer() {
+ udfNames = new HashSet<String>();
+ columnToUDFs = new HashMap<String, Set<String>>();
+ }
+
+ public void setFieldValidator(FieldValidator fieldValidator) {
+ this.fieldValidator = fieldValidator;
+ }
+
+ /**
+ * Registers a comparison operator as one which can be satisfied by an index
+ * search. Unless this is called, analyzePredicate will never find any
+ * indexable conditions.
+ *
+ * @param udfName name of the comparison operator as returned by either
+ * {@link GenericUDFBridge#getUdfName} (for simple UDFs) or
+ * udf.getClass().getName() (for generic UDFs).
+ */
+ public void addComparisonOp(String udfName) {
+ udfNames.add(udfName);
+ }
+
+ /**
+ * Clears the set of column names allowed in comparisons. Until
+ * {@link #allowColumnName} is called again, no column is eligible for push-down.
+ */
+ public void clearAllowedColumnNames() {
+ columnToUDFs.clear();
+ }
+
+ /**
+ * Adds a column name to the set of column names allowed.
+ *
+ * @param columnName name of column to be allowed
+ */
+ public void allowColumnName(String columnName) {
+ columnToUDFs.put(columnName, udfNames);
+ }
+
+ /**
+ * Adds allowed comparison functions for the given column.
+ *
+ * @param columnName name of the column
+ * @param udfs names of the comparison operators to allow for the column
+ */
+ public void addComparisonOp(String columnName, String... udfs) {
+ Set<String> allowed = columnToUDFs.get(columnName);
+ if (allowed == null || allowed == udfNames) {
+ // override
+ columnToUDFs.put(columnName, new HashSet<String>(Arrays.asList(udfs)));
+ } else {
+ allowed.addAll(Arrays.asList(udfs));
+ }
+ }
+
+ /**
+ * Analyzes a predicate.
+ *
+ * @param predicate predicate to be analyzed
+ * @param searchConditions receives conditions produced by analysis
+ * @return residual predicate which could not be translated to
+ * searchConditions
+ */
+ public ExprNodeDesc analyzePredicate(ExprNodeDesc predicate, final List<IndexSearchCondition>
+ searchConditions) {
+
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ NodeProcessor nodeProcessor = new NodeProcessor() {
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object...
+ nodeOutputs) throws SemanticException {
+
+ // We can only push down stuff which appears as part of
+ // a pure conjunction: reject OR, CASE, etc.
+ for (Node ancestor : stack) {
+ if (nd == ancestor) {
+ break;
+ }
+ if (!FunctionRegistry.isOpAnd((ExprNodeDesc) ancestor)) {
+ return nd;
+ }
+ }
+
+ return analyzeExpr((ExprNodeGenericFuncDesc) nd, searchConditions, nodeOutputs);
+ }
+ };
+
+ Dispatcher disp = new DefaultRuleDispatcher(nodeProcessor, opRules, null);
+ GraphWalker ogw = new DefaultGraphWalker(disp);
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.add(predicate);
+ HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
+
+ try {
+ ogw.startWalking(topNodes, nodeOutput);
+ } catch (SemanticException ex) {
+ throw new RuntimeException(ex);
+ }
+
+ ExprNodeDesc residualPredicate = (ExprNodeDesc) nodeOutput.get(predicate);
+ return residualPredicate;
+ }
+
+ // Check if ExprNodeColumnDesc is wrapped in expr.
+ // If so, peel off. Otherwise return itself.
+ private ExprNodeDesc getColumnExpr(ExprNodeDesc expr) {
+ if (expr instanceof ExprNodeColumnDesc) {
+ return expr;
+ }
+ ExprNodeGenericFuncDesc funcDesc = null;
+ if (expr instanceof ExprNodeGenericFuncDesc) {
+ funcDesc = (ExprNodeGenericFuncDesc) expr;
+ }
+ if (null == funcDesc) {
+ return expr;
+ }
+ GenericUDF udf = funcDesc.getGenericUDF();
+ // check if its a simple cast expression.
+ if ((udf instanceof GenericUDFBridge || udf instanceof GenericUDFToBinary || udf
+ instanceof GenericUDFToChar
+ || udf instanceof GenericUDFToVarchar || udf instanceof GenericUDFToDecimal
+ || udf instanceof GenericUDFToDate || udf instanceof GenericUDFToUnixTimeStamp
+ || udf instanceof GenericUDFToUtcTimestamp) && funcDesc.getChildren().size() == 1
+ && funcDesc.getChildren().get(0) instanceof ExprNodeColumnDesc) {
+ return expr.getChildren().get(0);
+ }
+ return expr;
+ }
+
+ private void processingBetweenOperator(ExprNodeGenericFuncDesc expr,
+ List<IndexSearchCondition> searchConditions, Object...
+ nodeOutputs) {
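+ // For (NOT) BETWEEN, nodeOutputs is [notFlag, column, lowerBound, upperBound].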
+ ExprNodeColumnDesc columnDesc = null;
+ String[] fields = null;
+
+ if (nodeOutputs[1] instanceof ExprNodeFieldDesc) {
+ // rowKey field
+ ExprNodeFieldDesc fieldDesc = (ExprNodeFieldDesc) nodeOutputs[1];
+ fields = ExprNodeDescUtils.extractFields(fieldDesc);
+
+ ExprNodeDesc[] extracted = ExprNodeDescUtils.extractComparePair((ExprNodeDesc)
+ nodeOutputs[1], (ExprNodeDesc) nodeOutputs[2]);
+ columnDesc = (ExprNodeColumnDesc) extracted[0];
+ } else if (nodeOutputs[1] instanceof ExprNodeGenericFuncDesc) {
+ columnDesc = (ExprNodeColumnDesc) ((ExprNodeGenericFuncDesc) nodeOutputs[1])
+ .getChildren().get(0);
+ } else {
+ columnDesc = (ExprNodeColumnDesc) nodeOutputs[1];
+ }
+
+ String udfName = expr.getGenericUDF().getUdfName();
+ ExprNodeConstantDesc[] betweenConstants = new ExprNodeConstantDesc[]{
+ (ExprNodeConstantDesc) nodeOutputs[2], (ExprNodeConstantDesc) nodeOutputs[3]};
+ boolean isNot = (Boolean) ((ExprNodeConstantDesc) nodeOutputs[0]).getValue();
+
+ searchConditions.add(new IndexSearchCondition(columnDesc, udfName, betweenConstants,
+ expr, fields, isNot));
+ }
+
+ private void processingInOperator(ExprNodeGenericFuncDesc expr, List<IndexSearchCondition>
+ searchConditions, boolean isNot, Object... nodeOutputs) {
+ ExprNodeColumnDesc columnDesc = null;
+ String[] fields = null;
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Processing In Operator. nodeOutputs : " + Lists.newArrayList(nodeOutputs));
+ }
+
+ if (nodeOutputs[0] instanceof ExprNodeFieldDesc) {
+ // rowKey field
+ ExprNodeFieldDesc fieldDesc = (ExprNodeFieldDesc) nodeOutputs[0];
+ fields = ExprNodeDescUtils.extractFields(fieldDesc);
+
+ ExprNodeDesc[] extracted = ExprNodeDescUtils.extractComparePair((ExprNodeDesc)
+ nodeOutputs[0], (ExprNodeDesc) nodeOutputs[1]);
+
+ if (extracted == null) { // added for Tez
+ return;
+ }
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("nodeOutputs[0] : " + nodeOutputs[0] + ", nodeOutputs[1] : " +
+ nodeOutputs[1] + " => " + Lists.newArrayList(extracted));
+ }
+
+ columnDesc = (ExprNodeColumnDesc) extracted[0];
+ } else if (nodeOutputs[0] instanceof ExprNodeGenericFuncDesc) {
+ columnDesc = (ExprNodeColumnDesc) ((ExprNodeGenericFuncDesc) nodeOutputs[0])
+ .getChildren().get(0);
+ } else {
+ columnDesc = (ExprNodeColumnDesc) nodeOutputs[0];
+ }
+
+ String udfName = expr.getGenericUDF().getUdfName();
+ ExprNodeConstantDesc[] inConstantDescs = new ExprNodeConstantDesc[nodeOutputs.length - 1];
+
+ for (int i = 0, limit = inConstantDescs.length; i < limit; i++) {
+ if (!(nodeOutputs[i + 1] instanceof ExprNodeConstantDesc)) { // added for Tez
+ return;
+ }
+
+ inConstantDescs[i] = (ExprNodeConstantDesc) nodeOutputs[i + 1];
+ }
+
+ searchConditions.add(new IndexSearchCondition(columnDesc, udfName, inConstantDescs, expr,
+ fields, isNot));
+ }
+
+ private void processingNullOperator(ExprNodeGenericFuncDesc expr, List<IndexSearchCondition>
+ searchConditions, Object... nodeOutputs) {
+ ExprNodeColumnDesc columnDesc = null;
+ String[] fields = null;
+
+ if (nodeOutputs[0] instanceof ExprNodeFieldDesc) {
+ // rowKey field
+ ExprNodeFieldDesc fieldDesc = (ExprNodeFieldDesc) nodeOutputs[0];
+ fields = ExprNodeDescUtils.extractFields(fieldDesc);
+
+ ExprNodeDesc[] extracted = ExprNodeDescUtils.extractComparePair((ExprNodeDesc)
+ nodeOutputs[0], new ExprNodeConstantDesc());
+ columnDesc = (ExprNodeColumnDesc) extracted[0];
+ } else if (nodeOutputs[0] instanceof ExprNodeGenericFuncDesc) {
+ columnDesc = (ExprNodeColumnDesc) ((ExprNodeGenericFuncDesc) nodeOutputs[0])
+ .getChildren().get(0);
+ } else {
+ columnDesc = (ExprNodeColumnDesc) nodeOutputs[0];
+ }
+
+ String udfName = expr.getGenericUDF().getUdfName();
+
+ searchConditions.add(new IndexSearchCondition(columnDesc, udfName, null, expr, fields,
+ false));
+ }
+
+ private void processingNotNullOperator(ExprNodeGenericFuncDesc expr,
+ List<IndexSearchCondition> searchConditions, Object...
+ nodeOutputs) {
+ ExprNodeColumnDesc columnDesc = null;
+ String[] fields = null;
+
+ if (nodeOutputs[0] instanceof ExprNodeFieldDesc) {
+ // rowKey field
+ ExprNodeFieldDesc fieldDesc = (ExprNodeFieldDesc) nodeOutputs[0];
+ fields = ExprNodeDescUtils.extractFields(fieldDesc);
+
+ ExprNodeDesc[] extracted = ExprNodeDescUtils.extractComparePair((ExprNodeDesc)
+ nodeOutputs[0], new ExprNodeConstantDesc());
+ columnDesc = (ExprNodeColumnDesc) extracted[0];
+ } else if (nodeOutputs[0] instanceof ExprNodeGenericFuncDesc) {
+ columnDesc = (ExprNodeColumnDesc) ((ExprNodeGenericFuncDesc) nodeOutputs[0])
+ .getChildren().get(0);
+ } else {
+ columnDesc = (ExprNodeColumnDesc) nodeOutputs[0];
+ }
+
+ String udfName = expr.getGenericUDF().getUdfName();
+
+ searchConditions.add(new IndexSearchCondition(columnDesc, udfName, null, expr, fields,
+ true));
+ }
+
+ private ExprNodeDesc analyzeExpr(ExprNodeGenericFuncDesc expr, List<IndexSearchCondition>
+ searchConditions, Object... nodeOutputs) throws SemanticException {
+
+ if (FunctionRegistry.isOpAnd(expr)) {
+ assert (nodeOutputs.length == 2);
+ ExprNodeDesc residual1 = (ExprNodeDesc) nodeOutputs[0];
+ ExprNodeDesc residual2 = (ExprNodeDesc) nodeOutputs[1];
+ if (residual1 == null) {
+ return residual2;
+ }
+ if (residual2 == null) {
+ return residual1;
+ }
+ List<ExprNodeDesc> residuals = new ArrayList<ExprNodeDesc>();
+ residuals.add(residual1);
+ residuals.add(residual2);
+ return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, FunctionRegistry
+ .getGenericUDFForAnd(), residuals);
+ }
+
+ GenericUDF genericUDF = expr.getGenericUDF();
+ if (!(genericUDF instanceof GenericUDFBaseCompare)) {
+ // 2015-10-22 Added by JeongMin Ju : handling of Between/In operators.
+ if (genericUDF instanceof GenericUDFBetween) {
+ // For NOT BETWEEN, the first element of nodeOutputs is true; otherwise false.
+ processingBetweenOperator(expr, searchConditions, nodeOutputs);
+ return expr;
+ } else if (genericUDF instanceof GenericUDFIn) {
+ // Plain IN; NOT IN (an IN under a NOT operator) is handled in the next branch.
+ processingInOperator(expr, searchConditions, false, nodeOutputs);
+ return expr;
+ } else if (genericUDF instanceof GenericUDFOPNot &&
+ ((ExprNodeGenericFuncDesc) expr.getChildren().get(0)).getGenericUDF()
+ instanceof GenericUDFIn) {
+ // For NOT IN, the IN operator appears as a child of the NOT operator.
+ processingInOperator((ExprNodeGenericFuncDesc) expr.getChildren().get(0),
+ searchConditions, true, ((ExprNodeGenericFuncDesc) nodeOutputs[0])
+ .getChildren().toArray());
+ return expr;
+ } else if (genericUDF instanceof GenericUDFOPNull) {
+ processingNullOperator(expr, searchConditions, nodeOutputs);
+ return expr;
+ } else if (genericUDF instanceof GenericUDFOPNotNull) {
+ processingNotNullOperator(expr, searchConditions, nodeOutputs);
+ return expr;
+ } else {
+ return expr;
+ }
+ }
+ ExprNodeDesc expr1 = (ExprNodeDesc) nodeOutputs[0];
+ ExprNodeDesc expr2 = (ExprNodeDesc) nodeOutputs[1];
+ // We may need to peel off the GenericUDFBridge that is added by CBO or
+ // user
+ if (expr1.getTypeInfo().equals(expr2.getTypeInfo())) {
+ expr1 = getColumnExpr(expr1);
+ expr2 = getColumnExpr(expr2);
+ }
+
+ ExprNodeDesc[] extracted = ExprNodeDescUtils.extractComparePair(expr1, expr2);
+ if (extracted == null || (extracted.length > 2 && !acceptsFields)) {
+ return expr;
+ }
+
+ ExprNodeColumnDesc columnDesc;
+ ExprNodeConstantDesc constantDesc;
+ if (extracted[0] instanceof ExprNodeConstantDesc) {
+ genericUDF = genericUDF.flip();
+ columnDesc = (ExprNodeColumnDesc) extracted[1];
+ constantDesc = (ExprNodeConstantDesc) extracted[0];
+ } else {
+ columnDesc = (ExprNodeColumnDesc) extracted[0];
+ constantDesc = (ExprNodeConstantDesc) extracted[1];
+ }
+
+ Set<String> allowed = columnToUDFs.get(columnDesc.getColumn());
+ if (allowed == null) {
+ return expr;
+ }
+
+ String udfName = genericUDF.getUdfName();
+ if (!allowed.contains(udfName)) {
+ return expr;
+ }
+
+ String[] fields = null;
+ if (extracted.length > 2) {
+ ExprNodeFieldDesc fieldDesc = (ExprNodeFieldDesc) extracted[2];
+ if (!isValidField(fieldDesc)) {
+ return expr;
+ }
+ fields = ExprNodeDescUtils.extractFields(fieldDesc);
+ }
+
+ // We also need to update the expr so that the index query can be generated.
+ // Note that Hive does not support UDFToDouble etc. in the query text.
+ List<ExprNodeDesc> list = new ArrayList<ExprNodeDesc>();
+ list.add(expr1);
+ list.add(expr2);
+ expr = new ExprNodeGenericFuncDesc(expr.getTypeInfo(), expr.getGenericUDF(), list);
+
+ searchConditions.add(new IndexSearchCondition(columnDesc, udfName, constantDesc, expr,
+ fields));
+
+ // we converted the expression to a search condition, so
+ // remove it from the residual predicate
+ return fields == null ? null : expr;
+ }
+
+ private boolean isValidField(ExprNodeFieldDesc field) {
+ return fieldValidator == null || fieldValidator.validate(field);
+ }
+
+ /**
+ * Translates search conditions back to ExprNodeDesc form (as a left-deep
+ * conjunction).
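+ * <p>
+ * For example, three conditions {@code a = 1}, {@code b = 2}, {@code c = 3} are folded
+ * left-deep into {@code ((a = 1) and (b = 2)) and (c = 3)}.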
+ *
+ * @param searchConditions (typically produced by analyzePredicate)
+ * @return ExprNodeGenericFuncDesc form of search conditions
+ */
+ public ExprNodeGenericFuncDesc translateSearchConditions(List<IndexSearchCondition>
+ searchConditions) {
+
+ ExprNodeGenericFuncDesc expr = null;
+
+ for (IndexSearchCondition searchCondition : searchConditions) {
+ if (expr == null) {
+ expr = searchCondition.getComparisonExpr();
+ continue;
+ }
+
+ List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+ children.add(expr);
+ children.add(searchCondition.getComparisonExpr());
+ expr = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, FunctionRegistry
+ .getGenericUDFForAnd(), children);
+ }
+
+ return expr;
+ }
+
+ public void setAcceptsFields(boolean acceptsFields) {
+ this.acceptsFields = acceptsFields;
+ }
+
+ public interface FieldValidator {
+ boolean validate(ExprNodeFieldDesc exprNodeDesc);
+ }
+
+ public static IndexPredicateAnalyzer createAnalyzer(boolean equalOnly) {
+ IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
+ analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual");
+
+ if (equalOnly) {
+ return analyzer;
+ }
+
+ analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic" +
+ ".GenericUDFOPEqualOrGreaterThan");
+ analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic" +
+ ".GenericUDFOPEqualOrLessThan");
+ analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan");
+ analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan");
+
+ analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual");
+ // apply !=
+ analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween");
+ // apply (Not) Between
+ analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn"); //
+ // apply (Not) In
+ analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn"); //
+ // apply In
+ analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull");
+ // apply Null
+ analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull");
+ // apply Not Null
+
+ return analyzer;
+ }
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/ql/index/IndexSearchCondition.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/ql/index/IndexSearchCondition.java
new file mode 100644
index 0000000..ebd978c
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/ql/index/IndexSearchCondition.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.ql.index;
+
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+
+/**
+ * IndexSearchCondition represents an individual search condition found by
+ * {@link IndexPredicateAnalyzer}.
+ *
+ */
+public class IndexSearchCondition {
+ private ExprNodeColumnDesc columnDesc;
+ private String comparisonOp;
+ private ExprNodeConstantDesc constantDesc;
+ private ExprNodeGenericFuncDesc comparisonExpr;
+
+ private String[] fields;
+
+ // Support (Not) Between/(Not) In Operator
+ private ExprNodeConstantDesc[] multiConstants;
+ private boolean isNot;
+
+ public IndexSearchCondition(ExprNodeColumnDesc columnDesc, String comparisonOp,
+ ExprNodeConstantDesc[] multiConstants, ExprNodeGenericFuncDesc
+ comparisonExpr, boolean isNot) {
+ this(columnDesc, comparisonOp, multiConstants, comparisonExpr, null, isNot);
+ }
+
+ public IndexSearchCondition(ExprNodeColumnDesc columnDesc, String comparisonOp,
+ ExprNodeConstantDesc[] multiConstants, ExprNodeGenericFuncDesc
+ comparisonExpr, String[] fields, boolean isNot) {
+ this.columnDesc = columnDesc;
+ this.comparisonOp = comparisonOp;
+ this.multiConstants = multiConstants;
+ this.comparisonExpr = comparisonExpr;
+ this.fields = fields;
+ this.isNot = isNot;
+ }
+
+ public ExprNodeConstantDesc[] getConstantDescs() {
+ return multiConstants;
+ }
+
+ public ExprNodeConstantDesc getConstantDesc(int index) {
+ return multiConstants[index];
+ }
+
+ public boolean isNot() {
+ return isNot;
+ }
+ //////////////////////////////////////////////////////////////////////////////
+
+ public IndexSearchCondition(ExprNodeColumnDesc columnDesc, String comparisonOp,
+ ExprNodeConstantDesc constantDesc, ExprNodeGenericFuncDesc
+ comparisonExpr) {
+ this(columnDesc, comparisonOp, constantDesc, comparisonExpr, null);
+ }
+
+ /**
+ * Constructs a search condition, which takes the form:
+ * <pre>
+ * column-ref comparison-op constant-value
+ * </pre>
+ *
+ * @param columnDesc column being compared
+ * @param comparisonOp comparison operator, e.g. "=" (taken from
+ * GenericUDFBridge.getUdfName())
+ * @param constantDesc constant value to search for
+ * @param comparisonExpr the original comparison expression
+ */
+ public IndexSearchCondition(ExprNodeColumnDesc columnDesc, String comparisonOp,
+ ExprNodeConstantDesc constantDesc, ExprNodeGenericFuncDesc
+ comparisonExpr, String[] fields) {
+
+ this.columnDesc = columnDesc;
+ this.comparisonOp = comparisonOp;
+ this.constantDesc = constantDesc;
+ this.comparisonExpr = comparisonExpr;
+ this.fields = fields;
+ }
+
+ public void setColumnDesc(ExprNodeColumnDesc columnDesc) {
+ this.columnDesc = columnDesc;
+ }
+
+ public ExprNodeColumnDesc getColumnDesc() {
+ return columnDesc;
+ }
+
+ public void setComparisonOp(String comparisonOp) {
+ this.comparisonOp = comparisonOp;
+ }
+
+ public String getComparisonOp() {
+ return comparisonOp;
+ }
+
+ public void setConstantDesc(ExprNodeConstantDesc constantDesc) {
+ this.constantDesc = constantDesc;
+ }
+
+ public ExprNodeConstantDesc getConstantDesc() {
+ return constantDesc;
+ }
+
+ public void setComparisonExpr(ExprNodeGenericFuncDesc comparisonExpr) {
+ this.comparisonExpr = comparisonExpr;
+ }
+
+ public ExprNodeGenericFuncDesc getComparisonExpr() {
+ return comparisonExpr;
+ }
+
+ public String[] getFields() {
+ return fields;
+ }
+
+ @Override
+ public String toString() {
+ return comparisonExpr.getExprString();
+ }
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/ql/index/PredicateAnalyzerFactory.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/ql/index/PredicateAnalyzerFactory.java
new file mode 100644
index 0000000..b6903b9
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/ql/index/PredicateAnalyzerFactory.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.ql.index;
+
+import org.apache.phoenix.hive.ql.index.IndexPredicateAnalyzer.FieldValidator;
+
+import java.util.List;
+
+public class PredicateAnalyzerFactory {
+ public static IndexPredicateAnalyzer createPredicateAnalyzer(List<String> ppdColumnList,
+ FieldValidator fieldValidator) {
+ // Create an analyzer for =, <, <=, >, >=, !=, (not) between, (not) in and null checks.
+ IndexPredicateAnalyzer analyzer = IndexPredicateAnalyzer.createAnalyzer(false);
+
+ for (String columnName : ppdColumnList) {
+ analyzer.allowColumnName(columnName);
+ }
+
+ analyzer.setAcceptsFields(true);
+ analyzer.setFieldValidator(fieldValidator);
+
+ return analyzer;
+ }
+
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/query/PhoenixQueryBuilder.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/query/PhoenixQueryBuilder.java
new file mode 100644
index 0000000..ab409ad
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/query/PhoenixQueryBuilder.java
@@ -0,0 +1,849 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.query;
+
+import com.google.common.base.CharMatcher;
+import com.google.common.base.Function;
+import com.google.common.base.Joiner;
+import com.google.common.base.Predicate;
+import com.google.common.base.Splitter;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import javax.annotation.Nullable;
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.phoenix.hive.constants.PhoenixStorageHandlerConstants;
+import org.apache.phoenix.hive.ql.index.IndexSearchCondition;
+import org.apache.phoenix.hive.util.ColumnMappingUtils;
+import org.apache.phoenix.hive.util.PhoenixStorageHandlerUtil;
+import org.apache.phoenix.hive.util.PhoenixUtil;
+import org.apache.phoenix.util.StringUtil;
+
+import static org.apache.phoenix.hive.util.ColumnMappingUtils.getColumnMappingMap;
+
+/**
+ * Query builder. Produces a query depending on the column list and conditions.
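+ * <p>
+ * Queries are built from the template {@code select $HINT$ $COLUMN_LIST$ from $TABLE_NAME$};
+ * with hypothetical inputs the result looks like:
+ * <pre>
+ * select &#47;*+ NO_CACHE *&#47; "col1","col2" from my_table where "col1" = 'x'
+ * </pre>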
+ */
+
+public class PhoenixQueryBuilder {
+
+ private static final Log LOG = LogFactory.getLog(PhoenixQueryBuilder.class);
+
+ private static final String QUERY_TEMPLATE = "select $HINT$ $COLUMN_LIST$ from $TABLE_NAME$";
+
+ private static final PhoenixQueryBuilder QUERY_BUILDER = new PhoenixQueryBuilder();
+
+ private PhoenixQueryBuilder() {
+ if (LOG.isInfoEnabled()) {
+ LOG.info("PhoenixQueryBuilder created");
+ }
+ }
+
+ public static PhoenixQueryBuilder getInstance() {
+ return QUERY_BUILDER;
+ }
+
+ private void addConditionColumnToReadColumn(List<String> readColumnList, List<String>
+ conditionColumnList) {
+ if (readColumnList.isEmpty()) {
+ return;
+ }
+
+ for (String conditionColumn : conditionColumnList) {
+ if (!readColumnList.contains(conditionColumn)) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Condition column " + conditionColumn + " does not exist in " +
+ "read-columns.");
+ }
+
+ readColumnList.add(conditionColumn);
+ }
+ }
+ }
+
+ private String makeQueryString(JobConf jobConf, String tableName, List<String>
+ readColumnList, String whereClause, String queryTemplate, String hints, Map<String,
+ TypeInfo> columnTypeMap) throws IOException {
+ StringBuilder sql = new StringBuilder();
+ List<String> conditionColumnList = buildWhereClause(jobConf, sql, whereClause, columnTypeMap);
+ readColumnList = replaceColumns(jobConf, readColumnList);
+
+ if (!conditionColumnList.isEmpty()) {
+ addConditionColumnToReadColumn(readColumnList, conditionColumnList);
+ sql.insert(0, queryTemplate.replace("$HINT$", hints).replace("$COLUMN_LIST$",
+ getSelectColumns(jobConf, tableName, readColumnList)).replace("$TABLE_NAME$",
+ tableName));
+ } else {
+ sql.append(queryTemplate.replace("$HINT$", hints).replace("$COLUMN_LIST$",
+ getSelectColumns(jobConf, tableName, readColumnList)).replace("$TABLE_NAME$",
+ tableName));
+ }
+
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Input query : " + sql.toString());
+ }
+
+ return sql.toString();
+ }
+
+ private static String findReplacement(JobConf jobConf, String column) {
+ Map<String, String> columnMappingMap = getColumnMappingMap(jobConf.get
+ (PhoenixStorageHandlerConstants.PHOENIX_COLUMN_MAPPING));
+ if (columnMappingMap != null && columnMappingMap.containsKey(column)) {
+ return columnMappingMap.get(column);
+ } else {
+ return column;
+ }
+ }
+
+ private static List<String> replaceColumns(JobConf jobConf, List<String> columnList) {
+ Map<String, String> columnMappingMap = getColumnMappingMap(jobConf.get
+ (PhoenixStorageHandlerConstants.PHOENIX_COLUMN_MAPPING));
+ if(columnMappingMap != null) {
+ List<String> newList = Lists.newArrayList();
+ for(String column:columnList) {
+ if(columnMappingMap.containsKey(column)) {
+ newList.add(columnMappingMap.get(column));
+ } else {
+ newList.add(column);
+ }
+ }
+ return newList;
+ }
+ return null;
+ }
+
+ private String makeQueryString(JobConf jobConf, String tableName, List<String>
+ readColumnList, List<IndexSearchCondition> searchConditions, String queryTemplate,
+ String hints) throws IOException {
+ StringBuilder query = new StringBuilder();
+ List<String> conditionColumnList = buildWhereClause(jobConf, query, searchConditions);
+
+ readColumnList = replaceColumns(jobConf, readColumnList);
+
+ if (!conditionColumnList.isEmpty()) {
+ addConditionColumnToReadColumn(readColumnList, conditionColumnList);
+ query.insert(0, queryTemplate.replace("$HINT$", hints).replace("$COLUMN_LIST$",
+ getSelectColumns(jobConf, tableName, readColumnList)).replace("$TABLE_NAME$",
+ tableName));
+ } else {
+ query.append(queryTemplate.replace("$HINT$", hints).replace("$COLUMN_LIST$",
+ getSelectColumns(jobConf, tableName, readColumnList)).replace("$TABLE_NAME$",
+ tableName));
+ }
+
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Input query : " + query.toString());
+ }
+
+ return query.toString();
+ }
+
+ private String getSelectColumns(JobConf jobConf, String tableName, List<String>
+ readColumnList) throws IOException {
+ String selectColumns = Joiner.on(PhoenixStorageHandlerConstants.COMMA).join(ColumnMappingUtils.quoteColumns(readColumnList));
+
+ if (PhoenixStorageHandlerConstants.EMPTY_STRING.equals(selectColumns)) {
+ selectColumns = "*";
+ } else {
+ if (PhoenixStorageHandlerUtil.isTransactionalTable(jobConf)) {
+ List<String> pkColumnList = PhoenixUtil.getPrimaryKeyColumnList(jobConf, tableName);
+ StringBuilder pkColumns = new StringBuilder();
+
+ for (String pkColumn : pkColumnList) {
+ if (!readColumnList.contains(pkColumn)) {
+ pkColumns.append("\"").append(pkColumn).append("\"" + PhoenixStorageHandlerConstants.COMMA);
+ }
+ }
+
+ selectColumns = pkColumns.toString() + selectColumns;
+ }
+ }
+
+ return selectColumns;
+ }
+
+ public String buildQuery(JobConf jobConf, String tableName, List<String> readColumnList,
+ String whereClause, Map<String, TypeInfo> columnTypeMap) throws
+ IOException {
+ String hints = getHint(jobConf, tableName);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Building query with columns : " + readColumnList + " table name : " +
+ tableName + " with where conditions : " + whereClause + " hints : " + hints);
+ }
+
+ return makeQueryString(jobConf, tableName, Lists.newArrayList(readColumnList),
+ whereClause, QUERY_TEMPLATE, hints, columnTypeMap);
+ }
+
+ public String buildQuery(JobConf jobConf, String tableName, List<String> readColumnList,
+ List<IndexSearchCondition> searchConditions) throws IOException {
+ String hints = getHint(jobConf, tableName);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Building query with columns : " + readColumnList + " table name : " +
+ tableName + " search conditions : " + searchConditions + " hints : " + hints);
+ }
+
+ return makeQueryString(jobConf, tableName, Lists.newArrayList(readColumnList),
+ searchConditions, QUERY_TEMPLATE, hints);
+ }
+
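+ // Builds the optional query hint, e.g. "/*+ NO_CACHE */" when scan block-caching is
+ // disabled, plus any per-table query hint found in the job configuration.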
+ private String getHint(JobConf jobConf, String tableName) {
+ StringBuilder hints = new StringBuilder("/*+ ");
+ if (!jobConf.getBoolean(PhoenixStorageHandlerConstants.HBASE_SCAN_CACHEBLOCKS, Boolean
+ .FALSE)) {
+ hints.append("NO_CACHE ");
+ }
+
+ String queryHint = jobConf.get(tableName + PhoenixStorageHandlerConstants
+ .PHOENIX_TABLE_QUERY_HINT);
+ if (queryHint != null) {
+ hints.append(queryHint);
+ }
+ hints.append(" */");
+
+ return hints.toString();
+ }
+
+ private List<String> buildWhereClause(JobConf jobConf, StringBuilder sql, String whereClause,
+ Map<String, TypeInfo> columnTypeMap) throws IOException {
+ if (whereClause == null || whereClause.isEmpty()) {
+ return Collections.emptyList();
+ }
+
+ List<String> conditionColumnList = Lists.newArrayList();
+ sql.append(" where ");
+
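+ // Hive renders a cast-to-string as UDFToString(...); Phoenix's equivalent is to_char.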
+ whereClause = StringUtils.replaceEach(whereClause, new String[]{"UDFToString"}, new
+ String[]{"to_char"});
+
+ for (String columnName : columnTypeMap.keySet()) {
+ if (whereClause.contains(columnName)) {
+ String column = findReplacement(jobConf, columnName);
+ whereClause = whereClause.replaceAll("\\b" + columnName + "\\b", "\"" + column + "\"");
+ conditionColumnList.add(column);
+
+ if (PhoenixStorageHandlerConstants.DATE_TYPE.equals(
+ columnTypeMap.get(columnName).getTypeName())) {
+ whereClause = applyDateFunctionUsingRegex(whereClause, column);
+ } else if (PhoenixStorageHandlerConstants.TIMESTAMP_TYPE.equals(
+ columnTypeMap.get(columnName).getTypeName())) {
+ whereClause = applyTimestampFunctionUsingRegex(whereClause, column);
+ }
+ }
+ }
+
+ sql.append(whereClause);
+
+ return conditionColumnList;
+ }
+
+ private String applyDateFunctionUsingRegex(String whereClause, String columnName) {
+ whereClause = applyFunctionForCommonOperator(whereClause, columnName, true);
+ whereClause = applyFunctionForBetweenOperator(whereClause, columnName, true);
+ whereClause = applyFunctionForInOperator(whereClause, columnName, true);
+
+ return whereClause;
+ }
+
+ private String applyTimestampFunctionUsingRegex(String whereClause, String columnName) {
+ whereClause = applyFunctionForCommonOperator(whereClause, columnName, false);
+ whereClause = applyFunctionForBetweenOperator(whereClause, columnName, false);
+ whereClause = applyFunctionForInOperator(whereClause, columnName, false);
+
+ return whereClause;
+ }
+
+ private String applyFunctionForCommonOperator(String whereClause, String columnName, boolean
+ isDate) {
+ String targetPattern = isDate ? PhoenixStorageHandlerConstants.DATE_PATTERN :
+ PhoenixStorageHandlerConstants.TIMESTAMP_PATTERN;
+ String pattern = StringUtils.replaceEach(PhoenixStorageHandlerConstants
+ .COMMON_OPERATOR_PATTERN,
+ new String[]{PhoenixStorageHandlerConstants.COLUMNE_MARKER,
+ PhoenixStorageHandlerConstants.PATERN_MARKER}, new String[]{columnName,
+ targetPattern});
+
+ Matcher matcher = Pattern.compile(pattern).matcher(whereClause);
+
+ while (matcher.find()) {
+ String token = matcher.group(1);
+ String datePart = matcher.group(3);
+
+ String convertString = token.replace(datePart, applyFunction(isDate ?
+ PhoenixStorageHandlerConstants.DATE_FUNCTION_TEMPLETE :
+ PhoenixStorageHandlerConstants.TIMESTAMP_FUNCTION_TEMPLATE, datePart));
+ whereClause = whereClause.replaceAll(StringUtils.replaceEach(token, new String[]{"(",
+ ")"}, new String[]{"\\(", "\\)"}), convertString);
+ }
+
+ return whereClause;
+ }
+
+ private String applyFunctionForBetweenOperator(String whereClause, String columnName, boolean
+ isDate) {
+ String targetPattern = isDate ? PhoenixStorageHandlerConstants.DATE_PATTERN :
+ PhoenixStorageHandlerConstants.TIMESTAMP_PATTERN;
+ String pattern = StringUtils.replaceEach(PhoenixStorageHandlerConstants
+ .BETWEEN_OPERATOR_PATTERN,
+ new String[]{PhoenixStorageHandlerConstants.COLUMNE_MARKER,
+ PhoenixStorageHandlerConstants.PATERN_MARKER}, new String[]{columnName,
+ targetPattern});
+
+ Matcher matcher = Pattern.compile(pattern).matcher(whereClause);
+
+ while (matcher.find()) {
+ String token = matcher.group(1);
+ boolean isNot = matcher.group(2) != null;
+ String fromDate = matcher.group(3);
+ String toDate = matcher.group(4);
+
+ String convertString = StringUtils.replaceEach(token, new String[]{fromDate, toDate},
+ new String[]{applyFunction(isDate ? PhoenixStorageHandlerConstants
+ .DATE_FUNCTION_TEMPLETE : PhoenixStorageHandlerConstants
+ .TIMESTAMP_FUNCTION_TEMPLATE, fromDate),
+ applyFunction(isDate ? PhoenixStorageHandlerConstants
+ .DATE_FUNCTION_TEMPLETE : PhoenixStorageHandlerConstants
+ .TIMESTAMP_FUNCTION_TEMPLATE, toDate)});
+
+ whereClause = whereClause.replaceAll(pattern, convertString);
+ }
+
+ return whereClause;
+ }
+
+ private String applyFunctionForInOperator(String whereClause, String columnName, boolean
+ isDate) {
+ String targetPattern = isDate ? PhoenixStorageHandlerConstants.DATE_PATTERN :
+ PhoenixStorageHandlerConstants.TIMESTAMP_PATTERN;
+ String pattern = StringUtils.replaceEach(PhoenixStorageHandlerConstants.IN_OPERATOR_PATTERN,
+ new String[]{PhoenixStorageHandlerConstants.COLUMNE_MARKER,
+ PhoenixStorageHandlerConstants.PATERN_MARKER}, new String[]{columnName,
+ targetPattern});
+ String itemPattern = "(" + targetPattern + ")";
+
+ Matcher matcher = Pattern.compile(pattern).matcher(whereClause);
+
+ while (matcher.find()) {
+ String token = matcher.group(1);
+ Matcher itemMatcher = Pattern.compile(itemPattern).matcher(token);
+ while (itemMatcher.find()) {
+ String item = itemMatcher.group(1);
+
+ token = token.replace(item, applyFunction(isDate ? PhoenixStorageHandlerConstants
+ .DATE_FUNCTION_TEMPLETE : PhoenixStorageHandlerConstants
+ .TIMESTAMP_FUNCTION_TEMPLATE, item));
+ }
+
+ whereClause = whereClause.replaceAll(pattern, token);
+ }
+
+ return whereClause;
+ }
+
+ /**
+ * Replaces the value into the value marker of the given function pattern.
+ * If the pattern is to_date($value$) and the value is '2016-01-15', returns
+ * to_date('2016-01-15'); if the pattern is cast($value$ as date), returns
+ * cast('2016-01-15' as date).
+ */
+ private String applyFunction(String pattern, String value) {
+ if (!value.startsWith(PhoenixStorageHandlerConstants.QUOTATION_MARK)) {
+ value = PhoenixStorageHandlerConstants.QUOTATION_MARK + value +
+ PhoenixStorageHandlerConstants.QUOTATION_MARK;
+ }
+
+ return pattern.replace(PhoenixStorageHandlerConstants.FUNCTION_VALUE_MARKER, value);
+ }
+
+ private String getCompareValueForDateAndTimestampFunction(String compareValue) {
+ if (compareValue.startsWith(PhoenixStorageHandlerConstants.QUOTATION_MARK)) {
+ return compareValue;
+ } else {
+ return PhoenixStorageHandlerConstants.QUOTATION_MARK + compareValue +
+ PhoenixStorageHandlerConstants.QUOTATION_MARK;
+ }
+ }
+
+ private String applyDateFunction(String whereClause, String columnName) {
+ StringBuilder whereCondition = new StringBuilder();
+ for (Iterator<String> iterator = Splitter.on(CharMatcher.WHITESPACE).omitEmptyStrings()
+ .split(whereClause).iterator(); iterator.hasNext(); whereCondition.append
+ (PhoenixStorageHandlerConstants.SPACE)) {
+ String token = iterator.next();
+ if (isMyCondition(columnName, token)) {
+ whereCondition.append(token);
+
+ String comparator = iterator.next();
+ whereCondition.append(PhoenixStorageHandlerConstants.SPACE);
+ whereCondition.append(comparator).append(PhoenixStorageHandlerConstants.SPACE);
+ if (PhoenixStorageHandlerConstants.BETWEEN_COMPARATOR.equalsIgnoreCase
+ (comparator)) {
+ whereCondition.append("to_date(").append
+ (getCompareValueForDateAndTimestampFunction(iterator.next())).append
+ (") ").append(iterator.next()).append(PhoenixStorageHandlerConstants
+ .SPACE)
+ .append("to_date(");
+
+ String toCompareValue = iterator.next();
+ if (toCompareValue.endsWith(PhoenixStorageHandlerConstants
+ .RIGHT_ROUND_BRACKET)) {
+ int rightBracketIndex = toCompareValue.indexOf
+ (PhoenixStorageHandlerConstants.RIGHT_ROUND_BRACKET);
+ whereCondition.append(getCompareValueForDateAndTimestampFunction
+ (toCompareValue.substring(0, rightBracketIndex))).append
+ (PhoenixStorageHandlerConstants.RIGHT_ROUND_BRACKET).append
+ (toCompareValue.substring(rightBracketIndex));
+ } else {
+ whereCondition.append(getCompareValueForDateAndTimestampFunction
+ (toCompareValue)).append(PhoenixStorageHandlerConstants
+ .RIGHT_ROUND_BRACKET);
+ }
+ } else if (PhoenixStorageHandlerConstants.IN_COMPARATOR.equalsIgnoreCase
+ (comparator)) {
+ while (iterator.hasNext()) {
+ String aToken = iterator.next();
+ if (aToken.equals(PhoenixStorageHandlerConstants.LEFT_ROUND_BRACKET) ||
+ aToken.equals(PhoenixStorageHandlerConstants.COMMA)) {
+ whereCondition.append(aToken);
+ } else if (aToken.equals(PhoenixStorageHandlerConstants
+ .RIGHT_ROUND_BRACKET)) {
+ whereCondition.append(aToken);
+ break;
+ } else if (aToken.endsWith(PhoenixStorageHandlerConstants
+ .RIGHT_ROUND_BRACKET)) {
+ int bracketIndex = aToken.indexOf(PhoenixStorageHandlerConstants
+ .RIGHT_ROUND_BRACKET);
+ whereCondition.append("to_date(").append
+ (getCompareValueForDateAndTimestampFunction(aToken.substring
+ (0, bracketIndex))).append
+ (PhoenixStorageHandlerConstants.RIGHT_ROUND_BRACKET).append
+ (aToken.substring(bracketIndex));
+ break;
+ } else if (aToken.endsWith(PhoenixStorageHandlerConstants.COMMA)) {
+ if (aToken.startsWith(PhoenixStorageHandlerConstants
+ .LEFT_ROUND_BRACKET)) {
+ int bracketIndex = aToken.lastIndexOf
+ (PhoenixStorageHandlerConstants.LEFT_ROUND_BRACKET);
+ whereCondition.append(aToken.substring(0, bracketIndex + 1))
+ .append("to_date(").append
+ (getCompareValueForDateAndTimestampFunction(aToken
+ .substring(bracketIndex + 1, aToken.length() - 1)
+ )).append("),");
+ } else {
+ whereCondition.append("to_date(").append
+ (getCompareValueForDateAndTimestampFunction(aToken
+ .substring(0, aToken.length() - 1))).append("),");
+ }
+ }
+
+ whereCondition.append(PhoenixStorageHandlerConstants.SPACE);
+ }
+ } else if (PhoenixStorageHandlerConstants.COMMON_COMPARATOR.contains(comparator)) {
+ String compareValue = getCompareValueForDateAndTimestampFunction(iterator
+ .next());
+ whereCondition.append("to_date(");
+ if (compareValue.endsWith(PhoenixStorageHandlerConstants.RIGHT_ROUND_BRACKET)) {
+ int rightBracketIndex = compareValue.indexOf
+ (PhoenixStorageHandlerConstants.RIGHT_ROUND_BRACKET);
+ whereCondition.append(getCompareValueForDateAndTimestampFunction
+ (compareValue.substring(0, rightBracketIndex))).append
+ (PhoenixStorageHandlerConstants.RIGHT_ROUND_BRACKET).append
+ (compareValue.substring(rightBracketIndex));
+ } else {
+ whereCondition.append(getCompareValueForDateAndTimestampFunction
+ (compareValue)).append(PhoenixStorageHandlerConstants
+ .RIGHT_ROUND_BRACKET);
+ }
+ }
+ } else {
+ whereCondition.append(token);
+ }
+ }
+
+ return whereCondition.toString();
+ }
+
+ // Assume timestamp value is yyyy-MM-dd HH:mm:ss.SSS
+ private String applyTimestampFunction(String whereClause, String columnName) {
+ StringBuilder whereCondition = new StringBuilder();
+ for (Iterator<String> iterator = Splitter.on(CharMatcher.WHITESPACE).omitEmptyStrings()
+ .split(whereClause).iterator(); iterator.hasNext(); whereCondition.append
+ (PhoenixStorageHandlerConstants.SPACE)) {
+ String token = iterator.next();
+ if (isMyCondition(columnName, token)) {
+ whereCondition.append(token);
+
+ String comparator = iterator.next();
+ whereCondition.append(PhoenixStorageHandlerConstants.SPACE);
+ whereCondition.append(comparator).append(PhoenixStorageHandlerConstants.SPACE);
+ if (PhoenixStorageHandlerConstants.BETWEEN_COMPARATOR.equalsIgnoreCase
+ (comparator)) {
+ String fromCompareValue = iterator.next() + PhoenixStorageHandlerConstants
+ .SPACE + iterator.next();
+ whereCondition.append("to_timestamp(").append
+ (getCompareValueForDateAndTimestampFunction(fromCompareValue)).append
+ (PhoenixStorageHandlerConstants.RIGHT_ROUND_BRACKET);
+ whereCondition.append(PhoenixStorageHandlerConstants.SPACE).append(iterator
+ .next()).append(PhoenixStorageHandlerConstants.SPACE);
+ whereCondition.append("to_timestamp(");
+
+ String toCompareValue = iterator.next() + PhoenixStorageHandlerConstants
+ .SPACE + iterator.next();
+ if (toCompareValue.endsWith(PhoenixStorageHandlerConstants
+ .RIGHT_ROUND_BRACKET)) {
+ int rightBracketIndex = toCompareValue.indexOf
+ (PhoenixStorageHandlerConstants.RIGHT_ROUND_BRACKET);
+ whereCondition.append(getCompareValueForDateAndTimestampFunction
+ (toCompareValue.substring(0, rightBracketIndex))).append
+ (PhoenixStorageHandlerConstants.RIGHT_ROUND_BRACKET).append
+ (toCompareValue.substring(rightBracketIndex));
+ } else {
+ whereCondition.append(getCompareValueForDateAndTimestampFunction
+ (toCompareValue)).append(PhoenixStorageHandlerConstants
+ .RIGHT_ROUND_BRACKET);
+ }
+ } else if (PhoenixStorageHandlerConstants.IN_COMPARATOR.equalsIgnoreCase
+ (comparator)) {
+ while (iterator.hasNext()) {
+ String aToken = iterator.next();
+ if (aToken.equals(PhoenixStorageHandlerConstants.LEFT_ROUND_BRACKET) ||
+ aToken.equals(PhoenixStorageHandlerConstants.COMMA)) {
+ whereCondition.append(aToken);
+ } else if (aToken.equals(PhoenixStorageHandlerConstants
+ .RIGHT_ROUND_BRACKET)) {
+ whereCondition.append(aToken);
+ break;
+ } else {
+ String compareValue = aToken + PhoenixStorageHandlerConstants.SPACE +
+ iterator.next();
+
+ if (compareValue.startsWith(PhoenixStorageHandlerConstants
+ .LEFT_ROUND_BRACKET)) {
+ int leftBracketIndex = compareValue.lastIndexOf
+ (PhoenixStorageHandlerConstants.LEFT_ROUND_BRACKET);
+ whereCondition.append(compareValue.substring(0, leftBracketIndex
+ + 1)).append("to_timestamp(");
+
+ if (compareValue.endsWith(PhoenixStorageHandlerConstants
+ .RIGHT_ROUND_BRACKET)) {
+ int rightBracketIndex = compareValue.indexOf
+ (PhoenixStorageHandlerConstants.RIGHT_ROUND_BRACKET);
+ whereCondition.append
+ (getCompareValueForDateAndTimestampFunction
+ (compareValue.substring(leftBracketIndex + 1,
+ rightBracketIndex)))
+ .append(PhoenixStorageHandlerConstants
+ .RIGHT_ROUND_BRACKET).append(compareValue
+ .substring(rightBracketIndex));
+ } else if (compareValue.endsWith(PhoenixStorageHandlerConstants
+ .COMMA)) {
+ whereCondition.append
+ (getCompareValueForDateAndTimestampFunction
+ (compareValue.substring(leftBracketIndex + 1,
+ compareValue.length() - 1)))
+ .append(PhoenixStorageHandlerConstants
+ .RIGHT_ROUND_BRACKET).append
+ (PhoenixStorageHandlerConstants.COMMA);
+ } else {
+ whereCondition.append
+ (getCompareValueForDateAndTimestampFunction
+ (compareValue.substring(leftBracketIndex + 1)
+ )).append(PhoenixStorageHandlerConstants
+ .RIGHT_ROUND_BRACKET);
+ }
+ } else if (compareValue.endsWith(PhoenixStorageHandlerConstants
+ .RIGHT_ROUND_BRACKET)) {
+ int rightBracketIndex = compareValue.indexOf
+ (PhoenixStorageHandlerConstants.RIGHT_ROUND_BRACKET);
+ whereCondition.append("to_timestamp(").append
+ (getCompareValueForDateAndTimestampFunction(compareValue
+ .substring(0, rightBracketIndex)))
+ .append(PhoenixStorageHandlerConstants
+ .RIGHT_ROUND_BRACKET).append(compareValue
+ .substring(rightBracketIndex));
+ break;
+ } else if (compareValue.endsWith(PhoenixStorageHandlerConstants
+ .COMMA)) {
+ whereCondition.append("to_timestamp(").append
+ (getCompareValueForDateAndTimestampFunction(compareValue
+ .substring(0, compareValue.length() - 1))).append
+ ("),");
+ }
+ }
+
+ whereCondition.append(PhoenixStorageHandlerConstants.SPACE);
+ }
+ } else if (PhoenixStorageHandlerConstants.COMMON_COMPARATOR.contains(comparator)) {
+ String timestampValue = iterator.next() + PhoenixStorageHandlerConstants
+ .SPACE + iterator.next();
+ whereCondition.append("to_timestamp(");
+ if (timestampValue.endsWith(PhoenixStorageHandlerConstants
+ .RIGHT_ROUND_BRACKET)) {
+ int rightBracketIndex = timestampValue.indexOf
+ (PhoenixStorageHandlerConstants.RIGHT_ROUND_BRACKET);
+ whereCondition.append(getCompareValueForDateAndTimestampFunction
+ (timestampValue.substring(0, rightBracketIndex))).append
+ (PhoenixStorageHandlerConstants.RIGHT_ROUND_BRACKET).append
+ (timestampValue.substring(rightBracketIndex));
+ } else {
+ whereCondition.append(getCompareValueForDateAndTimestampFunction
+ (timestampValue)).append(PhoenixStorageHandlerConstants
+ .RIGHT_ROUND_BRACKET);
+ }
+ }
+ } else {
+ whereCondition.append(token);
+ }
+ }
+
+ return whereCondition.toString();
+ }
+
+ private boolean isMyCondition(String columnName, String token) {
+ boolean itsMine = false;
+
+ if (columnName.equals(token)) {
+ itsMine = true;
+ } else if (token.startsWith(PhoenixStorageHandlerConstants.LEFT_ROUND_BRACKET) && token
+ .substring(token.lastIndexOf(PhoenixStorageHandlerConstants.LEFT_ROUND_BRACKET) +
+ 1).equals(columnName)) {
+ itsMine = true;
+ } else if (token.startsWith(PhoenixStorageHandlerConstants.LEFT_ROUND_BRACKET) && token
+ .endsWith(PhoenixStorageHandlerConstants.RIGHT_ROUND_BRACKET)
+ && token.substring(token.lastIndexOf(PhoenixStorageHandlerConstants
+ .LEFT_ROUND_BRACKET) + 1, token.indexOf(PhoenixStorageHandlerConstants
+ .RIGHT_ROUND_BRACKET)).equals(columnName)) {
+ itsMine = true;
+ }
+
+ return itsMine;
+ }
+
+ protected List<String> buildWhereClause(JobConf jobConf, StringBuilder sql,
+ List<IndexSearchCondition> conditions)
+ throws IOException {
+ if (conditions == null || conditions.isEmpty()) {
+ return Collections.emptyList();
+ }
+
+ List<String> columns = Lists.newArrayList();
+ sql.append(" where ");
+
+ Iterator<IndexSearchCondition> iter = conditions.iterator();
+ appendExpression(jobConf, sql, iter.next(), columns);
+ while (iter.hasNext()) {
+ sql.append(" and ");
+ appendExpression(jobConf, sql, iter.next(), columns);
+ }
+
+ return columns;
+ }
+
+ private void appendExpression(JobConf jobConf, StringBuilder sql, IndexSearchCondition condition,
+ List<String> columns) {
+ Expression expr = findExpression(condition);
+ if (expr != null) {
+ sql.append(expr.buildExpressionStringFrom(jobConf, condition));
+ String column = condition.getColumnDesc().getColumn();
+ String rColumn = findReplacement(jobConf, column);
+ if(rColumn != null) {
+ column = rColumn;
+ }
+
+ columns.add(column);
+ }
+ }
+
+ private Expression findExpression(final IndexSearchCondition condition) {
+ return Iterables.tryFind(Arrays.asList(Expression.values()), new Predicate<Expression>() {
+ @Override
+ public boolean apply(@Nullable Expression expr) {
+ return expr.isFor(condition);
+ }
+ }).orNull();
+ }
+
+ private static final Joiner JOINER_COMMA = Joiner.on(", ");
+ private static final Joiner JOINER_AND = Joiner.on(" and ");
+ private static final Joiner JOINER_SPACE = Joiner.on(" ");
+
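+ /**
+ * Maps Hive comparison UDFs onto Phoenix SQL operators. With hypothetical inputs, a
+ * BETWEEN condition on column col1 renders as {@code "col1" between 10 and 20} and an
+ * IN condition as {@code "col1" in (10, 20)}.
+ */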
+ private enum Expression {
+ EQUAL("UDFOPEqual", "="),
+ GREATER_THAN_OR_EQUAL_TO("UDFOPEqualOrGreaterThan", ">="),
+ GREATER_THAN("UDFOPGreaterThan", ">"),
+ LESS_THAN_OR_EQUAL_TO("UDFOPEqualOrLessThan", "<="),
+ LESS_THAN("UDFOPLessThan", "<"),
+ NOT_EQUAL("UDFOPNotEqual", "!="),
+ BETWEEN("GenericUDFBetween", "between", JOINER_AND, true) {
+ public boolean checkCondition(IndexSearchCondition condition) {
+ return condition.getConstantDescs() != null;
+ }
+ },
+ IN("GenericUDFIn", "in", JOINER_COMMA, true) {
+ public boolean checkCondition(IndexSearchCondition condition) {
+ return condition.getConstantDescs() != null;
+ }
+
+ public String createConstants(final String typeName, ExprNodeConstantDesc[] desc) {
+ return "(" + super.createConstants(typeName, desc) + ")";
+ }
+ },
+ IS_NULL("GenericUDFOPNull", "is null") {
+ public boolean checkCondition(IndexSearchCondition condition) {
+ return true;
+ }
+ },
+ IS_NOT_NULL("GenericUDFOPNotNull", "is not null") {
+ public boolean checkCondition(IndexSearchCondition condition) {
+ return true;
+ }
+ };
+
+ private final String hiveCompOp;
+ private final String sqlCompOp;
+ private final Joiner joiner;
+ private final boolean supportNotOperator;
+
+ Expression(String hiveCompOp, String sqlCompOp) {
+ this(hiveCompOp, sqlCompOp, null);
+ }
+
+ Expression(String hiveCompOp, String sqlCompOp, Joiner joiner) {
+ this(hiveCompOp, sqlCompOp, joiner, false);
+ }
+
+ Expression(String hiveCompOp, String sqlCompOp, Joiner joiner, boolean supportNotOp) {
+ this.hiveCompOp = hiveCompOp;
+ this.sqlCompOp = sqlCompOp;
+ this.joiner = joiner;
+ this.supportNotOperator = supportNotOp;
+ }
+
+ public boolean checkCondition(IndexSearchCondition condition) {
+ return condition.getConstantDesc().getValue() != null;
+ }
+
+ public boolean isFor(IndexSearchCondition condition) {
+ return condition.getComparisonOp().endsWith(hiveCompOp) && checkCondition(condition);
+ }
+
+ public String buildExpressionStringFrom(JobConf jobConf, IndexSearchCondition condition) {
+ final String type = condition.getColumnDesc().getTypeString();
+ String column = condition.getColumnDesc().getColumn();
+ String rColumn = findReplacement(jobConf, column);
+ if (rColumn != null) {
+ column = rColumn;
+ }
+ return JOINER_SPACE.join(
+ "\"" + column + "\"",
+ getSqlCompOpString(condition),
+ joiner != null ? createConstants(type, condition.getConstantDescs()) :
+ createConstant(type, condition.getConstantDesc()));
+ }
+
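+ // For operators that support negation, isNot() prepends "not", so a
+ // negated BETWEEN renders as "not between".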
+ public String getSqlCompOpString(IndexSearchCondition condition) {
+ return supportNotOperator ?
+ (condition.isNot() ? "not " : "") + sqlCompOp : sqlCompOp;
+ }
+
+ public String createConstant(String typeName, ExprNodeConstantDesc constantDesc) {
+ if (constantDesc == null) {
+ return StringUtil.EMPTY_STRING;
+ }
+
+ return createConstantString(typeName, String.valueOf(constantDesc.getValue()));
+ }
+
+ public String createConstants(final String typeName, ExprNodeConstantDesc[] constantDesc) {
+ if (constantDesc == null) {
+ return StringUtil.EMPTY_STRING;
+ }
+
+ return joiner.join(Iterables.transform(Arrays.asList(constantDesc),
+ new Function<ExprNodeConstantDesc, String>() {
+ @Nullable
+ @Override
+ public String apply(@Nullable ExprNodeConstantDesc desc) {
+ return createConstantString(typeName, String.valueOf(desc.getValue()));
+ }
+ }
+ ));
+ }
+
+ private static class ConstantStringWrapper {
+ private List<String> types;
+ private String prefix;
+ private String postfix;
+
+ ConstantStringWrapper(String type, String prefix, String postfix) {
+ this(Lists.newArrayList(type), prefix, postfix);
+ }
+
+ ConstantStringWrapper(List<String> types, String prefix, String postfix) {
+ this.types = types;
+ this.prefix = prefix;
+ this.postfix = postfix;
+ }
+
+ public String apply(final String typeName, String value) {
+ return Iterables.any(types, new Predicate<String>() {
+
+ @Override
+ public boolean apply(@Nullable String type) {
+ return typeName.startsWith(type);
+ }
+ }) ? prefix + value + postfix : value;
+ }
+ }
+
+ private static final String SINGLE_QUOTATION = "'";
+ private static final List<ConstantStringWrapper> WRAPPERS = Lists.newArrayList(
+ new ConstantStringWrapper(Lists.newArrayList(
+ serdeConstants.STRING_TYPE_NAME, serdeConstants.CHAR_TYPE_NAME,
+ serdeConstants.VARCHAR_TYPE_NAME, serdeConstants.DATE_TYPE_NAME,
+ serdeConstants.TIMESTAMP_TYPE_NAME
+ ), SINGLE_QUOTATION, SINGLE_QUOTATION),
+ new ConstantStringWrapper(serdeConstants.DATE_TYPE_NAME, "to_date(", ")"),
+ new ConstantStringWrapper(serdeConstants.TIMESTAMP_TYPE_NAME, "to_timestamp(", ")")
+ );
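+ // Wrappers apply in order: character and date/time constants are first
+ // single-quoted, then date/timestamp values are additionally wrapped, so a
+ // date constant ends up as to_date('1992-01-02') (hypothetical value).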
+
+ private String createConstantString(String typeName, String value) {
+ for (ConstantStringWrapper wrapper : WRAPPERS) {
+ value = wrapper.apply(typeName, value);
+ }
+
+ return value;
+ }
+ }
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/util/ColumnMappingUtils.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/util/ColumnMappingUtils.java
new file mode 100644
index 0000000..f348c0f
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/util/ColumnMappingUtils.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.phoenix.hive.util;
+
+import com.google.common.base.Splitter;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.phoenix.hive.constants.PhoenixStorageHandlerConstants;
+
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+
+/**
+ * Utility class for mapping between Hive and Phoenix column names.
+ */
+public class ColumnMappingUtils {
+
+ private static final Log LOG = LogFactory.getLog(ColumnMappingUtils.class);
+
+ public static Map<String, String> getColumnMappingMap(String columnMappings) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Column mappings : " + columnMappings);
+ }
+
+ if (columnMappings == null || columnMappings.length() == 0) {
+ if (LOG.isInfoEnabled()) {
+ LOG.info("phoenix.column.mapping not set. using field definition");
+ }
+
+ return Collections.emptyMap();
+ }
+
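+ // The mapping string has the form "hiveColumn:PHOENIX_COLUMN,..." such as
+ // "id:ID,name:FULL_NAME" (hypothetical example).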
+ Map<String, String> columnMappingMap = Splitter.on(PhoenixStorageHandlerConstants.COMMA)
+ .trimResults().withKeyValueSeparator(PhoenixStorageHandlerConstants.COLON).split
+ (columnMappings);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Column mapping map : " + columnMappingMap);
+ }
+
+ return columnMappingMap;
+ }
+
+ public static Map<String, String> getReverseColumnMapping(String columnMapping) {
+ Map<String, String> reverseMap = new LinkedHashMap<>();
+ Map<String, String> forward = getColumnMappingMap(columnMapping);
+ for (Map.Entry<String, String> entry : forward.entrySet()) {
+ reverseMap.put(entry.getValue(), entry.getKey());
+ }
+ return reverseMap;
+ }
+
+ public static List<String> quoteColumns(List<String> readColumnList) {
+ List<String> newList = new LinkedList<>();
+ for (String column : readColumnList) {
+ newList.add("\"" + column + "\"");
+ }
+ return newList;
+ }
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/util/PhoenixConnectionUtil.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/util/PhoenixConnectionUtil.java
new file mode 100644
index 0000000..8d76ac0
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/util/PhoenixConnectionUtil.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.util;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.phoenix.hive.constants.PhoenixStorageHandlerConstants;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.QueryUtil;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+import java.util.Map;
+import java.util.Properties;
+
+/**
+ * Set of methods to obtain a Connection depending on the configuration.
+ */
+public class PhoenixConnectionUtil {
+
+ private static final Log LOG = LogFactory.getLog(PhoenixConnectionUtil.class);
+
+ public static Connection getInputConnection(final Configuration conf, final Properties props)
+ throws SQLException {
+ String quorum = conf.get(PhoenixStorageHandlerConstants.ZOOKEEPER_QUORUM);
+ quorum = quorum == null ? props.getProperty(PhoenixStorageHandlerConstants
+ .ZOOKEEPER_QUORUM, PhoenixStorageHandlerConstants.DEFAULT_ZOOKEEPER_QUORUM) :
+ quorum;
+
+ int zooKeeperClientPort = conf.getInt(PhoenixStorageHandlerConstants.ZOOKEEPER_PORT, 0);
+ zooKeeperClientPort = zooKeeperClientPort == 0 ?
+ Integer.parseInt(props.getProperty(PhoenixStorageHandlerConstants.ZOOKEEPER_PORT,
+ String.valueOf(PhoenixStorageHandlerConstants.DEFAULT_ZOOKEEPER_PORT))) :
+ zooKeeperClientPort;
+
+ String zNodeParent = conf.get(PhoenixStorageHandlerConstants.ZOOKEEPER_PARENT);
+ zNodeParent = zNodeParent == null ? props.getProperty(PhoenixStorageHandlerConstants
+ .ZOOKEEPER_PARENT, PhoenixStorageHandlerConstants.DEFAULT_ZOOKEEPER_PARENT) :
+ zNodeParent;
+
+ return getConnection(quorum, zooKeeperClientPort, zNodeParent, PropertiesUtil
+ .combineProperties(props, conf));
+ }
+
+ public static Connection getConnection(final Table table) throws SQLException {
+ Map<String, String> tableParameterMap = table.getParameters();
+
+ String zookeeperQuorum = tableParameterMap.get(PhoenixStorageHandlerConstants
+ .ZOOKEEPER_QUORUM);
+ zookeeperQuorum = zookeeperQuorum == null ? PhoenixStorageHandlerConstants
+ .DEFAULT_ZOOKEEPER_QUORUM : zookeeperQuorum;
+
+ String clientPortString = tableParameterMap.get(PhoenixStorageHandlerConstants
+ .ZOOKEEPER_PORT);
+ int clientPort = clientPortString == null ? PhoenixStorageHandlerConstants
+ .DEFAULT_ZOOKEEPER_PORT : Integer.parseInt(clientPortString);
+
+ String zNodeParent = tableParameterMap.get(PhoenixStorageHandlerConstants.ZOOKEEPER_PARENT);
+ zNodeParent = zNodeParent == null ? PhoenixStorageHandlerConstants
+ .DEFAULT_ZOOKEEPER_PARENT : zNodeParent;
+
+ return DriverManager.getConnection(QueryUtil.getUrl(zookeeperQuorum, clientPort,
+ zNodeParent));
+ }
+
+ private static Connection getConnection(final String quorum, final Integer clientPort, String
+ zNodeParent, Properties props) throws SQLException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Connection attrs [quorum, port, znode] : " + quorum + ", " + clientPort +
+ ", " +
+ zNodeParent);
+ }
+
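+ // QueryUtil builds a JDBC URL of the form
+ // jdbc:phoenix:<quorum>[:<clientPort>[:<znodeParent>]].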
+ return DriverManager.getConnection(clientPort != null ? QueryUtil.getUrl(quorum,
+ clientPort, zNodeParent) : QueryUtil.getUrl(quorum), props);
+ }
+
+ public static Configuration getConfiguration(JobConf jobConf) {
+ Configuration conf = new Configuration(jobConf);
+ String quorum = conf.get(PhoenixStorageHandlerConstants.ZOOKEEPER_QUORUM);
+ if (quorum != null) {
+ conf.set(HConstants.ZOOKEEPER_QUORUM, quorum);
+ }
+ int zooKeeperClientPort = conf.getInt(PhoenixStorageHandlerConstants.ZOOKEEPER_PORT, 0);
+ if (zooKeeperClientPort != 0) {
+ conf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, zooKeeperClientPort);
+ }
+ String zNodeParent = conf.get(PhoenixStorageHandlerConstants.ZOOKEEPER_PARENT);
+ if (zNodeParent != null) {
+ conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, zNodeParent);
+ }
+ return conf;
+ }
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/util/PhoenixStorageHandlerUtil.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/util/PhoenixStorageHandlerUtil.java
new file mode 100644
index 0000000..19c26e5
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/util/PhoenixStorageHandlerUtil.java
@@ -0,0 +1,288 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.util;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.Maps;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.net.InetAddress;
+import java.net.InetSocketAddress;
+import java.net.UnknownHostException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+import javax.naming.NamingException;
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.util.Strings;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.ql.io.AcidOutputFormat.Options;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.net.DNS;
+import org.apache.phoenix.hive.PrimaryKeyData;
+import org.apache.phoenix.hive.constants.PhoenixStorageHandlerConstants;
+import org.apache.phoenix.hive.ql.index.IndexSearchCondition;
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
+
+/**
+ * Miscellaneous utilities for the PhoenixStorageHandler.
+ */
+public class PhoenixStorageHandlerUtil {
+
+ public static String getTargetTableName(Table table) {
+ Map<String, String> tableParameterMap = table.getParameters();
+ String tableName = tableParameterMap.get(PhoenixStorageHandlerConstants
+ .PHOENIX_TABLE_NAME);
+ if (tableName == null) {
+ tableName = table.getTableName();
+ tableParameterMap.put(PhoenixStorageHandlerConstants.PHOENIX_TABLE_NAME, tableName);
+ }
+
+ return tableName;
+ }
+
+ public static Object[] toTypedValues(JobConf jobConf, String typeName, String[] values) throws
+ Exception {
+ Object[] results = new Object[values.length];
+ DateFormat df = null;
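+ // Convert the Hive-side string constants into typed Java objects that can
+ // be bound into the Phoenix query.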
+
+ for (int i = 0, limit = values.length; i < limit; i++) {
+ if (serdeConstants.STRING_TYPE_NAME.equals(typeName) ||
+ typeName.startsWith(serdeConstants.CHAR_TYPE_NAME) ||
+ typeName.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
+ results[i] = values[i];
+ } else if (serdeConstants.INT_TYPE_NAME.equals(typeName)) {
+ results[i] = Integer.valueOf(values[i]);
+ } else if (serdeConstants.BIGINT_TYPE_NAME.equals(typeName)) {
+ results[i] = Long.valueOf(values[i]);
+ } else if (serdeConstants.DOUBLE_TYPE_NAME.equals(typeName)) {
+ results[i] = Double.valueOf(values[i]);
+ } else if (serdeConstants.FLOAT_TYPE_NAME.equals(typeName)) {
+ results[i] = Float.valueOf(values[i]);
+ } else if (serdeConstants.SMALLINT_TYPE_NAME.equals(typeName)) {
+ results[i] = Short.valueOf(values[i]);
+ } else if (serdeConstants.TINYINT_TYPE_NAME.equals(typeName)) {
+ results[i] = Byte.valueOf(values[i]);
+ } else if (serdeConstants.DATE_TYPE_NAME.equals(typeName)) {
+ String dateFormat = jobConf.get(PhoenixStorageHandlerConstants.HBASE_DATE_FORMAT,
+ PhoenixStorageHandlerConstants.DEFAULT_DATE_FORMAT);
+ df = new SimpleDateFormat(dateFormat);
+ results[i] = df.parse(values[i]).getTime();
+ } else if (serdeConstants.TIMESTAMP_TYPE_NAME.equals(typeName)) {
+ String timestampFormat = jobConf.get(PhoenixStorageHandlerConstants
+ .HBASE_TIMESTAMP_FORMAT, PhoenixStorageHandlerConstants
+ .DEFAULT_TIMESTAMP_FORMAT);
+ df = new SimpleDateFormat(timestampFormat);
+ results[i] = df.parse(values[i]).getTime();
+ } else if (typeName.contains(serdeConstants.DECIMAL_TYPE_NAME)) {
+ results[i] = new BigDecimal(values[i]);
+ }
+ }
+
+ return results;
+ }
+
+ public static String[] getConstantValues(IndexSearchCondition condition, String comparisonOp) {
+ String[] constantValues = null;
+
+ if (comparisonOp.endsWith("UDFOPEqual") || comparisonOp.endsWith("UDFOPNotEqual")
+ || comparisonOp.endsWith("UDFOPEqualOrGreaterThan") // key >= 1
+ || comparisonOp.endsWith("UDFOPGreaterThan") // key > 1
+ || comparisonOp.endsWith("UDFOPEqualOrLessThan") // key <= 1
+ || comparisonOp.endsWith("UDFOPLessThan")) { // key < 1
+ constantValues = new String[]{String.valueOf(condition.getConstantDesc().getValue())};
+ } else if (comparisonOp.endsWith("GenericUDFBetween")) {
+ constantValues = new String[]{String.valueOf(condition.getConstantDesc(0).getValue()),
+ String.valueOf(condition.getConstantDesc(1).getValue())};
+ } else if (comparisonOp.endsWith("GenericUDFIn")) {
+ ExprNodeConstantDesc[] constantDescs = condition.getConstantDescs();
+ constantValues = new String[constantDescs.length];
+ for (int i = 0, limit = constantDescs.length; i < limit; i++) {
+ constantValues[i] = String.valueOf(condition.getConstantDesc(i).getValue());
+ }
+ }
+
+ return constantValues;
+ }
+
+ public static String getRegionLocation(HRegionLocation location, Log log) throws IOException {
+ InetSocketAddress isa = new InetSocketAddress(location.getHostname(), location.getPort());
+ if (isa.isUnresolved()) {
+ log.warn("Failed resolve " + isa);
+ }
+ InetAddress regionAddress = isa.getAddress();
+ String regionLocation = null;
+ try {
+ regionLocation = reverseDNS(regionAddress);
+ } catch (NamingException e) {
+ log.warn("Cannot resolve the host name for " + regionAddress + " because of " + e);
+ regionLocation = location.getHostname();
+ }
+
+ return regionLocation;
+ }
+
+ // Copied from org.apache.hadoop.hbase.mapreduce.TableInputFormatBase#reverseDNS
+ private static final Map<InetAddress, String> reverseDNSCacheMap = Maps.newConcurrentMap();
+
+ private static String reverseDNS(InetAddress ipAddress) throws NamingException,
+ UnknownHostException {
+ String hostName = reverseDNSCacheMap.get(ipAddress);
+
+ if (hostName == null) {
+ String ipAddressString = null;
+ try {
+ ipAddressString = DNS.reverseDns(ipAddress, null);
+ } catch (Exception e) {
+ // Fall back to InetAddress when JNDI fails to pull up the reverse DNS
+ // entry from the name service. We also need InetAddress for IPv6, since
+ // resolving reverse DNS through JNDI doesn't work well with IPv6 addresses.
+ ipAddressString = InetAddress.getByName(ipAddress.getHostAddress()).getHostName();
+ }
+
+ if (ipAddressString == null) {
+ throw new UnknownHostException("No host found for " + ipAddress);
+ }
+
+ hostName = Strings.domainNamePointerToHostName(ipAddressString);
+ reverseDNSCacheMap.put(ipAddress, hostName);
+ }
+
+ return hostName;
+ }
+
+ public static String getTableKeyOfSession(JobConf jobConf, String tableName) {
+
+ String sessionId = jobConf.get(PhoenixConfigurationUtil.SESSION_ID);
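+ // Key format is "[<sessionId>]-<tableName>", e.g. "[abc123]-MY_TABLE"
+ // (hypothetical session id).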
+ return new StringBuilder("[").append(sessionId).append("]-").append(tableName).toString();
+ }
+
+ public static Map<String, TypeInfo> createColumnTypeMap(JobConf jobConf) {
+ Map<String, TypeInfo> columnTypeMap = Maps.newHashMap();
+
+ String[] columnNames = jobConf.get(serdeConstants.LIST_COLUMNS).split
+ (PhoenixStorageHandlerConstants.COMMA);
+ List<TypeInfo> typeInfos =
+ TypeInfoUtils.getTypeInfosFromTypeString(jobConf.get(serdeConstants.LIST_COLUMN_TYPES));
+
+ for (int i = 0, limit = columnNames.length; i < limit; i++) {
+ columnTypeMap.put(columnNames[i], typeInfos.get(i));
+ }
+
+ return columnTypeMap;
+ }
+
+ public static List<String> getReadColumnNames(Configuration conf) {
+ String colNames = conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
+ if (colNames != null && !colNames.isEmpty()) {
+ return Arrays.asList(colNames.split(PhoenixStorageHandlerConstants.COMMA));
+ }
+ return Collections.emptyList();
+ }
+
+ public static boolean isTransactionalTable(Properties tableProperties) {
+ String tableIsTransactional = tableProperties.getProperty(hive_metastoreConstants
+ .TABLE_IS_TRANSACTIONAL);
+
+ return tableIsTransactional != null && tableIsTransactional.equalsIgnoreCase("true");
+ }
+
+ public static boolean isTransactionalTable(Configuration config) {
+ String tableIsTransactional = config.get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL);
+
+ return tableIsTransactional != null && tableIsTransactional.equalsIgnoreCase("true");
+ }
+
+ public static void printConfiguration(Configuration config) {
+ if (Boolean.getBoolean("dev")) {
+ for (Entry<String, String> entry : config) {
+ System.out.println(entry.getKey() + "=" + entry.getValue());
+ }
+ }
+ }
+
+ public static String toString(Object obj) {
+ String content = null;
+
+ // Note: "obj instanceof java.lang.reflect.Array" is always false; test for
+ // an actual array instead.
+ if (obj instanceof Object[]) {
+ Object[] values = (Object[]) obj;
+
+ content = Joiner.on(PhoenixStorageHandlerConstants.COMMA).join(values);
+ } else {
+ content = obj.toString();
+ }
+
+ return content;
+ }
+
+ public static Map<?, ?> toMap(byte[] serialized) {
+ ByteArrayInputStream bais = new ByteArrayInputStream(serialized);
+
+ try {
+ return PrimaryKeyData.deserialize(bais).getData();
+ } catch (ClassNotFoundException | IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public static String getOptionsValue(Options options) {
+ StringBuilder content = new StringBuilder();
+
+ int bucket = options.getBucket();
+ String inspectorInfo = options.getInspector().getCategory() + ":" + options.getInspector()
+ .getTypeName();
+ long maxTxnId = options.getMaximumTransactionId();
+ long minTxnId = options.getMinimumTransactionId();
+ int recordIdColumn = options.getRecordIdColumn();
+ boolean isCompressed = options.isCompressed();
+ boolean isWritingBase = options.isWritingBase();
+
+ content.append("bucket : ").append(bucket).append(", inspectorInfo : ").append
+ (inspectorInfo).append(", minTxnId : ").append(minTxnId).append(", maxTxnId : ")
+ .append(maxTxnId).append(", recordIdColumn : ").append(recordIdColumn);
+ content.append(", isCompressed : ").append(isCompresses).append(", isWritingBase : ")
+ .append(isWritingBase);
+
+ return content.toString();
+ }
+}
diff --git a/phoenix-hive/src/main/java/org/apache/phoenix/hive/util/PhoenixUtil.java b/phoenix-hive/src/main/java/org/apache/phoenix/hive/util/PhoenixUtil.java
new file mode 100644
index 0000000..9dcb3ef
--- /dev/null
+++ b/phoenix-hive/src/main/java/org/apache/phoenix/hive/util/PhoenixUtil.java
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.util;
+
+import com.google.common.base.CharMatcher;
+import com.google.common.base.Splitter;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.phoenix.coprocessor.MetaDataProtocol.MetaDataMutationResult;
+import org.apache.phoenix.hive.constants.PhoenixStorageHandlerConstants;
+import org.apache.phoenix.jdbc.PhoenixConnection;
+import org.apache.phoenix.schema.MetaDataClient;
+import org.apache.phoenix.schema.PTable;
+import org.apache.phoenix.schema.TableNotFoundException;
+import org.apache.phoenix.util.ColumnInfo;
+import org.apache.phoenix.util.PhoenixRuntime;
+
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.DatabaseMetaData;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+/**
+ * Miscellaneous Phoenix utility methods.
+ */
+public class PhoenixUtil {
+
+ private static final Log LOG = LogFactory.getLog(PhoenixUtil.class);
+
+ public static String getPhoenixType(String hiveTypeName) {
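+ // e.g. "array<int>" -> "integer[]", "string" -> "varchar"; other Hive type
+ // names pass through unchanged.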
+ if (hiveTypeName.startsWith("array")) {
+ List<String> tokenList = Lists.newArrayList(Splitter.on(CharMatcher.is('<').or
+ (CharMatcher.is('>'))).split(hiveTypeName));
+ return getPhoenixType(tokenList.get(1)) + "[]";
+ } else if (hiveTypeName.startsWith("int")) {
+ return "integer";
+ } else if (hiveTypeName.equals("string")) {
+ return "varchar";
+ } else {
+ return hiveTypeName;
+ }
+ }
+
+ public static boolean existTable(Connection conn, String tableName) throws SQLException {
+ boolean exist = false;
+ DatabaseMetaData dbMeta = conn.getMetaData();
+
+ String[] schemaInfo = getTableSchema(tableName.toUpperCase());
+ try (ResultSet rs = dbMeta.getTables(null, schemaInfo[0], schemaInfo[1], null)) {
+ exist = rs.next();
+
+ if (LOG.isDebugEnabled()) {
+ if (exist) {
+ LOG.debug(rs.getString("TABLE_NAME") + " table exist. ");
+ } else {
+ LOG.debug("table " + tableName + " doesn't exist.");
+ }
+ }
+ }
+
+ return exist;
+ }
+
+ public static List<String> getPrimaryKeyColumnList(Connection conn, String tableName) throws
+ SQLException {
+ Map<Short, String> primaryKeyColumnInfoMap = Maps.newHashMap();
+ DatabaseMetaData dbMeta = conn.getMetaData();
+
+ String[] schemaInfo = getTableSchema(tableName.toUpperCase());
+ try (ResultSet rs = dbMeta.getPrimaryKeys(null, schemaInfo[0], schemaInfo[1])) {
+ while (rs.next()) {
+ primaryKeyColumnInfoMap.put(rs.getShort("KEY_SEQ"), rs.getString("COLUMN_NAME"));
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("PK-columns : " + primaryKeyColumnInfoMap);
+ }
+ }
+
+ return Lists.newArrayList(primaryKeyColumnInfoMap.values());
+ }
+
+ public static List<String> getPrimaryKeyColumnList(Configuration config, String tableName) {
+ List<String> pkColumnNameList = null;
+
+ try (Connection conn = PhoenixConnectionUtil.getInputConnection(config, new Properties())) {
+ pkColumnNameList = getPrimaryKeyColumnList(conn, tableName);
+ } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+
+ return pkColumnNameList;
+ }
+
+ public static void createTable(Connection conn, String createTableStatement) throws
+ SQLException {
+ conn.createStatement().execute(createTableStatement);
+ }
+
+ public static void dropTable(Connection conn, String tableName) throws SQLException {
+ conn.createStatement().execute("drop table " + tableName);
+ }
+
+ public static List<ColumnInfo> getColumnInfoList(Connection conn, String tableName) throws
+ SQLException {
+ List<ColumnInfo> columnInfoList = null;
+
+ try {
+ columnInfoList = PhoenixRuntime.generateColumnInfo(conn, tableName, null);
+ } catch (TableNotFoundException e) {
+ // The table may not exist yet while it is being created.
+ columnInfoList = Collections.emptyList();
+ }
+
+ return columnInfoList;
+ }
+
+ public static String[] getTableSchema(String tableName) {
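+ // Returns {schema, table}; the schema element stays null for an
+ // unqualified table name.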
+ String[] schemaInfo = new String[2];
+ String[] tokens = tableName.split("\\.");
+
+ if (tokens.length == 2) {
+ schemaInfo = tokens;
+ } else {
+ schemaInfo[1] = tokens[0];
+ }
+
+ return schemaInfo;
+ }
+
+ public static boolean isDisabledWal(MetaDataClient metaDataClient, String tableName) throws
+ SQLException {
+ String[] schemaInfo = getTableSchema(tableName.toUpperCase());
+ MetaDataMutationResult result = metaDataClient.updateCache(schemaInfo[0], schemaInfo[1]);
+ PTable dataTable = result.getTable();
+
+ return dataTable.isWALDisabled();
+ }
+
+ public static void alterTableForWalDisable(Connection conn, String tableName, boolean
+ disableMode) throws SQLException {
+ conn.createStatement().execute("alter table " + tableName + " set disable_wal=" +
+ disableMode);
+ }
+
+ public static void flush(Connection conn, String tableName) throws SQLException {
+ try (HBaseAdmin admin = ((PhoenixConnection) conn).getQueryServices().getAdmin()) {
+ admin.flush(TableName.valueOf(tableName));
+ } catch (IOException e) {
+ throw new SQLException(e);
+ }
+ }
+
+ public static String constructDeleteStatement(Connection conn, String tableName) throws
+ SQLException {
+ StringBuilder deleteQuery = new StringBuilder("delete from ").append(tableName).append(" " +
+ "where ");
+
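+ // Produces e.g. "delete from MY_TABLE where ID = ? and TS = ?"
+ // (hypothetical primary-key columns).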
+ List<String> primaryKeyColumnList = getPrimaryKeyColumnList(conn, tableName);
+ for (int i = 0, limit = primaryKeyColumnList.size(); i < limit; i++) {
+ String pkColumn = primaryKeyColumnList.get(i);
+ deleteQuery.append(pkColumn).append(PhoenixStorageHandlerConstants.EQUAL).append
+ (PhoenixStorageHandlerConstants.QUESTION);
+
+ if ((i + 1) != primaryKeyColumnList.size()) {
+ deleteQuery.append(" and ");
+ }
+ }
+
+ return deleteQuery.toString();
+ }
+
+ public static void closeResource(Statement stmt) throws SQLException {
+ if (stmt != null && !stmt.isClosed()) {
+ stmt.close();
+ }
+ }
+
+ public static void closeResource(Connection conn) throws SQLException {
+ if (conn != null && !conn.isClosed()) {
+ conn.close();
+ }
+ }
+}
diff --git a/phoenix-hive/src/test/java/org/apache/phoenix/hive/PrimaryKeyDataTest.java b/phoenix-hive/src/test/java/org/apache/phoenix/hive/PrimaryKeyDataTest.java
new file mode 100644
index 0000000..3b2634f
--- /dev/null
+++ b/phoenix-hive/src/test/java/org/apache/phoenix/hive/PrimaryKeyDataTest.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InvalidClassException;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.util.HashMap;
+
+import org.junit.Test;
+
+public class PrimaryKeyDataTest {
+ private static class Disallowed implements Serializable {
+ private static final long serialVersionUID = 1L;
+ }
+
+ private byte[] serialize(Object o) throws IOException {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ try (ObjectOutputStream oos = new ObjectOutputStream(baos)) {
+ oos.writeObject(o);
+ }
+ return baos.toByteArray();
+ }
+
+ @Test
+ public void testSerde() throws Exception {
+ HashMap<String,Object> data = new HashMap<>();
+ data.put("one", 1);
+ data.put("two", "two");
+ data.put("three", 3);
+
+ PrimaryKeyData pkData = new PrimaryKeyData(data);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ pkData.serialize(baos);
+
+ PrimaryKeyData pkCopy = PrimaryKeyData.deserialize(new ByteArrayInputStream(baos.toByteArray()));
+ assertEquals(data, pkCopy.getData());
+ }
+
+ @Test
+ public void testDisallowedDeserialization() throws Exception {
+ byte[] serializedMap = serialize(new HashMap<String,Object>());
+ byte[] serializedClass = serialize(new Disallowed());
+ byte[] serializedString = serialize("asdf");
+
+ try {
+ PrimaryKeyData.deserialize(new ByteArrayInputStream(serializedMap));
+ fail("Expected an InvalidClassException");
+ } catch (InvalidClassException e) {}
+ try {
+ PrimaryKeyData.deserialize(new ByteArrayInputStream(serializedClass));
+ fail("Expected an InvalidClassException");
+ } catch (InvalidClassException e) {}
+ try {
+ PrimaryKeyData.deserialize(new ByteArrayInputStream(serializedString));
+ fail("Expected an InvalidClassException");
+ } catch (InvalidClassException e) {}
+ }
+}
diff --git a/phoenix-hive/src/test/java/org/apache/phoenix/hive/query/PhoenixQueryBuilderTest.java b/phoenix-hive/src/test/java/org/apache/phoenix/hive/query/PhoenixQueryBuilderTest.java
new file mode 100644
index 0000000..bc2cbe3
--- /dev/null
+++ b/phoenix-hive/src/test/java/org/apache/phoenix/hive/query/PhoenixQueryBuilderTest.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.hive.query;
+
+import com.google.common.collect.Lists;
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.phoenix.hive.ql.index.IndexSearchCondition;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.List;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+import static org.junit.Assert.assertEquals;
+
+public class PhoenixQueryBuilderTest {
+ private static final PhoenixQueryBuilder BUILDER = PhoenixQueryBuilder.getInstance();
+ private static final String TABLE_NAME = "TEST_TABLE";
+
+ private IndexSearchCondition mockedIndexSearchCondition(String comparisonOp,
+ Object constantValue,
+ Object[] constantValues,
+ String columnName,
+ String typeString,
+ boolean isNot) {
+ IndexSearchCondition condition = mock(IndexSearchCondition.class);
+ when(condition.getComparisonOp()).thenReturn(comparisonOp);
+
+ if (constantValue != null) {
+ ExprNodeConstantDesc constantDesc = mock(ExprNodeConstantDesc.class);
+ when(constantDesc.getValue()).thenReturn(constantValue);
+ when(condition.getConstantDesc()).thenReturn(constantDesc);
+ }
+
+ ExprNodeColumnDesc columnDesc = mock(ExprNodeColumnDesc.class);
+ when(columnDesc.getColumn()).thenReturn(columnName);
+ when(columnDesc.getTypeString()).thenReturn(typeString);
+ when(condition.getColumnDesc()).thenReturn(columnDesc);
+
+ if (ArrayUtils.isNotEmpty(constantValues)) {
+ ExprNodeConstantDesc[] constantDescs = new ExprNodeConstantDesc[constantValues.length];
+ for (int i = 0; i < constantDescs.length; i++) {
+ constantDescs[i] = mock(ExprNodeConstantDesc.class);
+ when(condition.getConstantDesc(i)).thenReturn(constantDescs[i]);
+ when(constantDescs[i].getValue()).thenReturn(constantValues[i]);
+ }
+ when(condition.getConstantDescs()).thenReturn(constantDescs);
+ }
+
+ when(condition.isNot()).thenReturn(isNot);
+
+ return condition;
+ }
+
+ @Test
+ public void testBuildQueryWithCharColumns() throws IOException {
+ final String COLUMN_CHAR = "Column_Char";
+ final String COLUMN_VARCHAR = "Column_VChar";
+ final String expectedQueryPrefix = "select /*+ NO_CACHE */ \"" + COLUMN_CHAR + "\",\"" + COLUMN_VARCHAR +
+ "\" from TEST_TABLE where ";
+
+ JobConf jobConf = new JobConf();
+ List<String> readColumnList = Lists.newArrayList(COLUMN_CHAR, COLUMN_VARCHAR);
+ List<IndexSearchCondition> searchConditions = Lists.newArrayList(
+ mockedIndexSearchCondition("GenericUDFOPEqual", "CHAR_VALUE", null, COLUMN_CHAR, "char(10)", false),
+ mockedIndexSearchCondition("GenericUDFOPEqual", "CHAR_VALUE2", null, COLUMN_VARCHAR, "varchar(10)", false)
+ );
+
+ assertEquals(expectedQueryPrefix + "\"Column_Char\" = 'CHAR_VALUE' and \"Column_VChar\" = 'CHAR_VALUE2'",
+ BUILDER.buildQuery(jobConf, TABLE_NAME, readColumnList, searchConditions));
+
+ searchConditions = Lists.newArrayList(
+ mockedIndexSearchCondition("GenericUDFIn", null,
+ new Object[]{"CHAR1", "CHAR2", "CHAR3"}, COLUMN_CHAR, "char(10)", false)
+ );
+
+ assertEquals(expectedQueryPrefix + "\"Column_Char\" in ('CHAR1', 'CHAR2', 'CHAR3')",
+ BUILDER.buildQuery(jobConf, TABLE_NAME, readColumnList, searchConditions));
+
+ searchConditions = Lists.newArrayList(
+ mockedIndexSearchCondition("GenericUDFIn", null,
+ new Object[]{"CHAR1", "CHAR2", "CHAR3"}, COLUMN_CHAR, "char(10)", true)
+ );
+
+ assertEquals(expectedQueryPrefix + "\"Column_Char\" not in ('CHAR1', 'CHAR2', 'CHAR3')",
+ BUILDER.buildQuery(jobConf, TABLE_NAME, readColumnList, searchConditions));
+
+ searchConditions = Lists.newArrayList(
+ mockedIndexSearchCondition("GenericUDFBetween", null,
+ new Object[]{"CHAR1", "CHAR2"}, COLUMN_CHAR, "char(10)", false)
+ );
+
+ assertEquals(expectedQueryPrefix + "\"Column_Char\" between 'CHAR1' and 'CHAR2'",
+ BUILDER.buildQuery(jobConf, TABLE_NAME, readColumnList, searchConditions));
+
+ searchConditions = Lists.newArrayList(
+ mockedIndexSearchCondition("GenericUDFBetween", null,
+ new Object[]{"CHAR1", "CHAR2"}, COLUMN_CHAR, "char(10)", true)
+ );
+
+ assertEquals(expectedQueryPrefix + "\"Column_Char\" not between 'CHAR1' and 'CHAR2'",
+ BUILDER.buildQuery(jobConf, TABLE_NAME, readColumnList, searchConditions));
+ }
+
+ @Test
+ public void testBuildBetweenQueryWithDateColumns() throws IOException {
+ final String COLUMN_DATE = "Column_Date";
+ final String tableName = "TEST_TABLE";
+ final String expectedQueryPrefix = "select /*+ NO_CACHE */ \"" + COLUMN_DATE +
+ "\" from " + tableName + " where ";
+
+ JobConf jobConf = new JobConf();
+ List<String> readColumnList = Lists.newArrayList(COLUMN_DATE);
+
+ List<IndexSearchCondition> searchConditions = Lists.newArrayList(
+ mockedIndexSearchCondition("GenericUDFBetween", null,
+ new Object[]{"1992-01-02", "1992-02-02"}, COLUMN_DATE, "date", false)
+ );
+
+ assertEquals(expectedQueryPrefix +
+ "\"" + COLUMN_DATE + "\" between to_date('1992-01-02') and to_date('1992-02-02')",
+ BUILDER.buildQuery(jobConf, TABLE_NAME, readColumnList, searchConditions));
+
+ searchConditions = Lists.newArrayList(
+ mockedIndexSearchCondition("GenericUDFBetween", null,
+ new Object[]{"1992-01-02", "1992-02-02"}, COLUMN_DATE, "date", true)
+ );
+
+ assertEquals(expectedQueryPrefix +
+ "\"" + COLUMN_DATE + "\" not between to_date('1992-01-02') and to_date('1992-02-02')",
+ BUILDER.buildQuery(jobConf, TABLE_NAME, readColumnList, searchConditions));
+ }
+
+ @Test
+ public void testBuildQueryWithNotNull() throws IOException {
+ final String COLUMN_DATE = "Column_Date";
+ final String tableName = "TEST_TABLE";
+ final String expectedQueryPrefix = "select /*+ NO_CACHE */ \"" + COLUMN_DATE +
+ "\" from " + tableName + " where ";
+
+ JobConf jobConf = new JobConf();
+ List<String> readColumnList = Lists.newArrayList(COLUMN_DATE);
+
+ List<IndexSearchCondition> searchConditions = Lists.newArrayList(
+ mockedIndexSearchCondition("GenericUDFOPNotNull", null,
+ null, COLUMN_DATE, "date", true)
+ );
+
+ assertEquals(expectedQueryPrefix +
+ "\"" + COLUMN_DATE + "\" is not null ",
+ BUILDER.buildQuery(jobConf, TABLE_NAME, readColumnList, searchConditions));
+ }
+}
diff --git a/phoenix-kafka/pom.xml b/phoenix-kafka/pom.xml
new file mode 100644
index 0000000..8ef3586
--- /dev/null
+++ b/phoenix-kafka/pom.xml
@@ -0,0 +1,420 @@
+<?xml version='1.0'?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-connectors</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ </parent>
+ <artifactId>phoenix-kafka</artifactId>
+ <name>Phoenix - Kafka</name>
+
+ <licenses>
+ <license>
+ <name>The Apache Software License, Version 2.0</name>
+ <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ <comments />
+ </license>
+ </licenses>
+
+ <organization>
+ <name>Apache Software Foundation</name>
+ <url>http://www.apache.org</url>
+ </organization>
+
+ <properties>
+ <top.dir>${project.basedir}/..</top.dir>
+ </properties>
+
+ <dependencies>
+ <!-- Make sure we have all the antlr dependencies -->
+ <dependency>
+ <groupId>org.antlr</groupId>
+ <artifactId>antlr-runtime</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>jline</groupId>
+ <artifactId>jline</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>sqlline</groupId>
+ <artifactId>sqlline</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>joda-time</groupId>
+ <artifactId>joda-time</artifactId>
+ </dependency>
+ <!-- JSR-305 and jcip-annotations -->
+ <dependency>
+ <groupId>com.github.stephenc.findbugs</groupId>
+ <artifactId>findbugs-annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.github.stephenc.jcip</groupId>
+ <artifactId>jcip-annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-core-asl</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-mapper-asl</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-jaxrs</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-xc</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-all</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.google.protobuf</groupId>
+ <artifactId>protobuf-java</artifactId>
+ <version>${protobuf-java.version}</version>
+ </dependency>
+ <!-- Intentionally avoid a dependencyManagement entry because of conflict with thin-client -->
+ <dependency>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpclient</artifactId>
+ <version>4.0.1</version>
+ </dependency>
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ <version>${log4j.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ <version>${slf4j.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.iq80.snappy</groupId>
+ <artifactId>snappy</artifactId>
+ <version>${snappy.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.github.stephenc.high-scale-lib</groupId>
+ <artifactId>high-scale-lib</artifactId>
+ <version>1.1.1</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yammer.metrics</groupId>
+ <artifactId>metrics-core</artifactId>
+ <version>2.1.2</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.htrace</groupId>
+ <artifactId>htrace-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-collections</groupId>
+ <artifactId>commons-collections</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-csv</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.google.code.findbugs</groupId>
+ <artifactId>jsr305</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-testing-util</artifactId>
+ <scope>test</scope>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-it</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ <scope>test</scope>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-protocol</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>xom</groupId>
+ <artifactId>xom</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop2-compat</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop2-compat</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minikdc</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.jruby.joni</groupId>
+ <artifactId>joni</artifactId>
+ <version>${joni.version}</version>
+ </dependency>
+
+ <!-- Kafka integration for Phoenix -->
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-core</artifactId>
+ <classifier>tests</classifier>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.kafka</groupId>
+ <artifactId>kafka_2.11</artifactId>
+ <version>${kafka.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.kafka</groupId>
+ <artifactId>kafka-clients</artifactId>
+ <version>${kafka.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.kafka</groupId>
+ <artifactId>kafka_2.11</artifactId>
+ <version>${kafka.version}</version>
+ <classifier>test</classifier>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.kafka</groupId>
+ <artifactId>kafka-clients</artifactId>
+ <version>${kafka.version}</version>
+ <classifier>test</classifier>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.kafka</groupId>
+ <artifactId>kafka-tools</artifactId>
+ <version>${kafka.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-flume</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.flume</groupId>
+ <artifactId>flume-ng-core</artifactId>
+ </dependency>
+<!--
+ <dependency>
+ <groupId>org.apache.tephra</groupId>
+ <artifactId>tephra-core</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+-->
+ </dependencies>
+
+ <build>
+ <plugins>
+ <!-- Add the ant-generated sources to the source path -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-site-plugin</artifactId>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.maven.doxia</groupId>
+ <artifactId>doxia-module-markdown</artifactId>
+ <version>1.3</version>
+ </dependency>
+ <dependency>
+ <groupId>lt.velykis.maven.skins</groupId>
+ <artifactId>reflow-velocity-tools</artifactId>
+ <version>1.0.0</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.velocity</groupId>
+ <artifactId>velocity</artifactId>
+ <version>1.7</version>
+ </dependency>
+ </dependencies>
+ </plugin>
+
+ <!-- Setup eclipse -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-eclipse-plugin</artifactId>
+ <configuration>
+ <buildcommands>
+ <buildcommand>org.jamon.project.templateBuilder</buildcommand>
+ <buildcommand>org.eclipse.jdt.core.javabuilder</buildcommand>
+ </buildcommands>
+ </configuration>
+ </plugin>
+
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <version>${maven-dependency-plugin.version}</version>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-resources-plugin</artifactId>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <finalName>phoenix-kafka-${project.version}-minimal</finalName>
+ <shadedArtifactAttached>false</shadedArtifactAttached>
+ <promoteTransitiveDependencies>true</promoteTransitiveDependencies>
+ <shadeTestJar>false</shadeTestJar>
+ <artifactSet>
+ <includes>
+ <include>org.apache.phoenix:phoenix-kafka</include>
+ <include>org.apache.kafka:kafka-clients</include>
+ <include>org.apache.phoenix:phoenix-flume</include>
+ </includes>
+ </artifactSet>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+ <reporting>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-project-info-reports-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>findbugs-maven-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </reporting>
+</project>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minikdc</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.jruby.joni</groupId>
+ <artifactId>joni</artifactId>
+ <version>${joni.version}</version>
+ </dependency>
+
+ <!-- Kafka integration for Phoenix -->
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-core</artifactId>
+ <classifier>tests</classifier>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.kafka</groupId>
+ <artifactId>kafka_2.11</artifactId>
+ <version>${kafka.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.kafka</groupId>
+ <artifactId>kafka-clients</artifactId>
+ <version>${kafka.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.kafka</groupId>
+ <artifactId>kafka_2.11</artifactId>
+ <version>${kafka.version}</version>
+ <classifier>test</classifier>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.kafka</groupId>
+ <artifactId>kafka-clients</artifactId>
+ <version>${kafka.version}</version>
+ <classifier>test</classifier>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.kafka</groupId>
+ <artifactId>kafka-tools</artifactId>
+ <version>${kafka.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-flume</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.flume</groupId>
+ <artifactId>flume-ng-core</artifactId>
+ </dependency>
+<!--
+ <dependency>
+ <groupId>org.apache.tephra</groupId>
+ <artifactId>tephra-core</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+-->
+ </dependencies>
+
+ <build>
+ <plugins>
+ <!-- Site generation: markdown support and the reflow skin -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-site-plugin</artifactId>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.maven.doxia</groupId>
+ <artifactId>doxia-module-markdown</artifactId>
+ <version>1.3</version>
+ </dependency>
+ <dependency>
+ <groupId>lt.velykis.maven.skins</groupId>
+ <artifactId>reflow-velocity-tools</artifactId>
+ <version>1.0.0</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.velocity</groupId>
+ <artifactId>velocity</artifactId>
+ <version>1.7</version>
+ </dependency>
+ </dependencies>
+ </plugin>
+
+ <!-- Setup eclipse -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-eclipse-plugin</artifactId>
+ <configuration>
+ <buildcommands>
+ <buildcommand>org.jamon.project.templateBuilder</buildcommand>
+ <buildcommand>org.eclipse.jdt.core.javabuilder</buildcommand>
+ </buildcommands>
+ </configuration>
+ </plugin>
+
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <version>${maven-dependency-plugin.version}</version>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-resources-plugin</artifactId>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <finalName>phoenix-kafka-${project.version}-minimal</finalName>
+ <shadedArtifactAttached>false</shadedArtifactAttached>
+ <promoteTransitiveDependencies>true</promoteTransitiveDependencies>
+ <shadeTestJar>false</shadeTestJar>
+ <artifactSet>
+ <includes>
+ <include>org.apache.phoenix:phoenix-kafka</include>
+ <include>org.apache.kafka:kafka-clients</include>
+ <include>org.apache.phoenix:phoenix-flume</include>
+ </includes>
+ </artifactSet>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+ <reporting>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-project-info-reports-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>findbugs-maven-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </reporting>
+</project>
diff --git a/phoenix-kafka/src/it/java/org/apache/phoenix/kafka/PhoenixConsumerIT.java b/phoenix-kafka/src/it/java/org/apache/phoenix/kafka/PhoenixConsumerIT.java
new file mode 100644
index 0000000..cfec391
--- /dev/null
+++ b/phoenix-kafka/src/it/java/org/apache/phoenix/kafka/PhoenixConsumerIT.java
@@ -0,0 +1,276 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.kafka;
+
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.Properties;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.flume.Context;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.phoenix.end2end.BaseHBaseManagedTimeIT;
+import org.apache.phoenix.flume.DefaultKeyGenerator;
+import org.apache.phoenix.flume.FlumeConstants;
+import org.apache.phoenix.flume.serializer.EventSerializers;
+import org.apache.phoenix.kafka.consumer.PhoenixConsumer;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.common.io.Resources;
+
+import kafka.admin.AdminUtils;
+import kafka.server.KafkaConfig;
+import kafka.server.KafkaServer;
+import kafka.utils.MockTime;
+import kafka.utils.TestUtils;
+import kafka.utils.Time;
+import kafka.utils.ZKStringSerializer$;
+import kafka.utils.ZkUtils;
+import kafka.zk.EmbeddedZookeeper;
+
+public class PhoenixConsumerIT extends BaseHBaseManagedTimeIT {
+ private static final String ZKHOST = "127.0.0.1";
+ private static final String BROKERHOST = "127.0.0.1";
+ private static final String BROKERPORT = "9092";
+ private static final String TOPIC = "topic1";
+ private KafkaServer kafkaServer;
+ private PhoenixConsumer pConsumer;
+ private EmbeddedZookeeper zkServer;
+ private ZkClient zkClient;
+ private Connection conn;
+
+ @Before
+ public void setUp() throws IOException, SQLException {
+ // setup Zookeeper
+ zkServer = new EmbeddedZookeeper();
+ String zkConnect = ZKHOST + ":" + zkServer.port();
+ zkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer$.MODULE$);
+ ZkUtils zkUtils = ZkUtils.apply(zkClient, false);
+
+ // setup Broker
+ Properties brokerProps = new Properties();
+ brokerProps.setProperty("zookeeper.connect", zkConnect);
+ brokerProps.setProperty("broker.id", "0");
+ brokerProps.setProperty("log.dirs",
+ Files.createTempDirectory("kafka-").toAbsolutePath().toString());
+ brokerProps.setProperty("listeners", "PLAINTEXT://" + BROKERHOST + ":" + BROKERPORT);
+ KafkaConfig config = new KafkaConfig(brokerProps);
+ Time mock = new MockTime();
+ kafkaServer = TestUtils.createServer(config, mock);
+ kafkaServer.startup();
+
+ // create topic
+ AdminUtils.createTopic(zkUtils, TOPIC, 1, 1, new Properties());
+
+ pConsumer = new PhoenixConsumer();
+
+ Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+ conn = DriverManager.getConnection(getUrl(), props);
+ }
+
+ @Test
+ public void testPhoenixConsumerWithFile() throws SQLException {
+ String consumerPath = "consumer.props";
+ PhoenixConsumerThread pConsumerThread = new PhoenixConsumerThread(pConsumer, consumerPath);
+ pConsumerThread.properties.setProperty(FlumeConstants.CONFIG_JDBC_URL, getUrl());
+ Thread phoenixConsumer = new Thread(pConsumerThread);
+
+ String producerPath = "producer.props";
+ KafkaProducerThread kProducerThread = new KafkaProducerThread(producerPath, TOPIC);
+ Thread kafkaProducer = new Thread(kProducerThread);
+
+ phoenixConsumer.start();
+
+ // The consumer runs until stopped, so this join only gives it up to
+ // ten seconds to initialize before the producer starts.
+ try {
+ phoenixConsumer.join(10000);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+
+ kafkaProducer.start();
+
+ try {
+ kafkaProducer.join();
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+
+ // join() above guarantees the producer thread has finished; stop the consumer.
+ if (!kafkaProducer.isAlive()) {
+ System.out.println("kafka producer has finished, stopping the consumer");
+ pConsumer.stop();
+ }
+
+ // Verify our serializer wrote out data
+ ResultSet rs = conn.createStatement().executeQuery("SELECT * FROM SAMPLE1");
+ assertTrue(rs.next());
+ assertTrue(rs.getFetchSize() > 0);
+ rs.close();
+ }
+
+ @Test
+ public void testPhoenixConsumerWithProperties() throws SQLException {
+
+ final String fullTableName = "SAMPLE2";
+ final String ddl = "CREATE TABLE IF NOT EXISTS SAMPLE2(uid VARCHAR NOT NULL,c1 VARCHAR,c2 VARCHAR,c3 VARCHAR CONSTRAINT pk PRIMARY KEY(uid))\n";
+
+ Properties consumerProperties = new Properties();
+ consumerProperties.setProperty(FlumeConstants.CONFIG_TABLE, fullTableName);
+ consumerProperties.setProperty(FlumeConstants.CONFIG_JDBC_URL, getUrl());
+ consumerProperties.setProperty(FlumeConstants.CONFIG_SERIALIZER,EventSerializers.REGEX.name());
+ consumerProperties.setProperty(FlumeConstants.CONFIG_TABLE_DDL, ddl);
+ consumerProperties.setProperty(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_REGULAR_EXPRESSION,"([^\\,]*),([^\\,]*),([^\\,]*)");
+ consumerProperties.setProperty(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMN_NAMES,"c1,c2,c3");
+ consumerProperties.setProperty(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR, DefaultKeyGenerator.UUID.name());
+ consumerProperties.setProperty(KafkaConstants.BOOTSTRAP_SERVERS, "localhost:9092");
+ consumerProperties.setProperty(KafkaConstants.TOPICS, "topic1,topic2");
+ consumerProperties.setProperty(KafkaConstants.TIMEOUT, "100");
+
+ PhoenixConsumerThread pConsumerThread = new PhoenixConsumerThread(pConsumer, consumerProperties);
+ Thread phoenixConsumer = new Thread(pConsumerThread);
+
+ Properties producerProperties = new Properties();
+ producerProperties.setProperty(KafkaConstants.BOOTSTRAP_SERVERS, "localhost:9092");
+ producerProperties.setProperty(KafkaConstants.KEY_SERIALIZER, KafkaConstants.DEFAULT_KEY_SERIALIZER);
+ producerProperties.setProperty(KafkaConstants.VALUE_SERIALIZER, KafkaConstants.DEFAULT_VALUE_SERIALIZER);
+ producerProperties.setProperty("auto.commit.interval.ms", "1000");
+
+ KafkaProducerThread kProducerThread = new KafkaProducerThread(producerProperties, TOPIC);
+ Thread kafkaProducer = new Thread(kProducerThread);
+
+ phoenixConsumer.start();
+
+ try {
+ phoenixConsumer.join(10000);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+
+ kafkaProducer.start();
+
+ try {
+ kafkaProducer.join();
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+
+ if (!kafkaProducer.isAlive()) {
+ System.out.println("kafka producer has finished, stopping the consumer");
+ pConsumer.stop();
+ }
+
+ // Verify our serializer wrote out data
+ ResultSet rs = conn.createStatement().executeQuery("SELECT * FROM SAMPLE2");
+ assertTrue(rs.next());
+ assertTrue(rs.getFetchSize() > 0);
+ rs.close();
+ }
+
+ @After
+ public void cleanUp() throws Exception {
+ kafkaServer.shutdown();
+ zkClient.close();
+ zkServer.shutdown();
+ conn.close();
+ }
+
+ class PhoenixConsumerThread implements Runnable {
+ PhoenixConsumer pConsumer;
+ Properties properties;
+
+ PhoenixConsumerThread(PhoenixConsumer pConsumer, String path) {
+ this.pConsumer = pConsumer;
+ try (InputStream props = Resources.getResource(path).openStream()) {
+ Properties properties = new Properties();
+ properties.load(props);
+ this.properties = properties;
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ PhoenixConsumerThread(PhoenixConsumer pConsumer, Properties properties) {
+ this.pConsumer = pConsumer;
+ this.properties = properties;
+ }
+
+ @Override
+ public void run() {
+ // initialize Kafka
+ pConsumer.initializeKafka(properties);
+
+ // configure the Phoenix serializer
+ Context context = pConsumer.prepareContext();
+ pConsumer.configure(context);
+
+ // start the kafka consumer
+ pConsumer.start();
+
+ // process kafka messages
+ pConsumer.process();
+ }
+ }
+
+ class KafkaProducerThread implements Runnable {
+ KafkaProducer<String, String> producer;
+ String topic;
+
+ KafkaProducerThread(String path, String topic) {
+ this.topic = topic;
+ try (InputStream props = Resources.getResource(path).openStream()) {
+ Properties properties = new Properties();
+ properties.load(props);
+ producer = new KafkaProducer<>(properties);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ KafkaProducerThread(Properties properties, String topic) {
+ this.topic = topic;
+ producer = new KafkaProducer<>(properties);
+ }
+
+ @Override
+ public void run() {
+ try {
+ for (int i = 1; i <= 10; i++) {
+ String message = String.format("%s,%.3f,%d", "msg" + i, i * 2000f, i);
+ producer.send(new ProducerRecord<String, String>(topic, message));
+ producer.flush();
+ Thread.sleep(100);
+ }
+ } catch (Throwable throwable) {
+ throwable.printStackTrace();
+ } finally {
+ producer.close();
+ }
+ }
+ }
+}
diff --git a/phoenix-kafka/src/it/resources/consumer.props b/phoenix-kafka/src/it/resources/consumer.props
new file mode 100644
index 0000000..703fd7c
--- /dev/null
+++ b/phoenix-kafka/src/it/resources/consumer.props
@@ -0,0 +1,32 @@
+############################################################################
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+############################################################################
+
+serializer=regex
+serializer.rowkeyType=uuid
+serializer.regex=([^\,]*),([^\,]*),([^\,]*)
+serializer.columns=c1,c2,c3
+
+jdbcUrl=jdbc:phoenix:localhost
+table=SAMPLE1
+ddl=CREATE TABLE IF NOT EXISTS SAMPLE1(uid VARCHAR NOT NULL,c1 VARCHAR,c2 VARCHAR,c3 VARCHAR CONSTRAINT pk PRIMARY KEY(uid))
+
+bootstrap.servers=localhost:9092
+topics=topic1,topic2
+poll.timeout.ms=100
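+
+# Sketch of one way to hand this file to the consumer tool (invocation
+# assumed, not part of this patch; the jar name comes from the shade
+# configuration, and the path is resolved against the Hadoop filesystem):
+#
+# hadoop jar phoenix-kafka-<version>-minimal.jar \
+#     org.apache.phoenix.kafka.consumer.PhoenixConsumerTool \
+#     -f /path/to/consumer.props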
diff --git a/phoenix-kafka/src/it/resources/producer.props b/phoenix-kafka/src/it/resources/producer.props
new file mode 100644
index 0000000..4c3cd2f
--- /dev/null
+++ b/phoenix-kafka/src/it/resources/producer.props
@@ -0,0 +1,24 @@
+############################################################################
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+############################################################################
+
+bootstrap.servers=localhost:9092
+auto.commit.interval.ms=1000
+key.serializer=org.apache.kafka.common.serialization.StringSerializer
+value.serializer=org.apache.kafka.common.serialization.StringSerializer
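+
+# For reference: the regex serializer configured in consumer.props expects
+# each produced record to be a three-field CSV string; the messages built
+# in PhoenixConsumerIT look like:
+#
+# msg1,2000.000,1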
diff --git a/phoenix-kafka/src/main/java/org/apache/phoenix/kafka/KafkaConstants.java b/phoenix-kafka/src/main/java/org/apache/phoenix/kafka/KafkaConstants.java
new file mode 100644
index 0000000..cc1aa61
--- /dev/null
+++ b/phoenix-kafka/src/main/java/org/apache/phoenix/kafka/KafkaConstants.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.kafka;
+
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.producer.ProducerConfig;
+import org.apache.kafka.common.serialization.StringDeserializer;
+import org.apache.kafka.common.serialization.StringSerializer;
+
+public final class KafkaConstants {
+
+ public static final String BOOTSTRAP_SERVERS = ProducerConfig.BOOTSTRAP_SERVERS_CONFIG;
+
+ public static final String KEY_SERIALIZER = ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG;
+
+ public static final String VALUE_SERIALIZER = ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG;
+
+ public static final String DEFAULT_KEY_SERIALIZER = StringSerializer.class.getName();
+
+ public static final String DEFAULT_VALUE_SERIALIZER = StringSerializer.class.getName();
+
+ public static final String KEY_DESERIALIZER = ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG;
+
+ public static final String VALUE_DESERIALIZER = ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG;
+
+ public static final String DEFAULT_KEY_DESERIALIZER = StringDeserializer.class.getName();
+
+ public static final String DEFAULT_VALUE_DESERIALIZER = StringDeserializer.class.getName();
+
+ public static final String TOPICS = "topics";
+
+ public static final String GROUP_ID = ConsumerConfig.GROUP_ID_CONFIG;
+
+ public static final String TIMEOUT = "poll.timeout.ms";
+
+ public static final long DEFAULT_TIMEOUT = 100;
+}
diff --git a/phoenix-kafka/src/main/java/org/apache/phoenix/kafka/consumer/PhoenixConsumer.java b/phoenix-kafka/src/main/java/org/apache/phoenix/kafka/consumer/PhoenixConsumer.java
new file mode 100644
index 0000000..1759cec
--- /dev/null
+++ b/phoenix-kafka/src/main/java/org/apache/phoenix/kafka/consumer/PhoenixConsumer.java
@@ -0,0 +1,276 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.kafka.consumer;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.sql.SQLException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.Random;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.event.EventBuilder;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.phoenix.flume.FlumeConstants;
+import org.apache.phoenix.flume.serializer.EventSerializer;
+import org.apache.phoenix.flume.serializer.EventSerializers;
+import org.apache.phoenix.kafka.KafkaConstants;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+import com.google.common.base.Throwables;
+import com.google.common.collect.Lists;
+
+public class PhoenixConsumer {
+ private static final Logger logger = LoggerFactory.getLogger(PhoenixConsumer.class);
+
+ private KafkaConsumer<String, String> consumer = null;
+ private Properties properties = new Properties();
+ private Integer batchSize;
+ private long timeout;
+ private EventSerializer serializer;
+ private Boolean process = true;
+
+ public PhoenixConsumer() {
+
+ }
+
+ public PhoenixConsumer(Configuration conf) throws IOException {
+ // initialize Kafka from the consumer properties file named in conf
+ initializeKafka(conf);
+
+ // configure the Phoenix serializer
+ Context context = prepareContext();
+ configure(context);
+
+ // start the kafka consumer
+ start();
+
+ // process kafka messages
+ process();
+ }
+
+ /**
+ * Initializes Kafka from the properties file named by "kafka.consumer.file" in the configuration.
+ * @param conf
+ * @throws IOException
+ */
+ public void initializeKafka(Configuration conf) throws IOException {
+ // get the kafka consumer file
+ String file = conf.get("kafka.consumer.file");
+ Preconditions.checkNotNull(file,"File path cannot be empty, please specify in the arguments");
+
+ Path path = new Path(file);
+ FileSystem fs = FileSystem.get(conf);
+ // Let any read failure propagate instead of continuing with empty properties.
+ try (InputStream props = fs.open(path)) {
+ properties.load(props);
+ }
+
+ initializeKafka(properties);
+ }
+
+ /**
+ * Initializes Kafka with the given properties.
+ * @param properties
+ */
+ public void initializeKafka(Properties properties) {
+ this.properties = properties;
+
+ String servers = properties.getProperty(KafkaConstants.BOOTSTRAP_SERVERS);
+ Preconditions.checkNotNull(servers, "Bootstrap servers cannot be empty, please specify in the configuration file");
+
+ if (properties.getProperty(KafkaConstants.GROUP_ID) == null) {
+ properties.setProperty(KafkaConstants.GROUP_ID, "group-" + new Random().nextInt(100000));
+ }
+
+ if (properties.getProperty(KafkaConstants.TIMEOUT) == null) {
+ properties.setProperty(KafkaConstants.TIMEOUT, String.valueOf(KafkaConstants.DEFAULT_TIMEOUT));
+ }
+
+ String topics = properties.getProperty(KafkaConstants.TOPICS);
+ Preconditions.checkNotNull(topics,"Topics cannot be empty, please specify in the configuration file");
+
+ properties.setProperty(KafkaConstants.KEY_DESERIALIZER, KafkaConstants.DEFAULT_KEY_DESERIALIZER);
+
+ properties.setProperty(KafkaConstants.VALUE_DESERIALIZER, KafkaConstants.DEFAULT_VALUE_DESERIALIZER);
+
+ this.consumer = new KafkaConsumer<>(properties);
+ consumer.subscribe(Arrays.asList(topics.split(",")));
+ }
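+
+ // A minimal Properties set accepted by the initialization above (keys
+ // mirror src/it/resources/consumer.props; the values are illustrative):
+ //
+ // bootstrap.servers=localhost:9092 (required)
+ // topics=topic1,topic2 (required, comma separated)
+ // serializer=regex (read later by configure())
+ // group.id, poll.timeout.ms (optional; defaulted above)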
+
+ /**
+ * Converts the consumer properties to a Flume context.
+ */
+ public Context prepareContext() {
+ Map<String, String> map = new HashMap<String, String>();
+ for (Entry<Object, Object> entry : properties.entrySet()) {
+ map.put((String) entry.getKey(), (String) entry.getValue());
+ }
+ return new Context(map);
+ }
+
+ /**
+ * Configures the consumer (timeout, batch size, serializer) from the context.
+ */
+ public void configure(Context context){
+ this.timeout = context.getLong(KafkaConstants.TIMEOUT, KafkaConstants.DEFAULT_TIMEOUT);
+ this.batchSize = context.getInteger(FlumeConstants.CONFIG_BATCHSIZE, FlumeConstants.DEFAULT_BATCH_SIZE);
+ final String eventSerializerType = context.getString(FlumeConstants.CONFIG_SERIALIZER);
+
+ Preconditions.checkNotNull(eventSerializerType,"Event serializer cannot be empty, please specify in the configuration file");
+ initializeSerializer(context,eventSerializerType);
+ }
+
+ /**
+ * Process the kafka messages
+ */
+ public void process() {
+ int timeouts = 0;
+ // Poll until stop() clears the process flag.
+ while (process) {
+ // Read records with a short timeout; timing out while idle is expected.
+ // Assumes key and value are plain text.
+ ConsumerRecords<String, String> records = consumer.poll(this.timeout);
+ if (records.count() == 0) {
+ timeouts++;
+ } else {
+ System.out.printf("Got %d records after %d timeouts\n", records.count(), timeouts);
+ timeouts = 0;
+ }
+
+ if (!records.isEmpty()) {
+ List<Event> events = Lists.newArrayListWithCapacity(records.count());
+ for (ConsumerRecord<String, String> record : records) {
+ Event event = EventBuilder.withBody(Bytes.toBytes(record.value()));
+ events.add(event);
+ }
+ // save to HBase through the Phoenix event serializer
+ try {
+ serializer.upsertEvents(events);
+ } catch (SQLException e) {
+ logger.error("Error upserting events to Phoenix.", e);
+ }
+ }
+ }
+ }
+
+ /**
+ * Starts the consumer by initializing the serializer.
+ */
+ public void start() {
+ logger.info("Starting consumer {} ", this.getClass());
+ try {
+ serializer.initialize();
+ } catch (Exception ex) {
+ logger.error("Error {} in initializing the serializer.", ex.getMessage());
+ Throwables.propagate(ex);
+ }
+ }
+
+ /**
+ * Stops the consumer and closes the serializer.
+ */
+ public void stop() {
+ this.close();
+ consumer.close();
+ try {
+ serializer.close();
+ } catch (SQLException e) {
+ logger.error(" Error while closing connection {} for consumer.", e.getMessage());
+ }
+ }
+
+ /**
+ * Signals the poll loop to stop and waits for in-flight work to drain.
+ */
+ public void close(){
+ this.process = false;
+ try {
+ // give the poll loop up to 30 seconds to finish its current batch
+ Thread.sleep(30000);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * Initializes the serializer for kafka messages.
+ * @param context
+ * @param eventSerializerType
+ */
+ private void initializeSerializer(final Context context, final String eventSerializerType) {
+ String serializerClazz = null;
+ EventSerializers eventSerializer = null;
+
+ try {
+ eventSerializer = EventSerializers.valueOf(eventSerializerType.toUpperCase());
+ } catch (IllegalArgumentException iae) {
+ serializerClazz = eventSerializerType;
+ }
+
+ final Context serializerContext = new Context();
+ serializerContext.putAll(context.getSubProperties(FlumeConstants.CONFIG_SERIALIZER_PREFIX));
+ copyPropertiesToSerializerContext(context,serializerContext);
+
+ try {
+ @SuppressWarnings("unchecked")
+ Class<? extends EventSerializer> clazz = null;
+ if (serializerClazz == null) {
+ clazz = (Class<? extends EventSerializer>) Class.forName(eventSerializer.getClassName());
+ } else {
+ clazz = (Class<? extends EventSerializer>) Class.forName(serializerClazz);
+ }
+
+ serializer = clazz.newInstance();
+ serializer.configure(serializerContext);
+ } catch (Exception e) {
+ logger.error("Could not instantiate event serializer.", e);
+ Throwables.propagate(e);
+ }
+ }
+
+ /**
+ * Copy properties to serializer context.
+ * @param context
+ * @param serializerContext
+ */
+ private void copyPropertiesToSerializerContext(Context context, Context serializerContext) {
+ serializerContext.put(FlumeConstants.CONFIG_TABLE_DDL,context.getString(FlumeConstants.CONFIG_TABLE_DDL));
+ serializerContext.put(FlumeConstants.CONFIG_TABLE,context.getString(FlumeConstants.CONFIG_TABLE));
+ serializerContext.put(FlumeConstants.CONFIG_ZK_QUORUM,context.getString(FlumeConstants.CONFIG_ZK_QUORUM));
+ serializerContext.put(FlumeConstants.CONFIG_JDBC_URL,context.getString(FlumeConstants.CONFIG_JDBC_URL));
+ serializerContext.put(FlumeConstants.CONFIG_BATCHSIZE,context.getString(FlumeConstants.CONFIG_BATCHSIZE));
+ }
+
+}
diff --git a/phoenix-kafka/src/main/java/org/apache/phoenix/kafka/consumer/PhoenixConsumerTool.java b/phoenix-kafka/src/main/java/org/apache/phoenix/kafka/consumer/PhoenixConsumerTool.java
new file mode 100644
index 0000000..8c10aa5
--- /dev/null
+++ b/phoenix-kafka/src/main/java/org/apache/phoenix/kafka/consumer/PhoenixConsumerTool.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.kafka.consumer;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class PhoenixConsumerTool extends Configured implements Tool {
+ private static final Logger logger = LoggerFactory.getLogger(PhoenixConsumerTool.class);
+ static final Option FILE_PATH_OPT = new Option("f", "file", true, "input file path");
+ static final Option HELP_OPT = new Option("h", "help", false, "Show this help and quit");
+
+ public static Options getOptions() {
+ Options options = new Options();
+ options.addOption(FILE_PATH_OPT);
+ options.addOption(HELP_OPT);
+ return options;
+ }
+
+ public static CommandLine parseOptions(String[] args) {
+
+ Options options = getOptions();
+
+ CommandLineParser parser = new PosixParser();
+ CommandLine cmdLine = null;
+ try {
+ cmdLine = parser.parse(options, args);
+ } catch (ParseException e) {
+ printHelpAndExit("Error parsing command line options: " + e.getMessage(), options);
+ }
+
+ if (cmdLine.hasOption(HELP_OPT.getOpt())) {
+ printHelpAndExit(options, 0);
+ }
+
+ if (!cmdLine.hasOption(FILE_PATH_OPT.getOpt())) {
+ throw new IllegalStateException(FILE_PATH_OPT.getLongOpt() + " is a mandatory parameter");
+ }
+
+ if (!cmdLine.getArgList().isEmpty()) {
+ throw new IllegalStateException("Got unexpected extra parameters: " + cmdLine.getArgList());
+ }
+
+ return cmdLine;
+ }
+
+ public static void printHelpAndExit(String errorMessage, Options options) {
+ System.err.println(errorMessage);
+ printHelpAndExit(options, 1);
+ }
+
+ public static void printHelpAndExit(Options options, int exitCode) {
+ HelpFormatter formatter = new HelpFormatter();
+ formatter.printHelp("help", options);
+ System.exit(exitCode);
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ Configuration conf = HBaseConfiguration.create(getConf());
+
+ CommandLine cmdLine = null;
+ try {
+ cmdLine = parseOptions(args);
+ } catch (IllegalStateException e) {
+ printHelpAndExit(e.getMessage(), getOptions());
+ }
+
+ String path = cmdLine.getOptionValue(FILE_PATH_OPT.getOpt());
+ conf.set("kafka.consumer.file", path);
+ new PhoenixConsumer(conf);
+
+ // zero tells ToolRunner the consumer exited cleanly
+ return 0;
+ }
+
+ public static void main(String[] args) throws Exception {
+ int exitStatus = ToolRunner.run(new PhoenixConsumerTool(), args);
+ System.exit(exitStatus);
+ }
+}
diff --git a/phoenix-pig/pom.xml b/phoenix-pig/pom.xml
new file mode 100644
index 0000000..d033034
--- /dev/null
+++ b/phoenix-pig/pom.xml
@@ -0,0 +1,460 @@
+<?xml version='1.0'?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-connectors</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ </parent>
+ <artifactId>phoenix-pig</artifactId>
+ <name>Phoenix - Pig</name>
+
+ <properties>
+ <top.dir>${project.basedir}/..</top.dir>
+ <shaded.package>org.apache.phoenix.shaded</shaded.package>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-core</artifactId>
+ <classifier>tests</classifier>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>joda-time</groupId>
+ <artifactId>joda-time</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.pig</groupId>
+ <artifactId>pig</artifactId>
+ <classifier>h2</classifier>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-testing-util</artifactId>
+ <scope>test</scope>
+ <optional>true</optional>
+ <exclusions>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-it</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ <scope>test</scope>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-protocol</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>xom</groupId>
+ <artifactId>xom</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop2-compat</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop2-compat</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ </dependency>
+ <!-- Test Dependencies -->
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-all</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tephra</groupId>
+ <artifactId>tephra-core</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <version>${maven-dependency-plugin.version}</version>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <finalName>phoenix-${project.version}-pig</finalName>
+ <shadedArtifactAttached>false</shadedArtifactAttached>
+ <promoteTransitiveDependencies>true</promoteTransitiveDependencies>
+ <shadeTestJar>false</shadeTestJar>
+ <transformers>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>README.md</resource>
+ <file>${project.basedir}/../README.md</file>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>LICENSE.txt</resource>
+ <file>${project.basedir}/../LICENSE</file>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>NOTICE</resource>
+ <file>${project.basedir}/../NOTICE</file>
+ </transformer>
+ </transformers>
+ <artifactSet>
+ <includes>
+ <include>*:*</include>
+ </includes>
+ <excludes>
+ <exclude>org.apache.phoenix:phoenix-client</exclude>
+ <exclude>org.apache.pig:pig</exclude>
+ <exclude>joda-time:joda-time</exclude>
+ <exclude>xom:xom</exclude>
+ </excludes>
+ </artifactSet>
+ <filters>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ <exclude>META-INF/license/*</exclude>
+ <exclude>LICENSE.*</exclude>
+ <exclude>NOTICE.*</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ <relocations>
+ <!-- COM relocation -->
+ <relocation>
+ <pattern>com.codahale</pattern>
+ <shadedPattern>${shaded.package}.com.codahale</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.fasterxml</pattern>
+ <shadedPattern>${shaded.package}.com.fasterxml</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.google.common</pattern>
+ <shadedPattern>${shaded.package}.com.google.common</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.jamesmurty</pattern>
+ <shadedPattern>${shaded.package}.com.jamesmurty</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.jcraft</pattern>
+ <shadedPattern>${shaded.package}.com.jcraft</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.lmax</pattern>
+ <shadedPattern>${shaded.package}.com.lmax</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.sun.jersey</pattern>
+ <shadedPattern>${shaded.package}.com.sun.jersey</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.thoughtworks</pattern>
+ <shadedPattern>${shaded.package}.com.thoughtworks</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.yammer</pattern>
+ <shadedPattern>${shaded.package}.com.yammer</shadedPattern>
+ </relocation>
+ <!-- IO relocations -->
+ <relocation>
+ <pattern>io.netty</pattern>
+ <shadedPattern>${shaded.package}.io.netty</shadedPattern>
+ </relocation>
+ <!-- ORG relocations -->
+ <relocation>
+ <pattern>org.antlr</pattern>
+ <shadedPattern>${shaded.package}.org.antlr</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.aopalliance</pattern>
+ <shadedPattern>${shaded.package}.org.aopalliance</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.codehaus</pattern>
+ <shadedPattern>${shaded.package}.org.codehaus</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.fusesource</pattern>
+ <shadedPattern>${shaded.package}.org.fusesource</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.hamcrest</pattern>
+ <shadedPattern>${shaded.package}.org.hamcrest</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.hsqldb</pattern>
+ <shadedPattern>${shaded.package}.org.hsqldb</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.iq80</pattern>
+ <shadedPattern>${shaded.package}.org.iq80</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.jamon</pattern>
+ <shadedPattern>${shaded.package}.org.jamon</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.jboss</pattern>
+ <shadedPattern>${shaded.package}.org.jboss</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.jcodings</pattern>
+ <shadedPattern>${shaded.package}.org.jcodings</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.jets3t</pattern>
+ <shadedPattern>${shaded.package}.org.jets3t</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.joni</pattern>
+ <shadedPattern>${shaded.package}.org.joni</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.junit</pattern>
+ <shadedPattern>${shaded.package}.org.junit</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.kosmix</pattern>
+ <shadedPattern>${shaded.package}.org.kosmix</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.mortbay</pattern>
+ <shadedPattern>${shaded.package}.org.mortbay</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.objectweb</pattern>
+ <shadedPattern>${shaded.package}.org.objectweb</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.stringtemplate</pattern>
+ <shadedPattern>${shaded.package}.org.stringtemplate</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.tukaani</pattern>
+ <shadedPattern>${shaded.package}.org.tukaani</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.znerd</pattern>
+ <shadedPattern>${shaded.package}.org.znerd</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.avro</pattern>
+ <shadedPattern>${shaded.package}.org.apache.avro</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.commons</pattern>
+ <shadedPattern>${shaded.package}.org.apache.commons</shadedPattern>
+ <excludes>
+ <exclude>org.apache.commons.csv.**</exclude>
+ <exclude>org.apache.commons.logging.**</exclude>
+ </excludes>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.directory</pattern>
+ <shadedPattern>${shaded.package}.org.apache.directory</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.http</pattern>
+ <shadedPattern>${shaded.package}.org.apache.http</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.jasper</pattern>
+ <shadedPattern>${shaded.package}.org.apache.jasper</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.jute</pattern>
+ <shadedPattern>${shaded.package}.org.apache.jute</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.mina</pattern>
+ <shadedPattern>${shaded.package}.org.apache.mina</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.oro</pattern>
+ <shadedPattern>${shaded.package}.org.apache.oro</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.taglibs</pattern>
+ <shadedPattern>${shaded.package}.org.apache.taglibs</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.thrift</pattern>
+ <shadedPattern>${shaded.package}.org.apache.thrift</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.tools</pattern>
+ <shadedPattern>${shaded.package}.org.apache.tools</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.twill</pattern>
+ <shadedPattern>${shaded.package}.org.apache.twill</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.velocity</pattern>
+ <shadedPattern>${shaded.package}.org.apache.velocity</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.zookeeper</pattern>
+ <shadedPattern>${shaded.package}.org.apache.zookeeper</shadedPattern>
+ </relocation>
+ <!-- NET relocations -->
+ <relocation>
+ <pattern>net</pattern>
+ <shadedPattern>${shaded.package}.net</shadedPattern>
+ </relocation>
+ <!-- Misc relocations -->
+ <relocation>
+ <pattern>antlr</pattern>
+ <shadedPattern>${shaded.package}.antlr</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>it.unimi</pattern>
+ <shadedPattern>${shaded.package}.it.unimi</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>jline</pattern>
+ <shadedPattern>${shaded.package}.jline</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>junit</pattern>
+ <shadedPattern>${shaded.package}.junit</shadedPattern>
+ </relocation>
+ </relocations>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/phoenix-pig/pom.xml~ b/phoenix-pig/pom.xml~
new file mode 100644
index 0000000..db30056
--- /dev/null
+++ b/phoenix-pig/pom.xml~
@@ -0,0 +1,460 @@
+<?xml version='1.0'?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ </parent>
+ <artifactId>phoenix-pig</artifactId>
+ <name>Phoenix - Pig</name>
+
+ <properties>
+ <top.dir>${project.basedir}/..</top.dir>
+ <shaded.package>org.apache.phoenix.shaded</shaded.package>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-core</artifactId>
+ <classifier>tests</classifier>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>joda-time</groupId>
+ <artifactId>joda-time</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.pig</groupId>
+ <artifactId>pig</artifactId>
+ <classifier>h2</classifier>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-testing-util</artifactId>
+ <scope>test</scope>
+ <optional>true</optional>
+ <exclusions>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-it</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ <scope>test</scope>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-protocol</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>xom</groupId>
+ <artifactId>xom</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop2-compat</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop2-compat</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ </dependency>
+ <!-- Test Dependencies -->
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-all</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tephra</groupId>
+ <artifactId>tephra-core</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <version>${maven-dependency-plugin.version}</version>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <finalName>phoenix-${project.version}-pig</finalName>
+ <shadedArtifactAttached>false</shadedArtifactAttached>
+ <promoteTransitiveDependencies>true</promoteTransitiveDependencies>
+ <shadeTestJar>false</shadeTestJar>
+ <transformers>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>README.md</resource>
+ <file>${project.basedir}/../README.md</file>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>LICENSE.txt</resource>
+ <file>${project.basedir}/../LICENSE</file>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ <resource>NOTICE</resource>
+ <file>${project.basedir}/../NOTICE</file>
+ </transformer>
+ </transformers>
+ <artifactSet>
+ <includes>
+ <include>*:*</include>
+ </includes>
+ <excludes>
+ <exclude>org.apache.phoenix:phoenix-client</exclude>
+ <exclude>org.apache.pig:pig</exclude>
+ <exclude>joda-time:joda-time</exclude>
+ <exclude>xom:xom</exclude>
+ </excludes>
+ </artifactSet>
+ <filters>
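+ <!-- Drop jar signature files and per-artifact license/notice copies; the transformers above already attach the canonical LICENSE and NOTICE -->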
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ <exclude>META-INF/license/*</exclude>
+ <exclude>LICENSE.*</exclude>
+ <exclude>NOTICE.*</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ <relocations>
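+ <!-- Relocate bundled third-party packages under ${shaded.package} so they cannot clash with the same libraries on a user's classpath -->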
+ <!-- COM relocation -->
+ <relocation>
+ <pattern>com.codahale</pattern>
+ <shadedPattern>${shaded.package}.com.codahale</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.fasterxml</pattern>
+ <shadedPattern>${shaded.package}.com.fasterxml</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.google.common</pattern>
+ <shadedPattern>${shaded.package}.com.google.common</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.jamesmurty</pattern>
+ <shadedPattern>${shaded.package}.com.jamesmurty</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.jcraft</pattern>
+ <shadedPattern>${shaded.package}.com.jcraft</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.lmax</pattern>
+ <shadedPattern>${shaded.package}.com.lmax</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.sun.jersey</pattern>
+ <shadedPattern>${shaded.package}.com.sun.jersey</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.thoughtworks</pattern>
+ <shadedPattern>${shaded.package}.com.thoughtworks</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.yammer</pattern>
+ <shadedPattern>${shaded.package}.com.yammer</shadedPattern>
+ </relocation>
+ <!-- IO relocations -->
+ <relocation>
+ <pattern>io.netty</pattern>
+ <shadedPattern>${shaded.package}.io.netty</shadedPattern>
+ </relocation>
+ <!-- ORG relocations -->
+ <relocation>
+ <pattern>org.antlr</pattern>
+ <shadedPattern>${shaded.package}.org.antlr</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.aopalliance</pattern>
+ <shadedPattern>${shaded.package}.org.aopalliance</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.codehaus</pattern>
+ <shadedPattern>${shaded.package}.org.codehaus</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.fusesource</pattern>
+ <shadedPattern>${shaded.package}.org.fusesource</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.hamcrest</pattern>
+ <shadedPattern>${shaded.package}.org.hamcrest</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.hsqldb</pattern>
+ <shadedPattern>${shaded.package}.org.hsqldb</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.iq80</pattern>
+ <shadedPattern>${shaded.package}.org.iq80</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.jamon</pattern>
+ <shadedPattern>${shaded.package}.org.jamon</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.jboss</pattern>
+ <shadedPattern>${shaded.package}.org.jboss</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.jcodings</pattern>
+ <shadedPattern>${shaded.package}.org.jcodings</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.jets3t</pattern>
+ <shadedPattern>${shaded.package}.org.jets3t</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.joni</pattern>
+ <shadedPattern>${shaded.package}.org.joni</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.junit</pattern>
+ <shadedPattern>${shaded.package}.org.junit</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.kosmix</pattern>
+ <shadedPattern>${shaded.package}.org.kosmix</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.mortbay</pattern>
+ <shadedPattern>${shaded.package}.org.mortbay</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.objectweb</pattern>
+ <shadedPattern>${shaded.package}.org.objectweb</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.stringtemplate</pattern>
+ <shadedPattern>${shaded.package}.org.stringtemplate</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.tukaani</pattern>
+ <shadedPattern>${shaded.package}.org.tukaani</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.znerd</pattern>
+ <shadedPattern>${shaded.package}.org.znerd</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.avro</pattern>
+ <shadedPattern>${shaded.package}.org.apache.avro</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.commons</pattern>
+ <shadedPattern>${shaded.package}.org.apache.commons</shadedPattern>
+ <excludes>
+ <exclude>org.apache.commons.csv.**</exclude>
+ <exclude>org.apache.commons.logging.**</exclude>
+ </excludes>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.directory</pattern>
+ <shadedPattern>${shaded.package}.org.apache.directory</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.http</pattern>
+ <shadedPattern>${shaded.package}.org.apache.http</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.jasper</pattern>
+ <shadedPattern>${shaded.package}.org.apache.jasper</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.jute</pattern>
+ <shadedPattern>${shaded.package}.org.apache.jute</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.mina</pattern>
+ <shadedPattern>${shaded.package}.org.apache.mina</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.oro</pattern>
+ <shadedPattern>${shaded.package}.org.apache.oro</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.taglibs</pattern>
+ <shadedPattern>${shaded.package}.org.apache.taglibs</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.thrift</pattern>
+ <shadedPattern>${shaded.package}.org.apache.thrift</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.tools</pattern>
+ <shadedPattern>${shaded.package}.org.apache.tools</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.twill</pattern>
+ <shadedPattern>${shaded.package}.org.apache.twill</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.velocity</pattern>
+ <shadedPattern>${shaded.package}.org.apache.velocity</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.zookeeper</pattern>
+ <shadedPattern>${shaded.package}.org.apache.zookeeper</shadedPattern>
+ </relocation>
+ <!-- NET relocations -->
+ <relocation>
+ <pattern>net</pattern>
+ <shadedPattern>${shaded.package}.net</shadedPattern>
+ </relocation>
+ <!-- Misc relocations -->
+ <relocation>
+ <pattern>antlr</pattern>
+ <shadedPattern>${shaded.package}.antlr</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>it.unimi</pattern>
+ <shadedPattern>${shaded.package}.it.unimi</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>jline</pattern>
+ <shadedPattern>${shaded.package}.jline</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>junit</pattern>
+ <shadedPattern>${shaded.package}.junit</shadedPattern>
+ </relocation>
+ </relocations>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/phoenix-pig/src/it/java/org/apache/phoenix/pig/BasePigIT.java b/phoenix-pig/src/it/java/org/apache/phoenix/pig/BasePigIT.java
new file mode 100644
index 0000000..4de9854
--- /dev/null
+++ b/phoenix-pig/src/it/java/org/apache/phoenix/pig/BasePigIT.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.pig;
+
+import static org.apache.phoenix.util.PhoenixRuntime.JDBC_PROTOCOL_SEPARATOR;
+import static org.apache.phoenix.util.TestUtil.LOCALHOST;
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.phoenix.end2end.BaseHBaseManagedTimeIT;
+import org.apache.phoenix.end2end.Shadower;
+import org.apache.phoenix.query.QueryConstants;
+import org.apache.phoenix.query.QueryServices;
+import org.apache.phoenix.query.QueryServicesOptions;
+import org.apache.phoenix.util.PhoenixRuntime;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.ReadOnlyProps;
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.data.TupleFactory;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+
+import com.google.common.collect.Maps;
+
+public class BasePigIT extends BaseHBaseManagedTimeIT {
+ protected TupleFactory tupleFactory;
+ protected String zkQuorum;
+ protected Connection conn;
+ protected Configuration conf;
+ protected PigServer pigServer;
+
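+ // @Shadower replaces the @BeforeClass setup of BaseHBaseManagedTimeIT so the test driver starts with the overrides below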
+ @BeforeClass
+ @Shadower(classBeingShadowed = BaseHBaseManagedTimeIT.class)
+ public static void doSetup() throws Exception {
+ Map<String,String> props = Maps.newHashMapWithExpectedSize(3);
+ props.put(QueryServices.EXTRA_JDBC_ARGUMENTS_ATTRIB, QueryServicesOptions.DEFAULT_EXTRA_JDBC_ARGUMENTS);
+ // Must update config before starting server
+ setUpTestDriver(new ReadOnlyProps(props.entrySet().iterator()));
+ }
+
+ @Before
+ public void setUp() throws Exception {
+ conf = getTestClusterConfig();
+ conf.set(QueryServices.EXTRA_JDBC_ARGUMENTS_ATTRIB, QueryServicesOptions.DEFAULT_EXTRA_JDBC_ARGUMENTS);
+ // Set CURRENT_SCN to confirm that it's ignored
+ conf.set(PhoenixRuntime.CURRENT_SCN_ATTRIB, Long.toString(System.currentTimeMillis()+QueryConstants.MILLIS_IN_DAY));
+ pigServer = new PigServer(ExecType.LOCAL, conf);
+ Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+ conn = DriverManager.getConnection(getUrl(), props);
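+ // quorum string of the form host:port, as expected by the loader/storage location URIs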
+ zkQuorum = LOCALHOST + JDBC_PROTOCOL_SEPARATOR + getZKClientPort(conf);
+ tupleFactory = TupleFactory.getInstance();
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ if(conn != null) {
+ conn.close();
+ }
+ if (pigServer != null) {
+ pigServer.shutdown();
+ }
+ }
+}
diff --git a/phoenix-pig/src/it/java/org/apache/phoenix/pig/PhoenixHBaseLoaderIT.java b/phoenix-pig/src/it/java/org/apache/phoenix/pig/PhoenixHBaseLoaderIT.java
new file mode 100644
index 0000000..7fd5574
--- /dev/null
+++ b/phoenix-pig/src/it/java/org/apache/phoenix/pig/PhoenixHBaseLoaderIT.java
@@ -0,0 +1,838 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.pig;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.sql.Array;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.phoenix.util.SchemaUtil;
+import org.apache.pig.builtin.mock.Storage;
+import org.apache.pig.builtin.mock.Storage.Data;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Test class to run all the integration tests against a virtual MapReduce cluster.
+ */
+public class PhoenixHBaseLoaderIT extends BasePigIT {
+
+ private static final Log LOG = LogFactory.getLog(PhoenixHBaseLoaderIT.class);
+ private static final String SCHEMA_NAME = "T";
+ private static final String TABLE_NAME = "A";
+ private static final String INDEX_NAME = "I";
+ private static final String TABLE_FULL_NAME = SchemaUtil.getTableName(SCHEMA_NAME, TABLE_NAME);
+ private static final String CASE_SENSITIVE_TABLE_NAME = SchemaUtil.getEscapedArgument("a");
+ private static final String CASE_SENSITIVE_TABLE_FULL_NAME = SchemaUtil.getTableName(SCHEMA_NAME,CASE_SENSITIVE_TABLE_NAME);
+
+ /**
+ * Validates the schema returned for a table with Pig data types.
+ * @throws Exception
+ */
+ @Test
+ public void testSchemaForTable() throws Exception {
+ final String TABLE = "TABLE1";
+ final String ddl = String.format("CREATE TABLE %s "
+ + " (a_string varchar not null, a_binary varbinary not null, a_integer integer, cf1.a_float float"
+ + " CONSTRAINT pk PRIMARY KEY (a_string, a_binary))\n", TABLE);
+ conn.createStatement().execute(ddl);
+ conn.commit();
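+ // the loader accepts 'hbase://table/<name>', 'hbase://table/<name>/<columns>' and 'hbase://query/<SQL>' locations; this test uses the whole-table form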
+ pigServer.registerQuery(String.format(
+ "A = load 'hbase://table/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');", TABLE,
+ zkQuorum));
+
+ final Schema schema = pigServer.dumpSchema("A");
+ List<FieldSchema> fields = schema.getFields();
+ assertEquals(4, fields.size());
+ assertTrue(fields.get(0).alias.equalsIgnoreCase("a_string"));
+ assertTrue(fields.get(0).type == DataType.CHARARRAY);
+ assertTrue(fields.get(1).alias.equalsIgnoreCase("a_binary"));
+ assertTrue(fields.get(1).type == DataType.BYTEARRAY);
+ assertTrue(fields.get(2).alias.equalsIgnoreCase("a_integer"));
+ assertTrue(fields.get(2).type == DataType.INTEGER);
+ assertTrue(fields.get(3).alias.equalsIgnoreCase("a_float"));
+ assertTrue(fields.get(3).type == DataType.FLOAT);
+ }
+
+ /**
+ * Validates the schema returned when specific columns of a table are given as part of the LOAD statement.
+ * @throws Exception
+ */
+ @Test
+ public void testSchemaForTableWithSpecificColumns() throws Exception {
+
+ //create the table
+ final String TABLE = "TABLE2";
+ final String ddl = "CREATE TABLE " + TABLE
+ + " (ID INTEGER NOT NULL PRIMARY KEY,NAME VARCHAR, AGE INTEGER) ";
+ conn.createStatement().execute(ddl);
+
+ final String selectColumns = "ID,NAME";
+ pigServer.registerQuery(String.format(
+ "A = load 'hbase://table/%s/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');",
+ TABLE, selectColumns, zkQuorum));
+
+ Schema schema = pigServer.dumpSchema("A");
+ List<FieldSchema> fields = schema.getFields();
+ assertEquals(2, fields.size());
+ assertTrue(fields.get(0).alias.equalsIgnoreCase("ID"));
+ assertTrue(fields.get(0).type == DataType.INTEGER);
+ assertTrue(fields.get(1).alias.equalsIgnoreCase("NAME"));
+ assertTrue(fields.get(1).type == DataType.CHARARRAY);
+ }
+
+ /**
+ * Validates the schema returned when a SQL SELECT query is given as part of the LOAD statement.
+ * @throws Exception
+ */
+ @Test
+ public void testSchemaForQuery() throws Exception {
+
+ //create the table.
+ final String TABLE = "TABLE3";
+ String ddl = String.format("CREATE TABLE %s"
+ + " (A_STRING VARCHAR NOT NULL, A_DECIMAL DECIMAL NOT NULL, CF1.A_INTEGER INTEGER, CF2.A_DOUBLE DOUBLE"
+ + " CONSTRAINT pk PRIMARY KEY (A_STRING, A_DECIMAL))", TABLE);
+ conn.createStatement().execute(ddl);
+
+ //sql query for LOAD
+ final String sqlQuery = "SELECT A_STRING,CF1.A_INTEGER,CF2.A_DOUBLE FROM " + TABLE;
+ pigServer.registerQuery(String.format(
+ "A = load 'hbase://query/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');",
+ sqlQuery, zkQuorum));
+
+ //assert the schema.
+ Schema schema = pigServer.dumpSchema("A");
+ List<FieldSchema> fields = schema.getFields();
+ assertEquals(3, fields.size());
+ assertTrue(fields.get(0).alias.equalsIgnoreCase("a_string"));
+ assertTrue(fields.get(0).type == DataType.CHARARRAY);
+ assertTrue(fields.get(1).alias.equalsIgnoreCase("a_integer"));
+ assertTrue(fields.get(1).type == DataType.INTEGER);
+ assertTrue(fields.get(2).alias.equalsIgnoreCase("a_double"));
+ assertTrue(fields.get(2).type == DataType.DOUBLE);
+ }
+
+ /**
+ * Validates the schema when it is given as part of LOAD ... AS.
+ * @throws Exception
+ */
+ @Test
+ public void testSchemaForTableWithAlias() throws Exception {
+
+ //create the table.
+ final String TABLE = "S.TABLE4";
+ String ddl = "CREATE TABLE " + TABLE
+ + " (A_STRING VARCHAR NOT NULL, A_DECIMAL DECIMAL NOT NULL, CF1.A_INTEGER INTEGER, CF2.A_DOUBLE DOUBLE"
+ + " CONSTRAINT pk PRIMARY KEY (A_STRING, A_DECIMAL)) \n";
+ conn.createStatement().execute(ddl);
+
+ //select query given as part of LOAD.
+ final String sqlQuery = "SELECT A_STRING,A_DECIMAL,CF1.A_INTEGER,CF2.A_DOUBLE FROM " + TABLE;
+
+ LOG.info(String.format("Generated SQL Query [%s]",sqlQuery));
+
+ pigServer.registerQuery(String.format(
+ "raw = load 'hbase://query/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s') AS (a:chararray,b:bigdecimal,c:int,d:double);",
+ sqlQuery, zkQuorum));
+
+ //test the schema.
+ Schema schema = pigServer.dumpSchema("raw");
+ List<FieldSchema> fields = schema.getFields();
+ assertEquals(4, fields.size());
+ assertTrue(fields.get(0).alias.equalsIgnoreCase("a"));
+ assertTrue(fields.get(0).type == DataType.CHARARRAY);
+ assertTrue(fields.get(1).alias.equalsIgnoreCase("b"));
+ assertTrue(fields.get(1).type == DataType.BIGDECIMAL);
+ assertTrue(fields.get(2).alias.equalsIgnoreCase("c"));
+ assertTrue(fields.get(2).type == DataType.INTEGER);
+ assertTrue(fields.get(3).alias.equalsIgnoreCase("d"));
+ assertTrue(fields.get(3).type == DataType.DOUBLE);
+ }
+
+ /**
+ * Validates the data returned when a full table is loaded and then filtered.
+ * @throws Exception
+ */
+ @Test
+ public void testDataForTable() throws Exception {
+
+ //create the table
+ String ddl = "CREATE TABLE " + CASE_SENSITIVE_TABLE_FULL_NAME
+ + " (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, AGE INTEGER) ";
+
+ conn.createStatement().execute(ddl);
+
+ //prepare data: 10 rows with age 25 and 10 rows with age 30.
+ final String dml = "UPSERT INTO " + CASE_SENSITIVE_TABLE_FULL_NAME + " VALUES(?,?,?)";
+ PreparedStatement stmt = conn.prepareStatement(dml);
+ int rows = 20;
+ for(int i = 0 ; i < rows; i++) {
+ stmt.setInt(1, i);
+ stmt.setString(2, "a"+i);
+ stmt.setInt(3, (i % 2 == 0) ? 25 : 30);
+ stmt.execute();
+ }
+ conn.commit();
+
+ //load data and filter rows whose age is > 25
+ pigServer.registerQuery(String.format(
+ "A = load 'hbase://table/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');", CASE_SENSITIVE_TABLE_FULL_NAME,
+ zkQuorum));
+ pigServer.registerQuery("B = FILTER A BY AGE > 25;");
+
+ final Iterator<Tuple> iterator = pigServer.openIterator("B");
+ int recordsRead = 0;
+ while (iterator.hasNext()) {
+ final Tuple each = iterator.next();
+ assertEquals(3, each.size());
+ recordsRead++;
+ }
+ assertEquals(rows/2, recordsRead);
+ }
+
+ /**
+ * Validates the data returned when a SQL query is given as part of LOAD.
+ * @throws Exception
+ */
+ @Test
+ public void testDataForSQLQuery() throws Exception {
+
+ //create the table
+ String ddl = "CREATE TABLE " + TABLE_FULL_NAME
+ + " (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, AGE INTEGER) ";
+
+ conn.createStatement().execute(ddl);
+
+ //prepare data: 10 rows with age 25 and 10 rows with age 30.
+ final String dml = "UPSERT INTO " + TABLE_FULL_NAME + " VALUES(?,?,?)";
+ PreparedStatement stmt = conn.prepareStatement(dml);
+ int rows = 20;
+ for(int i = 0 ; i < rows; i++) {
+ stmt.setInt(1, i);
+ stmt.setString(2, "a"+i);
+ stmt.setInt(3, (i % 2 == 0) ? 25 : 30);
+ stmt.execute();
+ }
+ conn.commit();
+
+ //sql query
+ final String sqlQuery = " SELECT ID,NAME,AGE FROM " + TABLE_FULL_NAME + " WHERE AGE > 25";
+ //load data and filter rows whose age is > 25
+ pigServer.registerQuery(String.format(
+ "A = load 'hbase://query/%s' using org.apache.phoenix.pig.PhoenixHBaseLoader('%s');", sqlQuery,
+ zkQuorum));
+
+ final Iterator<Tuple> iterator = pigServer.openIterator("A");
+ int recordsRead = 0;
+ while (iterator.hasNext()) {
+ iterator.next();
+ recordsRead++;
+ }
+ assertEquals(rows/2, recordsRead);
+ }
+
+ /**
+ * Validates data and schema for a query over non-PK columns (see PHOENIX-1123).
+ * @throws Exception
+ */
+ @Test
+ public void testForNonPKSQLQuery() throws Exception {
+
+ //create the table
+ final String TABLE = "TABLE5";
+ String ddl = "CREATE TABLE " + TABLE
+ + " ( ID VARCHAR PRIMARY KEY, FOO VARCHAR, BAR INTEGER, BAZ UNSIGNED_INT)";
+
+ conn.createStatement().execute(ddl);
+
+ //upsert data.
+ final String dml = "UPSERT INTO " + TABLE + " VALUES(?,?,?,?) ";
+ PreparedStatement stmt = conn.prepareStatement(dml);
+ stmt.setString(1, "a");
+ stmt.setString(2, "a");
+ stmt.setInt(3,-1);
+ stmt.setInt(4,1);
+ stmt.execute();
+ stmt.setString(1, "b");
+ stmt.setString(2, "b");
+ stmt.setInt(3,-2);
+ stmt.setInt(4,2);
+ stmt.execute();
+
+ conn.commit();
+
+ //sql query
+ final String sqlQuery = String.format(" SELECT FOO, BAZ FROM %s WHERE BAR = -1 " , TABLE);
+
+ pigServer.registerQuery(String.format(
+ "A = load 'hbase://query/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');", sqlQuery,
+ zkQuorum));
+
+ final Iterator<Tuple> iterator = pigServer.openIterator("A");
+ int recordsRead = 0;
+ while (iterator.hasNext()) {
+ final Tuple tuple = iterator.next();
+ assertEquals("a", tuple.get(0));
+ assertEquals(1, tuple.get(1));
+ recordsRead++;
+ }
+ assertEquals(1, recordsRead);
+
+ //test the schema. Test for PHOENIX-1123
+ Schema schema = pigServer.dumpSchema("A");
+ List<FieldSchema> fields = schema.getFields();
+ assertEquals(2, fields.size());
+ assertTrue(fields.get(0).alias.equalsIgnoreCase("FOO"));
+ assertTrue(fields.get(0).type == DataType.CHARARRAY);
+ assertTrue(fields.get(1).alias.equalsIgnoreCase("BAZ"));
+ assertTrue(fields.get(1).type == DataType.INTEGER);
+ }
+
+ /**
+ * Validates grouping and aggregation of loaded data via GROUP and FOREACH ... GENERATE.
+ * @throws Exception
+ */
+ @Test
+ public void testGroupingOfDataForTable() throws Exception {
+
+ //create the table
+ final String TABLE = "TABLE6";
+ String ddl = "CREATE TABLE " + TABLE
+ + " (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, AGE INTEGER, SAL INTEGER) ";
+
+ conn.createStatement().execute(ddl);
+
+ //prepare data: 10 rows with age 25 and 10 rows with age 30.
+ final String dml = "UPSERT INTO " + TABLE + " VALUES(?,?,?,?)";
+ PreparedStatement stmt = conn.prepareStatement(dml);
+ int rows = 20;
+ int j = 0, k = 0;
+ for(int i = 0 ; i < rows; i++) {
+ stmt.setInt(1, i);
+ stmt.setString(2, "a"+i);
+ if(i % 2 == 0) {
+ stmt.setInt(3, 25);
+ stmt.setInt(4, 10 * 2 * j++);
+ } else {
+ stmt.setInt(3, 30);
+ stmt.setInt(4, 10 * 3 * k++);
+ }
+
+ stmt.execute();
+ }
+ conn.commit();
+
+ //prepare the mock storage with expected output
+ final Data data = Storage.resetData(pigServer);
+ List<Tuple> expectedList = new ArrayList<Tuple>();
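+ // age-25 rows carry SAL 0,20,...,180 and age-30 rows 0,30,...,270, so MIN/MAX per group are (0,180) and (0,270)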
+ expectedList.add(Storage.tuple(0,180));
+ expectedList.add(Storage.tuple(0,270));
+
+ //load data and filter rows whose age is > 25
+ pigServer.setBatchOn();
+ pigServer.registerQuery(String.format(
+ "A = load 'hbase://table/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');", TABLE,
+ zkQuorum));
+
+ pigServer.registerQuery("B = GROUP A BY AGE;");
+ pigServer.registerQuery("C = FOREACH B GENERATE MIN(A.SAL),MAX(A.SAL);");
+ pigServer.registerQuery("STORE C INTO 'out' using mock.Storage();");
+ pigServer.executeBatch();
+
+ List<Tuple> actualList = data.get("out");
+ assertEquals(expectedList, actualList);
+ }
+
+ @Test
+ public void testTimestampForSQLQuery() throws Exception {
+ //create the table
+ String ddl = "CREATE TABLE TIMESTAMP_T (MYKEY VARCHAR,DATE_STP TIMESTAMP CONSTRAINT PK PRIMARY KEY (MYKEY)) ";
+ conn.createStatement().execute(ddl);
+
+ final String dml = "UPSERT INTO TIMESTAMP_T VALUES('foo',TO_TIMESTAMP('2006-04-12 00:00:00'))";
+ conn.createStatement().execute(dml);
+ conn.commit();
+
+ //sql query
+ final String sqlQuery = " SELECT mykey, year(DATE_STP) FROM TIMESTAMP_T ";
+ pigServer.registerQuery(String.format(
+ "A = load 'hbase://query/%s' using org.apache.phoenix.pig.PhoenixHBaseLoader('%s');", sqlQuery,
+ zkQuorum));
+
+ final Iterator<Tuple> iterator = pigServer.openIterator("A");
+ int recordsRead = 0;
+ while (iterator.hasNext()) {
+ Tuple tuple = iterator.next();
+ assertEquals("foo", tuple.get(0));
+ assertEquals(2006, tuple.get(1));
+ recordsRead++;
+ }
+ assertEquals(1, recordsRead);
+ }
+
+ @Test
+ public void testDateForSQLQuery() throws Exception {
+ //create the table
+ String ddl = "CREATE TABLE DATE_T (MYKEY VARCHAR,DATE_STP Date CONSTRAINT PK PRIMARY KEY (MYKEY)) ";
+ conn.createStatement().execute(ddl);
+
+ final String dml = "UPSERT INTO DATE_T VALUES('foo',TO_DATE('2004-03-10 10:00:00'))";
+ conn.createStatement().execute(dml);
+ conn.commit();
+
+ //sql query
+ final String sqlQuery = " SELECT mykey, hour(DATE_STP) FROM DATE_T ";
+ pigServer.registerQuery(String.format(
+ "A = load 'hbase://query/%s' using org.apache.phoenix.pig.PhoenixHBaseLoader('%s');", sqlQuery,
+ zkQuorum));
+
+ final Iterator<Tuple> iterator = pigServer.openIterator("A");
+ int recordsRead = 0;
+ while (iterator.hasNext()) {
+ Tuple tuple = iterator.next();
+ assertEquals("foo", tuple.get(0));
+ assertEquals(10, tuple.get(1));
+ recordsRead++;
+ }
+ assertEquals(1, recordsRead);
+ }
+
+ @Test
+ public void testTimeForSQLQuery() throws Exception {
+ //create the table
+ String ddl = "CREATE TABLE TIME_T (MYKEY VARCHAR,DATE_STP TIME CONSTRAINT PK PRIMARY KEY (MYKEY)) ";
+ conn.createStatement().execute(ddl);
+
+ final String dml = "UPSERT INTO TIME_T VALUES('foo',TO_TIME('2008-05-16 00:30:00'))";
+ conn.createStatement().execute(dml);
+ conn.commit();
+
+ //sql query
+ final String sqlQuery = " SELECT mykey, minute(DATE_STP) FROM TIME_T ";
+ pigServer.registerQuery(String.format(
+ "A = load 'hbase://query/%s' using org.apache.phoenix.pig.PhoenixHBaseLoader('%s');", sqlQuery,
+ zkQuorum));
+
+ final Iterator<Tuple> iterator = pigServer.openIterator("A");
+ int recordsRead = 0;
+ while (iterator.hasNext()) {
+ Tuple tuple = iterator.next();
+ assertEquals("foo", tuple.get(0));
+ assertEquals(30, tuple.get(1));
+ recordsRead++;
+ }
+ assertEquals(1, recordsRead);
+ }
+
+ /**
+ * Tests both {@link PhoenixHBaseLoader} and {@link PhoenixHBaseStorage}
+ * @throws Exception
+ */
+ @Test
+ public void testLoadAndStore() throws Exception {
+
+ //create the tables
+ final String TABLE = "TABLE7";
+ final String sourceTableddl = "CREATE TABLE " + TABLE
+ + " (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, AGE INTEGER, SAL INTEGER) ";
+
+ final String targetTable = "AGGREGATE";
+ final String targetTableddl = "CREATE TABLE " + targetTable
+ + "(AGE INTEGER NOT NULL PRIMARY KEY , MIN_SAL INTEGER , MAX_SAL INTEGER) ";
+
+ conn.createStatement().execute(sourceTableddl);
+ conn.createStatement().execute(targetTableddl);
+
+ //prepare data: 10 rows with age 25 and 10 rows with age 30.
+ final String dml = "UPSERT INTO " + TABLE + " VALUES(?,?,?,?)";
+ PreparedStatement stmt = conn.prepareStatement(dml);
+ int rows = 20;
+ int j = 0, k = 0;
+ for(int i = 0 ; i < rows; i++) {
+ stmt.setInt(1, i);
+ stmt.setString(2, "a"+i);
+ if(i % 2 == 0) {
+ stmt.setInt(3, 25);
+ stmt.setInt(4, 10 * 2 * j++);
+ } else {
+ stmt.setInt(3, 30);
+ stmt.setInt(4, 10 * 3 * k++);
+ }
+
+ stmt.execute();
+ }
+ conn.commit();
+
+ //load data and filter rows whose age is > 25
+ pigServer.setBatchOn();
+ pigServer.registerQuery(String.format(
+ "A = load 'hbase://table/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');", TABLE,
+ zkQuorum));
+
+ pigServer.registerQuery("B = GROUP A BY AGE;");
+ pigServer.registerQuery("C = FOREACH B GENERATE group as AGE,MIN(A.SAL),MAX(A.SAL);");
+ pigServer.registerQuery("STORE C INTO 'hbase://" + targetTable
+ + "' using " + PhoenixHBaseStorage.class.getName() + "('"
+ + zkQuorum + "', '-batchSize 1000');");
+ pigServer.executeBatch();
+
+ //validate the data with what is stored.
+ final String selectQuery = "SELECT AGE , MIN_SAL ,MAX_SAL FROM " + targetTable + " ORDER BY AGE";
+ final ResultSet rs = conn.createStatement().executeQuery(selectQuery);
+ assertTrue(rs.next());
+ assertEquals(25, rs.getInt("AGE"));
+ assertEquals(0, rs.getInt("MIN_SAL"));
+ assertEquals(180, rs.getInt("MAX_SAL"));
+ assertTrue(rs.next());
+ assertEquals(30, rs.getInt("AGE"));
+ assertEquals(0, rs.getInt("MIN_SAL"));
+ assertEquals(270, rs.getInt("MAX_SAL"));
+ }
+
+ /**
+ * Tests loading a SQL query that reserves values from a sequence.
+ * @throws Exception
+ */
+ @Test
+ public void testDataForSQLQueryWithSequences() throws Exception {
+
+ //create the table
+ final String TABLE = "TABLE8";
+ String ddl = "CREATE TABLE " + TABLE
+ + " (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, AGE INTEGER) ";
+
+ conn.createStatement().execute(ddl);
+
+ String sequenceDdl = "CREATE SEQUENCE my_sequence";
+
+ conn.createStatement().execute(sequenceDdl);
+
+ //prepare data: 10 rows with age 25 and 10 rows with age 30.
+ final String dml = "UPSERT INTO " + TABLE + " VALUES(?,?,?)";
+ PreparedStatement stmt = conn.prepareStatement(dml);
+ int rows = 20;
+ for(int i = 0 ; i < rows; i++) {
+ stmt.setInt(1, i);
+ stmt.setString(2, "a"+i);
+ stmt.setInt(3, (i % 2 == 0) ? 25 : 30);
+ stmt.execute();
+ }
+ conn.commit();
+
+ //sql query that loads data and filters rows whose age is > 25
+ final String sqlQuery = " SELECT NEXT VALUE FOR my_sequence AS my_seq,ID,NAME,AGE FROM " + TABLE + " WHERE AGE > 25";
+ pigServer.registerQuery(String.format(
+ "A = load 'hbase://query/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');", sqlQuery,
+ zkQuorum));
+
+
+ Iterator<Tuple> iterator = pigServer.openIterator("A");
+ int recordsRead = 0;
+ while (iterator.hasNext()) {
+ iterator.next();
+ recordsRead++;
+ }
+ assertEquals(rows/2, recordsRead);
+ }
+
+ @Test
+ public void testDataForSQLQueryWithFunctions() throws Exception {
+
+ //create the table
+ final String TABLE = "TABLE9";
+ String ddl = "CREATE TABLE " + TABLE
+ + " (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR) ";
+
+ conn.createStatement().execute(ddl);
+
+ final String dml = "UPSERT INTO " + TABLE + " VALUES(?,?)";
+ PreparedStatement stmt = conn.prepareStatement(dml);
+ int rows = 20;
+ for(int i = 0 ; i < rows; i++) {
+ stmt.setInt(1, i);
+ stmt.setString(2, "a"+i);
+ stmt.execute();
+ }
+ conn.commit();
+
+ //sql query
+ final String sqlQuery = " SELECT UPPER(NAME) AS n FROM " + TABLE + " ORDER BY ID" ;
+
+ pigServer.registerQuery(String.format(
+ "A = load 'hbase://query/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');", sqlQuery,
+ zkQuorum));
+
+
+ Iterator<Tuple> iterator = pigServer.openIterator("A");
+ int i = 0;
+ while (iterator.hasNext()) {
+ Tuple tuple = iterator.next();
+ String name = (String)tuple.get(0);
+ assertEquals("A" + i, name);
+ i++;
+ }
+
+ }
+
+ @Test
+ public void testDataFromIndexTable() throws Exception {
+ //create the table
+ String ddl = "CREATE TABLE " + TABLE_NAME
+ + " (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, EMPLID INTEGER CONSTRAINT pk PRIMARY KEY (ID, NAME)) IMMUTABLE_ROWS=true";
+
+ conn.createStatement().execute(ddl);
+
+ //create a index table
+ String indexDdl = " CREATE INDEX " + INDEX_NAME + " ON " + TABLE_NAME + " (EMPLID) INCLUDE (NAME) ";
+ conn.createStatement().execute(indexDdl);
+
+ //upsert the data.
+ final String dml = "UPSERT INTO " + TABLE_NAME + " VALUES(?,?,?)";
+ PreparedStatement stmt = conn.prepareStatement(dml);
+ int rows = 20;
+ for(int i = 0 ; i < rows; i++) {
+ stmt.setInt(1, i);
+ stmt.setString(2, "a"+i);
+ stmt.setInt(3, i * 5);
+ stmt.execute();
+ }
+ conn.commit();
+ pigServer.registerQuery("A = load 'hbase://query/SELECT NAME , EMPLID FROM A WHERE EMPLID = 25 ' using " + PhoenixHBaseLoader.class.getName() + "('"+zkQuorum + "') ;");
+ Iterator<Tuple> iterator = pigServer.openIterator("A");
+ int recordsRead = 0;
+ while (iterator.hasNext()) {
+ Tuple tuple = iterator.next();
+ assertEquals("a5", tuple.get(0));
+ assertEquals(25, tuple.get(1));
+ recordsRead++;
+ }
+ assertEquals(1, recordsRead);
+ }
+
+ @Test
+ public void testLoadOfSaltTable() throws Exception {
+ final String TABLE = "TABLE11";
+ final String sourceTableddl = "CREATE TABLE " + TABLE
+ + " (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, AGE INTEGER, SAL INTEGER) SALT_BUCKETS=2 ";
+
+ conn.createStatement().execute(sourceTableddl);
+
+ //prepare data: 10 rows with age 25 and 10 rows with age 30.
+ final String dml = "UPSERT INTO " + TABLE + " VALUES(?,?,?,?)";
+ PreparedStatement stmt = conn.prepareStatement(dml);
+ int rows = 20;
+ int j = 0, k = 0;
+ for(int i = 0 ; i < rows; i++) {
+ stmt.setInt(1, i);
+ stmt.setString(2, "a"+i);
+ if(i % 2 == 0) {
+ stmt.setInt(3, 25);
+ stmt.setInt(4, 10 * 2 * j++);
+ } else {
+ stmt.setInt(3, 30);
+ stmt.setInt(4, 10 * 3 * k++);
+ }
+
+ stmt.execute();
+ }
+ conn.commit();
+
+ final Data data = Storage.resetData(pigServer);
+ List<Tuple> expectedList = new ArrayList<Tuple>();
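+ // each age group (25 and 30) holds half of the 20 rows, hence a COUNT of 10 per group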
+ expectedList.add(Storage.tuple(25,10));
+ expectedList.add(Storage.tuple(30,10));
+
+ pigServer.setBatchOn();
+ pigServer.registerQuery(String.format(
+ "A = load 'hbase://table/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');", TABLE,
+ zkQuorum));
+
+ pigServer.registerQuery("B = GROUP A BY AGE;");
+ pigServer.registerQuery("C = FOREACH B GENERATE group,COUNT(A);");
+ pigServer.registerQuery("STORE C INTO 'out' using mock.Storage();");
+ pigServer.executeBatch();
+
+ List<Tuple> actualList = data.get("out");
+ assertEquals(expectedList.size(), actualList.size());
+ }
+
+ /**
+ * Validates loading of Phoenix array columns and of a dynamic array built with REGEXP_SPLIT.
+ * @throws Exception
+ */
+ @Test
+ public void testLoadForArrayWithQuery() throws Exception {
+ //create the table
+ final String TABLE = "TABLE14";
+ String ddl = "CREATE TABLE " + TABLE
+ + " ( ID INTEGER PRIMARY KEY, a_double_array double array[] , a_varchar_array varchar array, a_concat_str varchar, sep varchar)";
+
+ conn.createStatement().execute(ddl);
+
+ Double[] doubleArr = new Double[3];
+ doubleArr[0] = 2.2;
+ doubleArr[1] = 4.4;
+ doubleArr[2] = 6.6;
+ Array doubleArray = conn.createArrayOf("DOUBLE", doubleArr);
+ Tuple doubleArrTuple = Storage.tuple(2.2d, 4.4d, 6.6d);
+
+ Double[] doubleArr2 = new Double[2];
+ doubleArr2[0] = 12.2;
+ doubleArr2[1] = 22.2;
+ Array doubleArray2 = conn.createArrayOf("DOUBLE", doubleArr2);
+ Tuple doubleArrTuple2 = Storage.tuple(12.2d, 22.2d);
+
+ String[] strArr = new String[4];
+ strArr[0] = "ABC";
+ strArr[1] = "DEF";
+ strArr[2] = "GHI";
+ strArr[3] = "JKL";
+ Array strArray = conn.createArrayOf("VARCHAR", strArr);
+ Tuple strArrTuple = Storage.tuple("ABC", "DEF", "GHI", "JKL");
+
+ String[] strArr2 = new String[2];
+ strArr2[0] = "ABC";
+ strArr2[1] = "XYZ";
+ Array strArray2 = conn.createArrayOf("VARCHAR", strArr2);
+ Tuple strArrTuple2 = Storage.tuple("ABC", "XYZ");
+
+ //upsert data.
+ final String dml = "UPSERT INTO " + TABLE + " VALUES(?, ?, ?, ?, ?) ";
+ PreparedStatement stmt = conn.prepareStatement(dml);
+ stmt.setInt(1, 1);
+ stmt.setArray(2, doubleArray);
+ stmt.setArray(3, strArray);
+ stmt.setString(4, "ONE,TWO,THREE");
+ stmt.setString(5, ",");
+ stmt.execute();
+
+ stmt.setInt(1, 2);
+ stmt.setArray(2, doubleArray2);
+ stmt.setArray(3, strArray2);
+ stmt.setString(4, "FOUR:five:six");
+ stmt.setString(5, ":");
+ stmt.execute();
+
+ conn.commit();
+
+ Tuple dynArrTuple = Storage.tuple("ONE", "TWO", "THREE");
+ Tuple dynArrTuple2 = Storage.tuple("FOUR", "five", "six");
+
+ //sql query
+ final String sqlQuery = String.format(" SELECT ID, A_DOUBLE_ARRAY, A_VARCHAR_ARRAY, REGEXP_SPLIT(a_concat_str, sep) AS flattend_str FROM %s ", TABLE);
+
+ final Data data = Storage.resetData(pigServer);
+ List<Tuple> expectedList = new ArrayList<Tuple>();
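+ // SIZE of the arrays per row: row 1 holds 3 doubles and 4 varchars, row 2 holds 2 of each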
+ expectedList.add(Storage.tuple(1, 3L, 4L, dynArrTuple));
+ expectedList.add(Storage.tuple(2, 2L, 2L, dynArrTuple2));
+ final String load = String.format("A = load 'hbase://query/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');",sqlQuery,zkQuorum);
+ pigServer.setBatchOn();
+ pigServer.registerQuery(load);
+ pigServer.registerQuery("B = FOREACH A GENERATE ID, SIZE(A_DOUBLE_ARRAY), SIZE(A_VARCHAR_ARRAY), FLATTEND_STR;");
+ pigServer.registerQuery("STORE B INTO 'out' using mock.Storage();");
+ pigServer.executeBatch();
+
+ List<Tuple> actualList = data.get("out");
+ assertEquals(expectedList.size(), actualList.size());
+ assertEquals(expectedList, actualList);
+
+ Schema schema = pigServer.dumpSchema("A");
+ List<FieldSchema> fields = schema.getFields();
+ assertEquals(4, fields.size());
+ assertTrue(fields.get(0).alias.equalsIgnoreCase("ID"));
+ assertTrue(fields.get(0).type == DataType.INTEGER);
+ assertTrue(fields.get(1).alias.equalsIgnoreCase("A_DOUBLE_ARRAY"));
+ assertTrue(fields.get(1).type == DataType.TUPLE);
+ assertTrue(fields.get(2).alias.equalsIgnoreCase("A_VARCHAR_ARRAY"));
+ assertTrue(fields.get(2).type == DataType.TUPLE);
+ assertTrue(fields.get(3).alias.equalsIgnoreCase("FLATTEND_STR"));
+ assertTrue(fields.get(3).type == DataType.TUPLE);
+
+ Iterator<Tuple> iterator = pigServer.openIterator("A");
+ Tuple firstTuple = Storage.tuple(1, doubleArrTuple, strArrTuple, dynArrTuple);
+ Tuple secondTuple = Storage.tuple(2, doubleArrTuple2, strArrTuple2, dynArrTuple2);
+ List<Tuple> expectedRows = Lists.newArrayList(firstTuple, secondTuple);
+ List<Tuple> actualRows = Lists.newArrayList();
+ while (iterator.hasNext()) {
+ Tuple tuple = iterator.next();
+ actualRows.add(tuple);
+ }
+ assertEquals(expectedRows, actualRows);
+ }
+
+
+ /**
+ * Validates loading of a Phoenix array column when the whole table is loaded.
+ * @throws Exception
+ */
+ @Test
+ public void testLoadForArrayWithTable() throws Exception {
+ //create the table
+ final String TABLE = "TABLE15";
+ String ddl = "CREATE TABLE " + TABLE
+ + " ( ID INTEGER PRIMARY KEY, a_double_array double array[])";
+
+ conn.createStatement().execute(ddl);
+
+ Double[] doubleArr = new Double[3];
+ doubleArr[0] = 2.2;
+ doubleArr[1] = 4.4;
+ doubleArr[2] = 6.6;
+ Array doubleArray = conn.createArrayOf("DOUBLE", doubleArr);
+ Tuple doubleArrTuple = Storage.tuple(2.2d, 4.4d, 6.6d);
+
+ Double[] doubleArr2 = new Double[2];
+ doubleArr2[0] = 12.2;
+ doubleArr2[1] = 22.2;
+ Array doubleArray2 = conn.createArrayOf("DOUBLE", doubleArr2);
+ Tuple doubleArrTuple2 = Storage.tuple(12.2d, 22.2d);
+
+ //upsert data.
+ final String dml = "UPSERT INTO " + TABLE + " VALUES(?, ?) ";
+ PreparedStatement stmt = conn.prepareStatement(dml);
+ stmt.setInt(1, 1);
+ stmt.setArray(2, doubleArray);
+ stmt.execute();
+
+ stmt.setInt(1, 2);
+ stmt.setArray(2, doubleArray2);
+ stmt.execute();
+
+ conn.commit();
+
+ final Data data = Storage.resetData(pigServer);
+ List<Tuple> expectedList = new ArrayList<Tuple>();
+ expectedList.add(Storage.tuple(1, doubleArrTuple));
+ expectedList.add(Storage.tuple(2, doubleArrTuple2));
+
+ pigServer.setBatchOn();
+ pigServer.registerQuery(String.format(
+ "A = load 'hbase://table/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');", TABLE,
+ zkQuorum));
+
+ pigServer.registerQuery("STORE A INTO 'out' using mock.Storage();");
+ pigServer.executeBatch();
+
+ List<Tuple> actualList = data.get("out");
+ assertEquals(expectedList.size(), actualList.size());
+ assertEquals(expectedList, actualList);
+ }
+}
diff --git a/phoenix-pig/src/it/java/org/apache/phoenix/pig/PhoenixHBaseStorerIT.java b/phoenix-pig/src/it/java/org/apache/phoenix/pig/PhoenixHBaseStorerIT.java
new file mode 100644
index 0000000..2634c44
--- /dev/null
+++ b/phoenix-pig/src/it/java/org/apache/phoenix/pig/PhoenixHBaseStorerIT.java
@@ -0,0 +1,292 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.pig;
+
+import static org.apache.pig.builtin.mock.Storage.tuple;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.sql.Array;
+import java.sql.ResultSet;
+import java.sql.Statement;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.phoenix.util.SchemaUtil;
+import org.apache.pig.backend.executionengine.ExecJob;
+import org.apache.pig.backend.executionengine.ExecJob.JOB_STATUS;
+import org.apache.pig.builtin.mock.Storage;
+import org.apache.pig.builtin.mock.Storage.Data;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.joda.time.DateTime;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+
+public class PhoenixHBaseStorerIT extends BasePigIT {
+ /**
+ * Basic test - writes data to a Phoenix table and compares the data written
+ * to expected
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testStorer() throws Exception {
+ final String tableName = "TABLE1";
+ final Statement stmt = conn.createStatement();
+
+ stmt.execute("CREATE TABLE " + tableName +
+ " (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR)");
+
+ final Data data = Storage.resetData(pigServer);
+ final Collection<Tuple> list = Lists.newArrayList();
+
+ // Create input dataset
+ int rows = 100;
+ for (int i = 0; i < rows; i++) {
+ Tuple t = tupleFactory.newTuple();
+ t.append(i);
+ t.append("a" + i);
+ list.add(t);
+ }
+ data.set("in", "id:int, name:chararray", list);
+
+ pigServer.setBatchOn();
+ pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
+
+ pigServer.registerQuery("Store A into 'hbase://" + tableName
+ + "' using " + PhoenixHBaseStorage.class.getName() + "('"
+ + zkQuorum + "', '-batchSize 1000');");
+
+ // Now run the Pig script
+ final List<ExecJob> jobs = pigServer.executeBatch();
+ if (jobs.get(0).getStatus() != JOB_STATUS.COMPLETED) {
+ throw new RuntimeException("Job failed", jobs.get(0).getException());
+ }
+
+ // Compare data in Phoenix table to the expected
+ final ResultSet rs = stmt
+ .executeQuery("SELECT id, name FROM table1 ORDER BY id");
+
+ for (int i = 0; i < rows; i++) {
+ assertTrue(rs.next());
+ assertEquals(i, rs.getInt(1));
+ assertEquals("a" + i, rs.getString(2));
+ }
+ }
+
+ /**
+ * Basic test - writes specific columns data to a Phoenix table and compares the data written
+ * to expected
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testStorerForSpecificColumns() throws Exception {
+ final String tableName = SchemaUtil.getTableName("TABLE2", SchemaUtil.getEscapedArgument("zo2"));
+ final Statement stmt = conn.createStatement();
+
+ stmt.execute("CREATE TABLE " + tableName +
+ " (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, AGE INTEGER)");
+ final Data data = Storage.resetData(pigServer);
+ final Collection<Tuple> list = Lists.newArrayList();
+
+ // Create input dataset
+ int rows = 100;
+ for (int i = 0; i < rows; i++) {
+ Tuple t = tupleFactory.newTuple();
+ t.append(i);
+ t.append("a" + i);
+ t.append(i * 2);
+ list.add(t);
+ }
+ data.set("in", "id:int, name:chararray,age:int", list);
+
+ pigServer.setBatchOn();
+ pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
+ pigServer.registerQuery("B = FOREACH A GENERATE id,name;");
+ pigServer.registerQuery("Store B into 'hbase://" + tableName + "/ID,NAME"
+ + "' using " + PhoenixHBaseStorage.class.getName() + "('"
+ + zkQuorum + "', '-batchSize 1000');");
+
+ // Now run the Pig script
+ final List<ExecJob> jobs = pigServer.executeBatch();
+ if (jobs.get(0).getStatus() != JOB_STATUS.COMPLETED) {
+ throw new RuntimeException("Job failed", jobs.get(0).getException());
+ }
+
+ // Compare data in Phoenix table to the expected
+ final ResultSet rs = stmt
+ .executeQuery("SELECT id, name,age FROM " + tableName + " ORDER BY id");
+
+ for (int i = 0; i < rows; i++) {
+ assertTrue(rs.next());
+ assertEquals(i, rs.getInt(1));
+ assertEquals("a" + i, rs.getString(2));
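+ // AGE was not among the stored columns, so it is NULL and getInt returns 0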
+ assertEquals(0, rs.getInt(3));
+ }
+ }
+
+ /**
+ * Test storage of DataByteArray columns to Phoenix
+ * Maps the DataByteArray with the target PhoenixDataType and persists in HBase.
+ * @throws Exception
+ */
+ @Test
+ public void testStoreWithBinaryDataTypes() throws Exception {
+
+ final String tableName = "TABLE3";
+ final Statement stmt = conn.createStatement();
+
+ stmt.execute("CREATE TABLE " + tableName +
+ " (col1 BIGINT NOT NULL, col2 INTEGER , col3 FLOAT, col4 DOUBLE , col5 TINYINT , " +
+ " col6 BOOLEAN , col7 VARBINARY CONSTRAINT my_pk PRIMARY KEY (col1))");
+
+ final Data data = Storage.resetData(pigServer);
+ final Collection<Tuple> list = Lists.newArrayList();
+
+ int rows = 10;
+ for (int i = 1; i <= rows; i++) {
+ Tuple t = tupleFactory.newTuple();
+ t.append(i);
+ t.append(new DataByteArray(Bytes.toBytes(i * 5)));
+ t.append(new DataByteArray(Bytes.toBytes(i * 10.0F)));
+ t.append(new DataByteArray(Bytes.toBytes(i * 15.0D)));
+ t.append(new DataByteArray(Bytes.toBytes(i)));
+ t.append(new DataByteArray(Bytes.toBytes( i % 2 == 0)));
+ t.append(new DataByteArray(Bytes.toBytes(i)));
+ list.add(t);
+ }
+ data.set("in", "col1:int,col2:bytearray,col3:bytearray,col4:bytearray,col5:bytearray,col6:bytearray,col7:bytearray ", list);
+
+ pigServer.setBatchOn();
+ pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
+
+ pigServer.registerQuery("Store A into 'hbase://" + tableName
+ + "' using " + PhoenixHBaseStorage.class.getName() + "('"
+ + zkQuorum + "', '-batchSize 1000');");
+
+ final List<ExecJob> jobs = pigServer.executeBatch();
+ if (jobs.get(0).getStatus() != JOB_STATUS.COMPLETED) {
+ throw new RuntimeException("Job failed", jobs.get(0).getException());
+ }
+
+ final ResultSet rs = stmt
+ .executeQuery(String.format("SELECT col1 , col2 , col3 , col4 , col5 , col6, col7 FROM %s ORDER BY col1" , tableName));
+
+ int count = 0;
+ for (int i = 1; i <= rows; i++) {
+ assertTrue(rs.next());
+ assertEquals(i, rs.getInt(1));
+ assertEquals(i * 5, rs.getInt(2));
+ assertEquals(i * 10.0F, rs.getFloat(3),0.0);
+ assertEquals(i * 15.0D, rs.getDouble(4), 0.0);
+ assertEquals(i,rs.getInt(5));
+ assertEquals(i % 2 == 0, rs.getBoolean(6));
+ assertArrayEquals(Bytes.toBytes(i), rs.getBytes(7));
+ count++;
+ }
+ assertEquals(rows, count);
+ }
+
+ @Test
+ public void testStoreWithDateTime() throws Exception {
+
+ final String tableName = "TABLE4";
+ final Statement stmt = conn.createStatement();
+
+ stmt.execute("CREATE TABLE " + tableName +
+ " (col1 BIGINT NOT NULL, col2 DATE , col3 TIME, " +
+ " col4 TIMESTAMP CONSTRAINT my_pk PRIMARY KEY (col1))");
+
+ long now = System.currentTimeMillis();
+ final DateTime dt = new DateTime(now);
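+ // the same instant is stored into the DATE, TIME and TIMESTAMP columns and read back below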
+
+ final Data data = Storage.resetData(pigServer);
+ final Collection<Tuple> list = Lists.newArrayList();
+ Tuple t = tupleFactory.newTuple();
+
+ t.append(1);
+ t.append(dt);
+ t.append(dt);
+ t.append(dt);
+
+ list.add(t);
+
+ data.set("in", "col1:int,col2:datetime,col3:datetime,col4:datetime", list);
+
+ pigServer.setBatchOn();
+ pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
+
+ pigServer.registerQuery("Store A into 'hbase://" + tableName
+ + "' using " + PhoenixHBaseStorage.class.getName() + "('"
+ + zkQuorum + "', '-batchSize 1000');");
+
+ final List<ExecJob> jobs = pigServer.executeBatch();
+ if (jobs.get(0).getStatus() != JOB_STATUS.COMPLETED) {
+ throw new RuntimeException("Job failed", jobs.get(0).getException());
+ }
+
+ final ResultSet rs = stmt
+ .executeQuery(String.format("SELECT col1 , col2 , col3 , col4 FROM %s " , tableName));
+
+ assertTrue(rs.next());
+ assertEquals(1, rs.getInt(1));
+ assertEquals(now, rs.getDate(2).getTime());
+ assertEquals(now, rs.getTime(3).getTime());
+ assertEquals(now, rs.getTimestamp(4).getTime());
+
+ }
+
+ @Test
+ public void testStoreForArray() throws Exception {
+
+ final String tableName = "TABLE5";
+ final Statement stmt = conn.createStatement();
+ String ddl = "CREATE TABLE " + tableName
+ + " ( ID INTEGER PRIMARY KEY, dbl double array[], a_varchar_array varchar array)";
+
+ stmt.execute(ddl);
+
+ final Data data = Storage.resetData(pigServer);
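+ // each inner Pig tuple below is written into the DBL double-array column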
+ data.set("in", tuple(1, tuple(2.2)),
+ tuple(2, tuple(2.4, 2.5)),
+ tuple(3, tuple(2.3)));
+
+ pigServer.setBatchOn();
+ pigServer.registerQuery("A = LOAD 'in' USING mock.Storage() as (id:int, dbl:tuple());");
+ pigServer.registerQuery("Store A into 'hbase://" + tableName + "/ID,DBL"
+ + "' using " + PhoenixHBaseStorage.class.getName() + "('"
+ + zkQuorum + "', '-batchSize 1000');");
+
+ final List<ExecJob> jobs = pigServer.executeBatch();
+ if (jobs.get(0).getStatus() != JOB_STATUS.COMPLETED) {
+ throw new RuntimeException("Job failed", jobs.get(0).getException());
+ }
+
+ final ResultSet rs = stmt
+ .executeQuery(String.format("SELECT id , dbl FROM %s where id = 2" , tableName));
+
+ assertTrue(rs.next());
+ assertEquals(2, rs.getInt(1));
+ Array expectedDoubleArr = conn.createArrayOf("DOUBLE", new Double[] { 2.4, 2.5 });
+ assertEquals(expectedDoubleArr,rs.getArray(2));
+ }
+}
diff --git a/phoenix-pig/src/it/java/org/apache/phoenix/pig/udf/ReserveNSequenceTestIT.java b/phoenix-pig/src/it/java/org/apache/phoenix/pig/udf/ReserveNSequenceTestIT.java
new file mode 100644
index 0000000..98f46f0
--- /dev/null
+++ b/phoenix-pig/src/it/java/org/apache/phoenix/pig/udf/ReserveNSequenceTestIT.java
@@ -0,0 +1,306 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.pig.udf;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.Properties;
+
+import org.apache.phoenix.pig.BasePigIT;
+import org.apache.phoenix.util.PhoenixRuntime;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.util.UDFContext;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+/**
+ * Test class to run all the Pig Sequence UDF integration tests against a virtual MapReduce cluster.
+ */
+public class ReserveNSequenceTestIT extends BasePigIT {
+
+ private static final String CREATE_SEQUENCE_SYNTAX = "CREATE SEQUENCE %s START WITH %s INCREMENT BY %s MINVALUE %s MAXVALUE %s CACHE %s";
+ private static final String SEQUENCE_NAME = "my_schema.my_sequence";
+ private static final long MAX_VALUE = 10;
+
+ private static UDFContext udfContext;
+
+ @Rule
+ public ExpectedException thrown = ExpectedException.none();
+
+ @Override
+ @Before
+ public void setUp() throws Exception {
+ super.setUp();
+ createSequence(conn);
+ createUdfContext();
+ }
+
+ @Override
+ @After
+ public void tearDown() throws Exception {
+ udfContext.reset();
+ dropSequence(conn);
+ super.tearDown();
+ }
+
+ @Test
+ public void testReserve() throws Exception {
+ doTest(new UDFTestProperties(1));
+ }
+
+ @Test
+ public void testReserveN() throws Exception {
+ doTest(new UDFTestProperties(5));
+ }
+
+ @Test
+ public void testReserveNwithPreviousAllocations() throws Exception {
+ UDFTestProperties props = new UDFTestProperties(5);
+ props.setCurrentValue(4);
+ doTest(props);
+ }
+
+ @Test
+ public void testReserveWithZero() throws Exception {
+ UDFTestProperties props = new UDFTestProperties(0);
+ props.setExceptionExpected(true);
+ props.setExceptionClass(IllegalArgumentException.class);
+ props.setErrorMessage(ReserveNSequence.INVALID_NUMBER_MESSAGE);
+ doTest(props);
+ }
+
+ @Test
+ public void testReserveWithNegativeNumber() throws Exception {
+ UDFTestProperties props = new UDFTestProperties(-1);
+ props.setExceptionExpected(true);
+ props.setExceptionClass(IllegalArgumentException.class);
+ props.setErrorMessage(ReserveNSequence.INVALID_NUMBER_MESSAGE);
+ doTest(props);
+ }
+
+ @Test
+ public void testReserveMaxLimit() throws Exception {
+ UDFTestProperties props = new UDFTestProperties(MAX_VALUE);
+ props.setExceptionExpected(true);
+ props.setExceptionClass(IOException.class);
+ props.setErrorMessage("Reached MAXVALUE of sequence");
+ doTest(props);
+ }
+
+ @Test
+ public void testNoSequenceName() throws Exception {
+ UDFTestProperties props = new UDFTestProperties(1);
+ props.setExceptionExpected(true);
+ props.setSequenceName(null);
+ props.setExceptionClass(NullPointerException.class);
+ props.setErrorMessage(ReserveNSequence.EMPTY_SEQUENCE_NAME_MESSAGE);
+ doTest(props);
+ }
+
+ @Test
+ public void testSequenceNotExisting() throws Exception {
+ UDFTestProperties props = new UDFTestProperties(1);
+ props.setExceptionExpected(true);
+ props.setSequenceName("foo.bar");
+ props.setExceptionClass(IOException.class);
+ props.setErrorMessage("Sequence undefined");
+ doTest(props);
+ }
+
+ /**
+ * Tests reserving a sequence with a tenant id passed to the UDF.
+ * @throws Exception
+ */
+ @Test
+ public void testTenantSequence() throws Exception {
+ Properties tenantProps = new Properties();
+ String tenantId = "TENANT";
+ tenantProps.put(PhoenixRuntime.TENANT_ID_ATTRIB, tenantId);
+ Connection tenantConn = DriverManager.getConnection(getUrl(), tenantProps);
+ createSequence(tenantConn);
+
+ try {
+ UDFTestProperties props = new UDFTestProperties(3);
+
+ // validates that the UDF reservation applies to that tenant
+ doTest(tenantConn, props);
+
+ // validate global sequence value is still set to 1
+ assertEquals(1L, getNextSequenceValue(conn));
+ } finally {
+ dropSequence(tenantConn);
+ }
+ }
+
+ /**
+ * Tests using the UDF to reserve sequence values across multiple tuples.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testMultipleTuples() throws Exception {
+ Tuple tuple = tupleFactory.newTuple(2);
+ tuple.set(0, 2L);
+ tuple.set(1, SEQUENCE_NAME);
+
+ final String tenantId = conn.getClientInfo(PhoenixRuntime.TENANT_ID_ATTRIB);
+ ReserveNSequence udf = new ReserveNSequence(zkQuorum, tenantId);
+
+ for (int i = 0; i < 2; i++) {
+ udf.exec(tuple);
+ }
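+ // two reservations of 2, starting from 1, consume values 1..4, so the next value is 5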
+ long nextValue = getNextSequenceValue(conn);
+ assertEquals(5L, nextValue);
+ }
+
+ private void doTest(UDFTestProperties props) throws Exception {
+ doTest(conn, props);
+ }
+
+ private void doTest(Connection conn, UDFTestProperties props) throws Exception {
+ setCurrentValue(conn, props.getCurrentValue());
+ Tuple tuple = tupleFactory.newTuple(3);
+ tuple.set(0, props.getNumToReserve());
+ tuple.set(1, props.getSequenceName());
+ tuple.set(2, zkQuorum);
+ Long result = null;
+ try {
+ final String tenantId = conn.getClientInfo(PhoenixRuntime.TENANT_ID_ATTRIB);
+ ReserveNSequence udf = new ReserveNSequence(zkQuorum, tenantId);
+ result = udf.exec(tuple);
+ validateReservedSequence(conn, props.getCurrentValue(), props.getNumToReserve(), result);
+ // Call finish() to clean up the UDF and close its connection
+ udf.finish();
+ } catch (Exception e) {
+ if (props.isExceptionExpected()) {
+ assertEquals(props.getExceptionClass(), e.getClass());
+ assertTrue(e.getMessage().contains(props.getErrorMessage()));
+ } else {
+ throw e;
+ }
+ }
+ }
+
+ private void createUdfContext() {
+ udfContext = UDFContext.getUDFContext();
+ udfContext.addJobConf(conf);
+ }
+
+ private void validateReservedSequence(Connection conn, Long currentValue, long count, Long result) throws SQLException {
+ Long startIndex = currentValue + 1;
+ assertEquals("Start index is incorrect", startIndex, result);
+ final long newNextSequenceValue = getNextSequenceValue(conn);
+ assertEquals(startIndex + count, newNextSequenceValue);
+ }
+
+ private void createSequence(Connection conn) throws SQLException {
+ conn.createStatement().execute(String.format(CREATE_SEQUENCE_SYNTAX, SEQUENCE_NAME, 1, 1, 1, MAX_VALUE, 1));
+ conn.commit();
+ }
+
+ private void setCurrentValue(Connection conn, long currentValue) throws SQLException {
+ for (int i = 1; i <= currentValue; i++) {
+ getNextSequenceValue(conn);
+ }
+ }
+
+ private long getNextSequenceValue(Connection conn) throws SQLException {
+ String ddl = new StringBuilder().append("SELECT NEXT VALUE FOR ").append(SEQUENCE_NAME).toString();
+ ResultSet rs = conn.createStatement().executeQuery(ddl);
+ assertTrue(rs.next());
+ conn.commit();
+ return rs.getLong(1);
+ }
+
+ private void dropSequence(Connection conn) throws Exception {
+ String ddl = new StringBuilder().append("DROP SEQUENCE ").append(SEQUENCE_NAME).toString();
+ conn.createStatement().execute(ddl);
+ conn.commit();
+ }
+
+ /**
+ * Static class to define properties for the test
+ */
+ private static class UDFTestProperties {
+ private final Long numToReserve;
+ private Long currentValue = 1L;
+ private String sequenceName = SEQUENCE_NAME;
+ private boolean exceptionExpected = false;
+ private Class<?> exceptionClass = null;
+ private String errorMessage = null;
+
+ public UDFTestProperties(long numToReserve) {
+ this.numToReserve = numToReserve;
+ }
+
+ public Long getCurrentValue() {
+ return currentValue;
+ }
+
+ public void setCurrentValue(long currentValue) {
+ this.currentValue = currentValue;
+ }
+
+ public String getSequenceName() {
+ return sequenceName;
+ }
+
+ public void setSequenceName(String sequenceName) {
+ this.sequenceName = sequenceName;
+ }
+
+ public boolean isExceptionExpected() {
+ return exceptionExpected;
+ }
+
+ public void setExceptionExpected(boolean shouldThrowException) {
+ this.exceptionExpected = shouldThrowException;
+ }
+
+ public String getErrorMessage() {
+ return errorMessage;
+ }
+
+ public void setErrorMessage(String errorMessage) {
+ this.errorMessage = errorMessage;
+ }
+
+ public Long getNumToReserve() {
+ return numToReserve;
+ }
+
+ public Class<?> getExceptionClass() {
+ return exceptionClass;
+ }
+
+ public void setExceptionClass(Class<?> exceptionClass) {
+ this.exceptionClass = exceptionClass;
+ }
+
+ }
+
+}
diff --git a/phoenix-pig/src/main/java/org/apache/phoenix/pig/PhoenixHBaseLoader.java b/phoenix-pig/src/main/java/org/apache/phoenix/pig/PhoenixHBaseLoader.java
new file mode 100644
index 0000000..7380b8a
--- /dev/null
+++ b/phoenix-pig/src/main/java/org/apache/phoenix/pig/PhoenixHBaseLoader.java
@@ -0,0 +1,265 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you maynot use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.pig;
+
+import static org.apache.commons.lang.StringUtils.isEmpty;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.phoenix.mapreduce.PhoenixInputFormat;
+import org.apache.phoenix.mapreduce.PhoenixRecordWritable;
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil.SchemaType;
+import org.apache.phoenix.pig.util.PhoenixPigSchemaUtil;
+import org.apache.phoenix.pig.util.QuerySchemaParserFunction;
+import org.apache.phoenix.pig.util.TableSchemaParserFunction;
+import org.apache.phoenix.pig.util.TypeUtil;
+import org.apache.pig.Expression;
+import org.apache.pig.LoadFunc;
+import org.apache.pig.LoadMetadata;
+import org.apache.pig.PigException;
+import org.apache.pig.ResourceSchema;
+import org.apache.pig.ResourceStatistics;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.util.ObjectSerializer;
+import org.apache.pig.impl.util.UDFContext;
+
+import com.google.common.base.Preconditions;
+
+
+/**
+ * LoadFunc to load data from HBase using Phoenix.
+ *
+ * Example usage:
+ * a) TABLE
+ * i) A = load 'hbase://table/HIRES' using
+ * org.apache.phoenix.pig.PhoenixHBaseLoader('localhost');
+ *
+ * The above loads the data from the table 'HIRES'.
+ *
+ * ii) A = load 'hbase://table/HIRES/id,name' using
+ * org.apache.phoenix.pig.PhoenixHBaseLoader('localhost');
+ *
+ * Here, only the id and name columns are returned from the table HIRES as part of the LOAD.
+ *
+ * b) QUERY
+ * i) B = load 'hbase://query/SELECT fname, lname FROM HIRES' using
+ * org.apache.phoenix.pig.PhoenixHBaseLoader('localhost');
+ *
+ * The above loads the fname and lname columns from the 'HIRES' table.
+ *
+ */
+public final class PhoenixHBaseLoader extends LoadFunc implements LoadMetadata {
+
+ private static final Log LOG = LogFactory.getLog(PhoenixHBaseLoader.class);
+ private static final String PHOENIX_TABLE_NAME_SCHEME = "hbase://table/";
+ private static final String PHOENIX_QUERY_SCHEME = "hbase://query/";
+ private static final String RESOURCE_SCHEMA_SIGNATURE = "phoenix.pig.schema";
+
+ private Configuration config;
+ private String tableName;
+ private String selectQuery;
+ private String zkQuorum;
+ private PhoenixInputFormat<PhoenixRecordWritable> inputFormat;
+ private RecordReader<NullWritable,PhoenixRecordWritable> reader;
+ private String contextSignature;
+ private ResourceSchema schema;
+
+ /**
+ * @param zkQuorum
+ */
+ public PhoenixHBaseLoader(String zkQuorum) {
+ super();
+ Preconditions.checkNotNull(zkQuorum);
+ Preconditions.checkState(zkQuorum.length() > 0, "Zookeeper quorum cannot be empty!");
+ this.zkQuorum = zkQuorum;
+ }
+
+ @Override
+ public void setLocation(String location, Job job) throws IOException {
+ PhoenixConfigurationUtil.loadHBaseConfiguration(job);
+
+ final Configuration configuration = job.getConfiguration();
+ // explicitly turn off combining splits
+ configuration.setBoolean("pig.noSplitCombination", true);
+
+ this.initializePhoenixPigConfiguration(location, configuration);
+ }
+
+ /**
+ * Initialize PhoenixPigConfiguration if it is null. Called by {@link #setLocation} and {@link #getSchema}
+ * @param location
+ * @param configuration
+ * @throws PigException
+ */
+ private void initializePhoenixPigConfiguration(final String location, final Configuration configuration) throws IOException {
+ if(this.config != null) {
+ return;
+ }
+ this.config = configuration;
+ this.config.set(HConstants.ZOOKEEPER_QUORUM,this.zkQuorum);
+ PhoenixConfigurationUtil.setInputClass(this.config, PhoenixRecordWritable.class);
+ Pair<String,String> pair = null;
+ try {
+ if (location.startsWith(PHOENIX_TABLE_NAME_SCHEME)) {
+ String tableSchema = location.substring(PHOENIX_TABLE_NAME_SCHEME.length());
+ final TableSchemaParserFunction parseFunction = new TableSchemaParserFunction();
+ pair = parseFunction.apply(tableSchema);
+ PhoenixConfigurationUtil.setSchemaType(this.config, SchemaType.TABLE);
+ } else if (location.startsWith(PHOENIX_QUERY_SCHEME)) {
+ this.selectQuery = location.substring(PHOENIX_QUERY_SCHEME.length());
+ final QuerySchemaParserFunction queryParseFunction = new QuerySchemaParserFunction(this.config);
+ pair = queryParseFunction.apply(this.selectQuery);
+ PhoenixConfigurationUtil.setInputQuery(this.config, this.selectQuery);
+ PhoenixConfigurationUtil.setSchemaType(this.config, SchemaType.QUERY);
+ }
+ // A location matching neither scheme leaves pair null; report usage instead of an NPE.
+ if (pair == null) {
+ printUsage(location);
+ }
+ this.tableName = pair.getFirst();
+ final String selectedColumns = pair.getSecond();
+
+ if(isEmpty(this.tableName) && isEmpty(this.selectQuery)) {
+ printUsage(location);
+ }
+ PhoenixConfigurationUtil.setInputTableName(this.config, this.tableName);
+ if(!isEmpty(selectedColumns)) {
+ PhoenixConfigurationUtil.setSelectColumnNames(this.config, selectedColumns.split(","));
+ }
+ } catch(IllegalArgumentException iae) {
+ printUsage(location);
+ }
+ }
+
+
+ @Override
+ public String relativeToAbsolutePath(String location, Path curDir) throws IOException {
+ return location;
+ }
+
+ @Override
+ public InputFormat getInputFormat() throws IOException {
+ if(inputFormat == null) {
+ inputFormat = new PhoenixInputFormat<PhoenixRecordWritable>();
+ PhoenixConfigurationUtil.setInputClass(this.config, PhoenixRecordWritable.class);
+ }
+ return inputFormat;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
+ this.reader = reader;
+ final String resourceSchemaAsStr = getValueFromUDFContext(this.contextSignature,RESOURCE_SCHEMA_SIGNATURE);
+ if (resourceSchemaAsStr == null) {
+ throw new IOException("Could not find schema in UDF context");
+ }
+ schema = (ResourceSchema)ObjectSerializer.deserialize(resourceSchemaAsStr);
+ }
+
+ /*
+ * @see org.apache.pig.LoadFunc#setUDFContextSignature(java.lang.String)
+ */
+ @Override
+ public void setUDFContextSignature(String signature) {
+ this.contextSignature = signature;
+ }
+
+ @Override
+ public Tuple getNext() throws IOException {
+ try {
+ if(!reader.nextKeyValue()) {
+ return null;
+ }
+ final PhoenixRecordWritable record = reader.getCurrentValue();
+ if(record == null) {
+ return null;
+ }
+ final Tuple tuple = TypeUtil.transformToTuple(record, schema.getFields());
+ return tuple;
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ int errCode = 6018;
+ final String errMsg = "Error while reading input";
+ throw new ExecException(errMsg, errCode,PigException.REMOTE_ENVIRONMENT, e);
+ }
+ }
+
+ private void printUsage(final String location) throws PigException {
+ String locationErrMsg = String.format("The input location in load statement should be of the form " +
+ "%s<table name> or %s<query>. Got [%s] ",PHOENIX_TABLE_NAME_SCHEME,PHOENIX_QUERY_SCHEME,location);
+ LOG.error(locationErrMsg);
+ throw new PigException(locationErrMsg);
+ }
+
+ @Override
+ public ResourceSchema getSchema(String location, Job job) throws IOException {
+ if(schema != null) {
+ return schema;
+ }
+
+ PhoenixConfigurationUtil.loadHBaseConfiguration(job);
+ final Configuration configuration = job.getConfiguration();
+ this.initializePhoenixPigConfiguration(location, configuration);
+ this.schema = PhoenixPigSchemaUtil.getResourceSchema(this.config);
+ if(LOG.isDebugEnabled()) {
+ LOG.debug(String.format("Resource Schema generated for location [%s] is [%s]", location, schema.toString()));
+ }
+ this.storeInUDFContext(this.contextSignature, RESOURCE_SCHEMA_SIGNATURE, ObjectSerializer.serialize(schema));
+ return schema;
+ }
+
+ @Override
+ public ResourceStatistics getStatistics(String location, Job job) throws IOException {
+ // not implemented
+ return null;
+ }
+
+ @Override
+ public String[] getPartitionKeys(String location, Job job) throws IOException {
+ // not implemented
+ return null;
+ }
+
+ @Override
+ public void setPartitionFilter(Expression partitionFilter) throws IOException {
+ // not implemented
+ }
+
+ private void storeInUDFContext(final String signature,final String key,final String value) {
+ final UDFContext udfContext = UDFContext.getUDFContext();
+ final Properties props = udfContext.getUDFProperties(this.getClass(), new String[]{signature});
+ props.put(key, value);
+ }
+
+ private String getValueFromUDFContext(final String signature,final String key) {
+ final UDFContext udfContext = UDFContext.getUDFContext();
+ final Properties props = udfContext.getUDFProperties(this.getClass(), new String[]{signature});
+ return props.getProperty(key);
+ }
+}
diff --git a/phoenix-pig/src/main/java/org/apache/phoenix/pig/PhoenixHBaseStorage.java b/phoenix-pig/src/main/java/org/apache/phoenix/pig/PhoenixHBaseStorage.java
new file mode 100644
index 0000000..e061c1c
--- /dev/null
+++ b/phoenix-pig/src/main/java/org/apache/phoenix/pig/PhoenixHBaseStorage.java
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.pig;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.Arrays;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.phoenix.mapreduce.PhoenixOutputFormat;
+import org.apache.phoenix.mapreduce.PhoenixRecordWritable;
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
+import org.apache.phoenix.pig.util.TableSchemaParserFunction;
+import org.apache.phoenix.pig.util.TypeUtil;
+import org.apache.phoenix.schema.types.PDataType;
+import org.apache.phoenix.util.ColumnInfo;
+import org.apache.phoenix.util.PhoenixRuntime;
+import org.apache.pig.ResourceSchema;
+import org.apache.pig.ResourceSchema.ResourceFieldSchema;
+import org.apache.pig.StoreFuncInterface;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.util.ObjectSerializer;
+import org.apache.pig.impl.util.UDFContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * StoreFunc that uses Phoenix to store data into HBase.
+ *
+ * Example usage: A = load 'testdata' as (a:chararray, b:chararray, c:chararray,
+ * d:chararray, e: datetime); STORE A into 'hbase://CORE.ENTITY_HISTORY' using
+ * org.apache.phoenix.pig.PhoenixHBaseStorage('localhost','-batchSize 5000');
+ *
+ * The above reads a file 'testdata' and writes the elements to HBase. First
+ * argument to this StoreFunc is the server, the 2nd argument is the batch size
+ * for upserts via Phoenix.
+ *
+ * Alternative usage: A = load 'testdata' as (a:chararray, b:chararray,
+ * e: datetime); STORE A into 'hbase://CORE.ENTITY_HISTORY/ID,F.B,F.E' using
+ * org.apache.phoenix.pig.PhoenixHBaseStorage('localhost','-batchSize 5000');
+ *
+ * The above reads a file 'testdata' and writes the elements ID, F.B, and F.E to HBase.
+ * In this example, ID is the row key, and F is the column family for the data elements.
+ * First argument to this StoreFunc is the server, the 2nd argument is the batch size
+ * for upserts via Phoenix. In this case, fewer columns than the full table row may be written.
+ * Configuration details are logged at INFO level.
+ *
+ * Note that Pig types must be in sync with the target Phoenix data types. This
+ * StoreFunc does its best to cast based on the input Pig types and target Phoenix data
+ * types, but it is recommended to supply an appropriate schema.
+ */
+@SuppressWarnings("rawtypes")
+public class PhoenixHBaseStorage implements StoreFuncInterface {
+
+ private static final Logger LOG = LoggerFactory.getLogger(PhoenixHBaseStorage.class);
+ private static final Set<String> PROPS_TO_IGNORE = new HashSet<>(Arrays.asList(PhoenixRuntime.CURRENT_SCN_ATTRIB));
+
+ private Configuration config;
+ private RecordWriter<NullWritable, PhoenixRecordWritable> writer;
+ private List<ColumnInfo> columnInfo = null;
+ private String contextSignature = null;
+ private ResourceSchema schema;
+ private long batchSize;
+ private final PhoenixOutputFormat outputFormat = new PhoenixOutputFormat<PhoenixRecordWritable>(PROPS_TO_IGNORE);
+ // Set of options permitted
+ private final static Options validOptions = new Options();
+ private final static CommandLineParser parser = new GnuParser();
+ private final static String SCHEMA = "_schema";
+ private final static String PHOENIX_TABLE_NAME_SCHEME = "hbase://";
+
+ private final CommandLine configuredOptions;
+ private final String server;
+
+ public PhoenixHBaseStorage(String server) throws ParseException {
+ this(server, null);
+ }
+
+ public PhoenixHBaseStorage(String server, String optString)
+ throws ParseException {
+ populateValidOptions();
+ this.server = server;
+
+ String[] optsArr = optString == null ? new String[0] : optString.split(" ");
+ try {
+ configuredOptions = parser.parse(validOptions, optsArr);
+ } catch (ParseException e) {
+ HelpFormatter formatter = new HelpFormatter();
+ formatter.printHelp("[-batchSize]", validOptions);
+ throw e;
+ }
+ // Guard against a missing -batchSize option; Long.parseLong(null) would throw a NPE.
+ // The 1000-row fallback is an assumed default, not one mandated by Phoenix.
+ String batchSizeOption = configuredOptions.getOptionValue("batchSize");
+ batchSize = batchSizeOption == null ? 1000L : Long.parseLong(batchSizeOption);
+ }
+
+ private static void populateValidOptions() {
+ validOptions.addOption("batchSize", true, "Specify upsert batch size");
+ }
+
+ /**
+ * Returns UDFProperties based on <code>contextSignature</code>.
+ */
+ private Properties getUDFProperties() {
+ return UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[] { contextSignature });
+ }
+
+
+ /**
+ * Parse the HBase table name and configure job
+ */
+ @Override
+ public void setStoreLocation(String location, Job job) throws IOException {
+ String tableSchema = location.substring(PHOENIX_TABLE_NAME_SCHEME.length());
+ final TableSchemaParserFunction parseFunction = new TableSchemaParserFunction();
+ Pair<String,String> pair = parseFunction.apply(tableSchema);
+ PhoenixConfigurationUtil.loadHBaseConfiguration(job);
+ config = job.getConfiguration();
+ config.set(HConstants.ZOOKEEPER_QUORUM, server);
+ String tableName = pair.getFirst();
+ String columns = pair.getSecond();
+ if(columns != null && columns.length() > 0) {
+ PhoenixConfigurationUtil.setUpsertColumnNames(config, columns.split(","));
+ }
+ PhoenixConfigurationUtil.setPhysicalTableName(config,tableName);
+ PhoenixConfigurationUtil.setOutputTableName(config,tableName);
+ PhoenixConfigurationUtil.setBatchSize(config,batchSize);
+ String serializedSchema = getUDFProperties().getProperty(contextSignature + SCHEMA);
+ if (serializedSchema != null) {
+ schema = (ResourceSchema) ObjectSerializer.deserialize(serializedSchema);
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public void prepareToWrite(RecordWriter writer) throws IOException {
+ this.writer = writer;
+ try {
+ this.columnInfo = PhoenixConfigurationUtil.getUpsertColumnMetadataList(this.config);
+ } catch(SQLException sqle) {
+ throw new IOException(sqle);
+ }
+ }
+
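+ /**
+ * Writes one Pig tuple as a Phoenix upsert. Each field is cast from its Pig type
+ * (taken from the checked schema when available, otherwise inferred from the value)
+ * to the target Phoenix column type before being handed to the RecordWriter.
+ */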
+ @Override
+ public void putNext(Tuple t) throws IOException {
+ ResourceFieldSchema[] fieldSchemas = (schema == null) ? null : schema.getFields();
+ PhoenixRecordWritable record = new PhoenixRecordWritable(this.columnInfo);
+ try {
+ for(int i=0; i<t.size(); i++) {
+ Object value = t.get(i);
+ if(value == null) {
+ record.add(null);
+ continue;
+ }
+ ColumnInfo cinfo = this.columnInfo.get(i);
+ byte type = (fieldSchemas == null) ? DataType.findType(value) : fieldSchemas[i].getType();
+ PDataType pDataType = PDataType.fromTypeId(cinfo.getSqlType());
+ Object v = TypeUtil.castPigTypeToPhoenix(value, type, pDataType);
+ record.add(v);
+ }
+ this.writer.write(null, record);
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ throw new RuntimeException(e);
+ } catch (SQLException e) {
+ LOG.error("Error on tuple {} .",t);
+ throw new IOException(e);
+ }
+
+ }
+
+ @Override
+ public void setStoreFuncUDFContextSignature(String signature) {
+ this.contextSignature = signature;
+ }
+
+ @Override
+ public void cleanupOnFailure(String location, Job job) throws IOException {
+ }
+
+ @Override
+ public void cleanupOnSuccess(String location, Job job) throws IOException {
+ }
+
+ @Override
+ public String relToAbsPathForStoreLocation(String location, Path curDir) throws IOException {
+ return location;
+ }
+
+ @Override
+ public OutputFormat getOutputFormat() throws IOException {
+ return outputFormat;
+ }
+
+ @Override
+ public void checkSchema(ResourceSchema s) throws IOException {
+ schema = s;
+ getUDFProperties().setProperty(contextSignature + SCHEMA, ObjectSerializer.serialize(schema));
+ }
+}
diff --git a/phoenix-pig/src/main/java/org/apache/phoenix/pig/udf/ReserveNSequence.java b/phoenix-pig/src/main/java/org/apache/phoenix/pig/udf/ReserveNSequence.java
new file mode 100644
index 0000000..eaf4e91
--- /dev/null
+++ b/phoenix-pig/src/main/java/org/apache/phoenix/pig/udf/ReserveNSequence.java
@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.pig.udf;
+
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.phoenix.mapreduce.util.ConnectionUtil;
+import org.apache.phoenix.util.PhoenixRuntime;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.util.UDFContext;
+
+import com.google.common.base.Preconditions;
+import com.google.common.base.Strings;
+
+import edu.umd.cs.findbugs.annotations.NonNull;
+import edu.umd.cs.findbugs.annotations.Nullable;
+
+/**
+ * UDF to reserve a chunk of numbers for a given sequence.
+ *
+ * Note: a Phoenix connection is created on the first tuple of each task and closed in
+ * {@link #finish()}, so using this UDF over very large datasets can still be expensive.
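+ *
+ * A minimal Pig Latin sketch of how this UDF might be wired up; the jar path, quorum,
+ * tenant, and sequence name below are illustrative placeholders:
+ *
+ * <pre>
+ * REGISTER /path/to/phoenix-pig.jar;
+ * DEFINE ReserveNSequence org.apache.phoenix.pig.udf.ReserveNSequence('zk1:2181', 'tenant1');
+ * B = FOREACH A GENERATE ReserveNSequence(100L, 'MY.SEQUENCE');
+ * </pre>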
+ */
+public class ReserveNSequence extends EvalFunc<Long> {
+
+ public static final String INVALID_TUPLE_MESSAGE = "Tuple should have correct fields (NumToReserve, SequenceName).";
+ public static final String EMPTY_SEQUENCE_NAME_MESSAGE = "Sequence name must not be null";
+ public static final String EMPTY_ZK_MESSAGE = "ZKQuorum must not be null";
+ public static final String INVALID_NUMBER_MESSAGE = "Number of sequence values to reserve should be greater than 0";
+
+ private final String zkQuorum;
+ private final String tenantId;
+ private Configuration configuration;
+ Connection connection;
+
+ public ReserveNSequence(@NonNull String zkQuorum, @Nullable String tenantId) {
+ Preconditions.checkNotNull(zkQuorum, EMPTY_ZK_MESSAGE);
+ this.zkQuorum = zkQuorum;
+ this.tenantId = tenantId;
+ }
+
+ /**
+ * Reserves the next N values for a sequence. N is the first field in the tuple and the
+ * sequence name is the second; the ZK quorum is taken from the constructor, so any
+ * third tuple field is ignored.
+ */
+ @Override
+ public Long exec(Tuple input) throws IOException {
+ Preconditions.checkArgument(input != null && input.size() >= 2, INVALID_TUPLE_MESSAGE);
+ Long numToReserve = (Long)(input.get(0));
+ Preconditions.checkArgument(numToReserve > 0, INVALID_NUMBER_MESSAGE);
+ String sequenceName = (String)input.get(1);
+ Preconditions.checkNotNull(sequenceName, EMPTY_SEQUENCE_NAME_MESSAGE);
+ // A connection is created on the first tuple of each task and closed in finish().
+ if (connection == null) {
+ initConnection();
+ }
+ String sql = getNextNSequenceSelectStatement(numToReserve, sequenceName);
+ // try-with-resources closes the ResultSet even if validation or the commit fails
+ try (ResultSet rs = connection.createStatement().executeQuery(sql)) {
+ Preconditions.checkArgument(rs.next());
+ Long startIndex = rs.getLong(1);
+ connection.commit();
+ return startIndex;
+ } catch (SQLException e) {
+ throw new IOException("Caught exception while processing row: " + e.getMessage(), e);
+ }
+ }
+
+ /**
+ * Cleanup to be performed at the end.
+ * Close connection
+ */
+ @Override
+ public void finish() {
+ if (connection != null) {
+ try {
+ connection.close();
+ } catch (SQLException e) {
+ throw new RuntimeException("Caught exception while closing connection", e);
+ }
+ }
+ }
+
+ private void initConnection() throws IOException {
+ // Create correct configuration to be used to make phoenix connections
+ UDFContext context = UDFContext.getUDFContext();
+ configuration = new Configuration(context.getJobConf());
+ configuration.set(HConstants.ZOOKEEPER_QUORUM, this.zkQuorum);
+ if (Strings.isNullOrEmpty(tenantId)) {
+ configuration.unset(PhoenixRuntime.TENANT_ID_ATTRIB);
+ } else {
+ configuration.set(PhoenixRuntime.TENANT_ID_ATTRIB, tenantId);
+ }
+ try {
+ connection = ConnectionUtil.getOutputConnection(configuration);
+ } catch (SQLException e) {
+ throw new IOException("Caught exception while creating connection", e);
+ }
+ }
+
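+ /**
+ * Builds the reservation query; e.g. for (100, "MY.SEQUENCE") this returns
+ * "SELECT NEXT 100 VALUES FOR MY.SEQUENCE".
+ */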
+ private String getNextNSequenceSelectStatement(Long numToReserve, String sequenceName) {
+ return new StringBuilder().append("SELECT NEXT " + numToReserve + " VALUES" + " FOR ").append(sequenceName)
+ .toString();
+ }
+
+}
diff --git a/phoenix-pig/src/main/java/org/apache/phoenix/pig/util/PhoenixPigSchemaUtil.java b/phoenix-pig/src/main/java/org/apache/phoenix/pig/util/PhoenixPigSchemaUtil.java
new file mode 100644
index 0000000..7e0203f
--- /dev/null
+++ b/phoenix-pig/src/main/java/org/apache/phoenix/pig/util/PhoenixPigSchemaUtil.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you maynot use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.pig.util;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil.SchemaType;
+import org.apache.phoenix.schema.types.PDataType;
+import org.apache.phoenix.util.ColumnInfo;
+import org.apache.pig.ResourceSchema;
+import org.apache.pig.ResourceSchema.ResourceFieldSchema;
+
+import com.google.common.base.Preconditions;
+
+/**
+ *
+ * Utility to generate the ResourceSchema from the list of {@link ColumnInfo}
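+ * For example, columns (ID BIGINT, NAME VARCHAR) yield the Pig schema (ID:long, NAME:chararray).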
+ *
+ */
+public final class PhoenixPigSchemaUtil {
+
+ private static final Log LOG = LogFactory.getLog(PhoenixPigSchemaUtil.class);
+
+ private PhoenixPigSchemaUtil() {
+ }
+
+ static class Dependencies {
+ List<ColumnInfo> getSelectColumnMetadataList(Configuration configuration) throws SQLException {
+ return PhoenixConfigurationUtil.getSelectColumnMetadataList(configuration);
+ }
+ }
+
+ public static ResourceSchema getResourceSchema(final Configuration configuration, Dependencies dependencies) throws IOException {
+
+ final ResourceSchema schema = new ResourceSchema();
+ try {
+ List<ColumnInfo> columns = null;
+ final SchemaType schemaType = PhoenixConfigurationUtil.getSchemaType(configuration);
+ if(schemaType == SchemaType.QUERY) {
+ final String sqlQuery = PhoenixConfigurationUtil.getSelectStatement(configuration);
+ Preconditions.checkNotNull(sqlQuery, "No Sql Query exists within the configuration");
+ final SqlQueryToColumnInfoFunction function = new SqlQueryToColumnInfoFunction(configuration);
+ columns = function.apply(sqlQuery);
+ } else if (schemaType == SchemaType.TABLE) {
+ columns = dependencies.getSelectColumnMetadataList(configuration);
+ }
+ ResourceFieldSchema[] fields = new ResourceFieldSchema[columns.size()];
+ int i = 0;
+ for(ColumnInfo cinfo : columns) {
+ int sqlType = cinfo.getSqlType();
+ PDataType phoenixDataType = PDataType.fromTypeId(sqlType);
+ byte pigType = TypeUtil.getPigDataTypeForPhoenixType(phoenixDataType);
+ ResourceFieldSchema field = new ResourceFieldSchema();
+ field.setType(pigType).setName(cinfo.getDisplayName());
+ fields[i++] = field;
+ }
+ schema.setFields(fields);
+ } catch(SQLException sqle) {
+ LOG.error(String.format("Error: SQLException [%s] ",sqle.getMessage()));
+ throw new IOException(sqle);
+ }
+
+ return schema;
+ }
+
+ public static ResourceSchema getResourceSchema(final Configuration configuration) throws IOException {
+ return getResourceSchema(configuration, new Dependencies());
+ }
+}
diff --git a/phoenix-pig/src/main/java/org/apache/phoenix/pig/util/QuerySchemaParserFunction.java b/phoenix-pig/src/main/java/org/apache/phoenix/pig/util/QuerySchemaParserFunction.java
new file mode 100644
index 0000000..8e4defb
--- /dev/null
+++ b/phoenix-pig/src/main/java/org/apache/phoenix/pig/util/QuerySchemaParserFunction.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you maynot use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.pig.util;
+
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.phoenix.compile.ColumnProjector;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.util.ConnectionUtil;
+
+import com.google.common.base.Function;
+import com.google.common.base.Joiner;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+
+/**
+ *
+ * A function to parse the select query passed to LOAD into a Pair of <table name, comma-separated column names>.
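+ * For example, a query such as "SELECT ID, NAME FROM EMPLOYEE" (names illustrative)
+ * parses into the pair (EMPLOYEE, "ID,NAME").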
+ *
+ */
+public class QuerySchemaParserFunction implements Function<String,Pair<String,String>> {
+
+ private static final Log LOG = LogFactory.getLog(QuerySchemaParserFunction.class);
+ private final Configuration configuration;
+
+ public QuerySchemaParserFunction(Configuration configuration) {
+ Preconditions.checkNotNull(configuration);
+ this.configuration = configuration;
+ }
+
+ @Override
+ public Pair<String, String> apply(final String selectStatement) {
+ Preconditions.checkNotNull(selectStatement);
+ Preconditions.checkArgument(!selectStatement.isEmpty(), "Select query is empty!");
+ Connection connection = null;
+ try {
+ connection = ConnectionUtil.getInputConnection(this.configuration);
+ final Statement statement = connection.createStatement();
+ final PhoenixStatement pstmt = statement.unwrap(PhoenixStatement.class);
+ final QueryPlan queryPlan = pstmt.compileQuery(selectStatement);
+ isValidStatement(queryPlan);
+ final String tableName = queryPlan.getTableRef().getTable().getName().getString();
+ final List<? extends ColumnProjector> projectedColumns = queryPlan.getProjector().getColumnProjectors();
+ final List<String> columns = Lists.transform(projectedColumns,
+ new Function<ColumnProjector,String>() {
+ @Override
+ public String apply(ColumnProjector column) {
+ return column.getName();
+ }
+ });
+ final String columnsAsStr = Joiner.on(",").join(columns);
+ return new Pair<String, String>(tableName, columnsAsStr);
+ } catch (SQLException e) {
+ LOG.error(String.format(" Error [%s] parsing SELECT query [%s] ",e.getMessage(),selectStatement));
+ throw new RuntimeException(e);
+ } finally {
+ if(connection != null) {
+ try {
+ connection.close();
+ } catch(SQLException sqle) {
+ LOG.error(" Error closing connection ");
+ throw new RuntimeException(sqle);
+ }
+ }
+ }
+ }
+
+ /**
+ * The method validates the statement passed to the query plan. List of conditions are
+ * <ol>
+ * <li>Is a SELECT statement</li>
+ * <li>doesn't contain ORDER BY expression</li>
+ * <li>doesn't contain LIMIT</li>
+ * <li>doesn't contain GROUP BY expression</li>
+ * <li>doesn't contain DISTINCT</li>
+ * <li>doesn't contain AGGREGATE functions</li>
+ * </ol>
+ * @param queryPlan
+ * @return
+ */
+ private boolean isValidStatement(final QueryPlan queryPlan) {
+ if(queryPlan.getStatement().getOperation() != PhoenixStatement.Operation.QUERY) {
+ throw new IllegalArgumentException("Query passed isn't a SELECT statement");
+ }
+ if(!queryPlan.getOrderBy().getOrderByExpressions().isEmpty()
+ || queryPlan.getLimit() != null
+ || (queryPlan.getGroupBy() != null && !queryPlan.getGroupBy().isEmpty())
+ || queryPlan.getStatement().isDistinct()
+ || queryPlan.getStatement().isAggregate()) {
+ throw new IllegalArgumentException("SELECT statement shouldn't contain DISTINCT or ORDER BY or LIMIT or GROUP BY expressions");
+ }
+ return true;
+ }
+
+}
diff --git a/phoenix-pig/src/main/java/org/apache/phoenix/pig/util/SqlQueryToColumnInfoFunction.java b/phoenix-pig/src/main/java/org/apache/phoenix/pig/util/SqlQueryToColumnInfoFunction.java
new file mode 100644
index 0000000..b29ba81
--- /dev/null
+++ b/phoenix-pig/src/main/java/org/apache/phoenix/pig/util/SqlQueryToColumnInfoFunction.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you maynot use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.pig.util;
+
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.phoenix.compile.ColumnProjector;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.util.ConnectionUtil;
+import org.apache.phoenix.util.ColumnInfo;
+
+import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+
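+/**
+ * A function that compiles a SELECT query into a Phoenix {@link QueryPlan} and returns a
+ * {@link ColumnInfo} (column name plus Phoenix SQL type) for each projected column.
+ */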
+public final class SqlQueryToColumnInfoFunction implements Function<String,List<ColumnInfo>> {
+
+ private static final Log LOG = LogFactory.getLog(SqlQueryToColumnInfoFunction.class);
+ private final Configuration configuration;
+
+ public SqlQueryToColumnInfoFunction(final Configuration configuration) {
+ this.configuration = configuration;
+ }
+
+ @Override
+ public List<ColumnInfo> apply(String sqlQuery) {
+ Preconditions.checkNotNull(sqlQuery);
+ Connection connection = null;
+ List<ColumnInfo> columnInfos = null;
+ try {
+ connection = ConnectionUtil.getInputConnection(this.configuration);
+ final Statement statement = connection.createStatement();
+ final PhoenixStatement pstmt = statement.unwrap(PhoenixStatement.class);
+ final QueryPlan queryPlan = pstmt.compileQuery(sqlQuery);
+ final List<? extends ColumnProjector> projectedColumns = queryPlan.getProjector().getColumnProjectors();
+ // Lists.transform returns a lazy view, so no pre-sized list is needed here.
+ columnInfos = Lists.transform(projectedColumns, new Function<ColumnProjector,ColumnInfo>() {
+ @Override
+ public ColumnInfo apply(final ColumnProjector columnProjector) {
+ return new ColumnInfo(columnProjector.getName(), columnProjector.getExpression().getDataType().getSqlType());
+ }
+
+ });
+ } catch (SQLException e) {
+ LOG.error(String.format(" Error [%s] parsing SELECT query [%s] ",e.getMessage(),sqlQuery));
+ throw new RuntimeException(e);
+ } finally {
+ if(connection != null) {
+ try {
+ connection.close();
+ } catch(SQLException sqle) {
+ LOG.error("Error closing connection!!");
+ throw new RuntimeException(sqle);
+ }
+ }
+ }
+ return columnInfos;
+ }
+
+}
diff --git a/phoenix-pig/src/main/java/org/apache/phoenix/pig/util/TableSchemaParserFunction.java b/phoenix-pig/src/main/java/org/apache/phoenix/pig/util/TableSchemaParserFunction.java
new file mode 100644
index 0000000..5e2f24a
--- /dev/null
+++ b/phoenix-pig/src/main/java/org/apache/phoenix/pig/util/TableSchemaParserFunction.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you maynot use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.pig.util;
+
+import org.apache.hadoop.hbase.util.Pair;
+
+import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Splitter;
+import com.google.common.collect.Iterables;
+
+/**
+ *
+ * A function to parse the table schema passed to LOAD/STORE into a Pair of <table name, columns>.
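+ * For example, "EMPLOYEE/ID,NAME" (an illustrative location) parses into
+ * (EMPLOYEE, "ID,NAME"), while a bare "EMPLOYEE" parses into (EMPLOYEE, null).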
+ *
+ */
+public final class TableSchemaParserFunction implements Function<String,Pair<String,String>> {
+
+ private static final char TABLE_COLUMN_DELIMITER = '/';
+
+ @Override
+ public Pair<String, String> apply(final String tableSchema) {
+ Preconditions.checkNotNull(tableSchema);
+ Preconditions.checkArgument(!tableSchema.isEmpty(), "HBase table name is empty!");
+
+ final String[] tokens = Iterables.toArray(Splitter.on(TABLE_COLUMN_DELIMITER).
+ trimResults().omitEmptyStrings().split(tableSchema), String.class);
+ final String tableName = tokens[0];
+ String columns = null;
+ if(tokens.length > 1) {
+ columns = tokens[1];
+ }
+ return new Pair<String, String>(tableName, columns);
+ }
+}
diff --git a/phoenix-pig/src/main/java/org/apache/phoenix/pig/util/TypeUtil.java b/phoenix-pig/src/main/java/org/apache/phoenix/pig/util/TypeUtil.java
new file mode 100644
index 0000000..8c9bd6a
--- /dev/null
+++ b/phoenix-pig/src/main/java/org/apache/phoenix/pig/util/TypeUtil.java
@@ -0,0 +1,349 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.phoenix.pig.util;
+
+import java.io.IOException;
+import java.sql.Date;
+import java.sql.SQLException;
+import java.sql.Time;
+import java.sql.Timestamp;
+import java.sql.Types;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.phoenix.mapreduce.PhoenixRecordWritable;
+import org.apache.phoenix.schema.types.PArrayDataType;
+import org.apache.phoenix.schema.types.PBinary;
+import org.apache.phoenix.schema.types.PBoolean;
+import org.apache.phoenix.schema.types.PChar;
+import org.apache.phoenix.schema.types.PDataType;
+import org.apache.phoenix.schema.types.PDate;
+import org.apache.phoenix.schema.types.PDecimal;
+import org.apache.phoenix.schema.types.PDouble;
+import org.apache.phoenix.schema.types.PFloat;
+import org.apache.phoenix.schema.types.PInteger;
+import org.apache.phoenix.schema.types.PLong;
+import org.apache.phoenix.schema.types.PSmallint;
+import org.apache.phoenix.schema.types.PTime;
+import org.apache.phoenix.schema.types.PTimestamp;
+import org.apache.phoenix.schema.types.PTinyint;
+import org.apache.phoenix.schema.types.PUnsignedDate;
+import org.apache.phoenix.schema.types.PUnsignedDouble;
+import org.apache.phoenix.schema.types.PUnsignedFloat;
+import org.apache.phoenix.schema.types.PUnsignedInt;
+import org.apache.phoenix.schema.types.PUnsignedLong;
+import org.apache.phoenix.schema.types.PUnsignedSmallint;
+import org.apache.phoenix.schema.types.PUnsignedTime;
+import org.apache.phoenix.schema.types.PUnsignedTimestamp;
+import org.apache.phoenix.schema.types.PUnsignedTinyint;
+import org.apache.phoenix.schema.types.PVarbinary;
+import org.apache.phoenix.schema.types.PVarchar;
+import org.apache.phoenix.schema.types.PhoenixArray;
+import org.apache.pig.PigException;
+import org.apache.pig.ResourceSchema.ResourceFieldSchema;
+import org.apache.pig.backend.hadoop.hbase.HBaseBinaryConverter;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.joda.time.DateTime;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableMap.Builder;
+
+public final class TypeUtil {
+
+ private static final Log LOG = LogFactory.getLog(TypeUtil.class);
+ private static final HBaseBinaryConverter BINARY_CONVERTER = new HBaseBinaryConverter();
+ private static final ImmutableMap<PDataType, Byte> PHOENIX_TO_PIG_TYPE = init();
+ private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
+
+ private TypeUtil() {}
+
+ /**
+ * @return map of Phoenix to Pig data types.
+ */
+ private static ImmutableMap<PDataType, Byte> init() {
+ final ImmutableMap.Builder<PDataType, Byte> builder = new Builder<PDataType, Byte>();
+ builder.put(PLong.INSTANCE, DataType.LONG);
+ builder.put(PVarbinary.INSTANCE, DataType.BYTEARRAY);
+ builder.put(PChar.INSTANCE, DataType.CHARARRAY);
+ builder.put(PVarchar.INSTANCE, DataType.CHARARRAY);
+ builder.put(PDouble.INSTANCE, DataType.DOUBLE);
+ builder.put(PFloat.INSTANCE, DataType.FLOAT);
+ builder.put(PInteger.INSTANCE, DataType.INTEGER);
+ builder.put(PTinyint.INSTANCE, DataType.INTEGER);
+ builder.put(PSmallint.INSTANCE, DataType.INTEGER);
+ builder.put(PDecimal.INSTANCE, DataType.BIGDECIMAL);
+ builder.put(PTime.INSTANCE, DataType.DATETIME);
+ builder.put(PTimestamp.INSTANCE, DataType.DATETIME);
+ builder.put(PBoolean.INSTANCE, DataType.BOOLEAN);
+ builder.put(PDate.INSTANCE, DataType.DATETIME);
+ builder.put(PUnsignedDate.INSTANCE, DataType.DATETIME);
+ builder.put(PUnsignedDouble.INSTANCE, DataType.DOUBLE);
+ builder.put(PUnsignedFloat.INSTANCE, DataType.FLOAT);
+ builder.put(PUnsignedInt.INSTANCE, DataType.INTEGER);
+ builder.put(PUnsignedLong.INSTANCE, DataType.LONG);
+ builder.put(PUnsignedSmallint.INSTANCE, DataType.INTEGER);
+ builder.put(PUnsignedTime.INSTANCE, DataType.DATETIME);
+ builder.put(PUnsignedTimestamp.INSTANCE, DataType.DATETIME);
+ builder.put(PUnsignedTinyint.INSTANCE, DataType.INTEGER);
+ return builder.build();
+ }
+
+ /**
+ * This method returns the most appropriate PDataType associated with the incoming Pig type. Note that for the
+ * Pig DATETIME type it returns DATE; this is later used to cast the value to the targetPhoenixType accordingly.
+ * See {@link #castPigTypeToPhoenix(Object, byte, PDataType)}
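+ * For example, getType("abc", DataType.CHARARRAY) returns PVarchar.INSTANCE.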
+ *
+ * @param obj
+ * @param type the Pig {@link DataType} code for obj
+ * @return PDataType
+ */
+ public static PDataType getType(Object obj, byte type) {
+ if (obj == null) { return null; }
+ PDataType sqlType;
+
+ switch (type) {
+ case DataType.BYTEARRAY:
+ sqlType = PVarbinary.INSTANCE;
+ break;
+ case DataType.CHARARRAY:
+ sqlType = PVarchar.INSTANCE;
+ break;
+ case DataType.DOUBLE:
+ case DataType.BIGDECIMAL:
+ sqlType = PDouble.INSTANCE;
+ break;
+ case DataType.FLOAT:
+ sqlType = PFloat.INSTANCE;
+ break;
+ case DataType.INTEGER:
+ sqlType = PInteger.INSTANCE;
+ break;
+ case DataType.LONG:
+ case DataType.BIGINTEGER:
+ sqlType = PLong.INSTANCE;
+ break;
+ case DataType.BOOLEAN:
+ sqlType = PBoolean.INSTANCE;
+ break;
+ case DataType.DATETIME:
+ sqlType = PDate.INSTANCE;
+ break;
+ case DataType.BYTE:
+ sqlType = PTinyint.INSTANCE;
+ break;
+ default:
+ throw new RuntimeException("Unknown type " + obj.getClass().getName() + " passed to PhoenixHBaseStorage");
+ }
+
+ return sqlType;
+
+ }
+
+ /**
+ * This method encodes a value with a Phoenix data type. It begins by checking whether the object is a TUPLE; a
+ * {@link Tuple} is mapped to a {@link PArrayDataType}. It then checks if it is BINARY and calls
+ * {@link #castBytes(Object, PDataType)} to convert the bytes to the targetPhoenixType. It throws a
+ * RuntimeException when the object cannot be coerced.
+ *
+ * @param o
+ * @param targetPhoenixType
+ * @return Object
+ * @throws SQLException
+ */
+ public static Object castPigTypeToPhoenix(Object o, byte objectType, PDataType targetPhoenixType) throws SQLException {
+
+ if(DataType.TUPLE == objectType) {
+ Tuple tuple = (Tuple)o;
+ List<Object> data = tuple.getAll();
+ return data.toArray();
+ }
+
+ PDataType inferredPType = getType(o, objectType);
+
+ if (inferredPType == null) { return null; }
+
+ if (inferredPType == PVarbinary.INSTANCE) {
+ try {
+ o = castBytes(o, targetPhoenixType);
+ if (targetPhoenixType != PVarbinary.INSTANCE && targetPhoenixType != PBinary.INSTANCE) {
+ inferredPType = getType(o, DataType.findType(o));
+ }
+ } catch (IOException e) {
+ throw new RuntimeException("Error while casting bytes for object " + o);
+ }
+ }
+ if (inferredPType == PDate.INSTANCE) {
+ int inferredSqlType = targetPhoenixType.getSqlType();
+
+ if (inferredSqlType == Types.DATE) { return new Date(((DateTime)o).getMillis()); }
+ if (inferredSqlType == Types.TIME) { return new Time(((DateTime)o).getMillis()); }
+ if (inferredSqlType == Types.TIMESTAMP) { return new Timestamp(((DateTime)o).getMillis()); }
+ }
+
+ if (targetPhoenixType == inferredPType || inferredPType.isCoercibleTo(targetPhoenixType)) { return inferredPType
+ .toObject(o, targetPhoenixType); }
+
+ throw new RuntimeException(o.getClass().getName() + " cannot be coerced to " + targetPhoenixType.toString());
+ }
+
+ /**
+ * This method converts bytes to the target type required for Phoenix. It uses {@link HBaseBinaryConverter} for the
+ * conversion.
+ *
+ * @param o
+ * @param targetPhoenixType
+ * @return Object
+ * @throws IOException
+ */
+ private static Object castBytes(Object o, PDataType targetPhoenixType) throws IOException {
+ byte[] bytes = ((DataByteArray)o).get();
+
+ if (PDataType.equalsAny(targetPhoenixType, PChar.INSTANCE, PVarchar.INSTANCE)) {
+ return BINARY_CONVERTER.bytesToCharArray(bytes);
+ } else if (PDataType.equalsAny(targetPhoenixType, PUnsignedSmallint.INSTANCE, PSmallint.INSTANCE)) {
+ return BINARY_CONVERTER.bytesToInteger(bytes).shortValue();
+ } else if (PDataType.equalsAny(targetPhoenixType, PUnsignedTinyint.INSTANCE, PTinyint.INSTANCE)) {
+ return BINARY_CONVERTER.bytesToInteger(bytes).byteValue();
+ } else if (PDataType.equalsAny(targetPhoenixType, PUnsignedInt.INSTANCE, PInteger.INSTANCE)) {
+ return BINARY_CONVERTER.bytesToInteger(bytes);
+ } else if (targetPhoenixType.equals(PBoolean.INSTANCE)) {
+ return BINARY_CONVERTER.bytesToBoolean(bytes);
+ } else if (PDataType.equalsAny(targetPhoenixType, PFloat.INSTANCE, PUnsignedFloat.INSTANCE)) {
+ return BINARY_CONVERTER.bytesToFloat(bytes);
+ } else if (PDataType.equalsAny(targetPhoenixType, PDouble.INSTANCE, PUnsignedDouble.INSTANCE)) {
+ return BINARY_CONVERTER.bytesToDouble(bytes);
+ } else if (PDataType.equalsAny(targetPhoenixType, PUnsignedLong.INSTANCE, PLong.INSTANCE)) {
+ return BINARY_CONVERTER.bytesToLong(bytes);
+ } else if (PDataType.equalsAny(targetPhoenixType, PVarbinary.INSTANCE, PBinary.INSTANCE)) {
+ return bytes;
+ } else {
+ return o;
+ }
+ }
+
+ /**
+ * Transforms the PhoenixRecord to Pig {@link Tuple}.
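+ * For example, a record whose result map is {ID=1, NAME=foo} paired with a
+ * (long, chararray) field schema becomes the Pig tuple (1, foo).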
+ *
+ * @param record
+ * @param projectedColumns
+ * @return
+ * @throws IOException
+ */
+ public static Tuple transformToTuple(final PhoenixRecordWritable record, final ResourceFieldSchema[] projectedColumns)
+ throws IOException {
+
+ Map<String, Object> columnValues = record.getResultMap();
+
+ if (columnValues == null || columnValues.size() == 0 || projectedColumns == null
+ || projectedColumns.length != columnValues.size()) { return null; }
+ int numColumns = columnValues.size();
+ Tuple tuple = TUPLE_FACTORY.newTuple(numColumns);
+ try {
+ int i = 0;
+ for (Map.Entry<String,Object> entry : columnValues.entrySet()) {
+ final ResourceFieldSchema fieldSchema = projectedColumns[i];
+ Object object = entry.getValue();
+ if (object == null) {
+ tuple.set(i++, null);
+ continue;
+ }
+
+ switch (fieldSchema.getType()) {
+ case DataType.BYTEARRAY:
+ byte[] bytes = PDataType.fromTypeId(PBinary.INSTANCE.getSqlType()).toBytes(object);
+ tuple.set(i, new DataByteArray(bytes, 0, bytes.length));
+ break;
+ case DataType.CHARARRAY:
+ tuple.set(i, DataType.toString(object));
+ break;
+ case DataType.DOUBLE:
+ tuple.set(i, DataType.toDouble(object));
+ break;
+ case DataType.FLOAT:
+ tuple.set(i, DataType.toFloat(object));
+ break;
+ case DataType.INTEGER:
+ tuple.set(i, DataType.toInteger(object));
+ break;
+ case DataType.LONG:
+ tuple.set(i, DataType.toLong(object));
+ break;
+ case DataType.BOOLEAN:
+ tuple.set(i, DataType.toBoolean(object));
+ break;
+ case DataType.DATETIME:
+ if (object instanceof java.sql.Timestamp)
+ tuple.set(i,new DateTime(((java.sql.Timestamp)object).getTime()));
+ else
+ tuple.set(i,new DateTime(object));
+ break;
+ case DataType.BIGDECIMAL:
+ tuple.set(i, DataType.toBigDecimal(object));
+ break;
+ case DataType.BIGINTEGER:
+ tuple.set(i, DataType.toBigInteger(object));
+ break;
+ case DataType.TUPLE:
+ {
+ PhoenixArray array = (PhoenixArray)object;
+ Tuple t = TUPLE_FACTORY.newTuple(array.getDimensions());
+ for(int j = 0; j < array.getDimensions(); j++) {
+ t.set(j,array.getElement(j));
+ }
+ tuple.set(i, t);
+ break;
+ }
+ default:
+ throw new RuntimeException(String.format(" Not supported [%s] pig type", fieldSchema));
+ }
+ i++;
+ }
+ } catch (Exception ex) {
+ final String errorMsg = String.format(" Error transforming PhoenixRecord to Tuple [%s] ", ex.getMessage());
+ LOG.error(errorMsg);
+ throw new PigException(errorMsg, ex);
+ }
+ return tuple;
+ }
+
+ /**
+ * Returns the Pig data type that maps to the given Phoenix data type.
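+ * For example, PVarchar maps to CHARARRAY and any {@link PArrayDataType} maps to TUPLE.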
+ *
+ * @param phoenixDataType
+ * @return
+ */
+ public static Byte getPigDataTypeForPhoenixType(final PDataType phoenixDataType) {
+ Preconditions.checkNotNull(phoenixDataType);
+ if(phoenixDataType instanceof PArrayDataType) {
+ return DataType.TUPLE;
+ }
+ final Byte pigDataType = PHOENIX_TO_PIG_TYPE.get(phoenixDataType);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(String.format(" For PhoenixDataType [%s] , pigDataType is [%s] ",
+ phoenixDataType.getSqlTypeName(), DataType.findTypeName(pigDataType)));
+ }
+ return pigDataType;
+ }
+
+}
diff --git a/phoenix-pig/src/test/java/org/apache/phoenix/pig/util/PhoenixPigSchemaUtilTest.java b/phoenix-pig/src/test/java/org/apache/phoenix/pig/util/PhoenixPigSchemaUtilTest.java
new file mode 100644
index 0000000..9dc703b
--- /dev/null
+++ b/phoenix-pig/src/test/java/org/apache/phoenix/pig/util/PhoenixPigSchemaUtilTest.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you maynot use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.pig.util;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.sql.Types;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil.SchemaType;
+import org.apache.phoenix.pig.util.PhoenixPigSchemaUtil.Dependencies;
+import org.apache.phoenix.schema.IllegalDataException;
+import org.apache.phoenix.util.ColumnInfo;
+import org.apache.pig.ResourceSchema;
+import org.apache.pig.ResourceSchema.ResourceFieldSchema;
+import org.apache.pig.data.DataType;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+/**
+ *
+ * Tests on PhoenixPigSchemaUtil
+ */
+public class PhoenixPigSchemaUtilTest {
+ private static final ColumnInfo ID_COLUMN = new ColumnInfo("ID", Types.BIGINT);
+ private static final ColumnInfo NAME_COLUMN = new ColumnInfo("NAME", Types.VARCHAR);
+ private static final ColumnInfo LOCATION_COLUMN = new ColumnInfo("LOCATION", Types.ARRAY);
+
+
+ @Test
+ public void testSchema() throws SQLException, IOException {
+
+ final Configuration configuration = mock(Configuration.class);
+ when(configuration.get(PhoenixConfigurationUtil.SCHEMA_TYPE)).thenReturn(SchemaType.TABLE.name());
+ final ResourceSchema actual = PhoenixPigSchemaUtil.getResourceSchema(
+ configuration, new Dependencies() {
+ List<ColumnInfo> getSelectColumnMetadataList(
+ Configuration configuration) throws SQLException {
+ return Lists.newArrayList(ID_COLUMN, NAME_COLUMN);
+ }
+ });
+ // expected schema.
+ final ResourceFieldSchema[] fields = new ResourceFieldSchema[2];
+ fields[0] = new ResourceFieldSchema().setName("ID")
+ .setType(DataType.LONG);
+
+ fields[1] = new ResourceFieldSchema().setName("NAME")
+ .setType(DataType.CHARARRAY);
+ final ResourceSchema expected = new ResourceSchema().setFields(fields);
+
+ assertEquals(expected.toString(), actual.toString());
+
+ }
+
+ @Test(expected=IllegalDataException.class)
+ public void testUnSupportedTypes() throws SQLException, IOException {
+
+ final Configuration configuration = mock(Configuration.class);
+ when(configuration.get(PhoenixConfigurationUtil.SCHEMA_TYPE)).thenReturn(SchemaType.TABLE.name());
+ PhoenixPigSchemaUtil.getResourceSchema(
+ configuration, new Dependencies() {
+ List<ColumnInfo> getSelectColumnMetadataList(
+ Configuration configuration) throws SQLException {
+ return Lists.newArrayList(ID_COLUMN, LOCATION_COLUMN);
+ }
+ });
+ fail("Expected an IllegalDataException since Array types are not yet supported");
+ }
+}
diff --git a/phoenix-pig/src/test/java/org/apache/phoenix/pig/util/QuerySchemaParserFunctionTest.java b/phoenix-pig/src/test/java/org/apache/phoenix/pig/util/QuerySchemaParserFunctionTest.java
new file mode 100644
index 0000000..474d9e2
--- /dev/null
+++ b/phoenix-pig/src/test/java/org/apache/phoenix/pig/util/QuerySchemaParserFunctionTest.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.pig.util;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import java.sql.SQLException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.phoenix.query.BaseConnectionlessQueryTest;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import com.google.common.base.Joiner;
+
+/**
+ *
+ * Unit tests to validate the query passed to LOAD.
+ *
+ */
+public class QuerySchemaParserFunctionTest extends BaseConnectionlessQueryTest {
+
+ private Configuration configuration;
+ private QuerySchemaParserFunction function;
+
+ @Before
+ public void setUp() throws SQLException {
+ configuration = Mockito.mock(Configuration.class);
+ Mockito.when(configuration.get(HConstants.ZOOKEEPER_QUORUM)).thenReturn(getUrl());
+ function = new QuerySchemaParserFunction(configuration);
+ }
+
+ @Test(expected=RuntimeException.class)
+ public void testSelectQuery() {
+ final String selectQuery = "SELECT col1 FROM test";
+ function.apply(selectQuery);
+ fail("Should fail as the table [test] doesn't exist");
+ }
+
+ @Test
+ public void testValidSelectQuery() throws SQLException {
+ String ddl = "CREATE TABLE EMPLOYEE " +
+ " (id integer not null, name varchar, age integer, location varchar " +
+ " CONSTRAINT pk PRIMARY KEY (id))\n";
+ createTestTable(getUrl(), ddl);
+
+ final String selectQuery = "SELECT name,age,location FROM EMPLOYEE";
+ Pair<String,String> pair = function.apply(selectQuery);
+
+ assertEquals("EMPLOYEE", pair.getFirst());
+ assertEquals(Joiner.on(',').join("NAME", "AGE", "LOCATION"), pair.getSecond());
+ }
+
+ @Test(expected=RuntimeException.class)
+ public void testUpsertQuery() throws SQLException {
+ String ddl = "CREATE TABLE EMPLOYEE " +
+ " (id integer not null, name varchar, age integer, location varchar " +
+ " CONSTRAINT pk PRIMARY KEY (id))\n";
+ createTestTable(getUrl(), ddl);
+
+ final String upsertQuery = "UPSERT INTO EMPLOYEE (ID, NAME) VALUES (?, ?)";
+
+ function.apply(upsertQuery);
+ fail("Function call succeeded despite passing an UPSERT query");
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testAggregationQuery() throws SQLException {
+ String ddl = "CREATE TABLE EMPLOYEE " +
+ " (id integer not null, name varchar, age integer, location varchar " +
+ " CONSTRAINT pk PRIMARY KEY (id))\n";
+ createTestTable(getUrl(), ddl);
+
+ final String selectQuery = "SELECT MAX(ID) FROM EMPLOYEE";
+ function.apply(selectQuery);
+ fail("Function call succeeded despite passing an aggregate query");
+ }
+}
diff --git a/phoenix-pig/src/test/java/org/apache/phoenix/pig/util/SqlQueryToColumnInfoFunctionTest.java b/phoenix-pig/src/test/java/org/apache/phoenix/pig/util/SqlQueryToColumnInfoFunctionTest.java
new file mode 100644
index 0000000..dde8bf0
--- /dev/null
+++ b/phoenix-pig/src/test/java/org/apache/phoenix/pig/util/SqlQueryToColumnInfoFunctionTest.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.pig.util;
+
+import java.sql.SQLException;
+import java.sql.Types;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.phoenix.query.BaseConnectionlessQueryTest;
+import org.apache.phoenix.util.ColumnInfo;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import com.google.common.collect.ImmutableList;
+
+public class SqlQueryToColumnInfoFunctionTest extends BaseConnectionlessQueryTest {
+
+ private Configuration configuration;
+ private SqlQueryToColumnInfoFunction function;
+
+ @Before
+ public void setUp() throws SQLException {
+ configuration = Mockito.mock(Configuration.class);
+ Mockito.when(configuration.get(HConstants.ZOOKEEPER_QUORUM)).thenReturn(getUrl());
+ function = new SqlQueryToColumnInfoFunction(configuration);
+ }
+
+ @Test
+ public void testValidSelectQuery() throws SQLException {
+ String ddl = "CREATE TABLE EMPLOYEE " +
+ " (id integer not null, name varchar, age integer, location varchar " +
+ " CONSTRAINT pk PRIMARY KEY (id))\n";
+ createTestTable(getUrl(), ddl);
+
+ final String selectQuery = "SELECT name AS a, age AS b, UPPER(location) AS c FROM EMPLOYEE";
+ final ColumnInfo NAME_COLUMN = new ColumnInfo("A", Types.VARCHAR);
+ final ColumnInfo AGE_COLUMN = new ColumnInfo("B", Types.INTEGER);
+ final ColumnInfo LOCATION_COLUMN = new ColumnInfo("C", Types.VARCHAR);
+ final List<ColumnInfo> expectedColumnInfos = ImmutableList.of(NAME_COLUMN, AGE_COLUMN, LOCATION_COLUMN);
+ final List<ColumnInfo> actualColumnInfos = function.apply(selectQuery);
+ Assert.assertEquals(expectedColumnInfos, actualColumnInfos);
+
+ }
+}
diff --git a/phoenix-pig/src/test/java/org/apache/phoenix/pig/util/TableSchemaParserFunctionTest.java b/phoenix-pig/src/test/java/org/apache/phoenix/pig/util/TableSchemaParserFunctionTest.java
new file mode 100644
index 0000000..9e5a294
--- /dev/null
+++ b/phoenix-pig/src/test/java/org/apache/phoenix/pig/util/TableSchemaParserFunctionTest.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.pig.util;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.phoenix.pig.util.TableSchemaParserFunction;
+import org.junit.Test;
+
+import com.google.common.base.Joiner;
+
+public class TableSchemaParserFunctionTest {
+
+ final TableSchemaParserFunction function = new TableSchemaParserFunction();
+
+ @Test
+ public void testTableSchema() {
+ final String loadTableSchema = "EMPLOYEE/col1,col2";
+ final Pair<String,String> pair = function.apply(loadTableSchema);
+ assertEquals("EMPLOYEE", pair.getFirst());
+ assertEquals(Joiner.on(',').join("col1", "col2"), pair.getSecond());
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testEmptyTableSchema() {
+ final String loadTableSchema = "";
+ function.apply(loadTableSchema);
+ }
+
+ @Test
+ public void testTableOnlySchema() {
+ final String loadTableSchema = "EMPLOYEE";
+ final Pair<String,String> pair = function.apply(loadTableSchema);
+ assertEquals("EMPLOYEE", pair.getFirst());
+ assertNull(pair.getSecond());
+ }
+}
diff --git a/phoenix-pig/src/test/java/org/apache/phoenix/pig/util/TypeUtilTest.java b/phoenix-pig/src/test/java/org/apache/phoenix/pig/util/TypeUtilTest.java
new file mode 100644
index 0000000..e459dc1
--- /dev/null
+++ b/phoenix-pig/src/test/java/org/apache/phoenix/pig/util/TypeUtilTest.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.phoenix.pig.util;
+
+import static org.junit.Assert.assertEquals;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.util.Map;
+
+import org.apache.phoenix.mapreduce.PhoenixRecordWritable;
+import org.apache.phoenix.schema.types.PArrayDataType;
+import org.apache.phoenix.schema.types.PDouble;
+import org.apache.phoenix.schema.types.PhoenixArray;
+import org.apache.pig.ResourceSchema.ResourceFieldSchema;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.junit.Test;
+
+import com.google.common.collect.Maps;
+
+public class TypeUtilTest {
+
+ @Test
+ public void testTransformToTuple() throws Exception {
+ PhoenixRecordWritable record = mock(PhoenixRecordWritable.class);
+ Double[] doubleArr = new Double[2];
+ doubleArr[0] = 64.87;
+ doubleArr[1] = 89.96;
+ PhoenixArray arr = PArrayDataType.instantiatePhoenixArray(PDouble.INSTANCE, doubleArr);
+ Map<String,Object> values = Maps.newLinkedHashMap();
+ values.put("first", "213123");
+ values.put("second", 1231123);
+ values.put("third", 31231231232131L);
+ values.put("four", "bytearray".getBytes());
+ values.put("five", arr);
+ when(record.getResultMap()).thenReturn(values);
+
+ ResourceFieldSchema field = new ResourceFieldSchema().setType(DataType.CHARARRAY);
+ ResourceFieldSchema field1 = new ResourceFieldSchema().setType(DataType.INTEGER);
+ ResourceFieldSchema field2 = new ResourceFieldSchema().setType(DataType.LONG);
+ ResourceFieldSchema field3 = new ResourceFieldSchema().setType(DataType.BYTEARRAY);
+ ResourceFieldSchema field4 = new ResourceFieldSchema().setType(DataType.TUPLE);
+ ResourceFieldSchema[] projectedColumns = { field, field1, field2, field3, field4 };
+
+ Tuple t = TypeUtil.transformToTuple(record, projectedColumns);
+
+ assertEquals(DataType.LONG, DataType.findType(t.get(2)));
+ assertEquals(DataType.TUPLE, DataType.findType(t.get(4)));
+ Tuple doubleArrayTuple = (Tuple)t.get(4);
+ assertEquals(2, doubleArrayTuple.size());
+
+ field = new ResourceFieldSchema().setType(DataType.BIGDECIMAL);
+ field1 = new ResourceFieldSchema().setType(DataType.BIGINTEGER);
+ values.clear();
+ values.put("first", new BigDecimal("123123123.123213"));
+ values.put("second", new BigInteger("1312313231312"));
+ ResourceFieldSchema[] columns = { field, field1 };
+
+ t = TypeUtil.transformToTuple(record, columns);
+
+ assertEquals(DataType.BIGDECIMAL, DataType.findType(t.get(0)));
+ assertEquals(DataType.BIGINTEGER, DataType.findType(t.get(1)));
+ }
+}
diff --git a/phoenix-spark/README.md b/phoenix-spark/README.md
new file mode 100644
index 0000000..3674b8f
--- /dev/null
+++ b/phoenix-spark/README.md
@@ -0,0 +1,182 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+phoenix-spark extends Phoenix's MapReduce support to allow Spark to load Phoenix tables as RDDs or
+DataFrames, and enables persisting RDDs of Tuples back to Phoenix.
+
+## Reading Phoenix Tables
+
+Given a Phoenix table with the following DDL
+
+```sql
+CREATE TABLE TABLE1 (ID BIGINT NOT NULL PRIMARY KEY, COL1 VARCHAR);
+UPSERT INTO TABLE1 (ID, COL1) VALUES (1, 'test_row_1');
+UPSERT INTO TABLE1 (ID, COL1) VALUES (2, 'test_row_2');
+```
+
+### Load as a DataFrame using the Data Source API
+```scala
+import org.apache.spark.SparkContext
+import org.apache.spark.sql.SQLContext
+import org.apache.phoenix.spark._
+
+val sc = new SparkContext("local", "phoenix-test")
+val sqlContext = new SQLContext(sc)
+
+val df = sqlContext.load(
+ "org.apache.phoenix.spark",
+ Map("table" -> "TABLE1", "zkUrl" -> "phoenix-server:2181")
+)
+
+df
+ .filter(df("COL1") === "test_row_1" && df("ID") === 1L)
+ .select(df("ID"))
+ .show
+```
+
+### Load as a DataFrame directly using a Configuration object
+```scala
+import org.apache.hadoop.conf.Configuration
+import org.apache.spark.SparkContext
+import org.apache.spark.sql.SQLContext
+import org.apache.phoenix.spark._
+
+val configuration = new Configuration()
+// Phoenix-specific settings can be set here; 'hbase.zookeeper.quorum' is required
+
+val sc = new SparkContext("local", "phoenix-test")
+val sqlContext = new SQLContext(sc)
+
+// Load the columns 'ID' and 'COL1' from TABLE1 as a DataFrame
+val df = sqlContext.phoenixTableAsDataFrame(
+ "TABLE1", Array("ID", "COL1"), conf = configuration
+)
+
+df.show
+```
+
+### Load as an RDD, using a Zookeeper URL
+```scala
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.phoenix.spark._
+
+val sc = new SparkContext("local", "phoenix-test")
+
+// Load the columns 'ID' and 'COL1' from TABLE1 as an RDD
+val rdd: RDD[Map[String, AnyRef]] = sc.phoenixTableAsRDD(
+ "TABLE1", Seq("ID", "COL1"), zkUrl = Some("phoenix-server:2181")
+)
+
+rdd.count()
+
+val firstId = rdd.first()("ID").asInstanceOf[Long]
+val firstCol = rdd.first()("COL1").asInstanceOf[String]
+```
+
+## Saving RDDs to Phoenix
+
+`saveToPhoenix` is an implicit method on `RDD[Product]`, i.e. an RDD of Tuples. The data types must
+correspond to the Java types Phoenix supports (http://phoenix.apache.org/language/datatypes.html).
+
+Given a Phoenix table with the following DDL
+
+```sql
+CREATE TABLE OUTPUT_TEST_TABLE (id BIGINT NOT NULL PRIMARY KEY, col1 VARCHAR, col2 INTEGER);
+```
+
+```scala
+import org.apache.spark.SparkContext
+import org.apache.phoenix.spark._
+
+val sc = new SparkContext("local", "phoenix-test")
+val dataSet = List((1L, "1", 1), (2L, "2", 2), (3L, "3", 3))
+
+sc
+ .parallelize(dataSet)
+ .saveToPhoenix(
+ "OUTPUT_TEST_TABLE",
+ Seq("ID","COL1","COL2"),
+ zkUrl = Some("phoenix-server:2181")
+ )
+```
+
+## Saving DataFrames to Phoenix
+
+The `save` method on DataFrame allows passing in a data source type. You can use
+`org.apache.phoenix.spark`, and must also pass in `table` and `zkUrl` parameters to
+specify which table and server to persist the DataFrame to. The column names are derived from
+the DataFrame's schema field names, and must match the Phoenix column names.
+
+The `save` method also takes a `SaveMode` option, for which only `SaveMode.Overwrite` is supported.
+
+Given two Phoenix tables with the following DDL:
+
+```sql
+CREATE TABLE INPUT_TABLE (id BIGINT NOT NULL PRIMARY KEY, col1 VARCHAR, col2 INTEGER);
+CREATE TABLE OUTPUT_TABLE (id BIGINT NOT NULL PRIMARY KEY, col1 VARCHAR, col2 INTEGER);
+```
+
+```scala
+import org.apache.spark.SparkContext
+import org.apache.spark.sql.{SQLContext, SaveMode}
+import org.apache.phoenix.spark._
+
+// Load INPUT_TABLE
+val sc = new SparkContext("local", "phoenix-test")
+val sqlContext = new SQLContext(sc)
+val df = sqlContext.load("org.apache.phoenix.spark", Map("table" -> "INPUT_TABLE",
+ "zkUrl" -> "phoenix-server:2181"))
+
+// Save to OUTPUT_TABLE
+df.save("org.apache.phoenix.spark", SaveMode.Overwrite, Map("table" -> "OUTPUT_TABLE",
+ "zkUrl" -> "phoenix-server:2181"))
+```
+
+## Notes
+
+The functions `phoenixTableAsDataFrame`, `phoenixTableAsRDD` and `saveToPhoenix` all support
+optionally specifying a `conf` Hadoop configuration parameter with custom Phoenix client settings,
+as well as an optional `zkUrl` parameter for the Phoenix connection URL.
+
+If `zkUrl` isn't specified, it's assumed that the "hbase.zookeeper.quorum" property has been set
+in the `conf` parameter. Similarly, if no configuration is passed in, `zkUrl` must be specified.
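+
+For example, a minimal sketch (reusing the TABLE1 schema from above) that supplies the
+connection through the `conf` object alone, without a `zkUrl`:
+
+```scala
+import org.apache.hadoop.conf.Configuration
+import org.apache.spark.SparkContext
+import org.apache.phoenix.spark._
+
+val conf = new Configuration()
+// Setting 'hbase.zookeeper.quorum' here takes the place of the zkUrl parameter;
+// any other custom Phoenix client settings can be added to the same object
+conf.set("hbase.zookeeper.quorum", "phoenix-server:2181")
+
+val sc = new SparkContext("local", "phoenix-test")
+val rdd = sc.phoenixTableAsRDD("TABLE1", Seq("ID", "COL1"), conf = conf)
+rdd.count()
+```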
+
+## Limitations
+
+- Only basic support for column and predicate pushdown is available through the Data Source API
+- The Data Source API does not support passing custom Phoenix settings in configuration; you must
+create the DataFrame or RDD directly if you need fine-grained configuration.
+- No support for aggregate or distinct functions (http://phoenix.apache.org/phoenix_mr.html)
diff --git a/phoenix-spark/pom.xml b/phoenix-spark/pom.xml
new file mode 100644
index 0000000..de634c4
--- /dev/null
+++ b/phoenix-spark/pom.xml
@@ -0,0 +1,594 @@
+<?xml version='1.0'?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-connectors</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ </parent>
+ <artifactId>phoenix-spark</artifactId>
+ <name>Phoenix - Spark</name>
+
+ <properties>
+ <top.dir>${project.basedir}/..</top.dir>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-core</artifactId>
+ </dependency>
+
+ <!-- Force import of Spark's servlet API for unit tests -->
+ <dependency>
+ <groupId>javax.servlet</groupId>
+ <artifactId>javax.servlet-api</artifactId>
+ <version>3.0.1</version>
+ <scope>test</scope>
+ </dependency>
+
+ <!-- Mark Spark / Scala as provided -->
+ <dependency>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-library</artifactId>
+ <version>${scala.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core_${scala.binary.version}</artifactId>
+ <version>${spark.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-sql_${scala.binary.version}</artifactId>
+ <version>${spark.version}</version>
+ <scope>provided</scope>
+ </dependency>
+
+ <!-- Test dependencies -->
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-core</artifactId>
+ <classifier>tests</classifier>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest_${scala.binary.version}</artifactId>
+ <version>2.2.4</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.scalamock</groupId>
+ <artifactId>scalamock-scalatest-support_${scala.binary.version}</artifactId>
+ <version>3.1.4</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>${hadoop-two.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet.jsp</groupId>
+ <artifactId>jsp-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jboss.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <version>${hadoop-two.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet.jsp</groupId>
+ <artifactId>jsp-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jboss.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <version>${hadoop-two.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet.jsp</groupId>
+ <artifactId>jsp-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jboss.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <version>${hadoop-two.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet.jsp</groupId>
+ <artifactId>jsp-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jboss.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.thrift</groupId>
+ <artifactId>thrift</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jsp-2.1</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jsp-api-2.1</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>servlet-api-2.5</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-json</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-server</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty-util</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>tomcat</groupId>
+ <artifactId>jasper-runtime</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>tomcat</groupId>
+ <artifactId>jasper-compiler</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jboss.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <version>${hbase.version}</version>
+ <scope>test</scope>
+ <type>test-jar</type>
+ <exclusions>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.thrift</groupId>
+ <artifactId>thrift</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jsp-2.1</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jsp-api-2.1</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>servlet-api-2.5</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-json</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-server</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty-util</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>tomcat</groupId>
+ <artifactId>jasper-runtime</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>tomcat</groupId>
+ <artifactId>jasper-compiler</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jboss.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop2-compat</artifactId>
+ <version>${hbase.version}</version>
+ <scope>test</scope>
+ <type>test-jar</type>
+ <exclusions>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.thrift</groupId>
+ <artifactId>thrift</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jsp-2.1</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jsp-api-2.1</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>servlet-api-2.5</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-json</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-server</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty-util</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>tomcat</groupId>
+ <artifactId>jasper-runtime</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>tomcat</groupId>
+ <artifactId>jasper-compiler</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jboss.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ <version>${hbase.version}</version>
+ <scope>test</scope>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-it</artifactId>
+ <version>${hbase.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tephra</groupId>
+ <artifactId>tephra-core</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <testSourceDirectory>src/it/scala</testSourceDirectory>
+ <testResources><testResource><directory>src/it/resources</directory></testResource></testResources>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration>
+ <source>1.8</source>
+ <target>1.8</target>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ </plugin>
+
+ <plugin>
+ <groupId>net.alchim31.maven</groupId>
+ <artifactId>scala-maven-plugin</artifactId>
+ <version>3.4.4</version>
+ <configuration>
+ <charset>${project.build.sourceEncoding}</charset>
+ <jvmArgs>
+ <jvmArg>-Xmx1024m</jvmArg>
+ </jvmArgs>
+ <scalaVersion>${scala.version}</scalaVersion>
+ <scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
+ </configuration>
+ <executions>
+ <execution>
+ <id>scala-compile-first</id>
+ <phase>process-resources</phase>
+ <goals>
+ <goal>add-source</goal>
+ <goal>compile</goal>
+ </goals>
+ </execution>
+ <execution>
+ <id>scala-test-compile</id>
+ <phase>process-test-resources</phase>
+ <goals>
+ <goal>testCompile</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+
+ <plugin>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest-maven-plugin</artifactId>
+ <version>1.0</version>
+ <configuration>
+ <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
+ <junitxml>.</junitxml>
+ <filereports>WDF TestSuite.txt</filereports>
+ </configuration>
+ <executions>
+ <execution>
+ <id>test</id>
+ <phase>test</phase>
+ <goals>
+ <goal>test</goal>
+ </goals>
+ <configuration>
+ <skipTests>true</skipTests>
+ </configuration>
+ </execution>
+ <execution>
+ <id>integration-test</id>
+ <phase>integration-test</phase>
+ <goals>
+ <goal>test</goal>
+ </goals>
+ <configuration>
+ <!-- Need this false until we can switch to JUnit 4.13 due to
+ https://github.com/junit-team/junit4/issues/1223
+ -->
+ <parallel>false</parallel>
+ <tagsToExclude>Integration-Test</tagsToExclude>
+ <argLine>-Xmx1536m -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m</argLine>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>empty-javadoc-jar</id>
+ <phase>package</phase>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ <configuration>
+ <classifier>javadoc</classifier>
+ <classesDirectory>${basedir}/javadoc</classesDirectory>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/phoenix-spark/src/it/java/org/apache/phoenix/spark/AggregateIT.java b/phoenix-spark/src/it/java/org/apache/phoenix/spark/AggregateIT.java
new file mode 100644
index 0000000..e4b96a3
--- /dev/null
+++ b/phoenix-spark/src/it/java/org/apache/phoenix/spark/AggregateIT.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.spark;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.phoenix.end2end.BaseAggregateIT;
+import org.apache.phoenix.util.QueryBuilder;
+
+public class AggregateIT extends BaseAggregateIT {
+
+ @Override
+ protected ResultSet executeQueryThrowsException(Connection conn, QueryBuilder queryBuilder,
+ String expectedPhoenixExceptionMsg, String expectedSparkExceptionMsg) {
+ ResultSet rs = null;
+ try {
+ rs = executeQuery(conn, queryBuilder);
+ fail("Expected the query to throw an exception");
+ }
+ catch(Exception e) {
+ assertTrue(e.getMessage().contains(expectedSparkExceptionMsg));
+ }
+ return rs;
+ }
+
+ @Override
+ protected ResultSet executeQuery(Connection conn, QueryBuilder queryBuilder) throws SQLException {
+ return SparkUtil.executeQuery(conn, queryBuilder, getUrl(), config);
+ }
+
+ @Override
+ protected void testCountNullInNonEmptyKeyValueCF(int columnEncodedBytes) throws Exception {
+ try (Connection conn = DriverManager.getConnection(getUrl())) {
+ //Type is INT
+ String intTableName=generateUniqueName();
+ String sql="create table " + intTableName + " (mykey integer not null primary key, A.COLA integer, B.COLB integer) "
+ + "IMMUTABLE_ROWS=true, IMMUTABLE_STORAGE_SCHEME = ONE_CELL_PER_COLUMN, COLUMN_ENCODED_BYTES = " + columnEncodedBytes + ", DISABLE_WAL=true";
+
+ conn.createStatement().execute(sql);
+ conn.createStatement().execute("UPSERT INTO "+intTableName+" VALUES (1,1)");
+ conn.createStatement().execute("UPSERT INTO "+intTableName+" VALUES (2,1)");
+ conn.createStatement().execute("UPSERT INTO "+intTableName+" VALUES (3,1,2)");
+ conn.createStatement().execute("UPSERT INTO "+intTableName+" VALUES (4,1)");
+ conn.createStatement().execute("UPSERT INTO "+intTableName+" VALUES (5,1)");
+ conn.commit();
+
+ // equivalent SQL: select count(*) from intTableName
+ QueryBuilder queryBuilder = new QueryBuilder()
+ .setSelectExpression("COUNT(*)")
+ .setFullTableName(intTableName);
+ ResultSet rs = executeQuery(conn, queryBuilder);
+ assertTrue(rs.next());
+ assertEquals(5, rs.getLong(1));
+
+ // equivalent SQL: select count(*) from intTableName where b.colb is not null
+ queryBuilder.setWhereClause("`B.COLB` IS NOT NULL");
+ rs = executeQuery(conn, queryBuilder);
+ assertTrue(rs.next());
+ assertEquals(1, rs.getLong(1));
+
+ // equivalent SQL: select count(*) from intTableName where b.colb is null
+ queryBuilder.setWhereClause("`B.COLB` IS NULL");
+ rs = executeQuery(conn, queryBuilder);
+ assertTrue(rs.next());
+ assertEquals(4, rs.getLong(1));
+ }
+ }
+
+}
diff --git a/phoenix-spark/src/it/java/org/apache/phoenix/spark/OrderByIT.java b/phoenix-spark/src/it/java/org/apache/phoenix/spark/OrderByIT.java
new file mode 100644
index 0000000..e44b011
--- /dev/null
+++ b/phoenix-spark/src/it/java/org/apache/phoenix/spark/OrderByIT.java
@@ -0,0 +1,449 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.spark;
+
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.sql.Connection;
+import java.sql.Date;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.phoenix.end2end.BaseOrderByIT;
+import org.apache.phoenix.spark.datasource.v2.PhoenixDataSource;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.QueryBuilder;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.sources.v2.DataSourceOptions;
+import org.junit.Test;
+
+public class OrderByIT extends BaseOrderByIT {
+
+ @Override
+ protected ResultSet executeQueryThrowsException(Connection conn, QueryBuilder queryBuilder,
+ String expectedPhoenixExceptionMsg, String expectedSparkExceptionMsg) {
+ ResultSet rs = null;
+ try {
+ rs = executeQuery(conn, queryBuilder);
+ fail();
+ }
+ catch(Exception e) {
+ assertTrue(e.getMessage().contains(expectedSparkExceptionMsg));
+ }
+ return rs;
+ }
+
+ @Override
+ protected ResultSet executeQuery(Connection conn, QueryBuilder queryBuilder) throws SQLException {
+ return SparkUtil.executeQuery(conn, queryBuilder, getUrl(), config);
+ }
+
+ @Test
+ public void testOrderByWithJoin() throws Exception {
+ Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+ try (Connection conn = DriverManager.getConnection(getUrl(), props)) {
+ conn.setAutoCommit(false);
+ String tableName1 = generateUniqueName();
+ String ddl = "CREATE TABLE " + tableName1 +
+ " (a_string varchar not null, cf1.a integer, cf1.b varchar, col1 integer, cf2.c varchar, cf2.d integer " +
+ " CONSTRAINT pk PRIMARY KEY (a_string))\n";
+ createTestTable(getUrl(), ddl);
+ String dml = "UPSERT INTO " + tableName1 + " VALUES(?,?,?,?,?,?)";
+ PreparedStatement stmt = conn.prepareStatement(dml);
+ stmt.setString(1, "a");
+ stmt.setInt(2, 40);
+ stmt.setString(3, "aa");
+ stmt.setInt(4, 10);
+ stmt.setString(5, "bb");
+ stmt.setInt(6, 20);
+ stmt.execute();
+ stmt.setString(1, "c");
+ stmt.setInt(2, 30);
+ stmt.setString(3, "cc");
+ stmt.setInt(4, 50);
+ stmt.setString(5, "dd");
+ stmt.setInt(6, 60);
+ stmt.execute();
+ stmt.setString(1, "b");
+ stmt.setInt(2, 40);
+ stmt.setString(3, "bb");
+ stmt.setInt(4, 5);
+ stmt.setString(5, "aa");
+ stmt.setInt(6, 80);
+ stmt.execute();
+ conn.commit();
+
+ String tableName2 = generateUniqueName();
+ ddl = "CREATE TABLE " + tableName2 +
+ " (a_string varchar not null, col1 integer" +
+ " CONSTRAINT pk PRIMARY KEY (a_string))\n";
+ createTestTable(getUrl(), ddl);
+
+ dml = "UPSERT INTO " + tableName2 + " VALUES(?, ?)";
+ stmt = conn.prepareStatement(dml);
+ stmt.setString(1, "a");
+ stmt.setInt(2, 40);
+ stmt.execute();
+ stmt.setString(1, "b");
+ stmt.setInt(2, 20);
+ stmt.execute();
+ stmt.setString(1, "c");
+ stmt.setInt(2, 30);
+ stmt.execute();
+ conn.commit();
+
+ // register both tables as temp views so the JOIN query can run through Spark SQL
+ SQLContext sqlContext = SparkUtil.getSparkSession().sqlContext();
+ Dataset phoenixDataSet = SparkUtil.getSparkSession().read().format("phoenix")
+ .option(DataSourceOptions.TABLE_KEY, tableName1)
+ .option(PhoenixDataSource.ZOOKEEPER_URL, getUrl()).load();
+ phoenixDataSet.createOrReplaceTempView(tableName1);
+ phoenixDataSet = SparkUtil.getSparkSession().read().format("phoenix")
+ .option(DataSourceOptions.TABLE_KEY, tableName2)
+ .option(PhoenixDataSource.ZOOKEEPER_URL, getUrl()).load();
+ phoenixDataSet.createOrReplaceTempView(tableName2);
+
+ String query =
+ "SELECT T1.* FROM " + tableName1 + " T1 JOIN " + tableName2
+ + " T2 ON T1.A_STRING = T2.A_STRING ORDER BY T1.`CF1.B`";
+ Dataset<Row> dataset =
+ sqlContext.sql(query);
+ List<Row> rows = dataset.collectAsList();
+ ResultSet rs = new SparkResultSet(rows, dataset.columns());
+
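+ // rows come back ordered by CF1.B: "aa" (row a), "bb" (row b), "cc" (row c)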
+ assertTrue(rs.next());
+ assertEquals("a",rs.getString(1));
+ assertEquals(40,rs.getInt(2));
+ assertEquals("aa",rs.getString(3));
+ assertEquals(10,rs.getInt(4));
+ assertEquals("bb",rs.getString(5));
+ assertEquals(20,rs.getInt(6));
+ assertTrue(rs.next());
+ assertEquals("b",rs.getString(1));
+ assertEquals(40,rs.getInt(2));
+ assertEquals("bb",rs.getString(3));
+ assertEquals(5,rs.getInt(4));
+ assertEquals("aa",rs.getString(5));
+ assertEquals(80,rs.getInt(6));
+ assertTrue(rs.next());
+ assertEquals("c",rs.getString(1));
+ assertEquals(30,rs.getInt(2));
+ assertEquals("cc",rs.getString(3));
+ assertEquals(50,rs.getInt(4));
+ assertEquals("dd",rs.getString(5));
+ assertEquals(60,rs.getInt(6));
+ assertFalse(rs.next());
+
+ query =
+ "SELECT T1.A_STRING, T2.COL1 FROM " + tableName1 + " T1 JOIN " + tableName2
+ + " T2 ON T1.A_STRING = T2.A_STRING ORDER BY T2.COL1";
+ dataset = sqlContext.sql(query);
+ rows = dataset.collectAsList();
+ rs = new SparkResultSet(rows, dataset.columns());
+ assertTrue(rs.next());
+ assertEquals("b",rs.getString(1));
+ assertEquals(20,rs.getInt(2));
+ assertTrue(rs.next());
+ assertEquals("c",rs.getString(1));
+ assertEquals(30,rs.getInt(2));
+ assertTrue(rs.next());
+ assertEquals("a",rs.getString(1));
+ assertEquals(40,rs.getInt(2));
+ assertFalse(rs.next());
+ }
+ }
+
+ @Test
+ public void testOrderByWithUnionAll() throws Exception {
+ Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+ try (Connection conn = DriverManager.getConnection(getUrl(), props)){
+ conn.setAutoCommit(false);
+ String tableName1 = generateUniqueName();
+ String ddl = "CREATE TABLE " + tableName1 +
+ " (a_string varchar not null, cf1.a integer, cf1.b varchar, col1 integer, cf2.c varchar, cf2.d integer " +
+ " CONSTRAINT pk PRIMARY KEY (a_string))\n";
+ createTestTable(getUrl(), ddl);
+ String dml = "UPSERT INTO " + tableName1 + " VALUES(?,?,?,?,?,?)";
+ PreparedStatement stmt = conn.prepareStatement(dml);
+ stmt.setString(1, "a");
+ stmt.setInt(2, 40);
+ stmt.setString(3, "aa");
+ stmt.setInt(4, 10);
+ stmt.setString(5, "bb");
+ stmt.setInt(6, 20);
+ stmt.execute();
+ stmt.setString(1, "c");
+ stmt.setInt(2, 30);
+ stmt.setString(3, "cc");
+ stmt.setInt(4, 50);
+ stmt.setString(5, "dd");
+ stmt.setInt(6, 60);
+ stmt.execute();
+ stmt.setString(1, "b");
+ stmt.setInt(2, 40);
+ stmt.setString(3, "bb");
+ stmt.setInt(4, 5);
+ stmt.setString(5, "aa");
+ stmt.setInt(6, 80);
+ stmt.execute();
+ conn.commit();
+
+ String tableName2 = generateUniqueName();
+ ddl = "CREATE TABLE " + tableName2 +
+ " (a_string varchar not null, col1 integer" +
+ " CONSTRAINT pk PRIMARY KEY (a_string))\n";
+ createTestTable(getUrl(), ddl);
+
+ dml = "UPSERT INTO " + tableName2 + " VALUES(?, ?)";
+ stmt = conn.prepareStatement(dml);
+ stmt.setString(1, "aa");
+ stmt.setInt(2, 40);
+ stmt.execute();
+ stmt.setString(1, "bb");
+ stmt.setInt(2, 10);
+ stmt.execute();
+ stmt.setString(1, "cc");
+ stmt.setInt(2, 30);
+ stmt.execute();
+ conn.commit();
+
+ SQLContext sqlContext = SparkUtil.getSparkSession().sqlContext();
+ Dataset phoenixDataSet = SparkUtil.getSparkSession().read().format("phoenix")
+ .option(DataSourceOptions.TABLE_KEY, tableName1)
+ .option(PhoenixDataSource.ZOOKEEPER_URL, getUrl()).load();
+ phoenixDataSet.createOrReplaceTempView(tableName1);
+ phoenixDataSet = SparkUtil.getSparkSession().read().format("phoenix")
+ .option(DataSourceOptions.TABLE_KEY, tableName2)
+ .option(PhoenixDataSource.ZOOKEEPER_URL, getUrl()).load();
+ phoenixDataSet.createOrReplaceTempView(tableName2);
+
+ String query =
+ "select a_string, `cf2.d` from " + tableName1 + " union all select * from "
+ + tableName2 + " order by `cf2.d`";
+ Dataset<Row> dataset =
+ sqlContext.sql(query);
+ List<Row> rows = dataset.collectAsList();
+ ResultSet rs = new SparkResultSet(rows, dataset.columns());
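+ // UNION ALL interleaves rows from both tables, ordered by the cf2.d / col1 value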
+ assertTrue(rs.next());
+ assertEquals("bb",rs.getString(1));
+ assertEquals(10,rs.getInt(2));
+ assertTrue(rs.next());
+ assertEquals("a",rs.getString(1));
+ assertEquals(20,rs.getInt(2));
+ assertTrue(rs.next());
+ assertEquals("cc",rs.getString(1));
+ assertEquals(30,rs.getInt(2));
+ assertTrue(rs.next());
+ assertEquals("aa",rs.getString(1));
+ assertEquals(40,rs.getInt(2));
+ assertTrue(rs.next());
+ assertEquals("c",rs.getString(1));
+ assertEquals(60,rs.getInt(2));
+ assertTrue(rs.next());
+ assertEquals("b",rs.getString(1));
+ assertEquals(80,rs.getInt(2));
+ assertFalse(rs.next());
+ }
+ }
+
+ @Test
+ public void testOrderByWithExpression() throws Exception {
+ Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+ Connection conn = DriverManager.getConnection(getUrl(), props);
+ conn.setAutoCommit(false);
+
+ try {
+ String tableName = generateUniqueName();
+ String ddl = "CREATE TABLE " + tableName +
+ " (a_string varchar not null, col1 integer, col2 integer, col3 timestamp, col4 varchar" +
+ " CONSTRAINT pk PRIMARY KEY (a_string))\n";
+ createTestTable(getUrl(), ddl);
+
+ Date date = new Date(System.currentTimeMillis());
+ String dml = "UPSERT INTO " + tableName + " VALUES(?, ?, ?, ?, ?)";
+ PreparedStatement stmt = conn.prepareStatement(dml);
+ stmt.setString(1, "a");
+ stmt.setInt(2, 40);
+ stmt.setInt(3, 20);
+ stmt.setDate(4, new Date(date.getTime()));
+ stmt.setString(5, "xxyy");
+ stmt.execute();
+ stmt.setString(1, "b");
+ stmt.setInt(2, 50);
+ stmt.setInt(3, 30);
+ stmt.setDate(4, new Date(date.getTime()-500));
+ stmt.setString(5, "yyzz");
+ stmt.execute();
+ stmt.setString(1, "c");
+ stmt.setInt(2, 60);
+ stmt.setInt(3, 20);
+ stmt.setDate(4, new Date(date.getTime()-300));
+ stmt.setString(5, "ddee");
+ stmt.execute();
+ conn.commit();
+
+ SQLContext sqlContext = SparkUtil.getSparkSession().sqlContext();
+ Dataset phoenixDataSet = SparkUtil.getSparkSession().read().format("phoenix")
+ .option(DataSourceOptions.TABLE_KEY, tableName)
+ .option(PhoenixDataSource.ZOOKEEPER_URL, getUrl()).load();
+ phoenixDataSet.createOrReplaceTempView(tableName);
+ Dataset<Row> dataset =
+ sqlContext.sql("SELECT col1+col2, col4, a_string FROM " + tableName
+ + " ORDER BY col1+col2, col4");
+ List<Row> rows = dataset.collectAsList();
+ ResultSet rs = new SparkResultSet(rows, dataset.columns());
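+ // col1+col2: a=60, c=80, b=80; the tie between b and c is broken by col4 ("ddee" < "yyzz")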
+ assertTrue(rs.next());
+ assertEquals("a", rs.getString(3));
+ assertTrue(rs.next());
+ assertEquals("c", rs.getString(3));
+ assertTrue(rs.next());
+ assertEquals("b", rs.getString(3));
+ assertFalse(rs.next());
+ } catch (SQLException e) {
+ fail(e.getMessage());
+ } finally {
+ conn.close();
+ }
+ }
+
+ @Test
+ public void testColumnFamily() throws Exception {
+ Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+ try (Connection conn = DriverManager.getConnection(getUrl(), props)) {
+ conn.setAutoCommit(false);
+ String tableName = generateUniqueName();
+ String ddl = "CREATE TABLE " + tableName +
+ " (a_string varchar not null, cf1.a integer, cf1.b varchar, col1 integer, cf2.c varchar, cf2.d integer, col2 integer" +
+ " CONSTRAINT pk PRIMARY KEY (a_string))\n";
+ createTestTable(getUrl(), ddl);
+ String dml = "UPSERT INTO " + tableName + " VALUES(?,?,?,?,?,?,?)";
+ PreparedStatement stmt = conn.prepareStatement(dml);
+ stmt.setString(1, "a");
+ stmt.setInt(2, 40);
+ stmt.setString(3, "aa");
+ stmt.setInt(4, 10);
+ stmt.setString(5, "bb");
+ stmt.setInt(6, 20);
+ stmt.setInt(7, 1);
+ stmt.execute();
+ stmt.setString(1, "c");
+ stmt.setInt(2, 30);
+ stmt.setString(3, "cc");
+ stmt.setInt(4, 50);
+ stmt.setString(5, "dd");
+ stmt.setInt(6, 60);
+ stmt.setInt(7, 3);
+ stmt.execute();
+ stmt.setString(1, "b");
+ stmt.setInt(2, 40);
+ stmt.setString(3, "bb");
+ stmt.setInt(4, 5);
+ stmt.setString(5, "aa");
+ stmt.setInt(6, 80);
+ stmt.setInt(7, 2);
+ stmt.execute();
+ conn.commit();
+
+ SQLContext sqlContext = SparkUtil.getSparkSession().sqlContext();
+ Dataset phoenixDataSet = SparkUtil.getSparkSession().read().format("phoenix")
+ .option(DataSourceOptions.TABLE_KEY, tableName)
+ .option(PhoenixDataSource.ZOOKEEPER_URL, getUrl()).load();
+ phoenixDataSet.createOrReplaceTempView(tableName);
+ Dataset<Row> dataset =
+ sqlContext.sql("SELECT A_STRING, `CF1.A`, `CF1.B`, COL1, `CF2.C`, `CF2.D`, COL2 from "
+ + tableName + " ORDER BY `CF1.A`,`CF2.C`");
+ List<Row> rows = dataset.collectAsList();
+ ResultSet rs = new SparkResultSet(rows, dataset.columns());
+
+ assertTrue(rs.next());
+ assertEquals("c",rs.getString(1));
+ assertEquals(30,rs.getInt(2));
+ assertEquals("cc",rs.getString(3));
+ assertEquals(50,rs.getInt(4));
+ assertEquals("dd",rs.getString(5));
+ assertEquals(60,rs.getInt(6));
+ assertEquals(3,rs.getInt(7));
+ assertTrue(rs.next());
+ assertEquals("b",rs.getString(1));
+ assertEquals(40,rs.getInt(2));
+ assertEquals("bb",rs.getString(3));
+ assertEquals(5,rs.getInt(4));
+ assertEquals("aa",rs.getString(5));
+ assertEquals(80,rs.getInt(6));
+ assertEquals(2,rs.getInt(7));
+ assertTrue(rs.next());
+ assertEquals("a",rs.getString(1));
+ assertEquals(40,rs.getInt(2));
+ assertEquals("aa",rs.getString(3));
+ assertEquals(10,rs.getInt(4));
+ assertEquals("bb",rs.getString(5));
+ assertEquals(20,rs.getInt(6));
+ assertEquals(1,rs.getInt(7));
+ assertFalse(rs.next());
+
+ dataset =
+ sqlContext.sql("SELECT A_STRING, `CF1.A`, `CF1.B`, COL1, `CF2.C`, `CF2.D`, COL2 from "
+ + tableName + " ORDER BY COL2");
+ rows = dataset.collectAsList();
+ rs = new SparkResultSet(rows, dataset.columns());
+
+ assertTrue(rs.next());
+ assertEquals("a",rs.getString(1));
+ assertEquals(40,rs.getInt(2));
+ assertEquals("aa",rs.getString(3));
+ assertEquals(10,rs.getInt(4));
+ assertEquals("bb",rs.getString(5));
+ assertEquals(20,rs.getInt(6));
+ assertEquals(1,rs.getInt(7));
+ assertTrue(rs.next());
+ assertEquals("b",rs.getString(1));
+ assertEquals(40,rs.getInt(2));
+ assertEquals("bb",rs.getString(3));
+ assertEquals(5,rs.getInt(4));
+ assertEquals("aa",rs.getString(5));
+ assertEquals(80,rs.getInt(6));
+ assertEquals(2,rs.getInt(7));
+ assertTrue(rs.next());
+ assertEquals("c",rs.getString(1));
+ assertEquals(30,rs.getInt(2));
+ assertEquals("cc",rs.getString(3));
+ assertEquals(50,rs.getInt(4));
+ assertEquals("dd",rs.getString(5));
+ assertEquals(60,rs.getInt(6));
+ assertEquals(3,rs.getInt(7));
+ assertFalse(rs.next());
+ }
+ }
+}
diff --git a/phoenix-spark/src/it/java/org/apache/phoenix/spark/SaltedTableIT.java b/phoenix-spark/src/it/java/org/apache/phoenix/spark/SaltedTableIT.java
new file mode 100644
index 0000000..d72acbd
--- /dev/null
+++ b/phoenix-spark/src/it/java/org/apache/phoenix/spark/SaltedTableIT.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.spark;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.phoenix.end2end.salted.BaseSaltedTableIT;
+import org.apache.phoenix.util.QueryBuilder;
+
+public class SaltedTableIT extends BaseSaltedTableIT {
+
+ @Override
+ protected ResultSet executeQueryThrowsException(Connection conn, QueryBuilder queryBuilder,
+ String expectedPhoenixExceptionMsg, String expectedSparkExceptionMsg) {
+ ResultSet rs = null;
+ try {
+ rs = executeQuery(conn, queryBuilder);
+ fail();
+ }
+ catch(Exception e) {
+ assertTrue(e.getMessage().contains(expectedSparkExceptionMsg));
+ }
+ return rs;
+ }
+
+ @Override
+ protected ResultSet executeQuery(Connection conn, QueryBuilder queryBuilder) throws SQLException {
+ return SparkUtil.executeQuery(conn, queryBuilder, getUrl(), config);
+ }
+
+}
\ No newline at end of file
diff --git a/phoenix-spark/src/it/java/org/apache/phoenix/spark/SparkUtil.java b/phoenix-spark/src/it/java/org/apache/phoenix/spark/SparkUtil.java
new file mode 100644
index 0000000..668c3c8
--- /dev/null
+++ b/phoenix-spark/src/it/java/org/apache/phoenix/spark/SparkUtil.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.spark;
+
+import com.google.common.base.Joiner;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.phoenix.jdbc.PhoenixConnection;
+import org.apache.phoenix.query.QueryServices;
+import org.apache.phoenix.spark.datasource.v2.PhoenixDataSource;
+import org.apache.phoenix.util.QueryBuilder;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.sources.v2.DataSourceOptions;
+
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.List;
+
+public class SparkUtil {
+
+ public static final String APP_NAME = "Java Spark Tests";
+ public static final String NUM_EXECUTORS = "local[2]";
+ public static final String UI_SHOW_CONSOLE_PROGRESS = "spark.ui.showConsoleProgress";
+
+ public static SparkSession getSparkSession() {
+ return SparkSession.builder().appName(APP_NAME).master(NUM_EXECUTORS)
+ .config(UI_SHOW_CONSOLE_PROGRESS, false).getOrCreate();
+ }
+
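+ /**
+ * Runs the query described by the QueryBuilder through Spark SQL rather than directly
+ * through Phoenix: the target table is loaded as a DataFrame via the "phoenix" data
+ * source, registered as a temp view, and the built SQL statement is executed against
+ * that view. The collected rows are wrapped in a SparkResultSet so callers can reuse
+ * the JDBC-style assertions from the shared base tests.
+ */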
+ public static ResultSet executeQuery(Connection conn, QueryBuilder queryBuilder, String url, Configuration config)
+ throws SQLException {
+ SQLContext sqlContext = getSparkSession().sqlContext();
+
+ boolean forceRowKeyOrder =
+ conn.unwrap(PhoenixConnection.class).getQueryServices().getProps()
+ .getBoolean(QueryServices.FORCE_ROW_KEY_ORDER_ATTRIB, false);
+ // if we are forcing row key order we have to add an ORDER BY
+ // here we assume that the required columns are in the primary key column order
+ String prevOrderBy = queryBuilder.getOrderByClause();
+ if (forceRowKeyOrder && (queryBuilder.getOrderByClause()==null || queryBuilder.getOrderByClause().isEmpty())) {
+ queryBuilder.setOrderByClause(Joiner.on(", ").join(queryBuilder.getRequiredColumns()));
+ }
+
+ // create a DataFrame using the table name and the columns required by the query;
+ // since no predicate is pushed into the load, filtering happens after rows are returned by the scan
+ Dataset phoenixDataSet = getSparkSession().read().format("phoenix")
+ .option(DataSourceOptions.TABLE_KEY, queryBuilder.getFullTableName())
+ .option(PhoenixDataSource.ZOOKEEPER_URL, url).load();
+
+ phoenixDataSet.createOrReplaceTempView(queryBuilder.getFullTableName());
+ Dataset<Row> dataset = sqlContext.sql(queryBuilder.build());
+ List<Row> rows = dataset.collectAsList();
+ queryBuilder.setOrderByClause(prevOrderBy);
+ ResultSet rs = new SparkResultSet(rows, dataset.columns());
+ return rs;
+ }
+}
diff --git a/phoenix-spark/src/it/resources/globalSetup.sql b/phoenix-spark/src/it/resources/globalSetup.sql
new file mode 100644
index 0000000..efdb8cb
--- /dev/null
+++ b/phoenix-spark/src/it/resources/globalSetup.sql
@@ -0,0 +1,64 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
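+-- Note: AbstractPhoenixSparkIT executes this file line by line, skipping blank lines and
+-- "--" comments, so every statement below must stay on a single line.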
+CREATE TABLE table1 (id BIGINT NOT NULL PRIMARY KEY, col1 VARCHAR)
+CREATE TABLE table1_copy (id BIGINT NOT NULL PRIMARY KEY, col1 VARCHAR)
+CREATE TABLE table2 (id BIGINT NOT NULL PRIMARY KEY, table1_id BIGINT, "t2col1" VARCHAR)
+CREATE TABLE table3 (id BIGINT NOT NULL PRIMARY KEY, table3_id BIGINT, "t2col1" VARCHAR)
+UPSERT INTO table1 (id, col1) VALUES (1, 'test_row_1')
+UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (1, 1, 'test_child_1')
+UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (2, 1, 'test_child_2')
+UPSERT INTO table1 (id, col1) VALUES (2, 'test_row_2')
+UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (3, 2, 'test_child_1')
+UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (4, 2, 'test_child_2')
+UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (5, 2, 'test_child_3')
+UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (6, 2, 'test_child_4')
+CREATE TABLE "table4" ("id" BIGINT NOT NULL PRIMARY KEY, "col1" VARCHAR)
+UPSERT INTO "table4" ("id", "col1") VALUES (1, 'foo')
+UPSERT INTO "table4" ("id", "col1") VALUES (2, 'bar')
+CREATE TABLE ARRAY_TEST_TABLE (ID BIGINT NOT NULL PRIMARY KEY, VCARRAY VARCHAR[])
+UPSERT INTO ARRAY_TEST_TABLE (ID, VCARRAY) VALUES (1, ARRAY['String1', 'String2', 'String3'])
+CREATE TABLE ARRAYBUFFER_TEST_TABLE (ID BIGINT NOT NULL PRIMARY KEY, VCARRAY VARCHAR[], INTARRAY INTEGER[])
+UPSERT INTO ARRAYBUFFER_TEST_TABLE (ID, VCARRAY, INTARRAY) VALUES (1, ARRAY['String1', 'String2', 'String3'], ARRAY[1, 2, 3])
+CREATE TABLE ARRAY_ANYVAL_TEST_TABLE (ID BIGINT NOT NULL PRIMARY KEY, INTARRAY INTEGER[], BIGINTARRAY BIGINT[])
+UPSERT INTO ARRAY_ANYVAL_TEST_TABLE (ID, INTARRAY, BIGINTARRAY) VALUES (1, ARRAY[1, 2, 3], ARRAY[1, 2, 3])
+CREATE TABLE ARRAY_BYTE_TEST_TABLE (ID BIGINT NOT NULL PRIMARY KEY, BYTEARRAY TINYINT[])
+UPSERT INTO ARRAY_BYTE_TEST_TABLE (ID, BYTEARRAY) VALUES (1, ARRAY[1, 2, 3])
+CREATE TABLE VARBINARY_TEST_TABLE (ID BIGINT NOT NULL PRIMARY KEY, BIN BINARY(1), VARBIN VARBINARY, BINARRAY BINARY(1)[])
+CREATE TABLE DATE_PREDICATE_TEST_TABLE (ID BIGINT NOT NULL, TIMESERIES_KEY TIMESTAMP NOT NULL CONSTRAINT pk PRIMARY KEY (ID, TIMESERIES_KEY))
+UPSERT INTO DATE_PREDICATE_TEST_TABLE (ID, TIMESERIES_KEY) VALUES (1, CAST(CURRENT_TIME() AS TIMESTAMP))
+CREATE TABLE OUTPUT_TEST_TABLE (id BIGINT NOT NULL PRIMARY KEY, col1 VARCHAR, col2 INTEGER, col3 DATE)
+CREATE TABLE CUSTOM_ENTITY."z02"(id BIGINT NOT NULL PRIMARY KEY)
+UPSERT INTO CUSTOM_ENTITY."z02" (id) VALUES(1)
+CREATE TABLE TEST_DECIMAL (ID BIGINT NOT NULL PRIMARY KEY, COL1 DECIMAL(9, 6))
+UPSERT INTO TEST_DECIMAL VALUES (1, 123.456789)
+CREATE TABLE TEST_SMALL_TINY (ID BIGINT NOT NULL PRIMARY KEY, COL1 SMALLINT, COL2 TINYINT)
+UPSERT INTO TEST_SMALL_TINY VALUES (1, 32767, 127)
+CREATE TABLE DATE_TEST(ID BIGINT NOT NULL PRIMARY KEY, COL1 DATE)
+UPSERT INTO DATE_TEST VALUES(1, CURRENT_DATE())
+CREATE TABLE TIME_TEST(ID BIGINT NOT NULL PRIMARY KEY, COL1 TIME)
+UPSERT INTO TIME_TEST VALUES(1, CURRENT_TIME())
+CREATE TABLE "space" ("key" VARCHAR PRIMARY KEY, "first name" VARCHAR)
+UPSERT INTO "space" VALUES ('key1', 'xyz')
+CREATE TABLE "small" ("key" VARCHAR PRIMARY KEY, "first name" VARCHAR, "salary" INTEGER )
+UPSERT INTO "small" VALUES ('key1', 'foo', 10000)
+UPSERT INTO "small" VALUES ('key2', 'bar', 20000)
+UPSERT INTO "small" VALUES ('key3', 'xyz', 30000)
+
+CREATE TABLE MULTITENANT_TEST_TABLE (TENANT_ID VARCHAR NOT NULL, ORGANIZATION_ID VARCHAR, GLOBAL_COL1 VARCHAR CONSTRAINT pk PRIMARY KEY (TENANT_ID, ORGANIZATION_ID)) MULTI_TENANT=true
+CREATE TABLE IF NOT EXISTS GIGANTIC_TABLE (ID INTEGER PRIMARY KEY,unsig_id UNSIGNED_INT,big_id BIGINT,unsig_long_id UNSIGNED_LONG,tiny_id TINYINT,unsig_tiny_id UNSIGNED_TINYINT,small_id SMALLINT,unsig_small_id UNSIGNED_SMALLINT,float_id FLOAT,unsig_float_id UNSIGNED_FLOAT,double_id DOUBLE,unsig_double_id UNSIGNED_DOUBLE,decimal_id DECIMAL,boolean_id BOOLEAN,time_id TIME,date_id DATE,timestamp_id TIMESTAMP,unsig_time_id UNSIGNED_TIME,unsig_date_id UNSIGNED_DATE,unsig_timestamp_id UNSIGNED_TIMESTAMP,varchar_id VARCHAR (30),char_id CHAR (30),binary_id BINARY (100),varbinary_id VARBINARY (100))
+CREATE TABLE IF NOT EXISTS OUTPUT_GIGANTIC_TABLE (ID INTEGER PRIMARY KEY,unsig_id UNSIGNED_INT,big_id BIGINT,unsig_long_id UNSIGNED_LONG,tiny_id TINYINT,unsig_tiny_id UNSIGNED_TINYINT,small_id SMALLINT,unsig_small_id UNSIGNED_SMALLINT,float_id FLOAT,unsig_float_id UNSIGNED_FLOAT,double_id DOUBLE,unsig_double_id UNSIGNED_DOUBLE,decimal_id DECIMAL,boolean_id BOOLEAN,time_id TIME,date_id DATE,timestamp_id TIMESTAMP,unsig_time_id UNSIGNED_TIME,unsig_date_id UNSIGNED_DATE,unsig_timestamp_id UNSIGNED_TIMESTAMP,varchar_id VARCHAR (30),char_id CHAR (30),binary_id BINARY (100),varbinary_id VARBINARY (100))
+UPSERT INTO GIGANTIC_TABLE VALUES(0,2,3,4,-5,6,7,8,9.3,10.4,11.5,12.6,13.7,true,null,null,CURRENT_TIME(),CURRENT_TIME(),CURRENT_DATE(),CURRENT_TIME(),'This is random textA','a','a','a')
diff --git a/phoenix-spark/src/it/resources/log4j.xml b/phoenix-spark/src/it/resources/log4j.xml
new file mode 100644
index 0000000..10c2dc0
--- /dev/null
+++ b/phoenix-spark/src/it/resources/log4j.xml
@@ -0,0 +1,70 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+
+<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
+
+<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
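+ <!-- Route test output to the console; the framework loggers below are raised to
+ ERROR/FATAL so the integration test logs stay readable. -->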
+ <appender name="console" class="org.apache.log4j.ConsoleAppender">
+ <param name="Target" value="System.out"/>
+
+ <layout class="org.apache.log4j.PatternLayout">
+ <param name="ConversionPattern" value="%-4r [%t] %-5p %c %x - %m%n"/>
+ </layout>
+ </appender>
+
+ <logger name="org.eclipse">
+ <level value="ERROR"/>
+ </logger>
+
+ <logger name="org.apache">
+ <level value="ERROR"/>
+ </logger>
+
+ <logger name = "org.apache.phoenix.mapreduce">
+ <level value="FATAL"/>
+ </logger>
+
+ <logger name="org.mortbay">
+ <level value="ERROR"/>
+ </logger>
+
+ <logger name="org.spark-project.jetty">
+ <level value="ERROR"/>
+ </logger>
+
+ <logger name="akka">
+ <level value="ERROR"/>
+ </logger>
+
+ <logger name="BlockStateChange">
+ <level value="ERROR"/>
+ </logger>
+
+ <logger name="io.netty">
+ <level value="ERROR"/>
+ </logger>
+
+ <root>
+ <priority value="INFO"/>
+ <appender-ref ref="console"/>
+ </root>
+</log4j:configuration>
diff --git a/phoenix-spark/src/it/resources/tenantSetup.sql b/phoenix-spark/src/it/resources/tenantSetup.sql
new file mode 100644
index 0000000..f62d843
--- /dev/null
+++ b/phoenix-spark/src/it/resources/tenantSetup.sql
@@ -0,0 +1,18 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
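+-- Note: these statements are executed on a tenant-specific connection (see
+-- AbstractPhoenixSparkIT), so the view and data created here are visible only to that tenant.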
+CREATE VIEW IF NOT EXISTS TENANT_VIEW(TENANT_ONLY_COL VARCHAR) AS SELECT * FROM MULTITENANT_TEST_TABLE
+UPSERT INTO TENANT_VIEW (ORGANIZATION_ID, TENANT_ONLY_COL) VALUES ('defaultOrg', 'defaultData')
diff --git a/phoenix-spark/src/it/scala/org/apache/phoenix/spark/AbstractPhoenixSparkIT.scala b/phoenix-spark/src/it/scala/org/apache/phoenix/spark/AbstractPhoenixSparkIT.scala
new file mode 100644
index 0000000..a9c2070
--- /dev/null
+++ b/phoenix-spark/src/it/scala/org/apache/phoenix/spark/AbstractPhoenixSparkIT.scala
@@ -0,0 +1,117 @@
+/*
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.apache.phoenix.spark
+
+import java.sql.{Connection, DriverManager}
+import java.util.Properties
+
+import org.apache.phoenix.end2end.BaseHBaseManagedTimeIT
+import org.apache.phoenix.query.BaseTest
+import org.apache.phoenix.util.PhoenixRuntime
+import org.apache.spark.sql.SparkSession
+import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuite, Matchers}
+
+
+// Helper object to access the protected static methods of BaseHBaseManagedTimeIT
+object PhoenixSparkITHelper extends BaseHBaseManagedTimeIT {
+ def getTestClusterConfig = BaseHBaseManagedTimeIT.getTestClusterConfig
+
+ def doSetup = {
+ // The @ClassRule doesn't seem to be getting picked up, force creation here before setup
+ BaseTest.tmpFolder.create()
+ BaseHBaseManagedTimeIT.doSetup()
+ }
+
+ def doTeardown = {
+ BaseHBaseManagedTimeIT.doTeardown()
+ BaseTest.tmpFolder.delete()
+ }
+
+ def getUrl = BaseTest.getUrl
+}
+
+/**
+ * Base class for PhoenixSparkIT
+ */
+class AbstractPhoenixSparkIT extends FunSuite with Matchers with BeforeAndAfter with BeforeAndAfterAll {
+
+ // A global tenantId we can use across tests
+ final val TenantId = "theTenant"
+
+ var conn: Connection = _
+ var spark: SparkSession = _
+
+ lazy val hbaseConfiguration = PhoenixSparkITHelper.getTestClusterConfig
+
+ lazy val quorumAddress = {
+ ConfigurationUtil.getZookeeperURL(hbaseConfiguration).get
+ }
+
+ // Runs SQL commands located in the file defined in the sqlSource argument
+ // Optional argument tenantId used for running tenant-specific SQL
+ def setupTables(sqlSource: String, tenantId: Option[String]): Unit = {
+ val props = new Properties
+ if(tenantId.isDefined) {
+ props.setProperty(PhoenixRuntime.TENANT_ID_ATTRIB, tenantId.get)
+ }
+
+ conn = DriverManager.getConnection(PhoenixSparkITHelper.getUrl, props)
+ conn.setAutoCommit(true)
+
+ val setupSqlSource = getClass.getClassLoader.getResourceAsStream(sqlSource)
+
+ // each SQL statement used to set up Phoenix must be on a single line,
+ // even though that can make for very long lines
+ val setupSql = scala.io.Source.fromInputStream(setupSqlSource).getLines()
+ .filter(line => !line.startsWith("--") && !line.isEmpty)
+
+ for (sql <- setupSql) {
+ val stmt = conn.createStatement()
+ stmt.execute(sql)
+ }
+ conn.commit()
+ }
+
+ override def beforeAll() {
+ PhoenixSparkITHelper.doSetup
+
+ // We pass in null for TenantId here since these tables will be globally visible
+ setupTables("globalSetup.sql", None)
+ // We pass in a TenantId to allow the DDL to create tenant-specific tables/views
+ setupTables("tenantSetup.sql", Some(TenantId))
+
+ spark = SparkSession
+ .builder()
+ .appName("PhoenixSparkIT")
+ .master("local[2]") // 2 threads, some parallelism
+ .config("spark.ui.showConsoleProgress", "false")
+ .getOrCreate()
+ }
+
+ override def afterAll() {
+ conn.close()
+ spark.stop()
+ PhoenixSparkITHelper.cleanUpAfterTest()
+ PhoenixSparkITHelper.doTeardown
+ }
+}
diff --git a/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkIT.scala b/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkIT.scala
new file mode 100644
index 0000000..b40b638
--- /dev/null
+++ b/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkIT.scala
@@ -0,0 +1,733 @@
+/*
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.apache.phoenix.spark
+
+import java.sql.DriverManager
+import java.util.Date
+
+import org.apache.phoenix.schema.types.PVarchar
+import org.apache.phoenix.spark.datasource.v2.PhoenixDataSource
+import org.apache.phoenix.util.{ColumnInfo, SchemaUtil}
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.{Row, SaveMode}
+
+import scala.collection.mutable.ListBuffer
+
+/**
+ * Note: If running directly from an IDE, these are the recommended VM parameters:
+ * -Xmx1536m -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m
+ */
+class PhoenixSparkIT extends AbstractPhoenixSparkIT {
+
+ test("Can persist data with case sensitive columns (like in avro schema)") {
+ val df = spark.createDataFrame(
+ Seq(
+ (1, 1, "test_child_1"),
+ (2, 1, "test_child_2"))).
+ // column names are case sensitive
+ toDF("ID", "TABLE3_ID", "t2col1")
+ df.write
+ .format("phoenix")
+ .options(Map("table" -> "TABLE3",
+ PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress, PhoenixDataSource.SKIP_NORMALIZING_IDENTIFIER -> "true"))
+ .mode(SaveMode.Overwrite)
+ .save()
+
+ // Verify results
+ val stmt = conn.createStatement()
+ val rs = stmt.executeQuery("SELECT * FROM TABLE3")
+
+ val checkResults = List((1, 1, "test_child_1"), (2, 1, "test_child_2"))
+ val results = ListBuffer[(Long, Long, String)]()
+ while (rs.next()) {
+ results.append((rs.getLong(1), rs.getLong(2), rs.getString(3)))
+ }
+ stmt.close()
+
+ results.toList shouldEqual checkResults
+ }
+
+ // INSERT is not supported by the DataSource v2 API yet
+ ignore("Can write data using Spark SQL INSERT") {
+ val df1 = spark.sqlContext.read.format("phoenix")
+ .options( Map("table" -> "TABLE3", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load
+ df1.createOrReplaceTempView("TABLE3")
+
+ // Insert data
+ spark.sql("INSERT INTO TABLE3 VALUES(10, 10, 10)")
+ spark.sql("INSERT INTO TABLE3 VALUES(20, 20, 20)")
+
+ // Verify results
+ val stmt = conn.createStatement()
+ val rs = stmt.executeQuery("SELECT * FROM TABLE3 WHERE ID>=10")
+ val expectedResults = List((10, 10, "10"), (20, 20, "20"))
+ val results = ListBuffer[(Long, Long, String)]()
+ while (rs.next()) {
+ results.append((rs.getLong(1), rs.getLong(2), rs.getString(3)))
+ }
+ stmt.close()
+
+ results.toList shouldEqual expectedResults
+ }
+
+ test("Can convert Phoenix schema") {
+ val phoenixSchema = List(
+ new ColumnInfo("varcharColumn", PVarchar.INSTANCE.getSqlType)
+ )
+
+ val catalystSchema = SparkSchemaUtil.phoenixSchemaToCatalystSchema(phoenixSchema)
+
+ val expected = new StructType(List(StructField("varcharColumn", StringType, nullable = true)).toArray)
+
+ catalystSchema shouldEqual expected
+ }
+
+ test("Can create schema RDD and execute query") {
+ val df1 = spark.sqlContext.read.format("phoenix")
+ .options( Map("table" -> "TABLE1", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load
+
+ df1.createOrReplaceTempView("sql_table_1")
+
+ val df2 = spark.sqlContext.read.format("phoenix")
+ .options( Map("table" -> "TABLE2", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load
+
+ df2.createOrReplaceTempView("sql_table_2")
+
+ val sqlRdd = spark.sql(
+ """
+ |SELECT t1.ID, t1.COL1, t2.ID, t2.TABLE1_ID FROM sql_table_1 AS t1
+ |INNER JOIN sql_table_2 AS t2 ON (t2.TABLE1_ID = t1.ID)""".stripMargin
+ )
+
+ val count = sqlRdd.count()
+
+ count shouldEqual 6L
+ }
+
+ ignore("Ordering by pk columns should not require sorting") {
+ val df1 = spark.sqlContext.read.format("phoenix")
+ .options( Map("table" -> "TABLE1", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load
+ df1.createOrReplaceTempView("TABLE1")
+
+ val sqlRdd = spark.sql("SELECT * FROM TABLE1 ORDER BY ID, COL1")
+ val plan = sqlRdd.queryExecution.sparkPlan
+ // verify the spark plan doesn't have a sort
+ assert(!plan.toString.contains("Sort"))
+
+ val expectedResults = Array(Row.fromSeq(Seq(1, "test_row_1")), Row.fromSeq(Seq(2, "test_row_2")))
+ val actual = sqlRdd.collect()
+
+ actual shouldEqual expectedResults
+ }
+
+ test("Verify correct number of partitions are created") {
+ val conn = DriverManager.getConnection(PhoenixSparkITHelper.getUrl)
+ val ddl = "CREATE TABLE SPLIT_TABLE (id VARCHAR NOT NULL PRIMARY KEY, val VARCHAR) split on ('e','j','o')"
+ conn.createStatement.execute(ddl)
+ val keys = Array("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s",
+ "t", "u", "v", "w", "x", "y", "z")
+ for (key <- keys) {
+ conn.createStatement.execute("UPSERT INTO SPLIT_TABLE VALUES('" + key + "', '" + key + "')")
+ }
+ conn.commit()
+
+ val df1 = spark.sqlContext.read.format("phoenix")
+ .options( Map("table" -> "SPLIT_TABLE", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load
+ df1.createOrReplaceTempView("SPLIT_TABLE")
+ val sqlRdd = spark.sql("SELECT * FROM SPLIT_TABLE")
+ val numPartitions = sqlRdd.rdd.partitions.size
+
+ numPartitions shouldEqual 4
+ }
+
+ test("Can create schema RDD and execute query on case sensitive table (no config)") {
+ val df1 = spark.sqlContext.read.format("phoenix")
+ .options( Map("table" -> SchemaUtil.getEscapedArgument("table4"), PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load
+
+ df1.createOrReplaceTempView("table4")
+
+ val sqlRdd = spark.sql("SELECT id FROM table4")
+
+ val count = sqlRdd.count()
+
+ count shouldEqual 2L
+ }
+
+ test("Can create schema RDD and execute constrained query") {
+ val df1 = spark.sqlContext.read.format("phoenix")
+ .options( Map("table" -> "TABLE1", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load
+
+ df1.createOrReplaceTempView("sql_table_1")
+
+ val df2 = spark.sqlContext.read.format("phoenix")
+ .options( Map("table" -> "TABLE2", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load.filter("ID = 1")
+
+ df2.createOrReplaceTempView("sql_table_2")
+
+ val sqlRdd = spark.sql(
+ """
+ |SELECT t1.ID, t1.COL1, t2.ID, t2.TABLE1_ID FROM sql_table_1 AS t1
+ |INNER JOIN sql_table_2 AS t2 ON (t2.TABLE1_ID = t1.ID)""".stripMargin
+ )
+
+ val count = sqlRdd.count()
+
+ count shouldEqual 1L
+ }
+
+ test("Can create schema RDD with predicate that will never match") {
+ val df1 = spark.sqlContext.read.format("phoenix")
+ .options( Map("table" -> "TABLE1", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load.filter("ID = -1")
+
+ df1.createOrReplaceTempView("table3")
+
+ val sqlRdd = spark.sql("SELECT * FROM table3")
+
+ val count = sqlRdd.count()
+
+ count shouldEqual 0L
+ }
+
+ test("Can create schema RDD with complex predicate") {
+ val predicate = "ID > 0 AND TIMESERIES_KEY BETWEEN " +
+ "CAST(TO_DATE('1990-01-01 00:00:01', 'yyyy-MM-dd HH:mm:ss') AS TIMESTAMP) AND " +
+ "CAST(TO_DATE('1990-01-30 00:00:01', 'yyyy-MM-dd HH:mm:ss') AS TIMESTAMP)"
+ val df1 = spark.sqlContext.read.format("phoenix")
+ .options(Map("table" -> "DATE_PREDICATE_TEST_TABLE", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
+ .load
+ .filter(predicate)
+
+ df1.createOrReplaceTempView("date_predicate_test_table")
+
+ val sqlRdd = spark.sqlContext.sql("SELECT * FROM date_predicate_test_table")
+
+ val count = sqlRdd.count()
+
+ count shouldEqual 0L
+ }
+
+ test("Can query an array table") {
+ val df1 = spark.sqlContext.read.format("phoenix")
+ .options( Map("table" -> "ARRAY_TEST_TABLE", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load
+
+ df1.createOrReplaceTempView("ARRAY_TEST_TABLE")
+
+ val sqlRdd = spark.sql("SELECT * FROM ARRAY_TEST_TABLE")
+
+ val count = sqlRdd.count()
+
+ // get row 0, column 1, which should be "VCARRAY"
+ val arrayValues = sqlRdd.collect().apply(0).apply(1)
+
+ arrayValues should equal(Array("String1", "String2", "String3"))
+
+ count shouldEqual 1L
+ }
+
+ test("Can read a table as an RDD") {
+ val rdd1 = spark.sqlContext.read.format("phoenix")
+ .options( Map("table" -> "ARRAY_TEST_TABLE", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load
+
+ val count = rdd1.count()
+
+ val arrayValues = rdd1.take(1)(0)(1)
+
+ arrayValues should equal(Array("String1", "String2", "String3"))
+
+ count shouldEqual 1L
+ }
+
+ test("Can save to phoenix table") {
+ val dataSet = List(Row(1L, "1", 1), Row(2L, "2", 2), Row(3L, "3", 3))
+
+ val schema = StructType(
+ Seq(StructField("ID", LongType, nullable = false),
+ StructField("COL1", StringType),
+ StructField("COL2", IntegerType)))
+
+ val rowRDD = spark.sparkContext.parallelize(dataSet)
+
+ // Apply the schema to the RDD.
+ val df = spark.sqlContext.createDataFrame(rowRDD, schema)
+
+ df.write
+ .format("phoenix")
+ .options(Map("table" -> "OUTPUT_TEST_TABLE", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
+ .mode(SaveMode.Overwrite)
+ .save()
+
+ // Load the results back
+ val stmt = conn.createStatement()
+ val rs = stmt.executeQuery("SELECT ID, COL1, COL2 FROM OUTPUT_TEST_TABLE")
+ val results = ListBuffer[Row]()
+ while (rs.next()) {
+ results.append(Row(rs.getLong(1), rs.getString(2), rs.getInt(3)))
+ }
+
+ // Verify they match
+ (0 to results.size - 1).foreach { i =>
+ dataSet(i) shouldEqual results(i)
+ }
+ }
+
+ test("Can save dates to Phoenix using java.sql.Date") {
+ val date = java.sql.Date.valueOf("2016-09-30")
+
+ // Since we are creating a Row we have to use java.sql.Date;
+ // java.util.Date and joda DateTime are not supported
+ val dataSet = Seq(Row(1L, "1", 1, date), Row(2L, "2", 2, date))
+
+ val schema = StructType(
+ Seq(StructField("ID", LongType, nullable = false),
+ StructField("COL1", StringType),
+ StructField("COL2", IntegerType),
+ StructField("COL3", DateType)))
+
+ val rowRDD = spark.sparkContext.parallelize(dataSet)
+
+ // Apply the schema to the RDD.
+ val df = spark.sqlContext.createDataFrame(rowRDD, schema)
+
+ df.write
+ .format("phoenix")
+ .options(Map("table" -> "OUTPUT_TEST_TABLE", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
+ .mode(SaveMode.Overwrite)
+ .save()
+
+ // Load the results back
+ val stmt = conn.createStatement()
+ val rs = stmt.executeQuery("SELECT COL3 FROM OUTPUT_TEST_TABLE WHERE ID = 1 OR ID = 2 ORDER BY ID ASC")
+ val results = ListBuffer[java.sql.Date]()
+ while (rs.next()) {
+ results.append(rs.getDate(1))
+ }
+
+ // Verify the epochs are equal
+ results(0).getTime shouldEqual date.getTime
+ results(1).getTime shouldEqual date.getTime
+ }
+
+ test("Can infer schema without defining columns") {
+ val df = spark.sqlContext.read.format("phoenix")
+ .options( Map("table" -> "TABLE2", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load()
+ df.schema("ID").dataType shouldEqual LongType
+ df.schema("TABLE1_ID").dataType shouldEqual LongType
+ df.schema("t2col1").dataType shouldEqual StringType
+ }
+
+ test("Spark SQL can use Phoenix as a data source with no schema specified") {
+ val df = spark.sqlContext.read.format("phoenix")
+ .options( Map("table" -> "TABLE1", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load
+ df.count() shouldEqual 2
+ df.schema("ID").dataType shouldEqual LongType
+ df.schema("COL1").dataType shouldEqual StringType
+ }
+
+ test("Datasource v2 pushes down filters") {
+ val df = spark.sqlContext.read.format("phoenix")
+ .options( Map("table" -> "TABLE1", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load
+ val res = df.filter(df("COL1") === "test_row_1" && df("ID") === 1L).select(df("ID"))
+
+ // Make sure we got the right value back
+ assert(res.first().getLong(0) == 1L)
+
+ val plan = res.queryExecution.sparkPlan
+ // filters should be pushed into scan
+ assert(".*ScanV2 phoenix.*Filters.*ID.*COL1.*".r.findFirstIn(plan.toString).isDefined)
+ // spark should not do post scan filtering
+ assert(".*Filter .*ID.*COL1.*".r.findFirstIn(plan.toString).isEmpty)
+ }
+
+ test("Can persist a dataframe") {
+ // Load from TABLE1
+ val df = spark.sqlContext.read.format("phoenix").options( Map("table" -> "TABLE1", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load
+
+ // Save to TABLE1_COPY
+ df
+ .write
+ .format("phoenix")
+ .mode(SaveMode.Overwrite)
+ .option("table", "TABLE1_COPY")
+ .option(PhoenixDataSource.ZOOKEEPER_URL, quorumAddress)
+ .save()
+
+ // Verify results
+ val stmt = conn.createStatement()
+ val rs = stmt.executeQuery("SELECT * FROM TABLE1_COPY")
+
+ val checkResults = List((1L, "test_row_1"), (2L, "test_row_2"))
+ val results = ListBuffer[(Long, String)]()
+ while (rs.next()) {
+ results.append((rs.getLong(1), rs.getString(2)))
+ }
+ stmt.close()
+
+ results.toList shouldEqual checkResults
+ }
+
+ test("Can save arrays back to phoenix") {
+ val dataSet = List(Row(2L, Array("String1", "String2", "String3")))
+ val schema = StructType(Seq(
+ StructField("ID", LongType, nullable = false),
+ StructField("VCARRAY", ArrayType(StringType, true))
+ ))
+
+ val rowRDD = spark.sparkContext.parallelize(dataSet)
+
+ // Apply the schema to the RDD.
+ val df = spark.sqlContext.createDataFrame(rowRDD, schema)
+
+ df.write
+ .format("phoenix")
+ .options(Map("table" -> "ARRAY_TEST_TABLE", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
+ .mode(SaveMode.Overwrite)
+ .save()
+
+ // Load the results back
+ val stmt = conn.createStatement()
+ val rs = stmt.executeQuery("SELECT VCARRAY FROM ARRAY_TEST_TABLE WHERE ID = 2")
+ rs.next()
+ val sqlArray = rs.getArray(1).getArray().asInstanceOf[Array[String]]
+
+ // Verify the arrays are equal
+ sqlArray shouldEqual dataSet(0).get(1)
+ }
+
+ test("Can read from table with schema and escaped table name") {
+ // Manually escape
+ val df1 = spark.sqlContext.read.format("phoenix")
+ .options(Map("table" -> "CUSTOM_ENTITY.\"z02\"", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load()
+
+ var count = df1.count()
+
+ count shouldEqual 1L
+
+ // Use SchemaUtil
+ val df2 = spark.sqlContext.read.format("phoenix")
+ .options(
+ Map("table" -> SchemaUtil.getEscapedFullTableName("CUSTOM_ENTITY.z02"), PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
+ .load()
+
+ count = df2.count()
+
+ count shouldEqual 1L
+ }
+
+ test("Ensure DataFrame field normalization (PHOENIX-2196)") {
+ val rdd1 = spark.sparkContext
+ .parallelize(Seq((1L, 1L, "One"), (2L, 2L, "Two")))
+ .map(p => Row(p._1, p._2, p._3))
+
+ val schema = StructType(Seq(
+ StructField("id", LongType, nullable = false),
+ StructField("table1_id", LongType, nullable = true),
+ StructField("\"t2col1\"", StringType, nullable = true)
+ ))
+
+ val df = spark.sqlContext.createDataFrame(rdd1, schema)
+
+ df.write
+ .format("phoenix")
+ .options(Map("table" -> "TABLE2", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
+ .mode(SaveMode.Overwrite)
+ .save()
+ }
+
+ test("Ensure Dataframe supports LIKE and IN filters (PHOENIX-2328)") {
+ val df = spark.sqlContext.read.format("phoenix").options(Map("table" -> "TABLE1", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load()
+ // Prefix match
+ val res1 = df.filter("COL1 like 'test_row_%'")
+ res1.count() shouldEqual 2
+
+ // Suffix match
+ val res2 = df.filter("COL1 like '%_1'")
+ res2.count() shouldEqual 1
+ res2.first.getString(1) shouldEqual "test_row_1"
+
+ // Infix match
+ val res3 = df.filter("COL1 like '%_row_%'")
+ res3.count() shouldEqual 2
+
+ // Not like, match none
+ val res4 = df.filter("COL1 not like '%_row_%'")
+ res4.count() shouldEqual 0
+
+ // Not like, match all
+ val res5 = df.filter("COL1 not like '%_wor_%'")
+ res5.count() shouldEqual 2
+
+ // "IN", match all
+ val res6 = df.filter("COL1 in ('test_row_1', 'test_row_2')")
+ res6.count() shouldEqual 2
+
+ // "IN", match none
+ val res7 = df.filter("COL1 in ('foo', 'bar')")
+ res7.count() shouldEqual 0
+
+ // AND (and not again)
+ val res8 = df.filter("COL1 like '%_row_%' AND COL1 not like '%_1'")
+ res8.count() shouldEqual 1
+ res8.first.getString(1) shouldEqual "test_row_2"
+
+ // OR
+ val res9 = df.filter("COL1 like '%_1' OR COL1 like '%_2'")
+ res9.count() shouldEqual 2
+ }
+
+ test("Can load decimal types with accurate precision and scale (PHOENIX-2288)") {
+ val df = spark.sqlContext.read.format("phoenix")
+ .options(Map("table" -> "TEST_DECIMAL", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load()
+ assert(df.select("COL1").first().getDecimal(0) == BigDecimal("123.456789").bigDecimal)
+ }
+
+ test("Can load small and tiny integer types (PHOENIX-2426)") {
+ val df = spark.sqlContext.read.format("phoenix")
+ .options(Map("table" -> "TEST_SMALL_TINY", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress)).load()
+ assert(df.select("COL1").first().getShort(0).toInt == 32767)
+ assert(df.select("COL2").first().getByte(0).toInt == 127)
+ }
+
+ test("Can save arrays from custom dataframes back to phoenix") {
+ val dataSet = List(Row(2L, Array("String1", "String2", "String3"), Array(1, 2, 3)))
+
+ val schema = StructType(
+ Seq(StructField("ID", LongType, nullable = false),
+ StructField("VCARRAY", ArrayType(StringType)),
+ StructField("INTARRAY", ArrayType(IntegerType))))
+
+ val rowRDD = spark.sparkContext.parallelize(dataSet)
+
+ // Apply the schema to the RDD.
+ val df = spark.sqlContext.createDataFrame(rowRDD, schema)
+
+ df.write
+ .format("phoenix")
+ .options(Map("table" -> "ARRAYBUFFER_TEST_TABLE", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
+ .mode(SaveMode.Overwrite)
+ .save()
+
+ // Load the results back
+ val stmt = conn.createStatement()
+ val rs = stmt.executeQuery("SELECT VCARRAY, INTARRAY FROM ARRAYBUFFER_TEST_TABLE WHERE ID = 2")
+ rs.next()
+ val stringArray = rs.getArray(1).getArray().asInstanceOf[Array[String]]
+ val intArray = rs.getArray(2).getArray().asInstanceOf[Array[Int]]
+
+ // Verify the arrays are equal
+ stringArray shouldEqual dataSet(0).getAs[Array[String]](1)
+ intArray shouldEqual dataSet(0).getAs[Array[Int]](2)
+ }
+
+ test("Can save arrays of AnyVal type back to phoenix") {
+ val dataSet = List(Row(2L, Array(1, 2, 3), Array(1L, 2L, 3L)))
+
+ val schema = StructType(
+ Seq(StructField("ID", LongType, nullable = false),
+ StructField("INTARRAY", ArrayType(IntegerType)),
+ StructField("BIGINTARRAY", ArrayType(LongType))))
+
+ val rowRDD = spark.sparkContext.parallelize(dataSet)
+
+ // Apply the schema to the RDD.
+ val df = spark.sqlContext.createDataFrame(rowRDD, schema)
+
+ df.write
+ .format("phoenix")
+ .options(Map("table" -> "ARRAY_ANYVAL_TEST_TABLE", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
+ .mode(SaveMode.Overwrite)
+ .save()
+
+ // Load the results back
+ val stmt = conn.createStatement()
+ val rs = stmt.executeQuery("SELECT INTARRAY, BIGINTARRAY FROM ARRAY_ANYVAL_TEST_TABLE WHERE ID = 2")
+ rs.next()
+ val intArray = rs.getArray(1).getArray().asInstanceOf[Array[Int]]
+ val longArray = rs.getArray(2).getArray().asInstanceOf[Array[Long]]
+
+ // Verify the arrays are equal
+ intArray shouldEqual dataSet(0).get(1)
+ longArray shouldEqual dataSet(0).get(2)
+ }
+
+ test("Can save arrays of Byte type back to phoenix") {
+ val dataSet = List(Row(2L, Array(1.toByte, 2.toByte, 3.toByte)))
+
+ val schema = StructType(
+ Seq(StructField("ID", LongType, nullable = false),
+ StructField("BYTEARRAY", ArrayType(ByteType))))
+
+ val rowRDD = spark.sparkContext.parallelize(dataSet)
+
+ // Apply the schema to the RDD.
+ val df = spark.sqlContext.createDataFrame(rowRDD, schema)
+
+ df.write
+ .format("phoenix")
+ .options(Map("table" -> "ARRAY_BYTE_TEST_TABLE", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
+ .mode(SaveMode.Overwrite)
+ .save()
+
+ // Load the results back
+ val stmt = conn.createStatement()
+ val rs = stmt.executeQuery("SELECT BYTEARRAY FROM ARRAY_BYTE_TEST_TABLE WHERE ID = 2")
+ rs.next()
+ val byteArray = rs.getArray(1).getArray().asInstanceOf[Array[Byte]]
+
+ // Verify the arrays are equal
+ byteArray shouldEqual dataSet(0).get(1)
+ }
+
+ test("Can save binary types back to phoenix") {
+ val dataSet = List(Row(2L, Array[Byte](1), Array[Byte](1, 2, 3), Array[Array[Byte]](Array[Byte](1), Array[Byte](2))))
+
+ val schema = StructType(
+ Seq(StructField("ID", LongType, false),
+ StructField("BIN", BinaryType),
+ StructField("VARBIN", BinaryType),
+ StructField("BINARRAY", ArrayType(BinaryType))))
+
+ val rowRDD = spark.sparkContext.parallelize(dataSet)
+
+ // Apply the schema to the RDD.
+ val df = spark.sqlContext.createDataFrame(rowRDD, schema)
+
+ df.write
+ .format("phoenix")
+ .options(Map("table" -> "VARBINARY_TEST_TABLE", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
+ .mode(SaveMode.Overwrite)
+ .save()
+
+ // Load the results back
+ val stmt = conn.createStatement()
+ val rs = stmt.executeQuery("SELECT BIN, VARBIN, BINARRAY FROM VARBINARY_TEST_TABLE WHERE ID = 2")
+ rs.next()
+ val byte = rs.getBytes("BIN")
+ val varByte = rs.getBytes("VARBIN")
+ val varByteArray = rs.getArray("BINARRAY").getArray().asInstanceOf[Array[Array[Byte]]]
+
+ // Verify the arrays are equal
+ byte shouldEqual dataSet(0).get(1)
+ varByte shouldEqual dataSet(0).get(2)
+ varByteArray shouldEqual dataSet(0).get(3)
+ }
+
+ test("Can load and filter Phoenix DATE columns through DataFrame API") {
+ val df = spark.sqlContext.read
+ .format("phoenix")
+ .options(Map("table" -> "DATE_TEST", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
+ .load
+ val dt = df.select("COL1").first().getDate(0).getTime
+ val epoch = new Date().getTime
+
+ // NOTE: Spark DateType drops hour, minute, second, as per the java.sql.Date spec
+ // Use 'dateAsTimestamp' option to coerce DATE to TIMESTAMP without losing resolution
+
+ // Note that Spark also applies the timezone offset to the returned date epoch. Rather than perform timezone
+ // gymnastics, just make sure we're within 24H of the epoch generated just now
+ assert(Math.abs(epoch - dt) < 86400000)
+
+ df.createOrReplaceTempView("DATE_TEST")
+ val df2 = spark.sql("SELECT * FROM DATE_TEST WHERE COL1 > TO_DATE('1990-01-01 00:00:01', 'yyyy-MM-dd HH:mm:ss')")
+ assert(df2.count() == 1L)
+ }
+
+ test("Filter operation doesn't work for column names containing a white space (PHOENIX-2547)") {
+ val df = spark.sqlContext.read.format("phoenix")
+ .options(Map("table" -> SchemaUtil.getEscapedArgument("space"), PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
+ .load
+ val res = df.filter(df.col("first name").equalTo("xyz"))
+ // Make sure we got the right value back
+ assert(res.collectAsList().size() == 1L)
+ }
+
+ test("Spark Phoenix cannot recognize Phoenix view fields (PHOENIX-2290)") {
+ val df = spark.sqlContext.read.format("phoenix")
+ .options(Map("table" -> SchemaUtil.getEscapedArgument("small"), PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
+ .load
+ df.createOrReplaceTempView("temp")
+
+ // limitation: filter / where expressions cannot use double-quoted identifiers; pass them as column expressions instead
+ // reason: if an expression contains double quotes, the Spark SQL parser skips evaluating it and hands it down to the next layer
+
+ val res1 = spark.sql("select * from temp where salary = '10000' ")
+ assert(res1.collectAsList().size() == 1L)
+
+ val res2 = spark.sql("select * from temp where \"salary\" = '10000' ")
+ assert(res2.collectAsList().size() == 0L)
+
+ val res3 = spark.sql("select * from temp where salary > '10000' ")
+ assert(res3.collectAsList().size() == 2L)
+ }
+
+ test("Queries with small case column-names return empty result-set when working with Spark Datasource Plugin (PHOENIX-2336)") {
+ val df = spark.sqlContext.read.format("phoenix")
+ .options(Map("table" -> SchemaUtil.getEscapedArgument("small"), PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
+ .load
+
+    // Limitation: filter/where expressions must not use double-quoted identifiers; pass them as column expressions instead.
+    // Reason: if an expression contains double quotes, the Spark SQL parser skips evaluating it and hands it down to the next layer.
+
+ val res1 = df.filter(df.col("first name").equalTo("foo"))
+ assert(res1.collectAsList().size() == 1L)
+
+ val res2 = df.filter("\"first name\" = 'foo'")
+ assert(res2.collectAsList().size() == 0L)
+
+ val res3 = df.filter("salary = '10000'")
+ assert(res3.collectAsList().size() == 1L)
+
+ val res4 = df.filter("salary > '10000'")
+ assert(res4.collectAsList().size() == 2L)
+ }
+
+ test("Can coerce Phoenix DATE columns to TIMESTAMP through DataFrame API") {
+ val df = spark.sqlContext.read
+ .format("phoenix")
+ .options(Map("table" -> "DATE_TEST", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress, "dateAsTimestamp" -> "true"))
+ .load
+ val dtRes = df.select("COL1").first()
+ val ts = dtRes.getTimestamp(0).getTime
+ val epoch = new Date().getTime
+
+ assert(Math.abs(epoch - ts) < 300000)
+ }
+
+ test("Can load Phoenix Time columns through DataFrame API") {
+ val df = spark.sqlContext.read
+ .format("phoenix")
+ .options(Map("table" -> "TIME_TEST", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
+ .load
+ val time = df.select("COL1").first().getTimestamp(0).getTime
+ val epoch = new Date().getTime
+ assert(Math.abs(epoch - time) < 86400000)
+ }
+
+ test("can read all Phoenix data types") {
+ val df = spark.sqlContext.read
+ .format("phoenix")
+ .options(Map("table" -> "GIGANTIC_TABLE", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
+ .load
+
+ df.write
+ .format("phoenix")
+ .options(Map("table" -> "OUTPUT_GIGANTIC_TABLE", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
+ .mode(SaveMode.Overwrite)
+ .save()
+
+ df.count() shouldEqual 1
+ }
+
+}
diff --git a/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkITTenantSpecific.scala b/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkITTenantSpecific.scala
new file mode 100644
index 0000000..291ea2a
--- /dev/null
+++ b/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkITTenantSpecific.scala
@@ -0,0 +1,135 @@
+/*
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.apache.phoenix.spark
+
+import org.apache.phoenix.util.PhoenixRuntime
+import org.apache.spark.sql.SQLContext
+
+import scala.collection.mutable.ListBuffer
+
+/**
+ * Sub-class of PhoenixSparkIT used for tenant-specific tests
+ *
+ * Note: All schema related variables (table name, column names, default data, etc) are coupled with
+ * phoenix-spark/src/it/resources/tenantSetup.sql
+ *
+ * Note: If running directly from an IDE, these are the recommended VM parameters:
+ * -Xmx1536m -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m
+ *
+ */
+class PhoenixSparkITTenantSpecific extends AbstractPhoenixSparkIT {
+
+ // Tenant-specific schema info
+ val OrgIdCol = "ORGANIZATION_ID"
+ val TenantOnlyCol = "TENANT_ONLY_COL"
+ val TenantTable = "TENANT_VIEW"
+
+ // Data set for tests that write to Phoenix
+ val TestDataSet = List(("testOrg1", "data1"), ("testOrg2", "data2"), ("testOrg3", "data3"))
+
+ /**
+ * Helper method used by write tests to verify content written.
+ * Assumes the caller has written the TestDataSet (defined above) to Phoenix
+ * and that 1 row of default data exists (upserted after table creation in tenantSetup.sql)
+ */
+ def verifyResults(): Unit = {
+ // Contains the default data upserted into the tenant-specific table in tenantSetup.sql and the data upserted by tests
+ val VerificationDataSet = List(("defaultOrg", "defaultData")) ::: TestDataSet
+
+ val SelectStatement = "SELECT " + OrgIdCol + "," + TenantOnlyCol + " FROM " + TenantTable
+ val stmt = conn.createStatement()
+ val rs = stmt.executeQuery(SelectStatement)
+
+ val results = ListBuffer[(String, String)]()
+ while (rs.next()) {
+ results.append((rs.getString(1), rs.getString(2)))
+ }
+ stmt.close()
+ results.toList shouldEqual VerificationDataSet
+ }
+
+ /*****************/
+ /** Read tests **/
+ /*****************/
+
+ test("Can read from tenant-specific table as DataFrame") {
+ val df = spark.sqlContext.phoenixTableAsDataFrame(
+ TenantTable,
+ Seq(OrgIdCol, TenantOnlyCol),
+ zkUrl = Some(quorumAddress),
+ tenantId = Some(TenantId),
+ conf = hbaseConfiguration)
+
+ // There should only be 1 row upserted in tenantSetup.sql
+ val count = df.count()
+ count shouldEqual 1L
+ }
+
+ test("Can read from tenant-specific table as RDD") {
+ val rdd = spark.sparkContext.phoenixTableAsRDD(
+ TenantTable,
+ Seq(OrgIdCol, TenantOnlyCol),
+ zkUrl = Some(quorumAddress),
+ tenantId = Some(TenantId),
+ conf = hbaseConfiguration)
+
+ // There should only be 1 row upserted in tenantSetup.sql
+ val count = rdd.count()
+ count shouldEqual 1L
+ }
+
+ /*****************/
+ /** Write tests **/
+ /*****************/
+
+ test("Can write a DataFrame using 'DataFrame.saveToPhoenix' to tenant-specific view") {
+ val sqlContext = spark.sqlContext
+ import sqlContext.implicits._
+
+ val df = spark.sparkContext.parallelize(TestDataSet).toDF(OrgIdCol, TenantOnlyCol)
+ df.saveToPhoenix(TenantTable, zkUrl = Some(quorumAddress), tenantId = Some(TenantId))
+
+    verifyResults()
+ }
+
+ test("Can write a DataFrame using 'DataFrame.write' to tenant-specific view") {
+ val sqlContext = spark.sqlContext
+ import sqlContext.implicits._
+
+ val df = spark.sparkContext.parallelize(TestDataSet).toDF(OrgIdCol, TenantOnlyCol)
+
+ df.write
+ .format("phoenix")
+ .mode("overwrite")
+ .option("table", TenantTable)
+ .option(PhoenixRuntime.TENANT_ID_ATTRIB, TenantId)
+ .option("zkUrl", PhoenixSparkITHelper.getUrl)
+ .save()
+
+    verifyResults()
+ }
+
+ test("Can write an RDD to Phoenix tenant-specific view") {
+ spark.sparkContext
+ .parallelize(TestDataSet)
+ .saveToPhoenix(
+ TenantTable,
+ Seq(OrgIdCol, TenantOnlyCol),
+ hbaseConfiguration,
+ tenantId = Some(TenantId)
+ )
+
+    verifyResults()
+ }
+}
diff --git a/phoenix-spark/src/main/java/org/apache/phoenix/spark/SparkResultSet.java b/phoenix-spark/src/main/java/org/apache/phoenix/spark/SparkResultSet.java
new file mode 100644
index 0000000..0cb8009
--- /dev/null
+++ b/phoenix-spark/src/main/java/org/apache/phoenix/spark/SparkResultSet.java
@@ -0,0 +1,1056 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.spark;
+
+import org.apache.phoenix.exception.SQLExceptionCode;
+import org.apache.phoenix.exception.SQLExceptionInfo;
+import org.apache.phoenix.util.SQLCloseable;
+import org.apache.spark.sql.Row;
+
+import java.io.InputStream;
+import java.io.Reader;
+import java.math.BigDecimal;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.sql.Array;
+import java.sql.Blob;
+import java.sql.Clob;
+import java.sql.Date;
+import java.sql.NClob;
+import java.sql.Ref;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.RowId;
+import java.sql.SQLException;
+import java.sql.SQLFeatureNotSupportedException;
+import java.sql.SQLWarning;
+import java.sql.SQLXML;
+import java.sql.Statement;
+import java.sql.Time;
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Helper class to convert a List of Rows returned from a Spark Dataset into a SQL ResultSet.
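+ *
+ * A minimal usage sketch (the Dataset {@code ds} and column name are hypothetical):
+ * <pre>
+ *   ResultSet rs = new SparkResultSet(ds.collectAsList(), ds.columns());
+ *   while (rs.next()) {
+ *       String name = rs.getString("NAME");
+ *   }
+ * </pre>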
+ */
+public class SparkResultSet implements ResultSet, SQLCloseable {
+
+ private int index = -1;
+ private List<Row> dataSetRows;
+ private List<String> columnNames;
+ private boolean wasNull = false;
+
+ public SparkResultSet(List<Row> rows, String[] columnNames) {
+ this.dataSetRows = rows;
+ this.columnNames = Arrays.asList(columnNames);
+ }
+
+ private Row getCurrentRow() {
+ return dataSetRows.get(index);
+ }
+
+ @Override
+ public boolean absolute(int row) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void afterLast() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void beforeFirst() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void cancelRowUpdates() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void clearWarnings() throws SQLException {
+ }
+
+ @Override
+ public void close() throws SQLException {
+ }
+
+ @Override
+ public void deleteRow() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public int findColumn(String columnLabel) throws SQLException {
+        int index = columnNames.indexOf(columnLabel.toUpperCase());
+        if (index == -1) throw new SQLException("Column not found: " + columnLabel);
+        return index + 1;
+ }
+
+ @Override
+ public boolean first() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public Array getArray(int columnIndex) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public Array getArray(String columnLabel) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public InputStream getAsciiStream(int columnIndex) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public InputStream getAsciiStream(String columnLabel) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ private void checkOpen() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ private void checkCursorState() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public BigDecimal getBigDecimal(int columnIndex) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public BigDecimal getBigDecimal(String columnLabel) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public BigDecimal getBigDecimal(int columnIndex, int scale) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public BigDecimal getBigDecimal(String columnLabel, int scale) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public InputStream getBinaryStream(int columnIndex) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public InputStream getBinaryStream(String columnLabel) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public Blob getBlob(int columnIndex) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public Blob getBlob(String columnLabel) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public boolean getBoolean(int columnIndex) throws SQLException {
+ wasNull = getCurrentRow().isNullAt(columnIndex-1);
+ return wasNull ? false : getCurrentRow().getBoolean(columnIndex-1);
+ }
+
+ @Override
+ public boolean getBoolean(String columnLabel) throws SQLException {
+ return getBoolean(findColumn(columnLabel));
+ }
+
+ @Override
+ public byte[] getBytes(int columnIndex) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public byte[] getBytes(String columnLabel) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public byte getByte(int columnIndex) throws SQLException {
+ wasNull = getCurrentRow().isNullAt(columnIndex-1);
+ return wasNull ? 0 : getCurrentRow().getByte(columnIndex-1);
+ }
+
+ @Override
+ public byte getByte(String columnLabel) throws SQLException {
+ return getByte(findColumn(columnLabel));
+ }
+
+ @Override
+ public Reader getCharacterStream(int columnIndex) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public Reader getCharacterStream(String columnLabel) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public Clob getClob(int columnIndex) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public Clob getClob(String columnLabel) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public int getConcurrency() throws SQLException {
+ return ResultSet.CONCUR_READ_ONLY;
+ }
+
+ @Override
+ public String getCursorName() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public Date getDate(int columnIndex) throws SQLException {
+ wasNull = getCurrentRow().isNullAt(columnIndex-1);
+        return wasNull ? null : getCurrentRow().getDate(columnIndex-1);
+ }
+
+ @Override
+ public Date getDate(String columnLabel) throws SQLException {
+ return getDate(findColumn(columnLabel));
+ }
+
+ @Override
+ public Date getDate(int columnIndex, Calendar cal) throws SQLException {
+        wasNull = getCurrentRow().isNullAt(columnIndex-1);
+        if (wasNull) return null;
+        cal.setTime(getCurrentRow().getDate(columnIndex-1));
+        return new Date(cal.getTimeInMillis());
+ }
+
+ @Override
+ public Date getDate(String columnLabel, Calendar cal) throws SQLException {
+ return getDate(findColumn(columnLabel), cal);
+ }
+
+ @Override
+ public double getDouble(int columnIndex) throws SQLException {
+ wasNull = getCurrentRow().isNullAt(columnIndex-1);
+ return wasNull ? 0 : getCurrentRow().getDouble(columnIndex-1);
+ }
+
+ @Override
+ public double getDouble(String columnLabel) throws SQLException {
+ return getDouble(findColumn(columnLabel));
+ }
+
+ @Override
+ public int getFetchDirection() throws SQLException {
+ return ResultSet.FETCH_FORWARD;
+ }
+
+ @Override
+ public int getFetchSize() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public float getFloat(int columnIndex) throws SQLException {
+ wasNull = getCurrentRow().isNullAt(columnIndex-1);
+ return wasNull ? 0 : getCurrentRow().getFloat(columnIndex-1);
+ }
+
+ @Override
+ public float getFloat(String columnLabel) throws SQLException {
+ return getFloat(findColumn(columnLabel));
+ }
+
+ @Override
+ public int getHoldability() throws SQLException {
+ return ResultSet.CLOSE_CURSORS_AT_COMMIT;
+ }
+
+ @Override
+ public int getInt(int columnIndex) throws SQLException {
+ wasNull = getCurrentRow().isNullAt(columnIndex-1);
+ return wasNull ? 0 : getCurrentRow().getInt(columnIndex-1);
+ }
+
+ @Override
+ public int getInt(String columnLabel) throws SQLException {
+ return getInt(findColumn(columnLabel));
+ }
+
+ @Override
+ public long getLong(int columnIndex) throws SQLException {
+ wasNull = getCurrentRow().isNullAt(columnIndex-1);
+ return wasNull ? 0 : getCurrentRow().getLong(columnIndex-1);
+ }
+
+ @Override
+ public long getLong(String columnLabel) throws SQLException {
+ return getLong(findColumn(columnLabel));
+ }
+
+ @Override
+ public ResultSetMetaData getMetaData() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public Reader getNCharacterStream(int columnIndex) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public Reader getNCharacterStream(String columnLabel) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public NClob getNClob(int columnIndex) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public NClob getNClob(String columnLabel) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public String getNString(int columnIndex) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public String getNString(String columnLabel) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public Object getObject(int columnIndex) throws SQLException {
+        wasNull = getCurrentRow().isNullAt(columnIndex-1);
+        return wasNull ? null : getCurrentRow().get(columnIndex-1);
+ }
+
+ @Override
+ public Object getObject(String columnLabel) throws SQLException {
+ return getObject(findColumn(columnLabel));
+ }
+
+ @Override
+ public Object getObject(int columnIndex, Map<String, Class<?>> map) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public Object getObject(String columnLabel, Map<String, Class<?>> map) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public Ref getRef(int columnIndex) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public Ref getRef(String columnLabel) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public int getRow() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public RowId getRowId(int columnIndex) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public RowId getRowId(String columnLabel) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public SQLXML getSQLXML(int columnIndex) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public SQLXML getSQLXML(String columnLabel) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public short getShort(int columnIndex) throws SQLException {
+        wasNull = getCurrentRow().isNullAt(columnIndex-1);
+        return wasNull ? 0 : getCurrentRow().getShort(columnIndex-1);
+ }
+
+ @Override
+ public short getShort(String columnLabel) throws SQLException {
+ return getShort(findColumn(columnLabel));
+ }
+
+ @Override
+ public Statement getStatement() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public String getString(int columnIndex) throws SQLException {
+ wasNull = getCurrentRow().isNullAt(columnIndex-1);
+ return wasNull ? null : getCurrentRow().getString(columnIndex-1);
+ }
+
+ @Override
+ public String getString(String columnLabel) throws SQLException {
+ return getString(findColumn(columnLabel));
+ }
+
+ @Override
+ public Time getTime(int columnIndex) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public Time getTime(String columnLabel) throws SQLException {
+ return getTime(findColumn(columnLabel));
+ }
+
+ @Override
+ public Time getTime(int columnIndex, Calendar cal) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public Time getTime(String columnLabel, Calendar cal) throws SQLException {
+ return getTime(findColumn(columnLabel),cal);
+ }
+
+ @Override
+ public Timestamp getTimestamp(int columnIndex) throws SQLException {
+        wasNull = getCurrentRow().isNullAt(columnIndex-1);
+        return wasNull ? null : getCurrentRow().getTimestamp(columnIndex-1);
+ }
+
+ @Override
+ public Timestamp getTimestamp(String columnLabel) throws SQLException {
+ return getTimestamp(findColumn(columnLabel));
+ }
+
+ @Override
+ public Timestamp getTimestamp(int columnIndex, Calendar cal) throws SQLException {
+        // NOTE: the Calendar argument is ignored
+        return getTimestamp(columnIndex);
+ }
+
+ @Override
+ public Timestamp getTimestamp(String columnLabel, Calendar cal) throws SQLException {
+ return getTimestamp(findColumn(columnLabel),cal);
+ }
+
+ @Override
+ public int getType() throws SQLException {
+ return ResultSet.TYPE_FORWARD_ONLY;
+ }
+
+ @Override
+ public URL getURL(int columnIndex) throws SQLException {
+ try {
+ return new URL(getCurrentRow().getString(columnIndex-1));
+ } catch (MalformedURLException e) {
+ throw new SQLExceptionInfo.Builder(SQLExceptionCode.MALFORMED_URL).setRootCause(e)
+ .build().buildException();
+ }
+ }
+
+ @Override
+ public URL getURL(String columnLabel) throws SQLException {
+ return getURL(findColumn(columnLabel));
+ }
+
+ @Override
+ public InputStream getUnicodeStream(int columnIndex) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public InputStream getUnicodeStream(String columnLabel) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public SQLWarning getWarnings() throws SQLException {
+ return null;
+ }
+
+ @Override
+ public void insertRow() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public boolean isAfterLast() throws SQLException {
+ return index >= dataSetRows.size();
+ }
+
+ @Override
+ public boolean isBeforeFirst() throws SQLException {
+ return index == -1;
+ }
+
+ @Override
+ public boolean isClosed() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public boolean isFirst() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public boolean isLast() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public boolean last() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void moveToCurrentRow() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void moveToInsertRow() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public boolean next() throws SQLException {
+ index++;
+ return index < dataSetRows.size();
+ }
+
+ @Override
+ public boolean previous() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void refreshRow() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public boolean relative(int rows) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public boolean rowDeleted() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public boolean rowInserted() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public boolean rowUpdated() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void setFetchDirection(int direction) throws SQLException {
+ if (direction != ResultSet.FETCH_FORWARD) {
+ throw new SQLFeatureNotSupportedException();
+ }
+ }
+
+ @Override
+ public void setFetchSize(int rows) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateArray(int columnIndex, Array x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateArray(String columnLabel, Array x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateAsciiStream(int columnIndex, InputStream x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateAsciiStream(String columnLabel, InputStream x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateAsciiStream(int columnIndex, InputStream x, int length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateAsciiStream(String columnLabel, InputStream x, int length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateAsciiStream(int columnIndex, InputStream x, long length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateAsciiStream(String columnLabel, InputStream x, long length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateBigDecimal(int columnIndex, BigDecimal x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateBigDecimal(String columnLabel, BigDecimal x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateBinaryStream(int columnIndex, InputStream x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateBinaryStream(String columnLabel, InputStream x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateBinaryStream(int columnIndex, InputStream x, int length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateBinaryStream(String columnLabel, InputStream x, int length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateBinaryStream(int columnIndex, InputStream x, long length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateBinaryStream(String columnLabel, InputStream x, long length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateBlob(int columnIndex, Blob x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateBlob(String columnLabel, Blob x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateBlob(int columnIndex, InputStream inputStream) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateBlob(String columnLabel, InputStream inputStream) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateBlob(int columnIndex, InputStream inputStream, long length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateBlob(String columnLabel, InputStream inputStream, long length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateBoolean(int columnIndex, boolean x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateBoolean(String columnLabel, boolean x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateByte(int columnIndex, byte x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateByte(String columnLabel, byte x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateBytes(int columnIndex, byte[] x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateBytes(String columnLabel, byte[] x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateCharacterStream(int columnIndex, Reader x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateCharacterStream(String columnLabel, Reader reader) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateCharacterStream(int columnIndex, Reader x, int length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateCharacterStream(String columnLabel, Reader reader, int length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateCharacterStream(int columnIndex, Reader x, long length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateCharacterStream(String columnLabel, Reader reader, long length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateClob(int columnIndex, Clob x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateClob(String columnLabel, Clob x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateClob(int columnIndex, Reader reader) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateClob(String columnLabel, Reader reader) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateClob(int columnIndex, Reader reader, long length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateClob(String columnLabel, Reader reader, long length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateDate(int columnIndex, Date x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateDate(String columnLabel, Date x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateDouble(int columnIndex, double x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateDouble(String columnLabel, double x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateFloat(int columnIndex, float x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateFloat(String columnLabel, float x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateInt(int columnIndex, int x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateInt(String columnLabel, int x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateLong(int columnIndex, long x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateLong(String columnLabel, long x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateNCharacterStream(int columnIndex, Reader x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateNCharacterStream(String columnLabel, Reader reader) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateNCharacterStream(int columnIndex, Reader x, long length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateNCharacterStream(String columnLabel, Reader reader, long length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateNClob(int columnIndex, NClob nClob) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateNClob(String columnLabel, NClob nClob) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateNClob(int columnIndex, Reader reader) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateNClob(String columnLabel, Reader reader) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateNClob(int columnIndex, Reader reader, long length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateNClob(String columnLabel, Reader reader, long length) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateNString(int columnIndex, String nString) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateNString(String columnLabel, String nString) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateNull(int columnIndex) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateNull(String columnLabel) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateObject(int columnIndex, Object x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateObject(String columnLabel, Object x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateObject(int columnIndex, Object x, int scaleOrLength) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateObject(String columnLabel, Object x, int scaleOrLength) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateRef(int columnIndex, Ref x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateRef(String columnLabel, Ref x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateRow() throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateRowId(int columnIndex, RowId x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateRowId(String columnLabel, RowId x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateSQLXML(int columnIndex, SQLXML xmlObject) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateSQLXML(String columnLabel, SQLXML xmlObject) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateShort(int columnIndex, short x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateShort(String columnLabel, short x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateString(int columnIndex, String x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateString(String columnLabel, String x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateTime(int columnIndex, Time x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateTime(String columnLabel, Time x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateTimestamp(int columnIndex, Timestamp x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public void updateTimestamp(String columnLabel, Timestamp x) throws SQLException {
+ throw new SQLFeatureNotSupportedException();
+ }
+
+ @Override
+ public boolean wasNull() throws SQLException {
+ return wasNull;
+ }
+
+ @Override
+ public boolean isWrapperFor(Class<?> iface) throws SQLException {
+ return iface.isInstance(this);
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public <T> T unwrap(Class<T> iface) throws SQLException {
+ if (!iface.isInstance(this)) {
+ throw new SQLExceptionInfo.Builder(SQLExceptionCode.CLASS_NOT_UNWRAPPABLE)
+ .setMessage(this.getClass().getName() + " not unwrappable from " + iface.getName())
+ .build().buildException();
+ }
+ return (T)this;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public <T> T getObject(int columnIndex, Class<T> type) throws SQLException {
+        return (T) getObject(columnIndex); // Just ignore type since we only support built-in types
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public <T> T getObject(String columnLabel, Class<T> type) throws SQLException {
+ return (T) getObject(columnLabel); // Just ignore type since we only support built-in types
+ }
+
+}
\ No newline at end of file
diff --git a/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/PhoenixDataSource.java b/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/PhoenixDataSource.java
new file mode 100644
index 0000000..ad79d1c
--- /dev/null
+++ b/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/PhoenixDataSource.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.spark.datasource.v2;
+
+import java.util.Optional;
+
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
+import org.apache.phoenix.spark.datasource.v2.reader.PhoenixDataSourceReader;
+import org.apache.phoenix.spark.datasource.v2.writer.PhoenixDataSourceWriteOptions;
+import org.apache.phoenix.spark.datasource.v2.writer.PhoenixDatasourceWriter;
+import org.apache.phoenix.util.PhoenixRuntime;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.sources.DataSourceRegister;
+import org.apache.spark.sql.sources.v2.DataSourceOptions;
+import org.apache.spark.sql.sources.v2.DataSourceV2;
+import org.apache.spark.sql.sources.v2.ReadSupport;
+import org.apache.spark.sql.sources.v2.WriteSupport;
+import org.apache.spark.sql.sources.v2.reader.DataSourceReader;
+import org.apache.spark.sql.sources.v2.writer.DataSourceWriter;
+import org.apache.spark.sql.types.StructType;
+
+/**
+ * Implements the DataSourceV2 API to read from and write to Phoenix tables.
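+ *
+ * A minimal usage sketch from Spark SQL (table names and quorum address are hypothetical):
+ * <pre>
+ *   Dataset&lt;Row&gt; df = spark.read().format("phoenix")
+ *       .option("table", "INPUT_TABLE").option(ZOOKEEPER_URL, "localhost:2181").load();
+ *   df.write().format("phoenix").mode(SaveMode.Overwrite)
+ *       .option("table", "OUTPUT_TABLE").option(ZOOKEEPER_URL, "localhost:2181").save();
+ * </pre>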
+ */
+public class PhoenixDataSource implements DataSourceV2, ReadSupport, WriteSupport, DataSourceRegister {
+
+ public static final String SKIP_NORMALIZING_IDENTIFIER = "skipNormalizingIdentifier";
+ public static final String ZOOKEEPER_URL = "zkUrl";
+
+ @Override
+ public DataSourceReader createReader(DataSourceOptions options) {
+ return new PhoenixDataSourceReader(options);
+ }
+
+ @Override
+ public Optional<DataSourceWriter> createWriter(String writeUUID, StructType schema, SaveMode mode,
+ DataSourceOptions options) {
+ if (!mode.equals(SaveMode.Overwrite)) {
+ throw new RuntimeException("SaveMode other than SaveMode.OverWrite is not supported");
+ }
+ if (!options.tableName().isPresent()) {
+ throw new RuntimeException("No Phoenix option " + DataSourceOptions.TABLE_KEY + " defined");
+ }
+ if (!options.get(PhoenixDataSource.ZOOKEEPER_URL).isPresent()) {
+ throw new RuntimeException("No Phoenix option " + PhoenixDataSource.ZOOKEEPER_URL + " defined");
+ }
+
+ PhoenixDataSourceWriteOptions writeOptions = createPhoenixDataSourceWriteOptions(options, schema);
+ return Optional.of(new PhoenixDatasourceWriter(writeOptions));
+ }
+
+ private PhoenixDataSourceWriteOptions createPhoenixDataSourceWriteOptions(DataSourceOptions options,
+ StructType schema) {
+ String scn = options.get(PhoenixConfigurationUtil.CURRENT_SCN_VALUE).orElse(null);
+ String tenantId = options.get(PhoenixRuntime.TENANT_ID_ATTRIB).orElse(null);
+ String zkUrl = options.get(ZOOKEEPER_URL).get();
+ boolean skipNormalizingIdentifier = options.getBoolean(SKIP_NORMALIZING_IDENTIFIER, false);
+ return new PhoenixDataSourceWriteOptions.Builder().setTableName(options.tableName().get())
+ .setZkUrl(zkUrl).setScn(scn).setTenantId(tenantId).setSchema(schema)
+ .setSkipNormalizingIdentifier(skipNormalizingIdentifier).build();
+ }
+
+ @Override
+ public String shortName() {
+ return "phoenix";
+ }
+}
diff --git a/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/reader/PhoenixDataSourceReadOptions.java b/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/reader/PhoenixDataSourceReadOptions.java
new file mode 100644
index 0000000..8c2fdb1
--- /dev/null
+++ b/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/reader/PhoenixDataSourceReadOptions.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.spark.datasource.v2.reader;
+
+import java.io.Serializable;
+
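+/**
+ * Serializable bundle of the connection-level settings (ZooKeeper URL, SCN, tenant ID)
+ * and the SELECT statement that each input partition needs in order to re-create its
+ * query plan on a Spark executor.
+ */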
+public class PhoenixDataSourceReadOptions implements Serializable {
+
+ private final String tenantId;
+ private final String zkUrl;
+ private final String scn;
+ private final String selectStatement;
+
+ public PhoenixDataSourceReadOptions(String zkUrl, String scn, String tenantId, String selectStatement) {
+ this.zkUrl = zkUrl;
+ this.scn = scn;
+ this.tenantId = tenantId;
+ this.selectStatement = selectStatement;
+ }
+
+ public String getSelectStatement() {
+ return selectStatement;
+ }
+
+ public String getScn() {
+ return scn;
+ }
+
+ public String getZkUrl() {
+ return zkUrl;
+ }
+
+ public String getTenantId() {
+ return tenantId;
+ }
+}
diff --git a/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/reader/PhoenixDataSourceReader.java b/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/reader/PhoenixDataSourceReader.java
new file mode 100644
index 0000000..446d96f
--- /dev/null
+++ b/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/reader/PhoenixDataSourceReader.java
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.spark.datasource.v2.reader;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.RegionSizeCalculator;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.iterate.MapReduceParallelScanGrouper;
+import org.apache.phoenix.jdbc.PhoenixConnection;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.PhoenixInputSplit;
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
+import org.apache.phoenix.query.KeyRange;
+import org.apache.phoenix.spark.FilterExpressionCompiler;
+import org.apache.phoenix.spark.SparkSchemaUtil;
+import org.apache.phoenix.spark.datasource.v2.PhoenixDataSource;
+import org.apache.phoenix.util.ColumnInfo;
+import org.apache.phoenix.util.PhoenixRuntime;
+import org.apache.phoenix.util.QueryUtil;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.sources.Filter;
+import org.apache.spark.sql.sources.v2.DataSourceOptions;
+import org.apache.spark.sql.sources.v2.reader.DataSourceReader;
+import org.apache.spark.sql.sources.v2.reader.InputPartition;
+import org.apache.spark.sql.sources.v2.reader.SupportsPushDownFilters;
+import org.apache.spark.sql.sources.v2.reader.SupportsPushDownRequiredColumns;
+import org.apache.spark.sql.types.StructType;
+import scala.Tuple3;
+import scala.collection.JavaConverters;
+import scala.collection.Seq;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+import java.util.Properties;
+
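+/**
+ * DataSourceV2 reader for Phoenix. Builds the full table schema up front, pushes
+ * supported filters down into a Phoenix WHERE clause, and plans one Spark input
+ * partition per Phoenix scan when splitting by statistics, otherwise one per
+ * group of scans (roughly one per region).
+ */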
+public class PhoenixDataSourceReader implements DataSourceReader, SupportsPushDownFilters,
+ SupportsPushDownRequiredColumns {
+
+ private final DataSourceOptions options;
+ private final String tableName;
+ private final String zkUrl;
+ private final boolean dateAsTimestamp;
+
+ private StructType schema;
+ private Filter[] pushedFilters = new Filter[]{};
+ // derived from pushedFilters
+ private String whereClause;
+
+ public PhoenixDataSourceReader(DataSourceOptions options) {
+ if (!options.tableName().isPresent()) {
+ throw new RuntimeException("No Phoenix option " + DataSourceOptions.TABLE_KEY + " defined");
+ }
+ if (!options.get(PhoenixDataSource.ZOOKEEPER_URL).isPresent()) {
+ throw new RuntimeException("No Phoenix option " + PhoenixDataSource.ZOOKEEPER_URL + " defined");
+ }
+ this.options = options;
+ this.tableName = options.tableName().get();
+ this.zkUrl = options.get("zkUrl").get();
+ this.dateAsTimestamp = options.getBoolean("dateAsTimestamp", false);
+ setSchema();
+ }
+
+ /**
+ * Sets the schema using all the table columns before any column pruning has been done
+ */
+ private void setSchema() {
+ try (Connection conn = DriverManager.getConnection("jdbc:phoenix:" + zkUrl)) {
+ List<ColumnInfo> columnInfos = PhoenixRuntime.generateColumnInfo(conn, tableName, null);
+ Seq<ColumnInfo> columnInfoSeq = JavaConverters.asScalaIteratorConverter(columnInfos.iterator()).asScala().toSeq();
+ schema = SparkSchemaUtil.phoenixSchemaToCatalystSchema(columnInfoSeq, dateAsTimestamp);
+        } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public StructType readSchema() {
+ return schema;
+ }
+
+ @Override
+ public Filter[] pushFilters(Filter[] filters) {
+ Tuple3<String, Filter[], Filter[]> tuple3 = new FilterExpressionCompiler().pushFilters(filters);
+ whereClause = tuple3._1();
+ pushedFilters = tuple3._3();
+ return tuple3._2();
+ }
+
+ @Override
+ public List<InputPartition<InternalRow>> planInputPartitions() {
+ Optional<String> currentScnValue = options.get(PhoenixConfigurationUtil.CURRENT_SCN_VALUE);
+ Optional<String> tenantId = options.get(PhoenixConfigurationUtil.MAPREDUCE_TENANT_ID);
+        // Decide whether to generate splits from statistics (guideposts) or just region boundaries
+ boolean splitByStats = options.getBoolean(
+ PhoenixConfigurationUtil.MAPREDUCE_SPLIT_BY_STATS, PhoenixConfigurationUtil.DEFAULT_SPLIT_BY_STATS);
+ Properties overridingProps = new Properties();
+ if(currentScnValue.isPresent()) {
+ overridingProps.put(PhoenixRuntime.CURRENT_SCN_ATTRIB, currentScnValue.get());
+ }
+ if (tenantId.isPresent()){
+ overridingProps.put(PhoenixRuntime.TENANT_ID_ATTRIB, tenantId.get());
+ }
+ try (Connection conn = DriverManager.getConnection("jdbc:phoenix:" + zkUrl, overridingProps)) {
+ List<ColumnInfo> columnInfos = PhoenixRuntime.generateColumnInfo(conn, tableName, Lists.newArrayList(schema.names()));
+ final Statement statement = conn.createStatement();
+ final String selectStatement = QueryUtil.constructSelectStatement(tableName, columnInfos, whereClause);
+ Preconditions.checkNotNull(selectStatement);
+
+ final PhoenixStatement pstmt = statement.unwrap(PhoenixStatement.class);
+ // Optimize the query plan so that we potentially use secondary indexes
+ final QueryPlan queryPlan = pstmt.optimizeQuery(selectStatement);
+ final Scan scan = queryPlan.getContext().getScan();
+
+ // setting the snapshot configuration
+ Optional<String> snapshotName = options.get(PhoenixConfigurationUtil.SNAPSHOT_NAME_KEY);
+            if (snapshotName.isPresent()) {
+                PhoenixConfigurationUtil.setSnapshotNameKey(queryPlan.getContext().getConnection()
+                        .getQueryServices().getConfiguration(), snapshotName.get());
+            }
+
+ // Initialize the query plan so it sets up the parallel scans
+ queryPlan.iterator(MapReduceParallelScanGrouper.getInstance());
+
+ List<KeyRange> allSplits = queryPlan.getSplits();
+ // Get the RegionSizeCalculator
+ PhoenixConnection phxConn = conn.unwrap(PhoenixConnection.class);
+ org.apache.hadoop.hbase.client.Connection connection =
+ phxConn.getQueryServices().getAdmin().getConnection();
+ RegionLocator regionLocator = connection.getRegionLocator(TableName.valueOf(queryPlan
+ .getTableRef().getTable().getPhysicalName().toString()));
+ RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(regionLocator, connection
+ .getAdmin());
+
+ final List<InputPartition<InternalRow>> partitions = Lists.newArrayListWithExpectedSize(allSplits.size());
+ for (List<Scan> scans : queryPlan.getScans()) {
+ // Get the region location
+ HRegionLocation location = regionLocator.getRegionLocation(
+ scans.get(0).getStartRow(),
+ false
+ );
+
+ String regionLocation = location.getHostname();
+
+ // Get the region size
+ long regionSize = sizeCalculator.getRegionSize(
+ location.getRegionInfo().getRegionName()
+ );
+
+ PhoenixDataSourceReadOptions phoenixDataSourceOptions = new PhoenixDataSourceReadOptions(zkUrl,
+ currentScnValue.orElse(null), tenantId.orElse(null), selectStatement);
+ if (splitByStats) {
+ for (Scan aScan : scans) {
+ partitions.add(new PhoenixInputPartition(phoenixDataSourceOptions, schema,
+ new PhoenixInputSplit(Collections.singletonList(aScan), regionSize, regionLocation)));
+ }
+ } else {
+ partitions.add(new PhoenixInputPartition(phoenixDataSourceOptions, schema,
+ new PhoenixInputSplit(scans, regionSize, regionLocation)));
+ }
+ }
+ return partitions;
+ } catch (Exception e) {
+ throw new RuntimeException("Unable to plan query", e);
+ }
+ }
+
+ @Override
+ public Filter[] pushedFilters() {
+ return pushedFilters;
+ }
+
+ @Override
+ public void pruneColumns(StructType schema) {
+ this.schema = schema;
+ }
+}
diff --git a/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/reader/PhoenixInputPartition.java b/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/reader/PhoenixInputPartition.java
new file mode 100644
index 0000000..624ff0f
--- /dev/null
+++ b/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/reader/PhoenixInputPartition.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.spark.datasource.v2.reader;
+
+import org.apache.phoenix.mapreduce.PhoenixInputSplit;
+import org.apache.spark.SerializableWritable;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.sources.v2.reader.InputPartition;
+import org.apache.spark.sql.sources.v2.reader.InputPartitionReader;
+import org.apache.spark.sql.types.StructType;
+
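+/**
+ * Serializable wrapper around a {@link PhoenixInputSplit} that Spark ships to executors;
+ * each partition creates a {@link PhoenixInputPartitionReader} over its scans.
+ */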
+public class PhoenixInputPartition implements InputPartition<InternalRow> {
+
+ private SerializableWritable<PhoenixInputSplit> phoenixInputSplit;
+ private StructType schema;
+ private PhoenixDataSourceReadOptions options;
+
+ public PhoenixInputPartition(PhoenixDataSourceReadOptions options, StructType schema, PhoenixInputSplit phoenixInputSplit) {
+ this.phoenixInputSplit = new SerializableWritable<>(phoenixInputSplit);
+ this.schema = schema;
+ this.options = options;
+ }
+
+ @Override
+ public InputPartitionReader<InternalRow> createPartitionReader() {
+ return new PhoenixInputPartitionReader(options, schema, phoenixInputSplit);
+ }
+
+}
diff --git a/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/reader/PhoenixInputPartitionReader.java b/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/reader/PhoenixInputPartitionReader.java
new file mode 100644
index 0000000..30e84db
--- /dev/null
+++ b/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/reader/PhoenixInputPartitionReader.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.spark.datasource.v2.reader;
+
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.compile.StatementContext;
+import org.apache.phoenix.coprocessor.BaseScannerRegionObserver;
+import org.apache.phoenix.iterate.ConcatResultIterator;
+import org.apache.phoenix.iterate.LookAheadResultIterator;
+import org.apache.phoenix.iterate.MapReduceParallelScanGrouper;
+import org.apache.phoenix.iterate.PeekingResultIterator;
+import org.apache.phoenix.iterate.ResultIterator;
+import org.apache.phoenix.iterate.RoundRobinResultIterator;
+import org.apache.phoenix.iterate.SequenceResultIterator;
+import org.apache.phoenix.iterate.TableResultIterator;
+import org.apache.phoenix.jdbc.PhoenixResultSet;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.PhoenixInputSplit;
+import org.apache.phoenix.monitoring.ReadMetricQueue;
+import org.apache.phoenix.monitoring.ScanMetricsHolder;
+import org.apache.phoenix.query.ConnectionQueryServices;
+import org.apache.phoenix.util.PhoenixRuntime;
+import org.apache.spark.SerializableWritable;
+import org.apache.spark.executor.InputMetrics;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.execution.datasources.SparkJdbcUtil;
+import org.apache.spark.sql.sources.v2.reader.InputPartitionReader;
+import org.apache.spark.sql.types.StructType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+
+import scala.collection.Iterator;
+
+public class PhoenixInputPartitionReader implements InputPartitionReader<InternalRow> {
+
+ private SerializableWritable<PhoenixInputSplit> phoenixInputSplit;
+ private StructType schema;
+ private Iterator<InternalRow> iterator;
+ private PhoenixResultSet resultSet;
+ private InternalRow currentRow;
+ private PhoenixDataSourceReadOptions options;
+
+ public PhoenixInputPartitionReader(PhoenixDataSourceReadOptions options, StructType schema, SerializableWritable<PhoenixInputSplit> phoenixInputSplit) {
+ this.options = options;
+ this.phoenixInputSplit = phoenixInputSplit;
+ this.schema = schema;
+ initialize();
+ }
+
+ private QueryPlan getQueryPlan() throws SQLException {
+ String scn = options.getScn();
+ String tenantId = options.getTenantId();
+ String zkUrl = options.getZkUrl();
+ Properties overridingProps = new Properties();
+ if (scn != null) {
+ overridingProps.put(PhoenixRuntime.CURRENT_SCN_ATTRIB, scn);
+ }
+ if (tenantId != null) {
+ overridingProps.put(PhoenixRuntime.TENANT_ID_ATTRIB, tenantId);
+ }
+ try (Connection conn = DriverManager.getConnection("jdbc:phoenix:" + zkUrl, overridingProps)) {
+ final Statement statement = conn.createStatement();
+ final String selectStatement = options.getSelectStatement();
+ Preconditions.checkNotNull(selectStatement);
+
+ final PhoenixStatement pstmt = statement.unwrap(PhoenixStatement.class);
+ // Optimize the query plan so that we potentially use secondary indexes
+ return pstmt.optimizeQuery(selectStatement);
+ }
+ }
+
+ private void initialize() {
+ try {
+ final QueryPlan queryPlan = getQueryPlan();
+ final List<Scan> scans = phoenixInputSplit.value().getScans();
+ List<PeekingResultIterator> iterators = Lists.newArrayListWithExpectedSize(scans.size());
+ StatementContext ctx = queryPlan.getContext();
+ ReadMetricQueue readMetrics = ctx.getReadMetricsQueue();
+ String tableName = queryPlan.getTableRef().getTable().getPhysicalName().getString();
+
+ // Clear the table region boundary cache to make sure long running jobs stay up to date
+ byte[] tableNameBytes = queryPlan.getTableRef().getTable().getPhysicalName().getBytes();
+ ConnectionQueryServices services = queryPlan.getContext().getConnection().getQueryServices();
+ services.clearTableRegionCache(tableNameBytes);
+
+ long renewScannerLeaseThreshold = queryPlan.getContext().getConnection().getQueryServices().getRenewLeaseThresholdMilliSeconds();
+ for (Scan scan : scans) {
+ // For MR, skip the region boundary check exception if we encounter a split. ref: PHOENIX-2599
+ scan.setAttribute(BaseScannerRegionObserver.SKIP_REGION_BOUNDARY_CHECK, Bytes.toBytes(true));
+
+ PeekingResultIterator peekingResultIterator;
+ ScanMetricsHolder scanMetricsHolder =
+ ScanMetricsHolder.getInstance(readMetrics, tableName, scan,
+ queryPlan.getContext().getConnection().getLogLevel());
+ final TableResultIterator tableResultIterator =
+ new TableResultIterator(
+ queryPlan.getContext().getConnection().getMutationState(), scan,
+ scanMetricsHolder, renewScannerLeaseThreshold, queryPlan,
+ MapReduceParallelScanGrouper.getInstance());
+ peekingResultIterator = LookAheadResultIterator.wrap(tableResultIterator);
+ iterators.add(peekingResultIterator);
+ }
+ ResultIterator iterator = queryPlan.useRoundRobinIterator() ? RoundRobinResultIterator.newIterator(iterators, queryPlan) : ConcatResultIterator.newIterator(iterators);
+ if (queryPlan.getContext().getSequenceManager().getSequenceCount() > 0) {
+ iterator = new SequenceResultIterator(iterator, queryPlan.getContext().getSequenceManager());
+ }
+ // Clone the row projector as it's not thread safe and would be used simultaneously by
+ // multiple threads otherwise.
+ this.resultSet = new PhoenixResultSet(iterator, queryPlan.getProjector().cloneIfNecessary(), queryPlan.getContext());
+ this.iterator = SparkJdbcUtil.resultSetToSparkInternalRows(resultSet, schema, new InputMetrics());
+ }
+ catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public boolean next() {
+ if (!iterator.hasNext()) {
+ return false;
+ }
+ currentRow = iterator.next();
+ return true;
+ }
+
+ @Override
+ public InternalRow get() {
+ return currentRow;
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (resultSet != null) {
+ try {
+ resultSet.close();
+ } catch (SQLException e) {
+ throw new IOException(e);
+ }
+ }
+ }
+}
diff --git a/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/writer/PhoenixDataSourceWriteOptions.java b/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/writer/PhoenixDataSourceWriteOptions.java
new file mode 100644
index 0000000..781d1c8
--- /dev/null
+++ b/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/writer/PhoenixDataSourceWriteOptions.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.spark.datasource.v2.writer;
+
+import org.apache.spark.sql.types.StructType;
+
+import java.io.Serializable;
+
+public class PhoenixDataSourceWriteOptions implements Serializable {
+
+ private final String tableName;
+ private final String zkUrl;
+ private final String tenantId;
+ private final String scn;
+ private final StructType schema;
+ private final boolean skipNormalizingIdentifier;
+
+ private PhoenixDataSourceWriteOptions(String tableName, String zkUrl, String scn, String tenantId,
+ StructType schema, boolean skipNormalizingIdentifier) {
+ this.tableName = tableName;
+ this.zkUrl = zkUrl;
+ this.scn = scn;
+ this.tenantId = tenantId;
+ this.schema = schema;
+ this.skipNormalizingIdentifier = skipNormalizingIdentifier;
+ }
+
+ public String getScn() {
+ return scn;
+ }
+
+ public String getZkUrl() {
+ return zkUrl;
+ }
+
+ public String getTenantId() {
+ return tenantId;
+ }
+
+ public StructType getSchema() {
+ return schema;
+ }
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ public boolean skipNormalizingIdentifier() {
+ return skipNormalizingIdentifier;
+ }
+
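+ /**
+ * Builder for {@link PhoenixDataSourceWriteOptions}. A hypothetical usage
+ * sketch (table name and ZK URL are example values):
+ *
+ * <pre>
+ * PhoenixDataSourceWriteOptions options = new PhoenixDataSourceWriteOptions.Builder()
+ *     .setTableName("OUTPUT_TABLE")
+ *     .setZkUrl("localhost:2181")
+ *     .setSchema(schema)
+ *     .build();
+ * </pre>
+ */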
+ public static class Builder {
+ private String tableName;
+ private String zkUrl;
+ private String scn;
+ private String tenantId;
+ private StructType schema;
+ private boolean skipNormalizingIdentifier;
+
+ public Builder setTableName(String tableName) {
+ this.tableName = tableName;
+ return this;
+ }
+
+ public Builder setZkUrl(String zkUrl) {
+ this.zkUrl = zkUrl;
+ return this;
+ }
+
+ public Builder setScn(String scn) {
+ this.scn = scn;
+ return this;
+ }
+
+ public Builder setTenantId(String tenantId) {
+ this.tenantId = tenantId;
+ return this;
+ }
+
+ public Builder setSchema(StructType schema) {
+ this.schema = schema;
+ return this;
+ }
+
+ public Builder setSkipNormalizingIdentifier(boolean skipNormalizingIdentifier) {
+ this.skipNormalizingIdentifier = skipNormalizingIdentifier;
+ return this;
+ }
+
+ public PhoenixDataSourceWriteOptions build() {
+ return new PhoenixDataSourceWriteOptions(tableName, zkUrl, scn, tenantId, schema, skipNormalizingIdentifier);
+ }
+ }
+}
\ No newline at end of file
diff --git a/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/writer/PhoenixDataWriter.java b/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/writer/PhoenixDataWriter.java
new file mode 100644
index 0000000..32dc07a
--- /dev/null
+++ b/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/writer/PhoenixDataWriter.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.spark.datasource.v2.writer;
+
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.Properties;
+import java.util.stream.Collectors;
+
+import org.apache.phoenix.util.PhoenixRuntime;
+import org.apache.phoenix.util.QueryUtil;
+import org.apache.phoenix.util.SchemaUtil;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.execution.datasources.SparkJdbcUtil;
+import org.apache.spark.sql.execution.datasources.jdbc.PhoenixJdbcDialect$;
+import org.apache.spark.sql.sources.v2.writer.DataWriter;
+import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+import com.google.common.collect.Lists;
+
+public class PhoenixDataWriter implements DataWriter<InternalRow> {
+
+ private final StructType schema;
+ private final Connection conn;
+ private final PreparedStatement statement;
+
+ public PhoenixDataWriter(PhoenixDataSourceWriteOptions options) {
+ String scn = options.getScn();
+ String tenantId = options.getTenantId();
+ String zkUrl = options.getZkUrl();
+ Properties overridingProps = new Properties();
+ if (scn != null) {
+ overridingProps.put(PhoenixRuntime.CURRENT_SCN_ATTRIB, scn);
+ }
+ if (tenantId != null) {
+ overridingProps.put(PhoenixRuntime.TENANT_ID_ATTRIB, tenantId);
+ }
+ this.schema = options.getSchema();
+ try {
+ this.conn = DriverManager.getConnection("jdbc:phoenix:" + zkUrl, overridingProps);
+ List<String> colNames = Lists.newArrayList(options.getSchema().names());
+ if (!options.skipNormalizingIdentifier()) {
+ colNames = colNames.stream().map(SchemaUtil::normalizeIdentifier).collect(Collectors.toList());
+ }
+ String upsertSql = QueryUtil.constructUpsertStatement(options.getTableName(), colNames, null);
+ this.statement = this.conn.prepareStatement(upsertSql);
+ } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public void write(InternalRow internalRow) throws IOException {
+ try {
+ // Convert the InternalRow once per row; the conversion does not depend on
+ // the field index, so converting inside the loop was redundant work
+ Row row = SparkJdbcUtil.toRow(schema, internalRow);
+ // JDBC parameter indices are 1-based while Spark schema fields are 0-based
+ int i = 0;
+ for (StructField field : schema.fields()) {
+ DataType dataType = field.dataType();
+ if (internalRow.isNullAt(i)) {
+ statement.setNull(i + 1, SparkJdbcUtil.getJdbcType(dataType,
+ PhoenixJdbcDialect$.MODULE$).jdbcNullType());
+ } else {
+ SparkJdbcUtil.makeSetter(conn, PhoenixJdbcDialect$.MODULE$, dataType).apply(statement, row, i);
+ }
+ ++i;
+ }
+ statement.execute();
+ } catch (SQLException e) {
+ throw new IOException("Exception while executing Phoenix prepared statement", e);
+ }
+ }
+
+ @Override
+ public WriterCommitMessage commit() throws IOException {
+ try {
+ conn.commit();
+ } catch (SQLException e) {
+ // Wrap in IOException to match the checked exception declared by this method
+ throw new IOException("Exception while committing upserts to Phoenix", e);
+ } finally {
+ try {
+ statement.close();
+ conn.close();
+ } catch (SQLException ex) {
+ throw new IOException(ex);
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public void abort() throws IOException {
+ }
+}
diff --git a/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/writer/PhoenixDataWriterFactory.java b/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/writer/PhoenixDataWriterFactory.java
new file mode 100644
index 0000000..f7654e3
--- /dev/null
+++ b/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/writer/PhoenixDataWriterFactory.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.spark.datasource.v2.writer;
+
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.sources.v2.writer.DataWriter;
+import org.apache.spark.sql.sources.v2.writer.DataWriterFactory;
+
+public class PhoenixDataWriterFactory implements DataWriterFactory<InternalRow> {
+
+ private final PhoenixDataSourceWriteOptions options;
+
+ public PhoenixDataWriterFactory(PhoenixDataSourceWriteOptions options) {
+ this.options = options;
+ }
+
+ @Override
+ public DataWriter<InternalRow> createDataWriter(int partitionId, long taskId, long epochId) {
+ return new PhoenixDataWriter(options);
+ }
+}
diff --git a/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/writer/PhoenixDatasourceWriter.java b/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/writer/PhoenixDatasourceWriter.java
new file mode 100644
index 0000000..04f243d
--- /dev/null
+++ b/phoenix-spark/src/main/java/org/apache/phoenix/spark/datasource/v2/writer/PhoenixDatasourceWriter.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.spark.datasource.v2.writer;
+
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.sources.v2.DataSourceOptions;
+import org.apache.spark.sql.sources.v2.writer.DataSourceWriter;
+import org.apache.spark.sql.sources.v2.writer.DataWriterFactory;
+import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage;
+
+public class PhoenixDatasourceWriter implements DataSourceWriter {
+
+ private final PhoenixDataSourceWriteOptions options;
+
+ public PhoenixDatasourceWriter(PhoenixDataSourceWriteOptions options) {
+ this.options = options;
+ }
+
+ @Override
+ public DataWriterFactory<InternalRow> createWriterFactory() {
+ return new PhoenixDataWriterFactory(options);
+ }
+
+ @Override
+ public boolean useCommitCoordinator() {
+ return false;
+ }
+
+ @Override
+ public void commit(WriterCommitMessage[] messages) {
+ }
+
+ @Override
+ public void abort(WriterCommitMessage[] messages) {
+ }
+}
diff --git a/phoenix-spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/phoenix-spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
new file mode 100644
index 0000000..2093b84
--- /dev/null
+++ b/phoenix-spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
@@ -0,0 +1,18 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+org.apache.phoenix.spark.datasource.v2.PhoenixDataSource
diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/ConfigurationUtil.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/ConfigurationUtil.scala
new file mode 100644
index 0000000..9377986
--- /dev/null
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/ConfigurationUtil.scala
@@ -0,0 +1,100 @@
+/*
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.apache.phoenix.spark
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.hbase.{HBaseConfiguration, HConstants}
+import org.apache.phoenix.jdbc.PhoenixEmbeddedDriver
+import org.apache.phoenix.mapreduce.util.{ColumnInfoToStringEncoderDecoder, PhoenixConfigurationUtil}
+import org.apache.phoenix.query.HBaseFactoryProvider
+import org.apache.phoenix.util.{ColumnInfo, PhoenixRuntime}
+
+import scala.collection.JavaConversions._
+
+@deprecated("Use the DataSource V2 API implementation (see PhoenixDataSource)")
+object ConfigurationUtil extends Serializable {
+
+ def getOutputConfiguration(tableName: String, columns: Seq[String], zkUrl: Option[String], tenantId: Option[String] = None, conf: Option[Configuration] = None): Configuration = {
+
+ // Create an HBaseConfiguration object from the passed in config, if present
+ val config = conf match {
+ case Some(c) => HBaseFactoryProvider.getConfigurationFactory.getConfiguration(c)
+ case _ => HBaseFactoryProvider.getConfigurationFactory.getConfiguration()
+ }
+
+ // Set the tenantId in the config if present
+ tenantId match {
+ case Some(id) => setTenantId(config, id)
+ case _ =>
+ }
+
+ // Set the table to save to
+ PhoenixConfigurationUtil.setOutputTableName(config, tableName)
+ PhoenixConfigurationUtil.setPhysicalTableName(config, tableName)
+ // disable property provider evaluation
+ PhoenixConfigurationUtil.setPropertyPolicyProviderDisabled(config)
+
+ // Infer column names from the DataFrame schema
+ PhoenixConfigurationUtil.setUpsertColumnNames(config, Array(columns : _*))
+
+ // Override the Zookeeper URL if present. Throw exception if no address given.
+ zkUrl match {
+ case Some(url) => setZookeeperURL(config, url)
+ case _ => {
+ if (ConfigurationUtil.getZookeeperURL(config).isEmpty) {
+ throw new UnsupportedOperationException(
+ s"One of zkUrl or '${HConstants.ZOOKEEPER_QUORUM}' config property must be provided"
+ )
+ }
+ }
+ }
+ // Return the configuration object
+ config
+ }
+
+ def setZookeeperURL(conf: Configuration, zkUrl: String) = {
+ val info = PhoenixEmbeddedDriver.ConnectionInfo.create(zkUrl)
+ conf.set(HConstants.ZOOKEEPER_QUORUM, info.getZookeeperQuorum)
+ if (info.getPort != null)
+ conf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, info.getPort)
+ if (info.getRootNode != null)
+ conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, info.getRootNode)
+ }
+
+ def setTenantId(conf: Configuration, tenantId: String) = {
+ conf.set(PhoenixRuntime.TENANT_ID_ATTRIB, tenantId)
+ }
+
+ // Return a serializable representation of the columns
+ def encodeColumns(conf: Configuration) = {
+ ColumnInfoToStringEncoderDecoder.encode(conf, PhoenixConfigurationUtil.getUpsertColumnMetadataList(conf)
+ )
+ }
+
+ // Decode the columns to a list of ColumnInfo objects
+ def decodeColumns(conf: Configuration): List[ColumnInfo] = {
+ ColumnInfoToStringEncoderDecoder.decode(conf).toList
+ }
+
+ def getZookeeperURL(conf: Configuration): Option[String] = {
+ List(
+ Option(conf.get(HConstants.ZOOKEEPER_QUORUM)),
+ Option(conf.get(HConstants.ZOOKEEPER_CLIENT_PORT)),
+ Option(conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT))
+ ).flatten match {
+ case Nil => None
+ case x: List[String] => Some(x.mkString(":"))
+ }
+ }
+}
diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/DataFrameFunctions.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/DataFrameFunctions.scala
new file mode 100644
index 0000000..3b0289d
--- /dev/null
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/DataFrameFunctions.scala
@@ -0,0 +1,79 @@
+/*
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.apache.phoenix.spark
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.io.NullWritable
+import org.apache.phoenix.mapreduce.PhoenixOutputFormat
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil
+import org.apache.phoenix.util.SchemaUtil
+import org.apache.spark.sql.DataFrame
+
+import scala.collection.JavaConversions._
+
+@deprecated("Use the DataSource V2 API implementation (see PhoenixDataSource)")
+class DataFrameFunctions(data: DataFrame) extends Serializable {
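+ // Hypothetical usage sketch (assumes the implicit conversion from the
+ // org.apache.phoenix.spark package object is in scope; names are examples):
+ //   df.saveToPhoenix(Map("table" -> "OUTPUT_TABLE", "zkUrl" -> "localhost:2181"))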
+ def saveToPhoenix(parameters: Map[String, String]): Unit = {
+ saveToPhoenix(parameters("table"), zkUrl = parameters.get("zkUrl"), tenantId = parameters.get("TenantId"),
+ skipNormalizingIdentifier = parameters.contains("skipNormalizingIdentifier"))
+ }
+ def saveToPhoenix(tableName: String, conf: Configuration = new Configuration,
+ zkUrl: Option[String] = None, tenantId: Option[String] = None, skipNormalizingIdentifier: Boolean = false): Unit = {
+
+ // Retrieve the schema field names and normalize to Phoenix, need to do this outside of mapPartitions
+ val fieldArray = getFieldArray(skipNormalizingIdentifier, data)
+
+ // Create a configuration object to use for saving
+ @transient val outConfig = ConfigurationUtil.getOutputConfiguration(tableName, fieldArray, zkUrl, tenantId, Some(conf))
+
+ // Retrieve the zookeeper URL
+ val zkUrlFinal = ConfigurationUtil.getZookeeperURL(outConfig)
+
+ // Map the row objects into PhoenixRecordWritable
+ val phxRDD = data.rdd.mapPartitions{ rows =>
+
+ // Create a within-partition config to retrieve the ColumnInfo list
+ @transient val partitionConfig = ConfigurationUtil.getOutputConfiguration(tableName, fieldArray, zkUrlFinal, tenantId)
+ @transient val columns = PhoenixConfigurationUtil.getUpsertColumnMetadataList(partitionConfig).toList
+
+ rows.map { row =>
+ val rec = new PhoenixRecordWritable(columns)
+ row.toSeq.foreach { e => rec.add(e) }
+ (null, rec)
+ }
+ }
+
+ // Save it
+ phxRDD.saveAsNewAPIHadoopFile(
+ Option(
+ conf.get("mapreduce.output.fileoutputformat.outputdir")
+ ).getOrElse(
+ Option(conf.get("mapred.output.dir")).getOrElse("")
+ ),
+ classOf[NullWritable],
+ classOf[PhoenixRecordWritable],
+ classOf[PhoenixOutputFormat[PhoenixRecordWritable]],
+ outConfig
+ )
+ }
+
+ def getFieldArray(skipNormalizingIdentifier: Boolean = false, data: DataFrame) = {
+ if (skipNormalizingIdentifier) {
+ data.schema.fieldNames
+ } else {
+ data.schema.fieldNames.map(x => SchemaUtil.normalizeIdentifier(x))
+ }
+ }
+}
diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/DefaultSource.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/DefaultSource.scala
new file mode 100644
index 0000000..ccdf595
--- /dev/null
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/DefaultSource.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.spark
+
+import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, RelationProvider}
+import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
+
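+// A hypothetical usage sketch for this V1 source (option values are examples):
+//   df.write.format("org.apache.phoenix.spark").mode(SaveMode.Overwrite)
+//     .option("table", "OUTPUT_TABLE").option("zkUrl", "localhost:2181").save()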
+@deprecated("Use the DataSource V2 API implementation (see PhoenixDataSource)")
+class DefaultSource extends RelationProvider with CreatableRelationProvider {
+
+ // Override 'RelationProvider.createRelation', this enables DataFrame.load()
+ override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]): BaseRelation = {
+ verifyParameters(parameters)
+
+ new PhoenixRelation(
+ parameters("table"),
+ parameters("zkUrl"),
+ parameters.contains("dateAsTimestamp")
+ )(sqlContext)
+ }
+
+ // Override 'CreatableRelationProvider.createRelation', this enables DataFrame.save()
+ override def createRelation(sqlContext: SQLContext, mode: SaveMode,
+ parameters: Map[String, String], data: DataFrame): BaseRelation = {
+
+ if (!mode.equals(SaveMode.Overwrite)) {
+ throw new Exception("SaveMode other than SaveMode.Overwrite is not supported")
+ }
+
+ verifyParameters(parameters)
+
+ // Save the DataFrame to Phoenix
+ data.saveToPhoenix(parameters)
+
+ // Return a relation of the saved data
+ createRelation(sqlContext, parameters)
+ }
+
+ // Ensure the required parameters are present
+ def verifyParameters(parameters: Map[String, String]): Unit = {
+ if (parameters.get("table").isEmpty) throw new RuntimeException("No Phoenix 'table' option defined")
+ if (parameters.get("zkUrl").isEmpty) throw new RuntimeException("No Phoenix 'zkUrl' option defined")
+ }
+
+}
diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/FilterExpressionCompiler.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/FilterExpressionCompiler.scala
new file mode 100644
index 0000000..1d6973c
--- /dev/null
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/FilterExpressionCompiler.scala
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.spark
+
+import java.sql.Date
+import java.sql.Timestamp
+import java.text.Format
+
+import org.apache.phoenix.util.{DateUtil, SchemaUtil}
+import org.apache.phoenix.util.StringUtil.escapeStringConstant
+import org.apache.spark.sql.sources._
+
+import scala.collection.mutable
+
+class FilterExpressionCompiler() {
+
+ val dateformatter: Format = DateUtil.getDateFormatter(DateUtil.DEFAULT_DATE_FORMAT, DateUtil.DEFAULT_TIME_ZONE_ID)
+ val timeformatter: Format = DateUtil.getTimestampFormatter(DateUtil.DEFAULT_TIME_FORMAT, DateUtil.DEFAULT_TIME_ZONE_ID)
+
+ /**
+ * Attempts to build a Phoenix-accepted WHERE clause from the given Spark filters,
+ * inspired by Spark SQL's JDBCRDD and the couchbase-spark-connector
+ *
+ * @return tuple representing where clause (derived from supported filters),
+ * array of unsupported filters and array of supported filters
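+ *
+ * For example, Array(EqualTo("ID", 1), GreaterThan("SALARY", 100)) compiles
+ * (roughly, with identifiers escaped) to "ID" = 1 AND "SALARY" > 100, with no
+ * unsupported filters.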
+ */
+ def pushFilters(filters: Array[Filter]): (String, Array[Filter], Array[Filter]) = {
+ if (filters.isEmpty) {
+ return ("", Array[Filter](), Array[Filter]())
+ }
+
+ val filter = new StringBuilder("")
+ // Use a mutable buffer here: ':+' on an immutable Array returns a new array,
+ // so the previous 'unsupportedFilters :+ f' calls silently discarded every entry
+ val unsupportedFilters = mutable.ArrayBuffer[Filter]()
+
+ filters.foreach(f => {
+ // Assume conjunction for multiple filters, unless otherwise specified.
+ // Remember the clause length so the conjunction can be rolled back if this
+ // filter turns out to be unsupported, keeping the WHERE clause well-formed.
+ val lengthBeforeFilter = filter.length
+ if (filter.nonEmpty) {
+ filter.append(" AND")
+ }
+
+ // Roll back any dangling " AND" and record the filter as unsupported
+ def markUnsupported(): Unit = {
+ filter.setLength(lengthBeforeFilter)
+ unsupportedFilters += f
+ }
+
+ f match {
+ // Spark 1.3.1+ supported filters
+ case And(leftFilter, rightFilter) => {
+ val (whereClause, currUnsupportedFilters, _) = pushFilters(Array(leftFilter, rightFilter))
+ if (currUnsupportedFilters.isEmpty)
+ filter.append(whereClause)
+ else
+ markUnsupported()
+ }
+ case Or(leftFilter, rightFilter) => {
+ val (whereLeftClause, leftUnsupportedFilters, _) = pushFilters(Array(leftFilter))
+ val (whereRightClause, rightUnsupportedFilters, _) = pushFilters(Array(rightFilter))
+ if (leftUnsupportedFilters.isEmpty && rightUnsupportedFilters.isEmpty) {
+ // Parenthesize so a surrounding AND does not bind tighter than this OR
+ filter.append("(" + whereLeftClause + " OR " + whereRightClause + ")")
+ } else {
+ markUnsupported()
+ }
+ }
+ case Not(aFilter) => {
+ val (whereClause, currUnsupportedFilters, _) = pushFilters(Array(aFilter))
+ if (currUnsupportedFilters.isEmpty)
+ filter.append(" NOT " + whereClause)
+ else
+ markUnsupported()
+ }
+ case EqualTo(attr, value) => filter.append(s" ${escapeKey(attr)} = ${compileValue(value)}")
+ case GreaterThan(attr, value) => filter.append(s" ${escapeKey(attr)} > ${compileValue(value)}")
+ case GreaterThanOrEqual(attr, value) => filter.append(s" ${escapeKey(attr)} >= ${compileValue(value)}")
+ case LessThan(attr, value) => filter.append(s" ${escapeKey(attr)} < ${compileValue(value)}")
+ case LessThanOrEqual(attr, value) => filter.append(s" ${escapeKey(attr)} <= ${compileValue(value)}")
+ case IsNull(attr) => filter.append(s" ${escapeKey(attr)} IS NULL")
+ case IsNotNull(attr) => filter.append(s" ${escapeKey(attr)} IS NOT NULL")
+ case In(attr, values) => filter.append(s" ${escapeKey(attr)} IN ${values.map(compileValue).mkString("(", ",", ")")}")
+ case StringStartsWith(attr, value) => filter.append(s" ${escapeKey(attr)} LIKE ${compileValue(value + "%")}")
+ case StringEndsWith(attr, value) => filter.append(s" ${escapeKey(attr)} LIKE ${compileValue("%" + value)}")
+ case StringContains(attr, value) => filter.append(s" ${escapeKey(attr)} LIKE ${compileValue("%" + value + "%")}")
+ case _ => markUnsupported()
+ }
+ })
+
+ (filter.toString(), unsupportedFilters.toArray, filters diff unsupportedFilters)
+ }
+
+ // Helper function to escape string values in SQL queries
+ private def compileValue(value: Any): Any = value match {
+ case stringValue: String => s"'${escapeStringConstant(stringValue)}'"
+
+ case timestampValue: Timestamp => getTimestampString(timestampValue)
+
+ case dateValue: Date => getDateString(dateValue)
+
+ // Borrowed from 'elasticsearch-hadoop', support these internal UTF types across Spark versions
+ // Spark 1.4
+ case utf if (isClass(utf, "org.apache.spark.sql.types.UTF8String")) => s"'${escapeStringConstant(utf.toString)}'"
+ // Spark 1.5
+ case utf if (isClass(utf, "org.apache.spark.unsafe.types.UTF8String")) => s"'${escapeStringConstant(utf.toString)}'"
+
+ // Pass through anything else
+ case _ => value
+ }
+
+ private def getTimestampString(timestampValue: Timestamp): String = {
+ "TO_TIMESTAMP('%s', '%s', '%s')".format(timeformatter.format(timestampValue),
+ DateUtil.DEFAULT_TIME_FORMAT, DateUtil.DEFAULT_TIME_ZONE_ID)
+ }
+
+ private def getDateString(dateValue: Date): String = {
+ "TO_DATE('%s', '%s', '%s')".format(dateformatter.format(dateValue),
+ DateUtil.DEFAULT_DATE_FORMAT, DateUtil.DEFAULT_TIME_ZONE_ID)
+ }
+
+ // Helper function to escape column key to work with SQL queries
+ private def escapeKey(key: String): String = SchemaUtil.getEscapedFullColumnName(key)
+
+ private def isClass(obj: Any, className: String) = {
+ className.equals(obj.getClass().getName())
+ }
+}
diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala
new file mode 100644
index 0000000..34033b7
--- /dev/null
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala
@@ -0,0 +1,150 @@
+/*
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.apache.phoenix.spark
+
+import java.sql.DriverManager
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.hbase.{HBaseConfiguration, HConstants}
+import org.apache.hadoop.io.NullWritable
+import org.apache.phoenix.jdbc.PhoenixDriver
+import org.apache.phoenix.mapreduce.PhoenixInputFormat
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil
+import org.apache.phoenix.query.HBaseFactoryProvider
+import org.apache.spark._
+import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.{DataFrame, Row, SQLContext}
+
+import scala.collection.JavaConverters._
+
+@deprecated("Use the DataSource V2 API implementation (see PhoenixDataSource)")
+class PhoenixRDD(sc: SparkContext, table: String, columns: Seq[String],
+ predicate: Option[String] = None,
+ zkUrl: Option[String] = None,
+ @transient conf: Configuration, dateAsTimestamp: Boolean = false,
+ tenantId: Option[String] = None
+ )
+ extends RDD[PhoenixRecordWritable](sc, Nil) {
+
+ // Make sure to register the Phoenix driver
+ DriverManager.registerDriver(new PhoenixDriver)
+
+ @transient lazy val phoenixConf = {
+ getPhoenixConfiguration
+ }
+
+ val phoenixRDD = sc.newAPIHadoopRDD(phoenixConf,
+ classOf[PhoenixInputFormat[PhoenixRecordWritable]],
+ classOf[NullWritable],
+ classOf[PhoenixRecordWritable])
+
+ override protected def getPartitions: Array[Partition] = {
+ phoenixRDD.partitions
+ }
+
+ override protected def getPreferredLocations(split: Partition): Seq[String] = {
+ phoenixRDD.preferredLocations(split)
+ }
+
+ @DeveloperApi
+ override def compute(split: Partition, context: TaskContext) = {
+ phoenixRDD.compute(split, context).map(r => r._2)
+ }
+
+ def printPhoenixConfig(conf: Configuration): Unit = {
+ for (mapEntry <- conf.iterator().asScala) {
+ val k = mapEntry.getKey
+ val v = mapEntry.getValue
+
+ if (k.startsWith("phoenix")) {
+ println(s"$k = $v")
+ }
+ }
+ }
+
+ def getPhoenixConfiguration: Configuration = {
+
+ val config = HBaseFactoryProvider.getConfigurationFactory.getConfiguration(conf)
+
+ PhoenixConfigurationUtil.setInputClass(config, classOf[PhoenixRecordWritable])
+ PhoenixConfigurationUtil.setInputTableName(config, table)
+ PhoenixConfigurationUtil.setPropertyPolicyProviderDisabled(config)
+
+ if (columns.nonEmpty) {
+ PhoenixConfigurationUtil.setSelectColumnNames(config, columns.toArray)
+ }
+
+ if (predicate.isDefined) {
+ PhoenixConfigurationUtil.setInputTableConditions(config, predicate.get)
+ }
+
+ // Override the Zookeeper URL if present. Throw exception if no address given.
+ zkUrl match {
+ case Some(url) => ConfigurationUtil.setZookeeperURL(config, url)
+ case _ => {
+ if (ConfigurationUtil.getZookeeperURL(config).isEmpty) {
+ throw new UnsupportedOperationException(
+ s"One of zkUrl or '${HConstants.ZOOKEEPER_QUORUM}' config property must be provided"
+ )
+ }
+ }
+ }
+
+ tenantId match {
+ case Some(tid) => ConfigurationUtil.setTenantId(config, tid)
+ case _ =>
+ }
+
+ config
+ }
+
+ // Convert our PhoenixRDD to a DataFrame
+ def toDataFrame(sqlContext: SQLContext): DataFrame = {
+ val columnInfoList = PhoenixConfigurationUtil
+ .getSelectColumnMetadataList(new Configuration(phoenixConf))
+ .asScala
+
+ // Keep track of the sql type and column names.
+ val columns: Seq[(String, Int)] = columnInfoList.map(ci => {
+ (ci.getDisplayName, ci.getSqlType)
+ })
+
+ // Lookup the Spark catalyst types from the Phoenix schema
+ val structType = SparkSchemaUtil.phoenixSchemaToCatalystSchema(columnInfoList, dateAsTimestamp)
+
+ // Create the data frame from the converted Spark schema
+ sqlContext.createDataFrame(map(pr => {
+
+ // Create a sequence of column data
+ val rowSeq = columns.map { case (name, sqlType) =>
+ val res = pr.resultMap(name)
+ // Special handling for data types
+ if (dateAsTimestamp && (sqlType == 91 || sqlType == 19) && res != null) { // 91 is the defined type for Date and 19 for UNSIGNED_DATE
+ new java.sql.Timestamp(res.asInstanceOf[java.sql.Date].getTime)
+ } else if ((sqlType == 92 || sqlType == 18) && res != null) { // 92 is the defined type for Time and 18 for UNSIGNED_TIME
+ new java.sql.Timestamp(res.asInstanceOf[java.sql.Time].getTime)
+ } else {
+ res
+ }
+ }
+
+ // Create a Spark Row from the sequence
+ Row.fromSeq(rowSeq)
+ }), structType)
+ }
+
+}
diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRecordWritable.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRecordWritable.scala
new file mode 100644
index 0000000..6d4c4cc
--- /dev/null
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRecordWritable.scala
@@ -0,0 +1,115 @@
+/*
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.apache.phoenix.spark
+
+import java.sql.{PreparedStatement, ResultSet}
+import org.apache.hadoop.mapreduce.lib.db.DBWritable
+import org.apache.phoenix.schema.types._
+import org.apache.phoenix.util.ColumnInfo
+import org.joda.time.DateTime
+import scala.collection.{mutable, immutable}
+
+@deprecated("Use the DataSource V2 API implementation (see PhoenixDataSource)")
+class PhoenixRecordWritable(columnMetaDataList: List[ColumnInfo]) extends DBWritable {
+ val upsertValues = mutable.ArrayBuffer[Any]()
+ val resultMap = mutable.Map[String, AnyRef]()
+
+ def result : immutable.Map[String, AnyRef] = {
+ resultMap.toMap
+ }
+
+ override def write(statement: PreparedStatement): Unit = {
+ // Make sure we at least line up in size
+ if (upsertValues.length != columnMetaDataList.length) {
+ throw new UnsupportedOperationException(
+ s"Upsert values ($upsertValues) do not match the specified columns ($columnMetaDataList)"
+ )
+ }
+
+ // Correlate each value (v) to a column type (c) and an index (i)
+ upsertValues.zip(columnMetaDataList).zipWithIndex.foreach {
+ case ((v, c), i) => {
+ if (v != null) {
+
+ // Both Java and Joda dates used to work in 4.2.3, but now they must be java.sql.Date
+ // Can override any other types here as needed
+ val (finalObj, finalType) = v match {
+ case dt: DateTime => (new java.sql.Date(dt.getMillis), PDate.INSTANCE)
+ case d: java.util.Date => (new java.sql.Date(d.getTime), PDate.INSTANCE)
+ case _ => (v, c.getPDataType)
+ }
+
+ // Helper method to create an SQL array for a specific PDatatype, and set it on the statement
+ def setArrayInStatement(obj: Array[AnyRef]): Unit = {
+ // Create a java.sql.Array, need to lookup the base sql type name
+ val sqlArray = statement.getConnection.createArrayOf(
+ PDataType.arrayBaseType(finalType).getSqlTypeName,
+ obj
+ )
+ statement.setArray(i + 1, sqlArray)
+ }
+
+ // Determine whether to save as an array or object
+ (finalObj, finalType) match {
+ case (obj: Array[AnyRef], _) => setArrayInStatement(obj)
+ case (obj: mutable.ArrayBuffer[AnyVal], _) => setArrayInStatement(obj.map(_.asInstanceOf[AnyRef]).toArray)
+ case (obj: mutable.ArrayBuffer[AnyRef], _) => setArrayInStatement(obj.toArray)
+ case (obj: mutable.WrappedArray[AnyVal], _) => setArrayInStatement(obj.map(_.asInstanceOf[AnyRef]).toArray)
+ case (obj: mutable.WrappedArray[AnyRef], _) => setArrayInStatement(obj.toArray)
+ case (obj: Array[Int], _) => setArrayInStatement(obj.map(_.asInstanceOf[AnyRef]))
+ case (obj: Array[Long], _) => setArrayInStatement(obj.map(_.asInstanceOf[AnyRef]))
+ case (obj: Array[Char], _) => setArrayInStatement(obj.map(_.asInstanceOf[AnyRef]))
+ case (obj: Array[Short], _) => setArrayInStatement(obj.map(_.asInstanceOf[AnyRef]))
+ case (obj: Array[Float], _) => setArrayInStatement(obj.map(_.asInstanceOf[AnyRef]))
+ case (obj: Array[Double], _) => setArrayInStatement(obj.map(_.asInstanceOf[AnyRef]))
+ // PVarbinary and PBinary come in as Array[Byte] but they're SQL objects
+ case (obj: Array[Byte], _ : PVarbinary) => statement.setObject(i + 1, obj)
+ case (obj: Array[Byte], _ : PBinary) => statement.setObject(i + 1, obj)
+ // Otherwise set as array type
+ case (obj: Array[Byte], _) => setArrayInStatement(obj.map(_.asInstanceOf[AnyRef]))
+ case _ => statement.setObject(i + 1, finalObj)
+ }
+ } else {
+ statement.setNull(i + 1, c.getSqlType)
+ }
+ }
+ }
+ }
+
+ override def readFields(resultSet: ResultSet): Unit = {
+ val metadata = resultSet.getMetaData
+ for(i <- 1 to metadata.getColumnCount) {
+
+ // Return the contents of a PhoenixArray, if necessary
+ val value = resultSet.getObject(i) match {
+ case x: PhoenixArray => x.getArray
+ case y => y
+ }
+
+ // Put a (ColumnLabel -> value) entry in the result map
+ resultMap(metadata.getColumnLabel(i)) = value
+ }
+ }
+
+ def add(value: Any): Unit = {
+ upsertValues.append(value)
+ }
+
+ // Empty constructor for MapReduce
+ def this() = {
+ this(List[ColumnInfo]())
+ }
+
+}
diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRelation.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRelation.scala
new file mode 100644
index 0000000..2f6ea8c
--- /dev/null
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRelation.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.spark
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.sources._
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.{Row, SQLContext}
+
+@deprecated("Use the DataSource V2 API implementation (see PhoenixDataSource)")
+case class PhoenixRelation(tableName: String, zkUrl: String, dateAsTimestamp: Boolean = false)(@transient val sqlContext: SQLContext)
+ extends BaseRelation with PrunedFilteredScan {
+
+ /*
+ This is the buildScan() implementing Spark's PrunedFilteredScan.
+ Spark SQL queries with columns or predicates specified will be pushed down
+ to us here, and we can pass that on to Phoenix. According to the docs, this
+ is an optimization: Spark will re-evaluate the filtering/pruning itself, but
+ pushing it down avoids loading the whole table into Spark first.
+ */
+ override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = {
+ val(pushedFilters, _, _) = new FilterExpressionCompiler().pushFilters(filters)
+ new PhoenixRDD(
+ sqlContext.sparkContext,
+ tableName,
+ requiredColumns,
+ Some(pushedFilters),
+ Some(zkUrl),
+ new Configuration(),
+ dateAsTimestamp
+ ).toDataFrame(sqlContext).rdd
+ }
+
+ // Required by BaseRelation, this will return the full schema for a table
+ override def schema: StructType = {
+ new PhoenixRDD(
+ sqlContext.sparkContext,
+ tableName,
+ Seq(),
+ None,
+ Some(zkUrl),
+ new Configuration(),
+ dateAsTimestamp
+ ).toDataFrame(sqlContext).schema
+ }
+
+ override def unhandledFilters(filters: Array[Filter]): Array[Filter] = {
+ val (_, unhandledFilters, _) = new FilterExpressionCompiler().pushFilters(filters)
+ unhandledFilters
+ }
+
+}
diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/ProductRDDFunctions.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/ProductRDDFunctions.scala
new file mode 100644
index 0000000..b073521
--- /dev/null
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/ProductRDDFunctions.scala
@@ -0,0 +1,64 @@
+/*
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.apache.phoenix.spark
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.io.NullWritable
+import org.apache.phoenix.mapreduce.PhoenixOutputFormat
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil
+import org.apache.spark.rdd.RDD
+
+import scala.collection.JavaConversions._
+
+@deprecated("Use the DataSource V2 API implementation (see PhoenixDataSource)")
+class ProductRDDFunctions[A <: Product](data: RDD[A]) extends Serializable {
+
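+ // Hypothetical usage sketch (assumes the implicit conversion from the
+ // org.apache.phoenix.spark package object is in scope; names are examples):
+ //   rdd.saveToPhoenix("OUTPUT_TABLE", Seq("ID", "COL1"), zkUrl = Some("localhost:2181"))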
+ def saveToPhoenix(tableName: String, cols: Seq[String],
+ conf: Configuration = new Configuration, zkUrl: Option[String] = None, tenantId: Option[String] = None)
+ : Unit = {
+
+ // Create a configuration object to use for saving
+ @transient val outConfig = ConfigurationUtil.getOutputConfiguration(tableName, cols, zkUrl, tenantId, Some(conf))
+
+ // Retrieve the zookeeper URL
+ val zkUrlFinal = ConfigurationUtil.getZookeeperURL(outConfig)
+
+ // Map the row objects into PhoenixRecordWritable
+ val phxRDD = data.mapPartitions{ rows =>
+
+ // Create a within-partition config to retrieve the ColumnInfo list
+ @transient val partitionConfig = ConfigurationUtil.getOutputConfiguration(tableName, cols, zkUrlFinal, tenantId)
+ @transient val columns = PhoenixConfigurationUtil.getUpsertColumnMetadataList(partitionConfig).toList
+
+ rows.map { row =>
+ val rec = new PhoenixRecordWritable(columns)
+ row.productIterator.foreach { e => rec.add(e) }
+ (null, rec)
+ }
+ }
+
+ // Save it
+ phxRDD.saveAsNewAPIHadoopFile(
+ Option(
+ conf.get("mapreduce.output.fileoutputformat.outputdir")
+ ).getOrElse(
+ Option(conf.get("mapred.output.dir")).getOrElse("")
+ ),
+ classOf[NullWritable],
+ classOf[PhoenixRecordWritable],
+ classOf[PhoenixOutputFormat[PhoenixRecordWritable]],
+ outConfig
+ )
+ }
+}
\ No newline at end of file
diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/SparkContextFunctions.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/SparkContextFunctions.scala
new file mode 100644
index 0000000..1b377ab
--- /dev/null
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/SparkContextFunctions.scala
@@ -0,0 +1,42 @@
+/*
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.apache.phoenix.spark
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+
+@deprecated("Use the DataSource V2 API implementation (see PhoenixDataSource)")
+class SparkContextFunctions(@transient val sc: SparkContext) extends Serializable {
+
+ /*
+ This will return an RDD of Map[String, AnyRef], where the String key corresponds to the column
+ name and the AnyRef value will be a java.sql type as returned by Phoenix
+
+ 'table' is the corresponding Phoenix table
+ 'columns' is a sequence of columns to query
+ 'predicate' is a set of statements to go after a WHERE clause, e.g. "TID = 123"
+ 'zkUrl' is an optional Zookeeper URL to use to connect to Phoenix
+ 'conf' is a Hadoop Configuration object. If zkUrl is not set, the "hbase.zookeeper.quorum"
+ property will be used
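+
+ A hypothetical usage sketch (assumes the implicit conversion from the
+ org.apache.phoenix.spark package object is in scope; names are examples):
+ val rdd = sc.phoenixTableAsRDD("TABLE1", Seq("ID", "COL1"), zkUrl = Some("localhost:2181"))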
+ */
+
+ def phoenixTableAsRDD(table: String, columns: Seq[String], predicate: Option[String] = None,
+ zkUrl: Option[String] = None, tenantId: Option[String] = None, conf: Configuration = new Configuration())
+ : RDD[Map[String, AnyRef]] = {
+
+ // Create a PhoenixRDD, but only return the serializable 'result' map
+ new PhoenixRDD(sc, table, columns, predicate, zkUrl, conf, tenantId = tenantId).map(_.result)
+ }
+}
\ No newline at end of file
diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/SparkSchemaUtil.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/SparkSchemaUtil.scala
new file mode 100644
index 0000000..f69e988
--- /dev/null
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/SparkSchemaUtil.scala
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.spark
+
+import org.apache.phoenix.query.QueryConstants
+import org.apache.phoenix.schema.types._
+import org.apache.phoenix.util.{ColumnInfo, SchemaUtil}
+import org.apache.spark.sql.types._
+
+object SparkSchemaUtil {
+
+ def phoenixSchemaToCatalystSchema(columnList: Seq[ColumnInfo], dateAsTimestamp: Boolean = false) : StructType = {
+ val structFields = columnList.map(ci => {
+ val structType = phoenixTypeToCatalystType(ci, dateAsTimestamp)
+ StructField(normalizeColumnName(ci.getColumnName), structType)
+ })
+ new StructType(structFields.toArray)
+ }
+
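+ // Strips the default column family prefix ("0" is Phoenix's default column
+ // family) so unqualified columns surface with clean names in the DataFrame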
+ def normalizeColumnName(columnName: String): String = {
+ val unescapedColumnName = SchemaUtil.getUnEscapedFullColumnName(columnName)
+ if (unescapedColumnName.indexOf(QueryConstants.NAME_SEPARATOR) < 0) {
+ unescapedColumnName
+ } else {
+ // split by separator to get the column family and column name
+ val tokens = unescapedColumnName.split(QueryConstants.NAME_SEPARATOR_REGEX)
+ if (tokens(0) == "0") tokens(1) else unescapedColumnName
+ }
+ }
+
+ // Lookup table for Phoenix types to Spark catalyst types
+ def phoenixTypeToCatalystType(columnInfo: ColumnInfo, dateAsTimestamp: Boolean): DataType = columnInfo.getPDataType match {
+ case t if t.isInstanceOf[PVarchar] || t.isInstanceOf[PChar] => StringType
+ case t if t.isInstanceOf[PLong] || t.isInstanceOf[PUnsignedLong] => LongType
+ case t if t.isInstanceOf[PInteger] || t.isInstanceOf[PUnsignedInt] => IntegerType
+ case t if t.isInstanceOf[PSmallint] || t.isInstanceOf[PUnsignedSmallint] => ShortType
+ case t if t.isInstanceOf[PTinyint] || t.isInstanceOf[PUnsignedTinyint] => ByteType
+ case t if t.isInstanceOf[PFloat] || t.isInstanceOf[PUnsignedFloat] => FloatType
+ case t if t.isInstanceOf[PDouble] || t.isInstanceOf[PUnsignedDouble] => DoubleType
+ // Use Spark system default precision for now (explicit to work with < 1.5)
+ case t if t.isInstanceOf[PDecimal] =>
+ if (columnInfo.getPrecision == null || columnInfo.getPrecision < 0) DecimalType(38, 18) else DecimalType(columnInfo.getPrecision, columnInfo.getScale)
+ case t if t.isInstanceOf[PTimestamp] || t.isInstanceOf[PUnsignedTimestamp] => TimestampType
+ case t if t.isInstanceOf[PTime] || t.isInstanceOf[PUnsignedTime] => TimestampType
+ case t if (t.isInstanceOf[PDate] || t.isInstanceOf[PUnsignedDate]) && !dateAsTimestamp => DateType
+ case t if (t.isInstanceOf[PDate] || t.isInstanceOf[PUnsignedDate]) && dateAsTimestamp => TimestampType
+ case t if t.isInstanceOf[PBoolean] => BooleanType
+ case t if t.isInstanceOf[PVarbinary] || t.isInstanceOf[PBinary] => BinaryType
+ case t if t.isInstanceOf[PIntegerArray] || t.isInstanceOf[PUnsignedIntArray] => ArrayType(IntegerType, containsNull = true)
+ case t if t.isInstanceOf[PBooleanArray] => ArrayType(BooleanType, containsNull = true)
+ case t if t.isInstanceOf[PVarcharArray] || t.isInstanceOf[PCharArray] => ArrayType(StringType, containsNull = true)
+ case t if t.isInstanceOf[PVarbinaryArray] || t.isInstanceOf[PBinaryArray] => ArrayType(BinaryType, containsNull = true)
+ case t if t.isInstanceOf[PLongArray] || t.isInstanceOf[PUnsignedLongArray] => ArrayType(LongType, containsNull = true)
+ case t if t.isInstanceOf[PSmallintArray] || t.isInstanceOf[PUnsignedSmallintArray] => ArrayType(IntegerType, containsNull = true)
+ case t if t.isInstanceOf[PTinyintArray] || t.isInstanceOf[PUnsignedTinyintArray] => ArrayType(ByteType, containsNull = true)
+ case t if t.isInstanceOf[PFloatArray] || t.isInstanceOf[PUnsignedFloatArray] => ArrayType(FloatType, containsNull = true)
+ case t if t.isInstanceOf[PDoubleArray] || t.isInstanceOf[PUnsignedDoubleArray] => ArrayType(DoubleType, containsNull = true)
+ case t if t.isInstanceOf[PDecimalArray] => ArrayType(
+ if (columnInfo.getPrecision == null || columnInfo.getPrecision < 0) DecimalType(38, 18) else DecimalType(columnInfo.getPrecision, columnInfo.getScale), containsNull = true)
+ case t if t.isInstanceOf[PTimestampArray] || t.isInstanceOf[PUnsignedTimestampArray] => ArrayType(TimestampType, containsNull = true)
+ case t if t.isInstanceOf[PDateArray] || t.isInstanceOf[PUnsignedDateArray] => ArrayType(TimestampType, containsNull = true)
+ case t if t.isInstanceOf[PTimeArray] || t.isInstanceOf[PUnsignedTimeArray] => ArrayType(TimestampType, containsNull = true)
+ }
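+
+ // Note: the match above has no default case, so a Phoenix column type without a
+ // listed mapping will surface as a scala.MatchError at runtime.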
+
+}
diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/SparkSqlContextFunctions.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/SparkSqlContextFunctions.scala
new file mode 100644
index 0000000..f9154ad
--- /dev/null
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/SparkSqlContextFunctions.scala
@@ -0,0 +1,42 @@
+/*
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.apache.phoenix.spark
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.spark.sql.{DataFrame, SQLContext}
+
+@deprecated("Use the DataSource V2 API implementation (see PhoenixDataSource)")
+class SparkSqlContextFunctions(@transient val sqlContext: SQLContext) extends Serializable {
+
+ /*
+ This will return a Spark DataFrame, with Phoenix types converted to Spark SQL catalyst types
+
+ 'table' is the corresponding Phoenix table
+ 'columns' is a sequence of columns to query
+ 'predicate' is a set of statements to go after a WHERE clause, e.g. "TID = 123"
+ 'zkUrl' is an optional Zookeeper URL to use to connect to Phoenix
+ 'conf' is a Hadoop Configuration object. If zkUrl is not set, the "hbase.zookeeper.quorum"
+ property will be used
+ */
+ def phoenixTableAsDataFrame(table: String, columns: Seq[String],
+ predicate: Option[String] = None,
+ zkUrl: Option[String] = None,
+ tenantId: Option[String] = None,
+ conf: Configuration = new Configuration): DataFrame = {
+
+ // Create the PhoenixRDD and convert it to a DataFrame
+ new PhoenixRDD(sqlContext.sparkContext, table, columns, predicate, zkUrl, conf, tenantId = tenantId)
+ .toDataFrame(sqlContext)
+ }
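+
+ /* A hedged usage sketch; the table, columns and ZooKeeper quorum below are
+ illustrative, not taken from this repository:
+
+ import org.apache.phoenix.spark._
+ val df = sqlContext.phoenixTableAsDataFrame(
+ "TABLE1", Seq("ID", "COL1"),
+ predicate = Some("ID > 100"),
+ zkUrl = Some("localhost:2181"))
+
+ If zkUrl is omitted, the "hbase.zookeeper.quorum" property from 'conf' is
+ used, as noted above.
+ */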
+}
\ No newline at end of file
diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/package.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/package.scala
new file mode 100644
index 0000000..3fed79e
--- /dev/null
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/package.scala
@@ -0,0 +1,36 @@
+/*
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.apache.phoenix
+
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{DataFrame, SQLContext}
+
+package object spark {
+ implicit def toProductRDDFunctions[A <: Product](rdd: RDD[A]): ProductRDDFunctions[A] = {
+ new ProductRDDFunctions[A](rdd)
+ }
+
+ implicit def toSparkContextFunctions(sc: SparkContext): SparkContextFunctions = {
+ new SparkContextFunctions(sc)
+ }
+
+ implicit def toSparkSqlContextFunctions(sqlContext: SQLContext): SparkSqlContextFunctions = {
+ new SparkSqlContextFunctions(sqlContext)
+ }
+
+ implicit def toDataFrameFunctions(data: DataFrame): DataFrameFunctions = {
+ new DataFrameFunctions(data)
+ }
+}
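+
+// A hedged sketch of how these implicits are typically brought into scope; the
+// method names come from the corresponding *Functions classes in this module,
+// and the table, columns and ZooKeeper quorum are illustrative:
+//
+//   import org.apache.phoenix.spark._
+//   // SparkContext gains phoenixTableAsRDD via SparkContextFunctions
+//   val rdd = sc.phoenixTableAsRDD("TABLE1", Seq("ID", "COL1"), zkUrl = Some("localhost:2181"))
+//   // RDD[Product] (e.g. tuples) gains saveToPhoenix via ProductRDDFunctions
+//   sc.parallelize(Seq((1L, "v1"), (2L, "v2")))
+//     .saveToPhoenix("OUTPUT_TABLE", Seq("ID", "COL1"), zkUrl = Some("localhost:2181"))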
diff --git a/phoenix-spark/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/PhoenixJdbcDialect.scala b/phoenix-spark/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/PhoenixJdbcDialect.scala
new file mode 100644
index 0000000..01437f0
--- /dev/null
+++ b/phoenix-spark/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/PhoenixJdbcDialect.scala
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.jdbc
+
+import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcType}
+import org.apache.spark.sql.types._
+
+private object PhoenixJdbcDialect extends JdbcDialect {
+
+ override def canHandle(url: String): Boolean = url.startsWith("jdbc:phoenix")
+
+ /**
+ * This is only called for ArrayType (see JdbcUtils.makeSetter)
+ */
+ override def getJDBCType(dt: DataType): Option[JdbcType] = dt match {
+ case StringType => Some(JdbcType("VARCHAR", java.sql.Types.VARCHAR))
+ case BinaryType => Some(JdbcType("BINARY(" + dt.defaultSize + ")", java.sql.Types.BINARY))
+ case ByteType => Some(JdbcType("TINYINT", java.sql.Types.TINYINT))
+ case _ => None
+ }
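+
+ /* A hedged usage sketch: Spark only consults a dialect once it is registered,
+ and since this object is private, registration would have to happen from within
+ this package. The URL below is illustrative:
+
+ org.apache.spark.sql.jdbc.JdbcDialects.registerDialect(PhoenixJdbcDialect)
+ df.write.jdbc("jdbc:phoenix:localhost:2181", "OUTPUT_TABLE", new java.util.Properties)
+ */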
+
+}
diff --git a/phoenix-spark/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/SparkJdbcUtil.scala b/phoenix-spark/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/SparkJdbcUtil.scala
new file mode 100644
index 0000000..eac483a
--- /dev/null
+++ b/phoenix-spark/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/SparkJdbcUtil.scala
@@ -0,0 +1,309 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources
+
+import java.sql.{Connection, PreparedStatement, ResultSet}
+import java.util.Locale
+
+import org.apache.spark.executor.InputMetrics
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.encoders.RowEncoder
+import org.apache.spark.sql.catalyst.expressions.SpecificInternalRow
+import org.apache.spark.sql.catalyst.util.{DateTimeUtils, GenericArrayData}
+import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils
+import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils._
+import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcType}
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
+import org.apache.spark.util.NextIterator
+
+object SparkJdbcUtil {
+
+ def toRow(schema: StructType, internalRow: InternalRow) : Row = {
+ val encoder = RowEncoder(schema).resolveAndBind()
+ encoder.fromRow(internalRow)
+ }
+
+ // A `JDBCValueGetter` is responsible for getting a value from a `ResultSet` into a field
+ // of an `InternalRow`. The last argument `Int` is the (0-based) index of the field to set
+ // in the row; the same index (plus one) addresses the value in the `ResultSet`.
+ private type JDBCValueGetter = (ResultSet, InternalRow, Int) => Unit
+
+ private def nullSafeConvert[T](input: T, f: T => Any): Any = {
+ if (input == null) {
+ null
+ } else {
+ f(input)
+ }
+ }
+
+ /**
+ * Creates `JDBCValueGetter`s according to [[StructType]], which can set
+ * each value from `ResultSet` to each field of [[InternalRow]] correctly.
+ */
+ private def makeGetters(schema: StructType): Array[JDBCValueGetter] =
+ schema.fields.map(sf => makeGetter(sf.dataType, sf.metadata))
+
+ private def makeGetter(dt: DataType, metadata: Metadata): JDBCValueGetter = dt match {
+ case BooleanType =>
+ (rs: ResultSet, row: InternalRow, pos: Int) =>
+ row.setBoolean(pos, rs.getBoolean(pos + 1))
+
+ case DateType =>
+ (rs: ResultSet, row: InternalRow, pos: Int) =>
+ // DateTimeUtils.fromJavaDate does not handle null value, so we need to check it.
+ val dateVal = rs.getDate(pos + 1)
+ if (dateVal != null) {
+ row.setInt(pos, DateTimeUtils.fromJavaDate(dateVal))
+ } else {
+ row.update(pos, null)
+ }
+
+ // When connecting to Oracle through JDBC, the precision and scale of the BigDecimal
+ // returned by ResultSet.getBigDecimal do not always match the table schema reported
+ // by ResultSetMetaData.getPrecision and ResultSetMetaData.getScale. If you insert a
+ // value like 19999 into a column with NUMBER(12, 2) type, you get back a BigDecimal
+ // with scale 0, while the DataFrame schema has the correct type DecimalType(12, 2).
+ // Thus, after saving the DataFrame into a Parquet file and retrieving it, you will
+ // get the wrong result 199.99. So the precision and scale for Decimal must be set
+ // based on the JDBC metadata.
+ case DecimalType.Fixed(p, s) =>
+ (rs: ResultSet, row: InternalRow, pos: Int) =>
+ val decimal =
+ nullSafeConvert[java.math.BigDecimal](rs.getBigDecimal(pos + 1), d => Decimal(d, p, s))
+ row.update(pos, decimal)
+
+ case DoubleType =>
+ (rs: ResultSet, row: InternalRow, pos: Int) =>
+ row.setDouble(pos, rs.getDouble(pos + 1))
+
+ case FloatType =>
+ (rs: ResultSet, row: InternalRow, pos: Int) =>
+ row.setFloat(pos, rs.getFloat(pos + 1))
+
+ case IntegerType =>
+ (rs: ResultSet, row: InternalRow, pos: Int) =>
+ row.setInt(pos, rs.getInt(pos + 1))
+
+ case LongType if metadata.contains("binarylong") =>
+ (rs: ResultSet, row: InternalRow, pos: Int) =>
+ val bytes = rs.getBytes(pos + 1)
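+ // interpret the bytes as an unsigned big-endian integer, e.g.
+ // Array[Byte](0x01, 0x02) accumulates as 256 * (256 * 0 + 1) + 2 == 258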
+ var ans = 0L
+ var j = 0
+ while (j < bytes.length) {
+ ans = 256 * ans + (255 & bytes(j))
+ j = j + 1
+ }
+ row.setLong(pos, ans)
+
+ case LongType =>
+ (rs: ResultSet, row: InternalRow, pos: Int) =>
+ row.setLong(pos, rs.getLong(pos + 1))
+
+ case ShortType =>
+ (rs: ResultSet, row: InternalRow, pos: Int) =>
+ row.setShort(pos, rs.getShort(pos + 1))
+
+ case StringType =>
+ (rs: ResultSet, row: InternalRow, pos: Int) =>
+ // TODO(davies): use getBytes for better performance, if the encoding is UTF-8
+ row.update(pos, UTF8String.fromString(rs.getString(pos + 1)))
+
+ case TimestampType =>
+ (rs: ResultSet, row: InternalRow, pos: Int) =>
+ val t = rs.getTimestamp(pos + 1)
+ if (t != null) {
+ row.setLong(pos, DateTimeUtils.fromJavaTimestamp(t))
+ } else {
+ row.update(pos, null)
+ }
+
+ case BinaryType =>
+ (rs: ResultSet, row: InternalRow, pos: Int) =>
+ row.update(pos, rs.getBytes(pos + 1))
+
+ case ByteType =>
+ (rs: ResultSet, row: InternalRow, pos: Int) =>
+ row.update(pos, rs.getByte(pos + 1))
+
+ case ArrayType(et, _) =>
+ val elementConversion = et match {
+ case TimestampType =>
+ (array: Object) =>
+ array.asInstanceOf[Array[java.sql.Timestamp]].map { timestamp =>
+ nullSafeConvert(timestamp, DateTimeUtils.fromJavaTimestamp)
+ }
+
+ case StringType =>
+ (array: Object) =>
+ // some underlying types are not String, such as uuid, inet, cidr, etc.
+ array.asInstanceOf[Array[java.lang.Object]]
+ .map(obj => if (obj == null) null else UTF8String.fromString(obj.toString))
+
+ case DateType =>
+ (array: Object) =>
+ array.asInstanceOf[Array[java.sql.Date]].map { date =>
+ nullSafeConvert(date, DateTimeUtils.fromJavaDate)
+ }
+
+ case dt: DecimalType =>
+ (array: Object) =>
+ array.asInstanceOf[Array[java.math.BigDecimal]].map { decimal =>
+ nullSafeConvert[java.math.BigDecimal](
+ decimal, d => Decimal(d, dt.precision, dt.scale))
+ }
+
+ case LongType if metadata.contains("binarylong") =>
+ throw new IllegalArgumentException(s"Unsupported array element " +
+ s"type ${dt.catalogString} based on binary")
+
+ case ArrayType(_, _) =>
+ throw new IllegalArgumentException("Nested arrays unsupported")
+
+ case _ => (array: Object) => array.asInstanceOf[Array[Any]]
+ }
+
+ (rs: ResultSet, row: InternalRow, pos: Int) =>
+ val array = nullSafeConvert[java.sql.Array](
+ input = rs.getArray(pos + 1),
+ array => new GenericArrayData(elementConversion.apply(array.getArray)))
+ row.update(pos, array)
+
+ case _ => throw new IllegalArgumentException(s"Unsupported type ${dt.catalogString}")
+ }
+
+ // TODO just use JdbcUtils.resultSetToSparkInternalRows in Spark 3.0 (see SPARK-26499)
+ def resultSetToSparkInternalRows(
+ resultSet: ResultSet,
+ schema: StructType,
+ inputMetrics: InputMetrics): Iterator[InternalRow] = {
+ // JdbcUtils.resultSetToSparkInternalRows(resultSet, schema, inputMetrics)
+ new NextIterator[InternalRow] {
+ private[this] val rs = resultSet
+ private[this] val getters: Array[JDBCValueGetter] = makeGetters(schema)
+ private[this] val mutableRow = new SpecificInternalRow(schema.fields.map(x => x.dataType))
+
+ override protected def close(): Unit = {
+ try {
+ rs.close()
+ } catch {
+ // best-effort close: ignore failures while releasing the ResultSet
+ case _: Exception =>
+ }
+ }
+
+ override protected def getNext(): InternalRow = {
+ if (rs.next()) {
+ inputMetrics.incRecordsRead(1)
+ var i = 0
+ while (i < getters.length) {
+ getters(i).apply(rs, mutableRow, i)
+ if (rs.wasNull) mutableRow.setNullAt(i)
+ i = i + 1
+ }
+ mutableRow
+ } else {
+ finished = true
+ null.asInstanceOf[InternalRow]
+ }
+ }
+ }
+ }
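+
+ /* A hedged usage sketch (the statement, schema and metrics wiring are assumed
+ to be in scope; names are illustrative):
+
+ val rs = stmt.executeQuery()
+ val internalRows = SparkJdbcUtil.resultSetToSparkInternalRows(rs, schema, inputMetrics)
+ val rows = internalRows.map(ir => SparkJdbcUtil.toRow(schema, ir.copy()))
+
+ The copy() matters: the iterator reuses a single mutable row, so callers that
+ buffer rows must copy each InternalRow before holding on to it.
+ */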
+
+ // A `JDBCValueSetter` is responsible for setting a value from a `Row` into a field of a
+ // `PreparedStatement`. The last argument `Int` is the (0-based) index of the value in the
+ // `Row`; the same index (plus one) addresses the parameter in the SQL statement.
+ private type JDBCValueSetter = (PreparedStatement, Row, Int) => Unit
+
+ // taken from Spark's JdbcUtils.scala; it cannot be reused directly because the method there is private
+ def makeSetter(
+ conn: Connection,
+ dialect: JdbcDialect,
+ dataType: DataType): JDBCValueSetter = dataType match {
+ case IntegerType =>
+ (stmt: PreparedStatement, row: Row, pos: Int) =>
+ stmt.setInt(pos + 1, row.getInt(pos))
+
+ case LongType =>
+ (stmt: PreparedStatement, row: Row, pos: Int) =>
+ stmt.setLong(pos + 1, row.getLong(pos))
+
+ case DoubleType =>
+ (stmt: PreparedStatement, row: Row, pos: Int) =>
+ stmt.setDouble(pos + 1, row.getDouble(pos))
+
+ case FloatType =>
+ (stmt: PreparedStatement, row: Row, pos: Int) =>
+ stmt.setFloat(pos + 1, row.getFloat(pos))
+
+ case ShortType =>
+ (stmt: PreparedStatement, row: Row, pos: Int) =>
+ stmt.setInt(pos + 1, row.getShort(pos))
+
+ case ByteType =>
+ (stmt: PreparedStatement, row: Row, pos: Int) =>
+ stmt.setInt(pos + 1, row.getByte(pos))
+
+ case BooleanType =>
+ (stmt: PreparedStatement, row: Row, pos: Int) =>
+ stmt.setBoolean(pos + 1, row.getBoolean(pos))
+
+ case StringType =>
+ (stmt: PreparedStatement, row: Row, pos: Int) =>
+ stmt.setString(pos + 1, row.getString(pos))
+
+ case BinaryType =>
+ (stmt: PreparedStatement, row: Row, pos: Int) =>
+ stmt.setBytes(pos + 1, row.getAs[Array[Byte]](pos))
+
+ case TimestampType =>
+ (stmt: PreparedStatement, row: Row, pos: Int) =>
+ stmt.setTimestamp(pos + 1, row.getAs[java.sql.Timestamp](pos))
+
+ case DateType =>
+ (stmt: PreparedStatement, row: Row, pos: Int) =>
+ stmt.setDate(pos + 1, row.getAs[java.sql.Date](pos))
+
+ case t: DecimalType =>
+ (stmt: PreparedStatement, row: Row, pos: Int) =>
+ stmt.setBigDecimal(pos + 1, row.getDecimal(pos))
+
+ case ArrayType(et, _) =>
+ // remove type length parameters from end of type name
+ val typeName = getJdbcType(et, dialect).databaseTypeDefinition
+ .toLowerCase(Locale.ROOT).split("\\(")(0)
+ (stmt: PreparedStatement, row: Row, pos: Int) =>
+ val array = conn.createArrayOf(
+ typeName,
+ row.getSeq[AnyRef](pos).toArray)
+ stmt.setArray(pos + 1, array)
+
+ case _ =>
+ (_: PreparedStatement, _: Row, pos: Int) =>
+ throw new IllegalArgumentException(
+ s"Can't translate non-null value for field $pos")
+ }
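+
+ /* A hedged usage sketch mirroring Spark's own writer loop (conn, dialect, stmt,
+ row and schema are assumed to be in scope):
+
+ val setters = schema.fields.map(f => makeSetter(conn, dialect, f.dataType))
+ val nullTypes = schema.fields.map(f => getJdbcType(f.dataType, dialect).jdbcNullType)
+ var i = 0
+ while (i < setters.length) {
+ if (row.isNullAt(i)) stmt.setNull(i + 1, nullTypes(i)) else setters(i)(stmt, row, i)
+ i += 1
+ }
+ */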
+
+ // taken from Spark JdbcUtils
+ def getJdbcType(dt: DataType, dialect: JdbcDialect): JdbcType = {
+ dialect.getJDBCType(dt).orElse(getCommonJDBCType(dt)).getOrElse(
+ throw new IllegalArgumentException(s"Can't get JDBC type for ${dt.catalogString}"))
+ }
+
+}
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..ec1ff81
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,1081 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-connectors</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ <packaging>pom</packaging>
+ <name>Apache Phoenix Connectors</name>
+ <description>Connectors for third party libraries to access data stored in Phoenix</description>
+
+ <licenses>
+ <license>
+ <name>The Apache Software License, Version 2.0</name>
+ <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ <comments/>
+ </license>
+ </licenses>
+
+ <organization>
+ <name>Apache Software Foundation</name>
+ <url>http://www.apache.org</url>
+ </organization>
+
+ <modules>
+ <module>phoenix-flume</module>
+ <module>phoenix-kafka</module>
+ <module>phoenix-pig</module>
+ <module>phoenix-spark</module>
+ <module>phoenix-hive</module>
+ </modules>
+
+ <repositories>
+ <repository>
+ <id>apache release</id>
+ <url>https://repository.apache.org/content/repositories/releases/</url>
+ </repository>
+ </repositories>
+
+ <parent>
+ <groupId>org.apache</groupId>
+ <artifactId>apache</artifactId>
+ <version>14</version>
+ </parent>
+
+ <scm>
+ <connection>scm:git:https://gitbox.apache.org/repos/asf/phoenix-connectors.git</connection>
+ <url>https://gitbox.apache.org/repos/asf/phoenix-connectors.git</url>
+ <developerConnection>scm:git:https://gitbox.apache.org/repos/asf/phoenix-connectors.git</developerConnection>
+ </scm>
+
+ <properties>
+ <!-- General Properties -->
+ <antlr-input.dir>src/main/antlr3</antlr-input.dir>
+ <antlr-output.dir>target/generated-sources/antlr3</antlr-output.dir>
+ <test.output.tofile>true</test.output.tofile>
+ <top.dir>${project.basedir}</top.dir>
+
+ <!-- Hadoop Versions -->
+ <phoenix.version>4.15.0-HBase-1.4-SNAPSHOT</phoenix.version>
+ <hbase.version>1.4.0</hbase.version>
+ <hadoop-two.version>2.7.5</hadoop-two.version>
+
+ <!-- Dependency versions -->
+ <commons-cli.version>1.2</commons-cli.version>
+ <hive.version>1.2.1</hive.version>
+ <hadoop.version>2.7.1</hadoop.version>
+ <pig.version>0.13.0</pig.version>
+ <jackson.version>1.9.2</jackson.version>
+ <antlr.version>3.5.2</antlr.version>
+ <log4j.version>1.2.17</log4j.version>
+ <disruptor.version>3.3.6</disruptor.version>
+ <slf4j.version>1.6.4</slf4j.version>
+ <protobuf-java.version>2.5.0</protobuf-java.version>
+ <commons-io.version>2.1</commons-io.version>
+ <commons-lang.version>2.5</commons-lang.version>
+ <commons-logging.version>1.2</commons-logging.version>
+ <commons-csv.version>1.0</commons-csv.version>
+ <sqlline.version>1.2.0</sqlline.version>
+ <guava.version>13.0.1</guava.version>
+ <flume.version>1.4.0</flume.version>
+ <kafka.version>0.9.0.0</kafka.version>
+ <findbugs-annotations.version>1.3.9-1</findbugs-annotations.version>
+ <jcip-annotations.version>1.0-1</jcip-annotations.version>
+ <jline.version>2.11</jline.version>
+ <snappy.version>0.3</snappy.version>
+ <commons-codec.version>1.7</commons-codec.version>
+ <htrace.version>3.1.0-incubating</htrace.version>
+ <collections.version>3.2.2</collections.version>
+ <!-- Do not change jodatime.version until HBASE-15199 is fixed -->
+ <jodatime.version>1.6</jodatime.version>
+ <joni.version>2.1.2</joni.version>
+ <avatica.version>1.12.0</avatica.version>
+ <jettyVersion>8.1.7.v20120910</jettyVersion>
+ <tephra.version>0.15.0-incubating</tephra.version>
+ <omid.version>1.0.0</omid.version>
+ <spark.version>2.4.0</spark.version>
+ <scala.version>2.11.8</scala.version>
+ <scala.binary.version>2.11</scala.binary.version>
+ <stream.version>2.9.5</stream.version>
+ <i18n-util.version>1.0.4</i18n-util.version>
+ <servlet.api.version>3.1.0</servlet.api.version>
+ <!-- Test Dependencies -->
+ <mockito-all.version>1.8.5</mockito-all.version>
+ <junit.version>4.12</junit.version>
+
+ <!-- Plugin versions -->
+ <maven-eclipse-plugin.version>2.9</maven-eclipse-plugin.version>
+ <maven-build-helper-plugin.version>1.9.1</maven-build-helper-plugin.version>
+ <maven-surefire-plugin.version>2.20</maven-surefire-plugin.version>
+ <maven-failsafe-plugin.version>2.20</maven-failsafe-plugin.version>
+
+ <maven-dependency-plugin.version>2.1</maven-dependency-plugin.version>
+ <maven.assembly.version>2.5.2</maven.assembly.version>
+
+ <!-- Plugin options -->
+ <numForkedUT>8</numForkedUT>
+ <numForkedIT>7</numForkedIT>
+ <it.failIfNoSpecifiedTests>false</it.failIfNoSpecifiedTests>
+ <surefire.failIfNoSpecifiedTests>false</surefire.failIfNoSpecifiedTests>
+
+ <!-- Set default encoding so multi-byte tests work correctly on the Mac -->
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
+ <curator.version>2.12.0</curator.version>
+
+ </properties>
+
+ <build>
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>3.0</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ </configuration>
+ </plugin>
+ <!--This plugin's configuration is used to store Eclipse m2e settings
+ only. It has no influence on the Maven build itself. -->
+ <plugin>
+ <groupId>org.eclipse.m2e</groupId>
+ <artifactId>lifecycle-mapping</artifactId>
+ <version>1.0.0</version>
+ <configuration>
+ <lifecycleMappingMetadata>
+ <pluginExecutions>
+ <pluginExecution>
+ <pluginExecutionFilter>
+ <groupId>org.antlr</groupId>
+ <artifactId>antlr3-maven-plugin</artifactId>
+ <versionRange>[3.5,)</versionRange>
+ <goals>
+ <goal>antlr</goal>
+ </goals>
+ </pluginExecutionFilter>
+ <action>
+ <ignore />
+ </action>
+ </pluginExecution>
+ </pluginExecutions>
+ </lifecycleMappingMetadata>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-install-plugin</artifactId>
+ <version>2.5.2</version>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-eclipse-plugin</artifactId>
+ <version>${maven-eclipse-plugin.version}</version>
+ </plugin>
+ <plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <version>${maven.assembly.version}</version>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <!-- Avoid defining exclusions in pluginManagement as they are global.
+ We already inherit some from the ASF parent pom. -->
+ </plugin>
+ <!-- We put slow-running tests into src/it and run them during the
+ integration-test phase using the failsafe plugin. This way
+ developers can run unit tests conveniently from the IDE or via
+ "mvn package" from the command line without triggering time
+ consuming integration tests. -->
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ <version>${maven-build-helper-plugin.version}</version>
+ <executions>
+ <execution>
+ <id>add-test-source</id>
+ <phase>validate</phase>
+ <goals>
+ <goal>add-test-source</goal>
+ </goals>
+ <configuration>
+ <sources>
+ <source>${basedir}/src/it/java</source>
+ </sources>
+ </configuration>
+ </execution>
+ <execution>
+ <id>add-test-resource</id>
+ <phase>validate</phase>
+ <goals>
+ <goal>add-test-resource</goal>
+ </goals>
+ <configuration>
+ <resources>
+ <resource>
+ <directory>${basedir}/src/it/resources</directory>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ <version>${maven-failsafe-plugin.version}</version>
+ <executions>
+ <execution>
+ <id>ParallelStatsEnabledTest</id>
+ <configuration>
+ <encoding>UTF-8</encoding>
+ <forkCount>${numForkedIT}</forkCount>
+ <runOrder>alphabetical</runOrder>
+ <reuseForks>true</reuseForks>
+ <!--parallel>methods</parallel>
+ <threadCount>20</threadCount-->
+ <argLine>-Xmx2000m -XX:MaxPermSize=256m -Djava.security.egd=file:/dev/./urandom "-Djava.library.path=${hadoop.library.path}${path.separator}${java.library.path}" -XX:NewRatio=4 -XX:SurvivorRatio=8 -XX:+UseCompressedOops -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+DisableExplicitGC -XX:+UseCMSInitiatingOccupancyOnly -XX:+CMSClassUnloadingEnabled -XX:+CMSScavengeBeforeRemark -XX:CMSInitiatingOccupancyFraction=68 -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=./target/</argLine>
+ <redirectTestOutputToFile>${test.output.tofile}</redirectTestOutputToFile>
+ <shutdown>kill</shutdown>
+ <testSourceDirectory>${basedir}/src/it/java</testSourceDirectory>
+ <groups>org.apache.phoenix.end2end.ParallelStatsEnabledTest</groups>
+ </configuration>
+ <goals>
+ <goal>integration-test</goal>
+ <goal>verify</goal>
+ </goals>
+ </execution>
+ <execution>
+ <id>ParallelStatsDisabledTest</id>
+ <configuration>
+ <encoding>UTF-8</encoding>
+ <forkCount>${numForkedIT}</forkCount>
+ <runOrder>alphabetical</runOrder>
+ <reuseForks>true</reuseForks>
+ <!--parallel>methods</parallel>
+ <threadCount>20</threadCount-->
+ <!-- We're intermittently hitting this assertion when running in parallel:
+ Caused by: java.lang.AssertionError: we should never remove a different context
+ at org.apache.hadoop.hbase.regionserver.HRegion$RowLockContext.cleanUp(HRegion.java:5206)
+ at org.apache.hadoop.hbase.regionserver.HRegion$RowLockImpl.release(HRegion.java:5246)
+ at org.apache.phoenix.coprocessor.MetaDataEndpointImpl.doGetTable(MetaDataEndpointImpl.java:2898)
+ at org.apache.phoenix.coprocessor.MetaDataEndpointImpl.doGetTable(MetaDataEndpointImpl.java:2835)
+ at org.apache.phoenix.coprocessor.MetaDataEndpointImpl.getTable(MetaDataEndpointImpl.java:490) -->
+ <!--enableAssertions>false</enableAssertions-->
+ <argLine>-Xmx3000m -XX:MaxPermSize=256m -Djava.security.egd=file:/dev/./urandom "-Djava.library.path=${hadoop.library.path}${path.separator}${java.library.path}" -XX:NewRatio=4 -XX:SurvivorRatio=8 -XX:+UseCompressedOops -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+DisableExplicitGC -XX:+UseCMSInitiatingOccupancyOnly -XX:+CMSClassUnloadingEnabled -XX:+CMSScavengeBeforeRemark -XX:CMSInitiatingOccupancyFraction=68 -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=./target/</argLine>
+ <redirectTestOutputToFile>${test.output.tofile}</redirectTestOutputToFile>
+ <shutdown>kill</shutdown>
+ <testSourceDirectory>${basedir}/src/it/java</testSourceDirectory>
+ <groups>org.apache.phoenix.end2end.ParallelStatsDisabledTest</groups>
+ </configuration>
+ <goals>
+ <goal>integration-test</goal>
+ <goal>verify</goal>
+ </goals>
+ </execution>
+ <execution>
+ <id>HBaseManagedTimeTests</id>
+ <configuration>
+ <encoding>UTF-8</encoding>
+ <forkCount>${numForkedIT}</forkCount>
+ <runOrder>alphabetical</runOrder>
+ <reuseForks>true</reuseForks>
+ <argLine>-enableassertions -Xmx2000m -XX:MaxPermSize=128m -Djava.security.egd=file:/dev/./urandom "-Djava.library.path=${hadoop.library.path}${path.separator}${java.library.path}" -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=./target/</argLine>
+ <redirectTestOutputToFile>${test.output.tofile}</redirectTestOutputToFile>
+ <testSourceDirectory>${basedir}/src/it/java</testSourceDirectory>
+ <groups>org.apache.phoenix.end2end.HBaseManagedTimeTest</groups>
+ <shutdown>kill</shutdown>
+ </configuration>
+ <goals>
+ <goal>integration-test</goal>
+ <goal>verify</goal>
+ </goals>
+ </execution>
+ <execution>
+ <id>NeedTheirOwnClusterTests</id>
+ <configuration>
+ <encoding>UTF-8</encoding>
+ <forkCount>${numForkedIT}</forkCount>
+ <runOrder>alphabetical</runOrder>
+ <reuseForks>false</reuseForks>
+ <argLine>-enableassertions -Xmx2000m -XX:MaxPermSize=256m -Djava.security.egd=file:/dev/./urandom "-Djava.library.path=${hadoop.library.path}${path.separator}${java.library.path}" -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=./target/</argLine>
+ <redirectTestOutputToFile>${test.output.tofile}</redirectTestOutputToFile>
+ <testSourceDirectory>${basedir}/src/it/java</testSourceDirectory>
+ <groups>org.apache.phoenix.end2end.NeedsOwnMiniClusterTest</groups>
+ <shutdown>kill</shutdown>
+ </configuration>
+ <goals>
+ <goal>integration-test</goal>
+ <goal>verify</goal>
+ </goals>
+ </execution>
+<execution>
+ <id>SplitSystemCatalogTests</id>
+ <configuration>
+ <encoding>UTF-8</encoding>
+ <forkCount>${numForkedIT}</forkCount>
+ <runOrder>alphabetical</runOrder>
+ <reuseForks>false</reuseForks>
+ <argLine>-enableassertions -Xmx2000m -XX:MaxPermSize=256m -Djava.security.egd=file:/dev/./urandom "-Djava.library.path=${hadoop.library.path}${path.separator}${java.library.path}" -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=./target/</argLine>
+ <redirectTestOutputToFile>${test.output.tofile}</redirectTestOutputToFile>
+ <testSourceDirectory>${basedir}/src/it/java</testSourceDirectory>
+ <groups>org.apache.phoenix.end2end.SplitSystemCatalogTests</groups>
+ <shutdown>kill</shutdown>
+ </configuration>
+ <goals>
+ <goal>integration-test</goal>
+ <goal>verify</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <version>${maven-dependency-plugin.version}</version>
+ <executions>
+ <execution>
+ <id>create-mrapp-generated-classpath</id>
+ <phase>generate-test-resources</phase>
+ <goals>
+ <goal>build-classpath</goal>
+ </goals>
+ <configuration>
+ <outputFile>${project.build.directory}/classes/mrapp-generated-classpath</outputFile>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>2.4.3</version>
+ </plugin>
+ <plugin>
+ <!-- Allows us to get the apache-ds bundle artifacts -->
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <version>2.5.3</version>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ <version>2.13</version>
+ <executions>
+ <execution>
+ <id>validate</id>
+ <phase>validate</phase>
+ <configuration>
+ <skip>true</skip>
+ <configLocation>${top.dir}/src/main/config/checkstyle/checker.xml</configLocation>
+ <suppressionsLocation>${top.dir}/src/main/config/checkstyle/suppressions.xml</suppressionsLocation>
+ <consoleOutput>true</consoleOutput>
+ <headerLocation>${top.dir}/src/main/config/checkstyle/header.txt</headerLocation>
+ <failOnViolation><!--true-->false</failOnViolation>
+ <includeTestSourceDirectory><!--true-->false</includeTestSourceDirectory>
+ </configuration>
+ <goals>
+ <goal>check</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-source-plugin</artifactId>
+ <version>2.2.1</version>
+ <executions>
+ <execution>
+ <id>attach-sources</id>
+ <phase>prepare-package</phase>
+ <goals>
+ <goal>jar-no-fork</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-javadoc-plugin</artifactId>
+ <version>2.9</version>
+ <configuration>
+ <quiet>true</quiet>
+ <links>
+ <link>http://hbase.apache.org/apidocs/</link>
+ </links>
+ </configuration>
+ <executions>
+ <execution>
+ <id>attach-javadocs</id>
+ <goals>
+ <!-- TODO turn back on javadocs - disabled now for testing -->
+ <!-- <goal>jar</goal> -->
+ </goals>
+ <configuration>
+ <additionalparam>${javadoc.opts}</additionalparam>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>${maven-surefire-plugin.version}</version>
+ <configuration>
+ <forkCount>${numForkedUT}</forkCount>
+ <reuseForks>true</reuseForks>
+ <argLine>-enableassertions -Xmx2250m -XX:MaxPermSize=128m
+ -Djava.security.egd=file:/dev/./urandom "-Djava.library.path=${hadoop.library.path}${path.separator}${java.library.path}" -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=./target/</argLine>
+ <redirectTestOutputToFile>${test.output.tofile}</redirectTestOutputToFile>
+ <shutdown>kill</shutdown>
+ </configuration>
+ </plugin>
+ <!-- All projects create a test jar -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <version>2.4</version>
+ <executions>
+ <execution>
+ <phase>prepare-package</phase>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-site-plugin</artifactId>
+ <version>3.7.1</version>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <configuration>
+ <excludes>
+ <!-- Header on changelog isn't normal -->
+ <exclude>CHANGES</exclude>
+ <!-- IDE configuration -->
+ <exclude>dev/phoenix.importorder</exclude>
+ <!-- Release L&N -->
+ <exclude>dev/release_files/LICENSE</exclude>
+ <exclude>dev/release_files/NOTICE</exclude>
+ <!-- Exclude data files for examples -->
+ <exclude>docs/*.csv</exclude>
+ <exclude>examples/*.csv</exclude>
+ <!-- Exclude SQL files from rat. Sqlline 1.1.9 doesn't work with
+ comments on the first line of a file. -->
+ <exclude>examples/*.sql</exclude>
+ <exclude>examples/pig/testdata</exclude>
+ <!-- precommit? -->
+ <exclude>**/patchprocess/**</exclude>
+ <!-- Argparse is bundled to work around system Python version
+ issues, compatible with ALv2 -->
+ <exclude>bin/argparse-1.4.0/argparse.py</exclude>
+ <!-- Not our code -->
+ <exclude>python/requests-kerberos/**</exclude>
+ <exclude>python/phoenixdb/phoenixdb/avatica/proto/*</exclude>
+ <exclude>python/phoenixdb/*.rst</exclude>
+ <exclude>python/phoenixdb/ci/**</exclude>
+ <exclude>python/phoenixdb/doc/*.rst</exclude>
+ <exclude>python/phoenixdb/doc/conf.py</exclude>
+ <exclude>python/phoenixdb/doc/Makefile</exclude>
+ </excludes>
+ </configuration>
+ </plugin>
+ <plugin>
+ <!-- Allows us to get the apache-ds bundle artifacts -->
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ <inherited>true</inherited>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencyManagement>
+ <dependencies>
+ <!-- Phoenix dependencies -->
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-core</artifactId>
+ <version>${phoenix.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-core</artifactId>
+ <version>${phoenix.version}</version>
+ <classifier>tests</classifier>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-flume</artifactId>
+ <version>${phoenix.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-kafka</artifactId>
+ <version>${phoenix.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-pig</artifactId>
+ <version>${phoenix.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-spark</artifactId>
+ <version>${phoenix.version}</version>
+ </dependency>
+
+ <!-- HBase dependencies -->
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-annotations</artifactId>
+ <version>${hbase.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-testing-util</artifactId>
+ <version>${hbase.version}</version>
+ <scope>test</scope>
+ <optional>true</optional>
+ <exclusions>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-it</artifactId>
+ <version>${hbase.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-protocol</artifactId>
+ <version>${hbase.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ <version>${hbase.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-metrics-api</artifactId>
+ <version>${hbase.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-metrics</artifactId>
+ <version>${hbase.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ <version>${hbase.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ <version>${hbase.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ <version>${hbase.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ <version>${hbase.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ <version>${hbase.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <version>${hbase.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <version>${hbase.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop2-compat</artifactId>
+ <version>${hbase.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop2-compat</artifactId>
+ <version>${hbase.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+
+ <!-- Hadoop Dependencies -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <version>${hadoop-two.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.xerial.snappy</groupId>
+ <artifactId>snappy-java</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-annotations</artifactId>
+ <version>${hadoop-two.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-core</artifactId>
+ <version>${hadoop-two.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ <version>${hadoop-two.version}</version>
+ <optional>true</optional>
+ <scope>test</scope>
+ </dependency>
+
+ <!-- Required for mini-cluster since hbase built against old version of hadoop -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-auth</artifactId>
+ <version>${hadoop-two.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-common</artifactId>
+ <version>${hadoop-two.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+ <version>${hadoop-two.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>${hadoop-two.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <version>${hadoop-two.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <version>${hadoop-two.version}</version>
+ <type>test-jar</type> <!-- this does not work, which is typical for Maven -->
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minikdc</artifactId>
+ <version>${hadoop-two.version}</version>
+ </dependency>
+
+ <!-- General Dependencies -->
+ <dependency>
+ <groupId>org.apache.pig</groupId>
+ <artifactId>pig</artifactId>
+ <version>${pig.version}</version>
+ <classifier>h2</classifier>
+ <exclusions>
+ <exclusion>
+ <groupId>org.xerial.snappy</groupId>
+ <artifactId>snappy-java</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.calcite.avatica</groupId>
+ <artifactId>avatica</artifactId>
+ <version>${avatica.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.calcite.avatica</groupId>
+ <artifactId>avatica-core</artifactId>
+ <version>${avatica.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.calcite.avatica</groupId>
+ <artifactId>avatica-server</artifactId>
+ <version>${avatica.version}</version>
+ </dependency>
+
+ <!-- Transaction dependencies -->
+ <dependency>
+ <groupId>org.apache.tephra</groupId>
+ <artifactId>tephra-api</artifactId>
+ <version>${tephra.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tephra</groupId>
+ <artifactId>tephra-core</artifactId>
+ <version>${tephra.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>ch.qos.logback</groupId>
+ <artifactId>logback-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>ch.qos.logback</groupId>
+ <artifactId>logback-classic</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tephra</groupId>
+ <artifactId>tephra-core</artifactId>
+ <type>test-jar</type>
+ <version>${tephra.version}</version>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>ch.qos.logback</groupId>
+ <artifactId>logback-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>ch.qos.logback</groupId>
+ <artifactId>logback-classic</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tephra</groupId>
+ <artifactId>tephra-hbase-compat-1.4</artifactId>
+ <version>${tephra.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.omid</groupId>
+ <artifactId>omid-hbase-client-hbase1.x</artifactId>
+ <version>${omid.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.testng</groupId>
+ <artifactId>testng</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.omid</groupId>
+ <artifactId>omid-hbase-coprocessor-hbase1.x</artifactId>
+ <version>${omid.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.testng</groupId>
+ <artifactId>testng</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.omid</groupId>
+ <artifactId>omid-tso-server-hbase1.x</artifactId>
+ <version>${omid.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.testng</groupId>
+ <artifactId>testng</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.omid</groupId>
+ <artifactId>omid-tso-server-hbase1.x</artifactId>
+ <version>${omid.version}</version>
+ <type>test-jar</type>
+ <exclusions>
+ <exclusion>
+ <groupId>org.testng</groupId>
+ <artifactId>testng</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+
+ <!-- Make sure we have all the antlr dependencies -->
+ <dependency>
+ <groupId>org.antlr</groupId>
+ <artifactId>antlr-runtime</artifactId>
+ <version>${antlr.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>jline</groupId>
+ <artifactId>jline</artifactId>
+ <version>2.11</version>
+ </dependency>
+ <dependency>
+ <groupId>sqlline</groupId>
+ <artifactId>sqlline</artifactId>
+ <version>${sqlline.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>${guava.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.flume</groupId>
+ <artifactId>flume-ng-core</artifactId>
+ <version>${flume.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.xerial.snappy</groupId>
+ <artifactId>snappy-java</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>com.github.stephenc.findbugs</groupId>
+ <artifactId>findbugs-annotations</artifactId>
+ <version>${findbugs-annotations.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.github.stephenc.jcip</groupId>
+ <artifactId>jcip-annotations</artifactId>
+ <version>${jcip-annotations.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.iq80.snappy</groupId>
+ <artifactId>snappy</artifactId>
+ <version>${snappy.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-core-asl</artifactId>
+ <version>${jackson.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-mapper-asl</artifactId>
+ <version>${jackson.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.code.findbugs</groupId>
+ <artifactId>jsr305</artifactId>
+ <version>2.0.1</version>
+ </dependency>
+ <dependency>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-jaxrs</artifactId>
+ <version>${jackson.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-xc</artifactId>
+ <version>${jackson.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>${junit.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-all</artifactId>
+ <version>${mockito-all.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.protobuf</groupId>
+ <artifactId>protobuf-java</artifactId>
+ <version>${protobuf-java.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ <version>${log4j.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ <version>${slf4j.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ <version>${commons-cli.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ <version>${commons-logging.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.htrace</groupId>
+ <artifactId>htrace-core</artifactId>
+ <version>${htrace.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ <version>${commons-codec.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>commons-collections</groupId>
+ <artifactId>commons-collections</artifactId>
+ <version>${collections.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-csv</artifactId>
+ <version>${commons-csv.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>joda-time</groupId>
+ <artifactId>joda-time</artifactId>
+ <version>${jodatime.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.curator</groupId>
+ <artifactId>curator-test</artifactId>
+ <version>${curator.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.curator</groupId>
+ <artifactId>curator-client</artifactId>
+ <version>${curator.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.clearspring.analytics</groupId>
+ <artifactId>stream</artifactId>
+ <version>${stream.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.salesforce.i18n</groupId>
+ <artifactId>i18n-util</artifactId>
+ <version>${i18n-util.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>javax.servlet</groupId>
+ <artifactId>javax.servlet-api</artifactId>
+ <version>${servlet.api.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.lmax</groupId>
+ <artifactId>disruptor</artifactId>
+ <version>${disruptor.version}</version>
+ </dependency>
+ </dependencies>
+ </dependencyManagement>
+
+ <profiles>
+ <!-- disable doclint with 1.8+ JDKs-->
+ <profile>
+ <id>java8-doclint-disabled</id>
+ <activation>
+ <jdk>[1.8,)</jdk>
+ </activation>
+ <properties>
+ <javadoc.opts>-Xdoclint:none</javadoc.opts>
+ </properties>
+ </profile>
+ <!-- this profile should be activated for release builds -->
+ <profile>
+ <id>release</id>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>check</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-gpg-plugin</artifactId>
+ <version>1.6</version>
+ <executions>
+ <execution>
+ <id>sign-artifacts</id>
+ <phase>verify</phase>
+ <goals>
+ <goal>sign</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ <profile>
+ <id>spark16</id>
+ <properties>
+ <spark.version>1.6.1</spark.version>
+ <scala.version>2.10.4</scala.version>
+ <scala.binary.version>2.10</scala.binary.version>
+ </properties>
+ </profile>
+ </profiles>
+
+ <reporting>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-project-info-reports-plugin</artifactId>
+ <version>3.0.0</version>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>findbugs-maven-plugin</artifactId>
+ <version>3.0.5</version>
+ </plugin>
+ </plugins>
+ </reporting>
+</project>
diff --git a/presto-phoenix-shaded/LICENSE b/presto-phoenix-shaded/LICENSE
new file mode 100644
index 0000000..7a4a3ea
--- /dev/null
+++ b/presto-phoenix-shaded/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
\ No newline at end of file
diff --git a/presto-phoenix-shaded/README.md b/presto-phoenix-shaded/README.md
new file mode 100644
index 0000000..f103122
--- /dev/null
+++ b/presto-phoenix-shaded/README.md
@@ -0,0 +1,18 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+# Apache Phoenix Shaded
+Shaded versions of the Apache Phoenix client and of the HBase testing utilities ("HBase - Testing Util") for Presto. Shading is needed because Phoenix and HBase depend on different versions of Guava and Jersey than Presto does; relocating those packages lets the conflicting versions coexist on a single classpath.
\ No newline at end of file
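The rationale above is worth making concrete. Below is a minimal, hypothetical Java sketch of what package relocation buys: after shading, Phoenix's copy of Guava lives under the `org.apache.phoenix.shaded` prefix (the `shadeBase` property configured in the parent pom that follows), so the JVM treats it as a different class from the Guava that Presto itself ships. The class name `RelocationDemo` is illustrative, and the snippet assumes both the shaded client jar and an unrelocated Guava are on the classpath.

```java
public class RelocationDemo {
    public static void main(String[] args) {
        // Presto's own (unrelocated) Guava:
        com.google.common.collect.ImmutableList<String> prestoList =
                com.google.common.collect.ImmutableList.of("a", "b");

        // Phoenix's Guava after shading: relocated under the shadeBase, so it
        // is an entirely separate class and never collides with the one above.
        org.apache.phoenix.shaded.com.google.common.collect.ImmutableList<String> phoenixList =
                org.apache.phoenix.shaded.com.google.common.collect.ImmutableList.of("a", "b");

        System.out.println(prestoList + " / " + phoenixList);
    }
}
```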
diff --git a/presto-phoenix-shaded/pom.xml b/presto-phoenix-shaded/pom.xml
new file mode 100644
index 0000000..60acdc3
--- /dev/null
+++ b/presto-phoenix-shaded/pom.xml
@@ -0,0 +1,69 @@
+<?xml version="1.0"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix</artifactId>
+ <version>4.15.0-HBase-1.4-SNAPSHOT</version>
+ </parent>
+
+ <name>Presto Apache Phoenix Shaded</name>
+ <description>Shaded version of Apache Phoenix for Presto</description>
+ <artifactId>presto-phoenix-shaded</artifactId>
+ <packaging>pom</packaging>
+
+ <licenses>
+ <license>
+ <name>Apache License 2.0</name>
+ <url>http://www.apache.org/licenses/LICENSE-2.0</url>
+ <distribution>repo</distribution>
+ </license>
+ </licenses>
+
+ <properties>
+ <!-- Skip compiling and running tests, so no test-jar is produced -->
+ <maven.test.skip>true</maven.test.skip>
+ <shadeBase>org.apache.phoenix.shaded</shadeBase>
+ </properties>
+
+ <modules>
+ <module>presto-hbase-testing-util-shaded</module>
+ <module>presto-phoenix-client-shaded</module>
+ </modules>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-gpg-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>sign-artifacts</id>
+ <phase>verify</phase>
+ <goals>
+ <goal>sign</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-javadoc-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>javadoc-jar</id>
+ <phase>package</phase>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ <configuration>
+ <verbose>true</verbose>
+ <additionalOptions>-Xdoclint:none</additionalOptions>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/presto-phoenix-shaded/presto-hbase-testing-util-shaded/.gitignore b/presto-phoenix-shaded/presto-hbase-testing-util-shaded/.gitignore
new file mode 100644
index 0000000..6e14f70
--- /dev/null
+++ b/presto-phoenix-shaded/presto-hbase-testing-util-shaded/.gitignore
@@ -0,0 +1,6 @@
+/target
+/.settings/
+/test-output/
+.classpath
+.project
+*.versionsBackup
\ No newline at end of file
diff --git a/presto-phoenix-shaded/presto-hbase-testing-util-shaded/pom.xml b/presto-phoenix-shaded/presto-hbase-testing-util-shaded/pom.xml
new file mode 100644
index 0000000..6a846df
--- /dev/null
+++ b/presto-phoenix-shaded/presto-hbase-testing-util-shaded/pom.xml
@@ -0,0 +1,148 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>presto-phoenix-shaded</artifactId>
+ <version>4.15.0-HBase-1.4-SNAPSHOT</version>
+ <relativePath>..</relativePath>
+ </parent>
+
+ <artifactId>presto-hbase-testing-util-shaded</artifactId>
+ <name>Presto HBase Testing Util Shaded</name>
+ <description>Shaded version of Apache HBase - Testing Util</description>
+ <packaging>jar</packaging>
+
+ <licenses>
+ <license>
+ <name>The Apache Software License, Version 2.0</name>
+ <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ <comments />
+ </license>
+ </licenses>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ <scope>compile</scope>
+ <type>test-jar</type>
+ <exclusions>
+ <exclusion>
+ <groupId>*</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <type>test-jar</type>
+ <scope>compile</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>*</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop2-compat</artifactId>
+ <type>test-jar</type>
+ <scope>compile</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>*</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-it</artifactId>
+ <scope>compile</scope>
+ <type>test-jar</type>
+ <exclusions>
+ <exclusion>
+ <groupId>*</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ <type>test-jar</type>
+ <scope>compile</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>*</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.tephra</groupId>
+ <artifactId>tephra-core</artifactId>
+ <type>test-jar</type>
+ <scope>compile</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>*</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <createSourcesJar>true</createSourcesJar>
+ <shadeSourcesContent>true</shadeSourcesContent>
+ <dependencyReducedPomLocation>${project.build.directory}/pom.xml</dependencyReducedPomLocation>
+ <promoteTransitiveDependencies>true</promoteTransitiveDependencies>
+ <transformers>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.DontIncludeResourceTransformer">
+ <resource>mrapp-generated-classpath</resource>
+ </transformer>
+ </transformers>
+ <filters>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <!-- io.airlift:joni resources; the dependency itself can't simply be -->
+ <!-- excluded because we depend on a shaded phoenix-client -->
+ <exclude>tables/*.bin</exclude>
+ <!-- skip French localization -->
+ <exclude>assets/org/apache/commons/math3/**/*</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
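Note that this module's shade configuration declares transformers and filters but no `<relocations>`: it merges the HBase test-jars (and their promoted transitive dependencies) into a single artifact without renaming packages. As a minimal, hypothetical sketch of how such a merged testing jar is typically consumed, assuming it is on the test classpath, one can boot an in-process HBase mini-cluster:

```java
import org.apache.hadoop.hbase.HBaseTestingUtility;

public class MiniClusterSketch {
    public static void main(String[] args) throws Exception {
        // HBaseTestingUtility comes from the merged hbase-server/hbase-it
        // test-jars; its package is unchanged because this module does not
        // relocate classes.
        HBaseTestingUtility hbase = new HBaseTestingUtility();
        hbase.startMiniCluster(); // starts ZooKeeper, a master, and a region server in-process
        try {
            System.out.println("Mini-cluster ZK client port: "
                    + hbase.getZkCluster().getClientPort());
        } finally {
            hbase.shutdownMiniCluster();
        }
    }
}
```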
diff --git a/presto-phoenix-shaded/presto-phoenix-client-shaded/.gitignore b/presto-phoenix-shaded/presto-phoenix-client-shaded/.gitignore
new file mode 100644
index 0000000..6e14f70
--- /dev/null
+++ b/presto-phoenix-shaded/presto-phoenix-client-shaded/.gitignore
@@ -0,0 +1,6 @@
+/target
+/.settings/
+/test-output/
+.classpath
+.project
+*.versionsBackup
\ No newline at end of file
diff --git a/presto-phoenix-shaded/presto-phoenix-client-shaded/pom.xml b/presto-phoenix-shaded/presto-phoenix-client-shaded/pom.xml
new file mode 100644
index 0000000..94171f6
--- /dev/null
+++ b/presto-phoenix-shaded/presto-phoenix-client-shaded/pom.xml
@@ -0,0 +1,356 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>presto-phoenix-shaded</artifactId>
+ <version>4.15.0-HBase-1.4-SNAPSHOT</version>
+ <relativePath>..</relativePath>
+ </parent>
+
+ <artifactId>presto-phoenix-client-shaded</artifactId>
+ <name>Presto Apache Phoenix Client Shaded</name>
+ <description>Shaded version of Apache Phoenix Client for Presto</description>
+ <packaging>jar</packaging>
+
+ <licenses>
+ <license>
+ <name>The Apache Software License, Version 2.0</name>
+ <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ <comments />
+ </license>
+ </licenses>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-core</artifactId>
+ <version>${project.version}</version>
+ <exclusions>
+ <!-- Presto uses jcl-over-slf4j and log4j-over-slf4j
+ to route to slf4j-jdk14 -->
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <createSourcesJar>true</createSourcesJar>
+ <shadeSourcesContent>true</shadeSourcesContent>
+ <dependencyReducedPomLocation>${project.build.directory}/pom.xml</dependencyReducedPomLocation>
+ <promoteTransitiveDependencies>true</promoteTransitiveDependencies>
+ <transformers>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
+ </transformers>
+ <artifactSet>
+ <includes>
+ <include>*:*</include>
+ </includes>
+ <excludes>
+ <exclude>xom:xom</exclude>
+ <exclude>log4j:log4j</exclude>
+ <exclude>org.slf4j:slf4j-log4j12</exclude>
+ <exclude>commons-logging:commons-logging</exclude>
+ </excludes>
+ </artifactSet>
+
+ <relocations>
+ <!-- COM relocations -->
+ <relocation>
+ <pattern>com.beust.jcommander</pattern>
+ <shadedPattern>${shadeBase}.com.beust.jcommander</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.codahale</pattern>
+ <shadedPattern>${shadeBase}.com.codahale</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.fasterxml</pattern>
+ <shadedPattern>${shadeBase}.com.fasterxml</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.google</pattern>
+ <shadedPattern>${shadeBase}.com.google</shadedPattern>
+ </relocation>
+
+ <relocation>
+ <pattern>com.jamesmurty</pattern>
+ <shadedPattern>${shadeBase}.com.jamesmurty</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.jcraft</pattern>
+ <shadedPattern>${shadeBase}.com.jcraft</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.lmax</pattern>
+ <shadedPattern>${shadeBase}.com.lmax</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.sun.xml</pattern>
+ <shadedPattern>${shadeBase}.com.sun.xml</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.sun.istack</pattern>
+ <shadedPattern>${shadeBase}.com.sun.istack</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.sun.research</pattern>
+ <shadedPattern>${shadeBase}.com.sun.research</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.sun.activation</pattern>
+ <shadedPattern>${shadeBase}.com.sun.activation</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.thoughtworks</pattern>
+ <shadedPattern>${shadeBase}.com.thoughtworks</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.yammer</pattern>
+ <shadedPattern>${shadeBase}.com.yammer</shadedPattern>
+ </relocation>
+
+ <!-- IO relocations -->
+ <relocation>
+ <pattern>io.netty</pattern>
+ <shadedPattern>${shadeBase}.io.netty</shadedPattern>
+ </relocation>
+
+ <!-- ORG relocations -->
+ <relocation>
+ <pattern>org.antlr</pattern>
+ <shadedPattern>${shadeBase}.org.antlr</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.aopalliance</pattern>
+ <shadedPattern>${shadeBase}.org.aopalliance</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.codehaus</pattern>
+ <shadedPattern>${shadeBase}.org.codehaus</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.fusesource</pattern>
+ <shadedPattern>${shadeBase}.org.fusesource</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.hamcrest</pattern>
+ <shadedPattern>${shadeBase}.org.hamcrest</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.hsqldb</pattern>
+ <shadedPattern>${shadeBase}.org.hsqldb</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.iq80</pattern>
+ <shadedPattern>${shadeBase}.org.iq80</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.jamon</pattern>
+ <shadedPattern>${shadeBase}.org.jamon</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.jboss</pattern>
+ <shadedPattern>${shadeBase}.org.jboss</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.jcodings</pattern>
+ <shadedPattern>${shadeBase}.org.jcodings</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.jets3t</pattern>
+ <shadedPattern>${shadeBase}.org.jets3t</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.joda</pattern>
+ <shadedPattern>${shadeBase}.org.joda</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.joni</pattern>
+ <shadedPattern>${shadeBase}.org.joni</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.junit</pattern>
+ <shadedPattern>${shadeBase}.org.junit</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.kosmix</pattern>
+ <shadedPattern>${shadeBase}.org.kosmix</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.mortbay</pattern>
+ <shadedPattern>${shadeBase}.org.mortbay</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.objectweb</pattern>
+ <shadedPattern>${shadeBase}.org.objectweb</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.stringtemplate</pattern>
+ <shadedPattern>${shadeBase}.org.stringtemplate</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.tukaani</pattern>
+ <shadedPattern>${shadeBase}.org.tukaani</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.znerd</pattern>
+ <shadedPattern>${shadeBase}.org.znerd</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.avro</pattern>
+ <shadedPattern>${shadeBase}.org.apache.avro</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.commons</pattern>
+ <shadedPattern>${shadeBase}.org.apache.commons</shadedPattern>
+ <excludes>
+ <exclude>org.apache.commons.csv.**</exclude>
+ <exclude>org.apache.commons.logging.**</exclude>
+ <exclude>org.apache.commons.configuration.**</exclude>
+ </excludes>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.directory</pattern>
+ <shadedPattern>${shadeBase}.org.apache.directory</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.http</pattern>
+ <shadedPattern>${shadeBase}.org.apache.http</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.jasper</pattern>
+ <shadedPattern>${shadeBase}.org.apache.jasper</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.jute</pattern>
+ <shadedPattern>${shadeBase}.org.apache.jute</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.mina</pattern>
+ <shadedPattern>${shadeBase}.org.apache.mina</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.oro</pattern>
+ <shadedPattern>${shadeBase}.org.apache.oro</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.taglibs</pattern>
+ <shadedPattern>${shadeBase}.org.apache.taglibs</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.thrift</pattern>
+ <shadedPattern>${shadeBase}.org.apache.thrift</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.tools</pattern>
+ <shadedPattern>${shadeBase}.org.apache.tools</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.twill</pattern>
+ <shadedPattern>${shadeBase}.org.apache.twill</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.velocity</pattern>
+ <shadedPattern>${shadeBase}.org.apache.velocity</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.zookeeper</pattern>
+ <shadedPattern>${shadeBase}.org.apache.zookeeper</shadedPattern>
+ </relocation>
+
+ <!-- NET relocations -->
+ <relocation>
+ <pattern>net</pattern>
+ <shadedPattern>${shadeBase}.net</shadedPattern>
+ </relocation>
+
+ <!-- Misc relocations -->
+ <relocation>
+ <pattern>antlr</pattern>
+ <shadedPattern>${shadeBase}.antlr</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>it.unimi</pattern>
+ <shadedPattern>${shadeBase}.it.unimi</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>javax.activation</pattern>
+ <shadedPattern>${shadeBase}.javax.activation</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>javax.annotation</pattern>
+ <shadedPattern>${shadeBase}.javax.annotation</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>javax.inject</pattern>
+ <shadedPattern>${shadeBase}.javax.inject</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>javax.xml.bind</pattern>
+ <shadedPattern>${shadeBase}.javax.xml.bind</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>javax.servlet</pattern>
+ <shadedPattern>${shadeBase}.javax.servlet</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>javax.ws</pattern>
+ <shadedPattern>${shadeBase}.javax.ws</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>jline</pattern>
+ <shadedPattern>${shadeBase}.jline</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>junit</pattern>
+ <shadedPattern>${shadeBase}.junit</shadedPattern>
+ </relocation>
+ </relocations>
+ <filters>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <!-- io.airlift:joni resource files -->
+ <exclude>tables/*.bin</exclude>
+ <!-- skip French localization -->
+ <exclude>assets/org/apache/commons/math3/**/*</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
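Two details of this configuration are worth spelling out. First, `org.apache.phoenix` itself is deliberately absent from the relocation list, so the JDBC entry points keep their usual names even though their internals (Guava, Netty, ZooKeeper, and so on) are hidden under `org.apache.phoenix.shaded`. Second, the `ServicesResourceTransformer` merges `META-INF/services` entries, which is what lets `java.sql.DriverManager` discover the Phoenix driver inside the shaded jar. A minimal, hypothetical usage sketch follows, assuming the shaded client jar is on the classpath and a Phoenix-enabled HBase is reachable; the ZooKeeper quorum `localhost:2181` is a placeholder, not taken from this pom.

```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;

public class ShadedClientSketch {
    public static void main(String[] args) throws Exception {
        // The driver is auto-registered via the merged META-INF/services entry,
        // so no explicit Class.forName(...) call is needed.
        try (Connection conn = DriverManager.getConnection("jdbc:phoenix:localhost:2181");
             ResultSet rs = conn.createStatement()
                     .executeQuery("SELECT TABLE_NAME FROM SYSTEM.CATALOG LIMIT 1")) {
            while (rs.next()) {
                System.out.println(rs.getString(1));
            }
        }
    }
}
```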