HBASE-27639 Support hbase-connectors compilation with HBase 2.5.3, Hadoop 3.2.4 and Spark 3.2.3 (#110)

* Added mockito-all; without it, Hadoop 3.2.4 fails with java.lang.NoClassDefFoundError: org/mockito/stubbing/Answer at org.apache.hadoop.hdfs.MiniDFSCluster.isNameNodeUp()
* Excluded hadoop-client-api and hadoop-client-runtime pulled in by Spark 3.2; otherwise minicluster.start() fails
* Excluded the lower-versioned paranamer pulled in by Avro; otherwise tests fail with java.lang.ArrayIndexOutOfBoundsException
* Set spark.hadoopRDD.ignoreEmptySplits=false in the tests to account for a behaviour change in Spark 3.2, where this conf now defaults to true; without the override, scans of small tables return empty results on HBase versions that lack HBASE-26340 (see the sketch below)
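
For applications that hit the same empty-result issue outside the test suite, the equivalent workaround is to disable the setting on the application's own SparkConf, mirroring the test change in this patch. A minimal sketch (class name, app name and master are illustrative, not part of this change):

    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaSparkContext;

    public class IgnoreEmptySplitsWorkaround {
      public static void main(String[] args) {
        // Disable the Spark 3.2 default so splits of small HBase tables are not
        // skipped when the cluster does not include HBASE-26340.
        SparkConf sparkConf = new SparkConf()
            .setAppName("hbase-spark-app")
            .setMaster("local")
            .set("spark.hadoopRDD.ignoreEmptySplits", "false");
        try (JavaSparkContext jsc = new JavaSparkContext(sparkConf)) {
          // ... create a JavaHBaseContext and run bulk gets/scans as usual ...
        }
      }
    }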

Signed-off-by: Rajeshbabu Chintaguntla <rajeshbabu@apache.org>
Signed-off-by: Peter Somogyi <psomogyi@apache.org>
Reviewed-by: Istvan Toth <stoty@apache.org>
diff --git a/pom.xml b/pom.xml
index ba1c3fe..1bfe0cf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -142,6 +142,7 @@
     <exec.maven.version>1.6.0</exec.maven.version>
     <audience-annotations.version>0.5.0</audience-annotations.version>
     <junit.version>4.12</junit.version>
+    <mockito-all.version>1.8.5</mockito-all.version>
     <hbase-thirdparty.version>4.0.1</hbase-thirdparty.version>
     <hadoop-two.version>2.8.5</hadoop-two.version>
     <hadoop-three.version>3.2.0</hadoop-three.version>
@@ -195,6 +196,12 @@
         <groupId>org.apache.avro</groupId>
         <artifactId>avro</artifactId>
         <version>${avro.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>com.thoughtworks.paranamer</groupId>
+            <artifactId>paranamer</artifactId>
+          </exclusion>
+        </exclusions>
       </dependency>
       <dependency>
         <groupId>org.slf4j</groupId>
@@ -234,6 +241,12 @@
         <version>${junit.version}</version>
       </dependency>
       <dependency>
+        <groupId>org.mockito</groupId>
+        <artifactId>mockito-all</artifactId>
+        <version>${mockito-all.version}</version>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
         <groupId>org.apache.hbase.thirdparty</groupId>
         <artifactId>hbase-shaded-miscellaneous</artifactId>
         <version>${hbase-thirdparty.version}</version>
diff --git a/spark/hbase-spark-it/pom.xml b/spark/hbase-spark-it/pom.xml
index 976edba..a7f4236 100644
--- a/spark/hbase-spark-it/pom.xml
+++ b/spark/hbase-spark-it/pom.xml
@@ -247,6 +247,14 @@
           <groupId>com.google.code.findbugs</groupId>
           <artifactId>jsr305</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client-runtime</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
     <dependency>
@@ -287,6 +295,11 @@
       <artifactId>junit</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 
   <profiles>
diff --git a/spark/hbase-spark/pom.xml b/spark/hbase-spark/pom.xml
index 7ac4828..d8e290e 100644
--- a/spark/hbase-spark/pom.xml
+++ b/spark/hbase-spark/pom.xml
@@ -88,6 +88,14 @@
           <groupId>xerces</groupId>
           <artifactId>xercesImpl</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client-runtime</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
     <dependency>
@@ -116,6 +124,11 @@
       <scope>test</scope>
     </dependency>
     <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
       <groupId>org.scalatest</groupId>
       <artifactId>scalatest_${scala.binary.version}</artifactId>
       <version>3.0.5</version>
diff --git a/spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java b/spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java
index 793ed8e..6601eb7 100644
--- a/spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java
+++ b/spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java
@@ -47,6 +47,7 @@
 import org.apache.hadoop.hbase.tool.LoadIncrementalHFiles;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
+import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.function.Function;
@@ -69,21 +70,29 @@
   public static final HBaseClassTestRule TIMEOUT =
       HBaseClassTestRule.forClass(TestJavaHBaseContext.class);
 
-  private static transient JavaSparkContext JSC;
+  protected static transient JavaSparkContext JSC;
   private static HBaseTestingUtility TEST_UTIL;
   private static JavaHBaseContext HBASE_CONTEXT;
   private static final Logger LOG = LoggerFactory.getLogger(TestJavaHBaseContext.class);
 
-  byte[] tableName = Bytes.toBytes("t1");
-  byte[] columnFamily = Bytes.toBytes("c");
+  protected byte[] tableName = Bytes.toBytes("t1");
+  protected byte[] columnFamily = Bytes.toBytes("c");
   byte[] columnFamily1 = Bytes.toBytes("d");
   String columnFamilyStr = Bytes.toString(columnFamily);
   String columnFamilyStr1 = Bytes.toString(columnFamily1);
 
   @BeforeClass
   public static void setUpBeforeClass() throws Exception {
+    // NOTE: We need to do this due to behaviour change in spark 3.2, where the below conf is true
+    // by default. We will get empty table as result (for small sized tables) for HBase version not
+    // having HBASE-26340
+    SparkConf sparkConf = new SparkConf().set("spark.hadoopRDD.ignoreEmptySplits", "false");
+    JSC = new JavaSparkContext("local", "JavaHBaseContextSuite", sparkConf);
 
-    JSC = new JavaSparkContext("local", "JavaHBaseContextSuite");
+    init();
+  }
+
+  protected static void init() throws Exception {
     TEST_UTIL = new HBaseTestingUtility();
     Configuration conf = TEST_UTIL.getConfiguration();
 
@@ -503,11 +512,12 @@
     }
   }
 
-  private void populateTableWithMockData(Configuration conf, TableName tableName)
+  protected void populateTableWithMockData(Configuration conf, TableName tableName)
           throws IOException {
     try (
       Connection conn = ConnectionFactory.createConnection(conf);
-      Table table = conn.getTable(tableName)) {
+      Table table = conn.getTable(tableName);
+      Admin admin = conn.getAdmin()) {
 
       List<Put> puts = new ArrayList<>(5);
 
@@ -517,6 +527,7 @@
         puts.add(put);
       }
       table.put(puts);
+      admin.flush(tableName);
     }
   }
 }
diff --git a/spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContextForLargeRows.java b/spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContextForLargeRows.java
new file mode 100644
index 0000000..81908ad
--- /dev/null
+++ b/spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContextForLargeRows.java
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.experimental.categories.Category;
+
+@Category({ MiscTests.class, MediumTests.class })
+public class TestJavaHBaseContextForLargeRows extends TestJavaHBaseContext {
+
+  @ClassRule public static final HBaseClassTestRule TIMEOUT =
+      HBaseClassTestRule.forClass(TestJavaHBaseContextForLargeRows.class);
+
+  @BeforeClass public static void setUpBeforeClass() throws Exception {
+    JSC = new JavaSparkContext("local", "JavaHBaseContextSuite");
+
+    init();
+  }
+
+  protected void populateTableWithMockData(Configuration conf, TableName tableName)
+      throws IOException {
+    try (Connection conn = ConnectionFactory.createConnection(conf);
+        Table table = conn.getTable(tableName);
+        Admin admin = conn.getAdmin()) {
+
+      List<Put> puts = new ArrayList<>(5);
+
+      for (int i = 1; i < 6; i++) {
+        Put put = new Put(Bytes.toBytes(Integer.toString(i)));
+        // We are trying to generate a large row value here
+        char[] chars = new char[1024 * 1024 * 2];
+        // adding '0' to convert int to char
+        Arrays.fill(chars, (char) (i + '0'));
+        put.addColumn(columnFamily, columnFamily, Bytes.toBytes(String.valueOf(chars)));
+        puts.add(put);
+      }
+      table.put(puts);
+      admin.flush(tableName);
+    }
+  }
+}