HBASE-27639 Support hbase-connectors compilation with HBase 2.5.3, Hadoop 3.2.4 and Spark 3.2.3 (#110)

* Added mockito-all; without it, Hadoop 3.2.4 fails with java.lang.NoClassDefFoundError: org/mockito/stubbing/Answer at org.apache.hadoop.hdfs.MiniDFSCluster.isNameNodeUp()
* Excluded hadoop-client-api and hadoop-client-runtime pulled in by Spark 3.2; otherwise minicluster.start() fails
* Excluded the lower-versioned paranamer pulled in by Avro; otherwise tests fail with java.lang.ArrayIndexOutOfBoundsException
* Set spark.hadoopRDD.ignoreEmptySplits=false in the tests to account for a behaviour change in Spark 3.2, where this conf now defaults to true; without the override, scans of small tables return empty results on HBase versions that lack HBASE-26340 (see the sketch below)
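
For applications that hit the same empty-result issue outside the test suite, the equivalent workaround is to disable the setting on the application's own SparkConf, mirroring the test change in this patch. A minimal sketch (class name, app name and master are illustrative, not part of this change):

    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaSparkContext;

    public class IgnoreEmptySplitsWorkaround {
      public static void main(String[] args) {
        // Disable the Spark 3.2 default so splits of small HBase tables are not
        // skipped when the cluster does not include HBASE-26340.
        SparkConf sparkConf = new SparkConf()
            .setAppName("hbase-spark-app")
            .setMaster("local")
            .set("spark.hadoopRDD.ignoreEmptySplits", "false");
        try (JavaSparkContext jsc = new JavaSparkContext(sparkConf)) {
          // ... create a JavaHBaseContext and run bulk gets/scans as usual ...
        }
      }
    }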

Signed-off-by: Rajeshbabu Chintaguntla <rajeshbabu@apache.org>
Signed-off-by: Peter Somogyi <psomogyi@apache.org>
Reviewed-by: Istvan Toth <stoty@apache.org>
diff --git a/pom.xml b/pom.xml
index ba1c3fe..1bfe0cf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -142,6 +142,7 @@
     <exec.maven.version>1.6.0</exec.maven.version>
     <audience-annotations.version>0.5.0</audience-annotations.version>
     <junit.version>4.12</junit.version>
+    <mockito-all.version>1.8.5</mockito-all.version>
     <hbase-thirdparty.version>4.0.1</hbase-thirdparty.version>
     <hadoop-two.version>2.8.5</hadoop-two.version>
     <hadoop-three.version>3.2.0</hadoop-three.version>
@@ -195,6 +196,12 @@
         <groupId>org.apache.avro</groupId>
         <artifactId>avro</artifactId>
         <version>${avro.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>com.thoughtworks.paranamer</groupId>
+            <artifactId>paranamer</artifactId>
+          </exclusion>
+        </exclusions>
       </dependency>
       <dependency>
         <groupId>org.slf4j</groupId>
@@ -234,6 +241,12 @@
         <version>${junit.version}</version>
       </dependency>
       <dependency>
+        <groupId>org.mockito</groupId>
+        <artifactId>mockito-all</artifactId>
+        <version>${mockito-all.version}</version>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
         <groupId>org.apache.hbase.thirdparty</groupId>
         <artifactId>hbase-shaded-miscellaneous</artifactId>
         <version>${hbase-thirdparty.version}</version>
diff --git a/spark/hbase-spark-it/pom.xml b/spark/hbase-spark-it/pom.xml
index 976edba..a7f4236 100644
--- a/spark/hbase-spark-it/pom.xml
+++ b/spark/hbase-spark-it/pom.xml
@@ -247,6 +247,14 @@
           <groupId>com.google.code.findbugs</groupId>
           <artifactId>jsr305</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client-runtime</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
     <dependency>
@@ -287,6 +295,11 @@
       <artifactId>junit</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 
   <profiles>
diff --git a/spark/hbase-spark/pom.xml b/spark/hbase-spark/pom.xml
index 7ac4828..d8e290e 100644
--- a/spark/hbase-spark/pom.xml
+++ b/spark/hbase-spark/pom.xml
@@ -88,6 +88,14 @@
           <groupId>xerces</groupId>
           <artifactId>xercesImpl</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client-runtime</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
     <dependency>
@@ -116,6 +124,11 @@
       <scope>test</scope>
     </dependency>
     <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
       <groupId>org.scalatest</groupId>
       <artifactId>scalatest_${scala.binary.version}</artifactId>
       <version>3.0.5</version>
diff --git a/spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java b/spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java
index 793ed8e..6601eb7 100644
--- a/spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java
+++ b/spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContext.java
@@ -47,6 +47,7 @@
 import org.apache.hadoop.hbase.tool.LoadIncrementalHFiles;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
+import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.function.Function;
@@ -69,21 +70,29 @@
   public static final HBaseClassTestRule TIMEOUT =
       HBaseClassTestRule.forClass(TestJavaHBaseContext.class);
 
-  private static transient JavaSparkContext JSC;
+  protected static transient JavaSparkContext JSC;
   private static HBaseTestingUtility TEST_UTIL;
   private static JavaHBaseContext HBASE_CONTEXT;
   private static final Logger LOG = LoggerFactory.getLogger(TestJavaHBaseContext.class);
 
-  byte[] tableName = Bytes.toBytes("t1");
-  byte[] columnFamily = Bytes.toBytes("c");
+  protected byte[] tableName = Bytes.toBytes("t1");
+  protected byte[] columnFamily = Bytes.toBytes("c");
   byte[] columnFamily1 = Bytes.toBytes("d");
   String columnFamilyStr = Bytes.toString(columnFamily);
   String columnFamilyStr1 = Bytes.toString(columnFamily1);
 
   @BeforeClass
   public static void setUpBeforeClass() throws Exception {
+    // NOTE: We need to do this due to behaviour change in spark 3.2, where the below conf is true
+    // by default. We will get empty table as result (for small sized tables) for HBase version not
+    // having HBASE-26340
+    SparkConf sparkConf = new SparkConf().set("spark.hadoopRDD.ignoreEmptySplits", "false");
+    JSC = new JavaSparkContext("local", "JavaHBaseContextSuite", sparkConf);
 
-    JSC = new JavaSparkContext("local", "JavaHBaseContextSuite");
+    init();
+  }
+
+  protected static void init() throws Exception {
     TEST_UTIL = new HBaseTestingUtility();
     Configuration conf = TEST_UTIL.getConfiguration();
 
@@ -503,11 +512,12 @@
     }
   }
 
-  private void populateTableWithMockData(Configuration conf, TableName tableName)
+  protected void populateTableWithMockData(Configuration conf, TableName tableName)
           throws IOException {
     try (
       Connection conn = ConnectionFactory.createConnection(conf);
-      Table table = conn.getTable(tableName)) {
+      Table table = conn.getTable(tableName);
+      Admin admin = conn.getAdmin()) {
 
       List<Put> puts = new ArrayList<>(5);
 
@@ -517,6 +527,7 @@
         puts.add(put);
       }
       table.put(puts);
+      admin.flush(tableName);
     }
   }
 }
diff --git a/spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContextForLargeRows.java b/spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContextForLargeRows.java
new file mode 100644
index 0000000..81908ad
--- /dev/null
+++ b/spark/hbase-spark/src/test/java/org/apache/hadoop/hbase/spark/TestJavaHBaseContextForLargeRows.java
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.spark;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.experimental.categories.Category;
+
+@Category({ MiscTests.class, MediumTests.class })
+public class TestJavaHBaseContextForLargeRows extends TestJavaHBaseContext {
+
+  @ClassRule public static final HBaseClassTestRule TIMEOUT =
+      HBaseClassTestRule.forClass(TestJavaHBaseContextForLargeRows.class);
+
+  @BeforeClass public static void setUpBeforeClass() throws Exception {
+    JSC = new JavaSparkContext("local", "JavaHBaseContextSuite");
+
+    init();
+  }
+
+  protected void populateTableWithMockData(Configuration conf, TableName tableName)
+      throws IOException {
+    try (Connection conn = ConnectionFactory.createConnection(conf);
+        Table table = conn.getTable(tableName);
+        Admin admin = conn.getAdmin()) {
+
+      List<Put> puts = new ArrayList<>(5);
+
+      for (int i = 1; i < 6; i++) {
+        Put put = new Put(Bytes.toBytes(Integer.toString(i)));
+        // We are trying to generate a large row value here
+        char[] chars = new char[1024 * 1024 * 2];
+        // adding '0' to convert int to char
+        Arrays.fill(chars, (char) (i + '0'));
+        put.addColumn(columnFamily, columnFamily, Bytes.toBytes(String.valueOf(chars)));
+        puts.add(put);
+      }
+      table.put(puts);
+      admin.flush(tableName);
+    }
+  }
+}