PARQUET-2158: Upgrade Hadoop dependency to version 3.2.0 (#976)

* PARQUET-2158: Upgrade Hadoop dependency to version 3.2.0

This updates Parquet's Hadoop dependency to 3.2.0.
This version adds compatibility with Java 11, as well
as many other features and bug fixes.

* PARQUET-2158. PathGlobPattern to compile/link with hadoop 3.2.0

The deprecated parquet-thrift class PathGlobPattern doesn't
compile against hadoop 3.x because in HADOOP-12436 the
nominally private class org.apache.hadoop.fs.GlobPattern
implementation switched from using java.util.regex.Pattern
to com.google.re2j.Pattern.

The fact that nobody has ever reported this problem implies that
it is never used on any hadoop 3 release, ever.

This commit fixes the build by moving to the google classes.
The alternative strategy would actually be to fork the hadoop
class. This will work unless/until the hadoop project changes
the class again.

It may be time to consider removing it entirely. Clearly nobody
is actually using it.

* PARQUET-2158. build auditing to cope with switch to google re2j.

Disables the API compatibility check and adds re2j as a 'provided'
dependency so that the relevant auditing checks do not fail.
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestInputFormat.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestInputFormat.java
index 02c80fc..697e8e2 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestInputFormat.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestInputFormat.java
@@ -42,7 +42,6 @@
 import java.util.List;
 import java.util.Map;
 
-import org.apache.commons.lang.SystemUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.FileStatus;
diff --git a/parquet-thrift/pom.xml b/parquet-thrift/pom.xml
index 7f08ca6..e30b5d4 100644
--- a/parquet-thrift/pom.xml
+++ b/parquet-thrift/pom.xml
@@ -174,6 +174,14 @@
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <!-- Needed to compile PathGlobPattern on Hadoop 3.
+           If that deprecated class is removed, so can this dependency -->
+      <groupId>com.google.re2j</groupId>
+      <artifactId>re2j</artifactId>
+      <version>1.1</version>
+      <scope>provided</scope>
+    </dependency>
   </dependencies>
 
   <dependencyManagement>
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/deprecated/PathGlobPattern.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/deprecated/PathGlobPattern.java
index ba5646d..4d4136f 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/deprecated/PathGlobPattern.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/deprecated/PathGlobPattern.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -20,8 +20,8 @@
 
 import org.apache.hadoop.fs.GlobPattern;
 
-import java.util.regex.Pattern;
-import java.util.regex.PatternSyntaxException;
+import com.google.re2j.Pattern;
+import com.google.re2j.PatternSyntaxException;
 
 /**
  * Enhanced version of GlobPattern class that is defined in hadoop with extra capability of matching
@@ -56,7 +56,7 @@
   }
 
   private static void error(String message, String pattern, int pos) {
-    throw new PatternSyntaxException(message, pattern, pos);
+    throw new PatternSyntaxException(String.format("%1s at %2d", message, pos), pattern);
   }
 
   /**
diff --git a/pom.xml b/pom.xml
index d37afb6..a9f23a5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -76,7 +76,7 @@
     <jackson-databind.version>2.13.2.2</jackson-databind.version>
     <japicmp.version>0.14.2</japicmp.version>
     <shade.prefix>shaded.parquet</shade.prefix>
-    <hadoop.version>2.10.1</hadoop.version>
+    <hadoop.version>3.2.0</hadoop.version>
     <parquet.format.version>2.9.0</parquet.format.version>
     <previous.version>1.12.0</previous.version>
     <thrift.executable>thrift</thrift.executable>
@@ -519,6 +519,8 @@
                 change to fix a integer overflow issue.
                 TODO: remove this after Parquet 1.13 release -->
               <exclude>org.apache.parquet.column.values.dictionary.DictionaryValuesWriter#dictionaryByteSize</exclude>
+              <!-- In PARQUET-2158 the return type of PathGlobPattern was changed to be compatible with Hadoop 3 -->
+              <exclude>org.apache.parquet.thrift.projection.deprecated.PathGlobPattern</exclude>
             </excludes>
           </parameter>
         </configuration>