PARQUET-1488: UserDefinedPredicate throw NPE (#663)


diff --git a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/UserDefinedPredicate.java b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/UserDefinedPredicate.java
index e5103b4..fea9ca9 100644
--- a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/UserDefinedPredicate.java
+++ b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/UserDefinedPredicate.java
@@ -39,6 +39,21 @@
   public UserDefinedPredicate() { }
 
   /**
+   * Returns whether this predicate accepts {@code null} values.
+   *
+   * @return {@code true} if this predicate accepts {@code null} values, {@code false} otherwise
+   */
+  public boolean acceptsNullValue() {
+    try {
+      return keep(null);
+    } catch (NullPointerException e) {
+      // The implementor might not be prepared to handle null values;
+      // in this case this predicate obviously does not accept nulls
+      return false;
+    }
+  }
+
+  /**
    * Return true to keep the record with this value, false to drop it.
    * <p>
    * This method shall handle {@code null} values returning whether this user defined predicate accepts {@code null}
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
index e28a380..15be50e 100644
--- a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
@@ -291,7 +291,7 @@
     public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> PrimitiveIterator.OfInt visit(
         UserDefined<T, U> udp) {
       final UserDefinedPredicate<T> predicate = udp.getUserDefinedPredicate();
-      final boolean acceptNulls = predicate.keep(null);
+      final boolean acceptNulls = predicate.acceptsNullValue();
 
       if (acceptNulls && nullCounts == null) {
         // Nulls match so if we don't have null related statistics we have to return all pages
@@ -321,7 +321,7 @@
     public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> PrimitiveIterator.OfInt visit(
         LogicalNotUserDefined<T, U> udp) {
       final UserDefinedPredicate<T> inversePredicate = udp.getUserDefined().getUserDefinedPredicate();
-      final boolean acceptNulls = !inversePredicate.keep(null);
+      final boolean acceptNulls = !inversePredicate.acceptsNullValue();
 
       if (acceptNulls && nullCounts == null) {
         // Nulls match so if we don't have null related statistics we have to return all pages
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexFilter.java b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexFilter.java
index f3efff7..6dec774 100644
--- a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexFilter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexFilter.java
@@ -149,14 +149,14 @@
   @Override
   public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> RowRanges visit(UserDefined<T, U> udp) {
     return applyPredicate(udp.getColumn(), ci -> ci.visit(udp),
-        udp.getUserDefinedPredicate().keep(null) ? allRows() : RowRanges.EMPTY);
+        udp.getUserDefinedPredicate().acceptsNullValue() ? allRows() : RowRanges.EMPTY);
   }
 
   @Override
   public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> RowRanges visit(
       LogicalNotUserDefined<T, U> udp) {
     return applyPredicate(udp.getUserDefined().getColumn(), ci -> ci.visit(udp),
-        udp.getUserDefined().getUserDefinedPredicate().keep(null) ? RowRanges.EMPTY : allRows());
+        udp.getUserDefined().getUserDefinedPredicate().acceptsNullValue() ? RowRanges.EMPTY : allRows());
   }
 
   private RowRanges applyPredicate(Column<?> column, Function<ColumnIndex, PrimitiveIterator.OfInt> func,
diff --git a/parquet-generator/src/main/java/org/apache/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java b/parquet-generator/src/main/java/org/apache/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java
index 1cdb5d1..3c1cf48 100644
--- a/parquet-generator/src/main/java/org/apache/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java
+++ b/parquet-generator/src/main/java/org/apache/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java
@@ -248,7 +248,7 @@
         "      valueInspector = new ValueInspector() {\n" +
         "        @Override\n" +
         "        public void updateNull() {\n" +
-        "          setResult(" + (invert ? "!" : "") + "udp.keep(null));\n" +
+        "          setResult(" + (invert ? "!" : "") + "udp.acceptsNullValue());\n" +
         "        }\n" +
         "\n" +
         "        @SuppressWarnings(\"unchecked\")\n" +
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
index 52e1458..c43380b 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
@@ -390,9 +390,9 @@
     // The column is missing, thus all null. Check if the predicate keeps null.
     if (meta == null) {
       if (inverted) {
-        return udp.keep(null);
+        return udp.acceptsNullValue();
       } else {
-        return !udp.keep(null);
+        return !udp.acceptsNullValue();
       }
     }
 
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/statisticslevel/StatisticsFilter.java b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/statisticslevel/StatisticsFilter.java
index 446c8a3..31b6c45 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/statisticslevel/StatisticsFilter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/statisticslevel/StatisticsFilter.java
@@ -366,9 +366,9 @@
       // the column isn't in this file so all values are null.
       // lets run the udp with null value to see if it keeps null or not.
       if (inverted) {
-        return udp.keep(null);
+        return udp.acceptsNullValue();
       } else {
-        return !udp.keep(null);
+        return !udp.acceptsNullValue();
       }
     }
 
@@ -382,9 +382,9 @@
     if (isAllNulls(columnChunk)) {
       // lets run the udp with null value to see if it keeps null or not.
       if (inverted) {
-        return udp.keep(null);
+        return udp.acceptsNullValue();
       } else {
-        return !udp.keep(null);
+        return !udp.acceptsNullValue();
       }
     }
 
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnIndexFiltering.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnIndexFiltering.java
index 71155ce..ccb6a03 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnIndexFiltering.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnIndexFiltering.java
@@ -385,7 +385,9 @@
 
     @Override
     public boolean keep(Long value) {
-      return value != null && value % divisor == 0;
+      // Deliberately not checking for null to verify the handling of NPE
+      // Implementors shall always checks the value for null and return accordingly
+      return value % divisor == 0;
     }
 
     @Override