PARQUET-1488: UserDefinedPredicate throw NPE (#663)
diff --git a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/UserDefinedPredicate.java b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/UserDefinedPredicate.java
index e5103b4..fea9ca9 100644
--- a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/UserDefinedPredicate.java
+++ b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/UserDefinedPredicate.java
@@ -39,6 +39,21 @@
public UserDefinedPredicate() { }
/**
+ * Returns whether this predicate accepts {@code null} values.
+ *
+ * @return {@code true} if this predicate accepts {@code null} values, {@code false} otherwise
+ */
+ public boolean acceptsNullValue() {
+ try {
+ return keep(null);
+ } catch (NullPointerException e) {
+ // The implementor might not be prepared to handle null values;
+ // in this case this predicate obviously does not accept nulls
+ return false;
+ }
+ }
+
+ /**
* Return true to keep the record with this value, false to drop it.
* <p>
* This method shall handle {@code null} values returning whether this user defined predicate accepts {@code null}
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
index e28a380..15be50e 100644
--- a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
@@ -291,7 +291,7 @@
public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> PrimitiveIterator.OfInt visit(
UserDefined<T, U> udp) {
final UserDefinedPredicate<T> predicate = udp.getUserDefinedPredicate();
- final boolean acceptNulls = predicate.keep(null);
+ final boolean acceptNulls = predicate.acceptsNullValue();
if (acceptNulls && nullCounts == null) {
// Nulls match so if we don't have null related statistics we have to return all pages
@@ -321,7 +321,7 @@
public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> PrimitiveIterator.OfInt visit(
LogicalNotUserDefined<T, U> udp) {
final UserDefinedPredicate<T> inversePredicate = udp.getUserDefined().getUserDefinedPredicate();
- final boolean acceptNulls = !inversePredicate.keep(null);
+ final boolean acceptNulls = !inversePredicate.acceptsNullValue();
if (acceptNulls && nullCounts == null) {
// Nulls match so if we don't have null related statistics we have to return all pages
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexFilter.java b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexFilter.java
index f3efff7..6dec774 100644
--- a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexFilter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexFilter.java
@@ -149,14 +149,14 @@
@Override
public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> RowRanges visit(UserDefined<T, U> udp) {
return applyPredicate(udp.getColumn(), ci -> ci.visit(udp),
- udp.getUserDefinedPredicate().keep(null) ? allRows() : RowRanges.EMPTY);
+ udp.getUserDefinedPredicate().acceptsNullValue() ? allRows() : RowRanges.EMPTY);
}
@Override
public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> RowRanges visit(
LogicalNotUserDefined<T, U> udp) {
return applyPredicate(udp.getUserDefined().getColumn(), ci -> ci.visit(udp),
- udp.getUserDefined().getUserDefinedPredicate().keep(null) ? RowRanges.EMPTY : allRows());
+ udp.getUserDefined().getUserDefinedPredicate().acceptsNullValue() ? RowRanges.EMPTY : allRows());
}
private RowRanges applyPredicate(Column<?> column, Function<ColumnIndex, PrimitiveIterator.OfInt> func,
diff --git a/parquet-generator/src/main/java/org/apache/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java b/parquet-generator/src/main/java/org/apache/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java
index 1cdb5d1..3c1cf48 100644
--- a/parquet-generator/src/main/java/org/apache/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java
+++ b/parquet-generator/src/main/java/org/apache/parquet/filter2/IncrementallyUpdatedFilterPredicateGenerator.java
@@ -248,7 +248,7 @@
" valueInspector = new ValueInspector() {\n" +
" @Override\n" +
" public void updateNull() {\n" +
- " setResult(" + (invert ? "!" : "") + "udp.keep(null));\n" +
+ " setResult(" + (invert ? "!" : "") + "udp.acceptsNullValue());\n" +
" }\n" +
"\n" +
" @SuppressWarnings(\"unchecked\")\n" +
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
index 52e1458..c43380b 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
@@ -390,9 +390,9 @@
// The column is missing, thus all null. Check if the predicate keeps null.
if (meta == null) {
if (inverted) {
- return udp.keep(null);
+ return udp.acceptsNullValue();
} else {
- return !udp.keep(null);
+ return !udp.acceptsNullValue();
}
}
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/statisticslevel/StatisticsFilter.java b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/statisticslevel/StatisticsFilter.java
index 446c8a3..31b6c45 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/statisticslevel/StatisticsFilter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/statisticslevel/StatisticsFilter.java
@@ -366,9 +366,9 @@
// the column isn't in this file so all values are null.
// lets run the udp with null value to see if it keeps null or not.
if (inverted) {
- return udp.keep(null);
+ return udp.acceptsNullValue();
} else {
- return !udp.keep(null);
+ return !udp.acceptsNullValue();
}
}
@@ -382,9 +382,9 @@
if (isAllNulls(columnChunk)) {
// lets run the udp with null value to see if it keeps null or not.
if (inverted) {
- return udp.keep(null);
+ return udp.acceptsNullValue();
} else {
- return !udp.keep(null);
+ return !udp.acceptsNullValue();
}
}
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnIndexFiltering.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnIndexFiltering.java
index 71155ce..ccb6a03 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnIndexFiltering.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnIndexFiltering.java
@@ -385,7 +385,9 @@
@Override
public boolean keep(Long value) {
- return value != null && value % divisor == 0;
+ // Deliberately not checking for null to verify the handling of NPE
+ // Implementors shall always checks the value for null and return accordingly
+ return value % divisor == 0;
}
@Override