blob: a5e7a353093d56114c6bdc621a4398bec032df37 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.parquet;
import java.io.File;
import java.io.IOException;
import java.util.UUID;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.avro.generic.GenericData.Record;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.iceberg.Files;
import org.apache.iceberg.Schema;
import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.avro.AvroSchemaUtil;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.io.FileAppender;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.DoubleType;
import org.apache.iceberg.types.Types.FloatType;
import org.apache.iceberg.types.Types.IntegerType;
import org.apache.iceberg.types.Types.LongType;
import org.apache.iceberg.types.Types.StringType;
import org.apache.parquet.column.page.DictionaryPageReadStore;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.schema.MessageType;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import static org.apache.iceberg.avro.AvroSchemaUtil.convert;
import static org.apache.iceberg.expressions.Expressions.and;
import static org.apache.iceberg.expressions.Expressions.equal;
import static org.apache.iceberg.expressions.Expressions.greaterThan;
import static org.apache.iceberg.expressions.Expressions.greaterThanOrEqual;
import static org.apache.iceberg.expressions.Expressions.in;
import static org.apache.iceberg.expressions.Expressions.isNaN;
import static org.apache.iceberg.expressions.Expressions.isNull;
import static org.apache.iceberg.expressions.Expressions.lessThan;
import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual;
import static org.apache.iceberg.expressions.Expressions.not;
import static org.apache.iceberg.expressions.Expressions.notEqual;
import static org.apache.iceberg.expressions.Expressions.notIn;
import static org.apache.iceberg.expressions.Expressions.notNaN;
import static org.apache.iceberg.expressions.Expressions.notNull;
import static org.apache.iceberg.expressions.Expressions.or;
import static org.apache.iceberg.expressions.Expressions.startsWith;
import static org.apache.iceberg.types.Types.NestedField.optional;
import static org.apache.iceberg.types.Types.NestedField.required;
public class TestDictionaryRowGroupFilter {
  // Struct type for the nested column in the table schema (field id 9).
  private static final Types.StructType structFieldType =
      Types.StructType.of(Types.NestedField.required(9, "int_field", IntegerType.get()));

  // Table schema that filter expressions are bound against. Field names here are
  // mapped to the underscore-prefixed names in FILE_SCHEMA by field id.
  private static final Schema SCHEMA = new Schema(
      required(1, "id", IntegerType.get()),
      optional(2, "no_stats", StringType.get()),
      required(3, "required", StringType.get()),
      optional(4, "all_nulls", LongType.get()),
      optional(5, "some_nulls", StringType.get()),
      optional(6, "no_nulls", StringType.get()),
      optional(7, "non_dict", StringType.get()),
      optional(8, "struct_not_null", structFieldType),
      optional(10, "not_in_file", FloatType.get()),
      optional(11, "all_nans", DoubleType.get()),
      optional(12, "some_nans", FloatType.get()),
      optional(13, "no_nans", DoubleType.get())
  );

  // Struct type as written into the data file (same ids, underscore-prefixed names).
  private static final Types.StructType _structFieldType =
      Types.StructType.of(Types.NestedField.required(9, "_int_field", IntegerType.get()));

  // Schema of the Parquet file written in createInputFile(). Note that field 10
  // ("not_in_file") is deliberately absent so tests can exercise missing columns.
  private static final Schema FILE_SCHEMA = new Schema(
      required(1, "_id", IntegerType.get()),
      optional(2, "_no_stats", StringType.get()),
      required(3, "_required", StringType.get()),
      optional(4, "_all_nulls", LongType.get()),
      optional(5, "_some_nulls", StringType.get()),
      optional(6, "_no_nulls", StringType.get()),
      optional(7, "_non_dict", StringType.get()),
      optional(8, "_struct_not_null", _structFieldType),
      optional(11, "_all_nans", DoubleType.get()),
      optional(12, "_some_nans", FloatType.get()),
      optional(13, "_no_nans", DoubleType.get())
  );

  // A value long enough (200 UUIDs, ~7200 chars) that Parquet truncates/drops
  // column statistics for it, while still being dictionary-encodable.
  private static final String TOO_LONG_FOR_STATS;
  static {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < 200; i += 1) {
      sb.append(UUID.randomUUID().toString());
    }
    TOO_LONG_FOR_STATS = sb.toString();
  }

  // Inclusive range of "_id" (and "_int_field") values written to the file.
  private static final int INT_MIN_VALUE = 30;
  private static final int INT_MAX_VALUE = 79;

  // Populated by createInputFile() before each test.
  private MessageType parquetSchema = null;
  private BlockMetaData rowGroupMetadata = null;
  private DictionaryPageReadStore dictionaryStore = null;

  @Rule
  public TemporaryFolder temp = new TemporaryFolder();
  // Writes a single-row-group Parquet file containing 20 copies of 50 records
  // (so every column except "_non_dict" is dictionary-encoded), then captures
  // the row group metadata, Parquet schema, and dictionary page store that the
  // filter tests exercise.
  @Before
  public void createInputFile() throws IOException {
    File parquetFile = temp.newFile();
    Assert.assertTrue(parquetFile.delete());

    // build struct field schema
    org.apache.avro.Schema structSchema = AvroSchemaUtil.convert(_structFieldType);

    OutputFile outFile = Files.localOutput(parquetFile);
    try (FileAppender<Record> appender = Parquet.write(outFile)
        .schema(FILE_SCHEMA)
        .build()) {
      GenericRecordBuilder builder = new GenericRecordBuilder(convert(FILE_SCHEMA, "table"));
      // create 20 copies of each record to ensure dictionary-encoding
      for (int copy = 0; copy < 20; copy += 1) {
        // create 50 records
        for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i += 1) {
          builder.set("_id", INT_MIN_VALUE + i); // min=30, max=79, num-nulls=0
          builder.set("_no_stats", TOO_LONG_FOR_STATS); // value longer than 4k will produce no stats
          builder.set("_required", "req"); // required, always non-null
          builder.set("_all_nulls", null); // never non-null
          builder.set("_some_nulls", (i % 10 == 0) ? null : "some"); // includes some null values
          builder.set("_no_nulls", ""); // optional, but always non-null
          builder.set("_non_dict", UUID.randomUUID().toString()); // not dictionary-encoded
          builder.set("_all_nans", Double.NaN); // never non-nan
          builder.set("_some_nans", (i % 10 == 0) ? Float.NaN : 2F); // includes some nan values
          builder.set("_no_nans", 3D); // optional, but always non-nan
          Record structNotNull = new Record(structSchema);
          structNotNull.put("_int_field", INT_MIN_VALUE + i);
          builder.set("_struct_not_null", structNotNull); // struct with int
          appender.add(builder.build());
        }
      }
    }

    // reader is intentionally not closed here; the dictionary store is read lazily
    // by the tests. NOTE(review): consider closing it in an @After — TODO confirm.
    InputFile inFile = Files.localInput(parquetFile);
    ParquetFileReader reader = ParquetFileReader.open(ParquetIO.file(inFile));
    Assert.assertEquals("Should create only one row group", 1, reader.getRowGroups().size());
    rowGroupMetadata = reader.getRowGroups().get(0);
    parquetSchema = reader.getFileMetaData().getSchema();
    dictionaryStore = reader.getNextDictionaryReader();
  }
  // Verifies that expression factory methods reject null literals with an NPE,
  // so the remaining tests never need to cover null-literal predicates.
  @Test
  public void testAssumptions() {
    // this case validates that other cases don't need to test expressions with null literals.
    TestHelpers.assertThrows("Should reject null literal in equal expression",
        NullPointerException.class,
        "Cannot create expression literal from null",
        () -> equal("col", null));
    TestHelpers.assertThrows("Should reject null literal in notEqual expression",
        NullPointerException.class,
        "Cannot create expression literal from null",
        () -> notEqual("col", null));
    TestHelpers.assertThrows("Should reject null literal in lessThan expression",
        NullPointerException.class,
        "Cannot create expression literal from null",
        () -> lessThan("col", null));
    TestHelpers.assertThrows("Should reject null literal in lessThanOrEqual expression",
        NullPointerException.class,
        "Cannot create expression literal from null",
        () -> lessThanOrEqual("col", null));
    TestHelpers.assertThrows("Should reject null literal in greaterThan expression",
        NullPointerException.class,
        "Cannot create expression literal from null",
        () -> greaterThan("col", null));
    TestHelpers.assertThrows("Should reject null literal in greaterThanOrEqual expression",
        NullPointerException.class,
        "Cannot create expression literal from null",
        () -> greaterThanOrEqual("col", null));
    TestHelpers.assertThrows("Should reject null literal in startsWith expression",
        NullPointerException.class,
        "Cannot create expression literal from null",
        () -> startsWith("col", null));
  }
@Test
public void testAllNulls() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notNull("all_nulls"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: dictionary filter doesn't help", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notNull("some_nulls"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: dictionary filter doesn't help", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notNull("no_nulls"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: dictionary filter doesn't help", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notNull("struct_not_null"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: dictionary filter doesn't help", shouldRead);
}
@Test
public void testNoNulls() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, isNull("all_nulls"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: dictionary filter doesn't help", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, isNull("some_nulls"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: dictionary filter doesn't help", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, isNull("no_nulls"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: dictionary filter doesn't help", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, isNull("struct_not_null"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: dictionary filter doesn't help", shouldRead);
}
@Test
public void testRequiredColumn() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notNull("required"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: required columns are always non-null", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, isNull("required"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should skip: required columns are always non-null", shouldRead);
}
@Test
public void testIsNaNs() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, isNaN("all_nans"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: all_nans column will contain NaN", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, isNaN("some_nans"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: some_nans column will contain NaN", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, isNaN("no_nans"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should skip: no_nans column will not contain NaN", shouldRead);
}
@Test
public void testNotNaNs() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notNaN("all_nans"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should skip: all_nans column will not contain non-NaN", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notNaN("some_nans"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: some_nans column will contain non-NaN", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notNaN("no_nans"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: no_nans column will contain non-NaN", shouldRead);
}
@Test
public void testStartsWith() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, startsWith("non_dict", "re"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: no dictionary", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, startsWith("required", "re"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: dictionary contains a matching entry", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, startsWith("required", "req"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: dictionary contains a matching entry", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, startsWith("some_nulls", "so"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: dictionary contains a matching entry", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, startsWith("no_stats", UUID.randomUUID().toString()))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should skip: no stats but dictionary is present", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, startsWith("required", "reqs"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should skip: no match in dictionary", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, startsWith("some_nulls", "somex"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should skip: no match in dictionary", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, startsWith("no_nulls", "xxx"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should skip: no match in dictionary", shouldRead);
}
  // Referencing a column that does not exist in the table schema must fail
  // with a ValidationException when the expression is bound.
  @Test
  public void testMissingColumn() {
    TestHelpers.assertThrows("Should complain about missing column in expression",
        ValidationException.class, "Cannot find field 'missing'",
        () -> new ParquetDictionaryRowGroupFilter(SCHEMA, lessThan("missing", 5))
            .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore));
  }
@Test
public void testColumnNotInFile() {
Expression[] exprs = new Expression[] {
lessThan("not_in_file", 1.0f), lessThanOrEqual("not_in_file", 1.0f),
equal("not_in_file", 1.0f), greaterThan("not_in_file", 1.0f),
greaterThanOrEqual("not_in_file", 1.0f), notNull("not_in_file"),
isNull("not_in_file"), notEqual("not_in_file", 1.0f)
};
for (Expression expr : exprs) {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, expr)
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: dictionary cannot be found: " + expr, shouldRead);
}
}
@Test
public void testColumnFallbackOrNotDictionaryEncoded() {
Expression[] exprs = new Expression[] {
lessThan("non_dict", "a"), lessThanOrEqual("non_dict", "a"), equal("non_dict", "a"),
greaterThan("non_dict", "a"), greaterThanOrEqual("non_dict", "a"), notNull("non_dict"),
isNull("non_dict"), notEqual("non_dict", "a")
};
for (Expression expr : exprs) {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, expr)
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: dictionary cannot be found: " + expr, shouldRead);
}
}
@Test
public void testMissingStats() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, equal("no_stats", "a"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should skip: stats are missing but dictionary is present", shouldRead);
}
@Test
public void testNot() {
// this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, not(lessThan("id", INT_MIN_VALUE - 25)))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: not(false)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, not(greaterThan("id", INT_MIN_VALUE - 25)))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should skip: not(true)", shouldRead);
}
@Test
public void testAnd() {
// this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
and(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MIN_VALUE - 30)))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should skip: and(false, true)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
and(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MAX_VALUE + 1)))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should skip: and(false, false)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
and(greaterThan("id", INT_MIN_VALUE - 25), lessThanOrEqual("id", INT_MIN_VALUE)))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: and(true, true)", shouldRead);
}
@Test
public void testOr() {
// this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
or(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MAX_VALUE + 1)))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should skip: or(false, false)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
or(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MAX_VALUE - 19)))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: or(false, true)", shouldRead);
}
@Test
public void testIntegerLt() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, lessThan("id", INT_MIN_VALUE - 25))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id range below lower bound (5 < 30)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, lessThan("id", INT_MIN_VALUE))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id range below lower bound (30 is not < 30)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, lessThan("id", INT_MIN_VALUE + 1))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: one possible id", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, lessThan("id", INT_MAX_VALUE))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: may possible ids", shouldRead);
}
@Test
public void testIntegerLtEq() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, lessThanOrEqual("id", INT_MIN_VALUE - 25))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id range below lower bound (5 < 30)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, lessThanOrEqual("id", INT_MIN_VALUE - 1))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id range below lower bound (29 < 30)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, lessThanOrEqual("id", INT_MIN_VALUE))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: one possible id", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, lessThanOrEqual("id", INT_MAX_VALUE))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: many possible ids", shouldRead);
}
@Test
public void testIntegerGt() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, greaterThan("id", INT_MAX_VALUE + 6))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id range above upper bound (85 < 79)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, greaterThan("id", INT_MAX_VALUE))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id range above upper bound (79 is not > 79)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, greaterThan("id", INT_MAX_VALUE - 1))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: one possible id", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, greaterThan("id", INT_MAX_VALUE - 4))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: may possible ids", shouldRead);
}
@Test
public void testIntegerGtEq() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, greaterThanOrEqual("id", INT_MAX_VALUE + 6))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id range above upper bound (85 < 79)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, greaterThanOrEqual("id", INT_MAX_VALUE + 1))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id range above upper bound (80 > 79)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, greaterThanOrEqual("id", INT_MAX_VALUE))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: one possible id", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, greaterThanOrEqual("id", INT_MAX_VALUE - 4))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: may possible ids", shouldRead);
}
@Test
public void testIntegerEq() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, equal("id", INT_MIN_VALUE - 25))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id below lower bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, equal("id", INT_MIN_VALUE - 1))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id below lower bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, equal("id", INT_MIN_VALUE))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id equal to lower bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, equal("id", INT_MAX_VALUE - 4))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id between lower and upper bounds", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, equal("id", INT_MAX_VALUE))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id equal to upper bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, equal("id", INT_MAX_VALUE + 1))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id above upper bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, equal("id", INT_MAX_VALUE + 6))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id above upper bound", shouldRead);
}
@Test
public void testIntegerNotEq() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("id", INT_MIN_VALUE - 25))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id below lower bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("id", INT_MIN_VALUE - 1))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id below lower bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("id", INT_MIN_VALUE))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id equal to lower bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("id", INT_MAX_VALUE - 4))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id between lower and upper bounds", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("id", INT_MAX_VALUE))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id equal to upper bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("id", INT_MAX_VALUE + 1))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id above upper bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("id", INT_MAX_VALUE + 6))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id above upper bound", shouldRead);
}
@Test
public void testIntegerNotEqRewritten() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, not(equal("id", INT_MIN_VALUE - 25)))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id below lower bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, not(equal("id", INT_MIN_VALUE - 1)))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id below lower bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, not(equal("id", INT_MIN_VALUE)))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id equal to lower bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, not(equal("id", INT_MAX_VALUE - 4)))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id between lower and upper bounds", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, not(equal("id", INT_MAX_VALUE)))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id equal to upper bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, not(equal("id", INT_MAX_VALUE + 1)))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id above upper bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, not(equal("id", INT_MAX_VALUE + 6)))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id above upper bound", shouldRead);
}
@Test
public void testStringNotEq() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("some_nulls", "some"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: contains null != 'some'", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("no_nulls", ""))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should skip: contains only ''", shouldRead);
}
@Test
public void testStructFieldLt() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
lessThan("struct_not_null.int_field", INT_MIN_VALUE - 25)
).shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id range below lower bound (5 < 30)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, lessThan("struct_not_null.int_field", INT_MIN_VALUE))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id range below lower bound (30 is not < 30)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, lessThan("struct_not_null.int_field", INT_MIN_VALUE + 1))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: one possible id", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, lessThan("struct_not_null.int_field", INT_MAX_VALUE))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: may possible ids", shouldRead);
}
@Test
public void testStructFieldLtEq() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
lessThanOrEqual("struct_not_null.int_field", INT_MIN_VALUE - 25)
).shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id range below lower bound (5 < 30)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
lessThanOrEqual("struct_not_null.int_field", INT_MIN_VALUE - 1)
).shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id range below lower bound (29 < 30)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
lessThanOrEqual("struct_not_null.int_field", INT_MIN_VALUE)
).shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: one possible id", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
lessThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE)
).shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: many possible ids", shouldRead);
}
@Test
public void testStructFieldGt() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
greaterThan("struct_not_null.int_field", INT_MAX_VALUE + 6)
).shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id range above upper bound (85 < 79)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
greaterThan("struct_not_null.int_field", INT_MAX_VALUE)
).shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id range above upper bound (79 is not > 79)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
greaterThan("struct_not_null.int_field", INT_MAX_VALUE - 1)
).shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: one possible id", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
greaterThan("struct_not_null.int_field", INT_MAX_VALUE - 4)
).shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: may possible ids", shouldRead);
}
@Test
public void testStructFieldGtEq() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE + 6)
).shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id range above upper bound (85 < 79)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE + 1)
).shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id range above upper bound (80 > 79)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE)
).shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: one possible id", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE - 4)
).shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: may possible ids", shouldRead);
}
@Test
public void testStructFieldEq() {
// Equality on a nested (struct) int field: the row group is kept only when the
// probed value could appear in the column's dictionary.
Assert.assertFalse("Should not read: id below lower bound",
structFieldEqualMatches(INT_MIN_VALUE - 25));
Assert.assertFalse("Should not read: id below lower bound",
structFieldEqualMatches(INT_MIN_VALUE - 1));
Assert.assertTrue("Should read: id equal to lower bound",
structFieldEqualMatches(INT_MIN_VALUE));
Assert.assertTrue("Should read: id between lower and upper bounds",
structFieldEqualMatches(INT_MAX_VALUE - 4));
Assert.assertTrue("Should read: id equal to upper bound",
structFieldEqualMatches(INT_MAX_VALUE));
Assert.assertFalse("Should not read: id above upper bound",
structFieldEqualMatches(INT_MAX_VALUE + 1));
Assert.assertFalse("Should not read: id above upper bound",
structFieldEqualMatches(INT_MAX_VALUE + 6));
}

// Evaluates equal(struct_not_null.int_field, value) against the test row group's
// dictionaries and returns whether the filter keeps the row group.
private boolean structFieldEqualMatches(int value) {
return new ParquetDictionaryRowGroupFilter(SCHEMA, equal("struct_not_null.int_field", value))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
}
@Test
public void testStructFieldNotEq() {
// notEqual on a nested struct field: a row group with any value (or any null)
// that differs from the probe must be read, so every case here should read.
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
notEqual("struct_not_null.int_field", INT_MIN_VALUE - 25)
).shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id below lower bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("struct_not_null.int_field", INT_MIN_VALUE - 1))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id below lower bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("struct_not_null.int_field", INT_MIN_VALUE))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id equal to lower bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("struct_not_null.int_field", INT_MAX_VALUE - 4))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id between lower and upper bounds", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("struct_not_null.int_field", INT_MAX_VALUE))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id equal to upper bound", shouldRead);
// Fixed copy-paste inconsistency: this case previously filtered on "id" while every
// other case in this struct-field test targets "struct_not_null.int_field".
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("struct_not_null.int_field", INT_MAX_VALUE + 1))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id above upper bound", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("struct_not_null.int_field", INT_MAX_VALUE + 6))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id above upper bound", shouldRead);
}
@Test
public void testCaseInsensitive() {
// With case sensitivity disabled (third constructor argument = false), the
// reference "no_Nulls" resolves to the "no_nulls" column.
ParquetDictionaryRowGroupFilter filter =
new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("no_Nulls", ""), false);
boolean shouldRead = filter.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should skip: contains only ''", shouldRead);
}
@Test
public void testMissingDictionaryPageForColumn() {
// A store that returns null for every descriptor simulates a missing dictionary
// page; evaluating the filter against it must fail loudly rather than guess.
DictionaryPageReadStore missingPageStore = descriptor -> null;
TestHelpers.assertThrows("Should complain about missing dictionary",
IllegalStateException.class, "Failed to read required dictionary page for id: 5",
() -> new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("some_nulls", "some"))
.shouldRead(parquetSchema, rowGroupMetadata, missingPageStore));
}
@Test
public void testIntegerIn() {
boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, in("id", INT_MIN_VALUE - 25, INT_MIN_VALUE - 24))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id below lower bound (5 < 30, 6 < 30). The two sets are disjoint.",
shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, in("id", INT_MIN_VALUE - 2, INT_MIN_VALUE - 1))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id below lower bound (28 < 30, 29 < 30). The two sets are disjoint.",
shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, in("id", INT_MIN_VALUE - 1, INT_MIN_VALUE))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id equal to lower bound (30 == 30)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, in("id", INT_MAX_VALUE - 4, INT_MAX_VALUE - 3))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: in set is a subset of the dictionary", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, in("id", INT_MAX_VALUE, INT_MAX_VALUE + 1))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: id equal to upper bound (79 == 79)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, in("id", INT_MAX_VALUE + 1, INT_MAX_VALUE + 2))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id above upper bound (80 > 79, 81 > 79)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, in("id", INT_MAX_VALUE + 6, INT_MAX_VALUE + 7))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: id above upper bound (85 > 79, 86 > 79). The two sets are disjoint.",
shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
in("id", IntStream.range(INT_MIN_VALUE - 10, INT_MAX_VALUE + 10).boxed().collect(Collectors.toList()))
).shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: the dictionary is a subset of the in set", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA,
in("id", IntStream.range(INT_MIN_VALUE, INT_MAX_VALUE + 1).boxed().collect(Collectors.toList()))
).shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: the dictionary is equal to the in set", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, in("all_nulls", 1, 2))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: in on all nulls column (isFallback to be true) ", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, in("some_nulls", "aaa", "some"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: in on some nulls column", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, in("some_nulls", "aaa", "bbb"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: some_nulls values are not within the set", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, in("no_nulls", "aaa", "bbb"))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertFalse("Should not read: in on no nulls column (empty string is not within the set)", shouldRead);
shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, in("no_nulls", "aaa", ""))
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
Assert.assertTrue("Should read: in on no nulls column (empty string is within the set)", shouldRead);
}
@Test
public void testIntegerNotIn() {
// notIn(): the row group is skipped only when the literal set covers every
// dictionary value (and the column has no nulls).
Assert.assertTrue("Should read: id below lower bound (5 < 30, 6 < 30). The two sets are disjoint.",
notInFilterShouldRead(notIn("id", INT_MIN_VALUE - 25, INT_MIN_VALUE - 24)));
Assert.assertTrue("Should read: id below lower bound (28 < 30, 29 < 30). The two sets are disjoint.",
notInFilterShouldRead(notIn("id", INT_MIN_VALUE - 2, INT_MIN_VALUE - 1)));
Assert.assertTrue("Should read: id equal to lower bound (30 == 30)",
notInFilterShouldRead(notIn("id", INT_MIN_VALUE - 1, INT_MIN_VALUE)));
Assert.assertTrue("Should read: the notIn set is a subset of the dictionary",
notInFilterShouldRead(notIn("id", INT_MAX_VALUE - 4, INT_MAX_VALUE - 3)));
Assert.assertTrue("Should read: id equal to upper bound (79 == 79)",
notInFilterShouldRead(notIn("id", INT_MAX_VALUE, INT_MAX_VALUE + 1)));
Assert.assertTrue("Should read: id above upper bound (80 > 79, 81 > 79). The two sets are disjoint.",
notInFilterShouldRead(notIn("id", INT_MAX_VALUE + 1, INT_MAX_VALUE + 2)));
Assert.assertTrue("Should read: id above upper bound (85 > 79, 86 > 79). The two sets are disjoint.",
notInFilterShouldRead(notIn("id", INT_MAX_VALUE + 6, INT_MAX_VALUE + 7)));
// Literal sets that contain or exactly equal the whole dictionary: nothing survives.
Assert.assertFalse("Should not read: the dictionary is a subset of the notIn set",
notInFilterShouldRead(
notIn("id", IntStream.range(INT_MIN_VALUE - 10, INT_MAX_VALUE + 10).boxed().collect(Collectors.toList()))));
Assert.assertFalse("Should not read: the dictionary is equal to the notIn set",
notInFilterShouldRead(
notIn("id", IntStream.range(INT_MIN_VALUE, INT_MAX_VALUE + 1).boxed().collect(Collectors.toList()))));
// Null-bearing and string columns.
Assert.assertTrue("Should read: notIn on all nulls column",
notInFilterShouldRead(notIn("all_nulls", 1, 2)));
Assert.assertTrue("Should read: notIn on some nulls column (any null matches the notIn)",
notInFilterShouldRead(notIn("some_nulls", "aaa", "bbb")));
Assert.assertTrue("Should read: notIn on no nulls column (empty string is not within the set)",
notInFilterShouldRead(notIn("no_nulls", "aaa", "bbb")));
Assert.assertFalse("Should not read: notIn on no nulls column (empty string is within the set)",
notInFilterShouldRead(notIn("no_nulls", "aaa", "")));
}

// Evaluates a notIn() expression against the test row group's dictionaries.
private boolean notInFilterShouldRead(Expression expr) {
return new ParquetDictionaryRowGroupFilter(SCHEMA, expr)
.shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);
}
}