blob: 3dd4ee696d4ed87a850b9228cadda3819dee89e0 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.column;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
public class TestEncodingStats {
@Test
public void testReusedBuilder() {
EncodingStats.Builder builder = new EncodingStats.Builder();
builder.withV2Pages();
builder.addDictEncoding(Encoding.PLAIN);
builder.addDataEncoding(Encoding.RLE_DICTIONARY, 3);
builder.addDataEncoding(Encoding.DELTA_BYTE_ARRAY);
builder.addDataEncoding(Encoding.DELTA_BYTE_ARRAY);
EncodingStats stats1 = builder.build();
builder.clear();
builder.addDataEncoding(Encoding.PLAIN);
builder.addDataEncoding(Encoding.PLAIN);
builder.addDataEncoding(Encoding.PLAIN);
builder.addDataEncoding(Encoding.PLAIN);
EncodingStats stats2 = builder.build();
assertEquals("Dictionary stats should be correct", 0,
stats2.dictStats.size());
assertEquals("Data stats size should be correct", 1,
stats2.dataStats.size());
assertEquals("Data stats content should be correct", 4,
stats2.dataStats.get(Encoding.PLAIN).intValue());
assertEquals("Dictionary stats size should be correct after reuse",
1, stats1.dictStats.size());
assertEquals("Dictionary stats content should be correct", 1,
stats1.dictStats.get(Encoding.PLAIN).intValue());
assertEquals("Data stats size should be correct after reuse", 2,
stats1.dataStats.size());
assertEquals("Data stats content should be correct", 3,
stats1.dataStats.get(Encoding.RLE_DICTIONARY).intValue());
assertEquals("Data stats content should be correct", 2,
stats1.dataStats.get(Encoding.DELTA_BYTE_ARRAY).intValue());
}
@Test
public void testNoPages() {
EncodingStats.Builder builder = new EncodingStats.Builder();
EncodingStats stats = builder.build();
assertFalse(stats.usesV2Pages());
assertFalse("Should not have dictionary-encoded pages", stats.hasDictionaryEncodedPages());
assertFalse("Should not have non-dictionary pages", stats.hasNonDictionaryEncodedPages());
assertFalse("Should not have dictionary pages", stats.hasDictionaryPages());
}
@Test
public void testNoDataPages() {
EncodingStats.Builder builder = new EncodingStats.Builder();
builder.addDictEncoding(Encoding.PLAIN_DICTIONARY);
EncodingStats stats = builder.build();
assertFalse(stats.usesV2Pages());
assertFalse("Should not have dictionary-encoded pages", stats.hasDictionaryEncodedPages());
assertFalse("Should not have non-dictionary pages", stats.hasNonDictionaryEncodedPages());
assertTrue("Should have dictionary pages", stats.hasDictionaryPages());
}
@Test
public void testV1AllDictionary() {
EncodingStats.Builder builder = new EncodingStats.Builder();
builder.addDictEncoding(Encoding.PLAIN_DICTIONARY);
builder.addDataEncoding(Encoding.PLAIN_DICTIONARY);
builder.addDataEncoding(Encoding.PLAIN_DICTIONARY);
EncodingStats stats = builder.build();
assertFalse(stats.usesV2Pages());
assertTrue("Should have dictionary-encoded pages", stats.hasDictionaryEncodedPages());
assertFalse("Should not have non-dictionary pages", stats.hasNonDictionaryEncodedPages());
assertTrue("Should have dictionary pages", stats.hasDictionaryPages());
}
@Test
public void testV1NoDictionary() {
EncodingStats.Builder builder = new EncodingStats.Builder();
builder.addDataEncoding(Encoding.PLAIN);
EncodingStats stats = builder.build();
assertFalse(stats.usesV2Pages());
assertFalse("Should not have dictionary-encoded pages", stats.hasDictionaryEncodedPages());
assertTrue("Should have non-dictionary pages", stats.hasNonDictionaryEncodedPages());
assertFalse("Should not have dictionary pages", stats.hasDictionaryPages());
}
@Test
public void testV1Fallback() {
EncodingStats.Builder builder = new EncodingStats.Builder();
builder.addDictEncoding(Encoding.PLAIN_DICTIONARY);
builder.addDataEncoding(Encoding.PLAIN_DICTIONARY);
builder.addDataEncoding(Encoding.PLAIN_DICTIONARY);
builder.addDataEncoding(Encoding.PLAIN);
EncodingStats stats = builder.build();
assertFalse(stats.usesV2Pages());
assertTrue("Should have dictionary-encoded pages", stats.hasDictionaryEncodedPages());
assertTrue("Should have non-dictionary pages", stats.hasNonDictionaryEncodedPages());
assertTrue("Should have dictionary pages", stats.hasDictionaryPages());
}
@Test
public void testV2AllDictionary() {
EncodingStats.Builder builder = new EncodingStats.Builder();
builder.withV2Pages();
builder.addDictEncoding(Encoding.PLAIN);
builder.addDataEncoding(Encoding.RLE_DICTIONARY);
EncodingStats stats = builder.build();
assertTrue(stats.usesV2Pages());
assertTrue("Should have dictionary-encoded pages", stats.hasDictionaryEncodedPages());
assertFalse("Should not have non-dictionary pages", stats.hasNonDictionaryEncodedPages());
assertTrue("Should have dictionary pages", stats.hasDictionaryPages());
}
@Test
public void testV2NoDictionary() {
EncodingStats.Builder builder = new EncodingStats.Builder();
builder.withV2Pages();
builder.addDataEncoding(Encoding.DELTA_BINARY_PACKED);
builder.addDataEncoding(Encoding.DELTA_BINARY_PACKED);
EncodingStats stats = builder.build();
assertTrue(stats.usesV2Pages());
assertFalse("Should not have dictionary-encoded pages", stats.hasDictionaryEncodedPages());
assertTrue("Should have non-dictionary pages", stats.hasNonDictionaryEncodedPages());
assertFalse("Should not have dictionary pages", stats.hasDictionaryPages());
}
@Test
public void testV2Fallback() {
EncodingStats.Builder builder = new EncodingStats.Builder();
builder.withV2Pages();
builder.addDictEncoding(Encoding.PLAIN);
builder.addDataEncoding(Encoding.RLE_DICTIONARY);
builder.addDataEncoding(Encoding.DELTA_BYTE_ARRAY);
builder.addDataEncoding(Encoding.DELTA_BYTE_ARRAY);
EncodingStats stats = builder.build();
assertTrue(stats.usesV2Pages());
assertTrue("Should have dictionary-encoded pages", stats.hasDictionaryEncodedPages());
assertTrue("Should have non-dictionary pages", stats.hasNonDictionaryEncodedPages());
assertTrue("Should have dictionary pages", stats.hasDictionaryPages());
}
@Test
public void testCounts() {
EncodingStats.Builder builder = new EncodingStats.Builder();
builder.withV2Pages();
builder.addDictEncoding(Encoding.PLAIN);
builder.addDataEncoding(Encoding.RLE_DICTIONARY, 4);
builder.addDataEncoding(Encoding.RLE_DICTIONARY);
builder.addDataEncoding(Encoding.DELTA_BYTE_ARRAY);
builder.addDataEncoding(Encoding.DELTA_BYTE_ARRAY);
EncodingStats stats = builder.build();
assertEquals("Count should match", 1, stats.getNumDictionaryPagesEncodedAs(Encoding.PLAIN));
assertEquals("Count should match", 0, stats.getNumDictionaryPagesEncodedAs(Encoding.PLAIN_DICTIONARY));
assertEquals("Count should match", 0, stats.getNumDictionaryPagesEncodedAs(Encoding.RLE));
assertEquals("Count should match", 0, stats.getNumDictionaryPagesEncodedAs(Encoding.BIT_PACKED));
assertEquals("Count should match", 0, stats.getNumDictionaryPagesEncodedAs(Encoding.DELTA_BYTE_ARRAY));
assertEquals("Count should match", 0, stats.getNumDictionaryPagesEncodedAs(Encoding.DELTA_BINARY_PACKED));
assertEquals("Count should match", 0, stats.getNumDictionaryPagesEncodedAs(Encoding.DELTA_LENGTH_BYTE_ARRAY));
assertEquals("Count should match", 5, stats.getNumDataPagesEncodedAs(Encoding.RLE_DICTIONARY));
assertEquals("Count should match", 2, stats.getNumDataPagesEncodedAs(Encoding.DELTA_BYTE_ARRAY));
assertEquals("Count should match", 0, stats.getNumDataPagesEncodedAs(Encoding.RLE));
assertEquals("Count should match", 0, stats.getNumDataPagesEncodedAs(Encoding.BIT_PACKED));
assertEquals("Count should match", 0, stats.getNumDataPagesEncodedAs(Encoding.PLAIN));
assertEquals("Count should match", 0, stats.getNumDataPagesEncodedAs(Encoding.PLAIN_DICTIONARY));
assertEquals("Count should match", 0, stats.getNumDataPagesEncodedAs(Encoding.DELTA_BINARY_PACKED));
assertEquals("Count should match", 0, stats.getNumDataPagesEncodedAs(Encoding.DELTA_LENGTH_BYTE_ARRAY));
}
}