blob: b89c0cdd6aabebdcebe29fbebff105f59901e9c9 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.segment.local.segment.index.readers;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import java.io.File;
import java.math.BigDecimal;
import java.util.Arrays;
import java.util.Random;
import org.apache.commons.io.FileUtils;
import org.apache.pinot.segment.local.segment.creator.impl.SegmentDictionaryCreator;
import org.apache.pinot.segment.spi.V1Constants;
import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
import org.apache.pinot.spi.data.DimensionFieldSpec;
import org.apache.pinot.spi.data.FieldSpec.DataType;
import org.apache.pinot.spi.data.MetricFieldSpec;
import org.apache.pinot.spi.utils.ArrayCopyUtils;
import org.apache.pinot.spi.utils.BigDecimalUtils;
import org.apache.pinot.spi.utils.ByteArray;
import org.apache.pinot.spi.utils.BytesUtils;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import static org.testng.Assert.assertEquals;
/**
 * Tests the typed accessors of the immutable dictionary readers.
 *
 * <p>One dictionary file per data type (INT, LONG, FLOAT, DOUBLE, BIG_DECIMAL, STRING, BYTES) is built from the same
 * set of sorted random integers. Each reader (off-heap and on-heap variant) is then checked to return the expected
 * value through every supported typed getter and batch reader, and to throw {@link UnsupportedOperationException} for
 * the conversions it is expected not to support.
 *
 * <p>Output buffers for the batch readers are allocated inside each helper rather than shared between tests, so that
 * values left behind by a previous test can never satisfy an assertion on behalf of a reader that wrote nothing.
 */
public class ImmutableDictionaryTypeConversionTest {
  private static final File TEMP_DIR = new File(FileUtils.getTempDirectory(), "ImmutableDictionaryTypeConversionTest");
  private static final Random RANDOM = new Random();

  private static final String INT_COLUMN_NAME = "intColumn";
  private static final String LONG_COLUMN_NAME = "longColumn";
  private static final String FLOAT_COLUMN_NAME = "floatColumn";
  private static final String DOUBLE_COLUMN_NAME = "doubleColumn";
  private static final String BIG_DECIMAL_COLUMN_NAME = "bigDecimalColumn";
  private static final String STRING_COLUMN_NAME = "stringColumn";
  private static final String BYTES_COLUMN_NAME = "bytesColumn";

  private static final int NUM_VALUES = 1000;
  // Values are drawn from [MIN_VALUE, MAX_VALUE), so every value has exactly STRING_LENGTH decimal digits. This keeps
  // the string entries, and the byte entries derived from them, at a fixed length, which the BytesDictionary needs.
  private static final int MIN_VALUE = 100000;
  private static final int MAX_VALUE = 1000000;
  private static final int STRING_LENGTH = 6;
  // setUp() asserts that the bytes dictionary stores this many bytes per entry (half the string length).
  private static final int BYTES_LENGTH = STRING_LENGTH / 2;

  // Expected values, all derived from _intValues and indexed by dictionary id.
  private int[] _intValues;
  private long[] _longValues;
  private float[] _floatValues;
  private double[] _doubleValues;
  private BigDecimal[] _bigDecimalValues;
  // Largest BigDecimalUtils.byteSize() over _bigDecimalValues; handed to the BigDecimal dictionary readers.
  private int _bigDecimalByteLength;
  private String[] _stringValues;
  private ByteArray[] _bytesValues;

  // The identity sequence 0..NUM_VALUES-1, used as read-only input to the batch readers.
  private int[] _dictIds;

  /**
   * Generates the expected values and writes one dictionary file per column into {@link #TEMP_DIR}.
   *
   * @throws Exception if creating or building any of the dictionaries fails
   */
  @BeforeClass
  public void setUp()
      throws Exception {
    // Remove anything left behind by a previous run.
    FileUtils.deleteQuietly(TEMP_DIR);

    // Collect NUM_VALUES distinct random values; the set discards duplicates.
    IntOpenHashSet intSet = new IntOpenHashSet();
    while (intSet.size() < NUM_VALUES) {
      intSet.add(RANDOM.nextInt(MAX_VALUE - MIN_VALUE) + MIN_VALUE);
    }
    _intValues = intSet.toIntArray();
    // Sorted so that index i in the expected arrays lines up with dictionary id i in the assertions below.
    Arrays.sort(_intValues);

    // Derive the same values in every other type.
    _longValues = new long[NUM_VALUES];
    ArrayCopyUtils.copy(_intValues, _longValues, NUM_VALUES);
    _floatValues = new float[NUM_VALUES];
    ArrayCopyUtils.copy(_intValues, _floatValues, NUM_VALUES);
    _doubleValues = new double[NUM_VALUES];
    ArrayCopyUtils.copy(_intValues, _doubleValues, NUM_VALUES);
    _bigDecimalValues = new BigDecimal[NUM_VALUES];
    ArrayCopyUtils.copy(_intValues, _bigDecimalValues, NUM_VALUES);
    for (BigDecimal bigDecimal : _bigDecimalValues) {
      _bigDecimalByteLength = Math.max(_bigDecimalByteLength, BigDecimalUtils.byteSize(bigDecimal));
    }
    _stringValues = new String[NUM_VALUES];
    ArrayCopyUtils.copy(_intValues, _stringValues, NUM_VALUES);
    _bytesValues = new ByteArray[NUM_VALUES];
    for (int i = 0; i < NUM_VALUES; i++) {
      _bytesValues[i] = BytesUtils.toByteArray(_stringValues[i]);
    }

    // Build one dictionary file per data type.
    try (SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(_intValues,
        new DimensionFieldSpec(INT_COLUMN_NAME, DataType.INT, true), TEMP_DIR)) {
      dictionaryCreator.build();
    }
    try (SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(_longValues,
        new DimensionFieldSpec(LONG_COLUMN_NAME, DataType.LONG, true), TEMP_DIR)) {
      dictionaryCreator.build();
    }
    try (SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(_floatValues,
        new DimensionFieldSpec(FLOAT_COLUMN_NAME, DataType.FLOAT, true), TEMP_DIR)) {
      dictionaryCreator.build();
    }
    try (SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(_doubleValues,
        new DimensionFieldSpec(DOUBLE_COLUMN_NAME, DataType.DOUBLE, true), TEMP_DIR)) {
      dictionaryCreator.build();
    }
    try (SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(_bigDecimalValues,
        new MetricFieldSpec(BIG_DECIMAL_COLUMN_NAME, DataType.BIG_DECIMAL), TEMP_DIR)) {
      dictionaryCreator.build();
    }
    try (SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(_stringValues,
        new DimensionFieldSpec(STRING_COLUMN_NAME, DataType.STRING, true), TEMP_DIR)) {
      dictionaryCreator.build();
      // The readers below are opened with STRING_LENGTH, so make sure the creator agrees.
      assertEquals(dictionaryCreator.getNumBytesPerEntry(), STRING_LENGTH);
    }
    try (SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(_bytesValues,
        new DimensionFieldSpec(BYTES_COLUMN_NAME, DataType.BYTES, true), TEMP_DIR)) {
      dictionaryCreator.build();
      // The readers below are opened with BYTES_LENGTH, so make sure the creator agrees.
      assertEquals(dictionaryCreator.getNumBytesPerEntry(), BYTES_LENGTH);
    }

    _dictIds = new int[NUM_VALUES];
    for (int i = 0; i < NUM_VALUES; i++) {
      _dictIds[i] = i;
    }
  }

  /** Checks the type conversions of {@code IntDictionary}. */
  @Test
  public void testIntDictionary()
      throws Exception {
    try (IntDictionary intDictionary = new IntDictionary(
        PinotDataBuffer.mapReadOnlyBigEndianFile(new File(TEMP_DIR, INT_COLUMN_NAME + V1Constants.Dict.FILE_EXTENSION)),
        NUM_VALUES)) {
      testNumericDictionary(intDictionary);
    }
  }

  /** Checks the type conversions of {@code OnHeapIntDictionary}. */
  @Test
  public void testOnHeapIntDictionary()
      throws Exception {
    try (OnHeapIntDictionary onHeapIntDictionary = new OnHeapIntDictionary(
        PinotDataBuffer.mapReadOnlyBigEndianFile(new File(TEMP_DIR, INT_COLUMN_NAME + V1Constants.Dict.FILE_EXTENSION)),
        NUM_VALUES)) {
      testNumericDictionary(onHeapIntDictionary);
    }
  }

  /** Checks the type conversions of {@code LongDictionary}. */
  @Test
  public void testLongDictionary()
      throws Exception {
    try (LongDictionary longDictionary = new LongDictionary(PinotDataBuffer.mapReadOnlyBigEndianFile(
        new File(TEMP_DIR, LONG_COLUMN_NAME + V1Constants.Dict.FILE_EXTENSION)), NUM_VALUES)) {
      testNumericDictionary(longDictionary);
    }
  }

  /** Checks the type conversions of {@code OnHeapLongDictionary}. */
  @Test
  public void testOnHeapLongDictionary()
      throws Exception {
    try (OnHeapLongDictionary onHeapLongDictionary = new OnHeapLongDictionary(PinotDataBuffer.mapReadOnlyBigEndianFile(
        new File(TEMP_DIR, LONG_COLUMN_NAME + V1Constants.Dict.FILE_EXTENSION)), NUM_VALUES)) {
      testNumericDictionary(onHeapLongDictionary);
    }
  }

  /** Checks the type conversions of {@code FloatDictionary}. */
  @Test
  public void testFloatDictionary()
      throws Exception {
    try (FloatDictionary floatDictionary = new FloatDictionary(PinotDataBuffer.mapReadOnlyBigEndianFile(
        new File(TEMP_DIR, FLOAT_COLUMN_NAME + V1Constants.Dict.FILE_EXTENSION)), NUM_VALUES)) {
      testNumericDictionary(floatDictionary);
    }
  }

  /** Checks the type conversions of {@code OnHeapFloatDictionary}. */
  @Test
  public void testOnHeapFloatDictionary()
      throws Exception {
    try (OnHeapFloatDictionary onHeapFloatDictionary = new OnHeapFloatDictionary(
        PinotDataBuffer.mapReadOnlyBigEndianFile(
            new File(TEMP_DIR, FLOAT_COLUMN_NAME + V1Constants.Dict.FILE_EXTENSION)), NUM_VALUES)) {
      testNumericDictionary(onHeapFloatDictionary);
    }
  }

  /** Checks the type conversions of {@code DoubleDictionary}. */
  @Test
  public void testDoubleDictionary()
      throws Exception {
    try (DoubleDictionary doubleDictionary = new DoubleDictionary(PinotDataBuffer.mapReadOnlyBigEndianFile(
        new File(TEMP_DIR, DOUBLE_COLUMN_NAME + V1Constants.Dict.FILE_EXTENSION)), NUM_VALUES)) {
      testNumericDictionary(doubleDictionary);
    }
  }

  /** Checks the type conversions of {@code OnHeapDoubleDictionary}. */
  @Test
  public void testOnHeapDoubleDictionary()
      throws Exception {
    try (OnHeapDoubleDictionary onHeapDoubleDictionary = new OnHeapDoubleDictionary(
        PinotDataBuffer.mapReadOnlyBigEndianFile(
            new File(TEMP_DIR, DOUBLE_COLUMN_NAME + V1Constants.Dict.FILE_EXTENSION)), NUM_VALUES)) {
      testNumericDictionary(onHeapDoubleDictionary);
    }
  }

  /** Checks the type conversions of {@code BigDecimalDictionary}. */
  @Test
  public void testBigDecimalDictionary()
      throws Exception {
    try (BigDecimalDictionary bigDecimalDictionary = new BigDecimalDictionary(PinotDataBuffer.mapReadOnlyBigEndianFile(
        new File(TEMP_DIR, BIG_DECIMAL_COLUMN_NAME + V1Constants.Dict.FILE_EXTENSION)), NUM_VALUES,
        _bigDecimalByteLength)) {
      testNumericDictionary(bigDecimalDictionary);
    }
  }

  /** Checks the type conversions of {@code OnHeapBigDecimalDictionary}. */
  @Test
  public void testOnHeapBigDecimalDictionary()
      throws Exception {
    try (OnHeapBigDecimalDictionary onHeapBigDecimalDictionary = new OnHeapBigDecimalDictionary(
        PinotDataBuffer.mapReadOnlyBigEndianFile(
            new File(TEMP_DIR, BIG_DECIMAL_COLUMN_NAME + V1Constants.Dict.FILE_EXTENSION)), NUM_VALUES,
        _bigDecimalByteLength)) {
      testNumericDictionary(onHeapBigDecimalDictionary);
    }
  }

  /**
   * Verifies a dictionary holding numeric values: every entry must be readable as int, long, float, double,
   * BigDecimal and String, both one at a time and in batch. Reading bytes must throw
   * {@link UnsupportedOperationException}, except for BIG_DECIMAL dictionaries where either outcome is accepted.
   *
   * @param dictionary the numeric dictionary reader under test
   */
  private void testNumericDictionary(BaseImmutableDictionary dictionary) {
    for (int i = 0; i < NUM_VALUES; i++) {
      assertEquals(((Number) dictionary.get(i)).intValue(), _intValues[i]);
      assertEquals(dictionary.getIntValue(i), _intValues[i]);
      assertEquals(dictionary.getLongValue(i), _longValues[i]);
      assertEquals(dictionary.getFloatValue(i), _floatValues[i]);
      assertEquals(dictionary.getDoubleValue(i), _doubleValues[i]);
      // The string form differs by type (e.g. with or without a fraction part), so compare the parsed number.
      assertEquals(Double.parseDouble(dictionary.getStringValue(i)), _doubleValues[i]);
    }

    // Fresh buffers for every batch read: a reader that writes nothing must not pass on leftover data.
    int[] intValuesBuffer = new int[NUM_VALUES];
    dictionary.readIntValues(_dictIds, NUM_VALUES, intValuesBuffer);
    assertEquals(intValuesBuffer, _intValues);

    long[] longValuesBuffer = new long[NUM_VALUES];
    dictionary.readLongValues(_dictIds, NUM_VALUES, longValuesBuffer);
    assertEquals(longValuesBuffer, _longValues);

    float[] floatValuesBuffer = new float[NUM_VALUES];
    dictionary.readFloatValues(_dictIds, NUM_VALUES, floatValuesBuffer);
    assertEquals(floatValuesBuffer, _floatValues);

    double[] doubleValuesBuffer = new double[NUM_VALUES];
    dictionary.readDoubleValues(_dictIds, NUM_VALUES, doubleValuesBuffer);
    assertEquals(doubleValuesBuffer, _doubleValues);

    BigDecimal[] bigDecimalValuesBuffer = new BigDecimal[NUM_VALUES];
    dictionary.readBigDecimalValues(_dictIds, NUM_VALUES, bigDecimalValuesBuffer);
    for (int i = 0; i < NUM_VALUES; i++) {
      // compareTo() rather than equals(): the scale of the returned value may differ from the expected one.
      assertEquals(bigDecimalValuesBuffer[i].compareTo(_bigDecimalValues[i]), 0);
    }

    String[] stringValuesBuffer = new String[NUM_VALUES];
    dictionary.readStringValues(_dictIds, NUM_VALUES, stringValuesBuffer);
    for (int i = 0; i < NUM_VALUES; i++) {
      assertEquals(Double.parseDouble(stringValuesBuffer[i]), _doubleValues[i]);
    }

    // Assert.fail() throws an AssertionError, which the catch clauses below do not intercept.
    try {
      dictionary.getBytesValue(0);
      if (dictionary.getValueType() != DataType.BIG_DECIMAL) {
        Assert.fail("getBytesValue() should not be supported for value type: " + dictionary.getValueType());
      }
    } catch (UnsupportedOperationException e) {
      // Expected
    }
    try {
      dictionary.readBytesValues(_dictIds, NUM_VALUES, new byte[NUM_VALUES][]);
      if (dictionary.getValueType() != DataType.BIG_DECIMAL) {
        Assert.fail("readBytesValues() should not be supported for value type: " + dictionary.getValueType());
      }
    } catch (UnsupportedOperationException e) {
      // Expected
    }
  }

  /** Checks the type conversions of {@code StringDictionary}. */
  @Test
  public void testStringDictionary()
      throws Exception {
    try (StringDictionary stringDictionary = new StringDictionary(PinotDataBuffer.mapReadOnlyBigEndianFile(
        new File(TEMP_DIR, STRING_COLUMN_NAME + V1Constants.Dict.FILE_EXTENSION)), NUM_VALUES, STRING_LENGTH,
        (byte) 0)) {
      testStringDictionary(stringDictionary);
    }
  }

  /** Checks the type conversions of {@code OnHeapStringDictionary}. */
  @Test
  public void testOnHeapStringDictionary()
      throws Exception {
    try (OnHeapStringDictionary onHeapStringDictionary = new OnHeapStringDictionary(
        PinotDataBuffer.mapReadOnlyBigEndianFile(
            new File(TEMP_DIR, STRING_COLUMN_NAME + V1Constants.Dict.FILE_EXTENSION)), NUM_VALUES, STRING_LENGTH,
        (byte) 0)) {
      testStringDictionary(onHeapStringDictionary);
    }
  }

  /**
   * Verifies a dictionary holding string values: every entry must be readable as int, long, float, double, String
   * and bytes, both one at a time and in batch.
   *
   * @param dictionary the string dictionary reader under test
   */
  private void testStringDictionary(BaseImmutableDictionary dictionary) {
    for (int i = 0; i < NUM_VALUES; i++) {
      assertEquals(dictionary.get(i), _stringValues[i]);
      assertEquals(dictionary.getIntValue(i), _intValues[i]);
      assertEquals(dictionary.getLongValue(i), _longValues[i]);
      assertEquals(dictionary.getFloatValue(i), _floatValues[i]);
      assertEquals(dictionary.getDoubleValue(i), _doubleValues[i]);
      assertEquals(dictionary.getStringValue(i), _stringValues[i]);
      assertEquals(dictionary.getBytesValue(i), _bytesValues[i].getBytes());
    }

    // Fresh buffers for every batch read: a reader that writes nothing must not pass on leftover data.
    int[] intValuesBuffer = new int[NUM_VALUES];
    dictionary.readIntValues(_dictIds, NUM_VALUES, intValuesBuffer);
    assertEquals(intValuesBuffer, _intValues);

    long[] longValuesBuffer = new long[NUM_VALUES];
    dictionary.readLongValues(_dictIds, NUM_VALUES, longValuesBuffer);
    assertEquals(longValuesBuffer, _longValues);

    float[] floatValuesBuffer = new float[NUM_VALUES];
    dictionary.readFloatValues(_dictIds, NUM_VALUES, floatValuesBuffer);
    assertEquals(floatValuesBuffer, _floatValues);

    double[] doubleValuesBuffer = new double[NUM_VALUES];
    dictionary.readDoubleValues(_dictIds, NUM_VALUES, doubleValuesBuffer);
    assertEquals(doubleValuesBuffer, _doubleValues);

    String[] stringValuesBuffer = new String[NUM_VALUES];
    dictionary.readStringValues(_dictIds, NUM_VALUES, stringValuesBuffer);
    assertEquals(stringValuesBuffer, _stringValues);

    byte[][] bytesValuesBuffer = new byte[NUM_VALUES][];
    dictionary.readBytesValues(_dictIds, NUM_VALUES, bytesValuesBuffer);
    for (int i = 0; i < NUM_VALUES; i++) {
      assertEquals(bytesValuesBuffer[i], _bytesValues[i].getBytes());
    }
  }

  /** Checks the type conversions of {@code BytesDictionary}. */
  @Test
  public void testBytesDictionary()
      throws Exception {
    try (BytesDictionary bytesDictionary = new BytesDictionary(PinotDataBuffer.mapReadOnlyBigEndianFile(
        new File(TEMP_DIR, BYTES_COLUMN_NAME + V1Constants.Dict.FILE_EXTENSION)), NUM_VALUES, BYTES_LENGTH)) {
      testBytesDictionary(bytesDictionary);
    }
  }

  /** Checks the type conversions of {@code OnHeapBytesDictionary}. */
  @Test
  public void testOnHeapBytesDictionary()
      throws Exception {
    try (OnHeapBytesDictionary onHeapBytesDictionary = new OnHeapBytesDictionary(
        PinotDataBuffer.mapReadOnlyBigEndianFile(
            new File(TEMP_DIR, BYTES_COLUMN_NAME + V1Constants.Dict.FILE_EXTENSION)), NUM_VALUES, BYTES_LENGTH)) {
      testBytesDictionary(onHeapBytesDictionary);
    }
  }

  /**
   * Verifies a dictionary holding byte-array values: every entry must be readable as String and bytes, both one at a
   * time and in batch, while all numeric accessors must throw {@link UnsupportedOperationException}.
   *
   * @param dictionary the bytes dictionary reader under test
   */
  private void testBytesDictionary(BaseImmutableDictionary dictionary) {
    for (int i = 0; i < NUM_VALUES; i++) {
      assertEquals(dictionary.get(i), _bytesValues[i].getBytes());
      assertEquals(dictionary.getStringValue(i), _stringValues[i]);
      assertEquals(dictionary.getBytesValue(i), _bytesValues[i].getBytes());
    }

    // Fresh buffers for every batch read: a reader that writes nothing must not pass on leftover data.
    String[] stringValuesBuffer = new String[NUM_VALUES];
    dictionary.readStringValues(_dictIds, NUM_VALUES, stringValuesBuffer);
    assertEquals(stringValuesBuffer, _stringValues);

    byte[][] bytesValuesBuffer = new byte[NUM_VALUES][];
    dictionary.readBytesValues(_dictIds, NUM_VALUES, bytesValuesBuffer);
    for (int i = 0; i < NUM_VALUES; i++) {
      assertEquals(bytesValuesBuffer[i], _bytesValues[i].getBytes());
    }

    // Numeric conversions are expected to be rejected. Assert.fail() throws an AssertionError, which the catch
    // clauses below do not intercept.
    try {
      dictionary.getIntValue(0);
      Assert.fail("getIntValue() should not be supported by a bytes dictionary");
    } catch (UnsupportedOperationException e) {
      // Expected
    }
    try {
      dictionary.getLongValue(0);
      Assert.fail("getLongValue() should not be supported by a bytes dictionary");
    } catch (UnsupportedOperationException e) {
      // Expected
    }
    try {
      dictionary.getFloatValue(0);
      Assert.fail("getFloatValue() should not be supported by a bytes dictionary");
    } catch (UnsupportedOperationException e) {
      // Expected
    }
    try {
      dictionary.getDoubleValue(0);
      Assert.fail("getDoubleValue() should not be supported by a bytes dictionary");
    } catch (UnsupportedOperationException e) {
      // Expected
    }
    try {
      dictionary.readIntValues(_dictIds, NUM_VALUES, new int[NUM_VALUES]);
      Assert.fail("readIntValues() should not be supported by a bytes dictionary");
    } catch (UnsupportedOperationException e) {
      // Expected
    }
    try {
      dictionary.readLongValues(_dictIds, NUM_VALUES, new long[NUM_VALUES]);
      Assert.fail("readLongValues() should not be supported by a bytes dictionary");
    } catch (UnsupportedOperationException e) {
      // Expected
    }
    try {
      dictionary.readFloatValues(_dictIds, NUM_VALUES, new float[NUM_VALUES]);
      Assert.fail("readFloatValues() should not be supported by a bytes dictionary");
    } catch (UnsupportedOperationException e) {
      // Expected
    }
    try {
      dictionary.readDoubleValues(_dictIds, NUM_VALUES, new double[NUM_VALUES]);
      Assert.fail("readDoubleValues() should not be supported by a bytes dictionary");
    } catch (UnsupportedOperationException e) {
      // Expected
    }
  }

  /** Deletes the temporary directory holding the dictionary files. */
  @AfterClass
  public void tearDown() {
    FileUtils.deleteQuietly(TEMP_DIR);
  }
}