blob: 835cdc105b8c2ed8e657e9302923f6b678891c3d [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.segment.local.segment.index.readers;
import com.google.common.base.Preconditions;
import it.unimi.dsi.fastutil.ints.IntSet;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.Arrays;
import org.apache.pinot.segment.local.io.util.FixedByteValueReaderWriter;
import org.apache.pinot.segment.local.io.util.ValueReader;
import org.apache.pinot.segment.local.io.util.VarLengthValueReader;
import org.apache.pinot.segment.spi.index.reader.Dictionary;
import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
import org.apache.pinot.spi.utils.ByteArray;
import static java.nio.charset.StandardCharsets.UTF_8;
/**
* Base implementation of immutable dictionary.
*/
@SuppressWarnings("rawtypes")
public abstract class BaseImmutableDictionary implements Dictionary {
private final ValueReader _valueReader;
private final int _length;
private final int _numBytesPerValue;
private final byte _paddingByte;
protected BaseImmutableDictionary(PinotDataBuffer dataBuffer, int length, int numBytesPerValue, byte paddingByte) {
if (VarLengthValueReader.isVarLengthValueBuffer(dataBuffer)) {
VarLengthValueReader valueReader = new VarLengthValueReader(dataBuffer);
_valueReader = valueReader;
_length = valueReader.getNumValues();
_paddingByte = 0;
} else {
Preconditions.checkState(dataBuffer.size() == (long) length * numBytesPerValue,
"Buffer size mismatch: bufferSize = %s, numValues = %s, numByesPerValue = %s", dataBuffer.size(), length,
numBytesPerValue);
_valueReader = new FixedByteValueReaderWriter(dataBuffer);
_length = length;
_paddingByte = paddingByte;
}
_numBytesPerValue = numBytesPerValue;
}
/**
* For virtual dictionary.
*/
protected BaseImmutableDictionary(int length) {
_valueReader = null;
_length = length;
_numBytesPerValue = -1;
_paddingByte = 0;
}
@Override
public boolean isSorted() {
return true;
}
@Override
public int length() {
return _length;
}
@Override
public int indexOf(String stringValue) {
return normalizeIndex(insertionIndexOf(stringValue));
}
protected final int normalizeIndex(int index) {
return index >= 0 ? index : NULL_VALUE_INDEX;
}
@Override
public IntSet getDictIdsInRange(String lower, String upper, boolean includeLower, boolean includeUpper) {
// This method should not be called for sorted dictionary.
throw new UnsupportedOperationException();
}
@Override
public int compare(int dictId1, int dictId2) {
return Integer.compare(dictId1, dictId2);
}
@Override
public Comparable getMinVal() {
return (Comparable) get(0);
}
@Override
public Comparable getMaxVal() {
return (Comparable) get(_length - 1);
}
@Override
public Object getSortedValues() {
// This method is for the stats collection phase when sealing the consuming segment, so it is not required for
// regular immutable dictionary within the immutable segment.
throw new UnsupportedOperationException();
}
@Override
public void close()
throws IOException {
if (_valueReader != null) {
_valueReader.close();
}
}
protected int binarySearch(int value) {
int low = 0;
int high = _length - 1;
while (low <= high) {
int mid = (low + high) >>> 1;
int midValue = _valueReader.getInt(mid);
if (midValue < value) {
low = mid + 1;
} else if (midValue > value) {
high = mid - 1;
} else {
return mid;
}
}
return -(low + 1);
}
protected int binarySearch(long value) {
int low = 0;
int high = _length - 1;
while (low <= high) {
int mid = (low + high) >>> 1;
long midValue = _valueReader.getLong(mid);
if (midValue < value) {
low = mid + 1;
} else if (midValue > value) {
high = mid - 1;
} else {
return mid;
}
}
return -(low + 1);
}
protected int binarySearch(float value) {
int low = 0;
int high = _length - 1;
while (low <= high) {
int mid = (low + high) >>> 1;
float midValue = _valueReader.getFloat(mid);
if (midValue < value) {
low = mid + 1;
} else if (midValue > value) {
high = mid - 1;
} else {
return mid;
}
}
return -(low + 1);
}
protected int binarySearch(double value) {
int low = 0;
int high = _length - 1;
while (low <= high) {
int mid = (low + high) >>> 1;
double midValue = _valueReader.getDouble(mid);
if (midValue < value) {
low = mid + 1;
} else if (midValue > value) {
high = mid - 1;
} else {
return mid;
}
}
return -(low + 1);
}
protected int binarySearch(BigDecimal value) {
int low = 0;
int high = _length - 1;
while (low <= high) {
int mid = (low + high) >>> 1;
BigDecimal midValue = _valueReader.getBigDecimal(mid, _numBytesPerValue);
int compareResult = midValue.compareTo(value);
if (compareResult < 0) {
low = mid + 1;
} else if (compareResult > 0) {
high = mid - 1;
} else {
return mid;
}
}
return -(low + 1);
}
/**
* WARNING: With non-zero padding byte, binary search result might not reflect the real insertion index for the value.
* E.g. with padding byte 'b', if unpadded value "aa" is in the dictionary, and stored as "aab", then unpadded value
* "a" will be mis-positioned after value "aa"; unpadded value "aab" will return positive value even if value "aab" is
* not in the dictionary.
* TODO: Clean up the segments with legacy non-zero padding byte, and remove the support for non-zero padding byte
*/
protected int binarySearch(String value) {
byte[] buffer = getBuffer();
int low = 0;
int high = _length - 1;
if (_paddingByte == 0) {
while (low <= high) {
int mid = (low + high) >>> 1;
String midValue = _valueReader.getUnpaddedString(mid, _numBytesPerValue, _paddingByte, buffer);
int compareResult = midValue.compareTo(value);
if (compareResult < 0) {
low = mid + 1;
} else if (compareResult > 0) {
high = mid - 1;
} else {
return mid;
}
}
} else {
String paddedValue = padString(value);
while (low <= high) {
int mid = (low + high) >>> 1;
String midValue = _valueReader.getPaddedString(mid, _numBytesPerValue, buffer);
int compareResult = midValue.compareTo(paddedValue);
if (compareResult < 0) {
low = mid + 1;
} else if (compareResult > 0) {
high = mid - 1;
} else {
return mid;
}
}
}
return -(low + 1);
}
protected int binarySearch(byte[] value) {
int low = 0;
int high = _length - 1;
while (low <= high) {
int mid = (low + high) >>> 1;
byte[] midValue = _valueReader.getBytes(mid, _numBytesPerValue);
int compareResult = ByteArray.compare(midValue, value);
if (compareResult < 0) {
low = mid + 1;
} else if (compareResult > 0) {
high = mid - 1;
} else {
return mid;
}
}
return -(low + 1);
}
protected String padString(String value) {
byte[] valueBytes = value.getBytes(UTF_8);
int length = valueBytes.length;
String paddedValue;
if (length >= _numBytesPerValue) {
paddedValue = value;
} else {
byte[] paddedValueBytes = new byte[_numBytesPerValue];
System.arraycopy(valueBytes, 0, paddedValueBytes, 0, length);
Arrays.fill(paddedValueBytes, length, _numBytesPerValue, _paddingByte);
paddedValue = new String(paddedValueBytes, UTF_8);
}
return paddedValue;
}
protected int getInt(int dictId) {
return _valueReader.getInt(dictId);
}
protected long getLong(int dictId) {
return _valueReader.getLong(dictId);
}
protected float getFloat(int dictId) {
return _valueReader.getFloat(dictId);
}
protected double getDouble(int dictId) {
return _valueReader.getDouble(dictId);
}
protected BigDecimal getBigDecimal(int dictId) {
return _valueReader.getBigDecimal(dictId, _numBytesPerValue);
}
protected String getUnpaddedString(int dictId, byte[] buffer) {
return _valueReader.getUnpaddedString(dictId, _numBytesPerValue, _paddingByte, buffer);
}
protected String getPaddedString(int dictId, byte[] buffer) {
return _valueReader.getPaddedString(dictId, _numBytesPerValue, buffer);
}
protected byte[] getBytes(int dictId) {
return _valueReader.getBytes(dictId, _numBytesPerValue);
}
protected byte[] getBuffer() {
return new byte[_numBytesPerValue];
}
}