blob: a44bc1ada2c4ec132a46c0dd5e3b1fd0352958ab [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.core.scan.filter.executer;
import java.io.IOException;
import java.util.BitSet;
import java.util.List;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk;
import org.apache.carbondata.core.datastore.page.ColumnPage;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.encoder.Encoding;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
import org.apache.carbondata.core.scan.expression.Expression;
import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException;
import org.apache.carbondata.core.scan.filter.FilterUtil;
import org.apache.carbondata.core.scan.filter.intf.RowIntf;
import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo;
import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo;
import org.apache.carbondata.core.scan.processor.BlocksChunkHolder;
import org.apache.carbondata.core.util.BitSetGroup;
import org.apache.carbondata.core.util.ByteUtil;
import org.apache.carbondata.core.util.CarbonUtil;
import org.apache.carbondata.core.util.DataTypeUtil;
import org.apache.carbondata.core.util.comparator.Comparator;
import org.apache.carbondata.core.util.comparator.SerializableComparator;
public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilterExecuterImpl {
protected byte[][] filterRangeValues;
protected Object[] msrFilterRangeValues;
protected SerializableComparator comparator;
/**
* flag to check whether default values is present in the filter value list
*/
private boolean isDefaultValuePresentInFilter;
private int lastDimensionColOrdinal = 0;
public RowLevelRangeLessThanEqualFilterExecuterImpl(
List<DimColumnResolvedFilterInfo> dimColEvaluatorInfoList,
List<MeasureColumnResolvedFilterInfo> msrColEvalutorInfoList, Expression exp,
AbsoluteTableIdentifier tableIdentifier, byte[][] filterRangeValues,
Object[] msrFilterRangeValues, SegmentProperties segmentProperties) {
super(dimColEvaluatorInfoList, msrColEvalutorInfoList, exp, tableIdentifier, segmentProperties,
null);
lastDimensionColOrdinal = segmentProperties.getLastDimensionColOrdinal();
this.filterRangeValues = filterRangeValues;
this.msrFilterRangeValues = msrFilterRangeValues;
if (isMeasurePresentInCurrentBlock[0]) {
CarbonMeasure measure = this.msrColEvalutorInfoList.get(0).getMeasure();
comparator = Comparator.getComparatorByDataTypeForMeasure(measure.getDataType());
}
ifDefaultValueMatchesFilter();
if (isDimensionPresentInCurrentBlock[0]) {
isNaturalSorted = dimColEvaluatorInfoList.get(0).getDimension().isUseInvertedIndex()
&& dimColEvaluatorInfoList.get(0).getDimension().isSortColumn();
}
}
/**
* This method will check whether default value is present in the given filter values
*/
private void ifDefaultValueMatchesFilter() {
if (!dimColEvaluatorInfoList.isEmpty() && !isDimensionPresentInCurrentBlock[0]) {
CarbonDimension dimension = this.dimColEvaluatorInfoList.get(0).getDimension();
byte[] defaultValue = dimension.getDefaultValue();
if (null != defaultValue) {
for (int k = 0; k < filterRangeValues.length; k++) {
int maxCompare =
ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterRangeValues[k], defaultValue);
if (maxCompare >= 0) {
isDefaultValuePresentInFilter = true;
break;
}
}
}
} else if (!msrColEvalutorInfoList.isEmpty() && !isMeasurePresentInCurrentBlock[0]) {
CarbonMeasure measure = this.msrColEvalutorInfoList.get(0).getMeasure();
byte[] defaultValue = measure.getDefaultValue();
if (null != defaultValue) {
for (int k = 0; k < msrFilterRangeValues.length; k++) {
int maxCompare = comparator.compare(msrFilterRangeValues[k],
DataTypeUtil.getMeasureObjectFromDataType(defaultValue, measure.getDataType()));
if (maxCompare >= 0) {
isDefaultValuePresentInFilter = true;
break;
}
}
}
}
}
@Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) {
BitSet bitSet = new BitSet(1);
byte[] minValue = null;
boolean isScanRequired = false;
if (isMeasurePresentInCurrentBlock[0] || isDimensionPresentInCurrentBlock[0]) {
if (isMeasurePresentInCurrentBlock[0]) {
minValue = blockMinValue[measureBlocksIndex[0] + lastDimensionColOrdinal];
isScanRequired =
isScanRequired(minValue, msrFilterRangeValues, msrColEvalutorInfoList.get(0).getType());
} else {
minValue = blockMinValue[dimensionBlocksIndex[0]];
isScanRequired = isScanRequired(minValue, filterRangeValues);
}
} else {
isScanRequired = isDefaultValuePresentInFilter;
}
if (isScanRequired) {
bitSet.set(0);
}
return bitSet;
}
private boolean isScanRequired(byte[] blockMinValue, byte[][] filterValues) {
boolean isScanRequired = false;
for (int k = 0; k < filterValues.length; k++) {
// and filter-min should be positive
int minCompare = ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[k], blockMinValue);
// if any filter applied is not in range of min and max of block
// then since its a less than equal to fiter validate whether the block
// min range is less than equal to applied filter member
if (minCompare >= 0) {
isScanRequired = true;
break;
}
}
return isScanRequired;
}
private boolean isScanRequired(byte[] minValue, Object[] filterValue,
DataType dataType) {
Object value =
DataTypeUtil.getMeasureObjectFromDataType(minValue, dataType);
for (int i = 0; i < filterValue.length; i++) {
// TODO handle min and max for null values.
if (filterValue[i] == null) {
return true;
}
if (comparator.compare(filterValue[i], value) >= 0) {
return true;
}
}
return false;
}
@Override
public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder, boolean useBitsetPipeLine)
throws FilterUnsupportedException, IOException {
// select all rows if dimension does not exists in the current block
if (!isDimensionPresentInCurrentBlock[0] && !isMeasurePresentInCurrentBlock[0]) {
int numberOfRows = blockChunkHolder.getDataBlock().nodeSize();
return FilterUtil
.createBitSetGroupWithDefaultValue(blockChunkHolder.getDataBlock().numberOfPages(),
numberOfRows, true);
}
if (isDimensionPresentInCurrentBlock[0]) {
int blockIndex =
segmentProperties.getDimensionOrdinalToBlockMapping().get(dimensionBlocksIndex[0]);
if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) {
blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock()
.getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex);
}
DimensionRawColumnChunk rawColumnChunk =
blockChunkHolder.getDimensionRawDataChunk()[blockIndex];
BitSetGroup bitSetGroup = new BitSetGroup(rawColumnChunk.getPagesCount());
for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) {
if (rawColumnChunk.getMinValues() != null) {
if (isScanRequired(rawColumnChunk.getMinValues()[i], this.filterRangeValues)) {
BitSet bitSet = getFilteredIndexes(rawColumnChunk.convertToDimColDataChunk(i),
rawColumnChunk.getRowCount()[i]);
bitSetGroup.setBitSet(bitSet, i);
}
} else {
BitSet bitSet = getFilteredIndexes(rawColumnChunk.convertToDimColDataChunk(i),
rawColumnChunk.getRowCount()[i]);
bitSetGroup.setBitSet(bitSet, i);
}
}
return bitSetGroup;
} else if (isMeasurePresentInCurrentBlock[0]) {
int blockIndex =
segmentProperties.getMeasuresOrdinalToBlockMapping().get(measureBlocksIndex[0]);
if (null == blockChunkHolder.getMeasureRawDataChunk()[blockIndex]) {
blockChunkHolder.getMeasureRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock()
.getMeasureChunk(blockChunkHolder.getFileReader(), blockIndex);
}
MeasureRawColumnChunk rawColumnChunk = blockChunkHolder.getMeasureRawDataChunk()[blockIndex];
BitSetGroup bitSetGroup = new BitSetGroup(rawColumnChunk.getPagesCount());
for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) {
if (rawColumnChunk.getMinValues() != null) {
if (isScanRequired(rawColumnChunk.getMinValues()[i], this.msrFilterRangeValues,
msrColEvalutorInfoList.get(0).getType())) {
BitSet bitSet =
getFilteredIndexesForMeasures(rawColumnChunk.convertToColumnPage(i),
rawColumnChunk.getRowCount()[i]);
bitSetGroup.setBitSet(bitSet, i);
}
} else {
BitSet bitSet =
getFilteredIndexesForMeasures(rawColumnChunk.convertToColumnPage(i),
rawColumnChunk.getRowCount()[i]);
bitSetGroup.setBitSet(bitSet, i);
}
}
return bitSetGroup;
}
return null;
}
@Override
public boolean applyFilter(RowIntf value, int dimOrdinalMax)
throws FilterUnsupportedException, IOException {
if (isDimensionPresentInCurrentBlock[0]) {
byte[] col =
(byte[]) value.getVal(dimColEvaluatorInfoList.get(0).getDimension().getOrdinal());
return ByteUtil.compare(filterRangeValues[0], col) >= 0;
}
if (isMeasurePresentInCurrentBlock[0]) {
Object col =
value.getVal(msrColEvalutorInfoList.get(0).getMeasure().getOrdinal() + dimOrdinalMax);
return comparator.compare(msrFilterRangeValues[0], col) >= 0;
}
return false;
}
private BitSet getFilteredIndexesForMeasures(ColumnPage columnPage,
int numerOfRows) {
BitSet bitSet = new BitSet(numerOfRows);
Object[] filterValues = this.msrFilterRangeValues;
DataType msrType = msrColEvalutorInfoList.get(0).getType();
SerializableComparator comparator = Comparator.getComparatorByDataTypeForMeasure(msrType);
BitSet nullBitSet = columnPage.getNullBits();
for (int i = 0; i < filterValues.length; i++) {
if (filterValues[i] == null) {
for (int j = nullBitSet.nextSetBit(0); j >= 0; j = nullBitSet.nextSetBit(j + 1)) {
bitSet.set(j);
}
continue;
}
for (int startIndex = 0; startIndex < numerOfRows; startIndex++) {
if (!nullBitSet.get(startIndex)) {
Object msrValue = DataTypeUtil
.getMeasureObjectBasedOnDataType(columnPage, startIndex,
msrType, msrColEvalutorInfoList.get(0).getMeasure());
if (comparator.compare(msrValue, filterValues[i]) <= 0) {
// This is a match.
bitSet.set(startIndex);
}
}
}
}
return bitSet;
}
private BitSet getFilteredIndexes(DimensionColumnDataChunk dimensionColumnDataChunk,
int numerOfRows) {
byte[] defaultValue = null;
if (dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
DirectDictionaryGenerator directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory
.getDirectDictionaryGenerator(
dimColEvaluatorInfoList.get(0).getDimension().getDataType());
int key = directDictionaryGenerator.generateDirectSurrogateKey(null);
CarbonDimension currentBlockDimension =
segmentProperties.getDimensions().get(dimensionBlocksIndex[0]);
if (currentBlockDimension.isSortColumn()) {
defaultValue = FilterUtil.getMaskKey(key, currentBlockDimension,
this.segmentProperties.getSortColumnsGenerator());
} else {
defaultValue = ByteUtil.toBytes(key);
}
} else if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() != DataTypes.STRING) {
defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
}
BitSet bitSet = null;
if (dimensionColumnDataChunk.isExplicitSorted()) {
bitSet = setFilterdIndexToBitSetWithColumnIndex(dimensionColumnDataChunk, numerOfRows,
defaultValue);
} else {
bitSet = setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows, defaultValue);
}
if (dimensionColumnDataChunk.isNoDicitionaryColumn()) {
FilterUtil.removeNullValues(dimensionColumnDataChunk, bitSet,
CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY);
}
return bitSet;
}
/**
* Method will scan the block and finds the range start index from which all members
* will be considered for applying range filters. this method will be called if the
* column is not supported by default so column index mapping will be present for
* accesing the members from the block.
*
* @param dimensionColumnDataChunk
* @param numerOfRows
* @return BitSet.
*/
private BitSet setFilterdIndexToBitSetWithColumnIndex(
DimensionColumnDataChunk dimensionColumnDataChunk, int numerOfRows,
byte[] defaultValue) {
BitSet bitSet = new BitSet(numerOfRows);
int start = 0;
int last = 0;
int skip = 0;
int startIndex = 0;
byte[][] filterValues = this.filterRangeValues;
//find the number of default values to skip the null value in case of direct dictionary
if (null != defaultValue) {
start = CarbonUtil
.getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
defaultValue, true);
if (start < 0) {
skip = -(start + 1);
// end of block
if (skip == numerOfRows) {
return bitSet;
}
} else {
// as start will be last index of null value inclusive
// so adding 1 to skip last null value
skip = start + 1;
}
startIndex = skip;
}
for (int i = 0; i < filterValues.length; i++) {
start = CarbonUtil
.getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
filterValues[i], true);
if (start < 0) {
start = -(start + 1);
if (start >= numerOfRows) {
start = start - 1;
}
// When negative value of start is returned from getFirstIndexUsingBinarySearch the Start
// will be pointing to the next consecutive position. So compare it again and point to the
// previous value returned from getFirstIndexUsingBinarySearch.
if (ByteUtil.compare(filterValues[i],
dimensionColumnDataChunk.getChunkData(dimensionColumnDataChunk.getInvertedIndex(start)))
< 0) {
start = start - 1;
}
}
last = start;
for (int j = start; j >= skip; j--) {
bitSet.set(dimensionColumnDataChunk.getInvertedIndex(j));
last--;
}
startIndex = last;
if (startIndex <= 0) {
break;
}
}
return bitSet;
}
/**
* Method will scan the block and finds the range start index from which all
* members will be considered for applying range filters. this method will
* be called if the column is sorted default so column index
* mapping will be present for accesing the members from the block.
*
* @param dimensionColumnDataChunk
* @param numerOfRows
* @param defaultValue
* @return BitSet.
*/
private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnDataChunk,
int numerOfRows, byte[] defaultValue) {
BitSet bitSet = new BitSet(numerOfRows);
byte[][] filterValues = this.filterRangeValues;
// binary search can only be applied if column is sorted
if (isNaturalSorted) {
int start = 0;
int last = 0;
int startIndex = 0;
int skip = 0;
//find the number of default values to skip the null value in case of direct dictionary
if (null != defaultValue) {
start = CarbonUtil
.getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex,
numerOfRows - 1, defaultValue, true);
if (start < 0) {
skip = -(start + 1);
// end of block
if (skip == numerOfRows) {
return bitSet;
}
} else {
// as start will be last index of null value inclusive
// so adding 1 to skip last null value
skip = start + 1;
}
startIndex = skip;
}
for (int k = 0; k < filterValues.length; k++) {
start = CarbonUtil
.getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex,
numerOfRows - 1, filterValues[k], true);
if (start < 0) {
start = -(start + 1);
if (start >= numerOfRows) {
start = start - 1;
}
// When negative value of start is returned from getFirstIndexUsingBinarySearch the Start
// will be pointing to the next consecutive position. So compare it again and point to the
// previous value returned from getFirstIndexUsingBinarySearch.
if (ByteUtil.compare(filterValues[k], dimensionColumnDataChunk.getChunkData(start)) < 0) {
start = start - 1;
}
}
last = start;
for (int j = start; j >= skip; j--) {
bitSet.set(j);
last--;
}
startIndex = last;
if (startIndex <= 0) {
break;
}
}
} else {
for (int k = 0; k < filterValues.length; k++) {
for (int i = 0; i < numerOfRows; i++) {
if (ByteUtil.compare(dimensionColumnDataChunk.getChunkData(i), filterValues[k]) <= 0) {
bitSet.set(i);
}
}
}
}
return bitSet;
}
@Override public void readBlocks(BlocksChunkHolder blockChunkHolder) throws IOException {
if (isDimensionPresentInCurrentBlock[0]) {
if (!dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DICTIONARY)) {
super.readBlocks(blockChunkHolder);
}
int blockIndex = dimensionBlocksIndex[0];
if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) {
blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock()
.getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex);
}
} else if (isMeasurePresentInCurrentBlock[0]) {
int blockIndex = measureBlocksIndex[0];
if (null == blockChunkHolder.getMeasureRawDataChunk()[blockIndex]) {
blockChunkHolder.getMeasureRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock()
.getMeasureChunk(blockChunkHolder.getFileReader(), blockIndex);
}
}
}
}