blob: fad37fc0179e517a445a4bf2854d8eb18d198fb7 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.core.scan.filter.executer;
import java.io.IOException;
import java.util.BitSet;
import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk;
import org.apache.carbondata.core.datastore.page.ColumnPage;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.scan.filter.FilterUtil;
import org.apache.carbondata.core.scan.filter.intf.RowIntf;
import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo;
import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo;
import org.apache.carbondata.core.scan.processor.BlocksChunkHolder;
import org.apache.carbondata.core.util.BitSetGroup;
import org.apache.carbondata.core.util.ByteUtil;
import org.apache.carbondata.core.util.CarbonUtil;
import org.apache.carbondata.core.util.DataTypeUtil;
import org.apache.carbondata.core.util.comparator.Comparator;
import org.apache.carbondata.core.util.comparator.SerializableComparator;
public class ExcludeFilterExecuterImpl implements FilterExecuter {
/** Resolved dimension filter info; assigned by the constructor only when the filter is on a dimension. */
protected DimColumnResolvedFilterInfo dimColEvaluatorInfo;
/** Executer-side dimension filter info; its exclude filter keys are read when filtering rows. */
protected DimColumnExecuterFilterInfo dimColumnExecuterInfo;
/** Resolved measure filter info; assigned by the constructor only when the filter is on a measure. */
protected MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo;
/** Executer-side measure filter info; its filter keys are read when filtering rows. */
protected MeasureColumnExecuterFilterInfo msrColumnExecutorInfo;
/** Segment properties used to map column ordinals to block indexes. */
protected SegmentProperties segmentProperties;
/** Set to true by the constructor when this executer filters on a dimension column. */
protected boolean isDimensionPresentInCurrentBlock = false;
/** Set to true by the constructor when this executer filters on a measure column. */
protected boolean isMeasurePresentInCurrentBlock = false;
/** Comparator for measure values; created by the constructor only for measure filters. */
private SerializableComparator comparator;
/**
 * Whether the dimension column data is naturally sorted. Computed in the constructor as
 * isUseInvertedIndex() && isSortColumn() of the filtered dimension; stays false for measures.
 */
private boolean isNaturalSorted = false;
/**
 * Creates an exclude filter executer for exactly one column, either a dimension or a measure.
 *
 * @param dimColEvaluatorInfo    resolved dimension filter info, read only when
 *                               {@code isMeasure} is false
 * @param msrColumnEvaluatorInfo resolved measure filter info, read only when
 *                               {@code isMeasure} is true
 * @param segmentProperties      segment properties, stored and also passed on while preparing
 *                               the filter keys
 * @param isMeasure              true to filter on the measure, false to filter on the dimension
 */
public ExcludeFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo,
    MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo, SegmentProperties segmentProperties,
    boolean isMeasure) {
  this.segmentProperties = segmentProperties;

  if (isMeasure) {
    // measure filter: prepare the filter keys and the comparator for the measure data type
    this.msrColumnEvaluatorInfo = msrColumnEvaluatorInfo;
    this.msrColumnExecutorInfo = new MeasureColumnExecuterFilterInfo();
    FilterUtil.prepareKeysFromSurrogates(msrColumnEvaluatorInfo.getFilterValues(),
        segmentProperties, null, null, msrColumnEvaluatorInfo.getMeasure(),
        this.msrColumnExecutorInfo);
    this.isMeasurePresentInCurrentBlock = true;
    this.comparator =
        Comparator.getComparatorByDataTypeForMeasure(getMeasureDataType(msrColumnEvaluatorInfo));
    return;
  }

  // dimension filter: prepare the exclude keys and remember whether the data is natural sorted
  this.dimColEvaluatorInfo = dimColEvaluatorInfo;
  this.dimColumnExecuterInfo = new DimColumnExecuterFilterInfo();
  FilterUtil.prepareKeysFromSurrogates(dimColEvaluatorInfo.getFilterValues(), segmentProperties,
      dimColEvaluatorInfo.getDimension(), this.dimColumnExecuterInfo, null, null);
  this.isDimensionPresentInCurrentBlock = true;
  this.isNaturalSorted = dimColEvaluatorInfo.getDimension().isUseInvertedIndex()
      && dimColEvaluatorInfo.getDimension().isSortColumn();
}
/**
 * Applies the exclude filter page by page on the filtered column of the given block.
 * The raw column chunk is read through the holder's file reader if it has not been loaded yet.
 *
 * @param blockChunkHolder  holder of the raw chunks, data block, file reader and previous bitsets
 * @param useBitsetPipeLine passed on to the per-page filtering to decide whether the previous
 *                          bitset group may be reused
 * @return one bitset per page, or null when neither a dimension nor a measure is present
 * @throws IOException declared for reading the column chunk
 */
@Override
public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder, boolean useBitsetPipeLine)
    throws IOException {
  if (isDimensionPresentInCurrentBlock) {
    int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping()
        .get(dimColEvaluatorInfo.getColumnIndex());
    DimensionRawColumnChunk[] rawChunks = blockChunkHolder.getDimensionRawDataChunk();
    if (rawChunks[blockIndex] == null) {
      // lazily read the dimension chunk
      rawChunks[blockIndex] = blockChunkHolder.getDataBlock()
          .getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex);
    }
    DimensionRawColumnChunk rawChunk = rawChunks[blockIndex];
    DimensionColumnDataChunk[] pages = rawChunk.convertToDimColDataChunks();
    BitSetGroup result = new BitSetGroup(rawChunk.getPagesCount());
    for (int page = 0; page < pages.length; page++) {
      BitSet pageResult = getFilteredIndexes(pages[page], rawChunk.getRowCount()[page],
          useBitsetPipeLine, blockChunkHolder.getBitSetGroup(), page);
      result.setBitSet(pageResult, page);
    }
    return result;
  }

  if (!isMeasurePresentInCurrentBlock) {
    return null;
  }

  int blockIndex = segmentProperties.getMeasuresOrdinalToBlockMapping()
      .get(msrColumnEvaluatorInfo.getColumnIndex());
  MeasureRawColumnChunk[] rawChunks = blockChunkHolder.getMeasureRawDataChunk();
  if (rawChunks[blockIndex] == null) {
    // lazily read the measure chunk
    rawChunks[blockIndex] = blockChunkHolder.getDataBlock()
        .getMeasureChunk(blockChunkHolder.getFileReader(), blockIndex);
  }
  MeasureRawColumnChunk rawChunk = rawChunks[blockIndex];
  ColumnPage[] columnPages = rawChunk.convertToColumnPage();
  BitSetGroup result = new BitSetGroup(rawChunk.getPagesCount());
  DataType msrType = getMeasureDataType(msrColumnEvaluatorInfo);
  for (int page = 0; page < columnPages.length; page++) {
    BitSet pageResult = getFilteredIndexesForMeasure(rawChunk.convertToColumnPage(page),
        rawChunk.getRowCount()[page], useBitsetPipeLine, blockChunkHolder.getBitSetGroup(), page,
        msrType);
    result.setBitSet(pageResult, page);
  }
  return result;
}
/**
 * Applies the exclude filter on a single row.
 *
 * @param value         row whose column value is checked
 * @param dimOrdinalMax offset added to the measure ordinal when reading a measure from the row
 * @return false when the row value equals one of the filter values, true otherwise
 */
@Override public boolean applyFilter(RowIntf value, int dimOrdinalMax) {
  if (isDimensionPresentInCurrentBlock) {
    byte[][] excludeKeys = dimColumnExecuterInfo.getExcludeFilterKeys();
    byte[] rowValue = (byte[]) value.getVal(dimColEvaluatorInfo.getDimension().getOrdinal());
    for (byte[] excludeKey : excludeKeys) {
      int cmp = ByteUtil.UnsafeComparer.INSTANCE
          .compareTo(rowValue, 0, rowValue.length, excludeKey, 0, excludeKey.length);
      if (cmp == 0) {
        return false;
      }
    }
    return true;
  }

  if (isMeasurePresentInCurrentBlock) {
    Object[] filterKeys = msrColumnExecutorInfo.getFilterKeys();
    Object rowValue =
        value.getVal(msrColumnEvaluatorInfo.getMeasure().getOrdinal() + dimOrdinalMax);
    for (Object filterKey : filterKeys) {
      if (filterKey == null) {
        // a null filter key excludes only null row values
        if (rowValue == null) {
          return false;
        }
      } else if (comparator.compare(rowValue, filterKey) == 0) {
        return false;
      }
    }
  }
  return true;
}
/**
 * Maps the type of the filtered measure to the data type used for comparing its values.
 * BOOLEAN, SHORT, INT and LONG are kept, any decimal type becomes the default decimal type,
 * and everything else is treated as DOUBLE.
 *
 * @param msrColumnEvaluatorInfo resolved measure filter info whose type is inspected
 * @return data type to be used for the measure
 */
private DataType getMeasureDataType(MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo) {
  DataType type = msrColumnEvaluatorInfo.getType();
  boolean keptAsIs = type == DataTypes.BOOLEAN || type == DataTypes.SHORT
      || type == DataTypes.INT || type == DataTypes.LONG;
  if (keptAsIs) {
    return type;
  }
  return DataTypes.isDecimal(type) ? DataTypes.createDefaultDecimalType() : DataTypes.DOUBLE;
}
/**
 * Applies the exclude filter on one measure page by scanning every row.
 * All rows start as selected; a row is removed when its value equals one of the filter keys.
 * A null filter key removes the rows marked in the page's null bitset.
 *
 * @param columnPage  measure page to be filtered
 * @param numerOfRows number of rows in the page
 * @param msrType     data type used to read and compare the measure values
 * @return bitset in which the rows that survive the exclude filter are set
 */
private BitSet getFilteredIndexes(ColumnPage columnPage, int numerOfRows, DataType msrType) {
  BitSet bitSet = new BitSet(numerOfRows);
  bitSet.set(0, numerOfRows);
  Object[] filterValues = msrColumnExecutorInfo.getFilterKeys();
  SerializableComparator msrComparator = Comparator.getComparatorByDataTypeForMeasure(msrType);
  // the null bitset does not depend on the filter value, so read it once
  BitSet nullBitSet = columnPage.getNullBits();
  for (Object filterValue : filterValues) {
    if (filterValue == null) {
      // Exclude every null row. andNot/clear are used instead of flip so that a repeated
      // filter key can never switch an already excluded row back on.
      bitSet.andNot(nullBitSet);
      continue;
    }
    for (int rowId = 0; rowId < numerOfRows; rowId++) {
      if (nullBitSet.get(rowId)) {
        // null rows are handled only by a null filter key
        continue;
      }
      Object msrValue = DataTypeUtil.getMeasureObjectBasedOnDataType(columnPage, rowId, msrType,
          msrColumnEvaluatorInfo.getMeasure());
      if (msrComparator.compare(msrValue, filterValue) == 0) {
        // matching row is excluded
        bitSet.clear(rowId);
      }
    }
  }
  return bitSet;
}
/**
 * Filters one measure page, choosing between a scan restricted to the previously
 * filtered rows and a scan of the whole page.
 *
 * @param measureColumnPage page to be filtered
 * @param numberOfRows      number of rows in the page
 * @param useBitsetPipeLine whether the bitset pipeline is enabled
 * @param prvBitSetGroup    bitsets produced by the previous filter
 * @param pageNumber        number of the page inside the chunk
 * @param msrDataType       data type used to read and compare the measure values
 * @return filtered indexes bitset
 */
private BitSet getFilteredIndexesForMeasure(ColumnPage measureColumnPage, int numberOfRows,
    boolean useBitsetPipeLine, BitSetGroup prvBitSetGroup, int pageNumber, DataType msrDataType) {
  int filterKeyCount = msrColumnExecutorInfo.getFilterKeys().length;
  // decide whether reusing the previous bitset is considered optimal for this page
  boolean reusePrevious = CarbonUtil
      .usePreviousFilterBitsetGroup(useBitsetPipeLine, prvBitSetGroup, pageNumber, filterKeyCount);
  return reusePrevious
      ? getFilteredIndexesForMsrUsingPrvBitSet(measureColumnPage, prvBitSetGroup, pageNumber,
          numberOfRows, msrDataType)
      : getFilteredIndexes(measureColumnPage, numberOfRows, msrDataType);
}
/**
 * Applies the exclude filter on one measure page, comparing only the rows that are set in
 * the previous filter's bitset for that page.
 * All rows start as selected. A null filter key removes every row marked in the page's null
 * bitset; a non-null filter key removes the non-null rows of the previous bitset whose value
 * equals the key.
 *
 * @param measureColumnPage page to be filtered
 * @param prvBitSetGroup    bitsets produced by the previous filter
 * @param pageNumber        number of the page inside the chunk
 * @param numberOfRows      number of rows in the page
 * @param msrDataType       data type used to read and compare the measure values
 * @return filtered indexes bitset
 */
private BitSet getFilteredIndexesForMsrUsingPrvBitSet(ColumnPage measureColumnPage,
    BitSetGroup prvBitSetGroup, int pageNumber, int numberOfRows, DataType msrDataType) {
  BitSet bitSet = new BitSet(numberOfRows);
  bitSet.set(0, numberOfRows);
  Object[] filterValues = msrColumnExecutorInfo.getFilterKeys();
  BitSet nullBitSet = measureColumnPage.getNullBits();
  BitSet prvPageBitSet = prvBitSetGroup.getBitSet(pageNumber);
  SerializableComparator msrComparator =
      Comparator.getComparatorByDataTypeForMeasure(msrDataType);
  for (Object filterValue : filterValues) {
    if (filterValue == null) {
      // Exclude every null row. andNot/clear are used instead of flip so that a repeated
      // filter key can never switch an already excluded row back on.
      bitSet.andNot(nullBitSet);
      continue;
    }
    for (int index = prvPageBitSet.nextSetBit(0); index >= 0;
        index = prvPageBitSet.nextSetBit(index + 1)) {
      if (nullBitSet.get(index)) {
        // null rows are handled only by a null filter key
        continue;
      }
      Object msrValue = DataTypeUtil.getMeasureObjectBasedOnDataType(measureColumnPage, index,
          msrDataType, msrColumnEvaluatorInfo.getMeasure());
      if (msrComparator.compare(msrValue, filterValue) == 0) {
        // matching row is excluded
        bitSet.clear(index);
      }
    }
  }
  return bitSet;
}
/**
 * Filters one dimension page, choosing between a scan restricted to the previously
 * filtered rows and a scan of the whole page.
 *
 * @param dimensionColumnDataChunk page to be filtered
 * @param numberOfRows             number of rows in the page
 * @param useBitsetPipeLine        whether the bitset pipeline is enabled
 * @param prvBitSetGroup           bitsets produced by the previous filter
 * @param pageNumber               number of the page inside the chunk
 * @return filtered indexes bitset
 */
private BitSet getFilteredIndexes(DimensionColumnDataChunk dimensionColumnDataChunk,
    int numberOfRows, boolean useBitsetPipeLine, BitSetGroup prvBitSetGroup, int pageNumber) {
  int excludeKeyCount = dimColumnExecuterInfo.getExcludeFilterKeys().length;
  // fall back to the full page scan unless reusing the previous bitset is considered optimal
  if (!CarbonUtil.usePreviousFilterBitsetGroup(useBitsetPipeLine, prvBitSetGroup, pageNumber,
      excludeKeyCount)) {
    return getFilteredIndexes(dimensionColumnDataChunk, numberOfRows);
  }
  return getFilteredIndexesUisngPrvBitset(dimensionColumnDataChunk, prvBitSetGroup, pageNumber,
      numberOfRows);
}
/**
 * Filters a whole dimension page. Explicitly sorted pages are filtered through their
 * inverted index, all other pages are filtered directly by row id.
 *
 * @param dimensionColumnDataChunk page to be filtered
 * @param numberOfRows             number of rows in the page
 * @return filtered indexes bitset
 */
private BitSet getFilteredIndexes(DimensionColumnDataChunk dimensionColumnDataChunk,
    int numberOfRows) {
  return dimensionColumnDataChunk.isExplicitSorted()
      ? setFilterdIndexToBitSetWithColumnIndex(dimensionColumnDataChunk, numberOfRows)
      : setFilterdIndexToBitSet(dimensionColumnDataChunk, numberOfRows);
}
/**
 * Applies the exclude filter only on the rows selected by the previous filter.
 * The result starts as a copy of the previous page bitset; every row for which
 * CarbonUtil.isFilterPresent reports 0 (treated here as "matches an exclude key")
 * is removed from it.
 *
 * @param dimensionColumnDataChunk page to be filtered
 * @param prvBitSetGroup           bitsets produced by the previous filter
 * @param pageNumber               number of the page inside the chunk
 * @param numberOfRows             number of rows in the page (not used by this method)
 * @return filtered indexes bitset
 */
private BitSet getFilteredIndexesUisngPrvBitset(DimensionColumnDataChunk dimensionColumnDataChunk,
    BitSetGroup prvBitSetGroup, int pageNumber, int numberOfRows) {
  BitSet candidates = prvBitSetGroup.getBitSet(pageNumber);
  BitSet survivors = new BitSet();
  survivors.or(candidates);
  byte[][] excludeKeys = dimColumnExecuterInfo.getExcludeFilterKeys();
  int lastKeyIndex = excludeKeys.length - 1;
  // for explicitly sorted data the row id is translated through the inverted reverse index,
  // otherwise the row id from the previous bitset is used as it is
  boolean explicitSorted = dimensionColumnDataChunk.isExplicitSorted();
  for (int row = candidates.nextSetBit(0); row >= 0; row = candidates.nextSetBit(row + 1)) {
    int lookupIndex =
        explicitSorted ? dimensionColumnDataChunk.getInvertedReverseIndex(row) : row;
    int compareResult = CarbonUtil
        .isFilterPresent(excludeKeys, dimensionColumnDataChunk, 0, lastKeyIndex, lookupIndex);
    if (compareResult == 0) {
      survivors.clear(row);
    }
  }
  return survivors;
}
/**
 * Applies the exclude filter on a page that is accessed through its inverted index.
 * All rows start as selected. For every exclude key the matching range is located with a
 * binary search and the rows it maps to through the inverted index are toggled. Each search
 * starts after the end of the last range that was found.
 *
 * @param dimensionColumnDataChunk page to be filtered
 * @param numerOfRows              number of rows in the page
 * @return filtered indexes bitset
 */
private BitSet setFilterdIndexToBitSetWithColumnIndex(
    DimensionColumnDataChunk dimensionColumnDataChunk, int numerOfRows) {
  BitSet bitSet = new BitSet(numerOfRows);
  bitSet.set(0, numerOfRows);
  int searchFrom = 0;
  int lastRow = numerOfRows - 1;
  for (byte[] excludeKey : dimColumnExecuterInfo.getExcludeFilterKeys()) {
    if (searchFrom >= numerOfRows) {
      // nothing left to search
      break;
    }
    int[] range = CarbonUtil.getRangeIndexUsingBinarySearch(dimensionColumnDataChunk,
        searchFrom, lastRow, excludeKey);
    int rangeEnd = range[1];
    for (int sortedIndex = range[0]; sortedIndex <= rangeEnd; sortedIndex++) {
      bitSet.flip(dimensionColumnDataChunk.getInvertedIndex(sortedIndex));
    }
    if (rangeEnd >= 0) {
      searchFrom = rangeEnd + 1;
    }
  }
  return bitSet;
}
/**
 * Applies the exclude filter on a page that is accessed directly by row id.
 * All rows start as selected and every row matching an exclude key is toggled.
 * Natural sorted data is searched per key with a binary search over the rows; otherwise
 * every row is checked, by binary search over the keys when there is more than one key and
 * by a direct comparison when there is exactly one.
 *
 * @param dimensionColumnDataChunk page to be filtered
 * @param numerOfRows              number of rows in the page
 * @return filtered indexes bitset
 */
private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnDataChunk,
    int numerOfRows) {
  BitSet bitSet = new BitSet(numerOfRows);
  bitSet.set(0, numerOfRows);
  byte[][] excludeKeys = dimColumnExecuterInfo.getExcludeFilterKeys();
  // no exclude keys: every row stays selected
  if (excludeKeys.length == 0) {
    return bitSet;
  }

  // binary search over the rows can only be applied if the column is sorted
  if (isNaturalSorted) {
    int searchFrom = 0;
    int lastRow = numerOfRows - 1;
    for (byte[] excludeKey : excludeKeys) {
      if (searchFrom >= numerOfRows) {
        break;
      }
      int[] range = CarbonUtil.getRangeIndexUsingBinarySearch(dimensionColumnDataChunk,
          searchFrom, lastRow, excludeKey);
      int rangeEnd = range[1];
      for (int row = range[0]; row <= rangeEnd; row++) {
        bitSet.flip(row);
      }
      if (rangeEnd >= 0) {
        searchFrom = rangeEnd + 1;
      }
    }
    return bitSet;
  }

  if (excludeKeys.length == 1) {
    // single key: compare it against every row
    byte[] onlyKey = excludeKeys[0];
    for (int row = 0; row < numerOfRows; row++) {
      if (dimensionColumnDataChunk.compareTo(row, onlyKey) == 0) {
        bitSet.flip(row);
      }
    }
    return bitSet;
  }

  // several keys: look every row value up in the key array
  int lastKeyIndex = excludeKeys.length - 1;
  for (int row = 0; row < numerOfRows; row++) {
    int found = CarbonUtil.binarySearch(excludeKeys, 0, lastKeyIndex,
        dimensionColumnDataChunk.getChunkData(row));
    if (found >= 0) {
      bitSet.flip(row);
    }
  }
  return bitSet;
}
/**
 * Always reports that the block has to be scanned; the min/max values are not inspected.
 *
 * @param blockMaxValue max values of the block (not used)
 * @param blockMinValue min values of the block (not used)
 * @return bitset with only bit 0 set
 */
@Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) {
  BitSet scanRequired = new BitSet(1);
  scanRequired.set(0);
  return scanRequired;
}
/**
 * Reads the raw chunk of the filtered column into the holder if it is not loaded yet.
 *
 * @param blockChunkHolder holder of the raw chunks, data block and file reader
 * @throws IOException declared for reading the column chunk
 */
@Override public void readBlocks(BlocksChunkHolder blockChunkHolder) throws IOException {
  if (isDimensionPresentInCurrentBlock) {
    int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping()
        .get(dimColEvaluatorInfo.getColumnIndex());
    DimensionRawColumnChunk[] rawChunks = blockChunkHolder.getDimensionRawDataChunk();
    if (rawChunks[blockIndex] == null) {
      rawChunks[blockIndex] = blockChunkHolder.getDataBlock()
          .getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex);
    }
    return;
  }

  if (isMeasurePresentInCurrentBlock) {
    int blockIndex = segmentProperties.getMeasuresOrdinalToBlockMapping()
        .get(msrColumnEvaluatorInfo.getColumnIndex());
    MeasureRawColumnChunk[] rawChunks = blockChunkHolder.getMeasureRawDataChunk();
    if (rawChunks[blockIndex] == null) {
      rawChunks[blockIndex] = blockChunkHolder.getDataBlock()
          .getMeasureChunk(blockChunkHolder.getFileReader(), blockIndex);
    }
  }
}
}