| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.carbondata.core.scan.filter.executer; |
| |
| import java.io.IOException; |
| import java.util.BitSet; |
| |
| import org.apache.carbondata.core.datastore.block.SegmentProperties; |
| import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; |
| import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; |
| import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; |
| import org.apache.carbondata.core.datastore.page.ColumnPage; |
| import org.apache.carbondata.core.metadata.datatype.DataType; |
| import org.apache.carbondata.core.metadata.datatype.DataTypes; |
| import org.apache.carbondata.core.scan.filter.FilterUtil; |
| import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; |
| import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; |
| import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; |
| import org.apache.carbondata.core.util.BitSetGroup; |
| import org.apache.carbondata.core.util.ByteUtil; |
| import org.apache.carbondata.core.util.CarbonUtil; |
| import org.apache.carbondata.core.util.DataTypeUtil; |
| import org.apache.carbondata.core.util.comparator.Comparator; |
| import org.apache.carbondata.core.util.comparator.SerializableComparator; |
| |
| public class IncludeFilterExecuterImpl implements FilterExecuter { |
| |
| protected DimColumnResolvedFilterInfo dimColumnEvaluatorInfo; |
| protected DimColumnExecuterFilterInfo dimColumnExecuterInfo; |
| protected MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo; |
| protected MeasureColumnExecuterFilterInfo msrColumnExecutorInfo; |
| protected SegmentProperties segmentProperties; |
| protected boolean isDimensionPresentInCurrentBlock = false; |
| protected boolean isMeasurePresentInCurrentBlock = false; |
| protected SerializableComparator comparator; |
| /** |
| * is dimension column data is natural sorted |
| */ |
| private boolean isNaturalSorted = false; |
| |
| public IncludeFilterExecuterImpl(DimColumnResolvedFilterInfo dimColumnEvaluatorInfo, |
| MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo, SegmentProperties segmentProperties, |
| boolean isMeasure) { |
| |
| this.segmentProperties = segmentProperties; |
| if (!isMeasure) { |
| this.dimColumnEvaluatorInfo = dimColumnEvaluatorInfo; |
| dimColumnExecuterInfo = new DimColumnExecuterFilterInfo(); |
| FilterUtil |
| .prepareKeysFromSurrogates(dimColumnEvaluatorInfo.getFilterValues(), segmentProperties, |
| dimColumnEvaluatorInfo.getDimension(), dimColumnExecuterInfo, null, null); |
| isDimensionPresentInCurrentBlock = true; |
| isNaturalSorted = |
| dimColumnEvaluatorInfo.getDimension().isUseInvertedIndex() && dimColumnEvaluatorInfo |
| .getDimension().isSortColumn(); |
| |
| } else { |
| this.msrColumnEvaluatorInfo = msrColumnEvaluatorInfo; |
| msrColumnExecutorInfo = new MeasureColumnExecuterFilterInfo(); |
| comparator = |
| Comparator.getComparatorByDataTypeForMeasure(getMeasureDataType(msrColumnEvaluatorInfo)); |
| FilterUtil |
| .prepareKeysFromSurrogates(msrColumnEvaluatorInfo.getFilterValues(), segmentProperties, |
| null, null, msrColumnEvaluatorInfo.getMeasure(), msrColumnExecutorInfo); |
| isMeasurePresentInCurrentBlock = true; |
| |
| } |
| |
| } |
| |
| @Override |
| public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder, boolean useBitsetPipeLine) |
| throws IOException { |
| if (isDimensionPresentInCurrentBlock == true) { |
| int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping() |
| .get(dimColumnEvaluatorInfo.getColumnIndex()); |
| if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) { |
| blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock() |
| .getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex); |
| } |
| DimensionRawColumnChunk dimensionRawColumnChunk = |
| blockChunkHolder.getDimensionRawDataChunk()[blockIndex]; |
| BitSetGroup bitSetGroup = new BitSetGroup(dimensionRawColumnChunk.getPagesCount()); |
| for (int i = 0; i < dimensionRawColumnChunk.getPagesCount(); i++) { |
| if (dimensionRawColumnChunk.getMaxValues() != null) { |
| if (isScanRequired(dimensionRawColumnChunk.getMaxValues()[i], |
| dimensionRawColumnChunk.getMinValues()[i], dimColumnExecuterInfo.getFilterKeys())) { |
| BitSet bitSet = getFilteredIndexes(dimensionRawColumnChunk.convertToDimColDataChunk(i), |
| dimensionRawColumnChunk.getRowCount()[i], useBitsetPipeLine, |
| blockChunkHolder.getBitSetGroup(), i); |
| bitSetGroup.setBitSet(bitSet, i); |
| } |
| } else { |
| BitSet bitSet = getFilteredIndexes(dimensionRawColumnChunk.convertToDimColDataChunk(i), |
| dimensionRawColumnChunk.getRowCount()[i], useBitsetPipeLine, |
| blockChunkHolder.getBitSetGroup(), i); |
| bitSetGroup.setBitSet(bitSet, i); |
| } |
| } |
| return bitSetGroup; |
| } else if (isMeasurePresentInCurrentBlock) { |
| int blockIndex = segmentProperties.getMeasuresOrdinalToBlockMapping() |
| .get(msrColumnEvaluatorInfo.getColumnIndex()); |
| if (null == blockChunkHolder.getMeasureRawDataChunk()[blockIndex]) { |
| blockChunkHolder.getMeasureRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock() |
| .getMeasureChunk(blockChunkHolder.getFileReader(), blockIndex); |
| } |
| MeasureRawColumnChunk measureRawColumnChunk = |
| blockChunkHolder.getMeasureRawDataChunk()[blockIndex]; |
| BitSetGroup bitSetGroup = new BitSetGroup(measureRawColumnChunk.getPagesCount()); |
| DataType msrType = getMeasureDataType(msrColumnEvaluatorInfo); |
| for (int i = 0; i < measureRawColumnChunk.getPagesCount(); i++) { |
| if (measureRawColumnChunk.getMaxValues() != null) { |
| if (isScanRequired(measureRawColumnChunk.getMaxValues()[i], |
| measureRawColumnChunk.getMinValues()[i], msrColumnExecutorInfo.getFilterKeys(), |
| msrColumnEvaluatorInfo.getType())) { |
| BitSet bitSet = |
| getFilteredIndexesForMeasure(measureRawColumnChunk.convertToColumnPage(i), |
| measureRawColumnChunk.getRowCount()[i], useBitsetPipeLine, |
| blockChunkHolder.getBitSetGroup(), i, msrType); |
| bitSetGroup.setBitSet(bitSet, i); |
| } |
| } else { |
| BitSet bitSet = |
| getFilteredIndexesForMeasure(measureRawColumnChunk.convertToColumnPage(i), |
| measureRawColumnChunk.getRowCount()[i], useBitsetPipeLine, |
| blockChunkHolder.getBitSetGroup(), i, msrType); |
| bitSetGroup.setBitSet(bitSet, i); |
| } |
| } |
| return bitSetGroup; |
| } |
| return null; |
| } |
| |
| private DataType getMeasureDataType(MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo) { |
| if (msrColumnEvaluatorInfo.getType() == DataTypes.BOOLEAN) { |
| return DataTypes.BOOLEAN; |
| } else if (msrColumnEvaluatorInfo.getType() == DataTypes.SHORT) { |
| return DataTypes.SHORT; |
| } else if (msrColumnEvaluatorInfo.getType() == DataTypes.INT) { |
| return DataTypes.INT; |
| } else if (msrColumnEvaluatorInfo.getType() == DataTypes.LONG) { |
| return DataTypes.LONG; |
| } else if (msrColumnEvaluatorInfo.getType() == DataTypes.DECIMAL) { |
| return DataTypes.DECIMAL; |
| } else { |
| return DataTypes.DOUBLE; |
| } |
| } |
| |
| private BitSet getFilteredIndexesForMeasures(ColumnPage columnPage, |
| int rowsInPage, DataType msrType) { |
| // Here the algorithm is |
| // Get the measure values from the chunk. compare sequentially with the |
| // the filter values. The one that matches sets it Bitset. |
| BitSet bitSet = new BitSet(rowsInPage); |
| Object[] filterValues = msrColumnExecutorInfo.getFilterKeys(); |
| |
| SerializableComparator comparator = Comparator.getComparatorByDataTypeForMeasure(msrType); |
| BitSet nullBitSet = columnPage.getNullBits(); |
| for (int i = 0; i < filterValues.length; i++) { |
| if (filterValues[i] == null) { |
| for (int j = nullBitSet.nextSetBit(0); j >= 0; j = nullBitSet.nextSetBit(j + 1)) { |
| bitSet.set(j); |
| } |
| continue; |
| } |
| for (int startIndex = 0; startIndex < rowsInPage; startIndex++) { |
| if (!nullBitSet.get(startIndex)) { |
| // Check if filterValue[i] matches with measure Values. |
| Object msrValue = DataTypeUtil |
| .getMeasureObjectBasedOnDataType(columnPage, startIndex, |
| msrType, msrColumnEvaluatorInfo.getMeasure()); |
| |
| if (comparator.compare(msrValue, filterValues[i]) == 0) { |
| // This is a match. |
| bitSet.set(startIndex); |
| } |
| } |
| } |
| } |
| return bitSet; |
| } |
| |
| /** |
| * Below method will be used to apply filter on measure column |
| * @param measureColumnPage |
| * @param numberOfRows |
| * @param useBitsetPipeLine |
| * @param prvBitSetGroup |
| * @param pageNumber |
| * @param msrDataType |
| * @return filtered indexes bitset |
| */ |
| private BitSet getFilteredIndexesForMeasure(ColumnPage measureColumnPage, int numberOfRows, |
| boolean useBitsetPipeLine, BitSetGroup prvBitSetGroup, int pageNumber, DataType msrDataType) { |
| // check whether previous indexes can be optimal to apply filter on measure column |
| if (CarbonUtil.usePreviousFilterBitsetGroup(useBitsetPipeLine, prvBitSetGroup, pageNumber, |
| msrColumnExecutorInfo.getFilterKeys().length)) { |
| return getFilteredIndexesForMsrUsingPrvBitSet(measureColumnPage, prvBitSetGroup, pageNumber, |
| numberOfRows, msrDataType); |
| } else { |
| return getFilteredIndexesForMeasures(measureColumnPage, numberOfRows, msrDataType); |
| } |
| } |
| |
| /** |
| * Below method will be used to apply filter on measure column based on previous filtered indexes |
| * @param measureColumnPage |
| * @param prvBitSetGroup |
| * @param pageNumber |
| * @param numberOfRows |
| * @param msrDataType |
| * @return filtred indexes bitset |
| */ |
| private BitSet getFilteredIndexesForMsrUsingPrvBitSet(ColumnPage measureColumnPage, |
| BitSetGroup prvBitSetGroup, int pageNumber, int numberOfRows, DataType msrDataType) { |
| BitSet bitSet = new BitSet(numberOfRows); |
| Object[] filterValues = msrColumnExecutorInfo.getFilterKeys(); |
| BitSet nullBitSet = measureColumnPage.getNullBits(); |
| BitSet prvPageBitSet = prvBitSetGroup.getBitSet(pageNumber); |
| SerializableComparator comparator = Comparator.getComparatorByDataTypeForMeasure(msrDataType); |
| for (int i = 0; i < filterValues.length; i++) { |
| if (filterValues[i] == null) { |
| for (int j = nullBitSet.nextSetBit(0); j >= 0; j = nullBitSet.nextSetBit(j + 1)) { |
| bitSet.set(j); |
| } |
| continue; |
| } |
| for (int index = prvPageBitSet.nextSetBit(0); |
| index >= 0; index = prvPageBitSet.nextSetBit(index + 1)) { |
| if (!nullBitSet.get(index)) { |
| // Check if filterValue[i] matches with measure Values. |
| Object msrValue = DataTypeUtil |
| .getMeasureObjectBasedOnDataType(measureColumnPage, index, |
| msrDataType, msrColumnEvaluatorInfo.getMeasure()); |
| |
| if (comparator.compare(msrValue, filterValues[i]) == 0) { |
| // This is a match. |
| bitSet.set(index); |
| } |
| } |
| } |
| } |
| return bitSet; |
| } |
| |
| /** |
| * Below method will be used to apply filter on dimension column |
| * @param dimensionColumnDataChunk |
| * @param numberOfRows |
| * @param useBitsetPipeLine |
| * @param prvBitSetGroup |
| * @param pageNumber |
| * @return filtered indexes bitset |
| */ |
| private BitSet getFilteredIndexes(DimensionColumnDataChunk dimensionColumnDataChunk, |
| int numberOfRows, boolean useBitsetPipeLine, BitSetGroup prvBitSetGroup, int pageNumber) { |
| // check whether previous indexes can be optimal to apply filter on dimension column |
| if (CarbonUtil.usePreviousFilterBitsetGroup(useBitsetPipeLine, prvBitSetGroup, pageNumber, |
| dimColumnExecuterInfo.getFilterKeys().length)) { |
| return getFilteredIndexesUisngPrvBitset(dimensionColumnDataChunk, prvBitSetGroup, pageNumber, |
| numberOfRows); |
| } else { |
| return getFilteredIndexes(dimensionColumnDataChunk, numberOfRows); |
| } |
| } |
| |
| private BitSet getFilteredIndexes(DimensionColumnDataChunk dimensionColumnDataChunk, |
| int numberOfRows) { |
| if (dimensionColumnDataChunk.isExplicitSorted()) { |
| return setFilterdIndexToBitSetWithColumnIndex(dimensionColumnDataChunk, numberOfRows); |
| } |
| return setFilterdIndexToBitSet(dimensionColumnDataChunk, numberOfRows); |
| } |
| |
| /** |
| * Below method will be used to apply filter on dimension |
| * column based on previous filtered indexes |
| * @param dimensionColumnDataChunk |
| * @param prvBitSetGroup |
| * @param pageNumber |
| * @param numberOfRows |
| * @return filtered bitset |
| */ |
| private BitSet getFilteredIndexesUisngPrvBitset(DimensionColumnDataChunk dimensionColumnDataChunk, |
| BitSetGroup prvBitSetGroup, int pageNumber, int numberOfRows) { |
| BitSet prvPageBitSet = prvBitSetGroup.getBitSet(pageNumber); |
| BitSet bitSet = new BitSet(numberOfRows); |
| byte[][] filterKeys = dimColumnExecuterInfo.getFilterKeys(); |
| int compareResult = 0; |
| // if dimension data was natural sorted then get the index from previous bitset |
| // and use the same in next column data, otherwise use the inverted index reverse |
| if (!dimensionColumnDataChunk.isExplicitSorted()) { |
| for (int index = prvPageBitSet.nextSetBit(0); |
| index >= 0; index = prvPageBitSet.nextSetBit(index + 1)) { |
| compareResult = CarbonUtil |
| .isFilterPresent(filterKeys, dimensionColumnDataChunk, 0, filterKeys.length - 1, index); |
| if (compareResult == 0) { |
| bitSet.set(index); |
| } |
| } |
| } else { |
| for (int index = prvPageBitSet.nextSetBit(0); |
| index >= 0; index = prvPageBitSet.nextSetBit(index + 1)) { |
| compareResult = CarbonUtil |
| .isFilterPresent(filterKeys, dimensionColumnDataChunk, 0, filterKeys.length - 1, |
| dimensionColumnDataChunk.getInvertedReverseIndex(index)); |
| if (compareResult == 0) { |
| bitSet.set(index); |
| } |
| } |
| } |
| return bitSet; |
| } |
| private BitSet setFilterdIndexToBitSetWithColumnIndex( |
| DimensionColumnDataChunk dimensionColumnDataChunk, int numerOfRows) { |
| BitSet bitSet = new BitSet(numerOfRows); |
| int startIndex = 0; |
| byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys(); |
| for (int i = 0; i < filterValues.length; i++) { |
| if (startIndex >= numerOfRows) { |
| break; |
| } |
| int[] rangeIndex = CarbonUtil |
| .getRangeIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1, |
| filterValues[i]); |
| for (int j = rangeIndex[0]; j <= rangeIndex[1]; j++) { |
| bitSet.set(dimensionColumnDataChunk.getInvertedIndex(j)); |
| } |
| if (rangeIndex[1] >= 0) { |
| startIndex = rangeIndex[1] + 1; |
| } |
| } |
| return bitSet; |
| } |
| |
| private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnDataChunk, |
| int numerOfRows) { |
| BitSet bitSet = new BitSet(numerOfRows); |
| byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys(); |
| // binary search can only be applied if column is sorted and |
| // inverted index exists for that column |
| if (isNaturalSorted) { |
| int startIndex = 0; |
| for (int i = 0; i < filterValues.length; i++) { |
| if (startIndex >= numerOfRows) { |
| break; |
| } |
| int[] rangeIndex = CarbonUtil |
| .getRangeIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1, |
| filterValues[i]); |
| for (int j = rangeIndex[0]; j <= rangeIndex[1]; j++) { |
| bitSet.set(j); |
| } |
| if (rangeIndex[1] >= 0) { |
| startIndex = rangeIndex[1] + 1; |
| } |
| } |
| } else { |
| if (filterValues.length > 1) { |
| for (int i = 0; i < numerOfRows; i++) { |
| int index = CarbonUtil.binarySearch(filterValues, 0, filterValues.length - 1, |
| dimensionColumnDataChunk.getChunkData(i)); |
| if (index >= 0) { |
| bitSet.set(i); |
| } |
| } |
| } else { |
| for (int j = 0; j < numerOfRows; j++) { |
| if (dimensionColumnDataChunk.compareTo(j, filterValues[0]) == 0) { |
| bitSet.set(j); |
| } |
| } |
| } |
| } |
| return bitSet; |
| } |
| |
| public BitSet isScanRequired(byte[][] blkMaxVal, byte[][] blkMinVal) { |
| BitSet bitSet = new BitSet(1); |
| byte[][] filterValues = null; |
| int columnIndex = 0; |
| int blockIndex = 0; |
| boolean isScanRequired = false; |
| |
| if (isDimensionPresentInCurrentBlock) { |
| filterValues = dimColumnExecuterInfo.getFilterKeys(); |
| columnIndex = dimColumnEvaluatorInfo.getColumnIndex(); |
| blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping().get(columnIndex); |
| isScanRequired = |
| isScanRequired(blkMaxVal[blockIndex], blkMinVal[blockIndex], filterValues); |
| |
| } else if (isMeasurePresentInCurrentBlock) { |
| columnIndex = msrColumnEvaluatorInfo.getColumnIndex(); |
| blockIndex = |
| segmentProperties.getMeasuresOrdinalToBlockMapping().get(columnIndex) + segmentProperties |
| .getLastDimensionColOrdinal(); |
| isScanRequired = isScanRequired(blkMaxVal[blockIndex], blkMinVal[blockIndex], |
| msrColumnExecutorInfo.getFilterKeys(), |
| msrColumnEvaluatorInfo.getType()); |
| } |
| |
| if (isScanRequired) { |
| bitSet.set(0); |
| } |
| return bitSet; |
| } |
| |
| private boolean isScanRequired(byte[] blkMaxVal, byte[] blkMinVal, byte[][] filterValues) { |
| boolean isScanRequired = false; |
| for (int k = 0; k < filterValues.length; k++) { |
| // filter value should be in range of max and min value i.e |
| // max>filtervalue>min |
| // so filter-max should be negative |
| int maxCompare = |
| ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[k], blkMaxVal); |
| // and filter-min should be positive |
| int minCompare = |
| ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[k], blkMinVal); |
| |
| // if any filter value is in range than this block needs to be |
| // scanned |
| if (maxCompare <= 0 && minCompare >= 0) { |
| isScanRequired = true; |
| break; |
| } |
| } |
| return isScanRequired; |
| } |
| |
| private boolean isScanRequired(byte[] maxValue, byte[] minValue, Object[] filterValue, |
| DataType dataType) { |
| Object maxObject = DataTypeUtil.getMeasureObjectFromDataType(maxValue, dataType); |
| Object minObject = DataTypeUtil.getMeasureObjectFromDataType(minValue, dataType); |
| for (int i = 0; i < filterValue.length; i++) { |
| // TODO handle min and max for null values. |
| if (filterValue[i] == null) { |
| return true; |
| } |
| if (comparator.compare(filterValue[i], maxObject) <= 0 |
| && comparator.compare(filterValue[i], minObject) >= 0) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| @Override public void readBlocks(BlocksChunkHolder blockChunkHolder) throws IOException { |
| if (isDimensionPresentInCurrentBlock == true) { |
| int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping() |
| .get(dimColumnEvaluatorInfo.getColumnIndex()); |
| if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) { |
| blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock() |
| .getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex); |
| } |
| } else if (isMeasurePresentInCurrentBlock == true) { |
| int blockIndex = segmentProperties.getMeasuresOrdinalToBlockMapping() |
| .get(msrColumnEvaluatorInfo.getColumnIndex()); |
| if (null == blockChunkHolder.getMeasureRawDataChunk()[blockIndex]) { |
| blockChunkHolder.getMeasureRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock() |
| .getMeasureChunk(blockChunkHolder.getFileReader(), blockIndex); |
| } |
| } |
| } |
| } |