blob: 41083b0ce4dfeafa161f52b4dabd35ea4d97235e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.core.scan.filter.executer;
import java.util.BitSet;
import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage;
import org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage;
import org.apache.carbondata.core.util.CarbonUtil;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import junit.framework.TestCase;
public class IncludeFilterExecuterImplTest extends TestCase {
/**
* @throws Exception
*/
@Before
public void setUp() throws Exception {
}
public BitSet setFilterdIndexToBitSetNew(DimensionColumnPage dimensionColumnPage,
int numerOfRows, byte[][] filterValues) {
BitSet bitSet = new BitSet(numerOfRows);
if (dimensionColumnPage instanceof FixedLengthDimensionColumnPage) {
// byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
if (filterValues.length > 1) {
for (int i = 0; i < numerOfRows; i++) {
int index = CarbonUtil.binarySearch(filterValues, 0, filterValues.length - 1,
dimensionColumnPage, i);
if (index >= 0) {
bitSet.set(i);
}
}
} else if (filterValues.length == 1) {
for (int i = 0; i < numerOfRows; i++) {
if (dimensionColumnPage.compareTo(i, filterValues[0]) == 0) {
bitSet.set(i);
}
}
}
}
return bitSet;
}
public BitSet setFilterdIndexToBitSet(DimensionColumnPage dimensionColumnPage, int numerOfRows,
byte[][] filterValues) {
BitSet bitSet = new BitSet(numerOfRows);
if (dimensionColumnPage instanceof FixedLengthDimensionColumnPage) {
// byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
for (int k = 0; k < filterValues.length; k++) {
for (int j = 0; j < numerOfRows; j++) {
if (dimensionColumnPage.compareTo(j, filterValues[k]) == 0) {
bitSet.set(j);
}
}
}
}
return bitSet;
}
/**
* change int to byte[]
*
* @param value
* @param size
* @return byte[]
*/
private byte[] transferIntToByteArr(int value, int size) {
byte[] targets = new byte[size];
for (int i = 0; i < size; i++) {
int data = value;
for (int j = i; j < size - 1; j++) {
data = data >> 8;
}
data = data & 0xFF;
targets[i] = (byte) (data & 0xFF);
}
return targets;
}
@Test
public void testPerformance() {
// dimension's data number in a blocklet, usually default is 32000
int dataChunkSize = 32000;
// repeat query times in the test
int queryTimes = 5;
// repeated times for a dictionary value
int repeatTimes = 200;
//filtered value count in a blocklet
int filteredValueCnt = 800;
comparePerformance(dataChunkSize, filteredValueCnt, queryTimes, repeatTimes);
filteredValueCnt = 100;
// set big repeat value for test dimension dictionary size is 1
repeatTimes = 2000;
comparePerformance(dataChunkSize, filteredValueCnt, queryTimes, repeatTimes);
}
/**
* Tests the filterKeys.length = 0 and filterKeys.length = 1
*/
@Test
public void testBoundary() {
// dimension's data number in a blocklet, usually default is 32000
int dataChunkSize = 32000;
// repeat query times in the test
int queryTimes = 5;
// repeated times for a dictionary value
int repeatTimes = 20;
//filtered value count in a blocklet
int filteredValueCnt = 1;
comparePerformance(dataChunkSize, filteredValueCnt, queryTimes, repeatTimes);
filteredValueCnt = 0;
comparePerformance(dataChunkSize, filteredValueCnt, queryTimes, repeatTimes);
}
/**
* comapre result and performance
*
* @param dataChunkSize dataChunk's stored data size
* @param filteredValueCnt filtered dictionary value count
* @param queryTimes repeat query times in the test
* @param repeatTimes repeated times for a dictionary value
* @return
*/
private void comparePerformance(int dataChunkSize, int filteredValueCnt,
int queryTimes, int repeatTimes) {
long start;
long oldTime = 0;
long newTime = 0;
// used to generate filter value
int baseFilteredValue = 100;
// column dictionary size
int dimColumnSize = 2;
if ((dataChunkSize / repeatTimes) <= 255 && (baseFilteredValue+filteredValueCnt) <= 255) {
dimColumnSize = 1;
}
System.out.println("dimColumnSize: " + dimColumnSize);
FixedLengthDimensionColumnPage dimensionColumnDataChunk;
DimColumnExecuterFilterInfo dim = new DimColumnExecuterFilterInfo();
byte[] dataChunk = new byte[dataChunkSize * dimColumnSize];
for (int i = 0; i < dataChunkSize; i++) {
if (i % repeatTimes == 0) {
repeatTimes++;
}
byte[] data = transferIntToByteArr(repeatTimes, dimColumnSize);
for(int j =0 ; j< dimColumnSize;j++){
dataChunk[dimColumnSize * i + j] = data[j];
}
}
byte[][] filterKeys = new byte[filteredValueCnt][2];
for (int k = 0; k < filteredValueCnt; k++) {
filterKeys[k] = transferIntToByteArr(baseFilteredValue + k, dimColumnSize);
}
dim.setFilterKeys(filterKeys);
dimensionColumnDataChunk = new FixedLengthDimensionColumnPage(dataChunk, null, null,
dataChunkSize, dimColumnSize, dataChunk.length);
// repeat query and compare 2 result between old code and new optimized code
for (int j = 0; j < queryTimes; j++) {
start = System.currentTimeMillis();
BitSet bitOld = this.setFilterdIndexToBitSet(dimensionColumnDataChunk, dataChunkSize, filterKeys);
oldTime = oldTime + System.currentTimeMillis() - start;
start = System.currentTimeMillis();
BitSet bitNew = this.setFilterdIndexToBitSetNew(dimensionColumnDataChunk, dataChunkSize,
filterKeys);
newTime = newTime + System.currentTimeMillis() - start;
assertTrue(bitOld.equals(bitNew));
}
if (filteredValueCnt >= 100) {
Assert.assertTrue(newTime <= oldTime);
}
System.out.println("old code performance time: " + oldTime + " ms");
System.out.println("new code performance time: " + newTime + " ms");
System.out.println("filteredValueCnt: " + filteredValueCnt);
}
private BitSet setFilterdIndexToBitSetWithColumnIndexOld(FixedLengthDimensionColumnPage dimensionColumnDataChunk,
int numerOfRows, byte[][] filterValues) {
BitSet bitSet = new BitSet(numerOfRows);
int start = 0;
int last = 0;
int startIndex = 0;
// byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
for (int i = 0; i < filterValues.length; i++) {
start = CarbonUtil.getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1,
filterValues[i], false);
if (start < 0) {
continue;
}
bitSet.set(start);
last = start;
for (int j = start + 1; j < numerOfRows; j++) {
if (dimensionColumnDataChunk.compareTo(j, filterValues[i]) == 0) {
bitSet.set(j);
last++;
} else {
break;
}
}
startIndex = last;
if (startIndex >= numerOfRows) {
break;
}
}
return bitSet;
}
private BitSet setFilterdIndexToBitSetWithColumnIndexNew(FixedLengthDimensionColumnPage dimensionColumnDataChunk,
int numerOfRows, byte[][] filterValues) {
BitSet bitSet = new BitSet(numerOfRows);
int startIndex = 0;
// byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
for (int i = 0; i < filterValues.length; i++) {
int[] rangeIndex = CarbonUtil.getRangeIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex,
numerOfRows - 1, filterValues[i]);
for (int j = rangeIndex[0]; j <= rangeIndex[1]; j++) {
bitSet.set(j);
}
if (rangeIndex[1] > -1) {
startIndex = rangeIndex[1];
}
}
return bitSet;
}
@Test
public void testRangBinarySearch() {
long oldTime = 0;
long newTime = 0;
long start;
long end;
// dimension's data number in a blocklet, usually default is 32000
int dataChunkSize = 32000;
// repeat query times in the test
int queryTimes = 10000;
// repeated times for a dictionary value
int repeatTimes = 200;
//filtered value count in a blocklet
int filteredValueCnt = 800;
// column dictionary size
int dimColumnSize = 2;
FixedLengthDimensionColumnPage dimensionColumnDataChunk;
DimColumnExecuterFilterInfo dim = new DimColumnExecuterFilterInfo();
byte[] dataChunk = new byte[dataChunkSize * dimColumnSize];
for (int i = 0; i < dataChunkSize; i++) {
if (i % repeatTimes == 0) {
repeatTimes++;
}
byte[] data = transferIntToByteArr(repeatTimes, dimColumnSize);
dataChunk[2 * i] = data[0];
dataChunk[2 * i + 1] = data[1];
}
byte[][] filterKeys = new byte[filteredValueCnt][2];
for (int k = 0; k < filteredValueCnt; k++) {
filterKeys[k] = transferIntToByteArr(100 + k, dimColumnSize);
}
dim.setFilterKeys(filterKeys);
dimensionColumnDataChunk = new FixedLengthDimensionColumnPage(dataChunk, null, null,
dataChunk.length / dimColumnSize, dimColumnSize, dataChunk.length);
// initial to run
BitSet bitOld = this.setFilterdIndexToBitSetWithColumnIndexOld(dimensionColumnDataChunk, dataChunkSize, filterKeys);
BitSet bitNew = this.setFilterdIndexToBitSetWithColumnIndexNew(dimensionColumnDataChunk, dataChunkSize, filterKeys);
// performance run
for (int j = 0; j < queryTimes; j++) {
start = System.currentTimeMillis();
bitOld = this.setFilterdIndexToBitSetWithColumnIndexOld(dimensionColumnDataChunk, dataChunkSize, filterKeys);
end = System.currentTimeMillis();
oldTime = oldTime + end - start;
start = System.currentTimeMillis();
bitNew = this.setFilterdIndexToBitSetWithColumnIndexNew(dimensionColumnDataChunk, dataChunkSize, filterKeys);
end = System.currentTimeMillis();
newTime = newTime + end - start;
assertTrue(bitOld.equals(bitNew));
}
System.out.println("old code performance time: " + oldTime + " ms");
System.out.println("new code performance time: " + newTime + " ms");
}
}