blob: 0f3e555118ef7efc691ec74352e8addd46fbf5b9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.core.datastore.chunk.store.impl.safe;
import java.nio.ByteBuffer;
import java.util.BitSet;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo;
import org.apache.carbondata.core.scan.result.vector.impl.directread.ColumnarVectorWrapperDirectFactory;
import org.apache.carbondata.core.scan.result.vector.impl.directread.ConvertibleVector;
import org.apache.carbondata.core.util.ByteUtil;
import org.apache.carbondata.core.util.DataTypeUtil;
/**
 * Stores the data of a variable length dimension column chunk and indexes it through
 * plain Java arrays. Every value in the chunk is laid out as
 * {@code <length prefix><value bytes>}; the width of the length prefix and the way it is
 * decoded are supplied by the concrete subclass through {@link #getLengthSize()} and
 * {@link #getLengthFromBuffer(ByteBuffer)}.
 */
public abstract class SafeVariableLengthDimensionDataChunkStore
    extends SafeAbstractDimensionDataChunkStore {

  /**
   * total number of rows
   */
  private final int numberOfRows;

  /**
   * for every row, the position inside the data array at which the value bytes start
   * (i.e. just after the length prefix); lets a lookup jump directly to a row
   */
  private int[] dataOffsets;

  /**
   * length of the chunk data as handed to the constructor; the last row is taken to end
   * at this position
   */
  private final int dataLength;

  /**
   * @param isInvertedIndex forwarded unchanged to the parent store
   * @param numberOfRows    number of rows contained in the chunk
   * @param dataLength      length of the chunk data, used as the end of the last row
   */
  public SafeVariableLengthDimensionDataChunkStore(boolean isInvertedIndex, int numberOfRows,
      int dataLength) {
    super(isInvertedIndex);
    this.numberOfRows = numberOfRows;
    this.dataOffsets = new int[numberOfRows];
    this.dataLength = dataLength;
  }

  /**
   * Stores the rows together with their inverted index metadata and builds the table of
   * per-row value offsets that {@link #getRow(int)}, {@link #fillRow} and
   * {@link #compareTo(int, byte[])} rely on.
   *
   * @param invertedIndex        inverted index to be stored
   * @param invertedIndexReverse inverted index reverse to be stored
   * @param data                 data to be stored
   */
  @Override
  public void putArray(final int[] invertedIndex, final int[] invertedIndexReverse,
      byte[] data) {
    // first hand the data, inverted index and reverse inverted index to the parent store
    super.putArray(invertedIndex, invertedIndexReverse, data);
    // every slot is rewritten below, so an existing array of the right size is reused
    // instead of allocating a second one right after the constructor did
    if (dataOffsets == null || dataOffsets.length != numberOfRows) {
      dataOffsets = new int[numberOfRows];
    }
    if (numberOfRows == 0) {
      // empty chunk: there is no first row whose offset could be recorded
      return;
    }
    // The data is a sequence of <length prefix><value> entries, so the start of each value
    // has to be recorded to allow direct access. Example with a 2 byte prefix:
    //   data    = {0,5,1,2,3,4,5, 0,6,0,1,2,3,4,5, 0,2,8,9}
    //   offsets = [2, 9, 17]
    // startOffset tracks the position of the length prefix of the row being decoded
    int startOffset = 0;
    // the first value starts right after the first length prefix
    dataOffsets[0] = getLengthSize();
    // one buffer is wrapped around the whole array and re-positioned for every row
    ByteBuffer buffer = ByteBuffer.wrap(data);
    for (int i = 1; i < numberOfRows; i++) {
      buffer.position(startOffset);
      // skip the previous row: its length prefix plus its value bytes
      startOffset += getLengthFromBuffer(buffer) + getLengthSize();
      // value of row i starts after the length prefix located at startOffset
      dataOffsets[i] = startOffset + getLengthSize();
    }
  }

  /**
   * Fills the vector carried by {@code vectorInfo} directly from {@code data} using the
   * non-dictionary vector filler selected for the vector's data type. No offset table is
   * built on this path.
   *
   * @param invertedIndex        inverted index handed to the direct vector wrapper
   * @param invertedIndexReverse not used by this implementation
   * @param data                 chunk data in {@code <length prefix><value>} layout
   * @param vectorInfo           holder of the target vector and the deleted rows
   */
  @Override
  public void fillVector(int[] invertedIndex, int[] invertedIndexReverse, byte[] data,
      ColumnVectorInfo vectorInfo) {
    CarbonColumnVector vector = vectorInfo.vector;
    vector.setDictionary(null);
    DataType dt = vector.getType();
    AbstractNonDictionaryVectorFiller vectorFiller = NonDictionaryVectorFillerFactory
        .getVectorFiller(getLengthSize(), dt, numberOfRows, dataLength);
    // from here on all writes go through the wrapper returned by the factory
    vector = ColumnarVectorWrapperDirectFactory
        .getDirectVectorWrapperFactory(vectorInfo, vector, invertedIndex, new BitSet(),
            vectorInfo.deletedRows, false, false);
    vectorFiller.fillVector(data, vector);
    if (vector instanceof ConvertibleVector) {
      ((ConvertibleVector) vector).convert();
    }
  }

  /**
   * @return number of bytes occupied by the length prefix stored in front of every value
   */
  protected abstract int getLengthSize();

  /**
   * Decodes the length prefix found at the current position of {@code buffer}.
   *
   * @param buffer buffer positioned at the start of a length prefix
   * @return length in bytes of the value following the prefix
   */
  protected abstract int getLengthFromBuffer(ByteBuffer buffer);

  /**
   * Returns a copy of the value bytes of the given row.
   *
   * @param rowId row to read; mapped through the reverse inverted index first when the
   *              column is explicitly sorted
   * @return newly allocated array holding the value bytes (without the length prefix)
   */
  @Override
  public byte[] getRow(int rowId) {
    // if column was explicitly sorted we need to get the row id based inverted index reverse
    if (isExplicitSorted) {
      rowId = invertedIndexReverse[rowId];
    }
    int currentDataOffset = dataOffsets[rowId];
    int length = lengthOfRowAt(rowId, currentDataOffset);
    byte[] currentRowData = new byte[length];
    System.arraycopy(data, currentDataOffset, currentRowData, 0, length);
    return currentRowData;
  }

  /**
   * Writes the value of the given row into {@code vector} at position {@code vectorRow},
   * converting the stored bytes according to the vector's data type.
   *
   * @param rowId     row to read; mapped through the reverse inverted index first when the
   *                  column is explicitly sorted
   * @param vector    vector to be filled
   * @param vectorRow position inside the vector to write to
   */
  @Override
  public void fillRow(int rowId, CarbonColumnVector vector, int vectorRow) {
    vector.setDictionary(null);
    // if column was explicitly sorted we need to get the row id based inverted index reverse
    if (isExplicitSorted) {
      rowId = invertedIndexReverse[rowId];
    }
    int currentDataOffset = dataOffsets[rowId];
    int length = lengthOfRowAt(rowId, currentDataOffset);
    DataType dt = vector.getType();
    // an empty value is null for everything except STRING/VARCHAR; a value equal to the
    // member default value marker is null for every type
    boolean emptyNonStringValue =
        dt != DataTypes.STRING && dt != DataTypes.VARCHAR && length == 0;
    if (emptyNonStringValue || ByteUtil.UnsafeComparer.INSTANCE
        .equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, 0,
            CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY.length, data, currentDataOffset,
            length)) {
      vector.putNull(vectorRow);
    } else if (dt == DataTypes.STRING || dt == DataTypes.VARCHAR || dt == DataTypes.BINARY) {
      vector.putByteArray(vectorRow, currentDataOffset, length, data);
    } else if (dt == DataTypes.BOOLEAN) {
      vector.putBoolean(vectorRow, ByteUtil.toBoolean(data[currentDataOffset]));
    } else if (dt == DataTypes.SHORT) {
      vector.putShort(vectorRow, ByteUtil.toXorShort(data, currentDataOffset, length));
    } else if (dt == DataTypes.INT) {
      vector.putInt(vectorRow, ByteUtil.toXorInt(data, currentDataOffset, length));
    } else if (dt == DataTypes.LONG) {
      vector.putLong(vectorRow,
          DataTypeUtil.getDataBasedOnRestructuredDataType(data, vector.getBlockDataType(),
              currentDataOffset, length));
    } else if (dt == DataTypes.TIMESTAMP) {
      // NOTE(review): the factor 1000 presumably converts millis to micros - confirm
      vector.putLong(vectorRow, ByteUtil.toXorLong(data, currentDataOffset, length) * 1000L);
    }
    // any other data type is intentionally left untouched, as before
  }

  /**
   * Compares the stored value of the given row with {@code compareValue}.
   * Note that, unlike {@link #getRow(int)}, {@code rowId} is used as-is and is not mapped
   * through the reverse inverted index.
   *
   * @param rowId        row whose value is compared
   * @param compareValue value to compare against
   * @return result of the byte-wise comparison performed by the unsafe comparer
   */
  @Override
  public int compareTo(int rowId, byte[] compareValue) {
    int currentDataOffset = dataOffsets[rowId];
    int length = lengthOfRowAt(rowId, currentDataOffset);
    return ByteUtil.UnsafeComparer.INSTANCE
        .compareTo(data, currentDataOffset, length, compareValue, 0, compareValue.length);
  }

  /**
   * Releases the parent store's resources and drops the offset table.
   */
  @Override
  public void freeMemory() {
    super.freeMemory();
    dataOffsets = null;
  }

  /**
   * Computes the number of value bytes of a row.
   * For every row but the last, the value ends where the length prefix of the next row
   * begins, i.e. at the next row's value offset minus the prefix size. The last row ends
   * at {@code dataLength}.
   *
   * @param rowId             physical row position (after any inverted index mapping)
   * @param currentDataOffset value offset of that row, i.e. {@code dataOffsets[rowId]}
   * @return length in bytes of the row's value
   */
  private int lengthOfRowAt(int rowId, int currentDataOffset) {
    if (rowId < numberOfRows - 1) {
      return dataOffsets[rowId + 1] - (currentDataOffset + getLengthSize());
    }
    // for last record
    return this.dataLength - currentDataOffset;
  }
}