/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.core.datastore.chunk.store.impl.unsafe;
import java.nio.ByteBuffer;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.memory.CarbonUnsafe;
import org.apache.carbondata.core.scan.executor.util.QueryUtil;
import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
/**
* This class is responsible for storing a variable length dimension data chunk in memory.
* The memory occupied can be on heap or off-heap, accessed using the unsafe interface.
*/
public abstract class UnsafeVariableLengthDimensionDataChunkStore
    extends UnsafeAbstractDimensionDataChunkStore {

  /**
   * total number of rows
   */
  private final int numberOfRows;

  /**
   * position (relative to the base offset of the data page memory block) from where the
   * per-row data offsets are stored
   */
  private long dataPointersOffsets;

  /**
   * Reusable data array
   * this will be useful for vector scenario, as it will be created once and filled every time
   * if new data length is bigger than existing data length then create new data with bigger
   * length and assign to value
   */
  private byte[] value;

  /**
   * @param totalSize       passed through to the parent store
   * @param isInvertedIndex passed through to the parent store
   * @param numberOfRows    total number of rows held by this store
   * @param dataLength      passed through to the parent store
   */
  public UnsafeVariableLengthDimensionDataChunkStore(long totalSize, boolean isInvertedIndex,
      int numberOfRows, int dataLength) {
    super(totalSize, isInvertedIndex, numberOfRows, dataLength);
    this.numberOfRows = numberOfRows;
    // initial size is arbitrary, fillRow replaces the array when a row does not fit
    this.value = new byte[20];
  }

  /**
   * Below method will be used to put the rows and its metadata in off-heap
   * After the parent has stored the data, the position of every row's data is computed
   * and written as an int array starting at dataPointersOffsets
   *
   * @param invertedIndex        inverted index to be stored
   * @param invertedIndexReverse inverted index reverse to be stored
   * @param data                 data to be stored
   */
  @Override
  public void putArray(final int[] invertedIndex, final int[] invertedIndexReverse,
      byte[] data) {
    // first put the data, inverted index and reverse inverted index to memory
    super.putArray(invertedIndex, invertedIndexReverse, data);
    // position from where offsets will start
    this.dataPointersOffsets = this.invertedIndexReverseOffset;
    if (isExplicitSorted) {
      this.dataPointersOffsets += (long) numberOfRows * CarbonCommonConstants.INT_SIZE_IN_BYTE;
    }
    // nothing to index when there are no rows, writing dataOffsets[0] below would fail
    if (numberOfRows == 0) {
      return;
    }
    // As data is of variable length and data format is
    // <length in short/int><data><length in short/int><data>
    // we need to store offset of each data so data can be accessed directly
    // for example (with a length size of 2 bytes):
    // data = {0,5,1,2,3,4,5,0,6,0,1,2,3,4,5,0,2,8,9}
    // so value stored in offset will be position of actual data
    // [2,9,17]
    int[] dataOffsets = new int[numberOfRows];
    // data of the first row starts right after its own length prefix
    dataOffsets[0] = getLengthSize();
    // a single byte buffer over the data is repositioned for every row
    // to read the length prefix of that row
    ByteBuffer buffer = ByteBuffer.wrap(data);
    // start position of the length prefix of the row being read
    int startOffset = 0;
    for (int i = 1; i < numberOfRows; i++) {
      buffer.position(startOffset);
      // move past the previous row: its data length + bytes used for storing that length
      startOffset += getLengthFromBuffer(buffer) + getLengthSize();
      // data of the current row starts after the current row's length prefix
      dataOffsets[i] = startOffset + getLengthSize();
    }
    // byte count is computed in long, same as the offset arithmetic above
    CarbonUnsafe.getUnsafe().copyMemory(dataOffsets, CarbonUnsafe.INT_ARRAY_OFFSET,
        dataPageMemoryBlock.getBaseObject(),
        dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets,
        (long) dataOffsets.length * CarbonCommonConstants.INT_SIZE_IN_BYTE);
  }

  /**
   * @return number of bytes used to store the length of each row
   */
  protected abstract int getLengthSize();

  /**
   * @param byteBuffer buffer positioned at the start of a row's length prefix
   * @return length of the row read from the buffer
   */
  protected abstract int getLengthFromBuffer(ByteBuffer byteBuffer);

  /**
   * Below method will be used to get the row based on row id passed
   * Getting the row from unsafe works in below logic
   * 1. if inverted index is present then get the row id based on reverse inverted index
   * 2. get the current row id data offset
   * 3. get the length of the row (see getLength)
   * 4. copy that many bytes starting at the data offset into a new array
   *
   * @param rowId row id
   * @return row
   */
  @Override
  public byte[] getRow(int rowId) {
    // get the actual row id
    rowId = getRowId(rowId);
    // get offset of data in unsafe
    int currentDataOffset = getOffSet(rowId);
    // get the data length
    int length = getLength(rowId, currentDataOffset);
    // create data array and fill the row data
    byte[] data = new byte[length];
    fillRowInternal(length, data, currentDataOffset);
    return data;
  }

  /**
   * Returns the actual row id for data
   * if inverted index is present then get the row id based on reverse inverted index
   * otherwise return the same row id
   *
   * @param rowId row id
   * @return actual row id
   */
  private int getRowId(int rowId) {
    // if column was explicitly sorted we need to get the row id based inverted index reverse
    if (isExplicitSorted) {
      rowId = CarbonUnsafe.getUnsafe().getInt(dataPageMemoryBlock.getBaseObject(),
          dataPageMemoryBlock.getBaseOffset() + this.invertedIndexReverseOffset
              + ((long) rowId * CarbonCommonConstants.INT_SIZE_IN_BYTE));
    }
    return rowId;
  }

  /**
   * get data offset based on current row id
   *
   * @param rowId row id
   * @return data offset
   */
  private int getOffSet(int rowId) {
    return CarbonUnsafe.getUnsafe().getInt(dataPageMemoryBlock.getBaseObject(),
        dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets
            + ((long) rowId * CarbonCommonConstants.INT_SIZE_IN_BYTE));
  }

  /**
   * To get the length of data for row id
   * if it's not a last row - get the next row data offset and subtract
   * (current row data offset + bytes used for storing the length) from it
   * if it's last row - subtract the current row data offset from the complete data length
   *
   * @param rowId             rowId
   * @param currentDataOffset current data offset
   * @return length of row
   */
  private int getLength(int rowId, int currentDataOffset) {
    if (rowId < numberOfRows - 1) {
      // read through getOffSet so the address arithmetic is done in long
      int offsetOfNextData = getOffSet(rowId + 1);
      // the length prefix of the next row sits between the two data offsets
      return offsetOfNextData - (currentDataOffset + getLengthSize());
    }
    // for last record we need to subtract with data length
    return this.dataLength - currentDataOffset;
  }

  /**
   * Copy the row from unsafe into the given array, starting at index 0
   *
   * @param length            length of the data
   * @param data              data array, must be able to hold length bytes
   * @param currentDataOffset current data offset
   */
  private void fillRowInternal(int length, byte[] data, int currentDataOffset) {
    CarbonUnsafe.getUnsafe().copyMemory(dataPageMemoryBlock.getBaseObject(),
        dataPageMemoryBlock.getBaseOffset() + currentDataOffset, data,
        CarbonUnsafe.BYTE_ARRAY_OFFSET, length);
  }

  /**
   * Below method will be used to put the row in vector based on row id passed
   * Getting the row from unsafe works in below logic
   * 1. if inverted index is present then get the row id based on reverse inverted index
   * 2. get the current row id data offset
   * 3. get the length of the row (see getLength)
   * 4. copy the row into the reusable value array and put it to the vector
   *
   * @param rowId     row id
   * @param vector    vector to be filled
   * @param vectorRow vector row id
   */
  @Override
  public void fillRow(int rowId, CarbonColumnVector vector, int vectorRow) {
    vector.setDictionary(null);
    // get the row id from reverse inverted index based on row id
    rowId = getRowId(rowId);
    // get the current row offset
    int currentDataOffset = getOffSet(rowId);
    // get the row data length
    int length = getLength(rowId, currentDataOffset);
    // if the reusable array is smaller than the current row
    // then create a new array else use the same
    if (length > value.length) {
      value = new byte[length];
    }
    // get the row from unsafe, only the first length bytes of value belong to this row
    fillRowInternal(length, value, currentDataOffset);
    QueryUtil.putDataToVector(vector, value, vectorRow, length);
  }

  /**
   * to compare the two byte array
   * bytes are compared as unsigned values, if all bytes of the common prefix are equal
   * the difference of the lengths is returned
   * rowId is used directly to look up the data offset, the reverse inverted index lookup
   * is not applied in this method
   *
   * @param rowId        index of first byte array
   * @param compareValue value of to be compared
   * @return compare result
   */
  @Override
  public int compareTo(int rowId, byte[] compareValue) {
    int currentDataOffset = getOffSet(rowId);
    int length = getLength(rowId, currentDataOffset);
    // as this class handles variable length data, filter value can be
    // smaller or bigger than actual data, so we need to take the smaller length
    int compareLength = Math.min(length, compareValue.length);
    for (int i = 0; i < compareLength; i++) {
      int storedByte = CarbonUnsafe.getUnsafe().getByte(dataPageMemoryBlock.getBaseObject(),
          dataPageMemoryBlock.getBaseOffset() + currentDataOffset) & 0xff;
      int compareResult = storedByte - (compareValue[i] & 0xff);
      // if compare result is not equal we can break
      if (compareResult != 0) {
        return compareResult;
      }
      // increment the offset by one as comparison is done byte by byte
      currentDataOffset++;
    }
    return length - compareValue.length;
  }
}