core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java - carbondata - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.carbondata.core.datastore.chunk.store.impl.safe;

 import java.nio.ByteBuffer;
 import java.util.BitSet;

 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.metadata.datatype.DataType;
 import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
 import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo;
 import org.apache.carbondata.core.scan.result.vector.impl.directread.ColumnarVectorWrapperDirectFactory;
 import org.apache.carbondata.core.scan.result.vector.impl.directread.ConvertibleVector;
 import org.apache.carbondata.core.util.ByteUtil;
 import org.apache.carbondata.core.util.DataTypeUtil;

 /**
  * Stores a variable length dimension data chunk in an on-heap byte array and keeps, for
  * every row, the offset at which that row's value starts.
  *
  * <p>The chunk is laid out as {@code <length><value><length><value>...}. The width of the
  * length prefix and the way it is decoded are supplied by subclasses through
  * {@link #getLengthSize()} and {@link #getLengthFromBuffer(ByteBuffer)}.
  */
 public abstract class SafeVariableLengthDimensionDataChunkStore
     extends SafeAbstractDimensionDataChunkStore {

   /**
    * total number of rows
    */
   private final int numberOfRows;

   /**
    * start offset of every row's value inside the data array, this will be used during
    * search, as we can directly jump to particular location
    */
   private int[] dataOffsets;

   /**
    * length of the stored data; the value of the last row is taken to end at this offset
    */
   private final int dataLength;

   /**
    * @param isInvertedIndex forwarded unchanged to the parent store
    * @param numberOfRows    total number of rows held by the chunk
    * @param dataLength      length of the data, used to derive the length of the last row
    */
   public SafeVariableLengthDimensionDataChunkStore(boolean isInvertedIndex, int numberOfRows,
       int dataLength) {
     super(isInvertedIndex);
     this.numberOfRows = numberOfRows;
     this.dataOffsets = new int[numberOfRows];
     this.dataLength = dataLength;
   }

   /**
    * Below method will be used to store the rows and their metadata and to compute the start
    * offset of every row's value
    *
    * @param invertedIndex        inverted index to be stored
    * @param invertedIndexReverse inverted index reverse to be stored
    * @param data                 data to be stored
    */
   @Override
   public void putArray(final int[] invertedIndex, final int[] invertedIndexReverse,
       byte[] data) {
     // first hand the data, inverted index and reverse inverted index to the parent store
     super.putArray(invertedIndex, invertedIndexReverse, data);
     this.dataOffsets = new int[numberOfRows];
     // a chunk without rows has no offsets to compute; without this guard the write to
     // dataOffsets[0] below would fail with ArrayIndexOutOfBoundsException
     if (numberOfRows == 0) {
       return;
     }
     // As data is of variable length and data format is
     // <length><data><length><data>
     // we need to store offset of each data so data can be accessed directly
     // for example, with a length prefix of 2 bytes:
     // data = {0,5,1,2,3,4,5,0,6,0,1,2,3,4,5,0,2,8,9}
     // so value stored in offset will be position of actual data
     // [2,9,17]
     // to compute this we need the actual data length + the bytes used for storing the length

     // position of the length prefix of the row currently being read
     int startOffset = 0;
     // the first value starts right after its own length prefix
     dataOffsets[0] = getLengthSize();
     // a single byte buffer is wrapped around the data and repositioned for every row,
     // so no buffer has to be created per row
     ByteBuffer buffer = ByteBuffer.wrap(data);
     for (int i = 1; i < numberOfRows; i++) {
       buffer.position(startOffset);
       // the length prefix of the current row is located at
       // previous prefix position + previous row length + size of the previous prefix
       startOffset += getLengthFromBuffer(buffer) + getLengthSize();
       // the value itself starts after the length prefix
       dataOffsets[i] = startOffset + getLengthSize();
     }
   }

   /**
    * Fills the whole chunk into the vector carried by {@code vectorInfo} without building
    * the per row offsets; the actual decoding is delegated to a non dictionary vector filler.
    *
    * @param invertedIndex        inverted index handed to the direct vector wrapper
    * @param invertedIndexReverse inverted index reverse, not used by this implementation
    * @param data                 data to be decoded
    * @param vectorInfo           holder of the target vector and of the deleted rows
    */
   @Override
   public void fillVector(int[] invertedIndex, int[] invertedIndexReverse, byte[] data,
       ColumnVectorInfo vectorInfo) {
     CarbonColumnVector vector = vectorInfo.vector;
     vector.setDictionary(null);
     DataType dt = vector.getType();
     AbstractNonDictionaryVectorFiller vectorFiller = NonDictionaryVectorFillerFactory
         .getVectorFiller(getLengthSize(), dt, numberOfRows, dataLength);
     // from here on the vector is accessed through the direct read wrapper
     vector = ColumnarVectorWrapperDirectFactory
         .getDirectVectorWrapperFactory(vectorInfo, vector, invertedIndex, new BitSet(),
             vectorInfo.deletedRows, false, false);
     vectorFiller.fillVector(data, vector);
     if (vector instanceof ConvertibleVector) {
       ((ConvertibleVector) vector).convert();
     }
   }

   /**
    * @return number of bytes used to store the length in front of every value
    */
   protected abstract int getLengthSize();

   /**
    * @param buffer buffer over the data, positioned at the length prefix of a row
    * @return length of the value of that row
    */
   protected abstract int getLengthFromBuffer(ByteBuffer buffer);

   /**
    * Below method will be used to get the length of the value of the given row
    * 1. if it's not the last row - next row offset minus the size of the next row's length
    * prefix minus the current row offset
    * 2. for the last row - complete data length minus the current row offset
    *
    * @param rowId physical row id, inverted index reverse must already be applied
    * @return length of the value in bytes
    */
   private int computeRowLength(int rowId) {
     int currentDataOffset = dataOffsets[rowId];
     if (rowId < numberOfRows - 1) {
       return dataOffsets[rowId + 1] - (currentDataOffset + getLengthSize());
     }
     // for last record
     return this.dataLength - currentDataOffset;
   }

   /**
    * Below method will be used to get a copy of the value of the given row
    *
    * @param rowId row id, translated through the inverted index reverse when the column
    *              was explicitly sorted
    * @return newly allocated array holding the value of the row
    */
   @Override
   public byte[] getRow(int rowId) {
     // if column was explicitly sorted we need to get the row id based inverted index reverse
     if (isExplicitSorted) {
       rowId = invertedIndexReverse[rowId];
     }
     int currentDataOffset = dataOffsets[rowId];
     int length = computeRowLength(rowId);
     byte[] currentRowData = new byte[length];
     System.arraycopy(data, currentDataOffset, currentRowData, 0, length);
     return currentRowData;
   }

   /**
    * Below method will be used to put the value of the given row into the vector
    *
    * @param rowId     row id, translated through the inverted index reverse when the column
    *                  was explicitly sorted
    * @param vector    vector to be filled
    * @param vectorRow position inside the vector at which the value is written
    */
   @Override
   public void fillRow(int rowId, CarbonColumnVector vector, int vectorRow) {
     vector.setDictionary(null);
     // if column was explicitly sorted we need to get the row id based inverted index reverse
     if (isExplicitSorted) {
       rowId = invertedIndexReverse[rowId];
     }
     int currentDataOffset = dataOffsets[rowId];
     int length = computeRowLength(rowId);
     DataType dt = vector.getType();

     // a value is null when it is empty and the type is neither STRING nor VARCHAR,
     // or when its bytes are equal to the member default value
     if ((dt != DataTypes.STRING && dt != DataTypes.VARCHAR && length == 0)
         || ByteUtil.UnsafeComparer.INSTANCE
         .equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, 0,
             CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY.length, data, currentDataOffset,
             length)) {
       vector.putNull(vectorRow);
     } else {
       // any data type not listed below leaves the vector untouched
       if (dt == DataTypes.STRING || dt == DataTypes.VARCHAR || dt == DataTypes.BINARY) {
         vector.putByteArray(vectorRow, currentDataOffset, length, data);
       } else if (dt == DataTypes.BOOLEAN) {
         vector.putBoolean(vectorRow, ByteUtil.toBoolean(data[currentDataOffset]));
       } else if (dt == DataTypes.SHORT) {
         vector.putShort(vectorRow, ByteUtil.toXorShort(data, currentDataOffset, length));
       } else if (dt == DataTypes.INT) {
         vector.putInt(vectorRow, ByteUtil.toXorInt(data, currentDataOffset, length));
       } else if (dt == DataTypes.LONG) {
         vector.putLong(vectorRow,
             DataTypeUtil.getDataBasedOnRestructuredDataType(data, vector.getBlockDataType(),
                 currentDataOffset, length));
       } else if (dt == DataTypes.TIMESTAMP) {
         // the stored value is multiplied by 1000 before it is written
         // NOTE(review): presumably a milliseconds to microseconds conversion - confirm
         vector.putLong(vectorRow, ByteUtil.toXorLong(data, currentDataOffset, length) * 1000L);
       }
     }
   }

   /**
    * Below method will be used to compare the value of the given row with the given value.
    * Unlike getRow and fillRow the row id is used as it is, it is not translated through
    * the inverted index reverse.
    *
    * @param rowId        row id of the stored value
    * @param compareValue value to compare with
    * @return result of the byte wise comparison of the stored value with compareValue
    */
   @Override
   public int compareTo(int rowId, byte[] compareValue) {
     // get the offset of set of data
     int currentDataOffset = dataOffsets[rowId];
     int length = computeRowLength(rowId);
     return ByteUtil.UnsafeComparer.INSTANCE
         .compareTo(data, currentDataOffset, length, compareValue, 0, compareValue.length);
   }

   /**
    * Releases what the parent store holds and drops the row offsets
    */
   @Override
   public void freeMemory() {
     super.freeMemory();
     dataOffsets = null;
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.carbondata.core.datastore.chunk.store.impl.safe;

	import java.nio.ByteBuffer;
	import java.util.BitSet;

	import org.apache.carbondata.core.constants.CarbonCommonConstants;
	import org.apache.carbondata.core.metadata.datatype.DataType;
	import org.apache.carbondata.core.metadata.datatype.DataTypes;
	import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
	import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo;
	import org.apache.carbondata.core.scan.result.vector.impl.directread.ColumnarVectorWrapperDirectFactory;
	import org.apache.carbondata.core.scan.result.vector.impl.directread.ConvertibleVector;
	import org.apache.carbondata.core.util.ByteUtil;
	import org.apache.carbondata.core.util.DataTypeUtil;

	/**
	* Below class is responsible to store variable length dimension data chunk in
	* memory. Memory occupied can be on heap or off-heap using unsafe interface
	*/
	public abstract class SafeVariableLengthDimensionDataChunkStore
	extends SafeAbstractDimensionDataChunkStore {

	/**
	* total number of rows
	*/
	private int numberOfRows;

	/**
	* offset of the data this will be used during search, as we can directly jump
	* to particular location
	*/
	private int[] dataOffsets;

	private int dataLength;

	public SafeVariableLengthDimensionDataChunkStore(boolean isInvertedIndex, int numberOfRows,
	int dataLength) {
	super(isInvertedIndex);
	this.numberOfRows = numberOfRows;
	this.dataOffsets = new int[numberOfRows];
	this.dataLength = dataLength;
	}

	/**
	* Below method will be used to put the rows and its metadata in off-heap
	*
	* @param invertedIndex inverted index to be stored
	* @param invertedIndexReverse inverted index reverse to be stored
	* @param data data to be stored
	*/
	@Override
	public void putArray(final int[] invertedIndex, final int[] invertedIndexReverse,
	byte[] data) {
	// first put the data, inverted index and reverse inverted index to memory
	super.putArray(invertedIndex, invertedIndexReverse, data);
	this.dataOffsets = new int[numberOfRows];
	// As data is of variable length and data format is
	// <length in short><data><length in short><data>
	// we need to store offset of each data so data can be accessed directly
	// for example:
	//data = {0,5,1,2,3,4,5,0,6,0,1,2,3,4,5,0,2,8,9}
	//so value stored in offset will be position of actual data
	// [2,9,17]
	// to store this value we need to get the actual data length + 2 bytes used for storing the
	// length

	// start position will be used to store the current data position
	int startOffset = 0;
	// as first position will be start from 2 byte as data is stored first in the memory block
	// we need to skip first two bytes this is because first two bytes will be length of the data
	// which we have to skip
	dataOffsets[0] = getLengthSize();
	// creating a byte buffer which will wrap the length of the row
	ByteBuffer buffer = ByteBuffer.wrap(data);
	for (int i = 1; i < numberOfRows; i++) {
	buffer.position(startOffset);
	// so current row position will be
	// previous row length + 2 bytes used for storing previous row data
	startOffset += getLengthFromBuffer(buffer) + getLengthSize();
	// as same byte buffer is used to avoid creating many byte buffer for each row
	// we need to clear the byte buffer
	dataOffsets[i] = startOffset + getLengthSize();
	}
	}

	@Override
	public void fillVector(int[] invertedIndex, int[] invertedIndexReverse, byte[] data,
	ColumnVectorInfo vectorInfo) {
	CarbonColumnVector vector = vectorInfo.vector;
	vector.setDictionary(null);
	DataType dt = vector.getType();
	AbstractNonDictionaryVectorFiller vectorFiller = NonDictionaryVectorFillerFactory
	.getVectorFiller(getLengthSize(), dt, numberOfRows, dataLength);
	vector = ColumnarVectorWrapperDirectFactory
	.getDirectVectorWrapperFactory(vectorInfo, vector, invertedIndex, new BitSet(),
	vectorInfo.deletedRows, false, false);
	vectorFiller.fillVector(data, vector);
	if (vector instanceof ConvertibleVector) {
	((ConvertibleVector) vector).convert();
	}
	}

	protected abstract int getLengthSize();

	protected abstract int getLengthFromBuffer(ByteBuffer buffer);

	@Override
	public byte[] getRow(int rowId) {
	// if column was explicitly sorted we need to get the row id based inverted index reverse
	if (isExplicitSorted) {
	rowId = invertedIndexReverse[rowId];
	}
	// now to get the row from memory block we need to do following thing
	// 1. first get the current offset
	// 2. if it's not a last row- get the next row offset
	// Subtract the current row offset + 2 bytes(to skip the data length) with next row offset
	// else subtract the current row offset with complete data
	// length get the offset of set of data
	int currentDataOffset = dataOffsets[rowId];
	int length = 0;
	// calculating the length of data
	if (rowId < numberOfRows - 1) {
	length = dataOffsets[rowId + 1] - (currentDataOffset + getLengthSize());
	} else {
	// for last record
	length = this.dataLength - currentDataOffset;
	}
	byte[] currentRowData = new byte[length];
	System.arraycopy(data, currentDataOffset, currentRowData, 0, length);
	return currentRowData;
	}

	@Override
	public void fillRow(int rowId, CarbonColumnVector vector, int vectorRow) {
	vector.setDictionary(null);
	// if column was explicitly sorted we need to get the row id based inverted index reverse
	if (isExplicitSorted) {
	rowId = invertedIndexReverse[rowId];
	}
	// now to get the row from memory block we need to do following thing
	// 1. first get the current offset
	// 2. if it's not a last row- get the next row offset
	// Subtract the current row offset + 2 bytes(to skip the data length) with next row offset
	// else subtract the current row offset with complete data
	// length get the offset of set of data
	int currentDataOffset = dataOffsets[rowId];
	int length = 0;
	// calculating the length of data
	if (rowId < numberOfRows - 1) {
	length = dataOffsets[rowId + 1] - (currentDataOffset + getLengthSize());
	} else {
	// for last record
	length = this.dataLength - currentDataOffset;
	}
	DataType dt = vector.getType();

	if (((!(dt == DataTypes.STRING) && !(dt == DataTypes.VARCHAR)) && length == 0)
	\|\| ByteUtil.UnsafeComparer.INSTANCE
	.equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, 0,
	CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY.length, data, currentDataOffset,
	length)) {
	vector.putNull(vectorRow);
	} else {
	if (dt == DataTypes.STRING \|\| dt == DataTypes.VARCHAR \|\| dt == DataTypes.BINARY) {
	vector.putByteArray(vectorRow, currentDataOffset, length, data);
	} else if (dt == DataTypes.BOOLEAN) {
	vector.putBoolean(vectorRow, ByteUtil.toBoolean(data[currentDataOffset]));
	} else if (dt == DataTypes.SHORT) {
	vector.putShort(vectorRow, ByteUtil.toXorShort(data, currentDataOffset, length));
	} else if (dt == DataTypes.INT) {
	vector.putInt(vectorRow, ByteUtil.toXorInt(data, currentDataOffset, length));
	} else if (dt == DataTypes.LONG) {
	vector.putLong(vectorRow,
	DataTypeUtil.getDataBasedOnRestructuredDataType(data, vector.getBlockDataType(),
	currentDataOffset, length));
	} else if (dt == DataTypes.TIMESTAMP) {
	vector.putLong(vectorRow, ByteUtil.toXorLong(data, currentDataOffset, length) * 1000L);
	}
	}
	}

	@Override
	public int compareTo(int rowId, byte[] compareValue) {
	// now to get the row from memory block we need to do following thing
	// 1. first get the current offset
	// 2. if it's not a last row- get the next row offset
	// Subtract the current row offset + 2 bytes(to skip the data length) with next row offset
	// else subtract the current row offset with complete data
	// length

	// get the offset of set of data
	int currentDataOffset = dataOffsets[rowId];
	int length = 0;
	// calculating the length of data
	if (rowId < numberOfRows - 1) {
	length = dataOffsets[rowId + 1] - (currentDataOffset + getLengthSize());
	} else {
	// for last record
	length = this.dataLength - currentDataOffset;
	}
	return ByteUtil.UnsafeComparer.INSTANCE
	.compareTo(data, currentDataOffset, length, compareValue, 0, compareValue.length);
	}

	@Override
	public void freeMemory() {
	super.freeMemory();
	dataOffsets = null;
	}
	}