blob: 4bbd93118900e705a804aff52a2a8a368c61e04a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.presto
import java.io.ByteArrayOutputStream
import java.util.Optional

import com.facebook.presto.spi.block.{Block, VariableWidthBlock}
import io.airlift.slice.Slices._

import org.apache.carbondata.core.cache.{Cache, CacheProvider, CacheType}
import org.apache.carbondata.core.cache.dictionary.{Dictionary, DictionaryChunksWrapper, DictionaryColumnUniqueIdentifier}
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.metadata.datatype.{DataType, DataTypes}
import org.apache.carbondata.core.metadata.encoder.Encoding
import org.apache.carbondata.core.metadata.schema.table.CarbonTable
import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn
import org.apache.carbondata.core.util.CarbonUtil
import org.apache.carbondata.hadoop.readsupport.CarbonReadSupport
/**
* This is the class to decode dictionary encoded column data back to its original value.
*/
class CarbonDictionaryDecodeReadSupport[T] extends CarbonReadSupport[T] {

  // One slot per projected column; an entry stays null for columns that are
  // not (forward-)dictionary encoded and therefore need no decoding.
  private var dictionaries: Array[Dictionary] = _
  private var dataTypes: Array[DataType] = _

  /**
   * Initializes the per-column data types and, for every dictionary-encoded
   * (but not direct-dictionary, not complex) column, loads its forward
   * dictionary from the cache. This runs inside the executor task.
   *
   * @param carbonColumns columns being projected, in projection order
   * @param carbonTable   table the columns belong to
   */
  override def initialize(carbonColumns: Array[CarbonColumn], carbonTable: CarbonTable) {
    dictionaries = new Array[Dictionary](carbonColumns.length)
    dataTypes = new Array[DataType](carbonColumns.length)
    // Hoisted out of the loop (it is loop-invariant); lazy so the forward
    // dictionary cache is only created when at least one column needs it.
    lazy val forwardDictionaryCache: Cache[DictionaryColumnUniqueIdentifier, Dictionary] =
      CacheProvider.getInstance.createCache(CacheType.FORWARD_DICTIONARY)
    carbonColumns.zipWithIndex.foreach { case (carbonColumn, index) =>
      // The data type is recorded for every column, dictionary-encoded or not.
      dataTypes(index) = carbonColumn.getDataType
      if (carbonColumn.hasEncoding(Encoding.DICTIONARY) &&
          !carbonColumn.hasEncoding(Encoding.DIRECT_DICTIONARY) &&
          !carbonColumn.isComplex) {
        val dictionaryPath: String = carbonTable.getTableInfo.getFactTable.getTableProperties
          .get(CarbonCommonConstants.DICTIONARY_PATH)
        dictionaries(index) = forwardDictionaryCache.get(
          new DictionaryColumnUniqueIdentifier(carbonTable.getAbsoluteTableIdentifier,
            carbonColumn.getColumnIdentifier, dataTypes(index), dictionaryPath))
      }
    }
  }

  /**
   * Builds a Presto [[VariableWidthBlock]] holding every value of the given
   * dictionary, indexed by surrogate key.
   *
   * CarbonData surrogate keys start at 1 and key 1 is reserved for null, so
   * position 0 is a pure filler and position 1 maps to the null entry; both
   * are flagged in the null vector. The null flags therefore have no bearing
   * on actual data.
   *
   * @param dictionaryData dictionary whose values are materialized
   * @return block containing the dictionary values
   */
  private def createDictionaryBlock(dictionaryData: Dictionary): Block = {
    val chunks: DictionaryChunksWrapper = dictionaryData.getDictionaryChunks
    val positionCount = chunks.getSize
    // +2 offsets: one extra for the 0th filler position plus the trailing
    // end-offset VariableWidthBlock requires; +1 null flag for the filler.
    val offsetVector: Array[Int] = new Array[Int](positionCount + 2)
    val isNullVector: Array[Boolean] = new Array[Boolean](positionCount + 1)
    // Position 0 is the filler (keys start at 1); position 1 is the actual
    // null entry.
    isNullVector(0) = true
    isNullVector(1) = true
    // Accumulate value bytes in a growable buffer instead of repeatedly
    // concatenating arrays, which would be quadratic in the dictionary size.
    val bytes = new ByteArrayOutputStream()
    var count = 0
    // 0th position -> offset 0 (the filler occupies no bytes).
    offsetVector(0) = 0
    while (chunks.hasNext) {
      val value: Array[Byte] = chunks.next
      if (count == 0) {
        // The first chunk is the null entry: it contributes no bytes, so both
        // its offset and the next value's starting offset stay at 0.
        offsetVector(count + 1) = 0
        offsetVector(count + 2) = 0
      } else {
        bytes.write(value, 0, value.length)
        offsetVector(count + 2) = bytes.size()
      }
      count += 1
    }
    val byteArray = bytes.toByteArray
    new VariableWidthBlock(positionCount + 1,
      wrappedBuffer(byteArray, 0, byteArray.length),
      offsetVector,
      // isNullVector is never null here, so Optional.of is sufficient.
      Optional.of(isNullVector))
  }

  /**
   * Row-wise reads are not supported by this read support.
   *
   * @throws UnsupportedOperationException always (a RuntimeException subtype,
   *                                       so existing callers are unaffected)
   */
  override def readRow(data: Array[AnyRef]): T = {
    throw new UnsupportedOperationException("UnSupported Method")
  }

  /** @return per-column dictionaries; entries are null for non-dictionary columns */
  def getDictionaries: Array[Dictionary] = dictionaries

  /** @return per-column data types, in projection order */
  def getDataTypes: Array[DataType] = dataTypes

  /**
   * Clears the dictionary cache entries (updating access counts so the LRU
   * policy can evict them when the memory threshold is reached) for each
   * column involved in decoding. Safe to call even if [[initialize]] was
   * never invoked.
   */
  override def close() {
    // Guard against close() before initialize(): dictionaries may be null.
    Option(dictionaries).getOrElse(Array.empty[Dictionary]).foreach { dictionary =>
      if (dictionary != null) {
        CarbonUtil.clearDictionaryCache(dictionary)
      }
    }
  }
}