// core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/DimensionRawColumnChunk.java - carbondata - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.carbondata.core.datastore.chunk.impl;

 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.BitSet;
 import java.util.List;

 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datastore.FileReader;
 import org.apache.carbondata.core.datastore.ReusableDataBuffer;
 import org.apache.carbondata.core.datastore.chunk.AbstractRawColumnChunk;
 import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage;
 import org.apache.carbondata.core.datastore.chunk.reader.DimensionColumnChunkReader;
 import org.apache.carbondata.core.datastore.compression.Compressor;
 import org.apache.carbondata.core.datastore.compression.CompressorFactory;
 import org.apache.carbondata.core.datastore.page.ColumnPage;
 import org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder;
 import org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory;
 import org.apache.carbondata.core.scan.result.vector.CarbonDictionary;
 import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo;
 import org.apache.carbondata.core.scan.result.vector.impl.CarbonDictionaryImpl;
 import org.apache.carbondata.core.util.CarbonMetadataUtil;
 import org.apache.carbondata.format.Encoding;
 import org.apache.carbondata.format.LocalDictionaryChunk;

 /**
  * Contains raw dimension data,
  * 1. The raw data read for the column chunk, covering all of its pages, is stored in this
  *    instance (it is handed to {@link AbstractRawColumnChunk} by the constructor).
  * 2. The raw data can be converted to a processed {@link DimensionColumnPage} by page number;
  *    the decoding itself is delegated to the {@link DimensionColumnChunkReader} supplied at
  *    construction time.
  * <p>
  * Pages decoded through {@link #decodeColumnPage(int)} and {@link #decodeAllColumnPages()} are
  * kept in this instance until {@link #freeMemory()} is called.
  */
 public class DimensionRawColumnChunk extends AbstractRawColumnChunk {

   /** Decoded pages indexed by page number; allocated lazily, a slot is null until decoded. */
   private DimensionColumnPage[] dataChunks;

   /** Reader every decode request of this chunk is delegated to. */
   private final DimensionColumnChunkReader chunkReader;

   private FileReader fileReader;

   /** Local dictionary, built on the first successful {@link #getLocalDictionary()} call. */
   private CarbonDictionary localDictionary;

   /**
    * Creates a raw chunk; columnIndex, rawData, offSet and length are passed to the super class
    * unchanged.
    *
    * @param columnChunkReader reader used later to decode pages out of this chunk
    */
   public DimensionRawColumnChunk(int columnIndex, ByteBuffer rawData, long offSet, int length,
       DimensionColumnChunkReader columnChunkReader) {
     super(columnIndex, rawData, offSet, length);
     this.chunkReader = columnChunkReader;
   }

   /**
    * Convert all raw data with all pages to processed DimensionColumnPage's.
    * Pages that are already cached are not decoded again.
    *
    * @return the internal cache array holding one decoded page per page number
    * @throws RuntimeException wrapping the IOException raised by the reader
    */
   public DimensionColumnPage[] decodeAllColumnPages() {
     // allocate up front so that an array is returned even when there is no page at all
     DimensionColumnPage[] pages = pageCache();
     for (int pageNumber = 0; pageNumber < pagesCount; pageNumber++) {
       decodeAndCache(pageNumber);
     }
     return pages;
   }

   /**
    * Convert raw data with specified page number processed to DimensionColumnPage.
    * The decoded page is cached, a later call with the same page number returns the same object.
    *
    * @param pageNumber page to decode, expected to be smaller than pagesCount
    * @return decoded page
    * @throws RuntimeException wrapping the IOException raised by the reader
    */
   public DimensionColumnPage decodeColumnPage(int pageNumber) {
     assert pageNumber < pagesCount;
     return decodeAndCache(pageNumber);
   }

   /**
    * Returns the page cache, creating it with one slot per page on first use.
    */
   private DimensionColumnPage[] pageCache() {
     if (dataChunks == null) {
       dataChunks = new DimensionColumnPage[pagesCount];
     }
     return dataChunks;
   }

   /**
    * Decodes the given page unless it is cached already, stores it in the cache and returns it.
    */
   private DimensionColumnPage decodeAndCache(int pageNumber) {
     DimensionColumnPage[] pages = pageCache();
     if (pages[pageNumber] == null) {
       try {
         // no reusable buffer is passed on the caching path
         pages[pageNumber] = chunkReader.decodeColumnPage(this, pageNumber, null);
       } catch (IOException e) {
         throw new RuntimeException(e);
       }
     }
     return pages[pageNumber];
   }

   /**
    * Convert raw data with specified page number processed to DimensionColumnDataChunk
    * without storing the result in this instance.
    *
    * @param index page to decode, expected to be smaller than pagesCount
    * @param reusableDataBuffer buffer handed to the reader as is
    * @return the cached page when one exists for index, otherwise a freshly decoded page
    * @throws RuntimeException wrapping any exception raised while decoding
    */
   public DimensionColumnPage convertToDimColDataChunkWithOutCache(int index,
       ReusableDataBuffer reusableDataBuffer) {
     assert index < pagesCount;
     // in case of filter query the filter column may already be decoded and stored,
     // then return the same
     if (dataChunks != null && dataChunks[index] != null) {
       return dataChunks[index];
     }
     try {
       return chunkReader.decodeColumnPage(this, index, reusableDataBuffer);
     } catch (Exception e) {
       throw new RuntimeException(e);
     }
   }

   /**
    * Convert raw data with specified page number processed to DimensionColumnDataChunk and fill
    * the vector
    *
    * @param pageNumber page number to decode and fill the vector
    * @param vectorInfo vector to be filled with column page
    * @param reusableDataBuffer buffer handed to the reader as is
    * @throws RuntimeException wrapping any exception raised while decoding
    */
   public void convertToDimColDataChunkAndFillVector(int pageNumber, ColumnVectorInfo vectorInfo,
       ReusableDataBuffer reusableDataBuffer) {
     assert pageNumber < pagesCount;
     try {
       chunkReader.decodeColumnPageAndFillVector(this, pageNumber, vectorInfo, reusableDataBuffer);
     } catch (Exception e) {
       throw new RuntimeException(e);
     }
   }

   /**
    * Frees the super class resources, releases every cached decoded page and drops the
    * reference to the raw data.
    */
   @Override
   public void freeMemory() {
     super.freeMemory();
     if (dataChunks != null) {
       for (int i = 0; i < dataChunks.length; i++) {
         if (dataChunks[i] != null) {
           dataChunks[i].freeMemory();
           // clear the slot so a freed page is never handed out again
           dataChunks[i] = null;
         }
       }
     }
     rawData = null;
   }

   public void setFileReader(FileReader fileReader) {
     this.fileReader = fileReader;
   }

   public FileReader getFileReader() {
     return fileReader;
   }

   /**
    * Returns the local dictionary of this chunk, building it on first use.
    * The compressor is looked up by the name found in the chunk meta of the first entry of the
    * V3 data chunk list.
    *
    * @return the local dictionary, or null when there is no V3 data chunk or it carries no
    *         local dictionary
    * @throws RuntimeException wrapping the IOException raised while building the dictionary
    */
   public CarbonDictionary getLocalDictionary() {
     if (localDictionary != null || getDataChunkV3() == null
         || getDataChunkV3().local_dictionary == null) {
       return localDictionary;
     }
     try {
       String compressorName = CarbonMetadataUtil.getCompressorNameFromChunkMeta(
           getDataChunkV3().data_chunk_list.get(0).chunk_meta);
       Compressor compressor = CompressorFactory.getInstance().getCompressor(compressorName);
       localDictionary = getDictionary(getDataChunkV3().local_dictionary, compressor);
     } catch (IOException e) {
       throw new RuntimeException(e);
     }
     return localDictionary;
   }

   /**
    * Below method will be used to get the local dictionary for a blocklet.
    * The dictionary data is decoded into a page, the dictionary values field is uncompressed
    * into a bit set of used dictionary slots, and the i-th set bit receives the i-th decoded
    * entry. Slots whose bit is not set stay null.
    *
    * @param localDictionaryChunk
    * local dictionary chunk thrift object, may be null
    * @param compressor compressor used for the decoder lookup and to uncompress the used-slot
    * bit set
    * @return local dictionary, or null when localDictionaryChunk is null
    * @throws IOException
    */
   public static CarbonDictionary getDictionary(LocalDictionaryChunk localDictionaryChunk,
       Compressor compressor) throws IOException {
     if (localDictionaryChunk == null) {
       return null;
     }
     List<Encoding> encodings = localDictionaryChunk.getDictionary_meta().getEncoders();
     List<ByteBuffer> encoderMetas = localDictionaryChunk.getDictionary_meta().getEncoder_meta();
     ColumnPageDecoder decoder = DefaultEncodingFactory.getInstance().createDecoder(
         encodings, encoderMetas, compressor.getName());
     ColumnPage decode = decoder.decode(localDictionaryChunk.getDictionary_data(), 0,
         localDictionaryChunk.getDictionary_data().length);
     try {
       BitSet usedDictionary = BitSet.valueOf(compressor.unCompressByte(
           localDictionaryChunk.getDictionary_values()));
       // length() is the highest set bit + 1, so every used slot fits in the array
       byte[][] dictionary = new byte[usedDictionary.length()][];
       int index = 0;
       for (int slot = usedDictionary.nextSetBit(0); slot >= 0;
           slot = usedDictionary.nextSetBit(slot + 1)) {
         dictionary[slot] = decode.getBytes(index++);
       }
       // as dictionary values starts from 1 setting null default value; the length check keeps
       // a bit set that only has bit 0 set from running past the end of the array
       if (dictionary.length > 1) {
         dictionary[1] = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
       }
       return new CarbonDictionaryImpl(dictionary, usedDictionary.cardinality());
     } finally {
       // release the decoded page even when uncompressing or reading an entry fails
       decode.freeMemory();
     }
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.carbondata.core.datastore.chunk.impl;

	import java.io.IOException;
	import java.nio.ByteBuffer;
	import java.util.BitSet;
	import java.util.List;

	import org.apache.carbondata.core.constants.CarbonCommonConstants;
	import org.apache.carbondata.core.datastore.FileReader;
	import org.apache.carbondata.core.datastore.ReusableDataBuffer;
	import org.apache.carbondata.core.datastore.chunk.AbstractRawColumnChunk;
	import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage;
	import org.apache.carbondata.core.datastore.chunk.reader.DimensionColumnChunkReader;
	import org.apache.carbondata.core.datastore.compression.Compressor;
	import org.apache.carbondata.core.datastore.compression.CompressorFactory;
	import org.apache.carbondata.core.datastore.page.ColumnPage;
	import org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder;
	import org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory;
	import org.apache.carbondata.core.scan.result.vector.CarbonDictionary;
	import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo;
	import org.apache.carbondata.core.scan.result.vector.impl.CarbonDictionaryImpl;
	import org.apache.carbondata.core.util.CarbonMetadataUtil;
	import org.apache.carbondata.format.Encoding;
	import org.apache.carbondata.format.LocalDictionaryChunk;

	/**
	* Contains raw dimension data,
	* 1. The read uncompressed raw data of column chunk with all pages is stored in this instance.
	* 2. The raw data can be converted to processed chunk using decodeColumnPage method
	* by specifying page number.
	*/
	public class DimensionRawColumnChunk extends AbstractRawColumnChunk {

	  /** Decoded pages indexed by page number; allocated lazily, a slot is null until decoded. */
	  private DimensionColumnPage[] dataChunks;

	  /** Reader every decode request of this chunk is delegated to. */
	  private final DimensionColumnChunkReader chunkReader;

	  private FileReader fileReader;

	  /** Local dictionary, built on the first successful {@link #getLocalDictionary()} call. */
	  private CarbonDictionary localDictionary;

	  /**
	   * Creates a raw chunk; columnIndex, rawData, offSet and length are passed to the super
	   * class unchanged.
	   *
	   * @param columnChunkReader reader used later to decode pages out of this chunk
	   */
	  public DimensionRawColumnChunk(int columnIndex, ByteBuffer rawData, long offSet, int length,
	      DimensionColumnChunkReader columnChunkReader) {
	    super(columnIndex, rawData, offSet, length);
	    this.chunkReader = columnChunkReader;
	  }

	  /**
	   * Convert all raw data with all pages to processed DimensionColumnPage's.
	   * Pages that are already cached are not decoded again.
	   *
	   * @return the internal cache array holding one decoded page per page number
	   * @throws RuntimeException wrapping the IOException raised by the reader
	   */
	  public DimensionColumnPage[] decodeAllColumnPages() {
	    // allocate up front so that an array is returned even when there is no page at all
	    DimensionColumnPage[] pages = pageCache();
	    for (int pageNumber = 0; pageNumber < pagesCount; pageNumber++) {
	      decodeAndCache(pageNumber);
	    }
	    return pages;
	  }

	  /**
	   * Convert raw data with specified page number processed to DimensionColumnPage.
	   * The decoded page is cached, a later call with the same page number returns the same
	   * object.
	   *
	   * @param pageNumber page to decode, expected to be smaller than pagesCount
	   * @return decoded page
	   * @throws RuntimeException wrapping the IOException raised by the reader
	   */
	  public DimensionColumnPage decodeColumnPage(int pageNumber) {
	    assert pageNumber < pagesCount;
	    return decodeAndCache(pageNumber);
	  }

	  /**
	   * Returns the page cache, creating it with one slot per page on first use.
	   */
	  private DimensionColumnPage[] pageCache() {
	    if (dataChunks == null) {
	      dataChunks = new DimensionColumnPage[pagesCount];
	    }
	    return dataChunks;
	  }

	  /**
	   * Decodes the given page unless it is cached already, stores it in the cache and returns it.
	   */
	  private DimensionColumnPage decodeAndCache(int pageNumber) {
	    DimensionColumnPage[] pages = pageCache();
	    if (pages[pageNumber] == null) {
	      try {
	        // no reusable buffer is passed on the caching path
	        pages[pageNumber] = chunkReader.decodeColumnPage(this, pageNumber, null);
	      } catch (IOException e) {
	        throw new RuntimeException(e);
	      }
	    }
	    return pages[pageNumber];
	  }

	  /**
	   * Convert raw data with specified page number processed to DimensionColumnDataChunk
	   * without storing the result in this instance.
	   *
	   * @param index page to decode, expected to be smaller than pagesCount
	   * @param reusableDataBuffer buffer handed to the reader as is
	   * @return the cached page when one exists for index, otherwise a freshly decoded page
	   * @throws RuntimeException wrapping any exception raised while decoding
	   */
	  public DimensionColumnPage convertToDimColDataChunkWithOutCache(int index,
	      ReusableDataBuffer reusableDataBuffer) {
	    assert index < pagesCount;
	    // in case of filter query the filter column may already be decoded and stored,
	    // then return the same
	    if (dataChunks != null && dataChunks[index] != null) {
	      return dataChunks[index];
	    }
	    try {
	      return chunkReader.decodeColumnPage(this, index, reusableDataBuffer);
	    } catch (Exception e) {
	      throw new RuntimeException(e);
	    }
	  }

	  /**
	   * Convert raw data with specified page number processed to DimensionColumnDataChunk and
	   * fill the vector
	   *
	   * @param pageNumber page number to decode and fill the vector
	   * @param vectorInfo vector to be filled with column page
	   * @param reusableDataBuffer buffer handed to the reader as is
	   * @throws RuntimeException wrapping any exception raised while decoding
	   */
	  public void convertToDimColDataChunkAndFillVector(int pageNumber, ColumnVectorInfo vectorInfo,
	      ReusableDataBuffer reusableDataBuffer) {
	    assert pageNumber < pagesCount;
	    try {
	      chunkReader.decodeColumnPageAndFillVector(this, pageNumber, vectorInfo,
	          reusableDataBuffer);
	    } catch (Exception e) {
	      throw new RuntimeException(e);
	    }
	  }

	  /**
	   * Frees the super class resources, releases every cached decoded page and drops the
	   * reference to the raw data.
	   */
	  @Override
	  public void freeMemory() {
	    super.freeMemory();
	    if (dataChunks != null) {
	      for (int i = 0; i < dataChunks.length; i++) {
	        if (dataChunks[i] != null) {
	          dataChunks[i].freeMemory();
	          // clear the slot so a freed page is never handed out again
	          dataChunks[i] = null;
	        }
	      }
	    }
	    rawData = null;
	  }

	  public void setFileReader(FileReader fileReader) {
	    this.fileReader = fileReader;
	  }

	  public FileReader getFileReader() {
	    return fileReader;
	  }

	  /**
	   * Returns the local dictionary of this chunk, building it on first use.
	   * The compressor is looked up by the name found in the chunk meta of the first entry of
	   * the V3 data chunk list.
	   *
	   * @return the local dictionary, or null when there is no V3 data chunk or it carries no
	   *         local dictionary
	   * @throws RuntimeException wrapping the IOException raised while building the dictionary
	   */
	  public CarbonDictionary getLocalDictionary() {
	    if (localDictionary != null || getDataChunkV3() == null
	        || getDataChunkV3().local_dictionary == null) {
	      return localDictionary;
	    }
	    try {
	      String compressorName = CarbonMetadataUtil.getCompressorNameFromChunkMeta(
	          getDataChunkV3().data_chunk_list.get(0).chunk_meta);
	      Compressor compressor = CompressorFactory.getInstance().getCompressor(compressorName);
	      localDictionary = getDictionary(getDataChunkV3().local_dictionary, compressor);
	    } catch (IOException e) {
	      throw new RuntimeException(e);
	    }
	    return localDictionary;
	  }

	  /**
	   * Below method will be used to get the local dictionary for a blocklet.
	   * The dictionary data is decoded into a page, the dictionary values field is uncompressed
	   * into a bit set of used dictionary slots, and the i-th set bit receives the i-th decoded
	   * entry. Slots whose bit is not set stay null.
	   *
	   * @param localDictionaryChunk
	   * local dictionary chunk thrift object, may be null
	   * @param compressor compressor used for the decoder lookup and to uncompress the used-slot
	   * bit set
	   * @return local dictionary, or null when localDictionaryChunk is null
	   * @throws IOException
	   */
	  public static CarbonDictionary getDictionary(LocalDictionaryChunk localDictionaryChunk,
	      Compressor compressor) throws IOException {
	    if (localDictionaryChunk == null) {
	      return null;
	    }
	    List<Encoding> encodings = localDictionaryChunk.getDictionary_meta().getEncoders();
	    List<ByteBuffer> encoderMetas = localDictionaryChunk.getDictionary_meta().getEncoder_meta();
	    ColumnPageDecoder decoder = DefaultEncodingFactory.getInstance().createDecoder(
	        encodings, encoderMetas, compressor.getName());
	    ColumnPage decode = decoder.decode(localDictionaryChunk.getDictionary_data(), 0,
	        localDictionaryChunk.getDictionary_data().length);
	    try {
	      BitSet usedDictionary = BitSet.valueOf(compressor.unCompressByte(
	          localDictionaryChunk.getDictionary_values()));
	      // length() is the highest set bit + 1, so every used slot fits in the array
	      byte[][] dictionary = new byte[usedDictionary.length()][];
	      int index = 0;
	      for (int slot = usedDictionary.nextSetBit(0); slot >= 0;
	          slot = usedDictionary.nextSetBit(slot + 1)) {
	        dictionary[slot] = decode.getBytes(index++);
	      }
	      // as dictionary values starts from 1 setting null default value; the length check keeps
	      // a bit set that only has bit 0 set from running past the end of the array
	      if (dictionary.length > 1) {
	        dictionary[1] = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
	      }
	      return new CarbonDictionaryImpl(dictionary, usedDictionary.cardinality());
	    } finally {
	      // release the decoded page even when uncompressing or reading an entry fails
	      decode.freeMemory();
	    }
	  }

	}