| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.carbondata.core.datastore.chunk.reader.dimension.v3; |
| |
| import java.io.ByteArrayInputStream; |
| import java.io.IOException; |
| import java.nio.ByteBuffer; |
| |
| import org.apache.carbondata.core.datastore.FileReader; |
| import org.apache.carbondata.core.datastore.ReusableDataBuffer; |
| import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage; |
| import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; |
| import org.apache.carbondata.core.datastore.compression.CompressorFactory; |
| import org.apache.carbondata.core.metadata.blocklet.BlockletInfo; |
| import org.apache.carbondata.core.util.CarbonMetadataUtil; |
| import org.apache.carbondata.core.util.CarbonUtil; |
| import org.apache.carbondata.format.DataChunk2; |
| import org.apache.carbondata.format.DataChunk3; |
| import org.apache.carbondata.format.Encoding; |
| |
| /** |
| * Dimension column V3 Reader class which will be used to read and uncompress |
| * V3 format data. It reads the data in each page at once unlike whole blocklet. It is |
| * used for memory constraint operations like compaction. |
| * data format |
| * Data Format |
| * <FileHeader> |
| * <Column1 Data ChunkV3><Column1<Page1><Page2><Page3><Page4>> |
| * <Column2 Data ChunkV3><Column2<Page1><Page2><Page3><Page4>> |
| * <Column3 Data ChunkV3><Column3<Page1><Page2><Page3><Page4>> |
| * <Column4 Data ChunkV3><Column4<Page1><Page2><Page3><Page4>> |
| * <File Footer> |
| */ |
| public class DimensionChunkPageReaderV3 extends DimensionChunkReaderV3 { |
| |
| /** |
| * end position of last dimension in carbon data file |
| */ |
| private long lastDimensionOffsets; |
| |
| public DimensionChunkPageReaderV3(BlockletInfo blockletInfo, String filePath) { |
| super(blockletInfo, filePath); |
| lastDimensionOffsets = blockletInfo.getDimensionOffset(); |
| } |
| |
| /** |
| * Below method will be used to read the dimension column data form carbon data file |
| * Steps for reading |
| * 1. Get the length of the data to be read |
| * 2. Allocate the direct buffer |
| * 3. read the data from file |
| * 4. Get the data chunk object from data read |
| * 5. Create the raw chunk object and fill the details |
| * |
| * @param fileReader reader for reading the column from carbon data file |
| * @param blockletColumnIndex blocklet index of the column in carbon data file |
| * @return dimension raw chunk |
| */ |
| @Override |
| public DimensionRawColumnChunk readRawDimensionChunk(FileReader fileReader, |
| int blockletColumnIndex) throws IOException { |
| // get the current dimension offset |
| long currentDimensionOffset = dimensionChunksOffset.get(blockletColumnIndex); |
| int length = 0; |
| // to calculate the length of the data to be read |
| // column other than last column we can subtract the offset of current column with |
| // next column and get the total length. |
| // but for last column we need to use lastDimensionOffset which is the end position |
| // of the last dimension, we can subtract current dimension offset from lastDimensionOffset |
| if (dimensionChunksOffset.size() - 1 == blockletColumnIndex) { |
| length = (int) (lastDimensionOffsets - currentDimensionOffset); |
| } else { |
| length = (int) (dimensionChunksOffset.get(blockletColumnIndex + 1) - currentDimensionOffset); |
| } |
| ByteBuffer buffer; |
| // read the data from carbon data file |
| synchronized (fileReader) { |
| buffer = fileReader.readByteBuffer(filePath, currentDimensionOffset, |
| dimensionChunksLength.get(blockletColumnIndex)); |
| } |
| // get the data chunk which will have all the details about the data pages |
| DataChunk3 dataChunk = CarbonUtil.readDataChunk3(new ByteArrayInputStream(buffer.array())); |
| DimensionRawColumnChunk rawColumnChunk = |
| getDimensionRawColumnChunk(fileReader, blockletColumnIndex, currentDimensionOffset, length, |
| null, dataChunk); |
| |
| return rawColumnChunk; |
| } |
| |
| /** |
| * Below method will be used to read the multiple dimension column data in group |
| * and divide into dimension raw chunk object |
| * Steps for reading |
| * 1. Get the length of the data to be read |
| * 2. Allocate the direct buffer |
| * 3. read the data from file |
| * 4. Get the data chunk object from file for each column |
| * 5. Create the raw chunk object and fill the details for each column |
| * 6. increment the offset of the data |
| * |
| * @param fileReader reader which will be used to read the dimension columns data from file |
| * @param startBlockletColumnIndex blocklet index of the first dimension column |
| * @param endBlockletColumnIndex blocklet index of the last dimension column |
| * @ DimensionRawColumnChunk array |
| */ |
| protected DimensionRawColumnChunk[] readRawDimensionChunksInGroup(FileReader fileReader, |
| int startBlockletColumnIndex, int endBlockletColumnIndex) throws IOException { |
| // create raw chunk for each dimension column |
| DimensionRawColumnChunk[] dimensionDataChunks = |
| new DimensionRawColumnChunk[endBlockletColumnIndex - startBlockletColumnIndex + 1]; |
| int index = 0; |
| for (int i = startBlockletColumnIndex; i <= endBlockletColumnIndex; i++) { |
| dimensionDataChunks[index] = readRawDimensionChunk(fileReader, i); |
| index++; |
| } |
| return dimensionDataChunks; |
| } |
| |
| /** |
| * Below method will be used to convert the compressed dimension chunk raw data to actual data |
| * |
| * @param dimensionRawColumnChunk dimension raw chunk |
| * @param pageNumber number |
| * @return DimensionColumnDataChunk |
| */ |
| @Override |
| public DimensionColumnPage decodeColumnPage( |
| DimensionRawColumnChunk dimensionRawColumnChunk, int pageNumber, |
| ReusableDataBuffer reusableDataBuffer) |
| throws IOException { |
| // data chunk of page |
| DataChunk2 pageMetadata = null; |
| // data chunk of blocklet column |
| DataChunk3 dataChunk3 = dimensionRawColumnChunk.getDataChunkV3(); |
| |
| pageMetadata = dataChunk3.getData_chunk_list().get(pageNumber); |
| |
| if (compressor == null) { |
| this.compressor = CompressorFactory.getInstance().getCompressor( |
| CarbonMetadataUtil.getCompressorNameFromChunkMeta(pageMetadata.getChunk_meta())); |
| } |
| // calculating the start point of data |
| // as buffer can contain multiple column data, start point will be data chunk offset + |
| // data chunk length + page offset |
| long offset = dimensionRawColumnChunk.getOffSet() + dimensionChunksLength |
| .get(dimensionRawColumnChunk.getColumnIndex()) + dataChunk3.getPage_offset() |
| .get(pageNumber); |
| int length = pageMetadata.data_page_length; |
| if (CarbonUtil.hasEncoding(pageMetadata.encoders, Encoding.INVERTED_INDEX)) { |
| length += pageMetadata.rowid_page_length; |
| } |
| |
| if (CarbonUtil.hasEncoding(pageMetadata.encoders, Encoding.RLE)) { |
| length += pageMetadata.rle_page_length; |
| } |
| // get the data buffer |
| ByteBuffer rawData = dimensionRawColumnChunk.getFileReader() |
| .readByteBuffer(filePath, offset, length); |
| |
| return decodeDimension(dimensionRawColumnChunk, rawData, pageMetadata, 0, null, |
| reusableDataBuffer); |
| } |
| } |