/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.core.util;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import java.util.Map;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.block.BlockInfo;
import org.apache.carbondata.core.datastore.block.TableBlockInfo;
import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
import org.apache.carbondata.core.metadata.ValueEncoderMeta;
import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;
import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
import org.apache.carbondata.core.metadata.blocklet.SegmentInfo;
import org.apache.carbondata.core.metadata.blocklet.datachunk.DataChunk;
import org.apache.carbondata.core.metadata.blocklet.index.BlockletBTreeIndex;
import org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex;
import org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.encoder.Encoding;
import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
import org.apache.carbondata.core.reader.CarbonIndexFileReader;
import org.apache.carbondata.core.util.path.CarbonTablePath;
import org.apache.carbondata.format.BlockIndex;
/**
* Abstract footer reader that converts the thrift footer and index metadata of a
* CarbonData file into the corresponding wrapper objects
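*
* <p>Illustrative usage, a minimal sketch that assumes a concrete subclass such as
* {@link DataFileFooterConverterV3} and a hypothetical index file path:
* <pre>
* AbstractDataFileFooterConverter converter = new DataFileFooterConverterV3();
* List&lt;DataFileFooter&gt; footers =
*     converter.getIndexInfo("/store/default/t1/Fact/Part0/Segment_0/sample.carbonindex");
* </pre>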
*/
public abstract class AbstractDataFileFooterConverter {
/**
* Below method will be used to convert the thrift presence meta to the wrapper
* presence meta (the bit set marking null values in a column data chunk)
*
* @param presentMetadataThrift thrift presence meta read from the data chunk
* @return wrapper presence meta as a BitSet
*/
private static BitSet getPresenceMeta(
org.apache.carbondata.format.PresenceMeta presentMetadataThrift) {
return BitSet.valueOf(presentMetadataThrift.getPresent_bit_stream());
}
/**
* Below method will be used to get the index info from the index file
*
* @param filePath file path of the index file
* @param tableBlockInfoList list of table block info whose footer metadata has to be filled
* @return list of index info
* @throws IOException problem while reading the index file
*/
public List<DataFileFooter> getIndexInfo(String filePath, List<TableBlockInfo> tableBlockInfoList)
throws IOException {
CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
List<DataFileFooter> dataFileFooters = new ArrayList<DataFileFooter>();
try {
// open the reader
indexReader.openThriftReader(filePath);
// get the index header
org.apache.carbondata.format.IndexHeader readIndexHeader = indexReader.readIndexHeader();
List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
List<org.apache.carbondata.format.ColumnSchema> table_columns =
readIndexHeader.getTable_columns();
for (int i = 0; i < table_columns.size(); i++) {
columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
}
// get the segment info
SegmentInfo segmentInfo = getSegmentInfo(readIndexHeader.getSegment_info());
BlockletIndex blockletIndex = null;
int counter = 0;
int index = 0;
DataFileFooter dataFileFooter = null;
// read the block info from file
while (indexReader.hasNext()) {
BlockIndex readBlockIndexInfo = indexReader.readBlockIndexInfo();
blockletIndex = getBlockletIndex(readBlockIndexInfo.getBlock_index());
dataFileFooter = new DataFileFooter();
TableBlockInfo tableBlockInfo = tableBlockInfoList.get(index);
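// index entries are read in order and are assumed to follow the part numbers of the
// data files, so an entry belongs to the current block when the block's part number
// (parsed from its file path) equals the running entry counter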
if (Integer.parseInt(CarbonTablePath.DataFileUtil.getPartNo(
tableBlockInfo.getFilePath())) == counter++) {
tableBlockInfo.setBlockOffset(readBlockIndexInfo.getOffset());
tableBlockInfo.setVersion(
ColumnarFormatVersion.valueOf((short) readIndexHeader.getVersion()));
int blockletSize = getBlockletSize(readBlockIndexInfo);
tableBlockInfo.getBlockletInfos().setNoOfBlockLets(blockletSize);
dataFileFooter.setBlockletIndex(blockletIndex);
dataFileFooter.setColumnInTable(columnSchemaList);
dataFileFooter.setNumberOfRows(readBlockIndexInfo.getNum_rows());
dataFileFooter.setBlockInfo(new BlockInfo(tableBlockInfo));
dataFileFooter.setSegmentInfo(segmentInfo);
dataFileFooters.add(dataFileFooter);
if (++index == tableBlockInfoList.size()) {
break;
}
}
}
} finally {
indexReader.closeThriftReader();
}
return dataFileFooters;
}
/**
* Below method will be used to get the index info from the index file
*
* @param filePath file path of the index file
* @return list of index info
* @throws IOException problem while reading the index file
*/
public List<DataFileFooter> getIndexInfo(String filePath) throws IOException {
CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
List<DataFileFooter> dataFileFooters = new ArrayList<DataFileFooter>();
String parentPath = filePath.substring(0, filePath.lastIndexOf("/"));
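// the data files described by the index entries are expected to sit in the same
// directory as the index file, so their paths are rebuilt from the index file's parent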
try {
// open the reader
indexReader.openThriftReader(filePath);
// get the index header
org.apache.carbondata.format.IndexHeader readIndexHeader = indexReader.readIndexHeader();
List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
List<org.apache.carbondata.format.ColumnSchema> table_columns =
readIndexHeader.getTable_columns();
for (int i = 0; i < table_columns.size(); i++) {
columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
}
// get the segment info
SegmentInfo segmentInfo = getSegmentInfo(readIndexHeader.getSegment_info());
BlockletIndex blockletIndex = null;
DataFileFooter dataFileFooter = null;
// read the block info from file
while (indexReader.hasNext()) {
BlockIndex readBlockIndexInfo = indexReader.readBlockIndexInfo();
blockletIndex = getBlockletIndex(readBlockIndexInfo.getBlock_index());
dataFileFooter = new DataFileFooter();
TableBlockInfo tableBlockInfo = new TableBlockInfo();
tableBlockInfo.setBlockOffset(readBlockIndexInfo.getOffset());
ColumnarFormatVersion version =
ColumnarFormatVersion.valueOf((short) readIndexHeader.getVersion());
tableBlockInfo.setVersion(version);
int blockletSize = getBlockletSize(readBlockIndexInfo);
tableBlockInfo.getBlockletInfos().setNoOfBlockLets(blockletSize);
String fileName = readBlockIndexInfo.file_name;
// Take only the name of the file.
if (fileName.lastIndexOf("/") > 0) {
fileName = fileName.substring(fileName.lastIndexOf("/"));
}
tableBlockInfo.setFilePath(parentPath + "/" + fileName);
dataFileFooter.setBlockletIndex(blockletIndex);
dataFileFooter.setColumnInTable(columnSchemaList);
dataFileFooter.setNumberOfRows(readBlockIndexInfo.getNum_rows());
dataFileFooter.setBlockInfo(new BlockInfo(tableBlockInfo));
dataFileFooter.setSegmentInfo(segmentInfo);
dataFileFooter.setVersionId(version);
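// some index entries also embed the blocklet info; when present, convert it (via the
// V3 converter) and attach the resulting blocklet list to the footer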
if (readBlockIndexInfo.isSetBlocklet_info()) {
List<BlockletInfo> blockletInfoList = new ArrayList<BlockletInfo>();
BlockletInfo blockletInfo = new DataFileFooterConverterV3()
.getBlockletInfo(readBlockIndexInfo.getBlocklet_info(),
CarbonUtil.getNumberOfDimensionColumns(columnSchemaList));
blockletInfo.setBlockletIndex(blockletIndex);
blockletInfoList.add(blockletInfo);
dataFileFooter.setBlockletList(blockletInfoList);
}
dataFileFooters.add(dataFileFooter);
}
} finally {
indexReader.closeThriftReader();
}
return dataFileFooters;
}
/**
* Returns the number of blocklets in a block, derived from the number of rows
* in the block and the configured blocklet size
*
* @param readBlockIndexInfo block index entry read from the index file
* @return number of blocklets in the block
*/
protected int getBlockletSize(BlockIndex readBlockIndexInfo) {
long num_rows = readBlockIndexInfo.getNum_rows();
int blockletSize = Integer.parseInt(CarbonProperties.getInstance()
.getProperty(CarbonCommonConstants.BLOCKLET_SIZE,
CarbonCommonConstants.BLOCKLET_SIZE_DEFAULT_VAL));
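// illustrative arithmetic (values are examples only): num_rows = 250000 with
// blockletSize = 120000 gives 2 full blocklets plus a remainder of 10000 rows,
// so noOfBlockLet = 3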
int remainder = (int) (num_rows % blockletSize);
int noOfBlockLet = (int) (num_rows / blockletSize);
// the last blocklet may contain fewer rows than the configured blocklet size,
// so round up when there is a remainder
if (remainder > 0) {
noOfBlockLet = noOfBlockLet + 1;
}
return noOfBlockLet;
}
/**
* Below method will be used to convert the thrift file meta to the wrapper file meta
*/
public abstract DataFileFooter readDataFileFooter(TableBlockInfo tableBlockInfo)
throws IOException;
public abstract List<ColumnSchema> getSchema(TableBlockInfo tableBlockInfo) throws IOException;
/**
* Below method will be used to build the blocklet index for the data file footer
* by merging the start/end keys and min/max values of all the blocklets in the block
*
* @param blockletIndexList blocklet index list of all the blocklets in the block
* @return merged blocklet index
*/
protected BlockletIndex getBlockletIndexForDataFileFooter(List<BlockletIndex> blockletIndexList) {
BlockletIndex blockletIndex = new BlockletIndex();
BlockletBTreeIndex blockletBTreeIndex = new BlockletBTreeIndex();
blockletBTreeIndex.setStartKey(blockletIndexList.get(0).getBtreeIndex().getStartKey());
blockletBTreeIndex
.setEndKey(blockletIndexList.get(blockletIndexList.size() - 1).getBtreeIndex().getEndKey());
blockletIndex.setBtreeIndex(blockletBTreeIndex);
byte[][] currentMinValue = blockletIndexList.get(0).getMinMaxIndex().getMinValues().clone();
byte[][] currentMaxValue = blockletIndexList.get(0).getMinMaxIndex().getMaxValues().clone();
byte[][] minValue = null;
byte[][] maxValue = null;
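// merge the min/max index across blocklets: for every column keep the smallest min
// value and the largest max value (byte-wise comparison) as the block level min/max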
for (int i = 1; i < blockletIndexList.size(); i++) {
minValue = blockletIndexList.get(i).getMinMaxIndex().getMinValues();
maxValue = blockletIndexList.get(i).getMinMaxIndex().getMaxValues();
for (int j = 0; j < maxValue.length; j++) {
if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(currentMinValue[j], minValue[j]) > 0) {
currentMinValue[j] = minValue[j].clone();
}
if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(currentMaxValue[j], maxValue[j]) < 0) {
currentMaxValue[j] = maxValue[j].clone();
}
}
}
BlockletMinMaxIndex minMax = new BlockletMinMaxIndex();
minMax.setMaxValues(currentMaxValue);
minMax.setMinValues(currentMinValue);
blockletIndex.setMinMaxIndex(minMax);
return blockletIndex;
}
protected ColumnSchema thriftColumnSchmeaToWrapperColumnSchema(
org.apache.carbondata.format.ColumnSchema externalColumnSchema) {
ColumnSchema wrapperColumnSchema = new ColumnSchema();
wrapperColumnSchema.setColumnUniqueId(externalColumnSchema.getColumn_id());
wrapperColumnSchema.setColumnName(externalColumnSchema.getColumn_name());
wrapperColumnSchema.setColumnar(externalColumnSchema.isColumnar());
wrapperColumnSchema
.setDataType(thriftDataTyopeToWrapperDataType(externalColumnSchema.data_type));
wrapperColumnSchema.setDimensionColumn(externalColumnSchema.isDimension());
List<Encoding> encoders = new ArrayList<Encoding>();
for (org.apache.carbondata.format.Encoding encoder : externalColumnSchema.getEncoders()) {
encoders.add(fromExternalToWrapperEncoding(encoder));
}
wrapperColumnSchema.setEncodingList(encoders);
wrapperColumnSchema.setNumberOfChild(externalColumnSchema.getNum_child());
wrapperColumnSchema.setPrecision(externalColumnSchema.getPrecision());
wrapperColumnSchema.setColumnGroup(externalColumnSchema.getColumn_group_id());
wrapperColumnSchema.setScale(externalColumnSchema.getScale());
wrapperColumnSchema.setDefaultValue(externalColumnSchema.getDefault_value());
Map<String, String> properties = externalColumnSchema.getColumnProperties();
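// a column that carries the SORT_COLUMNS property is flagged as a sort column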
if (properties != null) {
if (properties.get(CarbonCommonConstants.SORT_COLUMNS) != null) {
wrapperColumnSchema.setSortColumn(true);
}
}
return wrapperColumnSchema;
}
/**
* Below method will be used to convert the thrift encoding to the wrapper encoding
*
* @param encoderThrift thrift encoding
* @return wrapper encoding
*/
protected Encoding fromExternalToWrapperEncoding(
org.apache.carbondata.format.Encoding encoderThrift) {
switch (encoderThrift) {
case DICTIONARY:
return Encoding.DICTIONARY;
case DELTA:
return Encoding.DELTA;
case RLE:
return Encoding.RLE;
case INVERTED_INDEX:
return Encoding.INVERTED_INDEX;
case BIT_PACKED:
return Encoding.BIT_PACKED;
case DIRECT_DICTIONARY:
return Encoding.DIRECT_DICTIONARY;
default:
throw new IllegalArgumentException(encoderThrift.toString() + " is not supported");
}
}
/**
* Below method will be used to convert the thrift segment object to the wrapper
* segment object
*
* @param segmentInfo thrift segment info object
* @return wrapper segment info object
*/
protected SegmentInfo getSegmentInfo(org.apache.carbondata.format.SegmentInfo segmentInfo) {
SegmentInfo info = new SegmentInfo();
int[] cardinality = new int[segmentInfo.getColumn_cardinalities().size()];
for (int i = 0; i < cardinality.length; i++) {
cardinality[i] = segmentInfo.getColumn_cardinalities().get(i);
}
info.setColumnCardinality(cardinality);
info.setNumberOfColumns(segmentInfo.getNum_cols());
return info;
}
/**
* Below method will be used to convert the thrift blocklet index to the
* wrapper blocklet index
*
* @param blockletIndexThrift thrift blocklet index
* @return blocklet index wrapper
*/
protected BlockletIndex getBlockletIndex(
org.apache.carbondata.format.BlockletIndex blockletIndexThrift) {
org.apache.carbondata.format.BlockletBTreeIndex btreeIndex =
blockletIndexThrift.getB_tree_index();
org.apache.carbondata.format.BlockletMinMaxIndex minMaxIndex =
blockletIndexThrift.getMin_max_index();
return new BlockletIndex(
new BlockletBTreeIndex(btreeIndex.getStart_key(), btreeIndex.getEnd_key()),
new BlockletMinMaxIndex(minMaxIndex.getMin_values(), minMaxIndex.getMax_values()));
}
/**
* Below method will be used to convert the thrift data type to the wrapper data
* type
*
* @param dataTypeThrift thrift data type
* @return dataType wrapper
*/
protected DataType thriftDataTyopeToWrapperDataType(
org.apache.carbondata.format.DataType dataTypeThrift) {
switch (dataTypeThrift) {
case BOOLEAN:
return DataTypes.BOOLEAN;
case STRING:
return DataTypes.STRING;
case SHORT:
return DataTypes.SHORT;
case INT:
return DataTypes.INT;
case LONG:
return DataTypes.LONG;
case DOUBLE:
return DataTypes.DOUBLE;
case DECIMAL:
return DataTypes.DECIMAL;
case DATE:
return DataTypes.DATE;
case TIMESTAMP:
return DataTypes.TIMESTAMP;
case ARRAY:
return DataTypes.ARRAY;
case STRUCT:
return DataTypes.STRUCT;
default:
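// unknown or unsupported thrift data types fall back to STRING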
return DataTypes.STRING;
}
}
/**
* Below method will be used to convert the thrift data chunk to the wrapper
* data chunk
*
* @param datachunkThrift thrift data chunk
* @return wrapper data chunk
*/
protected DataChunk getDataChunk(org.apache.carbondata.format.DataChunk datachunkThrift,
boolean isPresenceMetaPresent) {
DataChunk dataChunk = new DataChunk();
dataChunk.setDataPageLength(datachunkThrift.getData_page_length());
dataChunk.setDataPageOffset(datachunkThrift.getData_page_offset());
if (isPresenceMetaPresent) {
dataChunk.setNullValueIndexForColumn(getPresenceMeta(datachunkThrift.getPresence()));
}
dataChunk.setRlePageLength(datachunkThrift.getRle_page_length());
dataChunk.setRlePageOffset(datachunkThrift.getRle_page_offset());
dataChunk.setRowMajor(datachunkThrift.isRowMajor());
dataChunk.setRowIdPageLength(datachunkThrift.getRowid_page_length());
dataChunk.setRowIdPageOffset(datachunkThrift.getRowid_page_offset());
List<Encoding> encodingList = new ArrayList<Encoding>(datachunkThrift.getEncoders().size());
for (int i = 0; i < datachunkThrift.getEncoders().size(); i++) {
encodingList.add(fromExternalToWrapperEncoding(datachunkThrift.getEncoders().get(i)));
}
dataChunk.setEncodingList(encodingList);
if (encodingList.contains(Encoding.DELTA)) {
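// DELTA encoded chunks carry serialized ValueEncoderMeta entries in encoder_meta;
// deserialize each entry and attach it to the data chunk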
List<ByteBuffer> thriftEncoderMeta = datachunkThrift.getEncoder_meta();
List<ValueEncoderMeta> encodeMetaList =
new ArrayList<ValueEncoderMeta>(thriftEncoderMeta.size());
for (int i = 0; i < thriftEncoderMeta.size(); i++) {
encodeMetaList.add(CarbonUtil.deserializeEncoderMetaV2(thriftEncoderMeta.get(i).array()));
}
dataChunk.setValueEncoderMeta(encodeMetaList);
}
return dataChunk;
}
}