// blob: 85271018c9cf76e3162d3f8d28750c7cba59c3fa [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.core.indexstore.schema;
import java.util.ArrayList;
import java.util.List;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
import org.apache.carbondata.core.util.BlockletIndexUtil;
/**
 * Utility class that creates the row schemas (block, blocklet and task summary) for a given index.
 */
public class SchemaGenerator {
/**
 * Creates the schema of a block level index row. Every block row shares this schema.
 *
 * @param segmentProperties  segment properties from which the min/max and min/max flag
 *                           schemas are derived
 * @param minMaxCacheColumns columns whose min/max has to be cached; when {@code null} all the
 *                           columns of the segment are considered
 * @return the block row schema, with the byte position of every entry already assigned
 */
public static CarbonRowSchema[] createBlockSchema(SegmentProperties segmentProperties,
    List<CarbonColumn> minMaxCacheColumns) {
  List<CarbonRowSchema> rowSchemas = new ArrayList<>();

  // min and max values of the columns
  getMinMaxSchema(segmentProperties, rowSchemas, minMaxCacheColumns);
  // number of rows
  rowSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.INT));
  // table block path
  rowSchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
  // version number
  rowSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.SHORT));
  // schema updated time
  rowSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.LONG));
  // block footer offset
  rowSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.LONG));
  // locations
  rowSchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
  // block length
  rowSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.LONG));
  // one flag per column telling whether min max for that column is written in the metadata
  addMinMaxFlagSchema(segmentProperties, rowSchemas, minMaxCacheColumns);

  CarbonRowSchema[] blockSchema = rowSchemas.toArray(new CarbonRowSchema[0]);
  updateBytePosition(blockSchema);
  return blockSchema;
}
/**
 * Assigns a byte position to every entry of the given schema. The positions are used in case of
 * the unsafe dm store (see UnsafeMemoryDMStore).
 *
 * <p>The layout is computed in two passes over the schema, where the children of a STRUCT entry
 * are laid out individually in place of the struct itself:
 * <ol>
 *   <li>every fixed length entry gets the next free position, which then advances by the
 *       length of that entry;</li>
 *   <li>every entry whose byte position is still -1 afterwards (the variable length ones) gets
 *       the next free position, which then advances by
 *       {@link CarbonCommonConstants#INT_SIZE_IN_BYTE}, for storing its offset.</li>
 * </ol>
 *
 * @param schema row schema whose entries are updated in place
 */
private static void updateBytePosition(CarbonRowSchema[] schema) {
  int cursor = 0;

  // pass 1: fixed length entries
  for (CarbonRowSchema entry : schema) {
    switch (entry.getSchemaType()) {
      case STRUCT:
        CarbonRowSchema[] fixedPassChildren =
            ((CarbonRowSchema.StructCarbonRowSchema) entry).getChildSchemas();
        for (CarbonRowSchema child : fixedPassChildren) {
          cursor = placeIfFixedLength(child, cursor);
        }
        break;
      default:
        cursor = placeIfFixedLength(entry, cursor);
        break;
    }
  }

  // pass 2: offset slots for the variable length entries
  for (CarbonRowSchema entry : schema) {
    switch (entry.getSchemaType()) {
      case STRUCT:
        CarbonRowSchema[] offsetPassChildren =
            ((CarbonRowSchema.StructCarbonRowSchema) entry).getChildSchemas();
        for (CarbonRowSchema child : offsetPassChildren) {
          cursor = placeIfUnassigned(child, cursor);
        }
        break;
      default:
        cursor = placeIfUnassigned(entry, cursor);
        break;
    }
  }
}

/**
 * Sets the byte position of the given schema when it has a fixed size.
 *
 * @return the next free position: advanced by the schema size when the schema was placed,
 *         otherwise the unchanged {@code position}
 */
private static int placeIfFixedLength(CarbonRowSchema rowSchema, int position) {
  int size = getSchemaSize(rowSchema);
  if (size == -1) {
    // variable length, handled in the second pass
    return position;
  }
  rowSchema.setBytePosition(position);
  return position + size;
}

/**
 * Sets the byte position of the given schema when its byte position is still -1.
 *
 * @return the next free position: advanced by the size of an int when the schema was placed,
 *         otherwise the unchanged {@code position}
 */
private static int placeIfUnassigned(CarbonRowSchema rowSchema, int position) {
  if (rowSchema.getBytePosition() != -1) {
    return position;
  }
  rowSchema.setBytePosition(position);
  return position + CarbonCommonConstants.INT_SIZE_IN_BYTE;
}
/**
 * Returns the number of bytes occupied by the given schema.
 *
 * @param schema schema to inspect
 * @return the length of the schema for a FIXED schema, or -1 for the variable length schema
 *         types (VARIABLE_SHORT and VARIABLE_INT)
 * @throws UnsupportedOperationException for any other schema type
 */
private static int getSchemaSize(CarbonRowSchema schema) {
  final int sizeInBytes;
  switch (schema.getSchemaType()) {
    case VARIABLE_INT:
    case VARIABLE_SHORT:
      // no fixed size: signalled to the caller with -1
      sizeInBytes = -1;
      break;
    case FIXED:
      sizeInBytes = schema.getLength();
      break;
    default:
      throw new UnsupportedOperationException("Invalid Type");
  }
  return sizeInBytes;
}
/**
 * Creates the schema of a blocklet level index row. Every blocklet row shares this schema.
 *
 * @param segmentProperties  segment properties from which the min/max and min/max flag
 *                           schemas are derived
 * @param minMaxCacheColumns columns whose min/max has to be cached; when {@code null} all the
 *                           columns of the segment are considered
 * @return the blocklet row schema, with the byte position of every entry already assigned
 */
public static CarbonRowSchema[] createBlockletSchema(SegmentProperties segmentProperties,
    List<CarbonColumn> minMaxCacheColumns) {
  List<CarbonRowSchema> rowSchemas = new ArrayList<>();

  // min and max values of the columns
  getMinMaxSchema(segmentProperties, rowSchemas, minMaxCacheColumns);
  // number of rows
  rowSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.INT));
  // table block path
  rowSchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
  // version number
  rowSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.SHORT));
  // schema updated time
  rowSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.LONG));
  // block footer offset
  rowSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.LONG));
  // locations
  rowSchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
  // block length
  rowSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.LONG));
  // one flag per column telling whether min max for that column is written in the metadata
  addMinMaxFlagSchema(segmentProperties, rowSchemas, minMaxCacheColumns);
  // blocklet info
  rowSchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
  // number of pages
  rowSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.SHORT));
  // relative blocklet id, i.e. the blocklet id that belongs to a particular part file
  rowSchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.SHORT));

  CarbonRowSchema[] blockletSchema = rowSchemas.toArray(new CarbonRowSchema[0]);
  updateBytePosition(blockletSchema);
  return blockletSchema;
}
/**
 * Creates the schema for the summary information, i.e. the information that is stored only once
 * per index. It holds the index level min/max of each column and the partition information of
 * the index.
 *
 * @param segmentProperties  segment properties from which the min/max and min/max flag
 *                           schemas are derived
 * @param minMaxCacheColumns columns whose min/max has to be cached; when {@code null} all the
 *                           columns of the segment are considered
 * @param storeBlockletCount whether an entry for the blocklet count of each carbondata file
 *                           (stored as binary) is appended to the schema
 * @param filePathToBeStored whether an entry for the file path is appended to the schema
 * @return the task summary schema, with the byte position of every entry already assigned
 */
public static CarbonRowSchema[] createTaskSummarySchema(SegmentProperties segmentProperties,
    List<CarbonColumn> minMaxCacheColumns,
    boolean storeBlockletCount, boolean filePathToBeStored) {
  List<CarbonRowSchema> summarySchemas = new ArrayList<>();

  // number of rows
  summarySchemas.add(new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.LONG));
  // min and max values of the columns
  getMinMaxSchema(segmentProperties, summarySchemas, minMaxCacheColumns);
  // file name
  summarySchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
  // segmentId
  summarySchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
  // one flag per column telling whether min max for that column is written in the metadata
  addMinMaxFlagSchema(segmentProperties, summarySchemas, minMaxCacheColumns);

  if (filePathToBeStored) {
    // file path, stored only in case of partition table or non transactional table
    summarySchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
  }
  if (storeBlockletCount) {
    // blocklet count of each carbondata file as binary. The flag will be true when it is not a
    // legacy store (>1.1 version) and CACHE_LEVEL=BLOCK
    summarySchemas.add(new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY));
  }

  CarbonRowSchema[] summarySchema = summarySchemas.toArray(new CarbonRowSchema[0]);
  updateBytePosition(summarySchema);
  return summarySchema;
}
/**
 * Appends the schemas for storing the min/max data to the given list. Two struct schemas of
 * identical shape are added, one for min and one for max; each has one child per column.
 *
 * @param segmentProperties  segment properties providing the column value lengths and dimensions
 * @param minMaxSchemas      list the two struct schemas are appended to
 * @param minMaxCacheColumns columns whose min/max has to be cached; when {@code null} all the
 *                           columns of the segment are considered
 */
private static void getMinMaxSchema(SegmentProperties segmentProperties,
    List<CarbonRowSchema> minMaxSchemas, List<CarbonColumn> minMaxCacheColumns) {
  int[] valueLengths = getMinMaxLength(segmentProperties, minMaxCacheColumns);
  int[] ordinals = getColumnOrdinalsToAccess(segmentProperties, minMaxCacheColumns);
  // the first struct is for min, the second one for max; each gets its own child instances
  minMaxSchemas.add(newMinMaxStruct(segmentProperties, valueLengths, ordinals));
  minMaxSchemas.add(newMinMaxStruct(segmentProperties, valueLengths, ordinals));
}

/**
 * Builds one struct schema with a child per column: a fixed length byte array schema when the
 * value length of the column is positive, otherwise a variable length byte array schema.
 */
private static CarbonRowSchema newMinMaxStruct(SegmentProperties segmentProperties,
    int[] valueLengths, int[] ordinals) {
  CarbonRowSchema[] children = new CarbonRowSchema[valueLengths.length];
  for (int col = 0; col < valueLengths.length; col++) {
    if (valueLengths[col] > 0) {
      children[col] =
          new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, valueLengths[col]);
      continue;
    }
    // the ordinal is looked up in the dimensions only when it falls inside the dimension list
    boolean isVarchar = ordinals[col] < segmentProperties.getDimensions().size()
        && segmentProperties.getDimensions().get(ordinals[col]).getDataType()
        == DataTypes.VARCHAR;
    children[col] =
        new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY, isVarchar);
  }
  return new CarbonRowSchema.StructCarbonRowSchema(DataTypes.createDefaultStructType(),
      children);
}
/**
 * Appends the min max flag schema to the given list: a struct schema with one boolean child per
 * column.
 *
 * @param segmentProperties  segment properties providing the total number of columns
 * @param indexSchemas       list the struct schema is appended to
 * @param minMaxCacheColumns columns whose min/max has to be cached; when not {@code null} its
 *                           size decides the number of flags, otherwise the number of columns
 *                           of the segment does
 */
private static void addMinMaxFlagSchema(SegmentProperties segmentProperties,
    List<CarbonRowSchema> indexSchemas, List<CarbonColumn> minMaxCacheColumns) {
  final int totalColumns = segmentProperties.getNumberOfColumns();
  final int flagCount = (minMaxCacheColumns == null) ? totalColumns : minMaxCacheColumns.size();

  CarbonRowSchema[] flags = new CarbonRowSchema[flagCount];
  int slot = 0;
  while (slot < flagCount) {
    // every flag gets its own schema instance
    flags[slot] = new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BOOLEAN);
    slot++;
  }
  indexSchemas.add(
      new CarbonRowSchema.StructCarbonRowSchema(DataTypes.createDefaultStructType(), flags));
}
/**
 * Method to get the min max length of each column. Only the lengths of the columns which will
 * be cached are returned.
 *
 * @param segmentProperties  segment properties providing the value length of every column
 * @param minMaxCacheColumns columns whose min/max has to be cached; when {@code null} the
 *                           lengths of all the columns are returned
 * @return the value length of each column to be cached, in the order of
 *         {@code minMaxCacheColumns}, or the value lengths of all the columns when
 *         {@code minMaxCacheColumns} is {@code null}
 */
private static int[] getMinMaxLength(SegmentProperties segmentProperties,
    List<CarbonColumn> minMaxCacheColumns) {
  // fetched once up front instead of once per cached column inside the loop
  int[] columnValueLengths = segmentProperties.createColumnValueLength();
  if (null == minMaxCacheColumns) {
    return columnValueLengths;
  }
  int[] minMaxLen = new int[minMaxCacheColumns.size()];
  int counter = 0;
  for (CarbonColumn column : minMaxCacheColumns) {
    minMaxLen[counter++] =
        columnValueLengths[BlockletIndexUtil.getColumnOrdinal(segmentProperties, column)];
  }
  return minMaxLen;
}
/**
 * Computes the column ordinals to access, based on the columns to be cached.
 *
 * @param segmentProperties  segment properties used to resolve the ordinal of a column
 * @param minMaxCacheColumns columns whose min/max has to be cached; may be {@code null}
 * @return the ordinal of every column of {@code minMaxCacheColumns}, in list order; when the
 *         list is {@code null}, the ordinals {@code 0 .. numberOfColumns - 1}
 */
private static int[] getColumnOrdinalsToAccess(SegmentProperties segmentProperties,
    List<CarbonColumn> minMaxCacheColumns) {
  if (minMaxCacheColumns == null) {
    // when columns to cache is not specified the access order is the same as the array index
    // of min max length
    int[] allOrdinals = new int[segmentProperties.getNumberOfColumns()];
    for (int ordinal = 0; ordinal < allOrdinals.length; ordinal++) {
      allOrdinals[ordinal] = ordinal;
    }
    return allOrdinals;
  }

  int[] cachedOrdinals = new int[minMaxCacheColumns.size()];
  int next = 0;
  for (CarbonColumn cachedColumn : minMaxCacheColumns) {
    cachedOrdinals[next] = BlockletIndexUtil.getColumnOrdinal(segmentProperties, cachedColumn);
    next++;
  }
  return cachedOrdinals;
}
}