blob: f90ba326535c847ba6278ec7929c520aeabd765c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.core.scan.executor.util;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.encoder.Encoding;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
import org.apache.carbondata.core.scan.executor.infos.BlockExecutionInfo;
import org.apache.carbondata.core.scan.executor.infos.DimensionInfo;
import org.apache.carbondata.core.scan.executor.infos.MeasureInfo;
import org.apache.carbondata.core.scan.model.ProjectionDimension;
import org.apache.carbondata.core.scan.model.ProjectionMeasure;
import org.apache.carbondata.core.scan.model.QueryModel;
import org.apache.carbondata.core.util.ByteUtil;
import org.apache.carbondata.core.util.DataTypeUtil;
import org.apache.commons.lang3.ArrayUtils;
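/**
 * Utility class for restructure handling: reconciles the dimensions and measures requested by a
 * query with the columns actually stored in a table block, and computes the default values to
 * use for requested columns that the block does not contain.
 */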
public class RestructureUtil {
/**
 * Builds the {@link DimensionInfo} for a table block and returns the projected dimensions that
 * are actually present in that block.
 *
 * <p>After a restructure, blocks written earlier may lack some of the dimensions a query asks
 * for. Each query dimension is looked up first among the block's primitive dimensions and then
 * among its complex dimensions. Implicit dimensions are always treated as present. For a
 * dimension that cannot be found, a default value is recorded, the block is flagged as
 * restructured and the "new column" counters on the {@link DimensionInfo} are updated.
 *
 * <p>Side effect: when a primitive block dimension matches, its column schema is overwritten
 * with the precision, scale and default value of the query dimension.
 *
 * @param blockExecutionInfo receives the restructured flag and the created dimension info
 * @param queryDimensions dimensions projected by the query
 * @param tableBlockDimensions primitive dimensions of the current block
 * @param tableComplexDimension complex dimensions of the current block
 * @param measureCount added to the number of query dimensions to size the data type array,
 *                     which is indexed by projection ordinal
 * @param isTransactionalTable forwarded to {@code isColumnMatches}
 * @return list of query dimension which is present in the table block
 */
public static List<ProjectionDimension> createDimensionInfoAndGetCurrentBlockQueryDimension(
    BlockExecutionInfo blockExecutionInfo, ProjectionDimension[] queryDimensions,
    List<CarbonDimension> tableBlockDimensions, List<CarbonDimension> tableComplexDimension,
    int measureCount, boolean isTransactionalTable) {
  final int projectedCount = queryDimensions.length;
  List<ProjectionDimension> dimensionsInBlock =
      new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
  boolean[] foundInBlock = new boolean[projectedCount];
  Object[] fallbackValues = new Object[projectedCount];
  // the info object shares both arrays, so later writes to them are visible through it
  DimensionInfo info = new DimensionInfo(foundInBlock, fallbackValues);
  info.dataType = new DataType[projectedCount + measureCount];
  int addedDictionaryColumns = 0;
  int addedNoDictionaryColumns = 0;

  for (int slot = 0; slot < projectedCount; slot++) {
    ProjectionDimension projected = queryDimensions[slot];
    CarbonDimension wanted = projected.getDimension();

    // implicit columns are never looked up in the block
    if (wanted.hasEncoding(Encoding.IMPLICIT)) {
      dimensionsInBlock.add(projected);
      foundInBlock[slot] = true;
      info.dataType[projected.getOrdinal()] = wanted.getDataType();
      continue;
    }

    // first pass: primitive dimensions of the block
    for (CarbonDimension candidate : tableBlockDimensions) {
      if (!isColumnMatches(isTransactionalTable, wanted, candidate)) {
        continue;
      }
      ProjectionDimension blockDimension = new ProjectionDimension(candidate);
      ColumnSchema blockSchema = candidate.getColumnSchema();
      ColumnSchema wantedSchema = wanted.getColumnSchema();
      blockSchema.setPrecision(wantedSchema.getPrecision());
      blockSchema.setScale(wantedSchema.getScale());
      blockSchema.setDefaultValue(wanted.getDefaultValue());
      blockDimension.setOrdinal(projected.getOrdinal());
      dimensionsInBlock.add(blockDimension);
      foundInBlock[slot] = true;
      info.dataType[blockDimension.getOrdinal()] = blockDimension.getDimension().getDataType();
      break;
    }
    if (foundInBlock[slot]) {
      // already resolved, the complex dimensions need not be searched
      continue;
    }

    // second pass: complex dimensions of the block
    for (CarbonDimension candidate : tableComplexDimension) {
      if (!isColumnMatches(isTransactionalTable, wanted, candidate)) {
        continue;
      }
      // a projected child of a struct (it carries a parent dimension) keeps the query
      // dimension; otherwise the block dimension is used
      CarbonDimension source = (null != wanted.getComplexParentDimension()) ? wanted : candidate;
      ProjectionDimension blockDimension = new ProjectionDimension(source);
      // TODO: for complex dimension set scale and precision by traversing
      // the child dimensions
      blockDimension.setOrdinal(projected.getOrdinal());
      dimensionsInBlock.add(blockDimension);
      foundInBlock[slot] = true;
      info.dataType[blockDimension.getOrdinal()] = blockDimension.getDimension().getDataType();
      break;
    }
    if (foundInBlock[slot]) {
      continue;
    }

    // the column does not exist in this block: fall back to its default value
    fallbackValues[slot] = validateAndGetDefaultValue(wanted);
    blockExecutionInfo.setRestructuredBlock(true);
    // remember whether a dictionary or a no dictionary column was added; compaction after a
    // restructure needs this to fill the data of the newly added columns
    if (wanted.getDataType() == DataTypes.DATE) {
      info.setDictionaryColumnAdded(true);
      addedDictionaryColumns++;
    } else {
      info.setNoDictionaryColumnAdded(true);
      addedNoDictionaryColumns++;
    }
  }

  info.setNewDictionaryColumnCount(addedDictionaryColumns);
  info.setNewNoDictionaryColumnCount(addedNoDictionaryColumns);
  blockExecutionInfo.setDimensionInfo(info);
  return dimensionsInBlock;
}
/**
 * Decides whether a column of the table block corresponds to a column requested by the query.
 *
 * <p>For a complex table column that is not an array, the columns match when their column ids
 * are equal (ignoring case) or when {@code isColumnMatchBasedOnId} accepts the query column;
 * otherwise the children of the table column are searched. Every other column matches on equal
 * column id, on equal column name when the table is non transactional, or through
 * {@code isColumnMatchBasedOnId}.
 *
 * @param isTransactionalTable whether the table is transactional; name based matching is only
 *                             applied when this is {@code false}
 * @param queryColumn column requested by the query
 * @param tableColumn column of the table block
 * @return {@code true} when both columns are considered the same column
 */
public static boolean isColumnMatches(boolean isTransactionalTable,
    CarbonColumn queryColumn, CarbonColumn tableColumn) {
  boolean nonArrayComplex = tableColumn.getDataType().isComplexType()
      && tableColumn.getDataType().getId() != DataTypes.ARRAY_TYPE_ID;

  if (nonArrayComplex) {
    if (tableColumn.getColumnId().equalsIgnoreCase(queryColumn.getColumnId())
        || tableColumn.isColumnMatchBasedOnId(queryColumn)) {
      return true;
    }
    return isColumnMatchesStruct(tableColumn, queryColumn);
  }

  if (tableColumn.getColumnId().equalsIgnoreCase(queryColumn.getColumnId())) {
    return true;
  }
  // If it is non transactional table just check the column names, no need to validate
  // column id as multiple sdk's output placed in a single folder doesn't have same
  // column ID but can have same column name
  if (!isTransactionalTable
      && tableColumn.getColName().equalsIgnoreCase(queryColumn.getColName())) {
    return true;
  }
  // In case of SDK, columnId is same as columnName therefore the following check will
  // ensure that if the table columnName is same as the query columnName and the table
  // columnId is the same as table columnName then it's a valid columnName to be scanned.
  return tableColumn.isColumnMatchBasedOnId(queryColumn);
}
/**
 * In case of a multilevel complex column (Struct / StructOfStruct), walks down the child
 * dimensions of the table column following the dot separated name of the query column and
 * checks whether any dimension on that path matches the query column.
 *
 * <p>A child dimension matches when its unique column id equals the query column id (ignoring
 * case), or when its unique id equals its own name and that name equals the query column name.
 *
 * @param tableColumn column of the table block; only {@link CarbonDimension} instances are
 *                    searched
 * @param queryColumn column requested by the query
 * @return {@code true} when a child on the path matches, {@code false} otherwise
 */
private static boolean isColumnMatchesStruct(CarbonColumn tableColumn, CarbonColumn queryColumn) {
  if (!(tableColumn instanceof CarbonDimension)) {
    return false;
  }
  List<CarbonDimension> searchScope = ((CarbonDimension) tableColumn).getListOfChildDimensions();
  String[] nameParts = queryColumn.getColName().split("\\.");
  StringBuilder path = new StringBuilder();
  for (int level = 0; level < nameParts.length; level++) {
    // Extend the path by position rather than by comparing it with the current segment, so
    // that a child carrying the same name as its parent (e.g. "a.a") still gets its full path.
    if (level > 0) {
      path.append('.');
    }
    path.append(nameParts[level]);

    CarbonDimension child = CarbonTable.getCarbonDimension(path.toString(), searchScope);
    if (child == null) {
      continue;
    }
    // In case of SDK the columnId and columnName is same and this check will ensure for
    // all the child columns that the table column name is equal to query column name and
    // table columnId is equal to table columnName
    String childId = child.getColumnSchema().getColumnUniqueId();
    if (childId.equalsIgnoreCase(queryColumn.getColumnId())
        || (childId.equalsIgnoreCase(child.getColName())
            && child.getColName().equalsIgnoreCase(queryColumn.getColName()))) {
      return true;
    }
    // descend one level when the matched child has children of its own
    if (child.getListOfChildDimensions() != null) {
      searchScope = child.getListOfChildDimensions();
    }
  }
  return false;
}
/**
 * Computes the default value to be filled in at result preparation time for a dimension that
 * is missing from a block.
 *
 * <p>DATE dimensions are handled as direct dictionary columns, every other data type as a no
 * dictionary column.
 *
 * @param queryDimension dimension whose data type and default value bytes are read
 * @return the converted default value, or {@code null} when none could be derived
 */
public static Object validateAndGetDefaultValue(CarbonDimension queryDimension) {
  byte[] rawDefault = queryDimension.getDefaultValue();
  DataType type = queryDimension.getDataType();
  if (type == DataTypes.DATE) {
    // direct dictionary case
    return getDirectDictionaryDefaultValue(type, rawDefault);
  }
  // no dictionary
  return getNoDictionaryDefaultValue(type, rawDefault);
}
/**
 * Computes the default value of a direct dictionary column.
 *
 * <p>The default value bytes are decoded with {@code CarbonCommonConstants.DEFAULT_CHARSET},
 * parsed as an integer surrogate and handed to the direct dictionary generator of the data
 * type.
 *
 * @param dataType data type used to look up the direct dictionary generator
 * @param defaultValue default value bytes, may be {@code null}
 * @return the value the generator returns for the surrogate, or {@code null} when the default
 *         value is {@code null} or no generator exists for the data type
 */
public static Object getDirectDictionaryDefaultValue(DataType dataType, byte[] defaultValue) {
  if (isDefaultValueNull(defaultValue)) {
    return null;
  }
  DirectDictionaryGenerator generator =
      DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(dataType);
  if (generator == null) {
    return null;
  }
  String surrogateText =
      new String(defaultValue, Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET));
  return generator.getValueFromSurrogate(Integer.parseInt(surrogateText));
}
/**
 * Computes the default value of a no dictionary column.
 *
 * <p>INT and LONG defaults are decoded with {@code CarbonCommonConstants.DEFAULT_CHARSET} and
 * parsed as numbers. TIMESTAMP defaults are read with {@code ByteUtil.toXorLong} and multiplied
 * by 1000. All other data types are passed through the data type converter.
 *
 * @param datatype data type of the column
 * @param defaultValue default value bytes, may be {@code null}
 * @return the converted default value, or {@code null} when the default value is {@code null}
 */
private static Object getNoDictionaryDefaultValue(DataType datatype, byte[] defaultValue) {
  if (isDefaultValueNull(defaultValue)) {
    return null;
  }
  if (datatype == DataTypes.INT) {
    String text = new String(defaultValue, Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET));
    return Integer.parseInt(text);
  }
  if (datatype == DataTypes.LONG) {
    String text = new String(defaultValue, Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET));
    return Long.parseLong(text);
  }
  if (datatype == DataTypes.TIMESTAMP) {
    // NOTE(review): the factor 1000 looks like a time unit conversion - confirm the units
    long storedTimestamp = ByteUtil.toXorLong(defaultValue, 0, defaultValue.length);
    return storedTimestamp * 1000L;
  }
  return DataTypeUtil.getDataTypeConverter().convertFromByteToUTF8Bytes(defaultValue);
}
/**
 * Tells whether a default value is absent. Only {@code null} counts as absent; an empty byte
 * array is reported as present.
 *
 * @param defaultValue default value bytes to inspect
 * @return {@code true} when {@code defaultValue} is {@code null}
 */
private static boolean isDefaultValueNull(byte[] defaultValue) {
  return defaultValue == null;
}
/**
 * Computes the default value of a measure according to the data type of its column schema.
 *
 * <p>Decimal defaults are read with {@code DataTypeUtil.byteToBigDecimal} and widened to the
 * schema scale when that scale is larger. All other defaults are decoded with
 * {@code CarbonCommonConstants.DEFAULT_CHARSET} and parsed as SHORT, LONG, INT or BOOLEAN; any
 * remaining data type is parsed as a double, where infinite and NaN results yield {@code null}.
 *
 * @param columnSchema schema providing the data type and, for decimals, the scale
 * @param defaultValue default value bytes, may be {@code null}
 * @return the converted default value, or {@code null} when there is none
 */
public static Object getMeasureDefaultValue(ColumnSchema columnSchema, byte[] defaultValue) {
  if (isDefaultValueNull(defaultValue)) {
    return null;
  }
  DataType dataType = columnSchema.getDataType();

  if (DataTypes.isDecimal(dataType)) {
    BigDecimal decimal = DataTypeUtil.byteToBigDecimal(defaultValue);
    // only ever widen the scale, never reduce it
    if (columnSchema.getScale() > decimal.scale()) {
      decimal = decimal.setScale(columnSchema.getScale(), RoundingMode.HALF_UP);
    }
    return decimal;
  }

  // every remaining type is stored as text
  String text = new String(defaultValue, Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET));
  if (dataType == DataTypes.SHORT) {
    return Short.valueOf(text);
  }
  if (dataType == DataTypes.LONG) {
    return Long.parseLong(text);
  }
  if (dataType == DataTypes.INT) {
    return Integer.parseInt(text);
  }
  if (dataType == DataTypes.BOOLEAN) {
    return Boolean.valueOf(text);
  }
  Double parsed = Double.valueOf(text);
  if (Double.isInfinite(parsed) || Double.isNaN(parsed)) {
    return null;
  }
  return parsed;
}
/**
 * Gets the default value based on the column data type.
 *
 * <p>The conversion rules are the same as those of
 * {@link #getMeasureDefaultValue(ColumnSchema, byte[])}, so this method delegates to it instead
 * of keeping a second copy of the logic.
 *
 * @param columnSchema schema providing the data type and, for decimals, the scale
 * @param defaultValue default value bytes, may be {@code null}
 * @return the converted default value, or {@code null} when there is none
 */
public static Object getMeasureDefaultValueByType(ColumnSchema columnSchema,
    byte[] defaultValue) {
  return getMeasureDefaultValue(columnSchema, defaultValue);
}
/**
 * Builds the {@link MeasureInfo} for a table block and returns the projected measures that are
 * actually present in that block.
 *
 * <p>For every query measure found in the block, the block ordinal and data type are recorded.
 * For every query measure that is missing, its default value and data type are recorded and
 * the block is flagged as restructured.
 *
 * <p>Side effect: the column schema of a matched block measure is overwritten with the data
 * type, precision, scale and default value of the query measure.
 *
 * @param blockExecutionInfo receives the restructured flag and the created measure info
 * @param queryMeasures measures present in query
 * @param currentBlockMeasures current block measures
 * @param isTransactionalTable forwarded to {@code isColumnMatches}
 * @return measures present in the block
 */
public static List<ProjectionMeasure> createMeasureInfoAndGetCurrentBlockQueryMeasures(
    BlockExecutionInfo blockExecutionInfo, ProjectionMeasure[] queryMeasures,
    List<CarbonMeasure> currentBlockMeasures, boolean isTransactionalTable) {
  final int projectedCount = queryMeasures.length;
  List<ProjectionMeasure> measuresInBlock = new ArrayList<>(projectedCount);
  List<Integer> blockOrdinals = new ArrayList<>(projectedCount);
  Object[] fallbackValues = new Object[projectedCount];
  boolean[] foundInBlock = new boolean[projectedCount];
  DataType[] dataTypes = new DataType[projectedCount];

  for (int slot = 0; slot < projectedCount; slot++) {
    ProjectionMeasure projected = queryMeasures[slot];
    CarbonMeasure wanted = projected.getMeasure();

    for (CarbonMeasure candidate : currentBlockMeasures) {
      if (!isColumnMatches(isTransactionalTable, wanted, candidate)) {
        continue;
      }
      ProjectionMeasure blockMeasure = new ProjectionMeasure(candidate);
      ColumnSchema blockSchema = candidate.getColumnSchema();
      blockSchema.setDataType(wanted.getDataType());
      blockSchema.setPrecision(wanted.getPrecision());
      blockSchema.setScale(wanted.getScale());
      blockSchema.setDefaultValue(wanted.getDefaultValue());
      blockMeasure.setOrdinal(projected.getOrdinal());
      measuresInBlock.add(blockMeasure);
      blockOrdinals.add(candidate.getOrdinal());
      foundInBlock[slot] = true;
      // read after the schema update above
      dataTypes[slot] = candidate.getDataType();
      break;
    }

    if (!foundInBlock[slot]) {
      // the measure does not exist in this block: fall back to its default value
      fallbackValues[slot] =
          getMeasureDefaultValue(wanted.getColumnSchema(), wanted.getDefaultValue());
      dataTypes[slot] = wanted.getDataType();
      blockExecutionInfo.setRestructuredBlock(true);
    }
  }

  Integer[] boxedOrdinals = blockOrdinals.toArray(new Integer[blockOrdinals.size()]);
  MeasureInfo info = new MeasureInfo();
  info.setDefaultValues(fallbackValues);
  info.setMeasureOrdinals(ArrayUtils.toPrimitive(boxedOrdinals));
  info.setMeasureExists(foundInBlock);
  info.setMeasureDataTypes(dataTypes);
  blockExecutionInfo.setMeasureInfo(info);
  return measuresInBlock;
}
/**
 * Sets the actual projection (dimensions and measures) of the segment on the block execution
 * info.
 *
 * <p>When the table reports no column drift, the projection of the query model is used as is.
 * Otherwise every projected dimension that matches one of the segment's measures is moved to
 * the measure projection, keeping its projection ordinal; the remaining dimensions stay
 * dimensions.
 *
 * @param blockExecutionInfo receives the actual query dimensions and measures
 * @param queryModel provides the table and the projected dimensions and measures
 * @param segmentProperties provides the measures stored in the segment
 */
public static void actualProjectionOfSegment(BlockExecutionInfo blockExecutionInfo,
    QueryModel queryModel, SegmentProperties segmentProperties) {
  List<ProjectionDimension> projectionDimensions = queryModel.getProjectionDimensions();
  List<ProjectionMeasure> projectionMeasures = queryModel.getProjectionMeasures();

  if (!queryModel.getTable().hasColumnDrift()) {
    blockExecutionInfo.setActualQueryDimensions(
        projectionDimensions.toArray(new ProjectionDimension[projectionDimensions.size()]));
    blockExecutionInfo.setActualQueryMeasures(
        projectionMeasures.toArray(new ProjectionMeasure[projectionMeasures.size()]));
    return;
  }

  List<CarbonMeasure> segmentMeasures = segmentProperties.getMeasures();
  List<ProjectionMeasure> actualMeasures =
      new ArrayList<>(projectionMeasures.size() + segmentMeasures.size());
  actualMeasures.addAll(projectionMeasures);
  List<ProjectionDimension> actualDimensions = new ArrayList<>(projectionDimensions.size());

  for (ProjectionDimension projected : projectionDimensions) {
    // find the segment measure, if any, that stores this projected dimension
    CarbonMeasure storedAsMeasure = null;
    for (CarbonMeasure segmentMeasure : segmentMeasures) {
      if (isColumnMatches(queryModel.getTable().isTransactionalTable(),
          projected.getDimension(), segmentMeasure)) {
        storedAsMeasure = segmentMeasure;
        break;
      }
    }
    if (storedAsMeasure == null) {
      actualDimensions.add(projected);
      continue;
    }
    ProjectionMeasure asMeasure = new ProjectionMeasure(storedAsMeasure);
    asMeasure.setOrdinal(projected.getOrdinal());
    actualMeasures.add(asMeasure);
  }

  blockExecutionInfo.setActualQueryDimensions(
      actualDimensions.toArray(new ProjectionDimension[actualDimensions.size()]));
  blockExecutionInfo.setActualQueryMeasures(
      actualMeasures.toArray(new ProjectionMeasure[actualMeasures.size()]));
}
/**
 * Checks whether any of the table's column drift dimensions matches a measure of the given
 * segment.
 *
 * @param table provides the column drift dimensions and the transactional flag
 * @param segmentProperties provides the measures stored in the segment
 * @return {@code true} as soon as one drift column matches a segment measure, {@code false}
 *         when none does
 */
public static boolean hasColumnDriftOnSegment(CarbonTable table,
    SegmentProperties segmentProperties) {
  for (CarbonDimension driftColumn : table.getColumnDrift()) {
    for (CarbonMeasure segmentMeasure : segmentProperties.getMeasures()) {
      if (!isColumnMatches(table.isTransactionalTable(), driftColumn, segmentMeasure)) {
        continue;
      }
      return true;
    }
  }
  return false;
}
}