blob: 1d291d2265d89e6aa57f090dca3d2f84407a1d38 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.core.datastore.block;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import org.apache.carbondata.common.logging.LogServiceFactory;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.encoder.Encoding;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
import org.apache.carbondata.core.util.CarbonUtil;
import org.apache.log4j.Logger;
/**
 * This class contains all the details about the restructuring information of
 * the block. This will be used during query execution to handle restructure
 * information.
 *
 * <p>From the list of column schemas it derives the primitive dimensions, the complex
 * dimensions (with their nested children) and the measures, and it builds the mappings
 * from column ordinal to column chunk index.
 */
public class SegmentProperties {

  private static final Logger LOG =
      LogServiceFactory.getLogService(SegmentProperties.class.getName());

  // When calculating the finger printer of all columns, the xor result of the
  // dimension columns is right shifted by 1 bit so that it can be told apart
  // from the contribution of the other column kinds.
  private static final int DIMENSIONS_FINGER_PRINTER_SHIFT = 1;

  // When calculating the finger printer of all columns, the xor result of the
  // measure columns is right shifted by 2 bits so that it can be told apart
  // from the contribution of the other column kinds.
  private static final int MEASURES_FINGER_PRINTER_SHIFT = 2;

  // When calculating the finger printer of all columns, the xor result of the
  // complex columns is right shifted by 3 bits so that it can be told apart
  // from the contribution of the other column kinds.
  private static final int COMPLEX_FINGER_PRINTER_SHIFT = 3;

  // Marker stored in fingerPrinter while it has not been calculated yet.
  private static final long FINGER_PRINTER_NOT_COMPUTED = Long.MAX_VALUE;

  // Marker meaning that at least one column unique id could not be parsed as a UUID,
  // so the finger printer cannot be trusted for comparison.
  private static final long FINGER_PRINTER_INVALID = Long.MIN_VALUE;

  /**
   * list of primitive (non complex) dimension present in the block
   */
  private final List<CarbonDimension> dimensions;

  /**
   * list of top level complex dimension present in the block
   */
  private final List<CarbonDimension> complexDimensions;

  /**
   * list of measure present in the block
   */
  private final List<CarbonMeasure> measures;

  /**
   * mapping of dimension ordinal in schema to column chunk index in the data file
   */
  private final Map<Integer, Integer> dimensionOrdinalToChunkMapping;

  /**
   * a block can have multiple columns. This will have block index as key
   * and all dimension participated in that block as values
   */
  private final Map<Integer, Set<Integer>> blockToDimensionOrdinalMapping;

  /**
   * mapping of measure ordinal in schema to column chunk index in the data file
   */
  private final Map<Integer, Integer> measuresOrdinalToChunkMapping;

  /**
   * to store the number of no dictionary dimension
   * this will be used during query execution for creating
   * start and end key. Purpose of storing this value here is
   * so during query execution no need to calculate every time
   */
  private int numberOfNoDictionaryDimension;

  private int numberOfSortColumns;

  private int numberOfDictDimensions;

  private int numberOfColumnsAfterFlatten;

  private int lastDimensionColOrdinal;

  /**
   * The finger printer is derived from the xor of the unique ids of all the columns.
   * Two segment properties may contain the same columns while a column is a dimension
   * in one of them and a measure in the other. To tell such cases apart, the xor result
   * of each column kind is right shifted by a different number of bits before the three
   * results are combined:
   *
   * fingerPrinter = (dimensionsFingerPrinter >> 1)
   * ^ (measuresFingerPrinter >> 2) ^ (complexFingerPrinter >> 3)
   * dimensionsFingerPrinter = Long.MAX_VALUE ^ dimension1 ^ dimension2 ^ ...
   * measuresFingerPrinter = Long.MAX_VALUE ^ measure1 ^ measure2 ^ measure3 ...
   * complexFingerPrinter = Long.MAX_VALUE ^ complex1 ^ complex2 ^ complex3 ...
   *
   * where each column contributes the most significant 64 bits of its UUID.
   * Long.MAX_VALUE means "not calculated yet", Long.MIN_VALUE means "invalid".
   */
  private long fingerPrinter = FINGER_PRINTER_NOT_COMPUTED;

  /**
   * Builds the segment properties from the column schemas of the table.
   *
   * @param columnsInTable column schemas; they are classified into dimensions, complex
   *                       dimensions and measures in the order they appear in the list
   */
  public SegmentProperties(List<ColumnSchema> columnsInTable) {
    dimensions = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    complexDimensions = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    measures = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    dimensionOrdinalToChunkMapping = new HashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    blockToDimensionOrdinalMapping = new HashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    measuresOrdinalToChunkMapping = new HashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    // the column lists must be filled before the ordinal mappings can be derived from them
    fillDimensionAndMeasureDetails(columnsInTable);
    fillOrdinalToBlockMappingForDimension();
    fillOrdinalToChunkIndexMappingForMeasureColumns();
  }

  /**
   * below method is to fill the dimension and its mapping to file blocks.
   * Primitive dimensions get consecutive chunk indexes starting from 0, then every
   * complex dimension followed by all of its (nested) children gets the next indexes.
   */
  private void fillOrdinalToBlockMappingForDimension() {
    int blockOrdinal = -1;
    for (CarbonDimension dimension : dimensions) {
      dimensionOrdinalToChunkMapping.put(dimension.getOrdinal(), ++blockOrdinal);
    }
    // complex dimension will be stored at last
    for (CarbonDimension complexDimension : complexDimensions) {
      dimensionOrdinalToChunkMapping.put(complexDimension.getOrdinal(), ++blockOrdinal);
      blockOrdinal = fillComplexDimensionChildBlockIndex(blockOrdinal, complexDimension);
    }
    fillBlockToDimensionOrdinalMapping();
  }

  /**
   * Inverts dimensionOrdinalToChunkMapping: for every chunk index collect the set of
   * dimension ordinals mapped to it.
   */
  private void fillBlockToDimensionOrdinalMapping() {
    for (Entry<Integer, Integer> ordinalToChunk : dimensionOrdinalToChunkMapping.entrySet()) {
      blockToDimensionOrdinalMapping
          .computeIfAbsent(ordinalToChunk.getValue(),
              chunkIndex -> new HashSet<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE))
          .add(ordinalToChunk.getKey());
    }
  }

  /**
   * compare the segment properties based on number of columns and finger printer.
   * Two different instances are never equal when the finger printer of either of them
   * is invalid (a column unique id was not a valid UUID).
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      // an instance is always equal to itself, even when its finger printer is invalid
      return true;
    }
    if (!(obj instanceof SegmentProperties)) {
      return false;
    }
    // If these two segment properties have different number of columns
    // Return false directly
    SegmentProperties other = (SegmentProperties) obj;
    if (this.getNumberOfColumns() != other.getNumberOfColumns()) {
      return false;
    }
    // Compare the finger printer
    long ownFingerPrinter = getFingerPrinter();
    long otherFingerPrinter = other.getFingerPrinter();
    return ownFingerPrinter != FINGER_PRINTER_INVALID
        && otherFingerPrinter != FINGER_PRINTER_INVALID
        && ownFingerPrinter == otherFingerPrinter;
  }

  /**
   * Hash code built from the same values {@link #equals(Object)} compares (number of
   * columns and finger printer), so that equal instances always share a hash code.
   */
  @Override
  public int hashCode() {
    return 31 * getNumberOfColumns() + Long.hashCode(getFingerPrinter());
  }

  /**
   * Below method will be used to add the complex dimension child
   * block index.It is a recursive method which will be get the children
   * add the block index
   *
   * @param blockOrdinal start block ordinal
   * @param dimension    parent dimension
   * @return last block index
   */
  private int fillComplexDimensionChildBlockIndex(int blockOrdinal, CarbonDimension dimension) {
    List<CarbonDimension> children = dimension.getListOfChildDimensions();
    for (int i = 0; i < dimension.getNumberOfChild(); i++) {
      CarbonDimension child = children.get(i);
      dimensionOrdinalToChunkMapping.put(child.getOrdinal(), ++blockOrdinal);
      if (child.getNumberOfChild() > 0) {
        blockOrdinal = fillComplexDimensionChildBlockIndex(blockOrdinal, child);
      }
    }
    return blockOrdinal;
  }

  /**
   * Below method will be used to fill the mapping
   * of measure ordinal to its block index mapping in
   * file. Measures get consecutive indexes starting from 0 in list order.
   */
  private void fillOrdinalToChunkIndexMappingForMeasureColumns() {
    int blockOrdinal = 0;
    for (CarbonMeasure measure : measures) {
      measuresOrdinalToChunkMapping.put(measure.getOrdinal(), blockOrdinal++);
    }
  }

  /**
   * Returns the finger printer of this segment properties, calculating and caching it
   * on first use.
   *
   * fingerPrinter = (dimensionsFingerPrinter >> 1)
   * ^ (measuresFingerPrinter >> 2) ^ (complexFingerPrinter >> 3)
   * dimensionsFingerPrinter = Long.MAX_VALUE ^ dimension1 ^ dimension2 ^ ...
   * measuresFingerPrinter = Long.MAX_VALUE ^ measure1 ^ measure2 ^ measure3 ...
   * complexFingerPrinter = Long.MAX_VALUE ^ complex1 ^ complex2 ^ complex3 ...
   *
   * @return the finger printer, or Long.MIN_VALUE when the unique id of any column
   * is not a valid UUID
   */
  protected long getFingerPrinter() {
    if (this.fingerPrinter == FINGER_PRINTER_NOT_COMPUTED) {
      long dimensionsFingerPrinter = getFingerprinter(this.dimensions.stream()
          .map(CarbonDimension::getColumnSchema).collect(Collectors.toList()));
      long measuresFingerPrinter = getFingerprinter(this.measures.stream()
          .map(CarbonMeasure::getColumnSchema).collect(Collectors.toList()));
      long complexFingerPrinter = getFingerprinter(this.complexDimensions.stream()
          .map(CarbonDimension::getColumnSchema).collect(Collectors.toList()));
      if (dimensionsFingerPrinter == FINGER_PRINTER_INVALID
          || measuresFingerPrinter == FINGER_PRINTER_INVALID
          || complexFingerPrinter == FINGER_PRINTER_INVALID) {
        // Keep the invalid marker intact. Shifting and xor-ing it with the other parts
        // would hide it from equals() and let partially calculated values match.
        this.fingerPrinter = FINGER_PRINTER_INVALID;
      } else {
        this.fingerPrinter = (dimensionsFingerPrinter >> DIMENSIONS_FINGER_PRINTER_SHIFT)
            ^ (measuresFingerPrinter >> MEASURES_FINGER_PRINTER_SHIFT)
            ^ (complexFingerPrinter >> COMPLEX_FINGER_PRINTER_SHIFT);
      }
    }
    return this.fingerPrinter;
  }

  /**
   * Xor the most significant 64 bits of the UUID of every given column, starting
   * from Long.MAX_VALUE.
   *
   * @param columns columns of one kind (dimension, measure or complex)
   * @return the xor result, or Long.MIN_VALUE as soon as one column unique id cannot
   * be parsed as a UUID
   */
  private long getFingerprinter(List<ColumnSchema> columns) {
    long fingerprint = Long.MAX_VALUE;
    for (ColumnSchema columnSchema : columns) {
      String columnUniqueId = columnSchema.getColumnUniqueId();
      UUID columnUUID;
      try {
        columnUUID = UUID.fromString(columnUniqueId);
      } catch (IllegalArgumentException | NullPointerException e) {
        // malformed id -> IllegalArgumentException, null id -> NullPointerException
        LOG.error("Invalid UUID string: " + columnUniqueId);
        return FINGER_PRINTER_INVALID;
      }
      fingerprint ^= columnUUID.getMostSignificantBits();
    }
    return fingerprint;
  }

  /**
   * below method will fill dimension and measure detail of the block.
   * Every column is classified as dictionary dimension, complex dimension,
   * no dictionary dimension or measure, and the column counters are updated.
   *
   * @param columnsInTable column schemas of the table
   */
  private void fillDimensionAndMeasureDetails(List<ColumnSchema> columnsInTable) {
    // ordinal will be required to read the data from file block
    int dimensionOrdinal = 0;
    int measureOrdinal = -1;
    int keyOrdinal = 0;
    int complexTypeOrdinal = -1;
    boolean isComplexDimensionStarted = false;
    int counter = 0;
    while (counter < columnsInTable.size()) {
      ColumnSchema columnSchema = columnsInTable.get(counter);
      if (columnSchema.isDimensionColumn()) {
        CarbonDimension carbonDimension;
        // not adding the cardinality of the non dictionary
        // column as it was not the part of MDKey
        if (CarbonUtil.hasEncoding(columnSchema.getEncodingList(), Encoding.DICTIONARY)
            && !isComplexDimensionStarted && columnSchema.getNumberOfChild() == 0) {
          this.numberOfDictDimensions++;
          this.numberOfColumnsAfterFlatten++;
          if (columnSchema.isSortColumn()) {
            this.numberOfSortColumns++;
          }
          // if it is a columnar dimension participated in MDKey then added
          // key ordinal and dimension ordinal
          carbonDimension =
              new CarbonDimension(columnSchema, dimensionOrdinal++, keyOrdinal++, -1);
        } else if (isComplexDimensionStarted || columnSchema.getDataType().isComplexType()) {
          // as complex type will be stored at last so once complex type started all the
          // dimension will be added to complex type
          carbonDimension =
              new CarbonDimension(columnSchema, dimensionOrdinal++, -1, ++complexTypeOrdinal);
          carbonDimension.initializeChildDimensionsList(columnSchema.getNumberOfChild());
          complexDimensions.add(carbonDimension);
          isComplexDimensionStarted = true;
          dimensionOrdinal =
              readAllComplexTypeChildren(dimensionOrdinal, columnSchema.getNumberOfChild(),
                  columnsInTable, carbonDimension, complexTypeOrdinal);
          // the children were consumed from the column list by the call above, so
          // continue scanning from the position the dimension ordinal has reached
          counter = dimensionOrdinal;
          complexTypeOrdinal = assignComplexOrdinal(carbonDimension, complexTypeOrdinal);
          this.numberOfColumnsAfterFlatten += getNumColumnsAfterFlatten(carbonDimension);
          continue;
        } else {
          // for no dictionary dimension
          carbonDimension = new CarbonDimension(columnSchema, dimensionOrdinal++, -1, -1);
          numberOfColumnsAfterFlatten++;
          numberOfNoDictionaryDimension++;
          if (columnSchema.isSortColumn()) {
            this.numberOfSortColumns++;
          }
        }
        dimensions.add(carbonDimension);
      } else {
        numberOfColumnsAfterFlatten++;
        measures.add(new CarbonMeasure(columnSchema, ++measureOrdinal));
      }
      counter++;
    }
    lastDimensionColOrdinal = dimensionOrdinal;
  }

  /**
   * Read all primitive/complex children and set it as list of child carbon dimension to parent
   * dimension
   *
   * @param dimensionOrdinal        ordinal (and index in listOfColumns) of the first child
   * @param childCount              number of direct children of the parent dimension
   * @param listOfColumns           all column schemas of the table
   * @param parentDimension         dimension to which the children are added
   * @param complexDimensionOrdinal complex type ordinal given to the created children
   * @return the dimension ordinal following the last child that was read
   */
  private int readAllComplexTypeChildren(int dimensionOrdinal, int childCount,
      List<ColumnSchema> listOfColumns, CarbonDimension parentDimension,
      int complexDimensionOrdinal) {
    for (int i = 0; i < childCount; i++) {
      ColumnSchema columnSchema = listOfColumns.get(dimensionOrdinal);
      if (columnSchema.isDimensionColumn()) {
        if (columnSchema.getNumberOfChild() > 0) {
          CarbonDimension complexDimension =
              new CarbonDimension(columnSchema, dimensionOrdinal++, -1, complexDimensionOrdinal++);
          complexDimension.initializeChildDimensionsList(columnSchema.getNumberOfChild());
          parentDimension.getListOfChildDimensions().add(complexDimension);
          dimensionOrdinal =
              readAllComplexTypeChildren(dimensionOrdinal, columnSchema.getNumberOfChild(),
                  listOfColumns, complexDimension, complexDimensionOrdinal);
        } else {
          parentDimension.getListOfChildDimensions().add(
              new CarbonDimension(columnSchema, dimensionOrdinal++, -1, complexDimensionOrdinal++));
        }
      }
    }
    return dimensionOrdinal;
  }

  /**
   * Assign consecutive complex type ordinals to all the (nested) children of the given
   * parent dimension, visiting every child before its following sibling.
   *
   * @param parentDimension         dimension whose children get an ordinal
   * @param complexDimensionOrdinal last complex type ordinal that is already in use
   * @return last complex type ordinal that was assigned
   */
  private int assignComplexOrdinal(CarbonDimension parentDimension, int complexDimensionOrdinal) {
    List<CarbonDimension> children = parentDimension.getListOfChildDimensions();
    for (int i = 0; i < parentDimension.getNumberOfChild(); i++) {
      CarbonDimension child = children.get(i);
      child.setComplexTypeOrdinal(++complexDimensionOrdinal);
      if (child.getNumberOfChild() > 0) {
        complexDimensionOrdinal = assignComplexOrdinal(child, complexDimensionOrdinal);
      }
    }
    return complexDimensionOrdinal;
  }

  /**
   * @return the dimensions
   */
  public List<CarbonDimension> getDimensions() {
    return dimensions;
  }

  /**
   * @return the complexDimensions
   */
  public List<CarbonDimension> getComplexDimensions() {
    return complexDimensions;
  }

  /**
   * @return the measures
   */
  public List<CarbonMeasure> getMeasures() {
    return measures;
  }

  /**
   * @return the dimensionOrdinalToChunkMapping
   */
  public Map<Integer, Integer> getDimensionOrdinalToChunkMapping() {
    return dimensionOrdinalToChunkMapping;
  }

  /**
   * @return the measuresOrdinalToChunkMapping
   */
  public Map<Integer, Integer> getMeasuresOrdinalToChunkMapping() {
    return measuresOrdinalToChunkMapping;
  }

  /**
   * @return the numberOfNoDictionaryDimension
   */
  public int getNumberOfNoDictionaryDimension() {
    return numberOfNoDictionaryDimension;
  }

  /**
   * @return It returns block index to dimension ordinal mapping
   */
  public Map<Integer, Set<Integer>> getBlockToDimensionOrdinalMapping() {
    return blockToDimensionOrdinalMapping;
  }

  /**
   * This method will search a given dimension and return the dimension from current block
   *
   * @param queryDimension dimension to be searched in the primitive dimensions of this block
   * @return result of the lookup done by CarbonUtil.getDimensionFromCurrentBlock
   */
  public CarbonDimension getDimensionFromCurrentBlock(CarbonDimension queryDimension) {
    return CarbonUtil.getDimensionFromCurrentBlock(this.dimensions, queryDimension);
  }

  /**
   * This method will search for a given measure in the current block measures list
   *
   * @param measureToBeSearched measure to be searched in the measures of this block
   * @return result of the lookup done by CarbonUtil.getMeasureFromCurrentBlock
   */
  public CarbonMeasure getMeasureFromCurrentBlock(CarbonMeasure measureToBeSearched) {
    return CarbonUtil.getMeasureFromCurrentBlock(this.measures, measureToBeSearched);
  }

  /**
   * @return number of primitive dimensions which are marked as sort column
   */
  public int getNumberOfSortColumns() {
    return numberOfSortColumns;
  }

  /**
   * @return the dimension ordinal following the last assigned one, counting the
   * children of complex dimensions as well
   */
  public int getLastDimensionColOrdinal() {
    return lastDimensionColOrdinal;
  }

  /**
   * @return number of columns after flatten: every primitive dimension, every measure
   * and every complex dimension together with all of its nested children
   */
  public int getNumberOfColumns() {
    return numberOfColumnsAfterFlatten;
  }

  /**
   * @return number of dictionary encoded primitive dimensions
   */
  public int getNumberOfDictDimensions() {
    return numberOfDictDimensions;
  }

  /**
   * @return number of dictionary plus no dictionary dimensions
   */
  public int getNumberOfPrimitiveDimensions() {
    return numberOfDictDimensions + numberOfNoDictionaryDimension;
  }

  /**
   * @return number of top level complex dimensions
   */
  public int getNumberOfComplexDimensions() {
    return complexDimensions.size();
  }

  /**
   * @return number of measures
   */
  public int getNumberOfMeasures() {
    return measures.size();
  }

  /**
   * Return column value length in byte for all dimension columns in the table
   * for dimension it is -1 (for DATE it is 4),
   */
  public int[] createDimColumnValueLength() {
    int[] length = new int[dimensions.size()];
    int index = 0;
    for (CarbonDimension dimension : dimensions) {
      length[index++] = getDimensionValueLength(dimension);
    }
    return length;
  }

  /**
   * Return column value length in byte for all columns in the table
   * for dimension and complex column it is -1 (for DATE it is 4),
   * for measure is 8 (for decimal is -1).
   * The entries are ordered as primitive dimensions, then the flattened complex
   * dimensions, then measures.
   */
  public int[] createColumnValueLength() {
    int[] length = new int[numberOfColumnsAfterFlatten];
    int index = 0;
    for (CarbonDimension dimension : dimensions) {
      length[index++] = getDimensionValueLength(dimension);
    }
    for (CarbonDimension complexDimension : complexDimensions) {
      // one entry for the complex dimension itself and one for each nested child
      int flattenedColumns = getNumColumnsAfterFlatten(complexDimension);
      for (int i = 0; i < flattenedColumns; i++) {
        length[index++] = -1;
      }
    }
    for (CarbonMeasure measure : measures) {
      length[index++] = DataTypes.isDecimal(measure.getDataType()) ? -1 : 8;
    }
    return length;
  }

  /**
   * @return value length in byte of a primitive dimension: 4 for DATE, otherwise -1
   */
  private static int getDimensionValueLength(CarbonDimension dimension) {
    DataType dataType = dimension.getDataType();
    return dataType == DataTypes.DATE ? 4 : -1;
  }

  /**
   * Count the given dimension plus, when it is complex, all of its nested children.
   */
  private int getNumColumnsAfterFlatten(CarbonDimension dimension) {
    int count = 1;
    if (dimension.isComplex()) {
      for (CarbonDimension child : dimension.getListOfChildDimensions()) {
        count += getNumColumnsAfterFlatten(child);
      }
    }
    return count;
  }
}