| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.carbondata.core.metadata.schema.table; |
| |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| |
| import org.apache.carbondata.core.metadata.schema.datamap.DataMapClassProvider; |
| import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; |
| import org.apache.carbondata.core.metadata.schema.table.column.ParentColumnTableRelation; |
| import org.apache.carbondata.core.preagg.TimeSeriesFunctionEnum; |
| |
| /** |
| * data map schema class for pre aggregation |
| */ |
| public class AggregationDataMapSchema extends DataMapSchema { |
| |
| private static final long serialVersionUID = 5900935117929888412L; |
| /** |
| * map of parent column name to set of child column column without |
| * aggregation function |
| */ |
| private Map<String, Set<ColumnSchema>> parentToNonAggChildMapping; |
| |
| /** |
| * map of parent column name to set of child columns column with |
| * aggregation function |
| */ |
| private Map<String, Set<ColumnSchema>> parentToAggChildMapping; |
| |
| /** |
| * map of parent column name to set of aggregation function applied in |
| * in parent column |
| */ |
| private Map<String, Set<String>> parentColumnToAggregationsMapping; |
| |
| /** |
| * whether its a timeseries data map |
| */ |
| private boolean isTimeseriesDataMap; |
| |
| /** |
| * below ordinal will be used during sorting the data map |
| * to support rollup for loading |
| */ |
| private int ordinal = Integer.MAX_VALUE; |
| |
| // Dont remove transient otherwise serialization for carbonTable will fail using |
| // JavaSerialization in spark. |
| private transient Set aggExpToColumnMapping; |
| |
| AggregationDataMapSchema(String dataMapName, String className) { |
| super(dataMapName, className); |
| } |
| |
| public void setChildSchema(TableSchema childSchema) { |
| super.setChildSchema(childSchema); |
| List<ColumnSchema> listOfColumns = getChildSchema().getListOfColumns(); |
| fillNonAggFunctionColumns(listOfColumns); |
| fillAggFunctionColumns(listOfColumns); |
| fillParentNameToAggregationMapping(listOfColumns); |
| } |
| |
| /** |
| * Below method will be used to get the columns on which aggregate function |
| * and time series function is not applied |
| * @param columnName |
| * parent column name |
| * @return child column schema |
| */ |
| public ColumnSchema getNonAggNonTimeseriesChildColBasedByParent(String columnName) { |
| Set<ColumnSchema> columnSchemas = parentToNonAggChildMapping.get(columnName); |
| if (null != columnSchemas) { |
| Iterator<ColumnSchema> iterator = columnSchemas.iterator(); |
| while (iterator.hasNext()) { |
| ColumnSchema next = iterator.next(); |
| if ((null == next.getAggFunction() || next.getAggFunction().isEmpty()) && null == next |
| .getTimeSeriesFunction() || next.getTimeSeriesFunction().isEmpty()) { |
| return next; |
| } |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * Below method will be used to get the columns on which aggregate function is not applied |
| * @param columnName |
| * parent column name |
| * @return child column schema |
| */ |
| public ColumnSchema getNonAggChildColBasedByParent(String columnName) { |
| Set<ColumnSchema> columnSchemas = parentToNonAggChildMapping.get(columnName); |
| if (null != columnSchemas) { |
| Iterator<ColumnSchema> iterator = columnSchemas.iterator(); |
| while (iterator.hasNext()) { |
| ColumnSchema next = iterator.next(); |
| if ((null == next.getAggFunction() || next.getAggFunction().isEmpty())) { |
| return next; |
| } |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * Below method will be used to get the columns on which aggregate function is not applied |
| * |
| * @param columnName parent column name |
| * @return child column schema |
| */ |
| public ColumnSchema getTimeseriesChildColBasedByParent(String columnName, |
| String timeseriesFunction) { |
| Set<ColumnSchema> columnSchemas = parentToNonAggChildMapping.get(columnName); |
| if (null != columnSchemas) { |
| Iterator<ColumnSchema> iterator = columnSchemas.iterator(); |
| while (iterator.hasNext()) { |
| ColumnSchema next = iterator.next(); |
| if (timeseriesFunction.equals(next.getTimeSeriesFunction())) { |
| return next; |
| } |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * Below method will be used to get the column schema based on parent column name |
| * @param columName |
| * parent column name |
| * @return child column schema |
| */ |
| public ColumnSchema getChildColByParentColName(String columName) { |
| List<ColumnSchema> listOfColumns = childSchema.getListOfColumns(); |
| for (ColumnSchema columnSchema : listOfColumns) { |
| List<ParentColumnTableRelation> parentColumnTableRelations = |
| columnSchema.getParentColumnTableRelations(); |
| if (null != parentColumnTableRelations && parentColumnTableRelations.size() == 1 |
| && parentColumnTableRelations.get(0).getColumnName().equalsIgnoreCase(columName) && |
| columnSchema.getColumnName().endsWith(columName)) { |
| return columnSchema; |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * Below method will be used to get the child column schema based on parent name and aggregate |
| * function applied on column |
| * @param columnName |
| * parent column name |
| * @param aggFunction |
| * aggregate function applied |
| * @return child column schema |
| */ |
| public ColumnSchema getAggChildColByParent(String columnName, |
| String aggFunction) { |
| Set<ColumnSchema> columnSchemas = parentToAggChildMapping.get(columnName); |
| if (null != columnSchemas) { |
| Iterator<ColumnSchema> iterator = columnSchemas.iterator(); |
| while (iterator.hasNext()) { |
| ColumnSchema next = iterator.next(); |
| if (null != next.getAggFunction() && next.getAggFunction().equalsIgnoreCase(aggFunction)) { |
| return next; |
| } |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * Below method will be used to get the column schema based on parent column name |
| * @param columName |
| * parent column name |
| * @param timeseriesFunction |
| * timeseries function applied on column |
| * @return child column schema |
| */ |
| public ColumnSchema getTimeseriesChildColByParent(String columName, String timeseriesFunction) { |
| List<ColumnSchema> listOfColumns = childSchema.getListOfColumns(); |
| for (ColumnSchema columnSchema : listOfColumns) { |
| List<ParentColumnTableRelation> parentColumnTableRelations = |
| columnSchema.getParentColumnTableRelations(); |
| if (null != parentColumnTableRelations && parentColumnTableRelations.size() == 1 |
| && parentColumnTableRelations.get(0).getColumnName().equalsIgnoreCase(columName) |
| && timeseriesFunction.equalsIgnoreCase(columnSchema.getTimeSeriesFunction())) { |
| return columnSchema; |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * Method to prepare mapping of parent to list of aggregation function applied on that column |
| * @param listOfColumns |
| * child column schema list |
| */ |
| private void fillParentNameToAggregationMapping(List<ColumnSchema> listOfColumns) { |
| parentColumnToAggregationsMapping = new HashMap<>(); |
| for (ColumnSchema column : listOfColumns) { |
| if (null != column.getAggFunction() && !column.getAggFunction().isEmpty()) { |
| List<ParentColumnTableRelation> parentColumnTableRelations = |
| column.getParentColumnTableRelations(); |
| if (null != parentColumnTableRelations && parentColumnTableRelations.size() == 1) { |
| String columnName = column.getParentColumnTableRelations().get(0).getColumnName(); |
| Set<String> aggFunctions = parentColumnToAggregationsMapping.get(columnName); |
| if (null == aggFunctions) { |
| aggFunctions = new HashSet<>(); |
| parentColumnToAggregationsMapping.put(columnName, aggFunctions); |
| } |
| aggFunctions.add(column.getAggFunction()); |
| } |
| } |
| } |
| } |
| |
| /** |
| * Below method will be used prepare mapping between parent column to non aggregation function |
| * columns |
| * @param listOfColumns |
| * list of child columns |
| */ |
| private void fillNonAggFunctionColumns(List<ColumnSchema> listOfColumns) { |
| parentToNonAggChildMapping = new HashMap<>(); |
| for (ColumnSchema column : listOfColumns) { |
| if (!isTimeseriesDataMap) { |
| isTimeseriesDataMap = |
| null != column.getTimeSeriesFunction() && !column.getTimeSeriesFunction().isEmpty(); |
| if (isTimeseriesDataMap) { |
| this.ordinal = |
| TimeSeriesFunctionEnum.valueOf(column.getTimeSeriesFunction().toUpperCase()) |
| .getOrdinal(); |
| } |
| } |
| if (null == column.getAggFunction() || column.getAggFunction().isEmpty()) { |
| fillMappingDetails(column, parentToNonAggChildMapping); |
| } |
| } |
| } |
| |
| private void fillMappingDetails(ColumnSchema column, |
| Map<String, Set<ColumnSchema>> map) { |
| List<ParentColumnTableRelation> parentColumnTableRelations = |
| column.getParentColumnTableRelations(); |
| if (null != parentColumnTableRelations && parentColumnTableRelations.size() == 1) { |
| String columnName = column.getParentColumnTableRelations().get(0).getColumnName(); |
| Set<ColumnSchema> columnSchemas = map.get(columnName); |
| if (null == columnSchemas) { |
| columnSchemas = new HashSet<>(); |
| map.put(columnName, columnSchemas); |
| } |
| columnSchemas.add(column); |
| } |
| } |
| |
| /** |
| * Below method will be used to fill parent to list of aggregation column mapping |
| * @param listOfColumns |
| * list of child columns |
| */ |
| private void fillAggFunctionColumns(List<ColumnSchema> listOfColumns) { |
| parentToAggChildMapping = new HashMap<>(); |
| for (ColumnSchema column : listOfColumns) { |
| if (null != column.getAggFunction() && !column.getAggFunction().isEmpty()) { |
| fillMappingDetails(column, parentToAggChildMapping); |
| } |
| } |
| } |
| |
| public boolean isTimeseriesDataMap() { |
| return isTimeseriesDataMap; |
| } |
| |
| /** |
| * Below method is to support rollup during loading the data in pre aggregate table |
| * In case of timeseries year level table data loading can be done using month level table or any |
| * time series level below year level for example day,hour minute, second. |
| * @TODO need to handle for pre aggregate table without timeseries |
| * |
| * @param aggregationDataMapSchema |
| * @return whether aggregation data map can be selected or not |
| */ |
| public boolean canSelectForRollup(AggregationDataMapSchema aggregationDataMapSchema) { |
| List<ColumnSchema> listOfColumns = childSchema.getListOfColumns(); |
| for (ColumnSchema column : listOfColumns) { |
| List<ParentColumnTableRelation> parentColumnTableRelations = |
| column.getParentColumnTableRelations(); |
| //@TODO handle scenario when aggregate datamap columns is derive from multiple column |
| // which is not supported currently |
| if (null != parentColumnTableRelations && parentColumnTableRelations.size() == 1) { |
| if (null != column.getAggFunction() && !column.getAggFunction().isEmpty()) { |
| if (null == aggregationDataMapSchema |
| .getAggChildColByParent(parentColumnTableRelations.get(0).getColumnName(), |
| column.getAggFunction())) { |
| return false; |
| } |
| } else { |
| if (null == aggregationDataMapSchema.getNonAggChildColBasedByParent( |
| parentColumnTableRelations.get(0).getColumnName())) { |
| return false; |
| } |
| } |
| } else { |
| // in case of any expression one column can be derived from multiple column |
| // in that case we cannot do rollup so hit the maintable |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| public int getOrdinal() { |
| return ordinal; |
| } |
| |
| /** |
| * Below method will be used to get the aggregation column based on index |
| * It will return the first aggregation column found based on index |
| * @param searchStartIndex |
| * start index |
| * @param sortedColumnSchema |
| * list of sorted table columns |
| * @return found column list |
| * |
| */ |
| public ColumnSchema getAggColumnBasedOnIndex(int searchStartIndex, |
| List<ColumnSchema> sortedColumnSchema) { |
| ColumnSchema columnSchema = null; |
| for (int i = searchStartIndex; i < sortedColumnSchema.size(); i++) { |
| if (!sortedColumnSchema.get(i).getAggFunction().isEmpty()) { |
| columnSchema = sortedColumnSchema.get(i); |
| break; |
| } |
| } |
| return columnSchema; |
| } |
| |
| public synchronized Set getAggExpToColumnMapping() { |
| return aggExpToColumnMapping; |
| } |
| |
| public synchronized void setAggExpToColumnMapping(Set aggExpToColumnMapping) { |
| if (null == this.aggExpToColumnMapping) { |
| this.aggExpToColumnMapping = aggExpToColumnMapping; |
| } |
| } |
| |
| public DataMapClassProvider getProvider() { |
| return isTimeseriesDataMap ? |
| DataMapClassProvider.TIMESERIES : DataMapClassProvider.PREAGGREGATE; |
| } |
| |
| @Override |
| public boolean equals(Object o) { |
| if (this == o) return true; |
| if (o == null || getClass() != o.getClass()) return false; |
| if (!super.equals(o)) return false; |
| AggregationDataMapSchema that = (AggregationDataMapSchema) o; |
| return that == this; |
| } |
| |
| @Override |
| public int hashCode() { |
| return super.hashCode(); |
| } |
| } |