// blob: 2443663a27fa83a9a24ffc9cb1a0c2c2db0ec6c0 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.metastore.util;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.metastore.metadata.BaseMetadata;
import org.apache.drill.metastore.metadata.TableMetadata;
import org.apache.drill.metastore.statistics.CollectableColumnStatisticsKind;
import org.apache.drill.metastore.statistics.ColumnStatistics;
import org.apache.drill.metastore.statistics.ColumnStatisticsKind;
import org.apache.drill.metastore.statistics.StatisticsHolder;
import org.apache.drill.metastore.statistics.TableStatisticsKind;
import org.apache.drill.shaded.guava.com.google.common.primitives.UnsignedBytes;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class TableMetadataUtils {

  /**
   * Selects a null-tolerant {@link Comparator} for values of the specified minor type.
   * <ul>
   *   <li>{@code INTERVALDAY}, {@code INTERVAL}, {@code INTERVALYEAR}:
   *   unsigned lexicographical comparison of byte arrays;</li>
   *   <li>{@code UINT1}: unsigned byte comparison;</li>
   *   <li>{@code UINT2}, {@code UINT4}: unsigned int comparison;</li>
   *   <li>{@code UINT8}: unsigned long comparison;</li>
   *   <li>any other type: natural ordering.</li>
   * </ul>
   * Every comparator returned here places {@code null} before non-null values.
   * <p>
   * The element type {@code T} is fixed by the caller at compile time, whereas the
   * comparator is picked from {@code type} at runtime. The final cast is therefore
   * unchecked: the caller is responsible for requesting a {@code T} that matches
   * the values actually compared (for example {@code byte[]} for interval types).
   *
   * @param <T>  type of the values the caller intends to compare
   * @param type type of the column
   * @return {@link Comparator} instance for the given type
   */
  @SuppressWarnings("unchecked")
  public static <T> Comparator<T> getComparator(TypeProtos.MinorType type) {
    // Hold the chosen comparator with an unknown element type first; it is forced
    // to the caller's T in a single place at the end of the method.
    final Comparator<?> nullSafeOrder;
    switch (type) {
      case INTERVALDAY:
      case INTERVAL:
      case INTERVALYEAR:
        nullSafeOrder = Comparator.nullsFirst(UnsignedBytes.lexicographicalComparator());
        break;
      case UINT1:
        nullSafeOrder = Comparator.nullsFirst(UnsignedBytes::compare);
        break;
      case UINT2:
      case UINT4:
        nullSafeOrder = Comparator.nullsFirst(Integer::compareUnsigned);
        break;
      case UINT8:
        nullSafeOrder = Comparator.nullsFirst(Long::compareUnsigned);
        break;
      default:
        nullSafeOrder = getNaturalNullsFirstComparator();
        break;
    }
    return (Comparator<T>) nullSafeOrder;
  }

  /**
   * Returns a "natural order" comparator which treats nulls as minimal values.
   *
   * @param <T> type to compare
   * @return "natural order" comparator that sorts {@code null} first
   */
  public static <T extends Comparable<T>> Comparator<T> getNaturalNullsFirstComparator() {
    Comparator<T> naturalOrder = Comparator.naturalOrder();
    return Comparator.nullsFirst(naturalOrder);
  }

  /**
   * Merges the column statistics of the given metadata into a map of
   * {@link ColumnStatistics} keyed by column.
   * <p>
   * For every requested column, the statistics of each metadata entry are gathered
   * (substituting an "all nulls" placeholder where an entry has no statistics for
   * the column), then every requested statistics kind is merged over that list.
   *
   * @param <T>                 type of metadata to collect
   * @param metadataList        list of metadata to be merged
   * @param columns             set of columns whose statistics should be merged
   * @param statisticsToCollect kinds of statistics that should be collected
   * @return map of merged column statistics with columns as keys
   */
  @SuppressWarnings({ "unchecked", "rawtypes" })
  public static <T extends BaseMetadata> Map<SchemaPath, ColumnStatistics<?>> mergeColumnsStatistics(
      Collection<T> metadataList, Set<SchemaPath> columns, List<CollectableColumnStatisticsKind<?>> statisticsToCollect) {
    Map<SchemaPath, ColumnStatistics<?>> mergedByColumn = new HashMap<>();
    for (SchemaPath column : columns) {
      List<ColumnStatistics<?>> perMetadataStatistics = collectColumnStatistics(metadataList, column);

      List<StatisticsHolder<?>> mergedHolders = new ArrayList<>();
      for (CollectableColumnStatisticsKind<?> kind : statisticsToCollect) {
        Object mergedValue = kind.mergeStatistics(perMetadataStatistics);
        mergedHolders.add(new StatisticsHolder<>(mergedValue, kind));
      }

      // The comparator type is taken from the first gathered entry; INT is used
      // when no statistics were provided at all.
      // NOTE(review): the first entry may be the all-nulls placeholder rather than
      // real statistics - confirm its comparator type is the intended one.
      TypeProtos.MinorType comparatorType = perMetadataStatistics.isEmpty()
          ? TypeProtos.MinorType.INT
          : perMetadataStatistics.get(0).getComparatorType();
      mergedByColumn.put(column, new ColumnStatistics<>(mergedHolders, comparatorType));
    }
    return mergedByColumn;
  }

  /**
   * Gathers the statistics of a single column from every metadata entry, in
   * iteration order of {@code metadataList}.
   *
   * @param <T>          type of metadata to inspect
   * @param metadataList metadata entries to read column statistics from
   * @param column       column whose statistics are gathered
   * @return one {@link ColumnStatistics} per metadata entry
   */
  @SuppressWarnings({ "unchecked", "rawtypes" })
  private static <T extends BaseMetadata> List<ColumnStatistics<?>> collectColumnStatistics(
      Collection<T> metadataList, SchemaPath column) {
    List<ColumnStatistics<?>> collected = new ArrayList<>();
    for (T metadata : metadataList) {
      ColumnStatistics<?> known = metadata.getColumnsStatistics().get(column);
      if (known != null) {
        collected.add(known);
        continue;
      }
      // Schema change happened: the entry has no statistics for this column, so
      // represent it as all nulls (nulls count equal to the entry's row count).
      collected.add(new ColumnStatistics(
          Collections.singletonList(
              new StatisticsHolder<>(TableStatisticsKind.ROW_COUNT.getValue(metadata), ColumnStatisticsKind.NULLS_COUNT))));
    }
    return collected;
  }

  /**
   * Recomputes the row count and the per-column nulls count of the specified table
   * metadata from the given metadata entries and returns a new
   * {@link TableMetadata} instance carrying the updated statistics.
   *
   * @param tableMetadata table metadata whose statistics should be updated
   * @param statistics    metadata entries whose row count and nulls counts are merged
   * @return new {@link TableMetadata} instance with updated statistics
   */
  public static TableMetadata updateRowCount(TableMetadata tableMetadata, Collection<? extends BaseMetadata> statistics) {
    StatisticsHolder<?> mergedRowCount = new StatisticsHolder<>(
        TableStatisticsKind.ROW_COUNT.mergeStatistics(statistics), TableStatisticsKind.ROW_COUNT);
    List<StatisticsHolder<?>> tableLevelStatistics = new ArrayList<>();
    tableLevelStatistics.add(mergedRowCount);

    // Only columns already tracked by the table metadata are refreshed.
    Set<SchemaPath> trackedColumns = tableMetadata.getColumnsStatistics().keySet();
    List<CollectableColumnStatisticsKind<?>> nullsCountOnly =
        Collections.singletonList(ColumnStatisticsKind.NULLS_COUNT);
    Map<SchemaPath, ColumnStatistics<?>> refreshedColumns =
        mergeColumnsStatistics(statistics, trackedColumns, nullsCountOnly);

    return tableMetadata.cloneWithStats(refreshedColumns, tableLevelStatistics);
  }
}