blob: 1ab75799e136f8d1a3f9aa3b06b18a4b19020d53 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.expr;
import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.common.expression.FunctionHolderExpression;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.expression.TypedFieldExpr;
import org.apache.drill.common.expression.ValueExpressions;
import org.apache.drill.common.expression.fn.FuncHolder;
import org.apache.drill.common.expression.fn.FunctionReplacementUtils;
import org.apache.drill.common.expression.visitors.AbstractExprVisitor;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.expr.fn.DrillSimpleFuncHolder;
import org.apache.drill.exec.expr.fn.interpreter.InterpreterEvaluator;
import org.apache.drill.exec.expr.holders.BigIntHolder;
import org.apache.drill.exec.expr.holders.Float4Holder;
import org.apache.drill.exec.expr.holders.Float8Holder;
import org.apache.drill.exec.expr.holders.IntHolder;
import org.apache.drill.exec.expr.holders.TimeStampHolder;
import org.apache.drill.exec.expr.holders.ValueHolder;
import org.apache.drill.exec.vector.ValueHolderHelper;
import org.apache.drill.metastore.statistics.ColumnStatistics;
import org.apache.drill.metastore.statistics.ColumnStatisticsKind;
import org.apache.drill.metastore.statistics.StatisticsHolder;
import java.math.BigInteger;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.Map;
import java.util.Set;
public class StatisticsProvider<T extends Comparable<T>> extends AbstractExprVisitor<ColumnStatistics<?>, Void, RuntimeException> {
private final Map<SchemaPath, ColumnStatistics<?>> columnStatMap;
private final long rowCount;
public StatisticsProvider(Map<SchemaPath, ColumnStatistics<?>> columnStatMap, long rowCount) {
this.columnStatMap = columnStatMap;
this.rowCount = rowCount;
}
public long getRowCount() {
return this.rowCount;
}
@Override
public ColumnStatistics<?> visitUnknown(LogicalExpression e, Void value) {
// do nothing for the unknown expression
return null;
}
@Override
public ColumnStatistics<?> visitTypedFieldExpr(TypedFieldExpr typedFieldExpr, Void value) {
ColumnStatistics<?> columnStatistics = columnStatMap.get(typedFieldExpr.getPath().getUnIndexed());
if (columnStatistics != null) {
return columnStatistics;
} else if (typedFieldExpr.getMajorType().equals(Types.OPTIONAL_INT)) {
// field does not exist.
return StatisticsProvider.getColumnStatistics(null, null, rowCount, typedFieldExpr.getMajorType().getMinorType());
}
return null;
}
@Override
public ColumnStatistics<Integer> visitIntConstant(ValueExpressions.IntExpression expr, Void value) {
return getConstantColumnStatistics(expr.getInt(), expr);
}
@Override
public ColumnStatistics<Boolean> visitBooleanConstant(ValueExpressions.BooleanExpression expr, Void value) {
return getConstantColumnStatistics(expr.getBoolean(), expr);
}
@Override
public ColumnStatistics<Long> visitLongConstant(ValueExpressions.LongExpression expr, Void value) {
return getConstantColumnStatistics(expr.getLong(), expr);
}
@Override
public ColumnStatistics<Float> visitFloatConstant(ValueExpressions.FloatExpression expr, Void value) {
return getConstantColumnStatistics(expr.getFloat(), expr);
}
@Override
public ColumnStatistics<Double> visitDoubleConstant(ValueExpressions.DoubleExpression expr, Void value) {
return getConstantColumnStatistics(expr.getDouble(), expr);
}
@Override
public ColumnStatistics<Long> visitDateConstant(ValueExpressions.DateExpression expr, Void value) {
return getConstantColumnStatistics(expr.getDate(), expr);
}
@Override
public ColumnStatistics<Long> visitTimeStampConstant(ValueExpressions.TimeStampExpression expr, Void value) {
return getConstantColumnStatistics(expr.getTimeStamp(), expr);
}
@Override
public ColumnStatistics<Integer> visitTimeConstant(ValueExpressions.TimeExpression expr, Void value) {
return getConstantColumnStatistics(expr.getTime(), expr);
}
@Override
public ColumnStatistics<String> visitQuotedStringConstant(ValueExpressions.QuotedString expr, Void value) {
return getConstantColumnStatistics(expr.getString(), expr);
}
@Override
public ColumnStatistics<BigInteger> visitVarDecimalConstant(ValueExpressions.VarDecimalExpression expr, Void value) {
return getConstantColumnStatistics(expr.getBigDecimal().unscaledValue(), expr);
}
@Override
@SuppressWarnings("unchecked")
public ColumnStatistics<?> visitFunctionHolderExpression(FunctionHolderExpression holderExpr, Void value) {
FuncHolder funcHolder = holderExpr.getHolder();
if (!(funcHolder instanceof DrillSimpleFuncHolder)) {
// Only Drill function is allowed.
return null;
}
String funcName = ((DrillSimpleFuncHolder) funcHolder).getRegisteredNames()[0];
if (FunctionReplacementUtils.isCastFunction(funcName)) {
ColumnStatistics<T> stat = (ColumnStatistics<T>) holderExpr.args.get(0).accept(this, null);
if (!IsPredicate.isNullOrEmpty(stat)) {
return evalCastFunc(holderExpr, stat);
}
}
return null;
}
private ColumnStatistics<?> evalCastFunc(FunctionHolderExpression holderExpr, ColumnStatistics<T> input) {
try {
DrillSimpleFuncHolder funcHolder = (DrillSimpleFuncHolder) holderExpr.getHolder();
DrillSimpleFunc interpreter = funcHolder.createInterpreter();
ValueHolder minHolder;
ValueHolder maxHolder;
TypeProtos.MinorType srcType = holderExpr.args.get(0).getMajorType().getMinorType();
TypeProtos.MinorType destType = holderExpr.getMajorType().getMinorType();
if (srcType.equals(destType)) {
// same type cast ==> NoOp.
return input;
} else if (!CAST_FUNC.containsKey(srcType) || !CAST_FUNC.get(srcType).contains(destType)) {
return null; // cast func between srcType and destType is NOT allowed.
}
switch (srcType) {
case INT :
minHolder = ValueHolderHelper.getIntHolder((Integer) ComparisonPredicate.getMinValue(input));
maxHolder = ValueHolderHelper.getIntHolder((Integer) ComparisonPredicate.getMaxValue(input));
break;
case BIGINT:
minHolder = ValueHolderHelper.getBigIntHolder((Long) ComparisonPredicate.getMinValue(input));
maxHolder = ValueHolderHelper.getBigIntHolder((Long) ComparisonPredicate.getMaxValue(input));
break;
case FLOAT4:
minHolder = ValueHolderHelper.getFloat4Holder((Float) ComparisonPredicate.getMinValue(input));
maxHolder = ValueHolderHelper.getFloat4Holder((Float) ComparisonPredicate.getMaxValue(input));
break;
case FLOAT8:
minHolder = ValueHolderHelper.getFloat8Holder((Double) ComparisonPredicate.getMinValue(input));
maxHolder = ValueHolderHelper.getFloat8Holder((Double) ComparisonPredicate.getMaxValue(input));
break;
case DATE:
minHolder = ValueHolderHelper.getDateHolder((Long) ComparisonPredicate.getMinValue(input));
maxHolder = ValueHolderHelper.getDateHolder((Long) ComparisonPredicate.getMaxValue(input));
break;
default:
return null;
}
ValueHolder[] args1 = {minHolder};
ValueHolder[] args2 = {maxHolder};
ValueHolder minFuncHolder = InterpreterEvaluator.evaluateFunction(interpreter, args1, holderExpr.getName());
ValueHolder maxFuncHolder = InterpreterEvaluator.evaluateFunction(interpreter, args2, holderExpr.getName());
switch (destType) {
case INT:
return StatisticsProvider.getColumnStatistics(
((IntHolder) minFuncHolder).value,
((IntHolder) maxFuncHolder).value,
ColumnStatisticsKind.NULLS_COUNT.getFrom(input),
destType);
case BIGINT:
return StatisticsProvider.getColumnStatistics(
((BigIntHolder) minFuncHolder).value,
((BigIntHolder) maxFuncHolder).value,
ColumnStatisticsKind.NULLS_COUNT.getFrom(input),
destType);
case FLOAT4:
return StatisticsProvider.getColumnStatistics(
((Float4Holder) minFuncHolder).value,
((Float4Holder) maxFuncHolder).value,
ColumnStatisticsKind.NULLS_COUNT.getFrom(input),
destType);
case FLOAT8:
return StatisticsProvider.getColumnStatistics(
((Float8Holder) minFuncHolder).value,
((Float8Holder) maxFuncHolder).value,
ColumnStatisticsKind.NULLS_COUNT.getFrom(input),
destType);
case TIMESTAMP:
return StatisticsProvider.getColumnStatistics(
((TimeStampHolder) minFuncHolder).value,
((TimeStampHolder) maxFuncHolder).value,
ColumnStatisticsKind.NULLS_COUNT.getFrom(input),
destType);
default:
return null;
}
} catch (Exception e) {
throw new DrillRuntimeException("Error in evaluating function of " + holderExpr.getName());
}
}
/**
* Returns {@link ColumnStatistics} instance with set min, max values and nulls count statistics specified in the arguments.
*
* @param minVal min value
* @param maxVal max value
* @param nullsCount nulls count
* @param type type of the column
* @param <V> type of min and max values
* @return {@link ColumnStatistics} instance with set min, max values and nulls count statistics
*/
public static <V> ColumnStatistics<V> getColumnStatistics(V minVal, V maxVal, long nullsCount, TypeProtos.MinorType type) {
return new ColumnStatistics<>(
Arrays.asList(new StatisticsHolder<>(minVal, ColumnStatisticsKind.MIN_VALUE),
new StatisticsHolder<>(maxVal, ColumnStatisticsKind.MAX_VALUE),
new StatisticsHolder<>(nullsCount, ColumnStatisticsKind.NULLS_COUNT)),
type);
}
/**
* Returns {@link ColumnStatistics} instance with min and max values set to {@code minMaxValue}
* and nulls count set to 0. Resulting {@link ColumnStatistics} instance corresponds
* to a constant value, so nulls count is set to 0.
*
* @param minMaxValue value of min and max statistics
* @param expr source of column type
* @param <V> type of min and max values
* @return {@link ColumnStatistics} instance with min and max values set to {@code minMaxValue} and nulls count set to 0
*/
public static <V> ColumnStatistics<V> getConstantColumnStatistics(V minMaxValue, LogicalExpression expr) {
return getConstantColumnStatistics(minMaxValue, expr.getMajorType().getMinorType());
}
/**
* Returns {@link ColumnStatistics} instance with min and max values set to {@code minMaxValue}
* and nulls count set to 0. Resulting {@link ColumnStatistics} instance corresponds
* to a constant value, so nulls count is set to 0.
*
* @param minMaxValue value of min and max statistics
* @param type column type
* @param <V> type of min and max values
* @return {@link ColumnStatistics} instance with min and max values set to {@code minMaxValue} and nulls count set to 0
*/
public static <V> ColumnStatistics<V> getConstantColumnStatistics(V minMaxValue, TypeProtos.MinorType type) {
return getColumnStatistics(minMaxValue, minMaxValue, 0, type);
}
private static final Map<TypeProtos.MinorType, Set<TypeProtos.MinorType>> CAST_FUNC = new EnumMap<>(TypeProtos.MinorType.class);
static {
// float -> double , int, bigint
Set<TypeProtos.MinorType> float4Types = EnumSet.noneOf(TypeProtos.MinorType.class);
CAST_FUNC.put(TypeProtos.MinorType.FLOAT4, float4Types);
float4Types.add(TypeProtos.MinorType.FLOAT8);
float4Types.add(TypeProtos.MinorType.INT);
float4Types.add(TypeProtos.MinorType.BIGINT);
// double -> float, int, bigint
Set<TypeProtos.MinorType> float8Types = EnumSet.noneOf(TypeProtos.MinorType.class);
CAST_FUNC.put(TypeProtos.MinorType.FLOAT8, float8Types);
float8Types.add(TypeProtos.MinorType.FLOAT4);
float8Types.add(TypeProtos.MinorType.INT);
float8Types.add(TypeProtos.MinorType.BIGINT);
// int -> float, double, bigint
Set<TypeProtos.MinorType> intTypes = EnumSet.noneOf(TypeProtos.MinorType.class);
CAST_FUNC.put(TypeProtos.MinorType.INT, intTypes);
intTypes.add(TypeProtos.MinorType.FLOAT4);
intTypes.add(TypeProtos.MinorType.FLOAT8);
intTypes.add(TypeProtos.MinorType.BIGINT);
// bigint -> int, float, double
Set<TypeProtos.MinorType> bigIntTypes = EnumSet.noneOf(TypeProtos.MinorType.class);
CAST_FUNC.put(TypeProtos.MinorType.BIGINT, bigIntTypes);
bigIntTypes.add(TypeProtos.MinorType.INT);
bigIntTypes.add(TypeProtos.MinorType.FLOAT4);
bigIntTypes.add(TypeProtos.MinorType.FLOAT8);
// date -> timestamp
Set<TypeProtos.MinorType> dateTypes = EnumSet.noneOf(TypeProtos.MinorType.class);
CAST_FUNC.put(TypeProtos.MinorType.DATE, dateTypes);
dateTypes.add(TypeProtos.MinorType.TIMESTAMP);
}
}