| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.druid.segment.virtual; |
| |
| import com.google.common.annotations.VisibleForTesting; |
| import com.google.common.base.Preconditions; |
| import com.google.common.base.Supplier; |
| import com.google.common.collect.Iterables; |
| import org.apache.druid.common.config.NullHandling; |
| import org.apache.druid.math.expr.Expr; |
| import org.apache.druid.math.expr.ExprEval; |
| import org.apache.druid.math.expr.InputBindings; |
| import org.apache.druid.query.dimension.DefaultDimensionSpec; |
| import org.apache.druid.query.expression.ExprUtils; |
| import org.apache.druid.query.extraction.ExtractionFn; |
| import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; |
| import org.apache.druid.segment.BaseObjectColumnValueSelector; |
| import org.apache.druid.segment.ColumnSelectorFactory; |
| import org.apache.druid.segment.ColumnValueSelector; |
| import org.apache.druid.segment.ConstantExprEvalSelector; |
| import org.apache.druid.segment.DimensionSelector; |
| import org.apache.druid.segment.NilColumnValueSelector; |
| import org.apache.druid.segment.column.ColumnCapabilities; |
| import org.apache.druid.segment.column.ColumnHolder; |
| import org.apache.druid.segment.column.ValueType; |
| import org.apache.druid.segment.data.IndexedInts; |
| |
| import javax.annotation.Nullable; |
| import java.util.Arrays; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.stream.Collectors; |
| |
| public class ExpressionSelectors |
| { |
| private ExpressionSelectors() |
| { |
| // No instantiation. |
| } |
| |
| /** |
| * Makes a ColumnValueSelector whose getObject method returns an Object that is the value computed by |
| * an {@link ExprEval}. |
| * |
| * @see ExpressionSelectors#makeExprEvalSelector(ColumnSelectorFactory, Expr) |
| */ |
| public static ColumnValueSelector makeColumnValueSelector( |
| ColumnSelectorFactory columnSelectorFactory, |
| Expr expression |
| ) |
| { |
| final ColumnValueSelector<ExprEval> baseSelector = makeExprEvalSelector(columnSelectorFactory, expression); |
| |
| return new ColumnValueSelector() |
| { |
| @Override |
| public double getDouble() |
| { |
| // No Assert for null handling as baseSelector already have it. |
| return baseSelector.getDouble(); |
| } |
| |
| @Override |
| public float getFloat() |
| { |
| // No Assert for null handling as baseSelector already have it. |
| return baseSelector.getFloat(); |
| } |
| |
| @Override |
| public long getLong() |
| { |
| // No Assert for null handling as baseSelector already have it. |
| return baseSelector.getLong(); |
| } |
| |
| @Override |
| public boolean isNull() |
| { |
| return baseSelector.isNull(); |
| } |
| |
| @Nullable |
| @Override |
| public Object getObject() |
| { |
| // No need for null check on getObject() since baseSelector impls will never return null. |
| ExprEval eval = baseSelector.getObject(); |
| return coerceEvalToSelectorObject(eval); |
| } |
| |
| @Override |
| public Class classOfObject() |
| { |
| return Object.class; |
| } |
| |
| @Override |
| public void inspectRuntimeShape(RuntimeShapeInspector inspector) |
| { |
| inspector.visit("baseSelector", baseSelector); |
| } |
| }; |
| } |
| |
| /** |
| * Makes a ColumnValueSelector whose getObject method returns an {@link ExprEval}. |
| * |
| * @see ExpressionSelectors#makeColumnValueSelector(ColumnSelectorFactory, Expr) |
| */ |
| public static ColumnValueSelector<ExprEval> makeExprEvalSelector( |
| ColumnSelectorFactory columnSelectorFactory, |
| Expr expression |
| ) |
| { |
| return makeExprEvalSelector(columnSelectorFactory, ExpressionPlanner.plan(columnSelectorFactory, expression)); |
| } |
| |
| public static ColumnValueSelector<ExprEval> makeExprEvalSelector( |
| ColumnSelectorFactory columnSelectorFactory, |
| ExpressionPlan plan |
| ) |
| { |
| if (plan.is(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR)) { |
| final String column = plan.getSingleInputName(); |
| final ValueType inputType = plan.getSingleInputType(); |
| if (inputType == ValueType.LONG) { |
| return new SingleLongInputCachingExpressionColumnValueSelector( |
| columnSelectorFactory.makeColumnValueSelector(column), |
| plan.getExpression(), |
| !ColumnHolder.TIME_COLUMN_NAME.equals(column) // __time doesn't need an LRU cache since it is sorted. |
| ); |
| } else if (inputType == ValueType.STRING) { |
| return new SingleStringInputCachingExpressionColumnValueSelector( |
| columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(column, column, ValueType.STRING)), |
| plan.getExpression() |
| ); |
| } |
| } |
| final Expr.ObjectBinding bindings = createBindings(plan.getAnalysis(), columnSelectorFactory); |
| |
| // Optimization for constant expressions |
| if (bindings.equals(ExprUtils.nilBindings())) { |
| return new ConstantExprEvalSelector(plan.getExpression().eval(bindings)); |
| } |
| |
| // if any unknown column input types, fall back to an expression selector that examines input bindings on a |
| // per row basis |
| if (plan.any(ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.INCOMPLETE_INPUTS)) { |
| return new RowBasedExpressionColumnValueSelector(plan, bindings); |
| } |
| |
| // generic expression value selector for fully known input types |
| return new ExpressionColumnValueSelector(plan.getAppliedExpression(), bindings); |
| } |
| |
| /** |
| * Makes a single or multi-value {@link DimensionSelector} wrapper around a {@link ColumnValueSelector} created by |
| * {@link ExpressionSelectors#makeExprEvalSelector(ColumnSelectorFactory, Expr)} as appropriate |
| */ |
| public static DimensionSelector makeDimensionSelector( |
| final ColumnSelectorFactory columnSelectorFactory, |
| final Expr expression, |
| @Nullable final ExtractionFn extractionFn |
| ) |
| { |
| final ExpressionPlan plan = ExpressionPlanner.plan(columnSelectorFactory, expression); |
| |
| if (plan.any(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE)) { |
| final String column = plan.getSingleInputName(); |
| if (plan.getSingleInputType() == ValueType.STRING) { |
| return new SingleStringInputDeferredEvaluationExpressionDimensionSelector( |
| columnSelectorFactory.makeDimensionSelector(DefaultDimensionSpec.of(column)), |
| expression |
| ); |
| } |
| } |
| |
| final ColumnValueSelector<ExprEval> baseSelector = makeExprEvalSelector(columnSelectorFactory, expression); |
| |
| if (baseSelector instanceof ConstantExprEvalSelector) { |
| // Optimization for dimension selectors on constants. |
| return DimensionSelector.constant(baseSelector.getObject().asString(), extractionFn); |
| } else if (baseSelector instanceof NilColumnValueSelector) { |
| // Optimization for null dimension selector. |
| return DimensionSelector.constant(null); |
| } else { |
| if (plan.any( |
| ExpressionPlan.Trait.NON_SCALAR_OUTPUT, |
| ExpressionPlan.Trait.NEEDS_APPLIED, |
| ExpressionPlan.Trait.UNKNOWN_INPUTS, |
| ExpressionPlan.Trait.INCOMPLETE_INPUTS |
| )) { |
| return ExpressionMultiValueDimensionSelector.fromValueSelector(baseSelector, extractionFn); |
| } else { |
| return ExpressionSingleValueDimensionSelector.fromValueSelector(baseSelector, extractionFn); |
| } |
| } |
| } |
| |
| |
| /** |
| * Returns whether an expression can be applied to unique values of a particular column (like those in a dictionary) |
| * rather than being applied to each row individually. |
| * |
| * This function should only be called if you have already determined that an expression is over a single column, |
| * and that single column has a dictionary. |
| * |
| * @param bindingAnalysis result of calling {@link Expr#analyzeInputs()} on an expression |
| * @param hasMultipleValues result of calling {@link ColumnCapabilities#hasMultipleValues()} |
| */ |
| public static boolean canMapOverDictionary( |
| final Expr.BindingAnalysis bindingAnalysis, |
| final ColumnCapabilities.Capable hasMultipleValues |
| ) |
| { |
| Preconditions.checkState(bindingAnalysis.getRequiredBindings().size() == 1, "requiredBindings.size == 1"); |
| return !hasMultipleValues.isUnknown() && !bindingAnalysis.hasInputArrays() && !bindingAnalysis.isOutputArray(); |
| } |
| |
| /** |
| * Create {@link Expr.ObjectBinding} given a {@link ColumnSelectorFactory} and {@link Expr.BindingAnalysis} which |
| * provides the set of identifiers which need a binding (list of required columns), and context of whether or not they |
| * are used as array or scalar inputs |
| */ |
| public static Expr.ObjectBinding createBindings( |
| Expr.BindingAnalysis bindingAnalysis, |
| ColumnSelectorFactory columnSelectorFactory |
| ) |
| { |
| final List<String> columns = bindingAnalysis.getRequiredBindingsList(); |
| return createBindings(columnSelectorFactory, columns); |
| } |
| |
| /** |
| * Create {@link Expr.ObjectBinding} given a {@link ColumnSelectorFactory} and {@link Expr.BindingAnalysis} which |
| * provides the set of identifiers which need a binding (list of required columns), and context of whether or not they |
| * are used as array or scalar inputs |
| */ |
| public static Expr.ObjectBinding createBindings( |
| ColumnSelectorFactory columnSelectorFactory, |
| List<String> columns |
| ) |
| { |
| final Map<String, Supplier<Object>> suppliers = new HashMap<>(); |
| for (String columnName : columns) { |
| final ColumnCapabilities columnCapabilities = columnSelectorFactory.getColumnCapabilities(columnName); |
| final ValueType nativeType = columnCapabilities != null ? columnCapabilities.getType() : null; |
| final boolean multiVal = columnCapabilities != null && columnCapabilities.hasMultipleValues().isTrue(); |
| final Supplier<Object> supplier; |
| |
| if (nativeType == ValueType.FLOAT) { |
| ColumnValueSelector<?> selector = columnSelectorFactory.makeColumnValueSelector(columnName); |
| supplier = makeNullableNumericSupplier(selector, selector::getFloat); |
| } else if (nativeType == ValueType.LONG) { |
| ColumnValueSelector<?> selector = columnSelectorFactory.makeColumnValueSelector(columnName); |
| supplier = makeNullableNumericSupplier(selector, selector::getLong); |
| } else if (nativeType == ValueType.DOUBLE) { |
| ColumnValueSelector<?> selector = columnSelectorFactory.makeColumnValueSelector(columnName); |
| supplier = makeNullableNumericSupplier(selector, selector::getDouble); |
| } else if (nativeType == ValueType.STRING) { |
| supplier = supplierFromDimensionSelector( |
| columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName)), |
| multiVal |
| ); |
| } else if (nativeType == null || ValueType.isArray(nativeType)) { |
| // Unknown ValueType or array type. Try making an Object selector and see if that gives us anything useful. |
| supplier = supplierFromObjectSelector(columnSelectorFactory.makeColumnValueSelector(columnName)); |
| } else { |
| // Unhandleable ValueType (COMPLEX). |
| supplier = null; |
| } |
| |
| if (supplier != null) { |
| suppliers.put(columnName, supplier); |
| } |
| } |
| |
| if (suppliers.isEmpty()) { |
| return ExprUtils.nilBindings(); |
| } else if (suppliers.size() == 1 && columns.size() == 1) { |
| // If there's only one column (and it has a supplier), we can skip the Map and just use that supplier when |
| // asked for something. |
| final String column = Iterables.getOnlyElement(suppliers.keySet()); |
| final Supplier<Object> supplier = Iterables.getOnlyElement(suppliers.values()); |
| |
| return identifierName -> { |
| // There's only one binding, and it must be the single column, so it can safely be ignored in production. |
| assert column.equals(identifierName); |
| return supplier.get(); |
| }; |
| } else { |
| return InputBindings.withSuppliers(suppliers); |
| } |
| } |
| |
| /** |
| * Wraps a {@link ColumnValueSelector} and uses it to supply numeric values in a null-aware way. |
| * |
| * @see org.apache.druid.segment.BaseNullableColumnValueSelector#isNull() for why this only works in the numeric case |
| */ |
| private static <T> Supplier<T> makeNullableNumericSupplier( |
| ColumnValueSelector selector, |
| Supplier<T> supplier |
| ) |
| { |
| if (NullHandling.replaceWithDefault()) { |
| return supplier; |
| } else { |
| return () -> { |
| if (selector.isNull()) { |
| return null; |
| } |
| return supplier.get(); |
| }; |
| } |
| } |
| |
| /** |
| * Create a supplier to feed {@link Expr.ObjectBinding} for a dimension selector, coercing values to always appear as |
| * arrays if specified. |
| */ |
| @VisibleForTesting |
| static Supplier<Object> supplierFromDimensionSelector(final DimensionSelector selector, boolean coerceArray) |
| { |
| Preconditions.checkNotNull(selector, "selector"); |
| return () -> { |
| final IndexedInts row = selector.getRow(); |
| |
| if (row.size() == 1 && !coerceArray) { |
| return selector.lookupName(row.get(0)); |
| } else { |
| // column selector factories hate you and use [] and [null] interchangeably for nullish data |
| if (row.size() == 0) { |
| return new String[]{null}; |
| } |
| final String[] strings = new String[row.size()]; |
| // noinspection SSBasedInspection |
| for (int i = 0; i < row.size(); i++) { |
| strings[i] = selector.lookupName(row.get(i)); |
| } |
| return strings; |
| } |
| }; |
| } |
| |
| |
| /** |
| * Create a fallback supplier to feed {@link Expr.ObjectBinding} for a selector, used if column cannot be reliably |
| * detected as a primitive type |
| */ |
| @Nullable |
| static Supplier<Object> supplierFromObjectSelector(final BaseObjectColumnValueSelector<?> selector) |
| { |
| if (selector instanceof NilColumnValueSelector) { |
| return null; |
| } |
| |
| final Class<?> clazz = selector.classOfObject(); |
| if (Number.class.isAssignableFrom(clazz) || String.class.isAssignableFrom(clazz)) { |
| // Number, String supported as-is. |
| return selector::getObject; |
| } else if (clazz.isAssignableFrom(Number.class) || clazz.isAssignableFrom(String.class)) { |
| // Might be Numbers and Strings. Use a selector that double-checks. |
| return () -> { |
| final Object val = selector.getObject(); |
| if (val instanceof Number || val instanceof String || (val != null && val.getClass().isArray())) { |
| return val; |
| } else if (val instanceof List) { |
| return ExprEval.coerceListToArray((List) val, true); |
| } else { |
| return null; |
| } |
| }; |
| } else if (clazz.isAssignableFrom(List.class)) { |
| return () -> { |
| final Object val = selector.getObject(); |
| if (val != null) { |
| return ExprEval.coerceListToArray((List) val, true); |
| } |
| return null; |
| }; |
| } else { |
| // No numbers or strings. |
| return null; |
| } |
| } |
| |
| /** |
| * Coerces {@link ExprEval} value back to selector friendly {@link List} if the evaluated expression result is an |
| * array type |
| */ |
| @Nullable |
| public static Object coerceEvalToSelectorObject(ExprEval eval) |
| { |
| switch (eval.type()) { |
| case STRING_ARRAY: |
| return Arrays.stream(eval.asStringArray()).collect(Collectors.toList()); |
| case DOUBLE_ARRAY: |
| return Arrays.stream(eval.asDoubleArray()).collect(Collectors.toList()); |
| case LONG_ARRAY: |
| return Arrays.stream(eval.asLongArray()).collect(Collectors.toList()); |
| default: |
| return eval.value(); |
| } |
| } |
| } |