blob: 4eb91eb7ae1df1c4324443b9cf3011ea91a42ee6 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.virtual;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprType;
import org.apache.druid.math.expr.Parser;
import org.apache.druid.segment.ColumnInspector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ValueType;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
public class ExpressionPlanner
{
private ExpressionPlanner()
{
// No instantiation.
}
/**
* Druid tries to be chill to expressions to make up for not having a well defined table schema across segments. This
* method performs some analysis to determine what sort of selectors can be constructed on top of an expression,
* whether or not the expression will need implicitly mapped across multi-valued inputs, if the expression produces
* multi-valued outputs, is vectorizable, and everything else interesting when making a selector.
*
* Results are stored in a {@link ExpressionPlan}, which can be examined to do whatever is necessary to make things
* function properly.
*/
public static ExpressionPlan plan(ColumnInspector inspector, Expr expression)
{
final Expr.BindingAnalysis analysis = expression.analyzeInputs();
Parser.validateExpr(expression, analysis);
EnumSet<ExpressionPlan.Trait> traits = EnumSet.noneOf(ExpressionPlan.Trait.class);
Set<String> noCapabilities = new HashSet<>();
Set<String> maybeMultiValued = new HashSet<>();
List<String> needsApplied = ImmutableList.of();
ValueType singleInputType = null;
ExprType outputType = null;
final Set<String> columns = analysis.getRequiredBindings();
// check and set traits which allow optimized selectors to be created
if (columns.isEmpty()) {
traits.add(ExpressionPlan.Trait.CONSTANT);
} else if (columns.size() == 1) {
final String column = Iterables.getOnlyElement(columns);
final ColumnCapabilities capabilities = inspector.getColumnCapabilities(column);
// These flags allow for selectors that wrap a single underlying column to be optimized, through caching results
// and via allowing deferred execution in the case of building dimension selectors.
// SINGLE_INPUT_SCALAR
// is set if an input is single valued, and the output is definitely single valued, with an additional requirement
// for strings that the column is dictionary encoded.
// SINGLE_INPUT_MAPPABLE
// is set when a single input string column, which can be multi-valued, but if so, it must be implicitly mappable
// (i.e. the expression is not treating its input as an array and not wanting to output an array)
if (capabilities != null && !analysis.hasInputArrays() && !analysis.isOutputArray()) {
boolean isSingleInputMappable = false;
boolean isSingleInputScalar = capabilities.hasMultipleValues().isFalse();
if (capabilities.getType() == ValueType.STRING) {
isSingleInputScalar &= capabilities.isDictionaryEncoded().isTrue();
isSingleInputMappable = capabilities.isDictionaryEncoded().isTrue() &&
!capabilities.hasMultipleValues().isUnknown();
}
// if satisfied, set single input output type and flags
if (isSingleInputScalar || isSingleInputMappable) {
singleInputType = capabilities.getType();
if (isSingleInputScalar) {
traits.add(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR);
}
if (isSingleInputMappable) {
traits.add(ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE);
}
}
}
}
// if we didn't eliminate this expression as a single input scalar or mappable expression, it might need
// automatic transformation to map across multi-valued inputs (or row by row detection in the worst case)
if (ExpressionPlan.none(traits, ExpressionPlan.Trait.SINGLE_INPUT_SCALAR)) {
final Set<String> definitelyMultiValued = new HashSet<>();
for (String column : analysis.getRequiredBindings()) {
final ColumnCapabilities capabilities = inspector.getColumnCapabilities(column);
if (capabilities != null) {
if (capabilities.hasMultipleValues().isTrue()) {
definitelyMultiValued.add(column);
} else if (capabilities.getType().equals(ValueType.STRING) &&
capabilities.hasMultipleValues().isMaybeTrue() &&
!analysis.getArrayBindings().contains(column)
) {
maybeMultiValued.add(column);
}
} else {
noCapabilities.add(column);
}
}
// find any inputs which will need implicitly mapped across multi-valued rows
needsApplied =
columns.stream()
.filter(c -> definitelyMultiValued.contains(c) && !analysis.getArrayBindings().contains(c))
.collect(Collectors.toList());
// if any multi-value inputs, set flag for non-scalar inputs
if (analysis.hasInputArrays()) {
traits.add(ExpressionPlan.Trait.NON_SCALAR_INPUTS);
}
if (!noCapabilities.isEmpty()) {
traits.add(ExpressionPlan.Trait.UNKNOWN_INPUTS);
}
if (!maybeMultiValued.isEmpty()) {
traits.add(ExpressionPlan.Trait.INCOMPLETE_INPUTS);
}
// if expression needs transformed, lets do it
if (!needsApplied.isEmpty()) {
traits.add(ExpressionPlan.Trait.NEEDS_APPLIED);
}
}
// only set output type if we are pretty confident about input types
final boolean shouldComputeOutput = ExpressionPlan.none(
traits,
ExpressionPlan.Trait.UNKNOWN_INPUTS,
ExpressionPlan.Trait.INCOMPLETE_INPUTS
);
if (shouldComputeOutput) {
outputType = expression.getOutputType(inspector);
}
// if analysis predicts output, or inferred output type is array, output will be multi-valued
if (analysis.isOutputArray() || ExprType.isArray(outputType)) {
traits.add(ExpressionPlan.Trait.NON_SCALAR_OUTPUT);
// single input mappable may not produce array output explicitly, only through implicit mapping
traits.remove(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR);
traits.remove(ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE);
}
// vectorized expressions do not support incomplete, multi-valued inputs or outputs, or implicit mapping
// they also do not support unknown inputs, but they also do not currently have to deal with them, as missing
// capabilites is indicative of a non-existent column instead of an unknown schema. If this ever changes,
// this check should also change
boolean supportsVector = ExpressionPlan.none(
traits,
ExpressionPlan.Trait.INCOMPLETE_INPUTS,
ExpressionPlan.Trait.NEEDS_APPLIED,
ExpressionPlan.Trait.NON_SCALAR_INPUTS,
ExpressionPlan.Trait.NON_SCALAR_OUTPUT
);
if (supportsVector && expression.canVectorize(inspector)) {
// make sure to compute the output type for a vector expression though, because we might have skipped it earlier
// due to unknown inputs, but that's ok here since it just means it doesnt exist
outputType = expression.getOutputType(inspector);
traits.add(ExpressionPlan.Trait.VECTORIZABLE);
}
return new ExpressionPlan(
inspector,
expression,
analysis,
traits,
outputType,
singleInputType,
Sets.union(noCapabilities, maybeMultiValued),
needsApplied
);
}
}