// blob: c6d273e3d8940d963c67bc915ccc749b5179e88e
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.virtual;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.math.expr.ExprType;
import org.apache.druid.math.expr.Parser;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.segment.ColumnInspector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import javax.annotation.Nullable;
import java.util.Map;
public class ExpressionPlannerTest extends InitializedNullHandlingTest
{
/**
 * Hand-built {@link ColumnInspector} used by every test in this class. It knows a fixed set of numeric, string,
 * multi-value string, and array columns; any other column name yields {@code null} capabilities.
 */
public static final ColumnInspector SYNTHETIC_INSPECTOR = new ColumnInspector()
{
  private final Map<String, ColumnCapabilities> capabilitiesMap = createCapabilities();

  @Nullable
  @Override
  public ColumnCapabilities getColumnCapabilities(String column)
  {
    return capabilitiesMap.get(column);
  }

  /**
   * Builds the immutable column name to capabilities table backing this inspector.
   */
  private Map<String, ColumnCapabilities> createCapabilities()
  {
    final ImmutableMap.Builder<String, ColumnCapabilities> columns = ImmutableMap.builder();

    // plain numeric columns, two of each type
    columns.put("long1", ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG));
    columns.put("long2", ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG));
    columns.put("float1", ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT));
    columns.put("float2", ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT));
    columns.put("double1", ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE));
    columns.put("double2", ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE));

    // plain single value string
    columns.put("scalar_string", ColumnCapabilitiesImpl.createSimpleSingleValueStringColumnCapabilities());

    // segment style single value string: dictionary encoded with a unique, sorted dictionary
    final ColumnCapabilities sortedUniqueScalar =
        new ColumnCapabilitiesImpl().setType(ValueType.STRING)
                                    .setDictionaryEncoded(true)
                                    .setHasBitmapIndexes(true)
                                    .setDictionaryValuesSorted(true)
                                    .setDictionaryValuesUnique(true)
                                    .setHasMultipleValues(false);
    columns.put("scalar_dictionary_string", sortedUniqueScalar);

    // dictionary encoded, but neither unique nor sorted; maybe an indexed table from a join result
    final ColumnCapabilities unsortedNonUniqueScalar =
        new ColumnCapabilitiesImpl().setType(ValueType.STRING)
                                    .setDictionaryEncoded(true)
                                    .setHasBitmapIndexes(false)
                                    .setDictionaryValuesSorted(false)
                                    .setDictionaryValuesUnique(false)
                                    .setHasMultipleValues(false);
    columns.put("scalar_dictionary_string_nonunique", unsortedNonUniqueScalar);

    // string for which only the type is set, leaving multi-valuedness unknown
    final ColumnCapabilities typeOnlyString = new ColumnCapabilitiesImpl().setType(ValueType.STRING);
    columns.put("string_unknown", typeOnlyString);

    // dictionary encoded multi valued string dimension
    final ColumnCapabilities sortedUniqueMulti =
        new ColumnCapabilitiesImpl().setType(ValueType.STRING)
                                    .setDictionaryEncoded(true)
                                    .setHasBitmapIndexes(true)
                                    .setDictionaryValuesUnique(true)
                                    .setDictionaryValuesSorted(true)
                                    .setHasMultipleValues(true);
    columns.put("multi_dictionary_string", sortedUniqueMulti);

    // simple multi valued string dimension, unsorted (note: not dictionary encoded despite the column name)
    final ColumnCapabilities plainMulti =
        new ColumnCapabilitiesImpl().setType(ValueType.STRING)
                                    .setDictionaryEncoded(false)
                                    .setHasBitmapIndexes(false)
                                    .setDictionaryValuesUnique(false)
                                    .setDictionaryValuesSorted(false)
                                    .setHasMultipleValues(true);
    columns.put("multi_dictionary_string_nonunique", plainMulti);

    // array typed columns, two of each element type
    columns.put("string_array_1", ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.STRING_ARRAY));
    columns.put("string_array_2", ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.STRING_ARRAY));
    columns.put("long_array_1", ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.LONG_ARRAY));
    columns.put("long_array_2", ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.LONG_ARRAY));
    columns.put("double_array_1", ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.DOUBLE_ARRAY));
    columns.put("double_array_2", ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ValueType.DOUBLE_ARRAY));

    return columns.build();
  }
};
// JUnit rule through which a test declares the exception type and message it expects; initialized with none(), and
// only testMultiValueStringDictionaryEncodedIllegalAccumulator sets an expectation on it.
@Rule
public ExpectedException expectedException = ExpectedException.none();
/**
 * Plans expressions whose identifiers are absent from {@link #SYNTHETIC_INSPECTOR}, first with one unknown input
 * mixed with a literal, then with two unknown inputs.
 */
@Test
public void testUnknown()
{
  // "x" has no column capabilities. Under the vectorized query engine contract, missing column capabilities
  // indicate a nil column, so this plan is vectorizable. Non-vectorized expression processing will probably end up
  // using a selector that examines inputs row by row to decide whether the expression needs to be applied to
  // multi-valued inputs.
  final ExpressionPlan concatPlan = plan("concat(x, 'x')");
  Assert.assertTrue(
      concatPlan.is(ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.VECTORIZABLE)
  );
  Assert.assertFalse(
      concatPlan.is(
          ExpressionPlan.Trait.NEEDS_APPLIED,
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
          ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
          ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
          ExpressionPlan.Trait.CONSTANT
      )
  );

  // there are no "unapplied bindings" here, so both forms come back unchanged
  final String expectedConcat = "concat(\"x\", 'x')";
  Assert.assertEquals(expectedConcat, concatPlan.getAppliedExpression().stringify());
  Assert.assertEquals(expectedConcat, concatPlan.getAppliedFoldExpression("__acc").stringify());
  Assert.assertEquals(ExprType.STRING, concatPlan.getOutputType());

  final ColumnCapabilities concatCapabilities = concatPlan.inferColumnCapabilities(null);
  Assert.assertNotNull(concatCapabilities);
  Assert.assertEquals(ValueType.STRING, concatCapabilities.getType());
  Assert.assertNull(concatCapabilities.getComplexTypeName());
  Assert.assertTrue(concatCapabilities.hasNulls().isTrue());
  Assert.assertFalse(concatCapabilities.isDictionaryEncoded().isMaybeTrue());
  Assert.assertFalse(concatCapabilities.areDictionaryValuesSorted().isMaybeTrue());
  Assert.assertFalse(concatCapabilities.areDictionaryValuesUnique().isMaybeTrue());
  Assert.assertFalse(concatCapabilities.hasMultipleValues().isMaybeTrue());
  Assert.assertFalse(concatCapabilities.hasBitmapIndexes());
  Assert.assertFalse(concatCapabilities.hasSpatialIndexes());

  // with both inputs unknown, can anything be determined?
  final ExpressionPlan productPlan = plan("x * y");
  Assert.assertTrue(productPlan.is(ExpressionPlan.Trait.UNKNOWN_INPUTS));
  Assert.assertFalse(
      productPlan.is(
          ExpressionPlan.Trait.NEEDS_APPLIED,
          ExpressionPlan.Trait.VECTORIZABLE,
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
          ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
          ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
          ExpressionPlan.Trait.CONSTANT
      )
  );

  final String expectedProduct = "(\"x\" * \"y\")";
  Assert.assertEquals(expectedProduct, productPlan.getAppliedExpression().stringify());
  Assert.assertEquals(expectedProduct, productPlan.getAppliedFoldExpression("__acc").stringify());

  // no: neither an output type nor column capabilities can be inferred
  Assert.assertNull(productPlan.getOutputType());
  Assert.assertNull(productPlan.inferColumnCapabilities(null));
}
/**
 * Plans a concat over the plain single-valued string column and checks traits, applied expressions, output type,
 * and inferred capabilities.
 */
@Test
public void testScalarStringNondictionaryEncoded()
{
  final ExpressionPlan concatPlan = plan("concat(scalar_string, 'x')");

  Assert.assertTrue(concatPlan.is(ExpressionPlan.Trait.VECTORIZABLE));
  Assert.assertFalse(
      concatPlan.is(
          ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
          ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.UNKNOWN_INPUTS,
          ExpressionPlan.Trait.NEEDS_APPLIED,
          ExpressionPlan.Trait.NON_SCALAR_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_OUTPUT
      )
  );

  // the applied and fold forms are expected to equal the plain expression
  final String expectedExpression = "concat(\"scalar_string\", 'x')";
  Assert.assertEquals(expectedExpression, concatPlan.getAppliedExpression().stringify());
  Assert.assertEquals(expectedExpression, concatPlan.getAppliedFoldExpression("__acc").stringify());
  Assert.assertEquals(ExprType.STRING, concatPlan.getOutputType());

  final ColumnCapabilities capabilities = concatPlan.inferColumnCapabilities(null);
  Assert.assertNotNull(capabilities);
  Assert.assertEquals(ValueType.STRING, capabilities.getType());
  Assert.assertNull(capabilities.getComplexTypeName());
  Assert.assertTrue(capabilities.hasNulls().isTrue());
  Assert.assertFalse(capabilities.isDictionaryEncoded().isMaybeTrue());
  Assert.assertFalse(capabilities.areDictionaryValuesSorted().isMaybeTrue());
  Assert.assertFalse(capabilities.areDictionaryValuesUnique().isMaybeTrue());
  Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue());
  Assert.assertFalse(capabilities.hasBitmapIndexes());
  Assert.assertFalse(capabilities.hasSpatialIndexes());
}
/**
 * Plans numeric expressions: a single long input plus a literal, a long plus a double literal, and a product of
 * two double columns.
 */
@Test
public void testScalarNumeric()
{
  // single long column input
  final ExpressionPlan longPlan = plan("long1 + 5");
  Assert.assertTrue(
      longPlan.is(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, ExpressionPlan.Trait.VECTORIZABLE)
  );
  Assert.assertFalse(
      longPlan.is(
          ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.UNKNOWN_INPUTS,
          ExpressionPlan.Trait.NEEDS_APPLIED,
          ExpressionPlan.Trait.NON_SCALAR_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_OUTPUT
      )
  );

  final String expectedSum = "(\"long1\" + 5)";
  Assert.assertEquals(expectedSum, longPlan.getAppliedExpression().stringify());
  Assert.assertEquals(expectedSum, longPlan.getAppliedFoldExpression("__acc").stringify());
  Assert.assertEquals(expectedSum, longPlan.getAppliedFoldExpression("long1").stringify());
  Assert.assertEquals(ExprType.LONG, longPlan.getOutputType());

  final ColumnCapabilities longCapabilities = longPlan.inferColumnCapabilities(null);
  Assert.assertNotNull(longCapabilities);
  Assert.assertEquals(ValueType.LONG, longCapabilities.getType());
  Assert.assertNull(longCapabilities.getComplexTypeName());
  // nulls are expected to be possible exactly when SQL compatible null handling is enabled
  Assert.assertEquals(NullHandling.sqlCompatible(), longCapabilities.hasNulls().isMaybeTrue());
  Assert.assertFalse(longCapabilities.isDictionaryEncoded().isMaybeTrue());
  Assert.assertFalse(longCapabilities.areDictionaryValuesSorted().isMaybeTrue());
  Assert.assertFalse(longCapabilities.areDictionaryValuesUnique().isMaybeTrue());
  Assert.assertFalse(longCapabilities.hasMultipleValues().isMaybeTrue());
  Assert.assertFalse(longCapabilities.hasBitmapIndexes());
  Assert.assertFalse(longCapabilities.hasSpatialIndexes());

  // long column combined with a double literal
  final ExpressionPlan mixedPlan = plan("long1 + 5.0");
  Assert.assertEquals(ExprType.DOUBLE, mixedPlan.getOutputType());

  // two double column inputs
  final ExpressionPlan doublePlan = plan("double1 * double2");
  Assert.assertTrue(doublePlan.is(ExpressionPlan.Trait.VECTORIZABLE));
  Assert.assertFalse(
      doublePlan.is(
          ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
          ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.UNKNOWN_INPUTS,
          ExpressionPlan.Trait.NEEDS_APPLIED,
          ExpressionPlan.Trait.NON_SCALAR_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_OUTPUT
      )
  );

  final String expectedProduct = "(\"double1\" * \"double2\")";
  Assert.assertEquals(expectedProduct, doublePlan.getAppliedExpression().stringify());
  Assert.assertEquals(expectedProduct, doublePlan.getAppliedFoldExpression("__acc").stringify());
  Assert.assertEquals(expectedProduct, doublePlan.getAppliedFoldExpression("double1").stringify());
  Assert.assertEquals(ExprType.DOUBLE, doublePlan.getOutputType());

  final ColumnCapabilities doubleCapabilities = doublePlan.inferColumnCapabilities(null);
  Assert.assertNotNull(doubleCapabilities);
  Assert.assertEquals(ValueType.DOUBLE, doubleCapabilities.getType());
  Assert.assertNull(doubleCapabilities.getComplexTypeName());
  Assert.assertEquals(NullHandling.sqlCompatible(), doubleCapabilities.hasNulls().isMaybeTrue());
  Assert.assertFalse(doubleCapabilities.isDictionaryEncoded().isMaybeTrue());
  Assert.assertFalse(doubleCapabilities.areDictionaryValuesSorted().isMaybeTrue());
  Assert.assertFalse(doubleCapabilities.areDictionaryValuesUnique().isMaybeTrue());
  Assert.assertFalse(doubleCapabilities.hasMultipleValues().isMaybeTrue());
  Assert.assertFalse(doubleCapabilities.hasBitmapIndexes());
  Assert.assertFalse(doubleCapabilities.hasSpatialIndexes());
}
/**
 * Plans expressions over single-valued dictionary encoded string columns: one input, two inputs, and an array
 * constructor wrapping one input.
 */
@Test
public void testScalarStringDictionaryEncoded()
{
  // single dictionary encoded input
  final ExpressionPlan singleInputPlan = plan("concat(scalar_dictionary_string, 'x')");
  Assert.assertTrue(
      singleInputPlan.is(
          ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
          ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
          ExpressionPlan.Trait.VECTORIZABLE
      )
  );
  Assert.assertFalse(
      singleInputPlan.is(
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.UNKNOWN_INPUTS,
          ExpressionPlan.Trait.NEEDS_APPLIED,
          ExpressionPlan.Trait.NON_SCALAR_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_OUTPUT
      )
  );

  final String expectedSingle = "concat(\"scalar_dictionary_string\", 'x')";
  Assert.assertEquals(expectedSingle, singleInputPlan.getAppliedExpression().stringify());
  Assert.assertEquals(expectedSingle, singleInputPlan.getAppliedFoldExpression("__acc").stringify());
  Assert.assertEquals(ExprType.STRING, singleInputPlan.getOutputType());

  final ColumnCapabilities singleCapabilities = singleInputPlan.inferColumnCapabilities(null);
  Assert.assertNotNull(singleCapabilities);
  Assert.assertEquals(ValueType.STRING, singleCapabilities.getType());
  Assert.assertNull(singleCapabilities.getComplexTypeName());
  Assert.assertTrue(singleCapabilities.hasNulls().isTrue());
  Assert.assertTrue(singleCapabilities.isDictionaryEncoded().isTrue());
  Assert.assertFalse(singleCapabilities.areDictionaryValuesSorted().isMaybeTrue());
  Assert.assertFalse(singleCapabilities.areDictionaryValuesUnique().isMaybeTrue());
  Assert.assertFalse(singleCapabilities.hasMultipleValues().isMaybeTrue());
  Assert.assertTrue(singleCapabilities.hasBitmapIndexes());
  Assert.assertFalse(singleCapabilities.hasSpatialIndexes());

  // multiple input columns
  final ExpressionPlan twoInputPlan = plan("concat(scalar_dictionary_string, scalar_dictionary_string_nonunique)");
  Assert.assertTrue(twoInputPlan.is(ExpressionPlan.Trait.VECTORIZABLE));
  Assert.assertFalse(
      twoInputPlan.is(
          ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
          ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.UNKNOWN_INPUTS,
          ExpressionPlan.Trait.NEEDS_APPLIED,
          ExpressionPlan.Trait.NON_SCALAR_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_OUTPUT
      )
  );

  final String expectedTwoInput = "concat(\"scalar_dictionary_string\", \"scalar_dictionary_string_nonunique\")";
  Assert.assertEquals(expectedTwoInput, twoInputPlan.getAppliedExpression().stringify());
  Assert.assertEquals(expectedTwoInput, twoInputPlan.getAppliedFoldExpression("__acc").stringify());
  // naming scalar_dictionary_string_nonunique as the accumulator changes nothing: without NEEDS_APPLIED there is
  // nothing to transform
  Assert.assertEquals(
      expectedTwoInput,
      twoInputPlan.getAppliedFoldExpression("scalar_dictionary_string_nonunique").stringify()
  );
  Assert.assertEquals(ExprType.STRING, twoInputPlan.getOutputType());

  final ColumnCapabilities twoInputCapabilities = twoInputPlan.inferColumnCapabilities(null);
  Assert.assertNotNull(twoInputCapabilities);
  Assert.assertEquals(ValueType.STRING, twoInputCapabilities.getType());
  Assert.assertNull(twoInputCapabilities.getComplexTypeName());
  Assert.assertTrue(twoInputCapabilities.hasNulls().isTrue());
  Assert.assertFalse(twoInputCapabilities.isDictionaryEncoded().isMaybeTrue());
  Assert.assertFalse(twoInputCapabilities.areDictionaryValuesSorted().isMaybeTrue());
  Assert.assertFalse(twoInputCapabilities.areDictionaryValuesUnique().isMaybeTrue());
  Assert.assertFalse(twoInputCapabilities.hasMultipleValues().isMaybeTrue());
  Assert.assertFalse(twoInputCapabilities.hasBitmapIndexes());
  Assert.assertFalse(twoInputCapabilities.hasSpatialIndexes());

  // array output of a dictionary encoded string is not considered single scalar/mappable, nor vectorizable
  final ExpressionPlan arrayPlan = plan("array(scalar_dictionary_string)");
  Assert.assertTrue(arrayPlan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT));
  Assert.assertFalse(
      arrayPlan.is(
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.UNKNOWN_INPUTS,
          ExpressionPlan.Trait.NEEDS_APPLIED,
          ExpressionPlan.Trait.NON_SCALAR_INPUTS,
          ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
          ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
          ExpressionPlan.Trait.VECTORIZABLE
      )
  );
}
/**
 * Plans scalar functions over multi-valued string columns and checks the implicit map / fold / cartesian
 * transformations, then an array function over a multi-valued string.
 */
@Test
public void testMultiValueStringDictionaryEncoded()
{
  // one multi-valued dictionary encoded input used as a scalar
  final ExpressionPlan singleMultiPlan = plan("concat(multi_dictionary_string, 'x')");
  Assert.assertTrue(
      singleMultiPlan.is(ExpressionPlan.Trait.NEEDS_APPLIED, ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE)
  );
  Assert.assertFalse(
      singleMultiPlan.is(
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.UNKNOWN_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
          ExpressionPlan.Trait.VECTORIZABLE
      )
  );
  Assert.assertEquals(ExprType.STRING, singleMultiPlan.getOutputType());

  final ColumnCapabilities singleMultiCapabilities = singleMultiPlan.inferColumnCapabilities(null);
  Assert.assertNotNull(singleMultiCapabilities);
  Assert.assertEquals(ValueType.STRING, singleMultiCapabilities.getType());
  Assert.assertNull(singleMultiCapabilities.getComplexTypeName());
  Assert.assertTrue(singleMultiCapabilities.hasNulls().isMaybeTrue());
  Assert.assertTrue(singleMultiCapabilities.isDictionaryEncoded().isTrue());
  Assert.assertFalse(singleMultiCapabilities.areDictionaryValuesSorted().isMaybeTrue());
  Assert.assertFalse(singleMultiCapabilities.areDictionaryValuesUnique().isMaybeTrue());
  Assert.assertTrue(singleMultiCapabilities.hasMultipleValues().isTrue());
  Assert.assertTrue(singleMultiCapabilities.hasBitmapIndexes());
  Assert.assertFalse(singleMultiCapabilities.hasSpatialIndexes());

  // scalar string combined with a multi-valued string: expect map, or fold when an accumulator is supplied
  final ExpressionPlan scalarAndMultiPlan = plan("concat(scalar_string, multi_dictionary_string_nonunique)");
  Assert.assertTrue(scalarAndMultiPlan.is(ExpressionPlan.Trait.NEEDS_APPLIED));
  Assert.assertFalse(
      scalarAndMultiPlan.is(
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.UNKNOWN_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
          ExpressionPlan.Trait.VECTORIZABLE
      )
  );
  Assert.assertEquals(
      "map((\"multi_dictionary_string_nonunique\") -> concat(\"scalar_string\", \"multi_dictionary_string_nonunique\"), \"multi_dictionary_string_nonunique\")",
      scalarAndMultiPlan.getAppliedExpression().stringify()
  );
  Assert.assertEquals(
      "fold((\"multi_dictionary_string_nonunique\", \"scalar_string\") -> concat(\"scalar_string\", \"multi_dictionary_string_nonunique\"), \"multi_dictionary_string_nonunique\", \"scalar_string\")",
      scalarAndMultiPlan.getAppliedFoldExpression("scalar_string").stringify()
  );
  Assert.assertEquals(ExprType.STRING, scalarAndMultiPlan.getOutputType());

  final ColumnCapabilities scalarAndMultiCapabilities = scalarAndMultiPlan.inferColumnCapabilities(null);
  Assert.assertNotNull(scalarAndMultiCapabilities);
  Assert.assertEquals(ValueType.STRING, scalarAndMultiCapabilities.getType());
  Assert.assertTrue(scalarAndMultiCapabilities.hasMultipleValues().isTrue());

  // two multi-valued inputs: expect the cartesian variants
  final ExpressionPlan twoMultiPlan = plan("concat(multi_dictionary_string, multi_dictionary_string_nonunique)");
  Assert.assertTrue(twoMultiPlan.is(ExpressionPlan.Trait.NEEDS_APPLIED));
  Assert.assertFalse(
      twoMultiPlan.is(
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.UNKNOWN_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
          ExpressionPlan.Trait.VECTORIZABLE
      )
  );
  Assert.assertEquals(ExprType.STRING, twoMultiPlan.getOutputType());
  Assert.assertEquals(
      "cartesian_map((\"multi_dictionary_string\", \"multi_dictionary_string_nonunique\") -> concat(\"multi_dictionary_string\", \"multi_dictionary_string_nonunique\"), \"multi_dictionary_string\", \"multi_dictionary_string_nonunique\")",
      twoMultiPlan.getAppliedExpression().stringify()
  );
  // the accumulator is bound but never referenced by the lambda body: sort of funny, but technically correct
  Assert.assertEquals(
      "cartesian_fold((\"multi_dictionary_string\", \"multi_dictionary_string_nonunique\", \"__acc\") -> concat(\"multi_dictionary_string\", \"multi_dictionary_string_nonunique\"), \"multi_dictionary_string\", \"multi_dictionary_string_nonunique\", \"__acc\")",
      twoMultiPlan.getAppliedFoldExpression("__acc").stringify()
  );

  final ColumnCapabilities twoMultiCapabilities = twoMultiPlan.inferColumnCapabilities(null);
  Assert.assertNotNull(twoMultiCapabilities);
  Assert.assertEquals(ValueType.STRING, twoMultiCapabilities.getType());
  Assert.assertTrue(twoMultiCapabilities.hasMultipleValues().isTrue());

  // array function over a multi-valued string
  final ExpressionPlan arrayAppendPlan = plan("array_append(multi_dictionary_string, 'foo')");
  Assert.assertTrue(arrayAppendPlan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT));
  Assert.assertFalse(
      arrayAppendPlan.is(
          ExpressionPlan.Trait.NEEDS_APPLIED,
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.UNKNOWN_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_INPUTS,
          ExpressionPlan.Trait.VECTORIZABLE
      )
  );
}
/**
 * Verifies that asking for a fold expression whose accumulator is a multi-valued input that was only used
 * implicitly (as a scalar) is rejected with an {@link IllegalStateException}.
 *
 * The exception expectation is armed immediately before the call under test, so an exception raised by the
 * preceding setup and sanity assertions cannot satisfy it. Every assertion sits before that call, because nothing
 * after a throwing statement runs.
 */
@Test
public void testMultiValueStringDictionaryEncodedIllegalAccumulator()
{
  ExpressionPlan thePlan = plan("concat(multi_dictionary_string, 'x')");
  Assert.assertTrue(
      thePlan.is(
          ExpressionPlan.Trait.NEEDS_APPLIED,
          ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE
      )
  );
  Assert.assertFalse(
      thePlan.is(
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.UNKNOWN_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
          ExpressionPlan.Trait.VECTORIZABLE
      )
  );
  Assert.assertEquals(ExprType.STRING, thePlan.getOutputType());

  thePlan = plan("concat(multi_dictionary_string, multi_dictionary_string_nonunique)");
  Assert.assertTrue(
      thePlan.is(
          ExpressionPlan.Trait.NEEDS_APPLIED
      )
  );
  Assert.assertFalse(
      thePlan.is(
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.UNKNOWN_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
          ExpressionPlan.Trait.VECTORIZABLE
      )
  );
  // checked before the throwing call; it previously followed that call and was never executed
  Assert.assertEquals(ExprType.STRING, thePlan.getOutputType());

  // what happens if we try to use a multi-valued input that was not explicitly used as multi-valued as the
  // accumulator? it must be rejected
  expectedException.expect(IllegalStateException.class);
  expectedException.expectMessage(
      "Accumulator cannot be implicitly transformed, if it is an ARRAY or multi-valued type it must be used explicitly as such"
  );
  thePlan.getAppliedFoldExpression("multi_dictionary_string");
}
/**
 * Plans a concat over a string column for which only the type is known and checks that it is flagged as having
 * incomplete inputs, with no output type or capabilities inferred.
 */
@Test
public void testIncompleteString()
{
  final ExpressionPlan incompletePlan = plan("concat(string_unknown, 'x')");

  Assert.assertTrue(incompletePlan.is(ExpressionPlan.Trait.INCOMPLETE_INPUTS));
  Assert.assertFalse(
      incompletePlan.is(
          ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
          ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
          ExpressionPlan.Trait.UNKNOWN_INPUTS,
          ExpressionPlan.Trait.NEEDS_APPLIED,
          ExpressionPlan.Trait.NON_SCALAR_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
          ExpressionPlan.Trait.VECTORIZABLE
      )
  );

  // incomplete inputs are not transformed either; that has to be detected and handled on a row-by-row basis
  final String expectedExpression = "concat(\"string_unknown\", 'x')";
  Assert.assertEquals(expectedExpression, incompletePlan.getAppliedExpression().stringify());
  Assert.assertEquals(expectedExpression, incompletePlan.getAppliedFoldExpression("__acc").stringify());

  // incomplete and unknown inputs skip the output type, since it is not reliably known
  Assert.assertNull(incompletePlan.getOutputType());
  Assert.assertNull(incompletePlan.inferColumnCapabilities(null));
}
/**
 * Plans {@code array_append} over scalar string, multi-valued string, incomplete string, and array typed inputs,
 * checking traits, output type, and how the inferred capabilities depend on the output type hint.
 */
@Test
public void testArrayOutput()
{
  // it is ok to use scalar inputs to array expressions; string columns can't help it if sometimes they are single
  // valued and sometimes they are multi-valued
  ExpressionPlan thePlan = plan("array_append(scalar_string, 'x')");
  assertArrayInAndOut(thePlan);

  // with a string hint, it should look like a multi-valued string
  ColumnCapabilities inferred = thePlan.inferColumnCapabilities(ValueType.STRING);
  Assert.assertNotNull(inferred);
  Assert.assertEquals(ValueType.STRING, inferred.getType());
  Assert.assertNull(inferred.getComplexTypeName());
  Assert.assertTrue(inferred.hasNulls().isMaybeTrue());
  Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue());
  Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue());
  Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue());
  Assert.assertTrue(inferred.hasMultipleValues().isTrue());
  Assert.assertFalse(inferred.hasBitmapIndexes());
  Assert.assertFalse(inferred.hasSpatialIndexes());

  // with a string array hint instead, the array type is kept
  inferred = thePlan.inferColumnCapabilities(ValueType.STRING_ARRAY);
  Assert.assertNotNull(inferred);
  Assert.assertEquals(ValueType.STRING_ARRAY, inferred.getType());
  Assert.assertNull(inferred.getComplexTypeName());
  Assert.assertTrue(inferred.hasNulls().isMaybeTrue());
  Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue());
  Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue());
  Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue());
  Assert.assertTrue(inferred.hasMultipleValues().isTrue());
  Assert.assertFalse(inferred.hasBitmapIndexes());
  Assert.assertFalse(inferred.hasSpatialIndexes());

  Assert.assertEquals("array_append(\"scalar_string\", 'x')", thePlan.getAppliedExpression().stringify());
  Assert.assertEquals("array_append(\"scalar_string\", 'x')", thePlan.getAppliedFoldExpression("__acc").stringify());
  Assert.assertEquals(ExprType.STRING_ARRAY, thePlan.getOutputType());

  // multi-valued are cool too
  thePlan = plan("array_append(multi_dictionary_string, 'x')");
  assertArrayInAndOut(thePlan);

  // what about incomplete inputs with arrays? they are not reported as incomplete because they are treated as arrays
  thePlan = plan("array_append(string_unknown, 'x')");
  assertArrayInAndOut(thePlan);
  Assert.assertEquals(ExprType.STRING_ARRAY, thePlan.getOutputType());

  // what about if it is the scalar argument? there it is
  thePlan = plan("array_append(multi_dictionary_string, string_unknown)");
  Assert.assertTrue(
      thePlan.is(
          ExpressionPlan.Trait.NON_SCALAR_INPUTS,
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_OUTPUT
      )
  );
  Assert.assertFalse(
      thePlan.is(
          ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
          ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
          ExpressionPlan.Trait.UNKNOWN_INPUTS,
          ExpressionPlan.Trait.NEEDS_APPLIED,
          ExpressionPlan.Trait.VECTORIZABLE
      )
  );
  // incomplete and unknown skip output type since we don't reliably know
  Assert.assertNull(thePlan.getOutputType());

  // array types are cool too
  thePlan = plan("array_append(string_array_1, 'x')");
  assertArrayInAndOut(thePlan);

  // exercise the second string array column as well; this case previously repeated string_array_1 verbatim
  thePlan = plan("array_append(string_array_2, 'x')");
  assertArrayInAndOut(thePlan);
}
/**
 * Plans {@code array_to_string} wrapped around {@code array_append}, first with a scalar string and a literal, then
 * with a scalar string and a multi-valued string, so that array inputs produce a scalar string output.
 */
@Test
public void testScalarOutputMultiValueInput()
{
  // scalar string plus literal
  final ExpressionPlan scalarPlan = plan("array_to_string(array_append(scalar_string, 'x'), ',')");
  assertArrayInput(scalarPlan);

  final ColumnCapabilities scalarCapabilities = scalarPlan.inferColumnCapabilities(ValueType.STRING);
  Assert.assertNotNull(scalarCapabilities);
  Assert.assertEquals(ValueType.STRING, scalarCapabilities.getType());
  Assert.assertNull(scalarCapabilities.getComplexTypeName());
  Assert.assertTrue(scalarCapabilities.hasNulls().isTrue());
  Assert.assertFalse(scalarCapabilities.isDictionaryEncoded().isMaybeTrue());
  Assert.assertFalse(scalarCapabilities.areDictionaryValuesSorted().isMaybeTrue());
  Assert.assertFalse(scalarCapabilities.areDictionaryValuesUnique().isMaybeTrue());
  Assert.assertFalse(scalarCapabilities.hasMultipleValues().isMaybeTrue());
  Assert.assertFalse(scalarCapabilities.hasBitmapIndexes());
  Assert.assertFalse(scalarCapabilities.hasSpatialIndexes());

  final String expectedScalar = "array_to_string(array_append(\"scalar_string\", 'x'), ',')";
  Assert.assertEquals(expectedScalar, scalarPlan.getAppliedExpression().stringify());
  Assert.assertEquals(expectedScalar, scalarPlan.getAppliedFoldExpression("__acc").stringify());
  Assert.assertEquals(ExprType.STRING, scalarPlan.getOutputType());

  // what about a multi-valued input as the appended argument
  final ExpressionPlan multiPlan = plan("array_to_string(array_append(scalar_string, multi_dictionary_string), ',')");
  assertArrayInput(multiPlan);

  Assert.assertEquals(
      "array_to_string(map((\"multi_dictionary_string\") -> array_append(\"scalar_string\", \"multi_dictionary_string\"), \"multi_dictionary_string\"), ',')",
      multiPlan.getAppliedExpression().stringify()
  );
  Assert.assertEquals(
      "array_to_string(fold((\"multi_dictionary_string\", \"scalar_string\") -> array_append(\"scalar_string\", \"multi_dictionary_string\"), \"multi_dictionary_string\", \"scalar_string\"), ',')",
      multiPlan.getAppliedFoldExpression("scalar_string").stringify()
  );
  // the output type is asserted to be STRING here as well
  Assert.assertEquals(ExprType.STRING, multiPlan.getOutputType());
}
/**
 * Plans expressions that consume array typed columns and produce a scalar: {@code array_to_string} over string
 * arrays and explicit {@code fold} over long and double arrays.
 */
@Test
public void testScalarOutputArrayInput()
{
  // string array appended to, then joined
  final ExpressionPlan appendPlan = plan("array_to_string(array_append(string_array_1, 'x'), ',')");
  assertArrayInput(appendPlan);
  final String expectedAppend = "array_to_string(array_append(\"string_array_1\", 'x'), ',')";
  Assert.assertEquals(expectedAppend, appendPlan.getAppliedExpression().stringify());
  Assert.assertEquals(expectedAppend, appendPlan.getAppliedFoldExpression("__acc").stringify());
  Assert.assertEquals(ExprType.STRING, appendPlan.getOutputType());

  // two string arrays concatenated, then joined
  final ExpressionPlan concatPlan = plan("array_to_string(array_concat(string_array_1, string_array_2), ',')");
  assertArrayInput(concatPlan);
  Assert.assertEquals(ExprType.STRING, concatPlan.getOutputType());

  // explicit fold summing two long arrays
  final ExpressionPlan longFoldPlan = plan("fold((x, acc) -> acc + x, array_concat(long_array_1, long_array_2), 0)");
  assertArrayInput(longFoldPlan);
  final String expectedLongFold =
      "fold((\"x\", \"acc\") -> (\"acc\" + \"x\"), array_concat(\"long_array_1\", \"long_array_2\"), 0)";
  Assert.assertEquals(expectedLongFold, longFoldPlan.getAppliedExpression().stringify());
  Assert.assertEquals(expectedLongFold, longFoldPlan.getAppliedFoldExpression("__acc").stringify());
  Assert.assertEquals(ExprType.LONG, longFoldPlan.getOutputType());

  // explicit fold multiplying two double arrays
  final ExpressionPlan doubleFoldPlan =
      plan("fold((x, acc) -> acc * x, array_concat(double_array_1, double_array_2), 0.0)");
  assertArrayInput(doubleFoldPlan);
  final String expectedDoubleFold =
      "fold((\"x\", \"acc\") -> (\"acc\" * \"x\"), array_concat(\"double_array_1\", \"double_array_2\"), 0.0)";
  Assert.assertEquals(expectedDoubleFold, doubleFoldPlan.getAppliedExpression().stringify());
  Assert.assertEquals(expectedDoubleFold, doubleFoldPlan.getAppliedFoldExpression("__acc").stringify());
  Assert.assertEquals(ExprType.DOUBLE, doubleFoldPlan.getOutputType());
}
/**
 * Plans the {@code array} constructor over scalar columns and checks the traits plus the element type chosen for
 * homogeneous and mixed inputs.
 */
@Test
public void testArrayConstruction()
{
  // two long inputs
  final ExpressionPlan longArrayPlan = plan("array(long1, long2)");
  Assert.assertTrue(longArrayPlan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT));
  Assert.assertFalse(
      longArrayPlan.is(
          ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
          ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
          ExpressionPlan.Trait.UNKNOWN_INPUTS,
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.NEEDS_APPLIED,
          ExpressionPlan.Trait.NON_SCALAR_INPUTS,
          ExpressionPlan.Trait.VECTORIZABLE
      )
  );
  Assert.assertEquals(ExprType.LONG_ARRAY, longArrayPlan.getOutputType());

  // long mixed with double
  final ExpressionPlan mixedNumericPlan = plan("array(long1, double1)");
  Assert.assertEquals(ExprType.DOUBLE_ARRAY, mixedNumericPlan.getOutputType());

  // numerics mixed with a string
  final ExpressionPlan mixedStringPlan = plan("array(long1, double1, scalar_string)");
  Assert.assertEquals(ExprType.STRING_ARRAY, mixedStringPlan.getOutputType());
}
/**
 * Parses {@code expression} with {@link TestExprMacroTable#INSTANCE} and plans it against
 * {@link #SYNTHETIC_INSPECTOR}.
 *
 * @param expression expression text to parse and plan
 * @return the plan produced by {@link ExpressionPlanner#plan}
 */
private static ExpressionPlan plan(String expression)
{
return ExpressionPlanner.plan(SYNTHETIC_INSPECTOR, Parser.parse(expression, TestExprMacroTable.INSTANCE));
}
/**
 * Shared trait check for plans that take non-scalar inputs but do not produce non-scalar output.
 *
 * NOTE(review): the positive checks in this class pass several traits to {@code is} to require all of them, which
 * suggests {@code is} is true only when every listed trait is present. If so, the {@code assertFalse} below only
 * proves the listed traits are not all set together, not that each one is absent. Confirm against ExpressionPlan.
 *
 * @param thePlan plan to check
 */
private static void assertArrayInput(ExpressionPlan thePlan)
{
  final boolean hasNonScalarInputs = thePlan.is(ExpressionPlan.Trait.NON_SCALAR_INPUTS);
  Assert.assertTrue(hasNonScalarInputs);

  final boolean matchesExcludedTraits = thePlan.is(
      ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
      ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
      ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
      ExpressionPlan.Trait.INCOMPLETE_INPUTS,
      ExpressionPlan.Trait.UNKNOWN_INPUTS,
      ExpressionPlan.Trait.NEEDS_APPLIED,
      ExpressionPlan.Trait.VECTORIZABLE
  );
  Assert.assertFalse(matchesExcludedTraits);
}
/**
 * Shared trait check for plans that both take non-scalar inputs and produce non-scalar output.
 *
 * NOTE(review): the positive checks in this class pass several traits to {@code is} to require all of them, which
 * suggests {@code is} is true only when every listed trait is present. If so, the {@code assertFalse} below only
 * proves the listed traits are not all set together, not that each one is absent. Confirm against ExpressionPlan.
 *
 * @param thePlan plan to check
 */
private static void assertArrayInAndOut(ExpressionPlan thePlan)
{
  final boolean hasArrayInputAndOutput = thePlan.is(
      ExpressionPlan.Trait.NON_SCALAR_INPUTS,
      ExpressionPlan.Trait.NON_SCALAR_OUTPUT
  );
  Assert.assertTrue(hasArrayInputAndOutput);

  final boolean matchesExcludedTraits = thePlan.is(
      ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
      ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
      ExpressionPlan.Trait.INCOMPLETE_INPUTS,
      ExpressionPlan.Trait.UNKNOWN_INPUTS,
      ExpressionPlan.Trait.NEEDS_APPLIED,
      ExpressionPlan.Trait.VECTORIZABLE
  );
  Assert.assertFalse(matchesExcludedTraits);
}
}