/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.physical.impl.scan.project;

import static org.junit.Assert.assertSame;

import java.util.ArrayList;
import java.util.List;

import org.apache.drill.categories.RowSetTest;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder;
import org.apache.drill.exec.physical.resultSet.ResultVectorCache;
import org.apache.drill.exec.physical.resultSet.impl.NullResultVectorCacheImpl;
import org.apache.drill.exec.physical.resultSet.impl.ResultVectorCacheImpl;
import org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.test.SubOperatorTest;
import org.apache.drill.test.rowSet.RowSetUtilities;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Test the mechanism that handles all-null columns during projection.
 * An all-null column is one projected in the query, but which does
 * not actually exist in the underlying data source (or input
 * operator).
 * <p>
 * In anticipation of having type information, this mechanism
 * can create the classic nullable INT null column, or a column of
 * any other type and mode.
 */
@Category(RowSetTest.class)
public class TestNullColumnLoader extends SubOperatorTest {

  private ResolvedNullColumn makeNullCol(String name, MajorType nullType) {
    return makeNullCol(name, nullType, null);
  }

  private ResolvedNullColumn makeNullCol(String name) {
    return makeNullCol(name, null, null);
  }

  private ResolvedNullColumn makeNullCol(String name, MajorType nullType, String defaultValue) {

    // For this test, we don't need the projection, so just
    // set it to null.
    return new ResolvedNullColumn(name, nullType, defaultValue, null, 0);
  }

  /**
   * Test the simplest case: default null type, nothing in the vector
   * cache. Specify no column type, the special NULL type, or a
   * predefined type. Output types should be set accordingly.
   */
  @Test
  public void testBasics() {
    final List<ResolvedNullColumn> defns = new ArrayList<>();
    defns.add(makeNullCol("unspecified", null));
    defns.add(makeNullCol("nullType", Types.optional(MinorType.NULL)));
    defns.add(makeNullCol("specifiedOpt", Types.optional(MinorType.VARCHAR)));
    defns.add(makeNullCol("specifiedReq", Types.required(MinorType.VARCHAR)));
    defns.add(makeNullCol("specifiedArray", Types.repeated(MinorType.VARCHAR)));
    final ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator());
    final NullColumnLoader staticLoader = new NullColumnLoader(cache, defns, null, false);

    // Create a batch
    final VectorContainer output = staticLoader.load(2);

    // Verify values and types
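    // Note: the loader converts the required VARCHAR column to
    // nullable; without a default value, a "null" column cannot
    // use REQUIRED mode.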
    final TupleMetadata expectedSchema = new SchemaBuilder()
        .add("unspecified", NullColumnLoader.DEFAULT_NULL_TYPE)
        .add("nullType", NullColumnLoader.DEFAULT_NULL_TYPE)
        .addNullable("specifiedOpt", MinorType.VARCHAR)
        .addNullable("specifiedReq", MinorType.VARCHAR)
        .addArray("specifiedArray", MinorType.VARCHAR)
        .buildSchema();
    final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow(null, null, null, null, new String[] {})
        .addRow(null, null, null, null, new String[] {})
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(output));
    staticLoader.close();
  }

  /**
   * Test the ability to use a type other than nullable INT for null
   * columns. This occurs, for example, in the CSV reader where no
   * column is ever INT (nullable or otherwise) and we want our null
   * columns to be VARCHAR. (This test uses the nullable variant.)
   */
  @Test
  public void testCustomNullType() {
    final List<ResolvedNullColumn> defns = new ArrayList<>();
    defns.add(makeNullCol("unspecified", null));
    defns.add(makeNullCol("nullType", MajorType.newBuilder()
        .setMinorType(MinorType.NULL)
        .setMode(DataMode.OPTIONAL)
        .build()));

    // Null required is an oxymoron, so is not tested.
    // Null type array does not make sense, so is not tested.
    final ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator());
    final MajorType nullType = MajorType.newBuilder()
        .setMinorType(MinorType.VARCHAR)
        .setMode(DataMode.OPTIONAL)
        .build();
    final NullColumnLoader staticLoader = new NullColumnLoader(cache, defns, nullType, false);

    // Create a batch
    final VectorContainer output = staticLoader.load(2);

    // Verify values and types
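    // Both the unspecified column and the explicit NULL-type column
    // take on the custom (nullable VARCHAR) null type.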
    final TupleMetadata expectedSchema = new SchemaBuilder()
        .add("unspecified", nullType)
        .add("nullType", nullType)
        .buildSchema();
    final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow(null, null)
        .addRow(null, null)
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(output));
    staticLoader.close();
  }

  /**
   * Test the ability to provide a default value for a "null" column.
   * Default values are only allowed for required "null" columns. For
   * nullable columns, NULL is already the default.
   */
  @Test
  public void testDefaultValue() {
    final List<ResolvedNullColumn> defns = new ArrayList<>();
    defns.add(makeNullCol("int", Types.required(MinorType.INT), "10"));
    defns.add(makeNullCol("str", Types.required(MinorType.VARCHAR), "foo"));
    defns.add(makeNullCol("dub", Types.required(MinorType.FLOAT8), "20.0"));
    final ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator());
    final MajorType nullType = Types.optional(MinorType.VARCHAR);
    final NullColumnLoader staticLoader = new NullColumnLoader(cache, defns, nullType, false);

    // Create a batch
    final VectorContainer output = staticLoader.load(2);

    // Verify values and types
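    // Because each column has a default value, the loader can honor
    // REQUIRED mode, filling every row with the default.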
    final TupleMetadata expectedSchema = new SchemaBuilder()
        .add("int", MinorType.INT)
        .add("str", MinorType.VARCHAR)
        .add("dub", MinorType.FLOAT8)
        .buildSchema();
    final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow(10, "foo", 20.0D)
        .addRow(10, "foo", 20.0D)
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(output));
    staticLoader.close();
  }

  /**
   * Drill requires "schema persistence": if a scan operator
   * reads two files, F1 and F2, then the scan operator must
   * provide the same vectors for both readers. Not just the
   * same types: the same value vector instances (populated, of
   * course, with different data).
   * <p>
   * Test the case in which the reader for F1 found columns
   * (a, b, c) but F2 found only (a, b), requiring that we
   * fill in column c with nulls, using the same type that it
   * had in file F1. We use a vector cache to pull off this trick.
   * This test ensures that the null column mechanism looks in the
   * vector cache when asked to create a nullable column.
   */
  @Test
  public void testCachedTypesMapToNullable() {
    final List<ResolvedNullColumn> defns = new ArrayList<>();
    defns.add(makeNullCol("req"));
    defns.add(makeNullCol("opt"));
    defns.add(makeNullCol("rep"));
    defns.add(makeNullCol("unk"));

    // Populate the cache with a column of each mode.
    final ResultVectorCacheImpl cache = new ResultVectorCacheImpl(fixture.allocator());
    cache.vectorFor(SchemaBuilder.columnSchema("req", MinorType.FLOAT8, DataMode.REQUIRED));
    final ValueVector opt = cache.vectorFor(SchemaBuilder.columnSchema("opt", MinorType.FLOAT8, DataMode.OPTIONAL));
    final ValueVector rep = cache.vectorFor(SchemaBuilder.columnSchema("rep", MinorType.FLOAT8, DataMode.REPEATED));

    // Use nullable Varchar for unknown null columns.
    final MajorType nullType = Types.optional(MinorType.VARCHAR);
    final NullColumnLoader staticLoader = new NullColumnLoader(cache, defns, nullType, false);

    // Create a batch
    final VectorContainer output = staticLoader.load(2);

    // Verify vectors are reused
    assertSame(opt, output.getValueVector(1).getValueVector());
    assertSame(rep, output.getValueVector(2).getValueVector());
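    // No assertion for "req": the cached vector uses REQUIRED mode,
    // but a null column must be nullable, so the loader creates a
    // new nullable FLOAT8 vector rather than reusing the cached one.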

    // Verify values and types
    final TupleMetadata expectedSchema = new SchemaBuilder()
        .addNullable("req", MinorType.FLOAT8)
        .addNullable("opt", MinorType.FLOAT8)
        .addArray("rep", MinorType.FLOAT8)
        .addNullable("unk", MinorType.VARCHAR)
        .buildSchema();
    final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow(null, null, new double[] {}, null)
        .addRow(null, null, new double[] {}, null)
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(output));
    staticLoader.close();
  }

  /**
   * Suppose, in the previous test, that one of the columns that
   * goes missing is a required column. The null-column mechanism can
   * create the "null" column as a required column, then fill it with
   * empty values (zero or "") -- if the scan operator feels doing so
   * would be helpful.
   */
  @Test
  public void testCachedTypesAllowRequired() {
    final List<ResolvedNullColumn> defns = new ArrayList<>();
    defns.add(makeNullCol("req"));
    defns.add(makeNullCol("opt"));
    defns.add(makeNullCol("rep"));
    defns.add(makeNullCol("unk"));

    // Populate the cache with a column of each mode.
    final ResultVectorCacheImpl cache = new ResultVectorCacheImpl(fixture.allocator());
    cache.vectorFor(SchemaBuilder.columnSchema("req", MinorType.FLOAT8, DataMode.REQUIRED));
    final ValueVector opt = cache.vectorFor(SchemaBuilder.columnSchema("opt", MinorType.FLOAT8, DataMode.OPTIONAL));
    final ValueVector rep = cache.vectorFor(SchemaBuilder.columnSchema("rep", MinorType.FLOAT8, DataMode.REPEATED));

    // Use nullable Varchar for unknown null columns.
    final MajorType nullType = Types.optional(MinorType.VARCHAR);
    final NullColumnLoader staticLoader = new NullColumnLoader(cache, defns, nullType, true);

    // Create a batch
    final VectorContainer output = staticLoader.load(2);

    // Verify vectors are reused
    assertSame(opt, output.getValueVector(1).getValueVector());
    assertSame(rep, output.getValueVector(2).getValueVector());

    // Verify values and types
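    // With allowRequired = true, the cached REQUIRED mode for "req"
    // is preserved and the column is filled with zeros.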
    final TupleMetadata expectedSchema = new SchemaBuilder()
        .add("req", MinorType.FLOAT8)
        .addNullable("opt", MinorType.FLOAT8)
        .addArray("rep", MinorType.FLOAT8)
        .addNullable("unk", MinorType.VARCHAR)
        .buildSchema();
    final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow(0.0, null, new double[] {}, null)
        .addRow(0.0, null, new double[] {}, null)
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(output));
    staticLoader.close();
  }

  /**
   * Test the shim class that adapts between the null column loader
   * and the projection mechanism. The projection mechanism uses this
   * class to pull in the null columns which the null column loader
   * has created.
   */
  @Test
  public void testNullColumnBuilder() {
    final ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator());
    final NullColumnBuilder builder = new NullBuilderBuilder().build();
    builder.add("unspecified");
    builder.add("nullType", Types.optional(MinorType.NULL));
    builder.add("specifiedOpt", Types.optional(MinorType.VARCHAR));
    builder.add("specifiedReq", Types.required(MinorType.VARCHAR));
    builder.add("specifiedArray", Types.repeated(MinorType.VARCHAR));
    builder.build(cache);

    // Create a batch
    builder.load(2);

    // Verify values and types
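    // Expected results match testBasics(): the builder delegates
    // the actual work to the NullColumnLoader exercised above.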
    final TupleMetadata expectedSchema = new SchemaBuilder()
        .add("unspecified", NullColumnLoader.DEFAULT_NULL_TYPE)
        .add("nullType", NullColumnLoader.DEFAULT_NULL_TYPE)
        .addNullable("specifiedOpt", MinorType.VARCHAR)
        .addNullable("specifiedReq", MinorType.VARCHAR)
        .addArray("specifiedArray", MinorType.VARCHAR)
        .buildSchema();
    final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow(null, null, null, null, new String[] {})
        .addRow(null, null, null, null, new String[] {})
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(builder.output()));
    builder.close();
  }

  /**
   * Test using an output schema, along with a default value property,
   * to define a default value for missing columns.
   */
  @Test
  public void testNullColumnBuilderWithSchema() {

    // Note: upper-case initial letters in the schema, lower-case
    // in the "projection" list, to verify case-insensitive matching.
    final TupleMetadata outputSchema = new SchemaBuilder()
        .add("IntReq", MinorType.INT)
        .add("StrReq", MinorType.VARCHAR)
        .addNullable("IntOpt", MinorType.INT)
        .addNullable("StrOpt", MinorType.VARCHAR)
        .addNullable("DubOpt", MinorType.FLOAT8) // No default
        .buildSchema();
    outputSchema.metadata("intReq").setDefaultValue("10");
    outputSchema.metadata("strReq").setDefaultValue("foo");
    outputSchema.metadata("intOpt").setDefaultValue("20");
    outputSchema.metadata("strOpt").setDefaultValue("bar");
    final ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator());
    final NullColumnBuilder builder = new NullBuilderBuilder()
        .setNullType(Types.optional(MinorType.VARCHAR))
        .setOutputSchema(outputSchema)
        .build();
    builder.add("strReq");
    builder.add("strOpt");
    builder.add("dubOpt");
    builder.add("intReq");
    builder.add("intOpt");
    builder.add("extra");
    builder.build(cache);

    // Create a batch
    builder.load(2);

    // Verify values and types
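    // Required columns with defaults keep REQUIRED mode and are
    // filled with the default; nullable columns ignore the default
    // and are filled with NULL; "extra" is not in the schema, so it
    // takes the null type (nullable VARCHAR).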
    final TupleMetadata expectedSchema = new SchemaBuilder()
        .add("strReq", MinorType.VARCHAR)
        .addNullable("strOpt", MinorType.VARCHAR)
        .addNullable("dubOpt", MinorType.FLOAT8)
        .add("intReq", MinorType.INT)
        .addNullable("intOpt", MinorType.INT)
        .addNullable("extra", MinorType.VARCHAR)
        .buildSchema();
    final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow("foo", null, null, 10, null, null)
        .addRow("foo", null, null, 10, null, null)
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(builder.output()));
    builder.close();
  }

  /**
   * Test the various conflicts that can occur:
   * <ul>
   * <li>Schema is required, but there is no default value for the null column.</li>
   * <li>Query wants a different type than that in the schema.</li>
   * <li>Query wants a different mode than that in the schema.</li>
   * </ul>
   * <p>
   * The type and mode provided to the builder are those that would
   * result from schema smoothing. The types and modes should usually
   * match, but this test verifies the rules for when they don't.
   * <p>
   * Defaults for nullable columns are ignored: nullable columns use
   * NULL as the null value.
   */
  @Test
  public void testSchemaWithConflicts() {

    // Note: upper-case initial letters in the schema, lower-case
    // in the "projection" list.
    final TupleMetadata outputSchema = new SchemaBuilder()
        .add("IntReq", MinorType.INT)
        .add("StrReq", MinorType.VARCHAR) // No default
        .addNullable("IntOpt", MinorType.INT)
        .addNullable("StrOpt", MinorType.VARCHAR)
        .buildSchema();
    outputSchema.metadata("intReq").setDefaultValue("10");
    outputSchema.metadata("intOpt").setDefaultValue("20");
    outputSchema.metadata("strOpt").setDefaultValue("bar");
    final ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator());
    final NullColumnBuilder builder = new NullBuilderBuilder()
        .setNullType(Types.optional(MinorType.VARCHAR))
        .setOutputSchema(outputSchema)
        .build();

    // Defined, required, but no default, so --> optional
    builder.add("strReq");

    // Defined, nullable: the default is ignored, fill with NULL
    builder.add("strOpt");

    // Defined, has default, but conflicting type, so default --> null, so --> optional
    builder.add("intReq", Types.required(MinorType.BIGINT));

    // Defined, has default, conflicting mode, so keep the default
    builder.add("intOpt", Types.required(MinorType.INT));
    builder.build(cache);

    // Create a batch
    builder.load(2);

    // Verify values and types
    final TupleMetadata expectedSchema = new SchemaBuilder()
        .addNullable("strReq", MinorType.VARCHAR)
        .addNullable("strOpt", MinorType.VARCHAR)
        .addNullable("intReq", MinorType.BIGINT)
        .add("intOpt", MinorType.INT)
        .buildSchema();
    final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow(null, null, null, 20)
        .addRow(null, null, null, 20)
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(builder.output()));
    builder.close();
  }
}