// blob: 7f1785da78a4baca7bbc8d808ba60c96de65edf1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.physical.impl.scan.project;
import java.util.List;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedColumn;
import org.apache.drill.exec.physical.resultSet.project.RequestedColumn;
import org.apache.drill.exec.physical.resultSet.project.RequestedTuple;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.metadata.ColumnMetadata;
import org.apache.drill.exec.record.metadata.MetadataUtils;
import org.apache.drill.exec.record.metadata.PrimitiveColumnMetadata;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.vector.complex.DictVector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Perform a schema projection for the case of an explicit list of
 * projected columns. Example: SELECT a, b, c.
 * <p>
 * An explicit projection starts with the requested set of columns,
 * then looks in the table schema to find matches. That is, it is
 * driven by the query itself.
 * <p>
 * An explicit projection may include columns that do not exist in
 * the source schema. In this case, we fill in null columns for
 * unmatched projections.
 * <p>
 * All resolution work is done by the constructor, which populates the
 * root tuple handed to it.
 */
public class ExplicitSchemaProjection extends ReaderLevelProjection {
  private static final Logger logger = LoggerFactory.getLogger(ExplicitSchemaProjection.class);

  private final ScanLevelProjection scanProj;

  /**
   * Resolve the scan-level projection list against a reader schema,
   * adding the resolved columns to the given root tuple.
   *
   * @param scanProj
   *          scan-level projection that supplies the requested columns
   *          and the error context
   * @param readerSchema
   *          schema of the table as reported by the reader
   * @param rootTuple
   *          top-level output tuple to populate
   * @param resolvers
   *          resolvers passed through to the base class
   */
  public ExplicitSchemaProjection(ScanLevelProjection scanProj,
      TupleMetadata readerSchema,
      ResolvedTuple rootTuple,
      List<ReaderProjectionResolver> resolvers) {
    super(resolvers);
    this.scanProj = scanProj;
    resolveRootTuple(rootTuple, readerSchema);
  }

  /**
   * Walk the scan-level column list. Unresolved columns are matched
   * against the reader schema; every other kind of column is handed to
   * {@code resolveSpecial()}.
   */
  private void resolveRootTuple(ResolvedTuple rootTuple, TupleMetadata readerSchema) {
    for (ColumnProjection col : scanProj.columns()) {
      if (col instanceof UnresolvedColumn) {
        resolveColumn(rootTuple, ((UnresolvedColumn) col).element(), readerSchema);
      } else {
        resolveSpecial(rootTuple, col, readerSchema);
      }
    }
  }

  /**
   * Resolve a requested column by looking up its own name in the
   * reader (or map) schema.
   */
  private void resolveColumn(ResolvedTuple outputTuple, RequestedColumn inputCol, TupleMetadata readerSchema) {
    resolveSourceColumn(outputTuple, inputCol, readerSchema, inputCol.name());
  }

  /**
   * Resolve a requested member of a dict. The lookup ignores the
   * requested name and always targets the dict's value field
   * ({@link DictVector#FIELD_VALUE_NAME}).
   */
  private void resolveDictValueColumn(ResolvedTuple outputTuple,
      RequestedColumn inputCol, TupleMetadata readerSchema) {
    resolveSourceColumn(outputTuple, inputCol, readerSchema, DictVector.FIELD_VALUE_NAME);
  }

  /**
   * Shared lookup for {@link #resolveColumn} and
   * {@link #resolveDictValueColumn}: find {@code sourceName} in the
   * schema and project the matching table column, or create a null
   * column when the schema has no such column.
   *
   * @param outputTuple
   *          projected tuple being built
   * @param inputCol
   *          column as requested in the project list
   * @param readerSchema
   *          schema in which to search
   * @param sourceName
   *          name of the schema column to search for
   */
  private void resolveSourceColumn(ResolvedTuple outputTuple,
      RequestedColumn inputCol, TupleMetadata readerSchema, String sourceName) {
    int tableColIndex = readerSchema.index(sourceName);
    if (tableColIndex == -1) {
      resolveNullColumn(outputTuple, inputCol);
    } else {
      resolveTableColumn(outputTuple, inputCol, readerSchema.metadata(tableColIndex), tableColIndex);
    }
  }

  /**
   * Project a column that exists in the table, choosing the strategy
   * from the shape implied by the project list (tuple, array, or plain
   * name).
   */
  private void resolveTableColumn(ResolvedTuple outputTuple,
      RequestedColumn requestedCol, ColumnMetadata column, int sourceIndex) {
    if (requestedCol.isTuple()) {

      // Is the requested column implied to be a map?
      // A requested column is a map if the user requests x.y and we
      // are resolving column x. The presence of y as a member implies
      // that x is a map. A table column that is a dict gets the
      // dict-specific handling instead.
      if (column.isDict()) {
        resolveDict(outputTuple, requestedCol, column, sourceIndex);
      } else {
        resolveMap(outputTuple, requestedCol, column, sourceIndex);
      }
    } else if (requestedCol.isArray()) {

      // Is the requested column implied to be an array?
      // This occurs when the projection list contains at least one
      // array index reference such as x[10].
      resolveArray(outputTuple, requestedCol, column, sourceIndex);
    } else {

      // A plain old column. Might be an array or a map, but if
      // so, the request list just mentions it by name without implying
      // the column type. That is, the project list just contains x
      // by itself.
      projectTableColumn(outputTuple, requestedCol, column, sourceIndex);
    }
  }

  /**
   * Report whether the column is a scalar that flags itself through
   * {@code isSchemaForUnknown()}. Such a column may be replaced by a
   * map or dict when the project list implies one. The
   * {@code instanceof} guard ensures that a scalar column of some other
   * implementation class yields {@code false} (and hence the normal
   * validation error) rather than a {@code ClassCastException}.
   */
  private static boolean isSchemaForUnknown(ColumnMetadata column) {
    return column.isScalar()
        && column instanceof PrimitiveColumnMetadata
        && ((PrimitiveColumnMetadata) column).isSchemaForUnknown();
  }

  /**
   * Resolve a table column that the project list implies is a map.
   *
   * @param outputTuple
   *          projected tuple being built
   * @param requestedCol
   *          column as requested in the project list
   * @param column
   *          metadata for the actual table column
   * @param sourceIndex
   *          index of the column within the table tuple
   * @throws UserException
   *           validation error if the table column is not a map and
   *           cannot be replaced by one
   */
  private void resolveMap(ResolvedTuple outputTuple, RequestedColumn requestedCol, ColumnMetadata column,
      int sourceIndex) {

    // If the actual column isn't a map, try to change column datatype
    if (!column.isMap()) {
      if (isSchemaForUnknown(column)) {
        column = MetadataUtils.newMap(column.name());
      } else {
        throw UserException
          .validationError()
          .message("Project list implies a map column, but actual column is not a map")
          .addContext("Projected column:", requestedCol.fullName())
          .addContext("Table column:", column.name())
          .addContext("Type:", column.type().name())
          .addContext(scanProj.context())
          .build(logger);
      }
    }

    // The requested column is implied to be a map because it lists
    // members to project. Project these.
    ResolvedMapColumn mapCol = new ResolvedMapColumn(outputTuple, column.schema(), sourceIndex);
    resolveTuple(mapCol.members(), requestedCol.tuple(), column.tupleSchema());

    // If the projection is simple, then just project the map column
    // as is. A projection is simple if all map columns from the table
    // are projected, and no null columns are needed. The simple case
    // occurs more often than one might expect because the result set
    // loader only projected those columns that were needed, so the only
    // issue we have to handle is null columns.
    //
    // In the simple case, we discard the map tuple just created
    // since we ended up not needing it.
    if (mapCol.members().isSimpleProjection()) {
      outputTuple.removeChild(mapCol.members());
      projectTableColumn(outputTuple, requestedCol, column, sourceIndex);
    } else {

      // The resolved tuple may have a subset of table columns
      // and/or null columns. Project a new map that will be created
      // to hold the projected map elements.
      outputTuple.add(mapCol);
    }
  }

  /**
   * Resolve a table column that is a dict and for which the project
   * list names members. Mirrors {@link #resolveMap}, except that
   * members are resolved against the dict's value field.
   *
   * @param outputTuple
   *          projected tuple being built
   * @param requestedCol
   *          column as requested in the project list
   * @param column
   *          metadata for the actual table column
   * @param sourceIndex
   *          index of the column within the table tuple
   * @throws UserException
   *           validation error if the table column is not a dict and
   *           cannot be replaced by one
   */
  private void resolveDict(ResolvedTuple outputTuple, RequestedColumn requestedCol, ColumnMetadata column,
      int sourceIndex) {

    // If the actual column isn't a dict, try to change column datatype.
    // The caller in this class only passes dict columns, so this is a
    // defensive check.
    if (!column.isDict()) {
      if (isSchemaForUnknown(column)) {
        column = MetadataUtils.newDict(column.name());
      } else {
        throw UserException
          .validationError()
          .message("Project list implies a dict column, but actual column is not a dict")
          .addContext("Projected column:", requestedCol.fullName())
          .addContext("Table column:", column.name())
          .addContext("Type:", column.type().name())
          .addContext(scanProj.context())
          .build(logger);
      }
    }

    ResolvedDictColumn dictColumn = new ResolvedDictColumn(outputTuple, column.schema(), sourceIndex);
    resolveDictTuple(dictColumn.members(), requestedCol.tuple(), column.tupleSchema());

    // Same as for a map: a simple projection reuses the table column
    // directly and discards the dict tuple just created; otherwise the
    // newly built dict column is projected.
    if (dictColumn.members().isSimpleProjection()) {
      outputTuple.removeChild(dictColumn.members());
      projectTableColumn(outputTuple, requestedCol, column, sourceIndex);
    } else {
      outputTuple.add(dictColumn);
    }
  }

  /**
   * Resolve each requested member of a map against the map's schema.
   */
  private void resolveTuple(ResolvedTuple mapTuple,
      RequestedTuple requestedTuple, TupleMetadata mapSchema) {
    for (RequestedColumn col : requestedTuple.projections()) {
      resolveColumn(mapTuple, col, mapSchema);
    }
  }

  /**
   * Resolve each requested member of a dict against the dict's schema,
   * using the dict value field as the source of every member.
   */
  private void resolveDictTuple(ResolvedTuple mapTuple,
      RequestedTuple requestedTuple, TupleMetadata mapSchema) {
    for (RequestedColumn col : requestedTuple.projections()) {
      resolveDictValueColumn(mapTuple, col, mapSchema);
    }
  }

  /**
   * Resolve a table column that the project list implies is an array.
   *
   * @param outputTuple
   *          projected tuple being built
   * @param requestedCol
   *          column as requested in the project list
   * @param column
   *          metadata for the actual table column
   * @param sourceIndex
   *          index of the column within the table tuple
   * @throws UserException
   *           validation error if the table column is neither a LIST
   *           nor an array
   */
  private void resolveArray(ResolvedTuple outputTuple,
      RequestedColumn requestedCol, ColumnMetadata column,
      int sourceIndex) {

    // If the actual column isn't an array or list,
    // then the request is invalid.
    if (column.type() != MinorType.LIST && !column.isArray()) {
      throw UserException
        .validationError()
        .message("Project list implies an array, but actual column is not an array")
        .addContext("Projected column:", requestedCol.fullName())
        .addContext("Table column:", column.name())
        .addContext("Type:", column.type().name())
        .addContext("Actual cardinality:", column.mode().name())
        .addContext(scanProj.context())
        .build(logger);
    }

    // The project operator will do the actual array element
    // projection.
    projectTableColumn(outputTuple, requestedCol, column, sourceIndex);
  }

  /**
   * Project a column to the specified output tuple. The name comes from the
   * project list. (If the actual column name is `X` (upper case), but the
   * project list requests `x` (lower case), project the column using the
   * lower-case name.) The column type comes from the table column. The source
   * index is the location in the table map or row.
   *
   * @param outputTuple
   *          projected tuple being built
   * @param requestedCol
   *          column as requested in the project list
   * @param column
   *          metadata for the actual table column
   * @param sourceIndex
   *          index of the column within the table tuple (implies the location
   *          of the table vector to be projected)
   */
  private void projectTableColumn(ResolvedTuple outputTuple,
      RequestedColumn requestedCol,
      ColumnMetadata column, int sourceIndex) {
    outputTuple.add(
        new ResolvedTableColumn(requestedCol.name(),
            MaterializedField.create(requestedCol.name(),
                column.majorType()),
            outputTuple, sourceIndex));
  }

  /**
   * Resolve a null column. This is a projected column which does not match
   * an implicit or table column. We consider two cases: a simple top-level
   * column reference ("a", say) and an implied map reference ("a.b", say.)
   * If the column appears to be a map, determine the set of children, which
   * may appear to any depth, that were requested.
   *
   * @param outputTuple
   *          projected tuple being built
   * @param requestedCol
   *          column as requested in the project list
   */
  private void resolveNullColumn(ResolvedTuple outputTuple, RequestedColumn requestedCol) {
    ResolvedColumn nullCol;
    if (requestedCol.isTuple()) {
      nullCol = resolveMapMembers(outputTuple, requestedCol);
    } else {
      nullCol = outputTuple.nullBuilder.add(requestedCol.name());
    }
    outputTuple.add(nullCol);
  }

  /**
   * A child column of a map is not projected. Recurse to determine the full
   * set of nullable child columns.
   *
   * @param outputTuple projected tuple being built
   * @param col the map column which was projected
   * @return a map column whose members are null columns (or nested maps
   * of null columns) for the requested children; the caller is
   * responsible for adding it to its parent tuple
   */
  private ResolvedColumn resolveMapMembers(ResolvedTuple outputTuple, RequestedColumn col) {
    ResolvedMapColumn mapCol = new ResolvedMapColumn(outputTuple, col.name());
    ResolvedTuple members = mapCol.members();
    for (RequestedColumn child : col.tuple().projections()) {
      if (child.isTuple()) {
        members.add(resolveMapMembers(members, child));
      } else {
        members.add(members.nullBuilder.add(child.name()));
      }
    }
    return mapCol;
  }
}