/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.physical.impl.scan.v3.schema;

import org.apache.calcite.rel.type.RelDataType;
import org.apache.drill.common.exceptions.CustomErrorContext;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.planner.sql.TypeInferenceUtils;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.metadata.ColumnMetadata;
import org.apache.drill.exec.record.metadata.MapColumnMetadata;
import org.apache.drill.exec.record.metadata.MetadataUtils;
import org.apache.drill.exec.record.metadata.PrimitiveColumnMetadata;
import org.apache.drill.exec.record.metadata.Propertied;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.record.metadata.TupleSchema;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Set of schema utilities that don't fit well as methods on the column
* or tuple classes.
* <p>
 * Provides methods to check if a column is consistent with the projection
 * requested for a query. Used for scans: the reader offers certain columns
 * and the scan operator must decide whether to accept them and, if so,
 * whether the column that has actually appeared is consistent with the
 * projection schema path provided by the planner. An obvious example is if
 * projection asks for {@code a[0]} (an array), but the reader offers up
 * {@code a} as a non-array column.
* <p>
* Checks are reasonable, but not complete. Particularly in the {@code DICT}
* case, projection depends on multiple factors, such as the type of the
* key and values. This class does not (yet) handle that complexity.
* Instead, the goal is no false negatives for the complex cases, while
* catching the simple cases.
* <p>
* The Project operator or other consuming operator is the final arbitrator
* of whether a particular column satisfies a particular projection. This
* class tries to catch those errors early to provide better error
* messages.
*/
public class SchemaUtils {
  protected static final Logger logger = LoggerFactory.getLogger(SchemaUtils.class);

/**
* Check if the given read column is consistent with the projection requested for
* that column. Does not handle subtleties such as DICT key types, actual types
* in a UNION, etc.
*
* @param colReq the column-level projection description
* @param readCol metadata for the column which the reader has actually
* produced
* @return {@code true} if the column is consistent with projection (or if the
* column is too complex to check), {@code false} if the column is not
* consistent and represents an error case. Also returns {@code true} if
* the column is not projected, as any type of column can be ignored
*/
public static boolean isConsistent(ProjectedColumn colReq, ColumnMetadata readCol) {
if (readCol.isDynamic()) {
// Don't know the type. This is a name-only probe.
return true;
}
// If the projection is map-like, but the proposed concrete column
// is not map-like, then the columns are not compatible.
if (colReq.isMap() && !(readCol.isMap() || readCol.isDict() || readCol.isVariant())) {
return false;
}
    // If the projection is array-like, but the proposed concrete column
    // is not array-like, or does not have at least as many dimensions as
    // the projection, then the column is not compatible.
if (colReq.isArray()) {
if (colReq.arrayDims() == 1) {
return readCol.isArray() || readCol.isDict() || readCol.isVariant();
} else {
return readCol.type() == MinorType.LIST || readCol.isDict() || readCol.isVariant();
}
}
return true;
}
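
  // Illustrative sketch of the check above, assuming a ProjectedColumn
  // obtained from the scan projection parser for the path "a[0]". The
  // variable names are hypothetical:
  //
  //   ProjectedColumn colReq = ...;  // projection for "a[0]", an array
  //   ColumnMetadata readCol = MetadataUtils.newScalar("a",
  //       Types.required(MinorType.INT));  // reader offers non-array "a"
  //   SchemaUtils.isConsistent(colReq, readCol);  // false: array vs. non-array
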
  /**
   * Perform the column-level projection check as described in
   * {@link #isConsistent(ProjectedColumn, ColumnMetadata)}, and raise a
   * {@code UserException} if the column is not consistent with projection.
   *
   * @param colReq the column-level projection description
   * @param actual metadata for the column which the reader has actually
   * produced
   * @param source name of the schema source (such as "Reader"), used to
   * label the columns in the error message
   * @param errorContext additional error context to pass along in the
   * exception
   * @throws UserException if the read column is not consistent with the
   * projection description for the column
   */
public static void verifyCompatibility(ProjectedColumn colReq, ColumnMetadata actual,
String source, CustomErrorContext errorContext) {
if (!isConsistent(colReq, actual)) {
throw UserException.validationError()
.message(source + " column type not compatible with projection specification")
.addContext("Projected column", colReq.projectString())
.addContext(source + " column", actual.columnString())
.addContext(errorContext)
.build(logger);
}
}
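
  // For example (hypothetical variables): if the reader materializes a
  // non-array "a" while the query projected "a[0]", the check fails fast
  // with a validation error rather than deferring to the Project operator:
  //
  //   SchemaUtils.verifyCompatibility(colReq, readCol, "Reader", errorContext);
  //   // UserException: "Reader column type not compatible with projection
  //   // specification"
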
public static void verifyConsistency(ColumnMetadata existing,
ColumnMetadata revised, String source,
CustomErrorContext errorContext) {
if (existing.isDynamic() || revised.isDynamic()) {
return;
}
if (existing.type() != revised.type() ||
existing.mode() != revised.mode()) {
throw UserException.validationError()
.message("Scan and " + source + " column type conflict")
.addContext("Scan column", existing.columnString())
.addContext(source + " column", revised.columnString())
.addContext(errorContext)
.build(logger);
}
}
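
  // Illustrative sketch (hypothetical columns): two concrete definitions
  // of "a" with different types conflict, regardless of which schema
  // source proposed the revision:
  //
  //   ColumnMetadata existing = MetadataUtils.newScalar("a",
  //       Types.required(MinorType.INT));
  //   ColumnMetadata revised = MetadataUtils.newScalar("a",
  //       Types.required(MinorType.VARCHAR));
  //   SchemaUtils.verifyConsistency(existing, revised, "Provided", errorContext);
  //   // UserException: "Scan and Provided column type conflict"
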
  public static void verifyProjection(ColumnMetadata existing,
      ColumnMetadata revised, String source,
      CustomErrorContext errorContext) {
    if (existing instanceof ProjectedColumn) {
      verifyCompatibility((ProjectedColumn) existing, revised, source, errorContext);
    } else {
      verifyConsistency(existing, revised, source, errorContext);
    }
  }

  public static void mergeColProperties(ColumnMetadata existing, ColumnMetadata revised) {
    mergeProperties(existing, revised);
    if (existing.isMap() && revised.isMap()) {
      mergeProperties(existing.tupleSchema(), revised.tupleSchema());
    }
  }

  public static void mergeProperties(Propertied existing, Propertied revised) {
    if (!revised.hasProperties()) {
      return;
    }
    existing.properties().putAll(revised.properties());
  }
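
  // Note that the merge uses Map.putAll() semantics: a property present in
  // both objects takes the revised value. A minimal sketch with
  // hypothetical columns:
  //
  //   existing.setProperty("prop", "old");
  //   revised.setProperty("prop", "new");
  //   SchemaUtils.mergeProperties(existing, revised);
  //   existing.property("prop");  // "new"
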
  public static boolean isStrict(TupleMetadata schema) {
    return schema.booleanProperty(TupleMetadata.IS_STRICT_SCHEMA_PROP);
  }

  public static void markStrict(TupleMetadata schema) {
    schema.setBooleanProperty(TupleMetadata.IS_STRICT_SCHEMA_PROP, true);
  }
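
  // A "strict" provided schema tells the scan to expand a wildcard to only
  // the columns listed in the schema. A minimal sketch using Drill's
  // SchemaBuilder (the schema content is hypothetical):
  //
  //   TupleMetadata schema = new SchemaBuilder()
  //       .add("a", MinorType.INT)
  //       .buildSchema();
  //   SchemaUtils.markStrict(schema);
  //   SchemaUtils.isStrict(schema);  // true
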
  public static String implicitColType(ColumnMetadata col) {
    return col.property(ColumnMetadata.IMPLICIT_COL_TYPE);
  }

  public static boolean isImplicit(ColumnMetadata col) {
    return implicitColType(col) != null;
  }

  public static void markImplicit(ColumnMetadata col, String value) {
    col.setProperty(ColumnMetadata.IMPLICIT_COL_TYPE, value);
  }

  public static void markAsPartition(ColumnMetadata col, int level) {
    markImplicit(col, ColumnMetadata.IMPLICIT_PARTITION_PREFIX + level);
  }

  public static void markExcludeFromWildcard(ColumnMetadata col) {
    col.setBooleanProperty(ColumnMetadata.EXCLUDE_FROM_WILDCARD, true);
  }

  public static boolean isExcludedFromWildcard(ColumnMetadata col) {
    return col.booleanProperty(ColumnMetadata.EXCLUDE_FROM_WILDCARD);
  }
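
  // Illustrative sketch: marking a directory column as the level-0
  // partition column, then querying the marker (the column is
  // hypothetical):
  //
  //   ColumnMetadata dir0 = MetadataUtils.newScalar("dir0",
  //       Types.optional(MinorType.VARCHAR));
  //   SchemaUtils.markAsPartition(dir0, 0);
  //   SchemaUtils.isImplicit(dir0);  // true
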
  public static ScanProjectionParser.ProjectionParseResult projectAll() {
    TupleMetadata projSet = new TupleSchema();
    projSet.setProperty(ScanProjectionParser.PROJECTION_TYPE_PROP, ScanProjectionParser.PROJECT_ALL);
    return new ScanProjectionParser.ProjectionParseResult(0, projSet);
  }

  public static void markProjectAll(ColumnMetadata col) {
    Preconditions.checkArgument(col.isMap());
    col.tupleSchema().setProperty(ScanProjectionParser.PROJECTION_TYPE_PROP, ScanProjectionParser.PROJECT_ALL);
  }

  public static ScanProjectionParser.ProjectionParseResult projectNone() {
    TupleMetadata projSet = new TupleSchema();
    projSet.setProperty(ScanProjectionParser.PROJECTION_TYPE_PROP, ScanProjectionParser.PROJECT_NONE);
    return new ScanProjectionParser.ProjectionParseResult(-1, projSet);
  }

  public static boolean isProjectAll(TupleMetadata tuple) {
    return ScanProjectionParser.PROJECT_ALL.equals(tuple.property(ScanProjectionParser.PROJECTION_TYPE_PROP));
  }

  public static boolean isProjectNone(TupleMetadata tuple) {
    return ScanProjectionParser.PROJECT_NONE.equals(tuple.property(ScanProjectionParser.PROJECTION_TYPE_PROP));
  }
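
  // The projection type rides along as a tuple property, so any consumer
  // holding the parsed projection schema can test it. A minimal sketch
  // (assuming ProjectionParseResult exposes its schema as dynamicSchema):
  //
  //   ScanProjectionParser.ProjectionParseResult result = SchemaUtils.projectAll();
  //   SchemaUtils.isProjectAll(result.dynamicSchema);   // true
  //   SchemaUtils.isProjectNone(result.dynamicSchema);  // false
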
  public static void copyMapProperties(ProjectedColumn source,
      ColumnMetadata dest) {
    if (source != null && source.isMap()) {
      Preconditions.checkArgument(dest.isMap());
      SchemaUtils.copyProperties(source.tupleSchema(), dest.tupleSchema());
    } else {
      markProjectAll(dest);
    }
  }

  static void copyProperties(TupleMetadata source,
      TupleMetadata dest) {
    String value = source.property(ScanProjectionParser.PROJECTION_TYPE_PROP);
    if (value != null) {
      dest.setProperty(ScanProjectionParser.PROJECTION_TYPE_PROP, value);
    }
  }

  /**
   * Converts the specified {@code RelDataType relDataType} into {@link ColumnMetadata}.
   * If the specified {@code relDataType} is a struct, a map with recursively
   * converted children is created.
   *
   * @param name field name
   * @param relDataType field type
   * @return {@link ColumnMetadata} which corresponds to the specified {@code RelDataType relDataType}
   */
public static ColumnMetadata getColumnMetadata(String name, RelDataType relDataType) {
switch (relDataType.getSqlTypeName()) {
case ARRAY:
return getArrayMetadata(name, relDataType);
case MAP:
case OTHER:
throw new UnsupportedOperationException(String.format("Unsupported data type: %s", relDataType.getSqlTypeName()));
default:
if (relDataType.isStruct()) {
return getStructMetadata(name, relDataType);
} else {
return new PrimitiveColumnMetadata(
MaterializedField.create(name,
TypeInferenceUtils.getDrillMajorTypeFromCalciteType(relDataType)));
}
}
}
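
  // Illustrative sketch, assuming a Calcite type factory is available
  // (SqlTypeFactoryImpl and RelDataTypeSystem.DEFAULT are standard Calcite
  // classes; they would need imports in real code):
  //
  //   RelDataTypeFactory factory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
  //   RelDataType varcharType = factory.createSqlType(SqlTypeName.VARCHAR);
  //   ColumnMetadata col = SchemaUtils.getColumnMetadata("name", varcharType);
  //   // col is a PrimitiveColumnMetadata with the Drill VARCHAR type
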
  /**
   * Returns a {@link ColumnMetadata} instance which corresponds to the
   * specified array {@code RelDataType relDataType}.
   *
   * @param name name of the field
   * @param relDataType the source of type information to construct the schema
   * @return {@link ColumnMetadata} instance
   */
private static ColumnMetadata getArrayMetadata(String name, RelDataType relDataType) {
RelDataType componentType = relDataType.getComponentType();
ColumnMetadata childColumnMetadata = getColumnMetadata(name, componentType);
switch (componentType.getSqlTypeName()) {
case ARRAY:
// for the case when nested type is array, it should be placed into repeated list
return MetadataUtils.newRepeatedList(name, childColumnMetadata);
      case MAP:
      case OTHER:
        // Report the offending component type, not the enclosing ARRAY type.
        throw new UnsupportedOperationException(
            String.format("Unsupported data type: %s", componentType.getSqlTypeName()));
default:
if (componentType.isStruct()) {
// for the case when nested type is struct, it should be placed into repeated map
return MetadataUtils.newMapArray(name, childColumnMetadata.tupleSchema());
} else {
// otherwise creates column metadata with repeated data mode
return new PrimitiveColumnMetadata(
MaterializedField.create(name,
Types.overrideMode(
TypeInferenceUtils.getDrillMajorTypeFromCalciteType(componentType),
TypeProtos.DataMode.REPEATED)));
}
}
}
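
  // For example (using the same hypothetical Calcite factory as above), a
  // simple array maps to a REPEATED-mode column, while a nested array maps
  // to a repeated list:
  //
  //   RelDataType ints = factory.createArrayType(
  //       factory.createSqlType(SqlTypeName.INTEGER), -1);
  //   getColumnMetadata("a", ints);                         // REPEATED INT
  //   getColumnMetadata("a", factory.createArrayType(ints, -1));
  //   // repeated list of INT
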
  /**
   * Returns {@link MapColumnMetadata} column metadata created from the
   * specified {@code RelDataType relDataType}, with the {@code relDataType}'s
   * children converted to {@link ColumnMetadata}.
   *
   * @param name name of the field
   * @param relDataType {@link RelDataType} the source of the children for the resulting schema
   * @return {@link MapColumnMetadata} column metadata
   */
private static MapColumnMetadata getStructMetadata(String name, RelDataType relDataType) {
TupleMetadata mapSchema = new TupleSchema();
relDataType.getFieldList().stream()
.map(field -> getColumnMetadata(field.getName(), field.getType()))
.filter(metadata -> metadata.type() != MinorType.LATE)
.forEach(mapSchema::addColumn);
return MetadataUtils.newMap(name, mapSchema);
}
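
  // For example (hypothetical factory, as above), a two-field struct
  // becomes a Drill map with two converted children:
  //
  //   RelDataType struct = factory.builder()
  //       .add("x", SqlTypeName.INTEGER)
  //       .add("y", SqlTypeName.VARCHAR)
  //       .build();
  //   getColumnMetadata("point", struct);  // MapColumnMetadata with x, y
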
}