blob: b3cd9e4dfd493e438853d4d97642c0687a8f415f [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.analysis;
import java.util.ArrayList;
import java.util.List;
import org.apache.impala.catalog.ArrayType;
import org.apache.impala.catalog.Column;
import org.apache.impala.catalog.FeTable;
import org.apache.impala.catalog.MapType;
import org.apache.impala.catalog.StructField;
import org.apache.impala.catalog.StructType;
import org.apache.impala.catalog.Type;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
/**
* Represents a resolved or unresolved dot-separated path that is rooted at a registered
* tuple descriptor, catalog table/view, or an existing resolved path.
*
* This class implements the resolution logic for mapping an implicit or explicit
* raw path to the corresponding physical types/positions in the schema tree.
*
* Implicit vs. Explicit Paths
* The item of an array and the key/value of maps are accessed via their implicit field
* names. However, if the type of an array item or a map value is a struct, then we allow
* omitting the explicit reference to the struct type in paths for accessing fields
* within that struct as a shorthand for user convenience. An explicit reference to the
* struct type is always legal. Paths that explicitly reference such a struct are
* "physical" because they typically map exactly to the schema representation in the
* underlying storage format (e.g. Parquet/Avro). Paths that omit the struct reference
* are called "implicit". During resolution, explicit paths are always preferred over
* implicit paths for resolving ambiguities.
*
* Example
* create table d.t (
* c array<struct<f:int,item:int,pos:int>>
* );
*
* select ... from d.t.c
* d.t.c <-- resolves to type array<struct<f:int,item:int,pos:int>>
* c alias <-- type struct<item:struct<f:int,item:int,pos:int>,pos:bigint>>
*
* select c.item.f, c.f from d.t.c
* c.item.f <-- explicit path to "f"
* c.f <-- implicit path to "f", skips "item" reference
* (same for the unqualified versions item.f and f)
*
* select c.item, c.item.item from d.t.c
* c.item <-- explicit path to "item" struct of type struct<f:int,item:string>
* c.item.item <-- explicit path to string "item"; there is no logical path to the
* string "item" due to the "item" name conflict
* c.pos <-- explicit path to "pos" of type bigint
* c.item.pos <-- explicit path to "pos" of type int; there is no logical path to the
* int "pos" due to the "pos" name conflict
* (same for unqualified versions item, item.item, pos, item.pos)
*
* Please refer to TestImplicitAndExplicitPaths() for analogous examples for maps.
*
* Illegal Implicit Paths
* The intention of implicit paths is to allow users to skip a *single* trivial level of
* indirection in common cases. In particular, it is illegal to implicitly skip multiple
* levels in a path, illustrated as follows.
*
* Example
* create table d.t (
* c array<array<struct<e:int,f:string>>>
* );
*
* select c.f from d.t.c
* select 1 from d.t.c, c.f
* c.f <-- illegal path because it would have to implicitly skip two 'item' fields
*
*
* Uses of Paths and Terminology
*
* Uncorrelated References: Star exprs, SlotRefs and TableRefs that are rooted at a
* catalog Table or a registered TupleDescriptor in the same query block.
*
* Relative References: TableRefs that are rooted at a TupleDescriptor.
*
* Correlated References: SlotRefs and TableRefs that are rooted at a TupleDescriptor
* registered in an ancestor query block are called 'correlated'. All correlated
* references are relative, but not all relative references are correlated.
*
* A Path itself is never said to be un/correlated because it is intentionally unaware
* of the query block that it is used in.
*/
public class Path {
// Implicit field names of collections.
public static final String ARRAY_ITEM_FIELD_NAME = "item";
public static final String ARRAY_POS_FIELD_NAME = "pos";
public static final String MAP_KEY_FIELD_NAME = "key";
public static final String MAP_VALUE_FIELD_NAME = "value";
public static enum PathType {
SLOT_REF,
TABLE_REF,
STAR,
ANY, // Reference to any field or table in schema.
}
// Implicit or explicit raw path to be resolved relative to rootDesc_ or rootTable_.
// Every raw-path element is mapped to zero, one or two types/positions in resolution.
private final List<String> rawPath_;
// Registered table alias that this path is rooted at, if any.
// Null if the path is rooted at a catalog table/view.
private final TupleDescriptor rootDesc_;
// Catalog table that this resolved path is rooted at, if any.
// Null if the path is rooted at a registered tuple that does not
// belong to a catalog table/view.
private final FeTable rootTable_;
// Root path that a relative path was created from.
private final Path rootPath_;
// List of matched types and field positions set during resolution. The matched
// types/positions describe the physical path through the schema tree.
private final List<Type> matchedTypes_ = new ArrayList<>();
private final List<Integer> matchedPositions_ = new ArrayList<>();
// Remembers the indices into rawPath_ and matchedTypes_ of the first collection
// matched during resolution.
private int firstCollectionPathIdx_ = -1;
private int firstCollectionTypeIdx_ = -1;
// Indicates whether this path has been resolved. Set in resolve().
private boolean isResolved_ = false;
// Caches the result of getAbsolutePath() to avoid re-computing it.
private List<Integer> absolutePath_ = null;
/**
* Constructs a Path rooted at the given rootDesc.
*/
public Path(TupleDescriptor rootDesc, List<String> rawPath) {
Preconditions.checkNotNull(rootDesc);
Preconditions.checkNotNull(rawPath);
rootTable_ = rootDesc.getTable();
rootDesc_ = rootDesc;
rootPath_ = null;
rawPath_ = rawPath;
}
/**
* Constructs a Path rooted at the given rootTable.
*/
public Path(FeTable rootTable, List<String> rawPath) {
Preconditions.checkNotNull(rootTable);
Preconditions.checkNotNull(rawPath);
rootTable_ = rootTable;
rootDesc_ = null;
rootPath_ = null;
rawPath_ = rawPath;
}
/**
* Constructs a new unresolved path relative to an existing resolved path.
*/
public Path(Path rootPath, List<String> relRawPath) {
Preconditions.checkNotNull(rootPath);
Preconditions.checkState(rootPath.isResolved());
Preconditions.checkNotNull(relRawPath);
rootTable_ = rootPath.rootTable_;
rootDesc_ = rootPath.rootDesc_;
rootPath_ = rootPath;
rawPath_ = Lists.newArrayListWithCapacity(
rootPath.getRawPath().size() + relRawPath.size());
rawPath_.addAll(rootPath.getRawPath());
rawPath_.addAll(relRawPath);
matchedTypes_.addAll(rootPath.matchedTypes_);
matchedPositions_.addAll(rootPath.matchedPositions_);
firstCollectionPathIdx_ = rootPath.firstCollectionPathIdx_;
firstCollectionTypeIdx_ = rootPath.firstCollectionTypeIdx_;
}
/**
* Resolves this path in the context of the root tuple descriptor / root table
* or continues resolving this relative path from an existing root path.
* Returns true if the path could be fully resolved, false otherwise.
* A failed resolution leaves this Path in a partially resolved state.
*/
public boolean resolve() {
if (isResolved_) return true;
Preconditions.checkState(rootDesc_ != null || rootTable_ != null);
Type currentType = null;
int rawPathIdx = 0;
if (rootPath_ != null) {
// Continue resolving this path relative to the rootPath_.
currentType = rootPath_.destType();
rawPathIdx = rootPath_.getRawPath().size();
} else if (rootDesc_ != null) {
currentType = rootDesc_.getType();
} else {
// Directly start from the item type because only implicit paths are allowed.
currentType = rootTable_.getType().getItemType();
}
// Map all remaining raw-path elements to field types and positions.
while (rawPathIdx < rawPath_.size()) {
if (!currentType.isComplexType()) return false;
StructType structType = getTypeAsStruct(currentType);
// Resolve explicit path.
StructField field = structType.getField(rawPath_.get(rawPathIdx));
if (field == null) {
// Resolve implicit path.
if (structType instanceof CollectionStructType) {
field = ((CollectionStructType) structType).getOptionalField();
// Collections must be matched explicitly.
if (field.getType().isCollectionType()) return false;
} else {
// Failed to resolve implicit or explicit path.
return false;
}
// Update the physical types/positions.
matchedTypes_.add(field.getType());
matchedPositions_.add(field.getPosition());
currentType = field.getType();
// Do not consume a raw-path element.
continue;
}
matchedTypes_.add(field.getType());
matchedPositions_.add(field.getPosition());
if (field.getType().isCollectionType() && firstCollectionPathIdx_ == -1) {
Preconditions.checkState(firstCollectionTypeIdx_ == -1);
firstCollectionPathIdx_ = rawPathIdx;
firstCollectionTypeIdx_ = matchedTypes_.size() - 1;
}
currentType = field.getType();
++rawPathIdx;
}
Preconditions.checkState(matchedTypes_.size() == matchedPositions_.size());
Preconditions.checkState(matchedTypes_.size() >= rawPath_.size());
isResolved_ = true;
return true;
}
/**
* If the given type is a collection, returns a collection struct type representing
* named fields of its explicit path. Returns the given type itself if it is already
* a struct. Requires that the given type is a complex type.
*/
public static StructType getTypeAsStruct(Type t) {
Preconditions.checkState(t.isComplexType());
if (t.isStructType()) return (StructType) t;
if (t.isArrayType()) {
return CollectionStructType.createArrayStructType((ArrayType) t);
} else {
Preconditions.checkState(t.isMapType());
return CollectionStructType.createMapStructType((MapType) t);
}
}
/**
* Returns a list of table names that might be referenced by the given path.
* The path must be non-empty.
*
* Examples: path -> result
* a -> [<sessionDb>.a]
* a.b -> [<sessionDb>.a, a.b]
* a.b.c -> [<sessionDb>.a, a.b]
* a.b.c... -> [<sessionDb>.a, a.b]
*/
public static List<TableName> getCandidateTables(List<String> path, String sessionDb) {
Preconditions.checkArgument(path != null && !path.isEmpty());
List<TableName> result = new ArrayList<>();
int end = Math.min(2, path.size());
for (int tblNameIdx = 0; tblNameIdx < end; ++tblNameIdx) {
String dbName = (tblNameIdx == 0) ? sessionDb : path.get(0);
String tblName = path.get(tblNameIdx);
result.add(new TableName(dbName, tblName));
}
return result;
}
public FeTable getRootTable() { return rootTable_; }
public TupleDescriptor getRootDesc() { return rootDesc_; }
public boolean isRootedAtTable() { return rootTable_ != null; }
public boolean isRootedAtTuple() { return rootDesc_ != null; }
public List<String> getRawPath() { return rawPath_; }
public boolean isResolved() { return isResolved_; }
public List<Type> getMatchedTypes() {
Preconditions.checkState(isResolved_);
return matchedTypes_;
}
public boolean hasNonDestCollection() {
Preconditions.checkState(isResolved_);
return firstCollectionPathIdx_ != -1 &&
firstCollectionPathIdx_ != rawPath_.size() - 1;
}
public String getFirstCollectionName() {
Preconditions.checkState(isResolved_);
if (firstCollectionPathIdx_ == -1) return null;
return rawPath_.get(firstCollectionPathIdx_);
}
public Type getFirstCollectionType() {
Preconditions.checkState(isResolved_);
if (firstCollectionTypeIdx_ == -1) return null;
return matchedTypes_.get(firstCollectionTypeIdx_);
}
public int getFirstCollectionIndex() {
Preconditions.checkState(isResolved_);
return firstCollectionTypeIdx_;
}
public Type destType() {
Preconditions.checkState(isResolved_);
if (!matchedTypes_.isEmpty()) return matchedTypes_.get(matchedTypes_.size() - 1);
if (rootDesc_ != null) return rootDesc_.getType();
if (rootTable_ != null) return rootTable_.getType();
return null;
}
public FeTable destTable() {
Preconditions.checkState(isResolved_);
if (rootTable_ != null && rootDesc_ == null && matchedTypes_.isEmpty()) {
return rootTable_;
}
return null;
}
/**
* Returns the destination Column of this path, or null if the destination of this
* path is not a Column. This path must be rooted at a table or a tuple descriptor
* corresponding to a table for the destination to be a Column.
*/
public Column destColumn() {
Preconditions.checkState(isResolved_);
if (rootTable_ == null || rawPath_.size() != 1) return null;
return rootTable_.getColumn(rawPath_.get(rawPath_.size() - 1));
}
/**
* Returns the destination tuple descriptor of this path, or null
* if the destination of this path is not a registered alias.
*/
public TupleDescriptor destTupleDesc() {
Preconditions.checkState(isResolved_);
if (rootDesc_ != null && matchedTypes_.isEmpty()) return rootDesc_;
return null;
}
public List<String> getFullyQualifiedRawPath() {
Preconditions.checkState(rootTable_ != null || rootDesc_ != null);
List<String> result = Lists.newArrayListWithCapacity(rawPath_.size() + 2);
if (rootDesc_ != null) {
result.addAll(Lists.newArrayList(rootDesc_.getAlias().split("\\.")));
} else {
result.add(rootTable_.getDb().getName());
result.add(rootTable_.getName());
}
result.addAll(rawPath_);
return result;
}
/**
* Returns the absolute explicit path starting from the fully-qualified table name.
* The goal is produce a canonical non-ambiguous path that can be used as an
* identifier for table and slot references.
*
* Example:
* create table mydb.test (a array<struct<f1:int,f2:string>>);
* use mydb;
* select f1 from test t, t.a;
*
* This function should return the following for the path of the 'f1' SlotRef:
* mydb.test.a.item.f1
*/
public List<String> getCanonicalPath() {
List<String> result = new ArrayList<>();
getCanonicalPath(result);
return result;
}
/**
* Recursive helper for getCanonicalPath().
*/
private void getCanonicalPath(List<String> result) {
Type currentType = null;
if (isRootedAtTuple()) {
rootDesc_.getPath().getCanonicalPath(result);
currentType = rootDesc_.getType();
} else {
Preconditions.checkState(isRootedAtTable());
result.add(rootTable_.getTableName().getDb());
result.add(rootTable_.getTableName().getTbl());
currentType = rootTable_.getType().getItemType();
}
// Compute the explicit path from the matched positions. Note that rawPath_ is
// not sufficient because it could contain implicit matches.
for (int i = 0; i < matchedPositions_.size(); ++i) {
StructType structType = getTypeAsStruct(currentType);
int matchPos = matchedPositions_.get(i);
Preconditions.checkState(matchPos < structType.getFields().size());
StructField match = structType.getFields().get(matchPos);
result.add(match.getName());
currentType = match.getType();
}
}
/**
* Returns the absolute physical path in positions starting from the schema root to the
* destination of this path.
*/
public List<Integer> getAbsolutePath() {
if (absolutePath_ != null) return absolutePath_;
Preconditions.checkState(isResolved_);
absolutePath_ = new ArrayList<>();
if (rootDesc_ != null) absolutePath_.addAll(rootDesc_.getPath().getAbsolutePath());
absolutePath_.addAll(matchedPositions_);
return absolutePath_;
}
@Override
public String toString() {
Preconditions.checkState(rootTable_ != null || rootDesc_ != null);
String pathRoot = null;
if (rootDesc_ != null) {
pathRoot = rootDesc_.getAlias();
} else {
pathRoot = rootTable_.getFullName();
}
if (rawPath_.isEmpty()) return pathRoot;
return pathRoot + "." + Joiner.on(".").join(rawPath_);
}
/**
* Returns a raw path from a known root alias and field name.
*/
public static List<String> createRawPath(String rootAlias, String fieldName) {
List<String> result = Lists.newArrayList(rootAlias.split("\\."));
result.add(fieldName);
return result;
}
public static Path createRelPath(Path rootPath, String... fieldNames) {
Preconditions.checkState(rootPath.isResolved());
Path result = new Path(rootPath, Lists.newArrayList(fieldNames));
return result;
}
}