blob: 48bed79d62c2e80c9764ee0577242e6469fcd2a5 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.analysis;
import static org.apache.impala.analysis.ToSqlOptions.REWRITTEN;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import org.apache.impala.analysis.StmtMetadataLoader.StmtTableCache;
import org.apache.impala.authorization.AuthorizationChecker;
import org.apache.impala.authorization.AuthorizationContext;
import org.apache.impala.authorization.AuthorizationFactory;
import org.apache.impala.authorization.PrivilegeRequest;
import org.apache.impala.authorization.AuthorizationException;
import org.apache.impala.catalog.FeCatalog;
import org.apache.impala.catalog.Type;
import org.apache.impala.common.AnalysisException;
import org.apache.impala.common.ImpalaException;
import org.apache.impala.common.RuntimeEnv;
import org.apache.impala.rewrite.ExprRewriter;
import org.apache.impala.thrift.TAccessEvent;
import org.apache.impala.thrift.TClientRequest;
import org.apache.impala.thrift.TLineageGraph;
import org.apache.impala.thrift.TQueryCtx;
import org.apache.impala.thrift.TQueryOptions;
import org.apache.impala.util.EventSequence;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Wrapper class for parsing, analyzing and rewriting a SQL stmt.
 */
public class AnalysisContext {
private final static Logger LOG = LoggerFactory.getLogger(AnalysisContext.class);
private final TQueryCtx queryCtx_;
private final AuthorizationFactory authzFactory_;
private final EventSequence timeline_;
// Set in analyzeAndAuthorize().
private FeCatalog catalog_;
private AnalysisResult analysisResult_;
// Use Hive's scheme for auto-generating column labels. Only used for testing.
private boolean useHiveColLabels_;
/**
 * Creates an analysis context that keeps references to the given query context,
 * authorization factory and timeline for use by later analysis calls.
 *
 * @param queryCtx query context stored in {@code queryCtx_}
 * @param authzFactory authorization factory stored in {@code authzFactory_}
 * @param timeline event sequence stored in {@code timeline_}
 */
public AnalysisContext(TQueryCtx queryCtx, AuthorizationFactory authzFactory,
    EventSequence timeline) {
  queryCtx_ = queryCtx;
  authzFactory_ = authzFactory;
  timeline_ = timeline;
}
/** Returns the catalog captured by analyzeAndAuthorize(), or null before it has run. */
public FeCatalog getCatalog() {
  return catalog_;
}
/** Returns the query context this analysis context was constructed with. */
public TQueryCtx getQueryCtx() {
  return queryCtx_;
}
public TQueryOptions getQueryOptions() {
return queryCtx_.client_request.query_options;
/** Returns the connected user recorded in the session of the query context. */
public String getUser() {
  return queryCtx_.session.connected_user;
}
public void setUseHiveColLabels(boolean b) {
useHiveColLabels_ = b;
static public class AnalysisResult {
private StatementBase stmt_;
private Analyzer analyzer_;
private boolean userHasProfileAccess_ = true;
public boolean isAlterTableStmt() { return stmt_ instanceof AlterTableStmt; }
public boolean isAlterViewStmt() { return stmt_ instanceof AlterViewStmt; }
public boolean isComputeStatsStmt() { return stmt_ instanceof ComputeStatsStmt; }
public boolean isQueryStmt() { return stmt_ instanceof QueryStmt; }
public boolean isSetOperationStmt() { return stmt_ instanceof SetOperationStmt; }
public boolean isInsertStmt() { return stmt_ instanceof InsertStmt; }
public boolean isDropDbStmt() { return stmt_ instanceof DropDbStmt; }
public boolean isDropTableOrViewStmt() {
return stmt_ instanceof DropTableOrViewStmt;
public boolean isDropFunctionStmt() { return stmt_ instanceof DropFunctionStmt; }
public boolean isDropDataSrcStmt() { return stmt_ instanceof DropDataSrcStmt; }
public boolean isDropStatsStmt() { return stmt_ instanceof DropStatsStmt; }
public boolean isCreateTableLikeStmt() {
return stmt_ instanceof CreateTableLikeStmt;
public boolean isCreateViewStmt() { return stmt_ instanceof CreateViewStmt; }
public boolean isCreateTableAsSelectStmt() {
return stmt_ instanceof CreateTableAsSelectStmt;
public boolean isCreateTableStmt() { return stmt_ instanceof CreateTableStmt; }
public boolean isCreateDbStmt() { return stmt_ instanceof CreateDbStmt; }
public boolean isCreateUdfStmt() { return stmt_ instanceof CreateUdfStmt; }
public boolean isCreateUdaStmt() { return stmt_ instanceof CreateUdaStmt; }
public boolean isCreateDataSrcStmt() { return stmt_ instanceof CreateDataSrcStmt; }
public boolean isLoadDataStmt() { return stmt_ instanceof LoadDataStmt; }
public boolean isUseStmt() { return stmt_ instanceof UseStmt; }
public boolean isSetStmt() { return stmt_ instanceof SetStmt; }
public boolean isShowTablesStmt() { return stmt_ instanceof ShowTablesStmt; }
public boolean isDescribeHistoryStmt() {
return stmt_ instanceof DescribeHistoryStmt;
public boolean isShowDbsStmt() { return stmt_ instanceof ShowDbsStmt; }
public boolean isShowDataSrcsStmt() { return stmt_ instanceof ShowDataSrcsStmt; }
public boolean isShowStatsStmt() { return stmt_ instanceof ShowStatsStmt; }
public boolean isShowFunctionsStmt() { return stmt_ instanceof ShowFunctionsStmt; }
public boolean isShowCreateTableStmt() {
return stmt_ instanceof ShowCreateTableStmt;
public boolean isShowCreateFunctionStmt() {
return stmt_ instanceof ShowCreateFunctionStmt;
public boolean isShowFilesStmt() { return stmt_ instanceof ShowFilesStmt; }
public boolean isAdminFnStmt() { return stmt_ instanceof AdminFnStmt; }
public boolean isDescribeDbStmt() { return stmt_ instanceof DescribeDbStmt; }
public boolean isDescribeTableStmt() { return stmt_ instanceof DescribeTableStmt; }
public boolean isResetMetadataStmt() { return stmt_ instanceof ResetMetadataStmt; }
public boolean isExplainStmt() { return stmt_.isExplain(); }
public boolean isShowRolesStmt() { return stmt_ instanceof ShowRolesStmt; }
public boolean isShowGrantPrincipalStmt() {
return stmt_ instanceof ShowGrantPrincipalStmt;
public boolean isCreateDropRoleStmt() { return stmt_ instanceof CreateDropRoleStmt; }
public boolean isGrantRevokeRoleStmt() {
return stmt_ instanceof GrantRevokeRoleStmt;
public boolean isGrantRevokePrivStmt() {
return stmt_ instanceof GrantRevokePrivStmt;
public boolean isTruncateStmt() { return stmt_ instanceof TruncateStmt; }
public boolean isUpdateStmt() { return stmt_ instanceof UpdateStmt; }
public UpdateStmt getUpdateStmt() { return (UpdateStmt) stmt_; }
public boolean isDeleteStmt() { return stmt_ instanceof DeleteStmt; }
public DeleteStmt getDeleteStmt() { return (DeleteStmt) stmt_; }
public boolean isCommentOnStmt() { return stmt_ instanceof CommentOnStmt; }
public boolean isAlterDbStmt() { return stmt_ instanceof AlterDbStmt; }
public boolean isCatalogOp() {
return isUseStmt() || isViewMetadataStmt() || isDdlStmt();
public boolean isTestCaseStmt() { return stmt_ instanceof CopyTestCaseStmt; }
private boolean isDdlStmt() {
return isCreateTableLikeStmt() || isCreateTableStmt() ||
isCreateViewStmt() || isCreateDbStmt() || isDropDbStmt() ||
isDropTableOrViewStmt() || isResetMetadataStmt() || isAlterTableStmt() ||
isAlterViewStmt() || isComputeStatsStmt() || isCreateUdfStmt() ||
isCreateUdaStmt() || isDropFunctionStmt() || isCreateTableAsSelectStmt() ||
isCreateDataSrcStmt() || isDropDataSrcStmt() || isDropStatsStmt() ||
isCreateDropRoleStmt() || isGrantRevokeStmt() || isTruncateStmt() ||
isCommentOnStmt() || isAlterDbStmt();
private boolean isViewMetadataStmt() {
return isShowFilesStmt() || isShowTablesStmt() || isShowDbsStmt() ||
isShowFunctionsStmt() || isShowRolesStmt() || isShowGrantPrincipalStmt() ||
isShowCreateTableStmt() || isShowDataSrcsStmt() || isShowStatsStmt() ||
isDescribeTableStmt() || isDescribeDbStmt() || isShowCreateFunctionStmt() ||
private boolean isGrantRevokeStmt() {
return isGrantRevokeRoleStmt() || isGrantRevokePrivStmt();
public boolean isDmlStmt() {
return isInsertStmt() || isUpdateStmt() || isDeleteStmt();
* Returns true for statements that may produce several privilege requests of
* hierarchical nature, e.g., table/column.
public boolean isHierarchicalAuthStmt() {
return isQueryStmt() || isInsertStmt() || isUpdateStmt() || isDeleteStmt()
|| isCreateTableAsSelectStmt() || isCreateViewStmt() || isAlterViewStmt()
|| isTestCaseStmt();
* Returns true for statements that may produce a single column-level privilege
* request without a request at the table level.
* Example: USE functional; ALTER TABLE allcomplextypes.int_array_col [...];
* The path 'allcomplextypes.int_array_col' table ref path resolves to
* a column, so a column-level privilege request is registered.
public boolean isSingleColumnPrivStmt() {
return isDescribeTableStmt() || isResetMetadataStmt() || isUseStmt()
|| isShowTablesStmt() || isAlterTableStmt() || isShowFunctionsStmt();
public AlterTableStmt getAlterTableStmt() {
return (AlterTableStmt) stmt_;
public AlterViewStmt getAlterViewStmt() {
return (AlterViewStmt) stmt_;
public ComputeStatsStmt getComputeStatsStmt() {
return (ComputeStatsStmt) stmt_;
public CreateTableLikeStmt getCreateTableLikeStmt() {
return (CreateTableLikeStmt) stmt_;
public CreateViewStmt getCreateViewStmt() {
return (CreateViewStmt) stmt_;
public CreateTableAsSelectStmt getCreateTableAsSelectStmt() {
return (CreateTableAsSelectStmt) stmt_;
public CreateTableStmt getCreateTableStmt() {
return (CreateTableStmt) stmt_;
public CreateDbStmt getCreateDbStmt() {
return (CreateDbStmt) stmt_;
public CreateUdfStmt getCreateUdfStmt() {
return (CreateUdfStmt) stmt_;
public CreateUdaStmt getCreateUdaStmt() {
return (CreateUdaStmt) stmt_;
public DropDbStmt getDropDbStmt() {
return (DropDbStmt) stmt_;
public DropTableOrViewStmt getDropTableOrViewStmt() {
return (DropTableOrViewStmt) stmt_;
public TruncateStmt getTruncateStmt() {
return (TruncateStmt) stmt_;
public DropFunctionStmt getDropFunctionStmt() {
return (DropFunctionStmt) stmt_;
public LoadDataStmt getLoadDataStmt() {
return (LoadDataStmt) stmt_;
public QueryStmt getQueryStmt() {
return (QueryStmt) stmt_;
public InsertStmt getInsertStmt() {
if (isCreateTableAsSelectStmt()) {
return getCreateTableAsSelectStmt().getInsertStmt();
} else {
return (InsertStmt) stmt_;
public UseStmt getUseStmt() {
return (UseStmt) stmt_;
public SetStmt getSetStmt() {
return (SetStmt) stmt_;
public ShowTablesStmt getShowTablesStmt() {
return (ShowTablesStmt) stmt_;
public ShowDbsStmt getShowDbsStmt() {
return (ShowDbsStmt) stmt_;
public ShowDataSrcsStmt getShowDataSrcsStmt() {
return (ShowDataSrcsStmt) stmt_;
public ShowStatsStmt getShowStatsStmt() {
return (ShowStatsStmt) stmt_;
public ShowFunctionsStmt getShowFunctionsStmt() {
return (ShowFunctionsStmt) stmt_;
public ShowFilesStmt getShowFilesStmt() {
return (ShowFilesStmt) stmt_;
public DescribeHistoryStmt getDescribeHistoryStmt() {
return (DescribeHistoryStmt) stmt_;
public DescribeDbStmt getDescribeDbStmt() {
return (DescribeDbStmt) stmt_;
public DescribeTableStmt getDescribeTableStmt() {
return (DescribeTableStmt) stmt_;
public ShowCreateTableStmt getShowCreateTableStmt() {
return (ShowCreateTableStmt) stmt_;
public ShowCreateFunctionStmt getShowCreateFunctionStmt() {
return (ShowCreateFunctionStmt) stmt_;
public CommentOnStmt getCommentOnStmt() {
return (CommentOnStmt) stmt_;
public AlterDbStmt getAlterDbStmt() {
return (AlterDbStmt) stmt_;
public AdminFnStmt getAdminFnStmt() {
return (AdminFnStmt) stmt_;
public StatementBase getStmt() { return stmt_; }
public Analyzer getAnalyzer() { return analyzer_; }
public Set<TAccessEvent> getAccessEvents() { return analyzer_.getAccessEvents(); }
public boolean canRewriteStatement() {
return !isCreateViewStmt() && !isAlterViewStmt() && !isShowCreateTableStmt();
public boolean requiresSubqueryRewrite() {
return canRewriteStatement() && analyzer_.containsSubquery();
public boolean requiresAcidComplexScanRewrite() {
return canRewriteStatement() && analyzer_.hasTopLevelAcidCollectionTableRef();
public boolean requiresExprRewrite() {
return isQueryStmt() || isInsertStmt() || isCreateTableAsSelectStmt()
|| isUpdateStmt() || isDeleteStmt();
public boolean requiresSetOperationRewrite() {
return analyzer_.containsSetOperation() && !isCreateViewStmt() && !isAlterViewStmt()
&& !isShowCreateTableStmt();
public TLineageGraph getThriftLineageGraph() {
return analyzer_.getThriftSerializedLineageGraph();
public void setUserHasProfileAccess(boolean value) { userHasProfileAccess_ = value; }
public boolean userHasProfileAccess() { return userHasProfileAccess_; }
public Analyzer createAnalyzer(StmtTableCache stmtTableCache) {
return createAnalyzer(stmtTableCache, null);
public Analyzer createAnalyzer(StmtTableCache stmtTableCache,
AuthorizationContext authzCtx) {
Analyzer result = new Analyzer(stmtTableCache, queryCtx_, authzFactory_, authzCtx);
return result;
/**
 * Analyzes and authorizes the given statement using the provided table cache and
 * authorization checker.
 * AuthorizationExceptions take precedence over AnalysisExceptions so as not to
 * reveal the existence/absence of objects the user is not authorized to see.
 */
// Runs analysis and then authorization for 'stmt' and returns the populated
// AnalysisResult. Exceptions from both phases are recorded in locals first; when both
// phases fail, the AuthorizationException is the one that is thrown.
// NOTE(review): the closing braces of the try/catch/finally statements and of this
// method are absent from this copy of the file -- restore them from version control.
public AnalysisResult analyzeAndAuthorize(StatementBase stmt,
StmtTableCache stmtTableCache, AuthorizationChecker authzChecker)
throws ImpalaException {
// TODO: Clean up the creation/setting of the analysis result.
// A fresh result object is created on every call, replacing any previous one.
analysisResult_ = new AnalysisResult();
analysisResult_.stmt_ = stmt;
catalog_ = stmtTableCache.catalog;
// Analyze statement and record exception.
AnalysisException analysisException = null;
TClientRequest clientRequest = queryCtx_.getClient_request();
// The authorization context is given the redacted statement text when the client
// request has one set, and the raw statement text otherwise.
AuthorizationContext authzCtx = authzChecker.createAuthorizationContext(true,
clientRequest.isSetRedacted_stmt() ?
clientRequest.getRedacted_stmt() : clientRequest.getStmt(),
queryCtx_.getSession(), Optional.of(timeline_));
Preconditions.checkState(authzCtx != null);
try {
analyze(stmtTableCache, authzCtx);
} catch (AnalysisException e) {
// Held back rather than rethrown here; it is thrown at the end of the method.
analysisException = e;
} finally {
timeline_.markEvent("Analysis finished");
// Authorize statement and record exception. Authorization relies on information
// collected during analysis.
AuthorizationException authException = null;
try {
authzChecker.authorize(authzCtx, analysisResult_, catalog_);
} catch (AuthorizationException e) {
authException = e;
} finally {
// The second argument is true only when no AuthorizationException was recorded.
authzChecker.postAuthorize(authzCtx, authException == null);
// AuthorizationExceptions take precedence over AnalysisExceptions so as not
// to reveal the existence/absence of objects the user is not authorized to see.
if (authException != null) throw authException;
if (analysisException != null) throw analysisException;
return analysisResult_;
/**
 * Analyzes the statement set in 'analysisResult_' with a new Analyzer based on the
 * given loaded tables. Performs expr and subquery rewrites which require re-analyzing
 * the transformed statement.
 */
// Installs a new Analyzer in 'analysisResult_', applies the table-masking, expr,
// subquery, set-operation and ACID rewrites that the result asks for, and re-analyzes
// the statement through reAnalyzeWithoutPrivChecks() when a rewrite requires it.
// NOTE(review): this copy of the method is incomplete. All closing braces are absent
// and several statements are cut off (each is flagged below); restore the method from
// version control before relying on it.
private void analyze(StmtTableCache stmtTableCache, AuthorizationContext authzCtx)
throws AnalysisException {
analysisResult_.analyzer_ = createAnalyzer(stmtTableCache, authzCtx);
// NOTE(review): no call that analyzes the statement with the new analyzer is visible
// at this point, and no code enforcing the expression limit follows the comment below.
// Enforce the statement expression limit at the end of analysis so that there is an
// accurate count of the total number of expressions. The first analyze() call is not
// very expensive (~seconds) even for large statements. The limit on the total length
// of the SQL statement (max_statement_length_bytes) provides an upper bound.
// It is important to enforce this before expression rewrites, because rewrites are
// expensive with large expression trees. For example, a SQL that takes a few seconds
// to analyze the first time may take 10 minutes for rewrites.
// The rewrites should have no user-visible effect on query results, including types
// and labels. Remember the original result types and column labels to restore them
// after the rewritten stmt has been reset() and re-analyzed. For a CTAS statement,
// the types represent column types of the table that will be created, including the
// partition columns, if any.
List<Type> origResultTypes = new ArrayList<>();
// NOTE(review): the body of this loop is missing; presumably it records each result
// expr's type in origResultTypes -- confirm.
for (Expr e : analysisResult_.stmt_.getResultExprs()) {
// NOTE(review): the initializer of origColLabels is missing.
List<String> origColLabels =
// Apply column/row masking, expr, setop, and subquery rewrites.
boolean reAnalyze = false;
// Table masks are only resolved when authorization is enabled.
if (authzFactory_.getAuthorizationConfig().isEnabled()) {
reAnalyze = analysisResult_.stmt_.resolveTableMask(analysisResult_.analyzer_);
// If any catalog table/view is replaced by table masking views, we need to
// resolve them. Also re-analyze the SlotRefs to reference the output exprs of
// the table masking views.
if (reAnalyze) {
// NOTE(review): the argument list of this call is cut off after origResultTypes.
reAnalyzeWithoutPrivChecks(stmtTableCache, authzCtx, origResultTypes,
reAnalyze = false;
ExprRewriter rewriter = analysisResult_.analyzer_.getExprRewriter();
if (analysisResult_.requiresExprRewrite()) {
// NOTE(review): no statement applying 'rewriter' to the stmt is visible before
// changed() is queried -- presumably one is missing here.
reAnalyze = rewriter.changed();
if (analysisResult_.requiresSubqueryRewrite()) {
new StmtRewriter.SubqueryRewriter().rewrite(analysisResult_);
reAnalyze = true;
if (analysisResult_.requiresSetOperationRewrite()) {
new StmtRewriter().rewrite(analysisResult_);
reAnalyze = true;
if (analysisResult_.requiresAcidComplexScanRewrite()) {
new StmtRewriter.AcidRewriter().rewrite(analysisResult_);
reAnalyze = true;
// Nothing was rewritten, so the result of the first analysis pass stands.
if (!reAnalyze) return;
// For SetOperationStmt we must replace the query statement with the rewritten version
// before re-analysis.
if (analysisResult_.requiresSetOperationRewrite()) {
if (analysisResult_.isSetOperationStmt()) {
if (((SetOperationStmt) analysisResult_.getStmt()).hasRewrittenStmt()) {
analysisResult_.stmt_ =
((SetOperationStmt) analysisResult_.getStmt()).getRewrittenStmt();
reAnalyzeWithoutPrivChecks(stmtTableCache, authzCtx, origResultTypes, origColLabels);
// Re-analyzes the statement in 'analysisResult_' with a freshly created Analyzer.
// According to the comments below, the privilege requests, result types and column
// labels from the first pass are carried over to the new analysis, and the statement's
// explain flag is set again at the end if it was set on entry.
// NOTE(review): this copy of the method is incomplete. All closing braces are absent
// and several statements are cut off (each is flagged below); restore the method from
// version control before relying on it.
private void reAnalyzeWithoutPrivChecks(StmtTableCache stmtTableCache,
AuthorizationContext authzCtx, List<Type> origResultTypes,
List<String> origColLabels) throws AnalysisException {
// Captured up front so the flag can be re-applied to the statement at the end.
boolean isExplain = analysisResult_.isExplainStmt();
// Some expressions, such as function calls with constant arguments, can get
// folded into literals. Since literals do not require privilege requests, we
// must save the original privileges in order to not lose them during
// re-analysis.
// NOTE(review): the initializer of origPrivReqs is missing.
ImmutableList<PrivilegeRequest> origPrivReqs =
// Re-analyze the stmt with a new analyzer.
analysisResult_.analyzer_ = createAnalyzer(stmtTableCache, authzCtx);
// We restore the privileges collected in the first pass below. So, no point in
// collecting them again.
// NOTE(review): only the re-enabling of privilege checks is visible in this try
// block; presumably the statements that disable the checks and re-analyze the stmt
// are missing -- confirm.
try {
analysisResult_.analyzer_.setEnablePrivChecks(true); // restore
} catch (AnalysisException e) {
// NOTE(review): the argument list of this logging call is cut off.
LOG.error(String.format("Error analyzing the rewritten query.\n" +
"Original SQL: %s\nRewritten SQL: %s", analysisResult_.stmt_.toSql(),
throw e;
// Restore the original result types and column labels.
// NOTE(review): no statements implementing the restore described above are visible.
if (LOG.isTraceEnabled()) {
LOG.trace("Rewritten SQL: " + analysisResult_.stmt_.toSql(REWRITTEN));
// Restore privilege requests found during the first pass
// NOTE(review): the body of this loop is missing.
for (PrivilegeRequest req : origPrivReqs) {
if (isExplain) analysisResult_.stmt_.setIsExplain();
/**
 * Returns the Analyzer held by the current analysis result. Dereferences
 * 'analysisResult_', which is only assigned in analyzeAndAuthorize().
 */
public Analyzer getAnalyzer() {
  return analysisResult_.getAnalyzer();
}
/** Returns the event sequence this analysis context was constructed with. */
public EventSequence getTimeline() {
  return timeline_;
}
// This should only be called after analyzeAndAuthorize().
public AnalysisResult getAnalysisResult() {
return analysisResult_;