blob: 7f92dc842af330c3ac51535a5e62dedc0f7b742b [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.analysis;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.impala.common.AnalysisException;
import com.google.common.base.Preconditions;
/**
* Representation of the group by clause within a select statement.
*/
public class GroupByClause {
private final static Logger LOG = LoggerFactory.getLogger(GroupByClause.class);
public enum GroupingSetsType {NONE, CUBE, SETS, ROLLUP};
private final static int GROUPING_SET_LIMIT = 64;
// Original grouping exprs from the parser. Never analyzed or modified.
// Non-null.
private final List<Expr> origGroupingExprs_;
// The type of grouping set. Non-null.
private final GroupingSetsType groupingSetsType_;
// Indices of the expressions in 'origGroupingExprs_' referenced by the GROUPING SETS
// clause. Non-null iff groupingSetsType is SETS.
private final List<List<Integer>> groupingSetsList_;
/////////////////////////////////////////
// BEGIN: Members that need to be reset()
boolean isAnalyzed_ = false;
// ID of each of the distinct grouping sets. Each ID is a bitfield with a bit set if
// that distinct grouping expr is part of that grouping set. Populated during analysis.
private final List<Long> groupingIDs_;
// The list of analyzed expressions in each distinct grouping set. Populated during
// analysis.
private final List<List<Expr>> analyzedGroupingSets_;
// END: Members that need to be reset()
/////////////////////////////////////////
/**
* Constructor for regular GROUP BY, ROLLUP and CUBE.
*/
GroupByClause(List<Expr> groupingExprs,
GroupingSetsType groupingSetsType) {
Preconditions.checkNotNull(groupingSetsType);
Preconditions.checkState(groupingSetsType != GroupingSetsType.SETS);
origGroupingExprs_ = groupingExprs;
groupingSetsType_ = groupingSetsType;
groupingSetsList_ = null;
groupingIDs_ = new ArrayList<>();
analyzedGroupingSets_ = new ArrayList<>();
}
/**
* Constructor for GROUPING SETS
*/
GroupByClause(List<List<Expr>> groupingSetsList) {
groupingSetsType_ = GroupingSetsType.SETS;
groupingIDs_ = new ArrayList<Long>();
groupingSetsList_ = new ArrayList<>();
analyzedGroupingSets_ = new ArrayList<>();
// Extract unique expressions from the list and clone into 'origGroupingExprs_'.
origGroupingExprs_ = new ArrayList<Expr>();
for (List<Expr> groupingSetExprs : groupingSetsList) {
List<Integer> exprIndices = new ArrayList<>();
groupingSetsList_.add(exprIndices);
for (Expr e : groupingSetExprs) {
int idx = origGroupingExprs_.indexOf(e);
if (idx < 0) {
idx = origGroupingExprs_.size();
origGroupingExprs_.add(e);
}
exprIndices.add(idx);
}
}
}
/**
* Constructor for unanalyzed clone of GROUPING SETS.
*/
GroupByClause(List<Expr> groupingExprs, List<List<Integer>> groupingSetsList) {
groupingSetsType_ = GroupingSetsType.SETS;
groupingIDs_ = new ArrayList<Long>();
groupingSetsList_ = groupingSetsList;
origGroupingExprs_ = groupingExprs;
analyzedGroupingSets_ = new ArrayList<>();
}
public List<Expr> getOrigGroupingExprs() { return origGroupingExprs_; }
public List<List<Expr>> getAnalyzedGroupingSets() {
Preconditions.checkState(isAnalyzed_);
return analyzedGroupingSets_;
}
/**
* Add a new grouping ID if it is not already present in 'groupingIDs_'. If it is a new
* grouping ID, expand the list of expressions it references and append them to
* 'analyzedGroupingSets_'.
* @param groupingExprs duplicated list of analyzed grouping exprs
* @param addtlGroupingExprs additional grouping exprs that will be added to each
* grouping set, e.g. correlated columns from a subquery rewrite.
*/
private void addGroupingID(long id, List<Expr> groupingExprs,
List<Expr> addtlGroupingExprs) throws AnalysisException {
Preconditions.checkState(id >= 0 && id < (1L << groupingExprs.size()),
"bad id: " + id);
if (groupingIDs_.contains(id)) return;
if (groupingIDs_.size() >= GROUPING_SET_LIMIT) {
throw new AnalysisException("Limit of " + GROUPING_SET_LIMIT +
" grouping sets exceeded");
}
groupingIDs_.add(id);
List<Expr> groupingSet = new ArrayList<>();
for (int i = 0; i < groupingExprs.size(); ++i) {
Expr groupingExpr = groupingExprs.get(i);
if ((id & (1L << i)) != 0) {
groupingSet.add(groupingExpr);
} else {
// Populate NULL slots so that tuple layouts are identical.
// TODO: it would probably be more efficient to omit these slots and handle it
// in transposition.
groupingSet.add(NullLiteral.create(groupingExpr.getType()));
}
}
// The additional grouping expressions are a part of each grouping set.
groupingSet.addAll(addtlGroupingExprs);
analyzedGroupingSets_.add(groupingSet);
}
public boolean hasGroupingSets() {
Preconditions.checkState(groupingSetsType_ != null);
return groupingSetsType_ != GroupingSetsType.NONE;
}
public String getTypeString() {
return hasGroupingSets() ? groupingSetsType_.toString() : "";
}
public GroupByClause clone() {
List<Expr> groupingExprsClone = Expr.cloneList(origGroupingExprs_);
if (groupingSetsType_ == GroupingSetsType.SETS) {
// Safe to share 'groupingSetsList_' because it is not mutated.
return new GroupByClause(groupingExprsClone, groupingSetsList_);
} else {
return new GroupByClause(groupingExprsClone, groupingSetsType_);
}
}
/**
* Analyze the grouping sets and generate grouping IDs for them.
* Should not be called if hasGroupingSets() is false.
* @param groupingExprs grouping exprs from SelectStmt, which must line up with the
* original grouping exprs, but can have additional exprs appended on end. These
* grouping exprs must have ordinals and aliases substituted, i.e. equivalent
* expressions in the list must compare equals with Expr.equals().
*/
public void analyzeGroupingSets(List<Expr> groupingExprs) throws AnalysisException {
Preconditions.checkState(hasGroupingSets());
Preconditions.checkState(groupingExprs.size() >= origGroupingExprs_.size());
Preconditions.checkState(groupingIDs_.isEmpty());
Preconditions.checkState(analyzedGroupingSets_.isEmpty());
// Number of grouping expressions in the original clause. 'groupingExprs' may
// have additional expressions appended at the end as a result of rewriting.
int numOrigGroupingExprs = origGroupingExprs_.size();
// Deduplicate the grouping exprs that may be referenced multiple times in the
// original 'groupingExprs' map. Compute a map of each expr index in
// 'origGroupingExprs_' to the index of the deduped expr.
Map<Integer, Integer> firstEquivExpr = new HashMap<>();
List<Expr> dedupedGroupingSetExprs = new ArrayList<>();
for (int i = 0; i < numOrigGroupingExprs; ++i) {
int j = dedupedGroupingSetExprs.indexOf(groupingExprs.get(i));
if (j == -1) {
firstEquivExpr.put(i, dedupedGroupingSetExprs.size());
dedupedGroupingSetExprs.add(groupingExprs.get(i));
} else {
firstEquivExpr.put(i, j);
}
}
List<Expr> addtlGroupingExprs =
groupingExprs.subList(numOrigGroupingExprs, groupingExprs.size());
Expr.removeDuplicates(addtlGroupingExprs);
int numGroupingSetExprs = dedupedGroupingSetExprs.size();
if (numGroupingSetExprs >= (Long.SIZE - 1)) {
throw new AnalysisException(
"Number of grouping columns (" + numGroupingSetExprs + ") exceeds " +
"GROUP BY with " + getTypeString() + " limit of " + (Long.SIZE - 1));
}
// Generate the grouping IDs. Each grouping ID is a bitfield tracking which exprs
// in 'dedupedGroupingSetExprs' are included in that group.
if (groupingSetsType_ == GroupingSetsType.CUBE) {
// Enumerate all combinations of bits, i.e. all integers [0, numGroupingSetExprs).
// E.g. for CUBE(a, b, c), we enumerate 111, 110, 101, 100, 010, 001, 000,
// meaning the sets (a, b, c), (b, c), (a, c), (c), (b), (a), ().
for (long id = (1L << numGroupingSetExprs) - 1; id >= 0; id--) {
addGroupingID(id, dedupedGroupingSetExprs, addtlGroupingExprs);
}
} else if (groupingSetsType_ == GroupingSetsType.ROLLUP) {
Preconditions.checkState(numGroupingSetExprs > 0);
// Start with all bits set, then unset the bits one-by-one to enumerate all the
// grouping sets for rollup.
// E.g. for ROLLUP(a, b, c), we enumerate 111, 011, 001, 000, meaning the sets
// (a, b, c), (a, b), (a), ().
long bit = (1L << numGroupingSetExprs);
long id = bit - 1;
while (bit != 0) {
addGroupingID(id, dedupedGroupingSetExprs, addtlGroupingExprs);
bit >>= 1;
id &= ~bit;
}
Preconditions.checkState(id == 0);
} else {
Preconditions.checkState(groupingSetsType_ == GroupingSetsType.SETS);
for (List<Integer> set : groupingSetsList_) {
// Construct a mask with a bit set for each expression in the grouping set.
// We rely on addGroupingID() to ignore duplicate masks.
long mask = 0;
for (int origPos : set) {
Preconditions.checkState(origPos >= 0, "bad pos" + origPos);
Preconditions.checkState(origPos < numOrigGroupingExprs, "bad pos " + origPos);
int dedupedPos = firstEquivExpr.get(origPos);
Preconditions.checkState(dedupedPos >= 0, "bad pos" + dedupedPos);
Preconditions.checkState(dedupedPos < numGroupingSetExprs,
"bad pos" + dedupedPos);
mask |= (1L << dedupedPos);
}
addGroupingID(mask, dedupedGroupingSetExprs, addtlGroupingExprs);
}
}
isAnalyzed_ = true;
}
/**
* Reset analysis state from call to analyze().
*/
void reset() {
// origGroupingExprs_ not modified during analysis.
// groupingIDs_ were generated during analysis, so clear them out.
groupingIDs_.clear();
analyzedGroupingSets_.clear();
isAnalyzed_ = false;
}
/**
* Generate SQL for this GROUP BY clause.
* Note that if additional grouping exprs were appended during rewriting, the resulting
* SQL may not be parseable by Impala.
* @param groupingExprs grouping exprs from SelectStmt, which must line up with the
* original grouping exprs, but can have additional exprs appended on end.
*/
public String toSql(List<Expr> groupingExprs, ToSqlOptions options) {
StringBuilder sb = new StringBuilder();
sb.append(" GROUP BY ");
if (groupingSetsType_ == GroupingSetsType.NONE) {
sb.append(Expr.toSql(groupingExprs, options));
} else {
if (groupingSetsType_ == GroupingSetsType.CUBE) {
// Use the standard syntax even if input was non-standard "WITH CUBE".
sb.append("CUBE(");
sb.append(Expr.toSql(origGroupingExprs_, options));
sb.append(")");
} else if (groupingSetsType_ == GroupingSetsType.ROLLUP) {
// Use the standard syntax even if input was non-standard "WITH ROLLUP".
sb.append("ROLLUP(");
sb.append(Expr.toSql(origGroupingExprs_, options));
sb.append(")");
} else {
Preconditions.checkState(groupingSetsType_ == GroupingSetsType.SETS);
sb.append("GROUPING SETS(");
for (int i = 0; i < groupingSetsList_.size(); ++i) {
sb.append("(");
List<Expr> groupingSetExprs = new ArrayList<>();
for (int exprIdx : groupingSetsList_.get(i)) {
groupingSetExprs.add(origGroupingExprs_.get(exprIdx));
}
sb.append(Expr.toSql(groupingSetExprs, options));
sb.append(")");
sb.append((i < groupingSetsList_.size() - 1) ? ", " : "");
}
sb.append(")");
// Any additional grouping exprs are not part of CUBE, ROLLUP or GROUPING SETS
// clause and should be appended afterwards.
if (groupingExprs.size() > origGroupingExprs_.size()) {
sb.append(", ");
sb.append(Expr.toSql(groupingExprs.subList(
origGroupingExprs_.size(), groupingExprs.size()), options));
}
}
}
return sb.toString();
}
public String toString() {
return this.toSql(origGroupingExprs_, ToSqlOptions.DEFAULT);
}
public String debugString() {
return this.toSql(origGroupingExprs_, ToSqlOptions.SHOW_IMPLICIT_CASTS);
}
}