blob: 75153f669d5b45accb7e0ae52e854a685de8cae1 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lens.driver.jdbc;
import static org.apache.hadoop.hive.ql.parse.HiveParser.*;
import java.util.*;
import java.util.regex.Pattern;
import org.apache.lens.api.util.CommonUtils;
import org.apache.lens.cube.metadata.CubeMetastoreClient;
import org.apache.lens.cube.parse.CubeSemanticAnalyzer;
import org.apache.lens.cube.parse.HQLParser;
import org.apache.lens.server.api.LensConfConstants;
import org.apache.lens.server.api.error.LensException;
import org.apache.lens.server.api.query.rewrite.QueryRewriter;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.mutable.MutableInt;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.QB;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.antlr.runtime.CommonToken;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
/**
* The Class ColumnarSQLRewriter.
*/
@Slf4j
public class ColumnarSQLRewriter implements QueryRewriter {
/** The clause name. */
protected String clauseName = null;
/** The qb. */
protected QB qb;
/** The ast. */
protected ASTNode ast;
/** The query. */
protected String query;
/** The limit. */
protected String limit;
/** The fact filters. */
protected final StringBuilder factFilters = new StringBuilder();
/** The fact in line query. */
protected final StringBuilder factInLineQuery = new StringBuilder();
/** The all sub queries. */
protected StringBuilder allSubQueries = new StringBuilder();
/** The fact keys. */
Set<String> factKeys = new LinkedHashSet<String>();
/** The rewritten query. */
protected StringBuilder rewrittenQuery = new StringBuilder();
/** The fact filters for push down */
protected StringBuilder factFilterPush = new StringBuilder();
/** The join list. */
protected List<String> joinList = new ArrayList<String>();
/** The join condition. */
protected StringBuilder joinCondition = new StringBuilder();
/** The allkeys. */
protected List<String> allkeys = new ArrayList<String>();
/** The agg column. */
protected List<String> aggColumn = new ArrayList<String>();
/** The right filter. */
protected List<String> rightFilter = new ArrayList<String>();
/** The left filter. */
protected String leftFilter;
/** The map agg tab alias. */
private final Map<String, String> mapAggTabAlias = new LinkedHashMap<String, String>();
/** The map aliases. */
private final Map<String, String> mapAliases = new LinkedHashMap<String, String>();
/** The table to alias map. */
private final Map<String, String> tableToAliasMap = new LinkedHashMap<String, String>();
/** The tables to accessed column map. */
private final Map<String, LinkedHashSet<String>> tableToAccessedColMap =
new LinkedHashMap<String, LinkedHashSet<String>>();
/** The dimension table to subquery map. */
private final Map<String, String> dimTableToSubqueryMap = new LinkedHashMap<String, String>();
/** The where tree. */
protected String whereTree;
/** The having tree. */
protected String havingTree;
/** The order by tree. */
protected String orderByTree;
/** The select tree. */
protected String selectTree;
/** The group by tree. */
protected String groupByTree;
/** The join tree. */
protected String joinTree;
/** The from tree. */
protected String fromTree;
/** The join ast. */
@Getter
protected ASTNode joinAST;
/** The having ast. */
@Getter
protected ASTNode havingAST;
/** The select ast. */
@Getter
protected ASTNode selectAST;
/** The where ast. */
@Getter
protected ASTNode whereAST;
/** The order by ast. */
@Getter
protected ASTNode orderByAST;
/** The group by ast. */
@Getter
protected ASTNode groupByAST;
/** The from ast. */
@Getter
protected ASTNode fromAST;
protected Map<String, String> regexReplaceMap;
/**
* Instantiates a new columnar sql rewriter.
*/
public ColumnarSQLRewriter() {
}
@Override
public void init(Configuration conf) {
regexReplaceMap = CommonUtils.parseMapFromString(conf.get(JDBCDriverConfConstants.REGEX_REPLACEMENT_VALUES));
}
public String getClause() {
if (clauseName == null) {
TreeSet<String> ks = new TreeSet<String>(qb.getParseInfo().getClauseNames());
clauseName = ks.first();
}
return clauseName;
}
/*
* Analyze query AST and split into trees
*/
/**
* Analyze internal.
*
* @throws SemanticException the semantic exception
*/
public void analyzeInternal(Configuration conf, HiveConf hconf) throws SemanticException {
CubeSemanticAnalyzer c1 = new CubeSemanticAnalyzer(conf, hconf);
QB qb = new QB(null, null, false);
if (!c1.doPhase1(ast, qb, c1.initPhase1Ctx(), null)) {
return;
}
if (!qb.getSubqAliases().isEmpty()) {
log.warn("Subqueries in from clause is not supported by {} Query : {}", this, this.query);
throw new SemanticException("Subqueries in from clause is not supported by " + this + " Query : " + this.query);
}
// Get clause name
TreeSet<String> ks = new TreeSet<String>(qb.getParseInfo().getClauseNames());
clauseName = ks.first();
// Split query into trees
if (qb.getParseInfo().getWhrForClause(clauseName) != null) {
this.whereTree = HQLParser.getString(qb.getParseInfo().getWhrForClause(clauseName));
this.whereAST = qb.getParseInfo().getWhrForClause(clauseName);
}
if (qb.getParseInfo().getHavingForClause(clauseName) != null) {
this.havingTree = HQLParser.getString(qb.getParseInfo().getHavingForClause(clauseName));
this.havingAST = qb.getParseInfo().getHavingForClause(clauseName);
}
if (qb.getParseInfo().getOrderByForClause(clauseName) != null) {
this.orderByTree = HQLParser.getString(qb.getParseInfo().getOrderByForClause(clauseName));
this.orderByAST = qb.getParseInfo().getOrderByForClause(clauseName);
}
if (qb.getParseInfo().getGroupByForClause(clauseName) != null) {
this.groupByTree = HQLParser.getString(qb.getParseInfo().getGroupByForClause(clauseName));
this.groupByAST = qb.getParseInfo().getGroupByForClause(clauseName);
}
if (qb.getParseInfo().getSelForClause(clauseName) != null) {
this.selectTree = HQLParser.getString(qb.getParseInfo().getSelForClause(clauseName));
this.selectAST = qb.getParseInfo().getSelForClause(clauseName);
}
this.joinTree = HQLParser.getString(qb.getParseInfo().getJoinExpr());
this.joinAST = qb.getParseInfo().getJoinExpr();
this.fromAST = HQLParser.findNodeByPath(ast, TOK_FROM);
this.fromTree = HQLParser.getString(fromAST);
}
/*
* Get the table qualified name eg. database.table_name table_alias
*/
/**
* Gets the table from tab ref node.
*
* @param tree the tree
* @return the table from tab ref node
*/
public String getTableFromTabRefNode(ASTNode tree) {
String table = "";
ASTNode tabName = (ASTNode) tree.getChild(0);
if (tabName.getChildCount() == 2) {
table = tabName.getChild(0).getText() + "." + tabName.getChild(1).getText();
} else {
table = tabName.getChild(0).getText();
}
if (tree.getChildCount() > 1) {
table = table + " " + tree.getChild(1).getText();
}
String[] tabSplit = table.split(" +");
if (tabSplit.length == 2) {
tableToAliasMap.put(tabSplit[0], tabSplit[1]);
}
return table;
}
/*
* Get join conditions specified in join clause
*/
/**
* Gets the join cond.
*
* @param node the node
*/
public void getJoinCond(ASTNode node) {
if (node == null) {
return;
}
int rootType = node.getToken().getType();
String rightTable = "";
if (rootType == TOK_JOIN || rootType == TOK_LEFTOUTERJOIN || rootType == TOK_RIGHTOUTERJOIN
|| rootType == TOK_FULLOUTERJOIN || rootType == TOK_LEFTSEMIJOIN || rootType == TOK_UNIQUEJOIN) {
ASTNode left = (ASTNode) node.getChild(0);
ASTNode right = (ASTNode) node.getChild(1);
rightTable = getTableFromTabRefNode(right);
getAllDimColumns(fromAST);
getAllDimColumns(selectAST);
getAllDimColumns(whereAST);
buildDimSubqueries();
// Get the table from input db.table alias.
// If alias provided put the same alias in the subquery.
String[] tabSplit = rightTable.split(" +");
String subqueryForTable = "";
if (tabSplit.length == 2) {
subqueryForTable = dimTableToSubqueryMap.get(tabSplit[0]) + " " + tabSplit[1];
} else {
subqueryForTable = dimTableToSubqueryMap.get(tabSplit[0]);
}
String joinType = "";
String joinFilter = "";
String joinToken = node.getToken().getText();
if (joinToken.equals("TOK_JOIN")) {
joinType = "inner join";
} else if (joinToken.equals("TOK_LEFTOUTERJOIN")) {
joinType = "left outer join";
} else if (joinToken.equals("TOK_RIGHTOUTERJOIN")) {
joinType = "right outer join";
} else if (joinToken.equals("TOK_FULLOUTERJOIN")) {
joinType = "full outer join";
} else if (joinToken.equals("TOK_LEFTSEMIJOIN")) {
joinType = "left semi join";
} else if (joinToken.equals("TOK_UNIQUEJOIN")) {
joinType = "unique join";
} else {
log.info("Non supported join type : {}", joinToken);
}
if (node.getChildCount() > 2) {
// User has specified a join condition for filter pushdown.
joinFilter = HQLParser.getString((ASTNode) node.getChild(2));
}
joinList.add(joinType + (" ") + (subqueryForTable) + (" on ") + (joinFilter) + (" "));
}
for (int i = 0; i < node.getChildCount(); i++) {
ASTNode child = (ASTNode) node.getChild(i);
getJoinCond(child);
}
}
/**
* Construct join chain
*
* @return
*/
public StringBuilder constructJoinChain() {
getJoinCond(fromAST);
Collections.reverse(joinList);
for (String key : joinList) {
joinCondition.append(" ").append(key);
}
return joinCondition;
}
/**
* Get the count of columns in a given select expression
*
* @param node
* @return Column count
*/
public int getColumnCount(ASTNode node) {
int count = 0;
for (int i = 0; i < node.getChildCount(); i++) {
ASTNode child = (ASTNode) node.getChild(i);
if (child.getToken().getType() == TOK_TABLE_OR_COL) {
count++;
} else {
count += getColumnCount(child);
}
}
return count;
}
/**
* Check if expression is answerable from fact, then push it to fact pushdown subquery
*
* @param node
* @return true if expressions is used
*/
public boolean isExpressionsAnswerableFromFact(ASTNode node) {
boolean isAnswerable = true;
for (int i = 0; i < node.getChildCount(); i++) {
if (node.getChild(i).getType() == HiveParser.TOK_SELEXPR) {
int cnt = getColumnCount((ASTNode) node.getChild(i));
if (cnt >= 2) {
if (cnt == getNumFactTableInExpressions((ASTNode) node.getChild(i), new MutableInt(0))) {
isAnswerable = true;
} else {
isAnswerable = false;
}
}
}
}
return isAnswerable;
}
/**
* Get number of fact columns used in the an expression
*
* @param node
* @param count
* @return Number of fact columns used in expression
*/
protected int getNumFactTableInExpressions(ASTNode node, MutableInt count) {
if (node == null) {
log.debug("ASTNode is null ");
return 0;
}
if (node.getToken().getType() == HiveParser.TOK_TABLE_OR_COL) {
String factAlias = getFactAlias();
String table = node.getChild(0).getText();
if (table.equals(factAlias)) {
count.add(1);
}
}
for (int i = 0; i < node.getChildCount(); i++) {
ASTNode child = (ASTNode) node.getChild(i);
getNumFactTableInExpressions(child, count);
}
return count.intValue();
}
/*
* Get filter conditions if user has specified a join condition for filter pushdown.
*/
/**
* Gets the filter in join cond.
*
* @param node the node
*/
public void getFilterInJoinCond(ASTNode node) {
if (node == null) {
log.debug("Join AST is null ");
return;
}
if (node.getToken().getType() == HiveParser.KW_AND) {
ASTNode right = (ASTNode) node.getChild(1);
String filterCond = HQLParser.getString(right);
rightFilter.add(filterCond);
}
for (int i = 0; i < node.getChildCount(); i++) {
ASTNode child = (ASTNode) node.getChild(i);
getFilterInJoinCond(child);
}
}
/**
* Get the fact alias
*
* @return
*/
public String getFactAlias() {
String factAlias = "";
String factNameAndAlias = getFactNameAlias(fromAST);
String[] keys = factNameAndAlias.split("\\s+");
if (keys.length == 2) {
factAlias = keys[1];
}
return factAlias;
}
/**
* Get fact filters for pushdown
*
* @param node
*/
public void factFilterPushDown(ASTNode node) {
if (node == null) {
log.debug("Join AST is null ");
return;
}
String filterCond = "";
if (node.getToken().getType() == HiveParser.KW_AND) {
ASTNode parentNode = (ASTNode) node.getChild(0).getParent();
// Skip the join conditions used as "and" for fact filter pushdown.
// eg. inner join fact.id1 = dim.id and fact.id2 = dim.id
if (parentNode.getChild(0).getChild(0).getType() == HiveParser.DOT
&& parentNode.getChild(0).getChild(1).getType() == HiveParser.DOT
&& parentNode.getChild(1).getChild(0).getType() == HiveParser.DOT
&& parentNode.getChild(1).getChild(1).getType() == HiveParser.DOT) {
return;
}
ASTNode right = (ASTNode) node.getChild(1);
filterCond = HQLParser.getString(right);
}
String factAlias = getFactAlias();
if (filterCond.matches("(.*)" + factAlias + "(.*)")) {
factFilterPush.append(filterCond).append(" and ");
}
for (int i = 0; i < node.getChildCount(); i++) {
ASTNode child = (ASTNode) node.getChild(i);
factFilterPushDown(child);
}
}
/**
* Get fact keys used in the AST
*
* @param node
*/
public void getFactKeysFromNode(ASTNode node) {
if (node == null) {
log.debug("AST is null ");
return;
}
if (HQLParser.isAggregateAST(node)) {
return;
} else {
if (node.getToken().getType() == HiveParser.DOT
&& node.getParent().getChild(0).getType() != HiveParser.Identifier) {
String table = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier).toString();
String column = node.getChild(1).toString().toLowerCase();
String factAlias = getFactAlias();
if (table.equals(factAlias)) {
factKeys.add(factAlias + "." + column);
}
}
}
for (int i = 0; i < node.getChildCount(); i++) {
ASTNode child = (ASTNode) node.getChild(i);
getFactKeysFromNode(child);
}
}
/**
* Get all fact keys used in all ASTs
*/
public void getAllFactKeys() {
if (fromAST != null) {
getFactKeysFromNode(fromAST);
}
if (whereAST != null) {
getFactKeysFromNode(whereAST);
}
if (selectAST != null) {
getFactKeysFromNode(selectAST);
}
}
/**
* Get all columns used for dimmension tables
* @param node
*/
public void getAllDimColumns(ASTNode node) {
if (node == null) {
log.debug("Input AST is null ");
return;
}
// Assuming column is specified with table.column format
if (node.getToken().getType() == HiveParser.DOT) {
String table = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier).toString();
String column = node.getChild(1).toString();
Iterator iterator = tableToAliasMap.keySet().iterator();
while (iterator.hasNext()) {
String tab = (String) iterator.next();
String alias = tableToAliasMap.get(tab);
if ((table.equals(tab) || table.equals(alias)) && column != null) {
LinkedHashSet<String> cols;
if (!tableToAccessedColMap.containsKey(tab)) {
cols = new LinkedHashSet<String>();
cols.add(column);
tableToAccessedColMap.put(tab, cols);
} else {
cols = tableToAccessedColMap.get(tab);
if (!cols.contains(column)) {
cols.add(column);
}
}
}
}
}
for (int i = 0; i < node.getChildCount(); i++) {
ASTNode child = (ASTNode) node.getChild(i);
getAllDimColumns(child);
}
}
/**
* Build dimension table subqueries
*/
public void buildDimSubqueries() {
Iterator iterator = tableToAccessedColMap.keySet().iterator();
while (iterator.hasNext()) {
StringBuilder query = new StringBuilder();
String tab = (String) iterator.next();
LinkedHashSet<String> cols = tableToAccessedColMap.get(tab);
query.append("(").append("select ").append(StringUtils.join(cols, ","))
.append(" from ").append(tab).append(")");
dimTableToSubqueryMap.put(tab, query.toString());
}
}
/*
* Build fact sub query using where tree and join tree
*/
/**
* Builds the subqueries.
*
* @param node the node
*/
public void buildSubqueries(ASTNode node) {
if (node == null) {
log.debug("Join AST is null ");
return;
}
String subquery = "";
if (node.getToken().getType() == HiveParser.EQUAL) {
if (node.getChild(0).getType() == HiveParser.DOT && node.getChild(1).getType() == HiveParser.DOT) {
ASTNode left = (ASTNode) node.getChild(0);
ASTNode right = (ASTNode) node.getChild(1);
ASTNode parentNode = (ASTNode) node.getParent();
// Skip the join conditions used as "and" while building subquery
// eg. inner join fact.id1 = dim.id and fact.id2 = dim.id
if (parentNode.getChild(0).getChild(0).getType() == HiveParser.DOT
&& parentNode.getChild(0).getChild(1).getType() == HiveParser.DOT
&& parentNode.getChild(1).getChild(0).getType() == HiveParser.DOT
&& parentNode.getChild(1).getChild(1).getType() == HiveParser.DOT) {
return;
}
// Get the fact and dimension columns in table_name.column_name format
String factJoinKeys = HQLParser.getString(left).replaceAll("\\s+", "")
.replaceAll("[(,)]", "");
String dimJoinKeys = HQLParser.getString(right).replaceAll("\\s+", "")
.replaceAll("[(,)]", "");
int dimTableDelimIndex = dimJoinKeys.indexOf("__");
String dimTableName = dimJoinKeys.substring(0, dimTableDelimIndex);
String dimAlias = dimJoinKeys.
substring(dimTableDelimIndex + 3, dimJoinKeys.indexOf('.')).trim();
// Construct part of subquery by referring join condition
// fact.fact_key = dim_table.dim_key
// eg. "fact_key in ( select dim_key from dim_table where "
String queryphase1 = factJoinKeys.concat(" in ").concat(" ( ").concat(" select ")
.concat(dimTableName).concat(" ")
.concat(dimJoinKeys.substring(dimJoinKeys.lastIndexOf(".")))
.concat(" from ").concat(dimTableName).concat(" where ");
getAllFilters(whereAST);
rightFilter.add(leftFilter);
Set<String> setAllFilters = new LinkedHashSet<String>(rightFilter);
// Check the occurrence of dimension table in the filter list and
// combine all filters of same dimension table with and .
// eg. "dim_table.key1 = 'abc' and dim_table.key2 = 'xyz'"
if (setAllFilters.toString().replaceAll("\\s+", "")
.matches("(.*)" + dimAlias + "(.*)")) {
factFilters.delete(0, factFilters.length());
// All filters in where clause
for (int i = 0; i < setAllFilters.toArray().length; i++) {
if (setAllFilters.toArray()[i].toString().replaceAll("\\s+", "")
.matches("(.*)" + dimAlias + ("(.*)"))) {
String filters2 = setAllFilters.toArray()[i].toString();
filters2 = filters2.replaceAll(
getTableOrAlias(filters2, "alias"),
getTableOrAlias(filters2, "table")
).concat(" and ");
factFilters.append(filters2);
}
}
// Merge fact subquery and dim subqury to construct the final subquery
// eg. "fact_key in ( select dim_key from dim_table where
// dim_table.key2 = 'abc' and dim_table.key3 = 'xyz'"
subquery = queryphase1.concat(factFilters.toString().substring(0, factFilters.toString().lastIndexOf("and")))
.concat(")");
// include subqueries which are applicable only to filter records from fact
if (subquery.matches("(.*)" + getFactAlias() + "(.*)")) {
allSubQueries.append(subquery).append(" and ");
}
}
}
}
for (int i = 0; i < node.getChildCount(); i++) {
ASTNode child = (ASTNode) node.getChild(i);
buildSubqueries(child);
}
}
/**
* Get the table or alias from the given key string
*
* @param keyString
* @param type
* @return
*/
public String getTableOrAlias(String keyString, String type) {
String ref = "";
if (type.equals("table")) {
ref = keyString.substring(0, keyString.indexOf("__")).replaceAll("[(,)]", "");
}
if (type.equals("alias")) {
ref = keyString.substring(0, keyString.indexOf(".")).replaceAll("[(,)]", "");
}
return ref;
}
/*
* Get aggregate columns used in the select query
*/
/**
* Gets the aggregate columns.
*
* @param node the node
* @return the aggregate columns
*/
public ArrayList<String> getAggregateColumns(ASTNode node, MutableInt count) {
StringBuilder aggmeasures = new StringBuilder();
if (HQLParser.isAggregateAST(node)) {
if (node.getToken().getType() == HiveParser.TOK_FUNCTION || node.getToken().getType() == HiveParser.DOT) {
ASTNode right = (ASTNode) node.getChild(1);
String aggCol = HQLParser.getString(right);
String funident = HQLParser.findNodeByPath(node, Identifier).toString();
String measure = funident.concat("(").concat(aggCol).concat(")");
count.add(1);
String alias = "alias" + String.valueOf(count);
String allaggmeasures = aggmeasures.append(measure).append(" as ").append(alias).toString();
String aggColAlias = funident + "(" + alias + ")";
String measureRegex = "\\s*" + Pattern.quote(funident)
+ "\\s*\\(\\s*\\Q" + aggCol.replaceAll("\\s+", "\\\\E\\\\s+\\\\Q") + "\\E\\s*\\)\\s*";
mapAggTabAlias.put(measureRegex, aggColAlias);
if (!aggColumn.contains(allaggmeasures)) {
aggColumn.add(allaggmeasures);
}
}
}
for (int i = 0; i < node.getChildCount(); i++) {
ASTNode child = (ASTNode) node.getChild(i);
getAggregateColumns(child, count);
}
return (ArrayList<String>) aggColumn;
}
/*
* Get all columns in table.column format
*/
/**
* Gets the tables and columns.
*
* @param node the node
* @return the tables and columns
*/
public ArrayList<String> getTablesAndColumns(ASTNode node) {
if (node.getToken().getType() == HiveParser.DOT) {
String table = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier).toString();
String column = node.getChild(1).toString().toLowerCase();
String keys = table.concat(".").concat(column);
allkeys.add(keys);
}
for (int i = 0; i < node.getChildCount(); i++) {
ASTNode child = (ASTNode) node.getChild(i);
getTablesAndColumns(child);
}
return (ArrayList<String>) allkeys;
}
/*
* Get the limit value
*/
/**
* Gets the limit clause.
*
* @param node the node
* @return the limit clause
*/
public String getLimitClause(ASTNode node) {
if (node.getToken().getType() == HiveParser.TOK_LIMIT) {
limit = HQLParser.findNodeByPath(node, HiveParser.Number).toString();
}
for (int i = 0; i < node.getChildCount(); i++) {
ASTNode child = (ASTNode) node.getChild(i);
getLimitClause(child);
}
return limit;
}
/*
* Get all filters conditions in where clause
*/
/**
* Gets the all filters.
*
* @param node the node
*/
public void getAllFilters(ASTNode node) {
if (node == null) {
return;
}
if (node.getToken().getType() == HiveParser.KW_AND) {
ASTNode right = (ASTNode) node.getChild(1);
String allFilters = HQLParser.getString(right);
leftFilter = HQLParser.getString((ASTNode) node.getChild(0));
rightFilter.add(allFilters);
} else if (node.getToken().getType() == HiveParser.TOK_WHERE) {
ASTNode right = (ASTNode) node.getChild(1);
String allFilters = HQLParser.getString(right);
leftFilter = HQLParser.getString((ASTNode) node.getChild(0));
rightFilter.add(allFilters);
}
for (int i = 0; i < node.getChildCount(); i++) {
ASTNode child = (ASTNode) node.getChild(i);
getAllFilters(child);
}
}
/*
* Get the fact table name and alias
*/
/**
* Gets the fact name alias.
*
* @param fromAST the from ast
* @return the fact name alias
*/
public String getFactNameAlias(ASTNode fromAST) {
String factTable;
String factAlias;
ArrayList<String> allTables = new ArrayList<>();
getAllTablesfromFromAST(fromAST, allTables);
String[] keys = allTables.get(0).trim().split(" +");
if (keys.length == 2) {
factTable = keys[0];
factAlias = keys[1];
return factTable + " " + factAlias;
} else {
factTable = keys[0];
}
return factTable;
}
/*
* Reset the instance variables if input query is union of multiple select queries
*/
/**
* Reset.
*/
public void reset() {
factInLineQuery.setLength(0);
factKeys.clear();
aggColumn.clear();
allSubQueries.setLength(0);
factFilterPush.setLength(0);
rightFilter.clear();
joinCondition.setLength(0);
tableToAliasMap.clear();
tableToAccessedColMap.clear();
dimTableToSubqueryMap.clear();
selectTree = null;
selectAST = null;
fromTree = null;
fromAST = null;
joinTree = null;
joinAST = null;
whereTree = null;
whereAST = null;
groupByTree = null;
groupByAST = null;
havingTree = null;
havingAST = null;
orderByTree = null;
orderByAST = null;
mapAliases.clear();
joinList.clear();
limit = null;
}
/*
* Check the incompatible hive udf and replace it with database udf.
*/
/**
* Replace udf for db.
*
* @param query the query
* @return the string
*/
public String replaceUDFForDB(String query) {
for (Map.Entry<String, String> entry : regexReplaceMap.entrySet()) {
query = query.replaceAll(entry.getKey(), entry.getValue());
}
return query;
}
/**
* Replace alias in AST trees
*
*/
public void replaceAliasInAST() {
updateAliasFromAST(fromAST);
if (fromTree != null) {
replaceAlias(fromAST);
fromTree = HQLParser.getString(fromAST);
}
if (selectTree != null) {
replaceAlias(selectAST);
selectTree = HQLParser.getString(selectAST);
}
if (whereTree != null) {
replaceAlias(whereAST);
whereTree = HQLParser.getString(whereAST);
}
if (groupByTree != null) {
replaceAlias(groupByAST);
groupByTree = HQLParser.getString(groupByAST);
}
if (orderByTree != null) {
replaceAlias(orderByAST);
orderByTree = HQLParser.getString(orderByAST);
}
if (havingTree != null) {
replaceAlias(havingAST);
havingTree = HQLParser.getString(havingAST);
}
}
/*
* Construct the rewritten query using trees
*/
/**
* Builds the query.
*
* @throws SemanticException
*/
public void buildQuery(Configuration conf, HiveConf hconf) throws SemanticException {
analyzeInternal(conf, hconf);
replaceWithUnderlyingStorage(hconf);
replaceAliasInAST();
getFilterInJoinCond(fromAST);
MutableInt alaisCount = new MutableInt(0);
getAggregateColumns(selectAST, alaisCount);
if (havingAST != null) {
getAggregateColumns(havingAST, alaisCount);
}
constructJoinChain();
getAllFilters(whereAST);
buildSubqueries(fromAST);
getAllFactKeys();
factFilterPushDown(whereAST);
factFilterPushDown(fromAST);
// Get the limit clause
String limit = getLimitClause(ast);
// Construct the final fact in-line query with keys,
// measures and individual sub queries built.
if (whereTree == null || joinTree == null || allSubQueries.length() == 0
|| aggColumn.isEmpty() || !isExpressionsAnswerableFromFact(selectAST)) {
log.info("@@@Query not eligible for inner subquery rewrite");
// construct query without fact sub query
constructQuery(selectTree, whereTree, groupByTree, havingTree, orderByTree, limit);
return;
} else {
String factNameAndAlias = getFactNameAlias(fromAST).trim();
factInLineQuery.append(" (select ").append(factKeys.toString().replaceAll("\\[", "").replaceAll("\\]", ""));
if (!aggColumn.isEmpty()) {
factInLineQuery.append(",").append(aggColumn.toString().replace("[", "").replace("]", ""));
}
if (factInLineQuery.toString().substring(factInLineQuery.toString().length() - 1).equals(",")) {
factInLineQuery.setLength(factInLineQuery.length() - 1);
}
factInLineQuery.append(" from ").append(factNameAndAlias);
if (allSubQueries != null) {
factInLineQuery.append(" where ");
if (factFilterPush != null) {
factInLineQuery.append(factFilterPush);
}
factInLineQuery.append(allSubQueries.toString().substring(0, allSubQueries.lastIndexOf("and")));
}
if (!aggColumn.isEmpty()) {
factInLineQuery.append(" group by ");
factInLineQuery.append(factKeys.toString().replaceAll("\\[", "").replaceAll("\\]", ""));
}
factInLineQuery.append(")");
}
// Replace the aggregate column aliases from fact
// sub query query to the outer query
for (Map.Entry<String, String> entry : mapAggTabAlias.entrySet()) {
selectTree = selectTree.replaceAll(entry.getKey(), entry.getValue());
if (orderByTree != null) {
orderByTree = orderByTree.replaceAll(entry.getKey(), entry.getValue());
}
if (havingTree != null) {
havingTree = havingTree.replaceAll(entry.getKey(), entry.getValue());
}
}
//for subquery with count function should be replaced with sum in outer query
if (selectTree.toLowerCase().matches("(.*)count\\((.*)")) {
selectTree = selectTree.replaceAll("count\\(", "sum\\(");
}
// construct query with fact sub query
constructQuery(selectTree, whereTree, groupByTree, havingTree, orderByTree, limit);
}
/*
* Get first child from the from tree
*/
/**
* Gets the all tablesfrom from ast.
*
* @param from the from
* @param fromTables the from tables
* @return the all tablesfrom from ast
*/
protected void getAllTablesfromFromAST(ASTNode from, ArrayList<String> fromTables) {
String table;
if (TOK_TABREF == from.getToken().getType()) {
ASTNode tabName = (ASTNode) from.getChild(0);
if (tabName.getChildCount() == 2) {
table = tabName.getChild(0).getText() + "." + tabName.getChild(1).getText();
} else {
table = tabName.getChild(0).getText();
}
if (from.getChildCount() > 1) {
table = table + " " + from.getChild(1).getText();
}
fromTables.add(table);
}
for (int i = 0; i < from.getChildCount(); i++) {
ASTNode child = (ASTNode) from.getChild(i);
getAllTablesfromFromAST(child, fromTables);
}
}
/**
* Update alias and map old alias with new one
*
* @param from
*/
protected void updateAliasFromAST(ASTNode from) {
String newAlias;
String table;
String dbAndTable = "";
if (TOK_TABREF == from.getToken().getType()) {
ASTNode tabName = (ASTNode) from.getChild(0);
if (tabName.getChildCount() == 2) {
dbAndTable = tabName.getChild(0).getText() + "_" + tabName.getChild(1).getText();
table = tabName.getChild(1).getText();
} else {
table = tabName.getChild(0).getText();
}
if (from.getChildCount() > 1) {
ASTNode alias = (ASTNode) from.getChild(1);
newAlias = dbAndTable + "_" + from.getChild(1).getText();
mapAliases.put(alias.getText(), table + "__" + newAlias);
alias.getToken().setText(table + "__" + newAlias);
}
}
for (int i = 0; i < from.getChildCount(); i++) {
updateAliasFromAST((ASTNode) from.getChild(i));
}
}
/**
* Update alias in all AST trees
*
* @param tree
*/
protected void replaceAlias(ASTNode tree) {
if (TOK_TABLE_OR_COL == tree.getToken().getType()) {
ASTNode alias = (ASTNode) tree.getChild(0);
if (mapAliases.get(tree.getChild(0).toString()) != null) {
alias.getToken().setText(mapAliases.get(tree.getChild(0).toString()));
} else {
alias.getToken().setText(tree.getChild(0).toString());
}
}
for (int i = 0; i < tree.getChildCount(); i++) {
replaceAlias((ASTNode) tree.getChild(i));
}
}
/*
* Construct final query using all trees
*/
/**
* Construct query.
*
* @param selecttree the selecttree
* @param wheretree the wheretree
* @param groupbytree the groupbytree
* @param havingtree the havingtree
* @param orderbytree the orderbytree
* @param limit the limit
*/
protected void constructQuery(String selecttree, String wheretree, String groupbytree,
String havingtree, String orderbytree, String limit) {
String finalJoinClause = "";
String factNameAndAlias = getFactNameAlias(fromAST);
if (joinCondition != null) {
finalJoinClause = factNameAndAlias.concat(" ").concat(joinCondition.toString());
} else {
finalJoinClause = factNameAndAlias;
}
rewrittenQuery.append("select ").append(selecttree).append(" from ");
if (factInLineQuery.length() != 0) {
finalJoinClause = finalJoinClause.substring(finalJoinClause.indexOf(" "));
rewrittenQuery.append(factInLineQuery);
rewrittenQuery.append(finalJoinClause);
} else {
rewrittenQuery.append(finalJoinClause);
}
if (wheretree != null) {
rewrittenQuery.append(" where ").append(wheretree);
}
if (groupbytree != null) {
rewrittenQuery.append(" group by ").append(groupbytree);
}
if (havingtree != null) {
rewrittenQuery.append(" having ").append(havingtree);
}
if (orderbytree != null) {
rewrittenQuery.append(" order by ").append(orderbytree);
}
if (limit != null) {
rewrittenQuery.append(" limit ").append(limit);
}
}
/*
* (non-Javadoc)
*
* @see org.apache.lens.server.api.query.QueryRewriter#rewrite(java.lang.String, org.apache.hadoop.conf.Configuration)
*/
@Override
public String rewrite(String query, Configuration conf, HiveConf metastoreConf) throws LensException {
this.query = query;
StringBuilder mergedQuery;
rewrittenQuery.setLength(0);
String queryReplacedUdf;
reset();
try {
String finalRewrittenQuery;
if (query.toLowerCase().matches("(.*)union all(.*)")) {
finalRewrittenQuery = "";
String[] queries = query.toLowerCase().split("union all");
for (int i = 0; i < queries.length; i++) {
log.info("Union Query Part {} : {}", i, queries[i]);
ast = HQLParser.parseHQL(queries[i], metastoreConf);
buildQuery(conf, metastoreConf);
mergedQuery = rewrittenQuery.append(" union all ");
finalRewrittenQuery = mergedQuery.toString().substring(0, mergedQuery.lastIndexOf("union all"));
reset();
}
} else {
ast = HQLParser.parseHQL(query, metastoreConf);
buildQuery(conf, metastoreConf);
finalRewrittenQuery = rewrittenQuery.toString();
}
queryReplacedUdf = replaceUDFForDB(finalRewrittenQuery);
log.info("Input Query : {}", query);
log.info("Rewritten Query : {}", queryReplacedUdf);
} catch (SemanticException e) {
throw new LensException(e);
}
return queryReplacedUdf;
}
@NoArgsConstructor
protected static class NativeTableInfo {
private Map<String, String> columnMapping = new LinkedHashMap<>();
NativeTableInfo(Table tbl) {
String columnMappingProp = tbl.getProperty(LensConfConstants.NATIVE_TABLE_COLUMN_MAPPING);
if (StringUtils.isNotBlank(columnMappingProp)) {
String[] columnMapArray = StringUtils.split(columnMappingProp, ",");
for (String columnMapEntry : columnMapArray) {
String[] mapEntry = StringUtils.split(columnMapEntry, "=");
columnMapping.put(mapEntry[0].trim(), mapEntry[1].trim());
}
}
}
String getNativeColumn(String col) {
String retCol = columnMapping.get(col);
return retCol != null ? retCol : col;
}
}
protected Map<String, NativeTableInfo> aliasToNativeTableInfo = new LinkedHashMap<>();
/**
* Replace with underlying storage.
*
* @param metastoreConf the metastore configuration
*/
protected void replaceWithUnderlyingStorage(HiveConf metastoreConf) {
replaceDBAndTableNames(metastoreConf, fromAST);
if (aliasToNativeTableInfo.isEmpty()) {
return;
}
replaceColumnNames(selectAST);
replaceColumnNames(fromAST);
replaceColumnNames(whereAST);
replaceColumnNames(groupByAST);
replaceColumnNames(orderByAST);
replaceColumnNames(havingAST);
}
// Replace Lens database names with storage's proper DB and table name based
// on table properties.
protected void replaceDBAndTableNames(HiveConf metastoreConf, ASTNode tree) {
if (tree == null) {
return;
}
if (TOK_TABREF == tree.getToken().getType()) {
// TOK_TABREF will have TOK_TABNAME as first child and alias as second child.
String alias;
String tblName = null;
Table tbl = null;
ASTNode tabNameChild = (ASTNode) tree.getChild(0);
if (TOK_TABNAME == tabNameChild.getToken().getType()) {
// If it has two children, the first one is the DB name and second one is
// table identifier
// Else, we have to add the DB name as the first child
try {
if (tabNameChild.getChildCount() == 2) {
ASTNode dbIdentifier = (ASTNode) tabNameChild.getChild(0);
ASTNode tableIdentifier = (ASTNode) tabNameChild.getChild(1);
tblName = tableIdentifier.getText();
String lensTable = dbIdentifier.getText() + "." + tblName;
tbl = CubeMetastoreClient.getInstance(metastoreConf).getHiveTable(lensTable);
String table = getUnderlyingTableName(tbl);
String db = getUnderlyingDBName(tbl);
// Replace both table and db names
if ("default".equalsIgnoreCase(db)) {
// Remove the db name for this case
tabNameChild.deleteChild(0);
} else if (StringUtils.isNotBlank(db)) {
dbIdentifier.getToken().setText(db);
} // If db is empty, then leave the tree untouched
if (StringUtils.isNotBlank(table)) {
tableIdentifier.getToken().setText(table);
}
} else {
ASTNode tableIdentifier = (ASTNode) tabNameChild.getChild(0);
tblName = tableIdentifier.getText();
tbl = CubeMetastoreClient.getInstance(metastoreConf).getHiveTable(tblName);
String table = getUnderlyingTableName(tbl);
// Replace table name
if (StringUtils.isNotBlank(table)) {
tableIdentifier.getToken().setText(table);
}
// Add db name as a new child
String dbName = getUnderlyingDBName(tbl);
if (StringUtils.isNotBlank(dbName) && !"default".equalsIgnoreCase(dbName)) {
ASTNode dbIdentifier = new ASTNode(new CommonToken(HiveParser.Identifier, dbName));
dbIdentifier.setParent(tabNameChild);
tabNameChild.insertChild(0, dbIdentifier);
}
}
} catch (LensException | HiveException e) {
log.warn("No corresponding table in metastore:", e);
}
}
if (tree.getChildCount() == 2) {
alias = tree.getChild(1).getText();
} else {
alias = tblName;
}
if (StringUtils.isNotBlank(alias)) {
alias = alias.toLowerCase();
if (!aliasToNativeTableInfo.containsKey(alias)) {
if (tbl != null) {
aliasToNativeTableInfo.put(alias, new NativeTableInfo(tbl));
}
}
}
} else {
for (int i = 0; i < tree.getChildCount(); i++) {
replaceDBAndTableNames(metastoreConf, (ASTNode) tree.getChild(i));
}
}
}
void replaceColumnNames(ASTNode node) {
if (node == null) {
return;
}
int nodeType = node.getToken().getType();
if (nodeType == HiveParser.DOT) {
ASTNode tabident = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier);
ASTNode colIdent = (ASTNode) node.getChild(1);
String column = colIdent.getText().toLowerCase();
String alias = tabident.getText().toLowerCase();
if (aliasToNativeTableInfo.get(alias) != null) {
colIdent.getToken().setText(aliasToNativeTableInfo.get(alias).getNativeColumn(column));
}
} else {
// recurse down
for (int i = 0; i < node.getChildCount(); i++) {
ASTNode child = (ASTNode) node.getChild(i);
replaceColumnNames(child);
}
}
}
/**
* Gets the underlying db name.
*
* @param tbl the table
* @return the underlying db name
* @throws HiveException the hive exception
*/
String getUnderlyingDBName(Table tbl) throws HiveException {
return tbl == null ? null : tbl.getProperty(LensConfConstants.NATIVE_DB_NAME);
}
/**
* Gets the underlying table name.
*
* @param tbl the table
* @return the underlying table name
* @throws HiveException the hive exception
*/
String getUnderlyingTableName(Table tbl) throws HiveException {
return tbl == null ? null : tbl.getProperty(LensConfConstants.NATIVE_TABLE_NAME);
}
}