/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
 */

package org.apache.hadoop.hive.ql.optimizer.calcite.translator;

import java.math.BigDecimal;
import java.util.stream.Collectors;
import org.apache.calcite.adapter.druid.DruidQuery;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rex.RexDynamicParam;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.util.DateString;
import org.apache.calcite.util.TimeString;
import org.apache.calcite.util.TimestampString;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.HiveJdbcConverter;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
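
/**
 * Builder for Hive {@link ASTNode} trees. It is used when translating an
 * optimized Calcite {@link RelNode} plan back into the Hive AST that the
 * semantic analyzer understands: roots are created through the static factory
 * methods and children are attached through the fluent {@code add} methods.
 */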
public class ASTBuilder {
public static final Logger LOGGER = LoggerFactory.getLogger(ASTBuilder.class);
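
  /**
   * Starts a builder whose root is a fresh node with the given token type and
   * text.
   *
   * <p>Illustrative usage, composing nodes the way the helpers below do:
   * <pre>{@code
   * ASTNode where = ASTBuilder.construct(HiveParser.TOK_WHERE, "TOK_WHERE")
   *     .add(cond)
   *     .node();
   * }</pre>
   */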
public static ASTBuilder construct(int tokenType, String text) {
ASTBuilder b = new ASTBuilder();
b.curr = createAST(tokenType, text);
return b;
}
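
  /**
   * Creates a single AST node with the given token type and text through the
   * shared ANTLR tree adaptor of {@link ParseDriver}.
   */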
public static ASTNode createAST(int tokenType, String text) {
return (ASTNode) ParseDriver.adaptor.create(tokenType, text);
}
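
  /**
   * Builds the {@code TOK_DESTINATION(TOK_DIR(TOK_TMP_FILE))} subtree, i.e.
   * the implicit destination that writes query results to a temporary file.
   */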
public static ASTNode destNode() {
return ASTBuilder
.construct(HiveParser.TOK_DESTINATION, "TOK_DESTINATION")
.add(
ASTBuilder.construct(HiveParser.TOK_DIR, "TOK_DIR").add(HiveParser.TOK_TMP_FILE,
"TOK_TMP_FILE")).node();
}
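
  /**
   * Builds a {@code TOK_TABREF} subtree for the given scan, including time
   * travel (AS OF) clauses and table properties. For Druid and JDBC scans the
   * generated query string, field names and field types are attached as table
   * properties so the corresponding storage handler can pick them up at
   * execution time.
   */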
public static ASTNode table(final RelNode scan) {
HiveTableScan hts = null;
if (scan instanceof HiveJdbcConverter) {
hts = ((HiveJdbcConverter) scan).getTableScan().getHiveTableScan();
} else if (scan instanceof DruidQuery) {
hts = (HiveTableScan) ((DruidQuery) scan).getTableScan();
} else {
hts = (HiveTableScan) scan;
}
assert hts != null;
RelOptHiveTable hTbl = (RelOptHiveTable) hts.getTable();
ASTBuilder tableNameBuilder = ASTBuilder.construct(HiveParser.TOK_TABNAME, "TOK_TABNAME")
.add(HiveParser.Identifier, hTbl.getHiveTableMD().getDbName())
.add(HiveParser.Identifier, hTbl.getHiveTableMD().getTableName());
if (hTbl.getHiveTableMD().getMetaTable() != null) {
tableNameBuilder.add(HiveParser.Identifier, hTbl.getHiveTableMD().getMetaTable());
} else if (hTbl.getHiveTableMD().getBranchName() != null) {
tableNameBuilder.add(HiveParser.Identifier, hTbl.getHiveTableMD().getBranchName());
}
ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_TABREF, "TOK_TABREF").add(tableNameBuilder);
if (hTbl.getHiveTableMD().getAsOfTimestamp() != null) {
ASTBuilder asOfBuilder = ASTBuilder.construct(HiveParser.TOK_AS_OF_TIME, "TOK_AS_OF_TIME")
.add(HiveParser.StringLiteral, hTbl.getHiveTableMD().getAsOfTimestamp());
b.add(asOfBuilder);
}
if (hTbl.getHiveTableMD().getAsOfVersion() != null) {
ASTBuilder asOfBuilder = ASTBuilder.construct(HiveParser.TOK_AS_OF_VERSION, "TOK_AS_OF_VERSION")
.add(HiveParser.Number, hTbl.getHiveTableMD().getAsOfVersion());
b.add(asOfBuilder);
}
if (hTbl.getHiveTableMD().getVersionIntervalFrom() != null) {
ASTBuilder asOfBuilder = ASTBuilder.construct(HiveParser.TOK_FROM_VERSION, "TOK_FROM_VERSION")
.add(HiveParser.Number, hTbl.getHiveTableMD().getVersionIntervalFrom());
b.add(asOfBuilder);
}
ASTBuilder propList = ASTBuilder.construct(HiveParser.TOK_TABLEPROPLIST, "TOK_TABLEPROPLIST");
if (scan instanceof DruidQuery) {
      // Passing the query spec, column names, and column types to be used as
      // part of Hive physical execution.
      DruidQuery dq = (DruidQuery) scan;
      // Adding the query spec to be used by org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat.
propList.add(ASTBuilder.construct(HiveParser.TOK_TABLEPROPERTY, "TOK_TABLEPROPERTY")
.add(HiveParser.StringLiteral, "\"" + Constants.DRUID_QUERY_JSON + "\"")
.add(HiveParser.StringLiteral, "\"" + SemanticAnalyzer.escapeSQLString(
dq.getQueryString()) + "\""));
// Adding column names used later by org.apache.hadoop.hive.druid.serde.DruidSerDe
propList.add(ASTBuilder.construct(HiveParser.TOK_TABLEPROPERTY, "TOK_TABLEPROPERTY")
.add(HiveParser.StringLiteral, "\"" + Constants.DRUID_QUERY_FIELD_NAMES + "\"")
.add(HiveParser.StringLiteral,
"\"" + dq.getRowType().getFieldNames().stream().map(Object::toString)
.collect(Collectors.joining(",")) + "\""
));
// Adding column types used later by org.apache.hadoop.hive.druid.serde.DruidSerDe
propList.add(ASTBuilder.construct(HiveParser.TOK_TABLEPROPERTY, "TOK_TABLEPROPERTY")
.add(HiveParser.StringLiteral, "\"" + Constants.DRUID_QUERY_FIELD_TYPES + "\"")
.add(HiveParser.StringLiteral,
"\"" + dq.getRowType().getFieldList().stream()
.map(e -> TypeConverter.convert(e.getType()).getTypeName())
.collect(Collectors.joining(",")) + "\""
));
propList.add(ASTBuilder.construct(HiveParser.TOK_TABLEPROPERTY, "TOK_TABLEPROPERTY")
.add(HiveParser.StringLiteral, "\"" + Constants.DRUID_QUERY_TYPE + "\"")
.add(HiveParser.StringLiteral, "\"" + dq.getQueryType().getQueryName() + "\""));
} else if (scan instanceof HiveJdbcConverter) {
HiveJdbcConverter jdbcConverter = (HiveJdbcConverter) scan;
final String query = jdbcConverter.generateSql();
      LOGGER.debug("Generated SQL query: {}{}", System.lineSeparator(), query);
propList.add(ASTBuilder.construct(HiveParser.TOK_TABLEPROPERTY, "TOK_TABLEPROPERTY")
.add(HiveParser.StringLiteral, "\"" + Constants.JDBC_QUERY + "\"")
.add(HiveParser.StringLiteral, "\"" + SemanticAnalyzer.escapeSQLString(query) + "\""));
// Whether we can split the query
propList.add(ASTBuilder.construct(HiveParser.TOK_TABLEPROPERTY, "TOK_TABLEPROPERTY")
.add(HiveParser.StringLiteral, "\"" + Constants.JDBC_SPLIT_QUERY + "\"")
.add(HiveParser.StringLiteral, "\"" + jdbcConverter.splittingAllowed() + "\""));
      // Adding column names used later by the JDBC storage handler
propList.add(ASTBuilder.construct(HiveParser.TOK_TABLEPROPERTY, "TOK_TABLEPROPERTY")
.add(HiveParser.StringLiteral, "\"" + Constants.JDBC_QUERY_FIELD_NAMES + "\"")
.add(HiveParser.StringLiteral,
"\"" + scan.getRowType().getFieldNames().stream().map(Object::toString)
.collect(Collectors.joining(",")) + "\""
));
      // Adding column types used later by the JDBC storage handler
propList.add(ASTBuilder.construct(HiveParser.TOK_TABLEPROPERTY, "TOK_TABLEPROPERTY")
.add(HiveParser.StringLiteral, "\"" + Constants.JDBC_QUERY_FIELD_TYPES + "\"")
.add(HiveParser.StringLiteral,
"\"" + scan.getRowType().getFieldList().stream()
.map(e -> TypeConverter.convert(e.getType()).getTypeName())
.collect(Collectors.joining(",")) + "\""
));
}
if (hts.isInsideView()) {
      // We need to carry the insideView information from Calcite into the AST.
propList.add(ASTBuilder.construct(HiveParser.TOK_TABLEPROPERTY, "TOK_TABLEPROPERTY")
.add(HiveParser.StringLiteral, "\"insideView\"")
.add(HiveParser.StringLiteral, "\"TRUE\""));
}
if (hts.getTableScanTrait() != null) {
      // We need to carry the fetchDeletedRows information from Calcite into the AST.
propList.add(ASTBuilder.construct(HiveParser.TOK_TABLEPROPERTY, "TOK_TABLEPROPERTY")
.add(HiveParser.StringLiteral, String.format("\"%s\"", hts.getTableScanTrait().getPropertyKey()))
.add(HiveParser.StringLiteral, "\"TRUE\""));
}
b.add(ASTBuilder.construct(HiveParser.TOK_TABLEPROPERTIES, "TOK_TABLEPROPERTIES").add(propList));
    // NOTE: Calcite considers tables to be equal if their names are the same.
    // Hence we need to provide Calcite the fully qualified table name
    // (dbname.tblname) and not the user-provided aliases.
    // However, in Hive a DB name cannot appear in a select list; in a join
    // where table names differ only in their DB name, Hive requires the user
    // to introduce explicit aliases for the tables.
b.add(HiveParser.Identifier, hts.getTableAlias());
return b.node();
}
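
  /**
   * Builds the join subtree over the two inputs, mapping Calcite's
   * {@link JoinRelType} to the corresponding Hive token, e.g. {@code LEFT} to
   * {@code TOK_LEFTOUTERJOIN} and {@code ANTI} to
   * {@code TOK_LEFTANTISEMIJOIN}.
   */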
public static ASTNode join(ASTNode left, ASTNode right, JoinRelType joinType, ASTNode cond) {
ASTBuilder b = null;
switch (joinType) {
case SEMI:
b = ASTBuilder.construct(HiveParser.TOK_LEFTSEMIJOIN, "TOK_LEFTSEMIJOIN");
break;
case INNER:
b = ASTBuilder.construct(HiveParser.TOK_JOIN, "TOK_JOIN");
break;
case LEFT:
b = ASTBuilder.construct(HiveParser.TOK_LEFTOUTERJOIN, "TOK_LEFTOUTERJOIN");
break;
case RIGHT:
b = ASTBuilder.construct(HiveParser.TOK_RIGHTOUTERJOIN, "TOK_RIGHTOUTERJOIN");
break;
case FULL:
b = ASTBuilder.construct(HiveParser.TOK_FULLOUTERJOIN, "TOK_FULLOUTERJOIN");
break;
case ANTI:
b = ASTBuilder.construct(HiveParser.TOK_LEFTANTISEMIJOIN, "TOK_LEFTANTISEMIJOIN");
      break;
    default:
      throw new IllegalArgumentException("Unsupported join type: " + joinType);
    }
b.add(left).add(right).add(cond);
return b.node();
}
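
  /** Builds {@code TOK_SUBQUERY(qry, alias)}, i.e. {@code (qry) alias}. */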
public static ASTNode subQuery(ASTNode qry, String alias) {
return ASTBuilder.construct(HiveParser.TOK_SUBQUERY, "TOK_SUBQUERY").add(qry)
.add(HiveParser.Identifier, alias).node();
}
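
  /**
   * Builds the AST for a column reference qualified by a table name or alias;
   * for example, {@code qualifiedName("t", "c")} yields the subtree for
   * {@code t.c}.
   */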
public static ASTNode qualifiedName(String tableName, String colName) {
ASTBuilder b = ASTBuilder
.construct(HiveParser.DOT, ".")
.add(
ASTBuilder.construct(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL").add(
HiveParser.Identifier, tableName)).add(HiveParser.Identifier, colName);
return b.node();
}
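
  /** Builds {@code TOK_TABLE_OR_COL(colName)} for an unqualified column reference. */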
public static ASTNode unqualifiedName(String colName) {
ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL").add(
HiveParser.Identifier, colName);
return b.node();
}
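
  /** Wraps a boolean expression in a {@code TOK_WHERE} node. */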
public static ASTNode where(ASTNode cond) {
return ASTBuilder.construct(HiveParser.TOK_WHERE, "TOK_WHERE").add(cond).node();
}
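
  /** Wraps a boolean expression in a {@code TOK_HAVING} node. */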
public static ASTNode having(ASTNode cond) {
return ASTBuilder.construct(HiveParser.TOK_HAVING, "TOK_HAVING").add(cond).node();
}
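
  /**
   * Builds {@code TOK_LIMIT(offset, limit)}; both arguments are rendered as
   * {@code Number} tokens via {@code toString()}.
   */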
public static ASTNode limit(Object offset, Object limit) {
return ASTBuilder.construct(HiveParser.TOK_LIMIT, "TOK_LIMIT")
.add(HiveParser.Number, offset.toString())
.add(HiveParser.Number, limit.toString()).node();
}
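
  /** Builds {@code TOK_SELEXPR(expr, alias)}, i.e. {@code expr AS alias}. */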
public static ASTNode selectExpr(ASTNode expr, String alias) {
return ASTBuilder.construct(HiveParser.TOK_SELEXPR, "TOK_SELEXPR").add(expr)
.add(HiveParser.Identifier, alias).node();
}
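
  /**
   * Converts a Calcite {@link RexLiteral} into the equivalent Hive literal
   * node. For example, a {@code BIGINT} literal {@code 5} becomes the
   * {@code IntegralLiteral} token {@code 5L}, and a {@code VARCHAR} literal is
   * SQL-escaped and wrapped in single quotes as a {@code StringLiteral}. A
   * null value of any supported type is rendered as {@code TOK_NULL}.
   */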
public static ASTNode literal(RexLiteral literal) {
Object val = null;
int type = 0;
SqlTypeName sqlType = literal.getType().getSqlTypeName();
switch (sqlType) {
case BINARY:
case DATE:
case TIME:
case TIMESTAMP:
case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
case INTERVAL_DAY:
case INTERVAL_DAY_HOUR:
case INTERVAL_DAY_MINUTE:
case INTERVAL_DAY_SECOND:
case INTERVAL_HOUR:
case INTERVAL_HOUR_MINUTE:
case INTERVAL_HOUR_SECOND:
case INTERVAL_MINUTE:
case INTERVAL_MINUTE_SECOND:
case INTERVAL_MONTH:
case INTERVAL_SECOND:
case INTERVAL_YEAR:
case INTERVAL_YEAR_MONTH:
case MAP:
case ARRAY:
case ROW:
if (literal.getValue() == null) {
return ASTBuilder.construct(HiveParser.TOK_NULL, "TOK_NULL").node();
}
break;
case TINYINT:
case SMALLINT:
case INTEGER:
case BIGINT:
case DOUBLE:
case DECIMAL:
case FLOAT:
case REAL:
case VARCHAR:
case CHAR:
case BOOLEAN:
if (literal.getValue3() == null) {
return ASTBuilder.construct(HiveParser.TOK_NULL, "TOK_NULL").node();
}
}
switch (sqlType) {
case TINYINT:
case SMALLINT:
case INTEGER:
case BIGINT:
val = literal.getValue3();
      // Calcite represents all numeric literals as BigDecimal values, whereas
      // Hive distinguishes between them, most importantly via IntegralLiteral.
if (val instanceof BigDecimal) {
val = ((BigDecimal) val).longValue();
}
switch (sqlType) {
case TINYINT:
val += "Y";
break;
case SMALLINT:
val += "S";
break;
case INTEGER:
val += "";
break;
case BIGINT:
val += "L";
break;
}
type = HiveParser.IntegralLiteral;
break;
case DOUBLE:
val = literal.getValue3() + "D";
type = HiveParser.NumberLiteral;
break;
case DECIMAL:
val = literal.getValue3() + "BD";
type = HiveParser.NumberLiteral;
break;
case FLOAT:
case REAL:
val = literal.getValue3() + "F";
type = HiveParser.Number;
break;
case VARCHAR:
case CHAR:
val = literal.getValue3();
String escapedVal = BaseSemanticAnalyzer.escapeSQLString(String.valueOf(val));
type = HiveParser.StringLiteral;
val = "'" + escapedVal + "'";
break;
case BOOLEAN:
val = literal.getValue3();
type = ((Boolean) val).booleanValue() ? HiveParser.KW_TRUE : HiveParser.KW_FALSE;
break;
case DATE:
val = "'" + literal.getValueAs(DateString.class).toString() + "'";
type = HiveParser.TOK_DATELITERAL;
break;
case TIME:
val = "'" + literal.getValueAs(TimeString.class).toString() + "'";
type = HiveParser.TOK_TIMESTAMPLITERAL;
break;
case TIMESTAMP:
val = "'" + literal.getValueAs(TimestampString.class).toString() + "'";
type = HiveParser.TOK_TIMESTAMPLITERAL;
break;
case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
// Calcite stores timestamp with local time-zone in UTC internally, thus
// when we bring it back, we need to add the UTC suffix.
val = "'" + literal.getValueAs(TimestampString.class).toString() + " UTC'";
type = HiveParser.TOK_TIMESTAMPLOCALTZLITERAL;
break;
case INTERVAL_YEAR:
case INTERVAL_MONTH:
case INTERVAL_YEAR_MONTH: {
type = HiveParser.TOK_INTERVAL_YEAR_MONTH_LITERAL;
BigDecimal monthsBd = (BigDecimal) literal.getValue();
HiveIntervalYearMonth intervalYearMonth = new HiveIntervalYearMonth(monthsBd.intValue());
val = "'" + intervalYearMonth.toString() + "'";
}
break;
case INTERVAL_DAY:
case INTERVAL_DAY_HOUR:
case INTERVAL_DAY_MINUTE:
case INTERVAL_DAY_SECOND:
case INTERVAL_HOUR:
case INTERVAL_HOUR_MINUTE:
case INTERVAL_HOUR_SECOND:
case INTERVAL_MINUTE:
case INTERVAL_MINUTE_SECOND:
case INTERVAL_SECOND: {
type = HiveParser.TOK_INTERVAL_DAY_TIME_LITERAL;
BigDecimal millisBd = (BigDecimal) literal.getValue();
// Calcite literal is in millis, convert to seconds
BigDecimal secsBd = millisBd.divide(BigDecimal.valueOf(1000));
HiveIntervalDayTime intervalDayTime = new HiveIntervalDayTime(secsBd);
val = "'" + intervalDayTime.toString() + "'";
}
break;
case NULL:
type = HiveParser.TOK_NULL;
break;
    // BINARY and ROW types should not be seen here.
case BINARY:
case ROW:
default:
throw new RuntimeException("Unsupported Type: " + sqlType);
}
return (ASTNode) ParseDriver.adaptor.create(type, String.valueOf(val));
}
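
  /**
   * Builds a {@code TOK_PARAMETER} node for a dynamic parameter (e.g. a
   * prepared-statement {@code ?}), carrying the parameter index as its text.
   */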
  public static ASTNode dynamicParam(RexDynamicParam param) {
    return (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_PARAMETER,
        Integer.toString(param.getIndex()));
  }
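
  /** Root of the subtree under construction. */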
ASTNode curr;
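
  /** Returns the subtree built so far. */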
public ASTNode node() {
return curr;
}
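
  /** Creates a node from the token type and text and adds it as a child of the root. */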
public ASTBuilder add(int tokenType, String text) {
ParseDriver.adaptor.addChild(curr, createAST(tokenType, text));
return this;
}
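
  /** Adds the other builder's subtree as a child of this builder's root. */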
public ASTBuilder add(ASTBuilder b) {
ParseDriver.adaptor.addChild(curr, b.curr);
return this;
}
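
  /** Adds the given node as a child of the root; {@code null} nodes are skipped. */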
public ASTBuilder add(ASTNode n) {
if (n != null) {
ParseDriver.adaptor.addChild(curr, n);
}
return this;
}
}