blob: 3079100c425e669c8111183d46251dceb5a84100 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.sdk.extensions.sql.zetasql;
import com.google.zetasql.ZetaSQLFunction.FunctionSignatureId;
import java.util.Arrays;
import java.util.List;
import org.apache.beam.sdk.annotations.Internal;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.SqlOperator;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet;
/** SqlStdOperatorMappingTable. */
@Internal
public class SqlStdOperatorMappingTable {
static final List<FunctionSignatureId> ZETASQL_BUILTIN_FUNCTION_WHITELIST =
Arrays.asList(
FunctionSignatureId.FN_AND,
FunctionSignatureId.FN_OR,
FunctionSignatureId.FN_NOT,
FunctionSignatureId.FN_MULTIPLY_DOUBLE,
FunctionSignatureId.FN_MULTIPLY_INT64,
FunctionSignatureId.FN_MULTIPLY_NUMERIC,
FunctionSignatureId.FN_DIVIDE_DOUBLE,
FunctionSignatureId.FN_DIVIDE_NUMERIC,
FunctionSignatureId.FN_ADD_DOUBLE,
FunctionSignatureId.FN_ADD_INT64,
FunctionSignatureId.FN_ADD_NUMERIC,
FunctionSignatureId.FN_SUBTRACT_DOUBLE,
FunctionSignatureId.FN_SUBTRACT_INT64,
FunctionSignatureId.FN_SUBTRACT_NUMERIC,
FunctionSignatureId.FN_UNARY_MINUS_INT64,
FunctionSignatureId.FN_UNARY_MINUS_DOUBLE,
FunctionSignatureId.FN_UNARY_MINUS_NUMERIC,
FunctionSignatureId.FN_GREATER,
FunctionSignatureId.FN_GREATER_OR_EQUAL,
FunctionSignatureId.FN_LESS,
FunctionSignatureId.FN_LESS_OR_EQUAL,
FunctionSignatureId.FN_EQUAL,
FunctionSignatureId.FN_NOT_EQUAL,
FunctionSignatureId.FN_IS_NULL,
FunctionSignatureId.FN_IS_TRUE,
FunctionSignatureId.FN_IS_FALSE,
FunctionSignatureId.FN_STARTS_WITH_STRING,
FunctionSignatureId.FN_SUBSTR_STRING,
FunctionSignatureId.FN_TRIM_STRING,
FunctionSignatureId.FN_LTRIM_STRING,
FunctionSignatureId.FN_RTRIM_STRING,
FunctionSignatureId.FN_REPLACE_STRING,
FunctionSignatureId.FN_CONCAT_STRING,
FunctionSignatureId.FN_COUNT_STAR,
FunctionSignatureId.FN_COUNT,
FunctionSignatureId.FN_MAX,
FunctionSignatureId.FN_MIN,
FunctionSignatureId.FN_AVG_DOUBLE,
FunctionSignatureId.FN_AVG_INT64,
FunctionSignatureId.FN_AVG_NUMERIC,
FunctionSignatureId.FN_SUM_DOUBLE,
FunctionSignatureId.FN_SUM_INT64,
FunctionSignatureId.FN_SUM_NUMERIC,
FunctionSignatureId.FN_MOD_INT64,
FunctionSignatureId.FN_MOD_NUMERIC,
FunctionSignatureId.FN_CASE_NO_VALUE,
FunctionSignatureId.FN_CASE_WITH_VALUE,
FunctionSignatureId.FN_TIMESTAMP_ADD,
// TODO: FunctionSignatureId.FN_TIMESTAMP_SUB,
FunctionSignatureId.FN_FLOOR_DOUBLE,
FunctionSignatureId.FN_FLOOR_NUMERIC,
FunctionSignatureId.FN_CEIL_DOUBLE,
FunctionSignatureId.FN_CEIL_NUMERIC,
FunctionSignatureId.FN_REVERSE_STRING,
FunctionSignatureId.FN_CHAR_LENGTH_STRING,
FunctionSignatureId.FN_ENDS_WITH_STRING,
FunctionSignatureId.FN_STRING_LIKE,
FunctionSignatureId.FN_COALESCE,
FunctionSignatureId.FN_IF,
FunctionSignatureId.FN_IFNULL,
FunctionSignatureId.FN_NULLIF,
FunctionSignatureId.FN_EXTRACT_FROM_DATE,
FunctionSignatureId.FN_EXTRACT_FROM_DATETIME,
FunctionSignatureId.FN_EXTRACT_FROM_TIME,
FunctionSignatureId.FN_EXTRACT_FROM_TIMESTAMP,
FunctionSignatureId.FN_TIMESTAMP_FROM_STRING,
FunctionSignatureId.FN_TIMESTAMP_FROM_DATE,
// TODO: FunctionSignatureId.FN_TIMESTAMP_FROM_DATETIME
FunctionSignatureId.FN_DATE_FROM_YEAR_MONTH_DAY
// TODO: FunctionSignatureId.FN_DATE_FROM_TIMESTAMP
);
// todo: Some of operators defined here are later overridden in ZetaSQLPlannerImpl.
// We should remove them from this table and add generic way to provide custom
// implementation. (Ex.: timestamp_add)
public static final ImmutableMap<String, SqlOperator> ZETASQL_FUNCTION_TO_CALCITE_SQL_OPERATOR =
ImmutableMap.<String, SqlOperator>builder()
// grouped window function
.put("TUMBLE", SqlStdOperatorTable.TUMBLE)
.put("HOP", SqlStdOperatorTable.HOP)
.put("SESSION", SqlStdOperatorTable.SESSION)
// built-in logical operator
.put("$and", SqlStdOperatorTable.AND)
.put("$or", SqlStdOperatorTable.OR)
.put("$not", SqlStdOperatorTable.NOT)
// built-in comparison operator
.put("$equal", SqlStdOperatorTable.EQUALS)
.put("$not_equal", SqlStdOperatorTable.NOT_EQUALS)
.put("$greater", SqlStdOperatorTable.GREATER_THAN)
.put("$greater_or_equal", SqlStdOperatorTable.GREATER_THAN_OR_EQUAL)
.put("$less", SqlStdOperatorTable.LESS_THAN)
.put("$less_or_equal", SqlStdOperatorTable.LESS_THAN_OR_EQUAL)
.put("$like", SqlOperators.LIKE)
// .put("$in", SqlStdOperatorTable.IN)
// .put("$between", SqlStdOperatorTable.BETWEEN)
.put("$is_null", SqlStdOperatorTable.IS_NULL)
.put("$is_true", SqlStdOperatorTable.IS_TRUE)
.put("$is_false", SqlStdOperatorTable.IS_FALSE)
// +, -, *, /
.put("$add", SqlStdOperatorTable.PLUS)
.put("$subtract", SqlStdOperatorTable.MINUS)
.put("$multiply", SqlStdOperatorTable.MULTIPLY)
.put("$unary_minus", SqlStdOperatorTable.UNARY_MINUS)
.put("$divide", SqlStdOperatorTable.DIVIDE)
// built-in string function
.put("concat", SqlOperators.CONCAT)
// .put("lower", SqlStdOperatorTable.LOWER)
// .put("upper", SqlStdOperatorTable.UPPER)
.put("substr", SqlOperators.SUBSTR)
.put("trim", SqlOperators.TRIM)
.put("replace", SqlOperators.REPLACE)
.put("char_length", SqlOperators.CHAR_LENGTH)
// string function UDFs
// .put("strpos", )
// .put("length", )
// tells Calcite codegen that starts_with function is a udf.
.put("starts_with", SqlOperators.START_WITHS)
.put("ends_with", SqlOperators.ENDS_WITH)
.put("ltrim", SqlOperators.LTRIM)
.put("rtrim", SqlOperators.RTRIM)
// .put("regexp_match",)
// .put("regexp_extract",)
// .put("regexp_replace",)
// .put("regexp_extract_all",)
// .put("byte_length",)
// .put("format",)
// .put("split",)
// .put("regexp_contains", )
// .put("normalize",)
// .put("to_base32",)
// .put("to_base64",)
// .put("to_hex",)
// .put("from_base64",)
// .put("from_base32",)
// .put("from_hex",)
// .put("to_code_points")
// .put("code_points_to_string")
// .put("lpad", )
// .put("rpad", )
// .put("repeat", )
.put("reverse", SqlOperators.REVERSE)
// built-in aggregate function
.put("$count_star", SqlStdOperatorTable.COUNT)
// TODO: add support to all aggregate functions.
.put("max", SqlStdOperatorTable.MAX)
.put("min", SqlStdOperatorTable.MIN)
.put("avg", SqlStdOperatorTable.AVG)
.put("sum", SqlStdOperatorTable.SUM)
// .put("any_value", SqlStdOperatorTable.ANY_VALUE)
.put("count", SqlStdOperatorTable.COUNT)
// aggregate UDF
// .put("array_agg", )
// .put("array_concat_agg")
// .put("string_agg")
// .put("bit_and")
// .put("bit_or")
// .put("bit_xor")
// .put("logical_and")
// .put("logical_or")
// built-in statistical aggregate function
// .put("covar_pop", SqlStdOperatorTable.COVAR_POP)
// .put("covar_samp", SqlStdOperatorTable.COVAR_SAMP)
// .put("stddev_pop", SqlStdOperatorTable.STDDEV_POP)
// .put("stddev_samp", SqlStdOperatorTable.STDDEV_SAMP)
// .put("var_pop", SqlStdOperatorTable.VAR_POP)
// .put("var_samp", SqlStdOperatorTable.VAR_SAMP)
// statistical aggregate UDF
// .put("corr", )
// built-in approximate aggregate function
// .put("approx_count_distinct", SqlStdOperatorTable.APPROX_COUNT_DISTINCT)
// approximate aggregate UDF
// .put("approx_quantiles", )
// .put("approx_top_sum")
// HLL++ UDF
// hll_count.merge
// hll_count.extract
// hll_count.init
// hll_count.merge_partial
// CAST
// CAST operator does not go through lookup table.
// .put("cast", SqlStdOperatorTable.CAST)
// built-in math functions
// .put("math", SqlStdOperatorTable.ABS)
// .put("sign", SqlStdOperatorTable.SIGN)
// .put("round", SqlStdOperatorTable.ROUND)
.put("ceil", SqlStdOperatorTable.CEIL)
.put("floor", SqlStdOperatorTable.FLOOR)
.put("mod", SqlStdOperatorTable.MOD)
// .put("sqrt", SqlStdOperatorTable.SQRT)
// .put("exp", SqlStdOperatorTable.EXP)
// .put("ln and log", SqlStdOperatorTable.LN)
// .put("log10", SqlStdOperatorTable.LOG10)
// .put("cos", SqlStdOperatorTable.COS)
// .put("acos", SqlStdOperatorTable.ACOS)
// .put("sin", SqlStdOperatorTable.SIN)
// .put("asin", SqlStdOperatorTable.ASIN)
// .put("tan", SqlStdOperatorTable.TAN)
// .put("atan", SqlStdOperatorTable.ATAN)
// .put("atan2", SqlStdOperatorTable.ATAN2)
// .put("abs", SqlStdOperatorTable.ABS)
// .put("pow", SqlStdOperatorTable.POWER)
// .put("div", SqlStdOperatorTable.DIVIDE)
// .put("trunc", SqlStdOperatorTable.TRUNCATE)
// math UDF
// .put("is_inf",)
// .put("is_nan",)
// .put("ieee_divide")
// .put("safe_add")
// .put("safe_divide")
// .put("safe_subtract")
// .put("safe_multiply")
// .put("safe_negate")
// .put("greatest")
// .put("least")
// .put("log")
// .put("cosh")
// .put("acosh")
// .put("sinh")
// .put("asinh")
// .put("tanh")
// .put("atanh")
// Analytic functions
// .put("dense_rank", SqlStdOperatorTable.DENSE_RANK)
// .put("rank", SqlStdOperatorTable.RANK)
// .put("row_number", SqlStdOperatorTable.ROW_NUMBER)
// .put("percent_rank", SqlStdOperatorTable.PERCENT_RANK)
// .put("cume_dist", SqlStdOperatorTable.CUME_DIST)
// .put("ntile", SqlStdOperatorTable.NTILE)
// .put("lead", SqlStdOperatorTable.LEAD)
// .put("lag", SqlStdOperatorTable.LAG)
// .put("first_value", SqlStdOperatorTable.FIRST_VALUE)
// .put("last_value", SqlStdOperatorTable.LAST_VALUE)
// .put("nth_value", SqlStdOperatorTable.NTH_VALUE)
// .put("percentile_cont", )
// .put("percentile_disc",)
// misc functions
// .put("fingerprint")
// .put("fingerprint2011")
// hash functions
// .put("md5")
// .put("sha1")
// .put("sha256")
// .put("sha512")
// date functions
// .put("date_add", SqlStdOperatorTable.DATETIME_PLUS)
// .put("date_sub", SqlStdOperatorTable.MINUS_DATE)
.put("date", SqlOperators.DATE_OP)
// time functions
// .put("time_add", SqlStdOperatorTable.DATETIME_PLUS)
// .put("time_sub", SqlStdOperatorTable.MINUS_DATE)
// timestamp functions
.put(
"timestamp_add",
SqlStdOperatorTable.DATETIME_PLUS) // overridden in ZetaSQLPlannerImpl
// .put("timestamp_sub", SqlStdOperatorTable.MINUS_DATE)
.put("$extract", SqlStdOperatorTable.EXTRACT)
.put("timestamp", SqlOperators.TIMESTAMP_OP)
// other functions
// .put("session_user", SqlStdOperatorTable.SESSION_USER)
// .put("bit_cast_to_int32")
// .put("bit_cast_to_int64")
// .put("bit_cast_to_uint32")
// .put("bit_cast_to_uint64")
// .put("countif", )
// case operator
.put("$case_no_value", SqlStdOperatorTable.CASE)
// if operator - IF(cond, pos, neg) can actually be mapped directly to `CASE WHEN cond
// THEN pos ELSE neg`
.put("if", SqlStdOperatorTable.CASE)
// $case_no_value specializations
// all of these operators can have their operands adjusted to achieve the same thing with
// a call to $case_with_value
.put("$case_with_value", SqlStdOperatorTable.CASE)
.put("coalesce", SqlStdOperatorTable.CASE)
.put("ifnull", SqlStdOperatorTable.CASE)
.put("nullif", SqlStdOperatorTable.CASE)
.build();
// argument one and two should compose of a interval.
public static final ImmutableSet<String> FUNCTION_FAMILY_DATE_ADD =
ImmutableSet.of(
"date_add",
"date_sub",
"datetime_add",
"datetime_sub",
"time_add",
"time_sub",
"timestamp_add",
"timestamp_sub");
public static final ImmutableMap<String, SqlOperatorRewriter>
ZETASQL_FUNCTION_TO_CALCITE_SQL_OPERATOR_REWRITER =
ImmutableMap.<String, SqlOperatorRewriter>builder()
.put("$case_with_value", new SqlCaseWithValueOperatorRewriter())
.put("coalesce", new SqlCoalesceOperatorRewriter())
.put("ifnull", new SqlIfNullOperatorRewriter())
.put("nullif", new SqlNullIfOperatorRewriter())
.put("$extract", new SqlExtractTimestampOperatorRewriter())
.build();
}