gensrc/script/gen_functions.py - doris - Git at Google

 #!/usr/bin/env python
 # encoding: utf-8

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 # This file is copied from
 # https://github.com/cloudera/Impala/blob/v0.7refresh/common/function-registry/gen_functions.py
 # and modified by Doris

 """
 # This script will generate the implementation of the simple functions for the BE.
 # These include:
 #   - Arithmetic functions
 #   - Binary functions
 #   - Cast functions
 #
 # The script outputs (run: 'src/common/function/gen_functions.py')
 #   - header and implementation for above functions:
 #     - src/gen_cpp/opcode/functions.[h/cc]
 #   - python file that contains the metadata for those functions:
 #     - src/gen_cpp/generated_functions.py
 """

 import errno
 import os
 import string
 import sys

 # C++ body for a one-operand operator.  The generated function evaluates
 # child 0, returns NULL when that value is NULL, and otherwise stores
 # `${native_op} *val` in the result field and returns its address.
 unary_op = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   e->_result.${result_field} = ${native_op} *val;\n\
   return &e->_result.${result_field};\n\
 }\n\n")


 # C++ body for a two-operand operator that must not see a zero right-hand
 # side (mapped to Divide, Int_Divide and Mod in `templates`).  Returns NULL
 # when either operand is NULL or when the second operand, converted to
 # double, compares equal to 0; otherwise stores `*val1 ${native_op} *val2`.
 binary_op_divid = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op1 = e->children()[0];\n\
   ${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->get_value(row));\n\
   Expr* op2 = e->children()[1];\n\
   ${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->get_value(row));\n\
   if (val1 == NULL || val2 == NULL) return NULL;\n\
   double value= *val2;\n\
   if (value == 0) return NULL;\n\
   e->_result.${result_field} = (*val1 ${native_op} *val2);\n\
   return &e->_result.${result_field};\n\
 }\n\n")

 # C++ body for a plain two-operand operator.  Returns NULL when either
 # operand is NULL; otherwise stores `*val1 ${native_op} *val2` in the result
 # field and returns its address.
 binary_op = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op1 = e->children()[0];\n\
   ${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->get_value(row));\n\
   Expr* op2 = e->children()[1];\n\
   ${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->get_value(row));\n\
   if (val1 == NULL || val2 == NULL) return NULL;\n\
   e->_result.${result_field} = (*val1 ${native_op} *val2);\n\
   return &e->_result.${result_field};\n\
 }\n\n")

 # C++ body for the DOUBLE flavour of Mod: same NULL and zero-divisor checks
 # as binary_op_divid, but the result is computed with fmod() instead of an
 # operator.
 double_mod = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op1 = e->children()[0];\n\
   ${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->get_value(row));\n\
   Expr* op2 = e->children()[1];\n\
   ${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->get_value(row));\n\
   if (val1 == NULL || val2 == NULL) return NULL;\n\
   double value= *val2;\n\
   if (value == 0) return NULL;\n\
   e->_result.${result_field} = fmod(*val1, *val2);\n\
   return &e->_result.${result_field};\n\
 }\n\n")

 # C++ body for a two-operand operation implemented as a member function of
 # the first operand: stores `val1->${native_func}(*val2)`.  Needs the extra
 # `native_func` substitution key, which the main block adds from
 # `native_funcs` whenever this template is selected.
 binary_func = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op1 = e->children()[0];\n\
   ${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->get_value(row));\n\
   Expr* op2 = e->children()[1];\n\
   ${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->get_value(row));\n\
   if (val1 == NULL || val2 == NULL) return NULL;\n\
   e->_result.${result_field} = val1->${native_func}(*val2);\n\
   return &e->_result.${result_field};\n\
 }\n\n")

 # C++ body for casting FLOAT to decimal: returns NULL for a NULL operand,
 # otherwise calls assign_from_float() on the result field.
 float_to_decimal = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   e->_result.${result_field}.assign_from_float(*val);;\n\
   return &e->_result.${result_field};\n\
 }\n\n")

 # C++ body for casting DOUBLE to decimal: identical to float_to_decimal
 # except that it calls assign_from_double().
 double_to_decimal = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   e->_result.${result_field}.assign_from_double(*val);;\n\
   return &e->_result.${result_field};\n\
 }\n\n")

 # Default C++ body for Cast: returns NULL for a NULL operand, otherwise
 # assigns `*val` straight to the result field and lets the C++ assignment
 # perform the conversion.
 cast = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   e->_result.${result_field} = *val;\n\
   return &e->_result.${result_field};\n\
 }\n\n")

 # C++ body for casting a string to an integer type via
 # StringParser::string_to_int<${native_type2}>.  Returns NULL for a NULL
 # operand or when the parse result is not PARSE_SUCCESS.
 string_to_int = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   StringParser::ParseResult result;\n\
   e->_result.${result_field} = \
       StringParser::string_to_int<${native_type2}>(val->ptr, val->len, &result);\n\
   if (UNLIKELY(result != StringParser::PARSE_SUCCESS)) return NULL;\n\
   return &e->_result.${result_field};\n\
 }\n\n")

 # C++ body for casting a string to a floating-point type via
 # StringParser::string_to_float<${native_type2}>; same NULL and
 # parse-failure handling as string_to_int.
 string_to_float = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   StringParser::ParseResult result;\n\
   e->_result.${result_field} = \
       StringParser::string_to_float<${native_type2}>(val->ptr, val->len, &result);\n\
   if (UNLIKELY(result != StringParser::PARSE_SUCCESS)) return NULL;\n\
   return &e->_result.${result_field};\n\
 }\n\n")

 # C++ body for casting a numeric value to DATE: fills the result field with
 # from_date_int64(), returns NULL if that call reports failure, then calls
 # cast_to_date() on the result.
 numeric_to_date = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   DateTimeValue *date_val = &e->_result.${result_field};\n\
   if (!date_val->from_date_int64(*val)) {\n\
     return NULL;\n\
   }\n\
   date_val->cast_to_date();\n\
   return date_val;\n\
 }\n\n")

 # C++ body for casting a string to DATE: same shape as numeric_to_date but
 # parses with from_date_str(ptr, len).
 string_to_date = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   DateTimeValue *date_val = &e->_result.${result_field};\n\
   if (!date_val->from_date_str(val->ptr, val->len)) {\n\
     return NULL;\n\
   }\n\
   date_val->cast_to_date();\n\
   return date_val;\n\
 }\n\n")

 # C++ body for casting DATETIME to DATE: copies the operand into the result
 # field and calls cast_to_date() on the copy.  NULL operand yields NULL.
 datetime_to_date = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   DateTimeValue *date_val = &e->_result.${result_field};\n\
   *date_val = *val;\n\
   date_val->cast_to_date();\n\
   return date_val;\n\
 }\n\n")

 # C++ body for casting a numeric value to DATETIME: fills the result field
 # with from_date_int64(), returns NULL if that call reports failure, then
 # calls to_datetime() on the result.
 numeric_to_datetime = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   DateTimeValue *date_val = &e->_result.${result_field};\n\
   if (!date_val->from_date_int64(*val)) {\n\
     return NULL;\n\
   }\n\
   date_val->to_datetime();\n\
   return date_val;\n\
 }\n\n")

 # C++ body for casting a string to DATETIME: same shape as
 # numeric_to_datetime but parses with from_date_str(ptr, len).
 string_to_datetime = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   DateTimeValue *date_val = &e->_result.${result_field};\n\
   if (!date_val->from_date_str(val->ptr, val->len)) {\n\
     return NULL;\n\
   }\n\
   date_val->to_datetime();\n\
   return date_val;\n\
 }\n\n")

 # C++ body for casting DATE to DATETIME: copies the operand into the result
 # field and calls to_datetime() on the copy.  NULL operand yields NULL.
 date_to_datetime = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   DateTimeValue *date_val = &e->_result.${result_field};\n\
   *date_val = *val;\n\
   date_val->to_datetime();\n\
   return date_val;\n\
 }\n\n")

 # C++ body for casting DATE/DATETIME to a numeric type: stores
 # val->to_int64() in the result field.  NULL operand yields NULL.
 datetime_to_numeric = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   e->_result.${result_field} = val->to_int64();\n\
   return &e->_result.${result_field};\n\
 }\n\n")

 # C++ body for casting decimal to string: passes val->to_string() to
 # set_string_val().  NULL operand yields NULL.
 decimal_to_string = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   e->_result.set_string_val(val->to_string());\n\
   return &e->_result.${result_field};\n\
 }\n\n")

 # C++ body for casting DATE/DATETIME to string: formats the value into a
 # 64-byte stack buffer with val->to_string(buf) and passes that buffer to
 # set_string_val().
 datetime_to_string = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   char buf[64];\n\
   val->to_string(buf);\n\
   e->_result.set_string_val(buf);\n\
   return &e->_result.${result_field};\n\
 }\n\n")

 # C++ body for casting a numeric value to string with std::to_string().
 numeric_to_string = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   e->_result.set_string_val(std::to_string(*val));\n\
   return &e->_result.${result_field};\n\
 }\n\n")

 # C++ body for casting LARGEINT to string: LargeIntValue::to_string() is
 # given a 64-byte buffer and its length, and the returned pointer/length
 # pair is wrapped in a std::string for set_string_val().
 largeint_to_string = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   char buf[64];\n\
   int len = 64;\n\
   char *str = LargeIntValue::to_string(*val, buf, &len);\n\
   e->_result.set_string_val(std::string(str, len));\n\
   return &e->_result.${result_field};\n\
 }\n\n")

 # C++ body for casting FLOAT to string: formats into a 64-byte buffer with
 # my_gcvt(..., MY_GCVT_ARG_FLOAT, ...) and passes it to set_string_val().
 float_to_string = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   char buf[64];\n\
   my_gcvt(*val, MY_GCVT_ARG_FLOAT, 64, buf, NULL);\n\
   e->_result.set_string_val(buf);\n\
   return &e->_result.${result_field};\n\
 }\n\n")

 # C++ body for casting DOUBLE to string: identical to float_to_string except
 # for the MY_GCVT_ARG_DOUBLE mode passed to my_gcvt().
 double_to_string = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   char buf[64];\n\
   my_gcvt(*val, MY_GCVT_ARG_DOUBLE, 64, buf, NULL);\n\
   e->_result.set_string_val(buf);\n\
   return &e->_result.${result_field};\n\
 }\n\n")

 # Need to special case tinyint.  boost thinks it is a char and handles it differently.
 # e.g. '0' is written as an empty string.
 # The string is therefore parsed with lexical_cast<int16_t> and narrowed to
 # int8_t afterwards; a bad_lexical_cast makes the generated function return
 # NULL.
 # NOTE(review): no entry in `functions` within this file references this
 # template -- confirm whether it is still needed.
 string_to_tinyint = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   string tmp(val->ptr, val->len);\n\
   try {\n\
     e->_result.${result_field} = static_cast<int8_t>(lexical_cast<int16_t>(tmp));\n\
   } catch (bad_lexical_cast &) {\n\
     return NULL;\n\
   }\n\
   return &e->_result.${result_field};\n\
 }\n\n")

 # C++ body for casting TINYINT to string: the value is first widened to
 # int64_t and then converted with lexical_cast<string>.
 tinyint_to_string = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   Expr* op = e->children()[0];\n\
   ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->get_value(row));\n\
   if (val == NULL) return NULL;\n\
   int64_t tmp_val = *val;\n\
   e->_result.set_string_val(lexical_cast<string>(tmp_val));\n\
   return &e->_result.${result_field};\n\
 }\n\n")

 # C++ body for a CASE expression that has a case operand.  Child 0 is the
 # case value; the following children are walked in (when, then) pairs and,
 # if has_else_expr() is true, the last child is the else value.  The
 # generated function returns the else value (or NULL without one) when the
 # case value is NULL or no non-NULL `when` compares equal to it; otherwise
 # it returns the value of the matching `then` child.  Only ${native_type1}
 # is substituted, for the case/when comparison.
 case = string.Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
   CaseExpr* expr = static_cast<CaseExpr*>(e);\n\
   int num_children = e->get_num_children();\n\
   int loop_end = (expr->has_else_expr()) ? num_children - 1 : num_children;\n\
   // Make sure we set the right compute function.\n\
   DCHECK_EQ(expr->has_case_expr(), true);\n\
   // Need at least case, when and then expr, and optionally an else.\n\
   DCHECK_GE(num_children, (expr->has_else_expr()) ? 4 : 3);\n\
   // All case and when exprs return the same type (we guaranteed that during analysis).\n\
   void* case_val = e->children()[0]->get_value(row);\n\
   if (case_val == NULL) {\n\
     if (expr->has_else_expr()) {\n\
       // Return else value.\n\
       return e->children()[num_children - 1]->get_value(row);\n\
     } else {\n\
       return NULL;\n\
     }\n\
   }\n\
   for (int i = 1; i < loop_end; i += 2) {\n\
     ${native_type1}* when_val =\n\
         reinterpret_cast<${native_type1}*>(e->children()[i]->get_value(row));\n\
     if (when_val == NULL) continue;\n\
     if (*reinterpret_cast<${native_type1}*>(case_val) == *when_val) {\n\
       // Return then value.\n\
       return e->children()[i + 1]->get_value(row);\n\
     }\n\
   }\n\
   if (expr->has_else_expr()) {\n\
     // Return else value.\n\
     return e->children()[num_children - 1]->get_value(row);\n\
   }\n\
   return NULL;\n\
 }\n\n")

 # One list entry of the generated generated_functions.py: function name,
 # return type, quoted argument types, the qualified C++ symbol and a
 # trailing empty list.
 python_template = string.Template("\
   ['${fn_name}', '${return_type}', [${args}], 'ComputeFunctions::${fn_signature}', []], \n")

 # Mapping of function to template
 # This is the default template per operation name.  The main block consults
 # it only when a `functions` entry carries no template of its own as a
 # fourth element; operations missing from this mapping are then skipped.
 templates = {
   'Add': binary_op,
   'Subtract': binary_op,
   'Multiply': binary_op,
   'Divide': binary_op_divid,
   'Int_Divide': binary_op_divid,
   'Mod': binary_op_divid,
   'BitAnd': binary_op,
   'BitXor': binary_op,
   'BitOr': binary_op,
   'BitNot': unary_op,
   'Eq': binary_op,
   'Ne': binary_op,
   'Ge': binary_op,
   'Gt': binary_op,
   'Lt': binary_op,
   'Le': binary_op,
   'Cast': cast,
 }

 # Some aggregate types that are useful for defining functions
 # Maps every name usable inside a `functions` entry to the list of concrete
 # types it expands to.  Single types map to a one-element list so that the
 # main block can expand all names the same way.
 types = {
   'BOOLEAN': ['BOOLEAN'],
   'TINYINT': ['TINYINT'],
   'SMALLINT': ['SMALLINT'],
   'INT': ['INT'],
   'BIGINT': ['BIGINT'],
   'LARGEINT': ['LARGEINT'],
   'FLOAT': ['FLOAT'],
   'DOUBLE': ['DOUBLE'],
   'STRING': ['VARCHAR'],
   'DATE': ['DATE'],
   'DATETIME': ['DATETIME'],
   'DECIMALV2': ['DECIMALV2'],
   'NATIVE_INT_TYPES': ['TINYINT', 'SMALLINT', 'INT', 'BIGINT'],
   'INT_TYPES': ['TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'LARGEINT'],
   'FLOAT_TYPES': ['FLOAT', 'DOUBLE'],
   'NUMERIC_TYPES': ['TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE', \
           'LARGEINT', 'DECIMALV2'],
   'STRING_TYPES': ['VARCHAR'],
   'DATETIME_TYPES': ['DATE', 'DATETIME'],
   'FIXED_TYPES': ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'LARGEINT'],
   'NATIVE_TYPES': ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'],
   'STRCAST_FIXED_TYPES': ['BOOLEAN', 'SMALLINT', 'INT', 'BIGINT'],
   'ALL_TYPES': ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'LARGEINT', 'FLOAT',\
                      'DOUBLE', 'VARCHAR', 'DATETIME', 'DECIMALV2'],
   'MAX_TYPES': ['BIGINT', 'LARGEINT', 'DOUBLE', 'DECIMALV2'],
 }

 # Operation, [ReturnType], [[Args1], [Args2], ... [ArgsN]]
 # An optional fourth element names the template to use; without it the
 # template is looked up in `templates` by operation name.  Every type name
 # is a key of `types`.  After expansion, a list with a single concrete type
 # is reused for every generated signature, while longer lists are walked
 # position by position, so all multi-type lists of one entry must have the
 # same length.
 functions = [
   # Arithmetic Expr
   ['Add', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']]],
   ['Subtract', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']]],
   ['Multiply', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']]],
   ['Divide', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']]],
   ['Int_Divide', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]],
   ['Mod', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]],
   ['Mod', ['DECIMALV2'], [['DECIMALV2'], ['DECIMALV2']]],
   ['Mod', ['DOUBLE'], [['DOUBLE'], ['DOUBLE']], double_mod],
   ['BitAnd', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]],
   ['BitXor', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]],
   ['BitOr', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]],
   ['BitNot', ['INT_TYPES'], [['INT_TYPES']]],

   # BinaryPredicates
   ['Eq', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']]],
   ['Ne', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']]],
   ['Gt', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']]],
   ['Lt', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']]],
   ['Ge', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']]],
   ['Le', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']]],
   ['Eq', ['BOOLEAN'], [['LARGEINT'], ['LARGEINT']],],
   ['Ne', ['BOOLEAN'], [['LARGEINT'], ['LARGEINT']],],
   ['Gt', ['BOOLEAN'], [['LARGEINT'], ['LARGEINT']],],
   ['Lt', ['BOOLEAN'], [['LARGEINT'], ['LARGEINT']],],
   ['Ge', ['BOOLEAN'], [['LARGEINT'], ['LARGEINT']],],
   ['Le', ['BOOLEAN'], [['LARGEINT'], ['LARGEINT']],],
   ['Eq', ['BOOLEAN'], [['STRING'], ['STRING']], binary_func],
   ['Ne', ['BOOLEAN'], [['STRING'], ['STRING']], binary_func],
   ['Gt', ['BOOLEAN'], [['STRING'], ['STRING']], binary_func],
   ['Lt', ['BOOLEAN'], [['STRING'], ['STRING']], binary_func],
   ['Ge', ['BOOLEAN'], [['STRING'], ['STRING']], binary_func],
   ['Le', ['BOOLEAN'], [['STRING'], ['STRING']], binary_func],
   ['Eq', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],],
   ['Ne', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],],
   ['Gt', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],],
   ['Lt', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],],
   ['Ge', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],],
   ['Le', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],],
   ['Eq', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],],
   ['Ne', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],],
   ['Gt', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],],
   ['Lt', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],],
   ['Ge', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],],
   ['Le', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],],

   # Casts
   ['Cast', ['BOOLEAN'], [['NATIVE_TYPES'], ['BOOLEAN']]],
   ['Cast', ['TINYINT'], [['NATIVE_TYPES'], ['TINYINT']]],
   ['Cast', ['SMALLINT'], [['NATIVE_TYPES'], ['SMALLINT']]],
   ['Cast', ['INT'], [['NATIVE_TYPES'], ['INT']]],
   ['Cast', ['BIGINT'], [['NATIVE_TYPES'], ['BIGINT']]],
   ['Cast', ['LARGEINT'], [['NATIVE_TYPES'], ['LARGEINT']]],
   ['Cast', ['LARGEINT'], [['DECIMALV2'], ['LARGEINT']]],
   ['Cast', ['NATIVE_TYPES'], [['LARGEINT'], ['NATIVE_TYPES']]],
   ['Cast', ['FLOAT'], [['NATIVE_TYPES'], ['FLOAT']]],
   ['Cast', ['DOUBLE'], [['NATIVE_TYPES'], ['DOUBLE']]],
   ['Cast', ['DECIMALV2'], [['FIXED_TYPES'], ['DECIMALV2']]],
   ['Cast', ['DECIMALV2'], [['FLOAT'], ['DECIMALV2']], float_to_decimal],
   ['Cast', ['DECIMALV2'], [['DOUBLE'], ['DECIMALV2']], double_to_decimal],
   ['Cast', ['NATIVE_TYPES'], [['DECIMALV2'], ['NATIVE_TYPES']]],
   ['Cast', ['NATIVE_INT_TYPES'], [['STRING'], ['NATIVE_INT_TYPES']], string_to_int],
   ['Cast', ['LARGEINT'], [['STRING'], ['LARGEINT']], string_to_int],
   ['Cast', ['FLOAT_TYPES'], [['STRING'], ['FLOAT_TYPES']], string_to_float],
   ['Cast', ['STRING'], [['STRCAST_FIXED_TYPES'], ['STRING']], numeric_to_string],
   ['Cast', ['STRING'], [['LARGEINT'], ['STRING']], largeint_to_string],
   ['Cast', ['STRING'], [['FLOAT'], ['STRING']], float_to_string],
   ['Cast', ['STRING'], [['DOUBLE'], ['STRING']], double_to_string],
   ['Cast', ['STRING'], [['TINYINT'], ['STRING']], tinyint_to_string],
   ['Cast', ['STRING'], [['DECIMALV2'], ['STRING']], decimal_to_string],
   # Datetime cast
   ['Cast', ['DATE'], [['NUMERIC_TYPES'], ['DATE']], numeric_to_date],
   ['Cast', ['DATETIME'], [['NUMERIC_TYPES'], ['DATETIME']], numeric_to_datetime],
   ['Cast', ['DATE'], [['STRING_TYPES'], ['DATE']], string_to_date],
   ['Cast', ['DATETIME'], [['STRING_TYPES'], ['DATETIME']], string_to_datetime],
   ['Cast', ['DATE'], [['DATETIME'], ['DATE']], datetime_to_date],
   ['Cast', ['DATETIME'], [['DATE'], ['DATETIME']], date_to_datetime],
   ['Cast', ['NUMERIC_TYPES'], [['DATETIME'], ['NUMERIC_TYPES']], datetime_to_numeric],
   ['Cast', ['NUMERIC_TYPES'], [['DATE'], ['NUMERIC_TYPES']], datetime_to_numeric],
   ['Cast', ['STRING_TYPES'], [['DATE'], ['STRING_TYPES']], datetime_to_string],
   ['Cast', ['STRING_TYPES'], [['DATETIME'], ['STRING_TYPES']], datetime_to_string],

   # Case
   # The case expr is special because it has a variable number of function args,
   # but we guarantee that all of them are of the same type during query analysis,
   # so we just list exactly one here.
   # In addition, the return type given here is a dummy, because it is
   # not necessarily the same as the function args type.
   ['Case', ['ALL_TYPES'], [['ALL_TYPES']], case],
 ]

 # Per concrete type, the token that initialize_sub appends (after an
 # underscore) to the operation name to build the generated function's name.
 native_types = {
   'BOOLEAN': 'bool',
   'TINYINT': 'char',
   'SMALLINT': 'short',
   'INT': 'int',
   'BIGINT': 'long',
   'LARGEINT': '__int128',
   'FLOAT': 'float',
   'DOUBLE': 'double',
   'VARCHAR': 'StringValue',
   'DATE': 'Date',
   'DATETIME': 'DateTime',
   'TIME': 'double',
   'DECIMALV2': 'DecimalV2Value',
 }

 # Portable type used in the function implementation
 # initialize_sub substitutes these as ${native_type1}, ${native_type2}, ...
 # for the corresponding argument positions.
 implemented_types = {
   'BOOLEAN': 'bool',
   'TINYINT': 'int8_t',
   'SMALLINT': 'int16_t',
   'INT': 'int32_t',
   'BIGINT': 'int64_t',
   'LARGEINT': '__int128',
   'FLOAT': 'float',
   'DOUBLE': 'double',
   'VARCHAR': 'StringValue',
   'DATE': 'DateTimeValue',
   'DATETIME': 'DateTimeValue',
   'TIME': 'double',
   'DECIMALV2': 'DecimalV2Value',
 }
 # Per return type, the member of e->_result that the generated function
 # writes to; substituted as ${result_field}.
 result_fields = {
   'BOOLEAN': 'bool_val',
   'TINYINT': 'tinyint_val',
   'SMALLINT': 'smallint_val',
   'INT': 'int_val',
   'BIGINT': 'bigint_val',
   'LARGEINT': 'large_int_val',
   'FLOAT': 'float_val',
   'DOUBLE': 'double_val',
   'VARCHAR': 'string_val',
   'DATE': 'datetime_val',
   'DATETIME': 'datetime_val',
   'TIME': 'double_val',
   'DECIMALV2': 'decimalv2_val',
 }

 # C++ operator substituted as ${native_op}, keyed by the upper-cased
 # operation name.
 native_ops = {
   'BITAND': '&',
   'BITNOT': '~',
   'BITOR': '|',
   'BITXOR': '^',
   'DIVIDE': '/',
   'EQ': '==',
   'GT': '>',
   'GE': '>=',
   'INT_DIVIDE': '/',
   'SUBTRACT': '-',
   'MOD': '%',
   'MULTIPLY': '*',
   'LT': '<',
   'LE': '<=',
   'NE': '!=',
   'ADD': '+',
 }

 # Member-function name substituted as ${native_func} when the binary_func
 # template is used, keyed by the upper-cased operation name.
 native_funcs = {
   'EQ': 'eq',
   'LE': 'le',
   'LT': 'lt',
   'NE': 'ne',
   'GE': 'ge',
   'GT': 'gt',
 }

 # Text written at the top of functions.cc: the generated-file notice, the
 # includes, the using-directives and the opening of namespace doris.
 cc_preamble = '\
 \n\
 // This is a generated file, DO NOT EDIT.\n\
 // To add new functions, see impala/common/function-registry/gen_opcodes.py\n\
 \n\
 #include "gen_cpp/opcode/functions.h"\n\
 #include "exprs/expr.h"\n\
 #include "exprs/case_expr.h"\n\
 #include "runtime/string_value.hpp"\n\
 #include "runtime/tuple_row.h"\n\
 #include "util/mysql_dtoa.h"\n\
 #include "util/string_parser.hpp"\n\
 #include <boost/lexical_cast.hpp>\n\
 \n\
 using namespace boost;\n\
 using namespace std;\n\
 \n\
 namespace doris { \n\
 \n'

 # Text written at the end of functions.cc; closes namespace doris.
 cc_epilogue = '\
 }\n'

 # Text written at the top of functions.h: include guard, forward
 # declarations and the opening of class ComputeFunctions.
 h_preamble = '\
 \n\
 #ifndef DORIS_OPCODE_FUNCTIONS_H\n\
 #define DORIS_OPCODE_FUNCTIONS_H\n\
 \n\
 namespace doris {\n\
 class Expr;\n\
 class OpcodeRegistry;\n\
 class TupleRow;\n\
 \n\
 class ComputeFunctions {\n\
  public:\n'

 # Text written at the end of functions.h; closes the class, the namespace
 # and the include guard.
 h_epilogue = '\
 };\n\
 \n\
 }\n\
 \n\
 #endif\n'

 # Text written at the top of generated_functions.py; opens the `functions`
 # list that the python_template entries are appended to.
 python_preamble = '\
 #!/usr/bin/env python\n\
 \n\
 # This is a generated file, DO NOT EDIT IT.\n\
 # To add new functions, see impala/common/function-registry/gen_opcodes.py\n\
 \n\
 functions = [\n'

 # Closes the `functions` list in generated_functions.py.
 python_epilogue = ']'

 # Declaration written to functions.h for every generated function.
 header_template = string.Template("\
   static void* ${fn_signature}(Expr* e, TupleRow* row);\n")

 # Directory, relative to the current working directory, that receives
 # functions.h and functions.cc.
 BE_PATH = "../gen_cpp/opcode/"

 def initialize_sub(op, return_type, arg_types):
     """
     Build the substitution dictionary for one concrete function signature.

     op is the operation name, return_type a single concrete type name and
     arg_types a list of concrete type names.  The result carries fn_name,
     fn_signature (op followed by "_<native type>" per argument),
     return_type, result_field, args (each argument quoted and followed by
     ", "), native_type1..N and, for operations listed in native_ops,
     native_op.
     """
     result_field = result_fields[return_type]
     operator_key = op.upper()

     signature_parts = [op]
     quoted_args = []
     native_type_entries = {}
     for position, type_name in enumerate(arg_types, 1):
         signature_parts.append(native_types[type_name])
         native_type_entries["native_type" + str(position)] = \
             implemented_types[type_name]
         quoted_args.append("'" + type_name + "', ")

     sub = {
         "fn_name": op,
         "fn_signature": "_".join(signature_parts),
         "return_type": return_type,
         "result_field": result_field,
         "args": "".join(quoted_args),
     }
     if operator_key in native_ops:
         sub["native_op"] = native_ops[operator_key]
     sub.update(native_type_entries)
     return sub

 if __name__ == "__main__":
     # Script entry point: writes functions.h and functions.cc under BE_PATH
     # and generated_functions.py in the current working directory, with one
     # generated function per concrete signature described by `functions`.

     # Create the output directory; an already existing one is not an error.
     try:
         os.makedirs(BE_PATH)
     except OSError as e:
         if e.errno != errno.EEXIST:
             raise

     # The 'with' block closes all three outputs even when generation aborts
     # part-way (invalid declaration, unknown type name, ...).
     with open(BE_PATH + 'functions.h', 'w') as h_file, \
             open(BE_PATH + 'functions.cc', 'w') as cc_file, \
             open('generated_functions.py', 'w') as python_file:
         h_file.write(h_preamble)
         cc_file.write(cc_preamble)
         python_file.write(python_preamble)

         # Generate functions and headers
         for func_data in functions:
             op = func_data[0]
             if len(func_data) >= 4:
                 # A specific template has been given for this entry.
                 template = func_data[3]
             elif op in templates:
                 template = templates[op]
             else:
                 # No template at all: this function is not auto-generated.
                 continue

             # Expand every type-group name into its concrete types.
             return_types = [t for ret in func_data[1] for t in types[ret]]
             signatures = [[t for arg in args for t in types[arg]]
                           for args in func_data[2]]
             num_args = len(signatures)

             # The widest expanded list decides how many functions to emit.
             columns = [return_types] + signatures
             num_functions = max(len(column) for column in columns)

             # Validate the input: every list must either hold exactly one
             # type (reused for each function) or one type per function.
             if not all(len(column) in (1, num_functions) for column in columns):
                 # str() is required: func_data is a list and cannot be
                 # concatenated to a string directly.
                 print("Invalid Declaration: " + str(func_data))
                 sys.exit(1)

             # Iterate over every function signature to generate
             for i in range(num_functions):
                 if len(return_types) == 1:
                     return_type = return_types[0]
                 else:
                     return_type = return_types[i]

                 arg_types = []
                 for j in range(num_args):
                     if len(signatures[j]) == 1:
                         arg_types.append(signatures[j][0])
                     else:
                         arg_types.append(signatures[j][i])

                 # At this point, 'return_type' is a single type and
                 # 'arg_types' is a list of single types
                 sub = initialize_sub(op, return_type, arg_types)
                 if template is binary_func:
                     # binary_func needs the member-function name as well.
                     sub["native_func"] = native_funcs[op.upper()]

                 h_file.write(header_template.substitute(sub))
                 cc_file.write(template.substitute(sub))
                 python_file.write(python_template.substitute(sub))

         h_file.write(h_epilogue)
         cc_file.write(cc_epilogue)
         python_file.write(python_epilogue)