be/src/exprs/scalar-expr.cc - impala - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #include "exprs/scalar-expr.inline.h"

 #include <sstream>
 #include <thrift/protocol/TDebugProtocol.h>

 #include "codegen/codegen-anyval.h"
 #include "codegen/llvm-codegen.h"
 #include "common/object-pool.h"
 #include "common/status.h"
 #include "exprs/case-expr.h"
 #include "exprs/cast-format-expr.h"
 #include "exprs/compound-predicates.h"
 #include "exprs/conditional-functions.h"
 #include "exprs/hive-udf-call.h"
 #include "exprs/in-predicate.h"
 #include "exprs/is-not-empty-predicate.h"
 #include "exprs/is-null-predicate.h"
 #include "exprs/kudu-partition-expr.h"
 #include "exprs/like-predicate.h"
 #include "exprs/literal.h"
 #include "exprs/null-literal.h"
 #include "exprs/operators.h"
 #include "exprs/scalar-expr-evaluator.h"
 #include "exprs/scalar-fn-call.h"
 #include "exprs/slot-ref.h"
 #include "exprs/string-functions.h"
 #include "exprs/timestamp-functions.h"
 #include "exprs/tuple-is-null-predicate.h"
 #include "exprs/udf-builtins.h"
 #include "exprs/utility-functions.h"
 #include "exprs/valid-tuple-id.h"
 #include "runtime/runtime-state.h"
 #include "runtime/tuple-row.h"
 #include "runtime/tuple.h"
 #include "runtime/types.h"
 #include "udf/udf-internal.h"
 #include "udf/udf.h"

 #include "gen-cpp/Exprs_types.h"
 #include "gen-cpp/ImpalaService_types.h"

 #include "common/names.h"

 using namespace impala_udf;

 namespace impala {

 // String name for the ScalarExpr class. The "class." prefix suggests this is the name
 // of the corresponding LLVM IR struct type -- TODO confirm against the codegen lookup
 // that consumes it.
 const char* ScalarExpr::LLVM_CLASS_NAME = "class.impala::ScalarExpr";

 // Builds a scalar expression with the given result type. 'is_constant' is stored
 // unchanged in is_constant_.
 ScalarExpr::ScalarExpr(const ColumnType& type, bool is_constant)
   : Expr(type), is_constant_(is_constant) {}

 // Builds a scalar expression from its thrift node. The constant flag is copied from
 // the node; the function descriptor is copied only when the node carries one.
 ScalarExpr::ScalarExpr(const TExprNode& node)
   : Expr(node), is_constant_(node.is_constant) {
   if (node.__isset.fn) {
     fn_ = node.fn;
   }
 }

 // Creates the ScalarExpr tree described by 'texpr', allocating every node from 'pool',
 // and initializes it against 'row_desc'.
 //
 // On success '*scalar_expr' points to the root of the tree. On any failure
 // '*scalar_expr' stays nullptr and the error status is returned. If Init() fails the
 // tree is closed before returning.
 Status ScalarExpr::Create(const TExpr& texpr, const RowDescriptor& row_desc,
     RuntimeState* state, ObjectPool* pool, ScalarExpr** scalar_expr) {
   *scalar_expr = nullptr;
   // Indexing nodes[0] of an empty vector is undefined behaviour, so reject a
   // malformed (empty) thrift expression explicitly.
   if (UNLIKELY(texpr.nodes.empty())) {
     return Status("Thrift expression contains no nodes");
   }
   ScalarExpr* root = nullptr;
   RETURN_IF_ERROR(CreateNode(texpr.nodes[0], pool, &root));
   RETURN_IF_ERROR(Expr::CreateTree(texpr, pool, root));
   // Assume that the root is a potential entry point for interpreted callers.
   // This is not always true but would require some work to determine for
   // each of the callsites of Create().
   // TODO: fix this - reducing the number of entry points would reduce codegen overhead
   // somewhat.
   Status status = root->Init(row_desc, /*is_entry_point*/ true, state);
   if (UNLIKELY(!status.ok())) {
     root->Close();
     return status;
   }
   // Number the function contexts of the whole tree starting from zero.
   int fn_ctx_idx = 0;
   root->AssignFnCtxIdx(&fn_ctx_idx);
   *scalar_expr = root;
   return Status::OK();
 }

 // Creates one ScalarExpr tree per entry of 'texprs', in order, allocating from 'pool'.
 // 'exprs' is cleared first. On the first failure the error is returned immediately
 // and 'exprs' holds only the trees created before it.
 Status ScalarExpr::Create(const vector<TExpr>& texprs, const RowDescriptor& row_desc,
     RuntimeState* state, ObjectPool* pool, vector<ScalarExpr*>* exprs) {
   exprs->clear();
   for (size_t i = 0; i < texprs.size(); ++i) {
     ScalarExpr* created;
     RETURN_IF_ERROR(Create(texprs[i], row_desc, state, pool, &created));
     DCHECK(created != nullptr);
     exprs->push_back(created);
   }
   return Status::OK();
 }

 // Convenience overload: same as the pool-taking Create(), using the object pool
 // returned by 'state->obj_pool()'. 'state' is dereferenced unconditionally.
 Status ScalarExpr::Create(const TExpr& texpr, const RowDescriptor& row_desc,
     RuntimeState* state, ScalarExpr** scalar_expr) {
   ObjectPool* const state_pool = state->obj_pool();
   return ScalarExpr::Create(texpr, row_desc, state, state_pool, scalar_expr);
 }

 // Convenience overload: same as the pool-taking vector Create(), using the object
 // pool returned by 'state->obj_pool()'. 'state' is dereferenced unconditionally.
 Status ScalarExpr::Create(const vector<TExpr>& texprs, const RowDescriptor& row_desc,
     RuntimeState* state, vector<ScalarExpr*>* exprs) {
   ObjectPool* const state_pool = state->obj_pool();
   return ScalarExpr::Create(texprs, row_desc, state, state_pool, exprs);
 }

 // Assigns function context indices to this subtree in pre-order. This node takes the
 // next free index if HasFnCtx() is true, then each child subtree is numbered in turn.
 // [fn_ctx_idx_start_, fn_ctx_idx_end_) is the range of indices used by this subtree.
 // '*next_fn_ctx_idx' is advanced past every index handed out.
 void ScalarExpr::AssignFnCtxIdx(int* next_fn_ctx_idx) {
   int& next_idx = *next_fn_ctx_idx;
   fn_ctx_idx_start_ = next_idx;
   if (HasFnCtx()) fn_ctx_idx_ = next_idx++;
   for (ScalarExpr* child : children()) {
     child->AssignFnCtxIdx(next_fn_ctx_idx);
   }
   fn_ctx_idx_end_ = next_idx;
 }

 // Instantiates the ScalarExpr subclass matching 'texpr_node' and registers it with
 // 'pool'. Only this single node is constructed here.
 //
 // On success '*expr' points at the new node. On failure an error status is returned
 // and '*expr' is nullptr. An error is reported when a thrift field required by the
 // node type is not set, or when the node type is not handled.
 Status ScalarExpr::CreateNode(
     const TExprNode& texpr_node, ObjectPool* pool, ScalarExpr** expr) {
   // Make the output well-defined on every error path, not only in the default case.
   *expr = nullptr;
   switch (texpr_node.node_type) {
     case TExprNodeType::BOOL_LITERAL:
     case TExprNodeType::FLOAT_LITERAL:
     case TExprNodeType::INT_LITERAL:
     case TExprNodeType::STRING_LITERAL:
     case TExprNodeType::DECIMAL_LITERAL:
     case TExprNodeType::TIMESTAMP_LITERAL:
     case TExprNodeType::DATE_LITERAL:
       *expr = pool->Add(new Literal(texpr_node));
       return Status::OK();
     case TExprNodeType::CASE_EXPR:
       if (!texpr_node.__isset.case_expr) {
         return Status("Case expression not set in thrift node");
       }
       *expr = pool->Add(new CaseExpr(texpr_node));
       return Status::OK();
     case TExprNodeType::COMPOUND_PRED: {
       // The dispatch below reads texpr_node.fn, so it must have been set.
       if (!texpr_node.__isset.fn) {
         return Status("Function not set in thrift node");
       }
       const string& fn_name = texpr_node.fn.name.function_name;
       if (fn_name == "and") {
         *expr = pool->Add(new AndPredicate(texpr_node));
       } else if (fn_name == "or") {
         *expr = pool->Add(new OrPredicate(texpr_node));
       } else {
         // The only remaining compound predicate is "not", which is created as a
         // regular function call.
         DCHECK_EQ(fn_name, "not");
         *expr = pool->Add(new ScalarFnCall(texpr_node));
       }
       return Status::OK();
     }
     case TExprNodeType::NULL_LITERAL:
       *expr = pool->Add(new NullLiteral(texpr_node));
       return Status::OK();
     case TExprNodeType::SLOT_REF:
       if (!texpr_node.__isset.slot_ref) {
         return Status("Slot reference not set in thrift node");
       }
       *expr = pool->Add(new SlotRef(texpr_node));
       return Status::OK();
     case TExprNodeType::TUPLE_IS_NULL_PRED:
       *expr = pool->Add(new TupleIsNullPredicate(texpr_node));
       return Status::OK();
     case TExprNodeType::FUNCTION_CALL: {
       if (!texpr_node.__isset.fn) {
         return Status("Function not set in thrift node");
       }
       // Special-case functions that have their own Expr classes. The order of the
       // checks matters: name-based special cases win over the binary type and the
       // cast format.
       // TODO: is there a better way to do this?
       const string& fn_name = texpr_node.fn.name.function_name;
       if (fn_name == "if") {
         *expr = pool->Add(new IfExpr(texpr_node));
       } else if (fn_name == "isnull" || fn_name == "ifnull" || fn_name == "nvl") {
         *expr = pool->Add(new IsNullExpr(texpr_node));
       } else if (fn_name == "coalesce") {
         *expr = pool->Add(new CoalesceExpr(texpr_node));
       } else if (texpr_node.fn.binary_type == TFunctionBinaryType::JAVA) {
         *expr = pool->Add(new HiveUdfCall(texpr_node));
       } else if (texpr_node.__isset.cast_expr &&
           !texpr_node.cast_expr.cast_format.empty()) {
         *expr = pool->Add(new CastFormatExpr(texpr_node));
       } else {
         *expr = pool->Add(new ScalarFnCall(texpr_node));
       }
       return Status::OK();
     }
     case TExprNodeType::IS_NOT_EMPTY_PRED:
       *expr = pool->Add(new IsNotEmptyPredicate(texpr_node));
       return Status::OK();
     case TExprNodeType::KUDU_PARTITION_EXPR:
       *expr = pool->Add(new KuduPartitionExpr(texpr_node));
       return Status::OK();
     case TExprNodeType::VALID_TUPLE_ID_EXPR:
       *expr = pool->Add(new ValidTupleIdExpr(texpr_node));
       return Status::OK();
     default: {
       stringstream os;
       os << "Unknown expr node type: " << texpr_node.node_type;
       return Status(os.str());
     }
   }
 }

 // Base implementation: forwards the call to every child in order and stops at the
 // first child that returns an error.
 Status ScalarExpr::OpenEvaluator(FunctionContext::FunctionStateScope scope,
     RuntimeState* state, ScalarExprEvaluator* eval) const {
   for (ScalarExpr* child : children_) {
     RETURN_IF_ERROR(child->OpenEvaluator(scope, state, eval));
   }
   return Status::OK();
 }

 // Base implementation: forwards the call to every child in order.
 void ScalarExpr::CloseEvaluator(FunctionContext::FunctionStateScope scope,
     RuntimeState* state, ScalarExprEvaluator* eval) const {
   for (size_t i = 0; i < children_.size(); ++i) {
     children_[i]->CloseEvaluator(scope, state, eval);
   }
 }

 // Closes this expression. The body only delegates to the base class Expr::Close().
 void ScalarExpr::Close() {
   Expr::Close();
 }

 void ScalarExpr::Close(const vector<ScalarExpr*>& exprs) {
   for (ScalarExpr* expr : exprs) expr->Close();
 }

 // Per-expression record used when sorting expression results into a memory layout.
 struct MemLayoutData {
   int expr_idx;          // Index of the expression in the caller's vector.
   int byte_size;         // Size in bytes of the expression's result slot.
   bool variable_length;  // True if the result is a variable-length value.
   int alignment;         // Not consulted by operator<.

   // Orders fixed-length entries before variable-length ones; within each group
   // entries are ordered by ascending byte size.
   // TODO: why put var-len at end?
   bool operator<(const MemLayoutData& rhs) const {
     if (variable_length != rhs.variable_length) {
       // Exactly one side is variable-length; the fixed-length one sorts first.
       return !variable_length;
     }
     return byte_size < rhs.byte_size;
   }
 };

 int ScalarExpr::ComputeResultsLayout(const vector<ScalarExpr*>& exprs,
     vector<int>* offsets, int* var_result_begin) {
   if (exprs.size() == 0) {
     *var_result_begin = -1;
     return 0;
   }


   vector<MemLayoutData> data;
   data.resize(exprs.size());

   // Collect all the byte sizes and sort them
   for (int i = 0; i < exprs.size(); ++i) {
     DCHECK(!exprs[i]->type().IsComplexType()) << "NYI";
     data[i].expr_idx = i;
     data[i].byte_size = exprs[i]->type().GetSlotSize();
     DCHECK_GT(data[i].byte_size, 0);
     data[i].variable_length = exprs[i]->type().IsVarLenStringType();

   }

   sort(data.begin(), data.end());

   int byte_offset = 0;
   offsets->resize(exprs.size());
   *var_result_begin = -1;

   for (int i = 0; i < data.size(); ++i) {

     (*offsets)[data[i].expr_idx] = byte_offset;
     if (data[i].variable_length && *var_result_begin == -1) {
       *var_result_begin = byte_offset;
     }
     DCHECK(!(i == 0 && byte_offset > 0)) << "first value should be at start of layout";
     byte_offset += data[i].byte_size;
   }

   return byte_offset;
 }

 // Initializes this expression and, first, all of its children (which are never
 // treated as entry points). Stops at the first child that fails. If ShouldCodegen()
 // holds, the expression is registered with 'state' for the codegen phase.
 Status ScalarExpr::Init(
     const RowDescriptor& row_desc, bool is_entry_point, RuntimeState* state) {
   DCHECK(type_.type != INVALID_TYPE);
   for (ScalarExpr* child : children_) {
     RETURN_IF_ERROR(child->Init(row_desc, false, state));
   }
   if (!ShouldCodegen(state)) return Status::OK();

   // Add the expression to the list of expressions to codegen in the codegen phase.
   // If the expression is not interpretable, we need an entry point to evaluate
   // the expression from interpreted code, e.g. GetConstValue().
   const bool needs_entry_point = is_entry_point || !IsInterpretable();
   state->AddScalarExprToCodegen(this, needs_entry_point);
   return Status::OK();
 }

 // Returns " type=<type>" followed, when there are children, by
 // " children=[...]" built from the children's debug strings.
 string ScalarExpr::DebugString() const {
   // TODO: implement partial debug string for member vars
   stringstream out;
   out << " type=" << type_.DebugString();
   if (children_.empty()) return out.str();
   out << " children=" << DebugString(children_);
   return out.str();
 }

 // Returns the debug strings of 'exprs' separated by single spaces and wrapped in
 // square brackets; "[]" for an empty vector.
 string ScalarExpr::DebugString(const vector<ScalarExpr*>& exprs) {
   stringstream out;
   out << "[";
   const char* separator = "";
   for (const ScalarExpr* expr : exprs) {
     out << separator << expr->DebugString();
     separator = " ";
   }
   out << "]";
   return out.str();
 }

 // Returns true if this expression should be codegen'd. The interpreted path is used
 // (i.e. false is returned) if any of the following holds:
 // 1. The expression does not have an associated RuntimeState, e.g. is a partition
 //    key expression in a descriptor table.
 // 2. codegen is disabled by query option.
 // 3. there is an optimization hint to disable codegen and the expr can be interpreted.
 bool ScalarExpr::ShouldCodegen(const RuntimeState* state) const {
   if (state == nullptr) return false;
   if (state->CodegenDisabledByQueryOption()) return false;
   if (state->CodegenHasDisableHint() && IsInterpretable()) return false;
   return true;
 }

 // Base implementation: forwards 'slot_ids' to every child and returns the sum of the
 // children's return values. This node itself adds nothing to 'slot_ids'.
 int ScalarExpr::GetSlotIds(vector<SlotId>* slot_ids) const {
   int total = 0;
   for (const ScalarExpr* child : children_) {
     total += child->GetSlotIds(slot_ids);
   }
   return total;
 }

 // Returns the IR function registered under the SCALAR_EXPR_GET_*_VAL id matching
 // 'type'. STRING, CHAR and VARCHAR all map to the string variant. For any other type
 // a DCHECK fires and NULL is returned.
 llvm::Function* ScalarExpr::GetStaticGetValWrapper(
     ColumnType type, LlvmCodeGen* codegen) {
   // Pick the IR function id first, then do the lookup in one place.
   decltype(IRFunction::SCALAR_EXPR_GET_BOOLEAN_VAL) ir_fn_id;
   switch (type.type) {
     case TYPE_BOOLEAN:
       ir_fn_id = IRFunction::SCALAR_EXPR_GET_BOOLEAN_VAL;
       break;
     case TYPE_TINYINT:
       ir_fn_id = IRFunction::SCALAR_EXPR_GET_TINYINT_VAL;
       break;
     case TYPE_SMALLINT:
       ir_fn_id = IRFunction::SCALAR_EXPR_GET_SMALLINT_VAL;
       break;
     case TYPE_INT:
       ir_fn_id = IRFunction::SCALAR_EXPR_GET_INT_VAL;
       break;
     case TYPE_BIGINT:
       ir_fn_id = IRFunction::SCALAR_EXPR_GET_BIGINT_VAL;
       break;
     case TYPE_FLOAT:
       ir_fn_id = IRFunction::SCALAR_EXPR_GET_FLOAT_VAL;
       break;
     case TYPE_DOUBLE:
       ir_fn_id = IRFunction::SCALAR_EXPR_GET_DOUBLE_VAL;
       break;
     case TYPE_STRING:
     case TYPE_CHAR:
     case TYPE_VARCHAR:
       ir_fn_id = IRFunction::SCALAR_EXPR_GET_STRING_VAL;
       break;
     case TYPE_TIMESTAMP:
       ir_fn_id = IRFunction::SCALAR_EXPR_GET_TIMESTAMP_VAL;
       break;
     case TYPE_DECIMAL:
       ir_fn_id = IRFunction::SCALAR_EXPR_GET_DECIMAL_VAL;
       break;
     case TYPE_DATE:
       ir_fn_id = IRFunction::SCALAR_EXPR_GET_DATE_VAL;
       break;
     default:
       DCHECK(false) << "Invalid type: " << type.DebugString();
       return NULL;
   }
   return codegen->GetFunction(ir_fn_id, false);
 }

 // Generates an IR function prototype called 'name' whose return type is the lowered
 // type of this expression and whose two arguments are "eval" (ScalarExprEvaluator*)
 // and "row" (TupleRow*). The argument values are written into '*args'.
 llvm::Function* ScalarExpr::CreateIrFunctionPrototype(
     const string& name, LlvmCodeGen* codegen, llvm::Value* (*args)[2]) {
   llvm::Type* lowered_ret_type = CodegenAnyVal::GetLoweredType(codegen, type());
   LlvmCodeGen::FnPrototype prototype(codegen, name, lowered_ret_type);
   prototype.AddArgument(LlvmCodeGen::NamedVariable(
       "eval", codegen->GetStructPtrType<ScalarExprEvaluator>()));
   prototype.AddArgument(LlvmCodeGen::NamedVariable(
       "row", codegen->GetStructPtrType<TupleRow>()));
   llvm::Function* proto_fn = prototype.GeneratePrototype(NULL, *args);
   DCHECK(proto_fn != nullptr);
   return proto_fn;
 }

 // Returns in '*fn' the IR compute function for this expression. The function is
 // generated via GetCodegendComputeFnImpl() on first use and cached in ir_compute_fn_.
 // The first time this is called with 'is_codegen_entry_point' set, the function is
 // also handed to the JIT with codegend_compute_fn_ as the destination pointer.
 Status ScalarExpr::GetCodegendComputeFn(
     LlvmCodeGen* codegen, bool is_codegen_entry_point, llvm::Function** fn) {
   if (ir_compute_fn_ == nullptr) {
     RETURN_IF_ERROR(GetCodegendComputeFnImpl(codegen, fn));
     ir_compute_fn_ = *fn;
   } else {
     *fn = ir_compute_fn_;
   }

   // Ensure Get*Val() is made callable if this function is called at least once
   // with is_codegen_entry_point=true.
   const bool needs_jit = is_codegen_entry_point && !added_to_jit_;
   if (needs_jit) {
     added_to_jit_ = true;
     codegen->AddFunctionToJit(*fn, &codegend_compute_fn_);
   }
   return Status::OK();
 }

 // Generates an IR function that evaluates this expression by calling the static
 // Get*Val wrapper for its type with 'this' as the first argument. Returns the
 // finalized function in '*fn', or IR_VERIFY_FAILED if finalization returns nullptr.
 Status ScalarExpr::GetCodegendComputeFnWrapper(
     LlvmCodeGen* codegen, llvm::Function** fn) {
   // The codegen'd function will call child->Get*Val(). Make every child a codegen
   // entry point so that those calls use the fast codegen'd path.
   for (ScalarExpr* child : children_) {
     llvm::Function* unused_fn;
     RETURN_IF_ERROR(child->GetCodegendComputeFn(codegen, true, &unused_fn));
   }

   llvm::Function* getval_wrapper = GetStaticGetValWrapper(type(), codegen);

   llvm::Value* fn_args[2];
   llvm::Function* wrapper_fn =
       CreateIrFunctionPrototype("CodegenComputeFnWrapper", codegen, &fn_args);
   llvm::BasicBlock* entry_block =
       llvm::BasicBlock::Create(codegen->context(), "entry", wrapper_fn);
   LlvmBuilder builder(entry_block);

   // Call the wrapper, passing this expression as the additional first argument.
   llvm::Value* this_ptr =
       codegen->CastPtrToLlvmPtr(codegen->GetStructPtrType<ScalarExpr>(), this);
   llvm::Value* call_args[] = {this_ptr, fn_args[0], fn_args[1]};
   llvm::Value* result = CodegenAnyVal::CreateCall(
       codegen, &builder, getval_wrapper, call_args, "ret");
   builder.CreateRet(result);

   *fn = codegen->FinalizeFunction(wrapper_fn);
   if (UNLIKELY(*fn == nullptr)) {
     return Status(TErrorCode::IR_VERIFY_FAILED, "CodegendComputeFnWrapper");
   }
   return Status::OK();
 }

 // Defines ScalarExpr::Get<type>Interpreted() for one value type. The generated body
 // hits DCHECK(false), streaming the expression's debug string, and returns
 // type::null().
 #define SCALAR_EXPR_GET_VAL_INTERPRETED(type)                 \
   type ScalarExpr::Get##type##Interpreted(                    \
       ScalarExprEvaluator* eval, const TupleRow* row) const { \
     DCHECK(false) << DebugString();                           \
     return type::null();                                      \
   }

 // One definition per value type. At least one of these should always be overridden.
 SCALAR_EXPR_GET_VAL_INTERPRETED(BooleanVal);
 SCALAR_EXPR_GET_VAL_INTERPRETED(TinyIntVal);
 SCALAR_EXPR_GET_VAL_INTERPRETED(SmallIntVal);
 SCALAR_EXPR_GET_VAL_INTERPRETED(IntVal);
 SCALAR_EXPR_GET_VAL_INTERPRETED(BigIntVal);
 SCALAR_EXPR_GET_VAL_INTERPRETED(FloatVal);
 SCALAR_EXPR_GET_VAL_INTERPRETED(DoubleVal);
 SCALAR_EXPR_GET_VAL_INTERPRETED(StringVal);
 SCALAR_EXPR_GET_VAL_INTERPRETED(TimestampVal);
 SCALAR_EXPR_GET_VAL_INTERPRETED(DecimalVal);
 SCALAR_EXPR_GET_VAL_INTERPRETED(DateVal);
 SCALAR_EXPR_GET_VAL_INTERPRETED(CollectionVal);

 // Returns "<expr_name>(<base debug string>)". The base-class DebugString() is called
 // explicitly, so subclass overrides are not used here.
 string ScalarExpr::DebugString(const string& expr_name) const {
   return expr_name + "(" + ScalarExpr::DebugString() + ")";
 }

 }
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.

	#include "exprs/scalar-expr.inline.h"

	#include <sstream>
	#include <thrift/protocol/TDebugProtocol.h>

	#include "codegen/codegen-anyval.h"
	#include "codegen/llvm-codegen.h"
	#include "common/object-pool.h"
	#include "common/status.h"
	#include "exprs/case-expr.h"
	#include "exprs/cast-format-expr.h"
	#include "exprs/compound-predicates.h"
	#include "exprs/conditional-functions.h"
	#include "exprs/hive-udf-call.h"
	#include "exprs/in-predicate.h"
	#include "exprs/is-not-empty-predicate.h"
	#include "exprs/is-null-predicate.h"
	#include "exprs/kudu-partition-expr.h"
	#include "exprs/like-predicate.h"
	#include "exprs/literal.h"
	#include "exprs/null-literal.h"
	#include "exprs/operators.h"
	#include "exprs/scalar-expr-evaluator.h"
	#include "exprs/scalar-fn-call.h"
	#include "exprs/slot-ref.h"
	#include "exprs/string-functions.h"
	#include "exprs/timestamp-functions.h"
	#include "exprs/tuple-is-null-predicate.h"
	#include "exprs/udf-builtins.h"
	#include "exprs/utility-functions.h"
	#include "exprs/valid-tuple-id.h"
	#include "runtime/runtime-state.h"
	#include "runtime/tuple-row.h"
	#include "runtime/tuple.h"
	#include "runtime/types.h"
	#include "udf/udf-internal.h"
	#include "udf/udf.h"

	#include "gen-cpp/Exprs_types.h"
	#include "gen-cpp/ImpalaService_types.h"

	#include "common/names.h"

	using namespace impala_udf;

	namespace impala {

	// String name for the ScalarExpr class. The "class." prefix suggests this is the name
	// of the corresponding LLVM IR struct type -- TODO confirm against the codegen lookup
	// that consumes it.
	const char* ScalarExpr::LLVM_CLASS_NAME = "class.impala::ScalarExpr";

	// Builds a scalar expression with the given result type. 'is_constant' is stored
	// unchanged in is_constant_.
	ScalarExpr::ScalarExpr(const ColumnType& type, bool is_constant)
	  : Expr(type), is_constant_(is_constant) {}

	// Builds a scalar expression from its thrift node. The constant flag is copied from
	// the node; the function descriptor is copied only when the node carries one.
	ScalarExpr::ScalarExpr(const TExprNode& node)
	  : Expr(node), is_constant_(node.is_constant) {
	  if (node.__isset.fn) {
	    fn_ = node.fn;
	  }
	}

	// Creates the ScalarExpr tree described by 'texpr', allocating every node from 'pool',
	// and initializes it against 'row_desc'.
	//
	// On success '*scalar_expr' points to the root of the tree. On any failure
	// '*scalar_expr' stays nullptr and the error status is returned. If Init() fails the
	// tree is closed before returning.
	Status ScalarExpr::Create(const TExpr& texpr, const RowDescriptor& row_desc,
	    RuntimeState* state, ObjectPool* pool, ScalarExpr** scalar_expr) {
	  *scalar_expr = nullptr;
	  // Indexing nodes[0] of an empty vector is undefined behaviour, so reject a
	  // malformed (empty) thrift expression explicitly.
	  if (UNLIKELY(texpr.nodes.empty())) {
	    return Status("Thrift expression contains no nodes");
	  }
	  ScalarExpr* root = nullptr;
	  RETURN_IF_ERROR(CreateNode(texpr.nodes[0], pool, &root));
	  RETURN_IF_ERROR(Expr::CreateTree(texpr, pool, root));
	  // Assume that the root is a potential entry point for interpreted callers.
	  // This is not always true but would require some work to determine for
	  // each of the callsites of Create().
	  // TODO: fix this - reducing the number of entry points would reduce codegen overhead
	  // somewhat.
	  Status status = root->Init(row_desc, /*is_entry_point*/ true, state);
	  if (UNLIKELY(!status.ok())) {
	    root->Close();
	    return status;
	  }
	  // Number the function contexts of the whole tree starting from zero.
	  int fn_ctx_idx = 0;
	  root->AssignFnCtxIdx(&fn_ctx_idx);
	  *scalar_expr = root;
	  return Status::OK();
	}

	// Creates one ScalarExpr tree per entry of 'texprs', in order, allocating from 'pool'.
	// 'exprs' is cleared first. On the first failure the error is returned immediately
	// and 'exprs' holds only the trees created before it.
	Status ScalarExpr::Create(const vector<TExpr>& texprs, const RowDescriptor& row_desc,
	    RuntimeState* state, ObjectPool* pool, vector<ScalarExpr*>* exprs) {
	  exprs->clear();
	  for (const TExpr& texpr : texprs) {
	    ScalarExpr* expr;
	    RETURN_IF_ERROR(Create(texpr, row_desc, state, pool, &expr));
	    DCHECK(expr != nullptr);
	    exprs->push_back(expr);
	  }
	  return Status::OK();
	}

	// Convenience overload: same as the pool-taking Create(), using the object pool
	// returned by 'state->obj_pool()'. 'state' is dereferenced unconditionally.
	Status ScalarExpr::Create(const TExpr& texpr, const RowDescriptor& row_desc,
	    RuntimeState* state, ScalarExpr** scalar_expr) {
	  ObjectPool* const state_pool = state->obj_pool();
	  return ScalarExpr::Create(texpr, row_desc, state, state_pool, scalar_expr);
	}

	// Convenience overload: same as the pool-taking vector Create(), using the object
	// pool returned by 'state->obj_pool()'. 'state' is dereferenced unconditionally.
	Status ScalarExpr::Create(const vector<TExpr>& texprs, const RowDescriptor& row_desc,
	    RuntimeState* state, vector<ScalarExpr*>* exprs) {
	  return ScalarExpr::Create(texprs, row_desc, state, state->obj_pool(), exprs);
	}

	// Assigns function context indices to this subtree in pre-order. This node takes the
	// next free index if HasFnCtx() is true, then each child subtree is numbered in turn.
	// [fn_ctx_idx_start_, fn_ctx_idx_end_) is the range of indices used by this subtree.
	// '*next_fn_ctx_idx' is advanced past every index handed out.
	void ScalarExpr::AssignFnCtxIdx(int* next_fn_ctx_idx) {
	  int& next_idx = *next_fn_ctx_idx;
	  fn_ctx_idx_start_ = next_idx;
	  if (HasFnCtx()) fn_ctx_idx_ = next_idx++;
	  for (ScalarExpr* child : children()) {
	    child->AssignFnCtxIdx(next_fn_ctx_idx);
	  }
	  fn_ctx_idx_end_ = next_idx;
	}

	// Instantiates the ScalarExpr subclass matching 'texpr_node' and registers it with
	// 'pool'. Only this single node is constructed here.
	//
	// On success '*expr' points at the new node. On failure an error status is returned
	// and '*expr' is nullptr. An error is reported when a thrift field required by the
	// node type is not set, or when the node type is not handled.
	Status ScalarExpr::CreateNode(
	    const TExprNode& texpr_node, ObjectPool* pool, ScalarExpr** expr) {
	  // Make the output well-defined on every error path, not only in the default case.
	  *expr = nullptr;
	  switch (texpr_node.node_type) {
	    case TExprNodeType::BOOL_LITERAL:
	    case TExprNodeType::FLOAT_LITERAL:
	    case TExprNodeType::INT_LITERAL:
	    case TExprNodeType::STRING_LITERAL:
	    case TExprNodeType::DECIMAL_LITERAL:
	    case TExprNodeType::TIMESTAMP_LITERAL:
	    case TExprNodeType::DATE_LITERAL:
	      *expr = pool->Add(new Literal(texpr_node));
	      return Status::OK();
	    case TExprNodeType::CASE_EXPR:
	      if (!texpr_node.__isset.case_expr) {
	        return Status("Case expression not set in thrift node");
	      }
	      *expr = pool->Add(new CaseExpr(texpr_node));
	      return Status::OK();
	    case TExprNodeType::COMPOUND_PRED: {
	      // The dispatch below reads texpr_node.fn, so it must have been set.
	      if (!texpr_node.__isset.fn) {
	        return Status("Function not set in thrift node");
	      }
	      const string& fn_name = texpr_node.fn.name.function_name;
	      if (fn_name == "and") {
	        *expr = pool->Add(new AndPredicate(texpr_node));
	      } else if (fn_name == "or") {
	        *expr = pool->Add(new OrPredicate(texpr_node));
	      } else {
	        // The only remaining compound predicate is "not", which is created as a
	        // regular function call.
	        DCHECK_EQ(fn_name, "not");
	        *expr = pool->Add(new ScalarFnCall(texpr_node));
	      }
	      return Status::OK();
	    }
	    case TExprNodeType::NULL_LITERAL:
	      *expr = pool->Add(new NullLiteral(texpr_node));
	      return Status::OK();
	    case TExprNodeType::SLOT_REF:
	      if (!texpr_node.__isset.slot_ref) {
	        return Status("Slot reference not set in thrift node");
	      }
	      *expr = pool->Add(new SlotRef(texpr_node));
	      return Status::OK();
	    case TExprNodeType::TUPLE_IS_NULL_PRED:
	      *expr = pool->Add(new TupleIsNullPredicate(texpr_node));
	      return Status::OK();
	    case TExprNodeType::FUNCTION_CALL: {
	      if (!texpr_node.__isset.fn) {
	        return Status("Function not set in thrift node");
	      }
	      // Special-case functions that have their own Expr classes. The order of the
	      // checks matters: name-based special cases win over the binary type and the
	      // cast format.
	      // TODO: is there a better way to do this?
	      const string& fn_name = texpr_node.fn.name.function_name;
	      if (fn_name == "if") {
	        *expr = pool->Add(new IfExpr(texpr_node));
	      } else if (fn_name == "isnull" || fn_name == "ifnull" || fn_name == "nvl") {
	        *expr = pool->Add(new IsNullExpr(texpr_node));
	      } else if (fn_name == "coalesce") {
	        *expr = pool->Add(new CoalesceExpr(texpr_node));
	      } else if (texpr_node.fn.binary_type == TFunctionBinaryType::JAVA) {
	        *expr = pool->Add(new HiveUdfCall(texpr_node));
	      } else if (texpr_node.__isset.cast_expr &&
	          !texpr_node.cast_expr.cast_format.empty()) {
	        *expr = pool->Add(new CastFormatExpr(texpr_node));
	      } else {
	        *expr = pool->Add(new ScalarFnCall(texpr_node));
	      }
	      return Status::OK();
	    }
	    case TExprNodeType::IS_NOT_EMPTY_PRED:
	      *expr = pool->Add(new IsNotEmptyPredicate(texpr_node));
	      return Status::OK();
	    case TExprNodeType::KUDU_PARTITION_EXPR:
	      *expr = pool->Add(new KuduPartitionExpr(texpr_node));
	      return Status::OK();
	    case TExprNodeType::VALID_TUPLE_ID_EXPR:
	      *expr = pool->Add(new ValidTupleIdExpr(texpr_node));
	      return Status::OK();
	    default: {
	      stringstream os;
	      os << "Unknown expr node type: " << texpr_node.node_type;
	      return Status(os.str());
	    }
	  }
	}

	// Base implementation: forwards the call to every child in order and stops at the
	// first child that returns an error.
	Status ScalarExpr::OpenEvaluator(FunctionContext::FunctionStateScope scope,
	    RuntimeState* state, ScalarExprEvaluator* eval) const {
	  for (ScalarExpr* child : children_) {
	    RETURN_IF_ERROR(child->OpenEvaluator(scope, state, eval));
	  }
	  return Status::OK();
	}

	// Base implementation: forwards the call to every child in order.
	void ScalarExpr::CloseEvaluator(FunctionContext::FunctionStateScope scope,
	    RuntimeState* state, ScalarExprEvaluator* eval) const {
	  for (size_t i = 0; i < children_.size(); ++i) {
	    children_[i]->CloseEvaluator(scope, state, eval);
	  }
	}

	// Closes this expression. The body only delegates to the base class Expr::Close().
	void ScalarExpr::Close() {
	Expr::Close();
	}

	void ScalarExpr::Close(const vector<ScalarExpr*>& exprs) {
	for (ScalarExpr* expr : exprs) expr->Close();
	}

	// Per-expression record used when sorting expression results into a memory layout.
	struct MemLayoutData {
	  int expr_idx;          // Index of the expression in the caller's vector.
	  int byte_size;         // Size in bytes of the expression's result slot.
	  bool variable_length;  // True if the result is a variable-length value.
	  int alignment;         // Not consulted by operator<.

	  // Orders fixed-length entries before variable-length ones; within each group
	  // entries are ordered by ascending byte size.
	  // TODO: why put var-len at end?
	  bool operator<(const MemLayoutData& rhs) const {
	    if (variable_length != rhs.variable_length) {
	      // Exactly one side is variable-length; the fixed-length one sorts first.
	      return !variable_length;
	    }
	    return byte_size < rhs.byte_size;
	  }
	};

	int ScalarExpr::ComputeResultsLayout(const vector<ScalarExpr*>& exprs,
	vector<int>* offsets, int* var_result_begin) {
	if (exprs.size() == 0) {
	*var_result_begin = -1;
	return 0;
	}


	vector<MemLayoutData> data;
	data.resize(exprs.size());

	// Collect all the byte sizes and sort them
	for (int i = 0; i < exprs.size(); ++i) {
	DCHECK(!exprs[i]->type().IsComplexType()) << "NYI";
	data[i].expr_idx = i;
	data[i].byte_size = exprs[i]->type().GetSlotSize();
	DCHECK_GT(data[i].byte_size, 0);
	data[i].variable_length = exprs[i]->type().IsVarLenStringType();

	}

	sort(data.begin(), data.end());

	int byte_offset = 0;
	offsets->resize(exprs.size());
	*var_result_begin = -1;

	for (int i = 0; i < data.size(); ++i) {

	(*offsets)[data[i].expr_idx] = byte_offset;
	if (data[i].variable_length && *var_result_begin == -1) {
	*var_result_begin = byte_offset;
	}
	DCHECK(!(i == 0 && byte_offset > 0)) << "first value should be at start of layout";
	byte_offset += data[i].byte_size;
	}

	return byte_offset;
	}

	// Initializes this expression and, first, all of its children (which are never
	// treated as entry points). Stops at the first child that fails. If ShouldCodegen()
	// holds, the expression is registered with 'state' for the codegen phase.
	Status ScalarExpr::Init(
	    const RowDescriptor& row_desc, bool is_entry_point, RuntimeState* state) {
	  DCHECK(type_.type != INVALID_TYPE);
	  for (ScalarExpr* child : children_) {
	    RETURN_IF_ERROR(child->Init(row_desc, false, state));
	  }
	  // Add the expression to the list of expressions to codegen in the codegen phase.
	  if (ShouldCodegen(state)) {
	    // If the expression is not interpretable, we need an entry point to evaluate
	    // the expression from interpreted code, e.g. GetConstValue().
	    bool is_codegen_entry_point = is_entry_point || !IsInterpretable();
	    state->AddScalarExprToCodegen(this, is_codegen_entry_point);
	  }
	  return Status::OK();
	}

	// Returns " type=<type>" followed, when there are children, by
	// " children=[...]" built from the children's debug strings.
	string ScalarExpr::DebugString() const {
	  // TODO: implement partial debug string for member vars
	  stringstream out;
	  out << " type=" << type_.DebugString();
	  if (children_.empty()) return out.str();
	  out << " children=" << DebugString(children_);
	  return out.str();
	}

	// Returns the debug strings of 'exprs' separated by single spaces and wrapped in
	// square brackets; "[]" for an empty vector.
	string ScalarExpr::DebugString(const vector<ScalarExpr*>& exprs) {
	  stringstream out;
	  out << "[";
	  const char* separator = "";
	  for (const ScalarExpr* expr : exprs) {
	    out << separator << expr->DebugString();
	    separator = " ";
	  }
	  out << "]";
	  return out.str();
	}

	// Returns true if this expression should be codegen'd. The interpreted path is used
	// (i.e. false is returned) if any of the following holds:
	// 1. The expression does not have an associated RuntimeState, e.g. is a partition
	//    key expression in a descriptor table.
	// 2. codegen is disabled by query option.
	// 3. there is an optimization hint to disable codegen and the expr can be interpreted.
	bool ScalarExpr::ShouldCodegen(const RuntimeState* state) const {
	  if (state == nullptr) return false;
	  if (state->CodegenDisabledByQueryOption()) return false;
	  if (state->CodegenHasDisableHint() && IsInterpretable()) return false;
	  return true;
	}

	// Base implementation: forwards 'slot_ids' to every child and returns the sum of the
	// children's return values. This node itself adds nothing to 'slot_ids'.
	int ScalarExpr::GetSlotIds(vector<SlotId>* slot_ids) const {
	  int total = 0;
	  for (const ScalarExpr* child : children_) {
	    total += child->GetSlotIds(slot_ids);
	  }
	  return total;
	}

	// Returns the IR function registered under the SCALAR_EXPR_GET_*_VAL id matching
	// 'type'. STRING, CHAR and VARCHAR all map to the string variant. For any other type
	// a DCHECK fires and NULL is returned.
	llvm::Function* ScalarExpr::GetStaticGetValWrapper(
	    ColumnType type, LlvmCodeGen* codegen) {
	  // Pick the IR function id first, then do the lookup in one place.
	  decltype(IRFunction::SCALAR_EXPR_GET_BOOLEAN_VAL) ir_fn_id;
	  switch (type.type) {
	    case TYPE_BOOLEAN:
	      ir_fn_id = IRFunction::SCALAR_EXPR_GET_BOOLEAN_VAL;
	      break;
	    case TYPE_TINYINT:
	      ir_fn_id = IRFunction::SCALAR_EXPR_GET_TINYINT_VAL;
	      break;
	    case TYPE_SMALLINT:
	      ir_fn_id = IRFunction::SCALAR_EXPR_GET_SMALLINT_VAL;
	      break;
	    case TYPE_INT:
	      ir_fn_id = IRFunction::SCALAR_EXPR_GET_INT_VAL;
	      break;
	    case TYPE_BIGINT:
	      ir_fn_id = IRFunction::SCALAR_EXPR_GET_BIGINT_VAL;
	      break;
	    case TYPE_FLOAT:
	      ir_fn_id = IRFunction::SCALAR_EXPR_GET_FLOAT_VAL;
	      break;
	    case TYPE_DOUBLE:
	      ir_fn_id = IRFunction::SCALAR_EXPR_GET_DOUBLE_VAL;
	      break;
	    case TYPE_STRING:
	    case TYPE_CHAR:
	    case TYPE_VARCHAR:
	      ir_fn_id = IRFunction::SCALAR_EXPR_GET_STRING_VAL;
	      break;
	    case TYPE_TIMESTAMP:
	      ir_fn_id = IRFunction::SCALAR_EXPR_GET_TIMESTAMP_VAL;
	      break;
	    case TYPE_DECIMAL:
	      ir_fn_id = IRFunction::SCALAR_EXPR_GET_DECIMAL_VAL;
	      break;
	    case TYPE_DATE:
	      ir_fn_id = IRFunction::SCALAR_EXPR_GET_DATE_VAL;
	      break;
	    default:
	      DCHECK(false) << "Invalid type: " << type.DebugString();
	      return NULL;
	  }
	  return codegen->GetFunction(ir_fn_id, false);
	}

	// Generates an IR function prototype called 'name' whose return type is the lowered
	// type of this expression and whose two arguments are "eval" (ScalarExprEvaluator*)
	// and "row" (TupleRow*). The argument values are written into '*args'.
	llvm::Function* ScalarExpr::CreateIrFunctionPrototype(
	    const string& name, LlvmCodeGen* codegen, llvm::Value* (*args)[2]) {
	  llvm::Type* lowered_ret_type = CodegenAnyVal::GetLoweredType(codegen, type());
	  LlvmCodeGen::FnPrototype prototype(codegen, name, lowered_ret_type);
	  prototype.AddArgument(LlvmCodeGen::NamedVariable(
	      "eval", codegen->GetStructPtrType<ScalarExprEvaluator>()));
	  prototype.AddArgument(LlvmCodeGen::NamedVariable(
	      "row", codegen->GetStructPtrType<TupleRow>()));
	  llvm::Function* proto_fn = prototype.GeneratePrototype(NULL, *args);
	  DCHECK(proto_fn != nullptr);
	  return proto_fn;
	}

	// Returns in '*fn' the IR compute function for this expression. The function is
	// generated via GetCodegendComputeFnImpl() on first use and cached in ir_compute_fn_.
	// The first time this is called with 'is_codegen_entry_point' set, the function is
	// also handed to the JIT with codegend_compute_fn_ as the destination pointer.
	Status ScalarExpr::GetCodegendComputeFn(
	    LlvmCodeGen* codegen, bool is_codegen_entry_point, llvm::Function** fn) {
	  if (ir_compute_fn_ == nullptr) {
	    RETURN_IF_ERROR(GetCodegendComputeFnImpl(codegen, fn));
	    ir_compute_fn_ = *fn;
	  } else {
	    *fn = ir_compute_fn_;
	  }

	  // Ensure Get*Val() is made callable if this function is called at least once
	  // with is_codegen_entry_point=true.
	  const bool needs_jit = is_codegen_entry_point && !added_to_jit_;
	  if (needs_jit) {
	    added_to_jit_ = true;
	    codegen->AddFunctionToJit(*fn, &codegend_compute_fn_);
	  }
	  return Status::OK();
	}

	// Generates an IR function that evaluates this expression by calling the static
	// Get*Val wrapper for its type with 'this' as the first argument. Returns the
	// finalized function in '*fn', or IR_VERIFY_FAILED if finalization returns nullptr.
	Status ScalarExpr::GetCodegendComputeFnWrapper(
	    LlvmCodeGen* codegen, llvm::Function** fn) {
	  for (ScalarExpr* expr : children_) {
	    llvm::Function* dummy;
	    // The codegen'd function will call expr->Get*Val(). Ensure that the child expr
	    // is a codegen entry point so that expr->GetVal() uses the fast codegen'd path.
	    RETURN_IF_ERROR(expr->GetCodegendComputeFn(codegen, true, &dummy));
	  }

	  llvm::Function* static_getval_fn = GetStaticGetValWrapper(type(), codegen);

	  // Call it passing this as the additional first argument.
	  llvm::Value* args[2];
	  *fn = CreateIrFunctionPrototype("CodegenComputeFnWrapper", codegen, &args);
	  llvm::BasicBlock* entry_block =
	      llvm::BasicBlock::Create(codegen->context(), "entry", *fn);
	  LlvmBuilder builder(entry_block);
	  llvm::Value* this_ptr =
	      codegen->CastPtrToLlvmPtr(codegen->GetStructPtrType<ScalarExpr>(), this);
	  llvm::Value* compute_fn_args[] = {this_ptr, args[0], args[1]};
	  llvm::Value* ret = CodegenAnyVal::CreateCall(
	      codegen, &builder, static_getval_fn, compute_fn_args, "ret");
	  builder.CreateRet(ret);
	  // 'fn' is an out-parameter: finalize the function it points at and store the
	  // result back through it.
	  *fn = codegen->FinalizeFunction(*fn);
	  if (UNLIKELY(*fn == nullptr)) {
	    return Status(TErrorCode::IR_VERIFY_FAILED, "CodegendComputeFnWrapper");
	  }
	  return Status::OK();
	}

	// Defines ScalarExpr::Get<type>Interpreted() for one value type. The generated body
	// hits DCHECK(false), streaming the expression's debug string, and returns
	// type::null().
	#define SCALAR_EXPR_GET_VAL_INTERPRETED(type) \
	type ScalarExpr::Get##type##Interpreted( \
	ScalarExprEvaluator* eval, const TupleRow* row) const { \
	DCHECK(false) << DebugString(); \
	return type::null(); \
	}

	// One definition per value type. At least one of these should always be overridden.
	SCALAR_EXPR_GET_VAL_INTERPRETED(BooleanVal);
	SCALAR_EXPR_GET_VAL_INTERPRETED(TinyIntVal);
	SCALAR_EXPR_GET_VAL_INTERPRETED(SmallIntVal);
	SCALAR_EXPR_GET_VAL_INTERPRETED(IntVal);
	SCALAR_EXPR_GET_VAL_INTERPRETED(BigIntVal);
	SCALAR_EXPR_GET_VAL_INTERPRETED(FloatVal);
	SCALAR_EXPR_GET_VAL_INTERPRETED(DoubleVal);
	SCALAR_EXPR_GET_VAL_INTERPRETED(StringVal);
	SCALAR_EXPR_GET_VAL_INTERPRETED(TimestampVal);
	SCALAR_EXPR_GET_VAL_INTERPRETED(DecimalVal);
	SCALAR_EXPR_GET_VAL_INTERPRETED(DateVal);
	SCALAR_EXPR_GET_VAL_INTERPRETED(CollectionVal);

	// Returns "<expr_name>(<base debug string>)". The base-class DebugString() is called
	// explicitly, so subclass overrides are not used here.
	string ScalarExpr::DebugString(const string& expr_name) const {
	  return expr_name + "(" + ScalarExpr::DebugString() + ")";
	}

	}