blob: b22e7ad5b5efa9be1d4095adcaef3144197785f2 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <sstream>
#include <unordered_set>
#include <utility>
#include "arrow/status.h"
#include "gandiva/decimal_ir.h"
#include "gandiva/decimal_type_util.h"
// Algorithms adapted from Apache Impala
namespace gandiva {
#define ADD_TRACE_32(msg, value) \
if (enable_ir_traces_) { \
AddTrace32(msg, value); \
}
#define ADD_TRACE_128(msg, value) \
if (enable_ir_traces_) { \
AddTrace128(msg, value); \
}
// These are the functions defined in this file. The rest are in precompiled folder,
// and the i128 needs to be dis-assembled for those.
static const char* kAddFunction = "add_decimal128_decimal128";
static const char* kSubtractFunction = "subtract_decimal128_decimal128";
static const char* kEQFunction = "equal_decimal128_decimal128";
static const char* kNEFunction = "not_equal_decimal128_decimal128";
static const char* kLTFunction = "less_than_decimal128_decimal128";
static const char* kLEFunction = "less_than_or_equal_to_decimal128_decimal128";
static const char* kGTFunction = "greater_than_decimal128_decimal128";
static const char* kGEFunction = "greater_than_or_equal_to_decimal128_decimal128";
static const std::unordered_set<std::string> kDecimalIRBuilderFunctions{
kAddFunction, kSubtractFunction, kEQFunction, kNEFunction,
kLTFunction, kLEFunction, kGTFunction, kGEFunction};
const char* DecimalIR::kScaleMultipliersName = "gandivaScaleMultipliers";
/// Populate globals required by decimal IR.
/// TODO: can this be done just once ?
void DecimalIR::AddGlobals(Engine* engine) {
auto types = engine->types();
// populate vector : [ 1, 10, 100, 1000, ..]
std::string value = "1";
std::vector<llvm::Constant*> scale_multipliers;
for (int i = 0; i < DecimalTypeUtil::kMaxPrecision + 1; ++i) {
auto multiplier =
llvm::ConstantInt::get(llvm::Type::getInt128Ty(*engine->context()), value, 10);
scale_multipliers.push_back(multiplier);
value.append("0");
}
auto array_type =
llvm::ArrayType::get(types->i128_type(), DecimalTypeUtil::kMaxPrecision + 1);
auto initializer = llvm::ConstantArray::get(
array_type, llvm::ArrayRef<llvm::Constant*>(scale_multipliers));
auto globalScaleMultipliers = new llvm::GlobalVariable(
*engine->module(), array_type, true /*constant*/,
llvm::GlobalValue::LinkOnceAnyLinkage, initializer, kScaleMultipliersName);
globalScaleMultipliers->setAlignment(LLVM_ALIGN(16));
}
// Lookup intrinsic functions
void DecimalIR::InitializeIntrinsics() {
sadd_with_overflow_fn_ = llvm::Intrinsic::getDeclaration(
module(), llvm::Intrinsic::sadd_with_overflow, types()->i128_type());
DCHECK_NE(sadd_with_overflow_fn_, nullptr);
smul_with_overflow_fn_ = llvm::Intrinsic::getDeclaration(
module(), llvm::Intrinsic::smul_with_overflow, types()->i128_type());
DCHECK_NE(smul_with_overflow_fn_, nullptr);
i128_with_overflow_struct_type_ =
sadd_with_overflow_fn_->getFunctionType()->getReturnType();
}
// CPP: return kScaleMultipliers[scale]
llvm::Value* DecimalIR::GetScaleMultiplier(llvm::Value* scale) {
auto const_array = module()->getGlobalVariable(kScaleMultipliersName);
auto ptr = ir_builder()->CreateGEP(const_array->getValueType(), const_array,
{types()->i32_constant(0), scale});
return ir_builder()->CreateLoad(types()->i128_type(), ptr);
}
// CPP: x <= y ? y : x
llvm::Value* DecimalIR::GetHigherScale(llvm::Value* x_scale, llvm::Value* y_scale) {
llvm::Value* le = ir_builder()->CreateICmpSLE(x_scale, y_scale);
return ir_builder()->CreateSelect(le, y_scale, x_scale);
}
// CPP: return (increase_scale_by <= 0) ?
// in_value : in_value * GetScaleMultiplier(increase_scale_by)
llvm::Value* DecimalIR::IncreaseScale(llvm::Value* in_value,
llvm::Value* increase_scale_by) {
llvm::Value* le_zero =
ir_builder()->CreateICmpSLE(increase_scale_by, types()->i32_constant(0));
// then block
auto then_lambda = [&] { return in_value; };
// else block
auto else_lambda = [&] {
llvm::Value* multiplier = GetScaleMultiplier(increase_scale_by);
return ir_builder()->CreateMul(in_value, multiplier);
};
return BuildIfElse(le_zero, types()->i128_type(), then_lambda, else_lambda);
}
// CPP: return (increase_scale_by <= 0) ?
// {in_value,false} : {in_value * GetScaleMultiplier(increase_scale_by),true}
//
// The return value also indicates if there was an overflow while increasing the scale.
DecimalIR::ValueWithOverflow DecimalIR::IncreaseScaleWithOverflowCheck(
llvm::Value* in_value, llvm::Value* increase_scale_by) {
llvm::Value* le_zero =
ir_builder()->CreateICmpSLE(increase_scale_by, types()->i32_constant(0));
// then block
auto then_lambda = [&] {
ValueWithOverflow ret{in_value, types()->false_constant()};
return ret.AsStruct(this);
};
// else block
auto else_lambda = [&] {
llvm::Value* multiplier = GetScaleMultiplier(increase_scale_by);
return ir_builder()->CreateCall(smul_with_overflow_fn_, {in_value, multiplier});
};
auto ir_struct =
BuildIfElse(le_zero, i128_with_overflow_struct_type_, then_lambda, else_lambda);
return ValueWithOverflow::MakeFromStruct(this, ir_struct);
}
// CPP: return (reduce_scale_by <= 0) ?
// in_value : in_value / GetScaleMultiplier(reduce_scale_by)
//
// ReduceScale cannot cause an overflow.
llvm::Value* DecimalIR::ReduceScale(llvm::Value* in_value, llvm::Value* reduce_scale_by) {
auto le_zero = ir_builder()->CreateICmpSLE(reduce_scale_by, types()->i32_constant(0));
// then block
auto then_lambda = [&] { return in_value; };
// else block
auto else_lambda = [&] {
// TODO : handle rounding.
llvm::Value* multiplier = GetScaleMultiplier(reduce_scale_by);
return ir_builder()->CreateSDiv(in_value, multiplier);
};
return BuildIfElse(le_zero, types()->i128_type(), then_lambda, else_lambda);
}
/// @brief Fast-path for add
/// Adjust x and y to the same scale, and add them.
llvm::Value* DecimalIR::AddFastPath(const ValueFull& x, const ValueFull& y) {
auto higher_scale = GetHigherScale(x.scale(), y.scale());
ADD_TRACE_32("AddFastPath : higher_scale", higher_scale);
// CPP : x_scaled = IncreaseScale(x_value, higher_scale - x_scale)
auto x_delta = ir_builder()->CreateSub(higher_scale, x.scale());
auto x_scaled = IncreaseScale(x.value(), x_delta);
ADD_TRACE_128("AddFastPath : x_scaled", x_scaled);
// CPP : y_scaled = IncreaseScale(y_value, higher_scale - y_scale)
auto y_delta = ir_builder()->CreateSub(higher_scale, y.scale());
auto y_scaled = IncreaseScale(y.value(), y_delta);
ADD_TRACE_128("AddFastPath : y_scaled", y_scaled);
auto sum = ir_builder()->CreateAdd(x_scaled, y_scaled);
ADD_TRACE_128("AddFastPath : sum", sum);
return sum;
}
// @brief Add with overflow check.
/// Adjust x and y to the same scale, add them, and reduce sum to output scale.
/// If there is an overflow, the sum is set to 0.
DecimalIR::ValueWithOverflow DecimalIR::AddWithOverflowCheck(const ValueFull& x,
const ValueFull& y,
const ValueFull& out) {
auto higher_scale = GetHigherScale(x.scale(), y.scale());
ADD_TRACE_32("AddWithOverflowCheck : higher_scale", higher_scale);
// CPP : x_scaled = IncreaseScale(x_value, higher_scale - x.scale())
auto x_delta = ir_builder()->CreateSub(higher_scale, x.scale());
auto x_scaled = IncreaseScaleWithOverflowCheck(x.value(), x_delta);
ADD_TRACE_128("AddWithOverflowCheck : x_scaled", x_scaled.value());
// CPP : y_scaled = IncreaseScale(y_value, higher_scale - y_scale)
auto y_delta = ir_builder()->CreateSub(higher_scale, y.scale());
auto y_scaled = IncreaseScaleWithOverflowCheck(y.value(), y_delta);
ADD_TRACE_128("AddWithOverflowCheck : y_scaled", y_scaled.value());
// CPP : sum = x_scaled + y_scaled
auto sum_ir_struct = ir_builder()->CreateCall(sadd_with_overflow_fn_,
{x_scaled.value(), y_scaled.value()});
auto sum = ValueWithOverflow::MakeFromStruct(this, sum_ir_struct);
ADD_TRACE_128("AddWithOverflowCheck : sum", sum.value());
// CPP : overflow ? 0 : sum / GetScaleMultiplier(max_scale - out_scale)
auto overflow = GetCombinedOverflow({x_scaled, y_scaled, sum});
ADD_TRACE_32("AddWithOverflowCheck : overflow", overflow);
auto then_lambda = [&] {
// if there is an overflow, the value returned won't be used. so, save the division.
return types()->i128_constant(0);
};
auto else_lambda = [&] {
auto reduce_scale_by = ir_builder()->CreateSub(higher_scale, out.scale());
return ReduceScale(sum.value(), reduce_scale_by);
};
auto sum_descaled =
BuildIfElse(overflow, types()->i128_type(), then_lambda, else_lambda);
return ValueWithOverflow(sum_descaled, overflow);
}
// This is pretty complex, so use CPP fns.
llvm::Value* DecimalIR::AddLarge(const ValueFull& x, const ValueFull& y,
const ValueFull& out) {
auto block = ir_builder()->GetInsertBlock();
auto out_high_ptr = new llvm::AllocaInst(types()->i64_type(), 0, "out_hi", block);
auto out_low_ptr = new llvm::AllocaInst(types()->i64_type(), 0, "out_low", block);
auto x_split = ValueSplit::MakeFromInt128(this, x.value());
auto y_split = ValueSplit::MakeFromInt128(this, y.value());
std::vector<llvm::Value*> args = {
x_split.high(), x_split.low(), x.precision(), x.scale(),
y_split.high(), y_split.low(), y.precision(), y.scale(),
out.precision(), out.scale(), out_high_ptr, out_low_ptr,
};
ir_builder()->CreateCall(module()->getFunction("add_large_decimal128_decimal128"),
args);
auto out_high = ir_builder()->CreateLoad(types()->i64_type(), out_high_ptr);
auto out_low = ir_builder()->CreateLoad(types()->i64_type(), out_low_ptr);
auto sum = ValueSplit(out_high, out_low).AsInt128(this);
ADD_TRACE_128("AddLarge : sum", sum);
return sum;
}
/// The output scale/precision cannot be arbitrary values. The algo here depends on them
/// to be the same as computed in DecimalTypeSql.
/// TODO: enforce this.
Status DecimalIR::BuildAdd() {
// Create fn prototype :
// int128_t
// add_decimal128_decimal128(int128_t x_value, int32_t x_precision, int32_t x_scale,
// int128_t y_value, int32_t y_precision, int32_t y_scale
// int32_t out_precision, int32_t out_scale)
auto i32 = types()->i32_type();
auto i128 = types()->i128_type();
auto function = BuildFunction(kAddFunction, i128,
{
{"x_value", i128},
{"x_precision", i32},
{"x_scale", i32},
{"y_value", i128},
{"y_precision", i32},
{"y_scale", i32},
{"out_precision", i32},
{"out_scale", i32},
});
auto arg_iter = function->arg_begin();
ValueFull x(&arg_iter[0], &arg_iter[1], &arg_iter[2]);
ValueFull y(&arg_iter[3], &arg_iter[4], &arg_iter[5]);
ValueFull out(nullptr, &arg_iter[6], &arg_iter[7]);
auto entry = llvm::BasicBlock::Create(*context(), "entry", function);
ir_builder()->SetInsertPoint(entry);
// CPP :
// if (out_precision < 38) {
// return AddFastPath(x, y)
// } else {
// ret = AddWithOverflowCheck(x, y)
// if (ret.overflow)
// return AddLarge(x, y)
// else
// return ret.value;
// }
llvm::Value* lt_max_precision = ir_builder()->CreateICmpSLT(
out.precision(), types()->i32_constant(DecimalTypeUtil::kMaxPrecision));
auto then_lambda = [&] {
// fast-path add
return AddFastPath(x, y);
};
auto else_lambda = [&] {
if (kUseOverflowIntrinsics) {
// do the add and check if there was overflow
auto ret = AddWithOverflowCheck(x, y, out);
// if there is an overflow, switch to the AddLarge codepath.
return BuildIfElse(
ret.overflow(), types()->i128_type(), [&] { return AddLarge(x, y, out); },
[&] { return ret.value(); });
} else {
return AddLarge(x, y, out);
}
};
auto value =
BuildIfElse(lt_max_precision, types()->i128_type(), then_lambda, else_lambda);
// store result to out
ir_builder()->CreateRet(value);
return Status::OK();
}
Status DecimalIR::BuildSubtract() {
// Create fn prototype :
// int128_t
// subtract_decimal128_decimal128(int128_t x_value, int32_t x_precision, int32_t
// x_scale,
// int128_t y_value, int32_t y_precision, int32_t y_scale
// int32_t out_precision, int32_t out_scale)
auto i32 = types()->i32_type();
auto i128 = types()->i128_type();
auto function = BuildFunction(kSubtractFunction, i128,
{
{"x_value", i128},
{"x_precision", i32},
{"x_scale", i32},
{"y_value", i128},
{"y_precision", i32},
{"y_scale", i32},
{"out_precision", i32},
{"out_scale", i32},
});
auto entry = llvm::BasicBlock::Create(*context(), "entry", function);
ir_builder()->SetInsertPoint(entry);
// reuse add function after negating y_value. i.e
// add(x_value, x_precision, x_scale, -y_value, y_precision, y_scale,
// out_precision, out_scale)
std::vector<llvm::Value*> args;
int i = 0;
for (auto& in_arg : function->args()) {
if (i == 3) {
auto y_neg_value = ir_builder()->CreateNeg(&in_arg);
args.push_back(y_neg_value);
} else {
args.push_back(&in_arg);
}
++i;
}
auto value = ir_builder()->CreateCall(module()->getFunction(kAddFunction), args);
// store result to out
ir_builder()->CreateRet(value);
return Status::OK();
}
Status DecimalIR::BuildCompare(const std::string& function_name,
llvm::ICmpInst::Predicate cmp_instruction) {
// Create fn prototype :
// bool
// function_name(int128_t x_value, int32_t x_precision, int32_t x_scale,
// int128_t y_value, int32_t y_precision, int32_t y_scale)
auto i32 = types()->i32_type();
auto i128 = types()->i128_type();
auto function = BuildFunction(function_name, types()->i1_type(),
{
{"x_value", i128},
{"x_precision", i32},
{"x_scale", i32},
{"y_value", i128},
{"y_precision", i32},
{"y_scale", i32},
});
auto arg_iter = function->arg_begin();
ValueFull x(&arg_iter[0], &arg_iter[1], &arg_iter[2]);
ValueFull y(&arg_iter[3], &arg_iter[4], &arg_iter[5]);
auto entry = llvm::BasicBlock::Create(*context(), "entry", function);
ir_builder()->SetInsertPoint(entry);
// Make call to pre-compiled IR function.
auto x_split = ValueSplit::MakeFromInt128(this, x.value());
auto y_split = ValueSplit::MakeFromInt128(this, y.value());
std::vector<llvm::Value*> args = {
x_split.high(), x_split.low(), x.precision(), x.scale(),
y_split.high(), y_split.low(), y.precision(), y.scale(),
};
auto cmp_value = ir_builder()->CreateCall(
module()->getFunction("compare_decimal128_decimal128_internal"), args);
auto result =
ir_builder()->CreateICmp(cmp_instruction, cmp_value, types()->i32_constant(0));
ir_builder()->CreateRet(result);
return Status::OK();
}
llvm::Value* DecimalIR::CallDecimalFunction(const std::string& function_name,
llvm::Type* return_type,
const std::vector<llvm::Value*>& params) {
if (kDecimalIRBuilderFunctions.count(function_name) != 0) {
// this is fn built with the irbuilder.
return ir_builder()->CreateCall(module()->getFunction(function_name), params);
}
// ppre-compiler fn : disassemble i128 to two i64s and re-assemble.
auto i128 = types()->i128_type();
auto i64 = types()->i64_type();
std::vector<llvm::Value*> dis_assembled_args;
for (auto& arg : params) {
if (arg->getType() == i128) {
// split i128 arg into two int64s.
auto split = ValueSplit::MakeFromInt128(this, arg);
dis_assembled_args.push_back(split.high());
dis_assembled_args.push_back(split.low());
} else {
dis_assembled_args.push_back(arg);
}
}
llvm::Value* result = nullptr;
if (return_type == i128) {
// for i128 ret, replace with two int64* args, and join them.
auto block = ir_builder()->GetInsertBlock();
auto out_high_ptr = new llvm::AllocaInst(i64, 0, "out_hi", block);
auto out_low_ptr = new llvm::AllocaInst(i64, 0, "out_low", block);
dis_assembled_args.push_back(out_high_ptr);
dis_assembled_args.push_back(out_low_ptr);
// Make call to pre-compiled IR function.
ir_builder()->CreateCall(module()->getFunction(function_name), dis_assembled_args);
auto out_high = ir_builder()->CreateLoad(i64, out_high_ptr);
auto out_low = ir_builder()->CreateLoad(i64, out_low_ptr);
result = ValueSplit(out_high, out_low).AsInt128(this);
} else {
DCHECK_NE(return_type, types()->void_type());
// Make call to pre-compiled IR function.
result = ir_builder()->CreateCall(module()->getFunction(function_name),
dis_assembled_args);
}
return result;
}
Status DecimalIR::AddFunctions(Engine* engine) {
auto decimal_ir = std::make_shared<DecimalIR>(engine);
// Populate global variables used by decimal operations.
decimal_ir->AddGlobals(engine);
// Lookup intrinsic functions
decimal_ir->InitializeIntrinsics();
ARROW_RETURN_NOT_OK(decimal_ir->BuildAdd());
ARROW_RETURN_NOT_OK(decimal_ir->BuildSubtract());
ARROW_RETURN_NOT_OK(decimal_ir->BuildCompare(kEQFunction, llvm::ICmpInst::ICMP_EQ));
ARROW_RETURN_NOT_OK(decimal_ir->BuildCompare(kNEFunction, llvm::ICmpInst::ICMP_NE));
ARROW_RETURN_NOT_OK(decimal_ir->BuildCompare(kLTFunction, llvm::ICmpInst::ICMP_SLT));
ARROW_RETURN_NOT_OK(decimal_ir->BuildCompare(kLEFunction, llvm::ICmpInst::ICMP_SLE));
ARROW_RETURN_NOT_OK(decimal_ir->BuildCompare(kGTFunction, llvm::ICmpInst::ICMP_SGT));
ARROW_RETURN_NOT_OK(decimal_ir->BuildCompare(kGEFunction, llvm::ICmpInst::ICMP_SGE));
return Status::OK();
}
// Do an bitwise-or of all the overflow bits.
llvm::Value* DecimalIR::GetCombinedOverflow(
std::vector<DecimalIR::ValueWithOverflow> vec) {
llvm::Value* res = types()->false_constant();
for (auto& val : vec) {
res = ir_builder()->CreateOr(res, val.overflow());
}
return res;
}
DecimalIR::ValueSplit DecimalIR::ValueSplit::MakeFromInt128(DecimalIR* decimal_ir,
llvm::Value* in) {
auto builder = decimal_ir->ir_builder();
auto types = decimal_ir->types();
auto high = builder->CreateLShr(in, types->i128_constant(64));
high = builder->CreateTrunc(high, types->i64_type());
auto low = builder->CreateTrunc(in, types->i64_type());
return ValueSplit(high, low);
}
/// Convert IR struct {%i64, %i64} to cpp class ValueSplit
DecimalIR::ValueSplit DecimalIR::ValueSplit::MakeFromStruct(DecimalIR* decimal_ir,
llvm::Value* dstruct) {
auto builder = decimal_ir->ir_builder();
auto high = builder->CreateExtractValue(dstruct, 0);
auto low = builder->CreateExtractValue(dstruct, 1);
return DecimalIR::ValueSplit(high, low);
}
llvm::Value* DecimalIR::ValueSplit::AsInt128(DecimalIR* decimal_ir) const {
auto builder = decimal_ir->ir_builder();
auto types = decimal_ir->types();
auto value = builder->CreateSExt(high_, types->i128_type());
value = builder->CreateShl(value, types->i128_constant(64));
value = builder->CreateAdd(value, builder->CreateZExt(low_, types->i128_type()));
return value;
}
/// Convert IR struct {%i128, %i1} to cpp class ValueWithOverflow
DecimalIR::ValueWithOverflow DecimalIR::ValueWithOverflow::MakeFromStruct(
DecimalIR* decimal_ir, llvm::Value* dstruct) {
auto builder = decimal_ir->ir_builder();
auto value = builder->CreateExtractValue(dstruct, 0);
auto overflow = builder->CreateExtractValue(dstruct, 1);
return DecimalIR::ValueWithOverflow(value, overflow);
}
/// Convert to IR struct {%i128, %i1}
llvm::Value* DecimalIR::ValueWithOverflow::AsStruct(DecimalIR* decimal_ir) const {
auto builder = decimal_ir->ir_builder();
auto undef = llvm::UndefValue::get(decimal_ir->i128_with_overflow_struct_type_);
auto struct_val = builder->CreateInsertValue(undef, value(), 0);
return builder->CreateInsertValue(struct_val, overflow(), 1);
}
/// debug traces
void DecimalIR::AddTrace(const std::string& fmt, std::vector<llvm::Value*> args) {
DCHECK(enable_ir_traces_);
auto ir_str = ir_builder()->CreateGlobalStringPtr(fmt);
args.insert(args.begin(), ir_str);
ir_builder()->CreateCall(module()->getFunction("printf"), args, "trace");
}
void DecimalIR::AddTrace32(const std::string& msg, llvm::Value* value) {
AddTrace("DECIMAL_IR_TRACE:: " + msg + " %d\n", {value});
}
void DecimalIR::AddTrace128(const std::string& msg, llvm::Value* value) {
// convert i128 into two i64s for printing
auto split = ValueSplit::MakeFromInt128(this, value);
AddTrace("DECIMAL_IR_TRACE:: " + msg + " %llx:%llx (%lld:%llu)\n",
{split.high(), split.low(), split.high(), split.low()});
}
} // namespace gandiva