blob: b856fb23b2960d98f058144dbdd88cdbd62f0c7f [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "codegen/llvm-codegen.h"
#include <fstream>
#include <iostream>
#include <sstream>
#include <boost/algorithm/string.hpp>
#include <boost/thread/mutex.hpp>
#include <gutil/strings/substitute.h>
#include <llvm/ADT/Triple.h>
#include <llvm/Analysis/InstructionSimplify.h>
#include <llvm/Analysis/Passes.h>
#include <llvm/Analysis/TargetTransformInfo.h>
#include <llvm/Bitcode/ReaderWriter.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/MCJIT.h>
#include <llvm/IR/Constants.h>
#include <llvm/IR/DataLayout.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/GlobalVariable.h>
#include <llvm/IR/InstIterator.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/NoFolder.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Linker/Linker.h>
#include <llvm/Support/DynamicLibrary.h>
#include <llvm/Support/ErrorHandling.h>
#include <llvm/Support/Host.h>
#include <llvm/Support/TargetRegistry.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
#include <llvm/Transforms/Utils/Cloning.h>
#include "codegen/codegen-anyval.h"
#include "codegen/codegen-symbol-emitter.h"
#include "codegen/impala-ir-data.h"
#include "codegen/instruction-counter.h"
#include "codegen/mcjit-mem-mgr.h"
#include "common/logging.h"
#include "impala-ir/impala-ir-names.h"
#include "runtime/descriptors.h"
#include "runtime/hdfs-fs-cache.h"
#include "runtime/lib-cache.h"
#include "runtime/mem-pool.h"
#include "runtime/mem-tracker.h"
#include "runtime/string-value.h"
#include "runtime/timestamp-value.h"
#include "util/cpu-info.h"
#include "util/hdfs-util.h"
#include "util/path-builder.h"
#include "util/runtime-profile-counters.h"
#include "util/test-info.h"
#include "common/names.h"
using namespace llvm;
using namespace strings;
using std::fstream;
using std::move;
// Command-line flags that control debugging output and optimization of the runtime
// code generator. Each flag's help string describes its effect.
DEFINE_bool(print_llvm_ir_instruction_count, false,
"if true, prints the instruction counts of all JIT'd functions");
DEFINE_bool(disable_optimization_passes, false,
"if true, disables llvm optimization passes (used for testing)");
DEFINE_bool(dump_ir, false, "if true, output IR after optimization passes");
DEFINE_bool(perf_map, false,
"if true, generate /tmp/perf-<pid>.map file for linux perf symbols. "
"This is not recommended for production use because it may affect performance.");
// The three directory flags below are treated as "unset" when empty.
DEFINE_string(unopt_module_dir, "",
"if set, saves unoptimized generated IR modules to the specified directory.");
DEFINE_string(opt_module_dir, "",
"if set, saves optimized generated IR modules to the specified directory.");
DEFINE_string(asm_module_dir, "",
"if set, saves disassembly for generated IR modules to the specified directory.");
// Only declared here; the flag itself is defined in another source file.
DECLARE_string(local_library_dir);
namespace impala {
// Definitions of LlvmCodeGen's static members. InitializeLlvm() sets
// 'llvm_initialized_' and fills in 'cpu_name_', 'cpu_attrs_' and 'gv_ref_ir_fns_'.
bool LlvmCodeGen::llvm_initialized_ = false;
string LlvmCodeGen::cpu_name_;
vector<string> LlvmCodeGen::cpu_attrs_;
unordered_set<string> LlvmCodeGen::gv_ref_ir_fns_;
[[noreturn]] static void LlvmCodegenHandleError(
void* user_data, const std::string& reason, bool gen_crash_diag) {
LOG(FATAL) << "LLVM hit fatal error: " << reason.c_str();
}
bool LlvmCodeGen::IsDefinedInImpalad(const string& fn_name) {
void* fn_ptr = NULL;
Status status =
LibCache::instance()->GetSoFunctionPtr("", fn_name, &fn_ptr, NULL, true);
return status.ok();
}
// Walks the constant 'val' and records in 'ref_fns' the name of every function it
// references that is not defined in the Impalad native binary. Aliases, aggregates
// and vectors are traversed recursively.
void LlvmCodeGen::ParseGlobalConstant(Value* val, unordered_set<string>* ref_fns) {
  // Names of functions referenced directly by 'val' (not by its sub-constants).
  vector<string> referenced;
  if (Function* direct_fn = dyn_cast<Function>(val)) {
    referenced.push_back(direct_fn->getName().str());
  } else if (BlockAddress* block_addr = dyn_cast<BlockAddress>(val)) {
    referenced.push_back(block_addr->getFunction()->getName().str());
  } else if (GlobalAlias* alias = dyn_cast<GlobalAlias>(val)) {
    ParseGlobalConstant(alias->getAliasee(), ref_fns);
  } else if (ConstantExpr* const_expr = dyn_cast<ConstantExpr>(val)) {
    // Only cast expressions are inspected for function operands.
    if (const_expr->isCast()) {
      for (User::const_op_iterator op = const_expr->op_begin();
           op != const_expr->op_end(); ++op) {
        Function* cast_fn = dyn_cast<Function>(*op);
        if (cast_fn != NULL) referenced.push_back(cast_fn->getName().str());
      }
    }
  } else if (isa<ConstantStruct>(val) || isa<ConstantArray>(val) ||
      isa<ConstantDataArray>(val)) {
    const Constant* aggregate = cast<Constant>(val);
    for (unsigned idx = 0; idx < aggregate->getNumOperands(); ++idx) {
      ParseGlobalConstant(aggregate->getOperand(idx), ref_fns);
    }
  } else if (isa<ConstantVector>(val) || isa<ConstantDataVector>(val)) {
    const Constant* vec = cast<Constant>(val);
    for (unsigned idx = 0; idx < val->getType()->getVectorNumElements(); ++idx) {
      ParseGlobalConstant(vec->getAggregateElement(idx), ref_fns);
    }
  } else {
    // Ignore constants which cannot contain function pointers. Ignore other global
    // variables referenced by this global variable as InitializeLlvm() will parse
    // all global variables.
    DCHECK(isa<UndefValue>(val) || isa<ConstantFP>(val) || isa<ConstantInt>(val) ||
        isa<GlobalVariable>(val) || isa<ConstantTokenNone>(val) ||
        isa<ConstantPointerNull>(val) || isa<ConstantAggregateZero>(val) ||
        isa<ConstantDataSequential>(val));
  }
  // Keep only the functions that are not defined in the Impalad native binary.
  for (const string& name : referenced) {
    if (IsDefinedInImpalad(name)) continue;
    ref_fns->insert(name);
  }
}
// Parses the initializer of every constant global variable in 'module' that has one
// or more operands, adding referenced functions to 'ref_fns' via
// ParseGlobalConstant().
void LlvmCodeGen::ParseGVForFunctions(Module* module, unordered_set<string>* ref_fns) {
  for (GlobalVariable& global : module->globals()) {
    if (!global.hasInitializer() || !global.isConstant()) continue;
    Constant* initializer = global.getInitializer();
    // Initializers without operands cannot reference any function.
    if (initializer->getNumOperands() == 0) continue;
    ParseGlobalConstant(initializer, ref_fns);
  }
}
void LlvmCodeGen::InitializeLlvm(bool load_backend) {
DCHECK(!llvm_initialized_);
llvm::remove_fatal_error_handler();
llvm::install_fatal_error_handler(LlvmCodegenHandleError);
// These functions can *only* be called once per process and are used to set up
// LLVM subsystems for code generation targeting the machine we're running on.
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
llvm::InitializeNativeTargetAsmParser();
llvm::InitializeNativeTargetDisassembler();
llvm_initialized_ = true;
if (load_backend) {
string path;
// For test env, we have to load libfesupport.so to provide sym for LLVM.
PathBuilder::GetFullBuildPath("service/libfesupport.so", &path);
bool failed = llvm::sys::DynamicLibrary::LoadLibraryPermanently(path.c_str());
DCHECK_EQ(failed, 0);
}
cpu_name_ = llvm::sys::getHostCPUName().str();
LOG(INFO) << "CPU class for runtime code generation: " << cpu_name_;
GetHostCPUAttrs(&cpu_attrs_);
LOG(INFO) << "CPU flags for runtime code generation: "
<< boost::algorithm::join(cpu_attrs_, ",");
// Write an empty map file for perf to find.
if (FLAGS_perf_map) CodegenSymbolEmitter::WritePerfMap();
ObjectPool init_pool;
scoped_ptr<LlvmCodeGen> init_codegen;
Status status = LlvmCodeGen::CreateFromMemory(&init_pool, NULL, "init", &init_codegen);
ParseGVForFunctions(init_codegen->module_, &gv_ref_ir_fns_);
// Validate the module by verifying that functions for all IRFunction::Type
// can be found.
for (int i = IRFunction::FN_START; i < IRFunction::FN_END; ++i) {
DCHECK(FN_MAPPINGS[i].fn == i);
const string& fn_name = FN_MAPPINGS[i].fn_name;
DCHECK(init_codegen->module_->getFunction(fn_name) != NULL)
<< "Failed to find function " << fn_name;
}
}
// Constructs a codegen object identified by 'id'. Creates the "CodeGen" runtime
// profile (allocated via 'pool') and a "CodeGen" MemTracker under
// 'parent_mem_tracker', a fresh LLVMContext, and registers the profile timers and
// counters. No module is loaded here: 'module_' stays NULL until Init() runs.
// Requires that InitializeLlvm() has already been called.
LlvmCodeGen::LlvmCodeGen(
ObjectPool* pool, MemTracker* parent_mem_tracker, const string& id)
: id_(id),
profile_(pool, "CodeGen"),
mem_tracker_(new MemTracker(&profile_, -1, "CodeGen", parent_mem_tracker)),
optimizations_enabled_(false),
is_corrupt_(false),
is_compiled_(false),
context_(new llvm::LLVMContext()),
module_(NULL),
memory_manager_(NULL),
loaded_functions_(IRFunction::FN_END, NULL) {
DCHECK(llvm_initialized_) << "Must call LlvmCodeGen::InitializeLlvm first.";
// Register the timers and counters reported in the "CodeGen" profile.
load_module_timer_ = ADD_TIMER(&profile_, "LoadTime");
prepare_module_timer_ = ADD_TIMER(&profile_, "PrepareTime");
module_bitcode_size_ = ADD_COUNTER(&profile_, "ModuleBitcodeSize", TUnit::BYTES);
codegen_timer_ = ADD_TIMER(&profile_, "CodegenTime");
optimization_timer_ = ADD_TIMER(&profile_, "OptimizationTime");
compile_timer_ = ADD_TIMER(&profile_, "CompileTime");
num_functions_ = ADD_COUNTER(&profile_, "NumFunctions", TUnit::UNIT);
num_instructions_ = ADD_COUNTER(&profile_, "NumInstructions", TUnit::UNIT);
}
// Creates a codegen object in '*codegen' and initializes it with the module loaded
// from 'file'. Returns an error if loading or initialization fails; '*codegen' is
// reset to the new object in either case.
Status LlvmCodeGen::CreateFromFile(ObjectPool* pool, MemTracker* parent_mem_tracker,
    const string& file, const string& id, scoped_ptr<LlvmCodeGen>* codegen) {
  codegen->reset(new LlvmCodeGen(pool, parent_mem_tracker, id));
  LlvmCodeGen* new_codegen = codegen->get();
  SCOPED_TIMER(new_codegen->profile_.total_time_counter());
  unique_ptr<Module> file_module;
  RETURN_IF_ERROR(new_codegen->LoadModuleFromFile(file, &file_module));
  return new_codegen->Init(std::move(file_module));
}
// Creates a codegen object in '*codegen' and initializes it with the Impala IR
// module embedded in the binary. The SSE 4.2 build of the IR is chosen only when the
// CPU supports SSE 4.2.
Status LlvmCodeGen::CreateFromMemory(ObjectPool* pool, MemTracker* parent_mem_tracker,
    const string& id, scoped_ptr<LlvmCodeGen>* codegen) {
  codegen->reset(new LlvmCodeGen(pool, parent_mem_tracker, id));
  LlvmCodeGen* new_codegen = codegen->get();
  SCOPED_TIMER(new_codegen->profile_.total_time_counter());
  // Select the appropriate IR version. We cannot use LLVM IR with SSE4.2 instructions
  // on a machine without SSE4.2 support.
  const char* ir_data;
  size_t ir_len;
  string module_name;
  if (CpuInfo::IsSupported(CpuInfo::SSE4_2)) {
    ir_data = reinterpret_cast<const char*>(impala_sse_llvm_ir);
    ir_len = impala_sse_llvm_ir_len;
    module_name = "Impala IR with SSE 4.2 support";
  } else {
    ir_data = reinterpret_cast<const char*>(impala_no_sse_llvm_ir);
    ir_len = impala_no_sse_llvm_ir_len;
    module_name = "Impala IR with no SSE 4.2 support";
  }
  const StringRef module_ir(ir_data, ir_len);
  unique_ptr<MemoryBuffer> ir_buffer(MemoryBuffer::getMemBuffer(module_ir, "", false));
  unique_ptr<Module> ir_module;
  RETURN_IF_ERROR(
      new_codegen->LoadModuleFromMemory(std::move(ir_buffer), module_name, &ir_module));
  return new_codegen->Init(std::move(ir_module));
}
// Reads the bitcode in 'file' into memory and parses it into '*module' via
// LoadModuleFromMemory(). Returns an error if the file cannot be read or parsed.
// The time spent reading the file is charged to 'load_module_timer_'.
Status LlvmCodeGen::LoadModuleFromFile(const string& file, unique_ptr<Module>* module) {
  unique_ptr<MemoryBuffer> file_buffer;
  {
    SCOPED_TIMER(load_module_timer_);
    ErrorOr<unique_ptr<MemoryBuffer>> tmp_file_buffer = MemoryBuffer::getFile(file);
    if (!tmp_file_buffer) {
      stringstream ss;
      ss << "Could not load module " << file << ": "
         << tmp_file_buffer.getError().message();
      return Status(ss.str());
    }
    file_buffer = std::move(tmp_file_buffer.get());
  }
  // 'module_bitcode_size_' is updated by LoadModuleFromMemory(). Adding the buffer
  // size here as well would count every module loaded from a file twice.
  return LoadModuleFromMemory(std::move(file_buffer), file, module);
}
// Lazily parses the bitcode in 'module_ir_buf' into '*module', strips its global
// constructors/destructors and sets its identifier to 'module_name' (which must be
// non-empty). Adds the buffer size to 'module_bitcode_size_'. Returns an error
// containing the parser's message if the bitcode cannot be parsed.
Status LlvmCodeGen::LoadModuleFromMemory(unique_ptr<MemoryBuffer> module_ir_buf,
    string module_name, unique_ptr<Module>* module) {
  DCHECK(!module_name.empty());
  SCOPED_TIMER(prepare_module_timer_);
  // Record the size before the buffer is moved into the bitcode reader.
  COUNTER_ADD(module_bitcode_size_, module_ir_buf->getMemBufferRef().getBufferSize());
  ErrorOr<unique_ptr<Module>> tmp_module =
      getLazyBitcodeModule(std::move(module_ir_buf), context(), false);
  if (!tmp_module) {
    stringstream ss;
    // Use the human-readable message, consistent with LoadModuleFromFile(), rather
    // than streaming the raw error_code.
    ss << "Could not parse module " << module_name << ": "
       << tmp_module.getError().message();
    return Status(ss.str());
  }
  *module = std::move(tmp_module.get());
  // We never run global constructors or destructors so let's strip them out for all
  // modules when we load them.
  StripGlobalCtorsDtors((*module).get());
  (*module)->setModuleIdentifier(module_name);
  return Status::OK();
}
// TODO: Create separate counters/timers (file size, load time) for each module linked
// Loads the module stored in 'file' and links it into the main module 'module_'.
// Returns OK without doing anything if 'file' is already in 'linked_modules_'.
// After a successful link, materializes the functions referenced by the new module's
// global variables as well as the callees of functions materialized during linking.
// Returns an error if loading, linking or materialization fails.
Status LlvmCodeGen::LinkModule(const string& file) {
if (linked_modules_.find(file) != linked_modules_.end()) return Status::OK();
SCOPED_TIMER(profile_.total_time_counter());
unique_ptr<Module> new_module;
RETURN_IF_ERROR(LoadModuleFromFile(file, &new_module));
// The module data layout must match the one selected by the execution engine.
new_module->setDataLayout(execution_engine_->getDataLayout());
// Record all IR functions in 'new_module' referenced by the module's global variables
// if they are not defined in the Impalad native code. They must be materialized to
// avoid linking error.
unordered_set<string> ref_fns;
ParseGVForFunctions(new_module.get(), &ref_fns);
// Record all the materializable functions in the new module before linking.
// Linking the new module to the main module (i.e. 'module_') may materialize
// functions in the new module. These materialized functions need to be parsed
// to materialize any functions they call in 'module_'.
unordered_set<string> materializable_fns;
for (Function& fn: new_module->functions()) {
if (fn.isMaterializable()) materializable_fns.insert(fn.getName().str());
}
// linkModules() consumes 'new_module' and returns true on error.
bool error = Linker::linkModules(*module_, std::move(new_module));
if (error) {
stringstream ss;
ss << "Problem linking " << file << " to main module.";
return Status(ss.str());
}
linked_modules_.insert(file);
for (const string& fn_name: ref_fns) {
Function* fn = module_->getFunction(fn_name);
// The global variable from source module which references 'fn' can have private
// linkage and it may not be linked into 'module_'.
if (fn != NULL && fn->isMaterializable()) {
RETURN_IF_ERROR(MaterializeFunction(fn));
// Already handled here, so skip it in the loop below.
materializable_fns.erase(fn->getName().str());
}
}
// Parse functions in the source module materialized during linking and materialize
// their callees. Do it after linking so LLVM has "merged" functions defined in both
// modules. LLVM may not link in functions (and their callees) from source module if
// they're defined in destination module already.
for (const string& fn_name: materializable_fns) {
Function* fn = module_->getFunction(fn_name);
if (fn != NULL && !fn->isMaterializable()) RETURN_IF_ERROR(MaterializeCallees(fn));
}
return Status::OK();
}
// Removes the "llvm.global_ctors" and "llvm.global_dtors" global variables from
// 'module' if they are present.
void LlvmCodeGen::StripGlobalCtorsDtors(llvm::Module* module) {
  static const char* const kCtorDtorGlobals[] = {
      "llvm.global_ctors", "llvm.global_dtors"};
  for (const char* global_name : kCtorDtorGlobals) {
    GlobalVariable* global = module->getGlobalVariable(global_name);
    if (global == NULL) continue;
    global->eraseFromParent();
  }
}
// Creates a codegen object from the embedded Impala IR module and prepares it for
// use: looks up the StringValue and TimestampValue struct types, checks that the
// StringValue layout matches the native struct size, and materializes the functions
// referenced by global variables ('gv_ref_ir_fns_'). Returns an error if creation,
// the size check or materialization fails.
Status LlvmCodeGen::CreateImpalaCodegen(ObjectPool* pool, MemTracker* parent_mem_tracker,
    const string& id, scoped_ptr<LlvmCodeGen>* codegen_ret) {
  RETURN_IF_ERROR(CreateFromMemory(pool, parent_mem_tracker, id, codegen_ret));
  LlvmCodeGen* codegen = codegen_ret->get();
  // Parse module for cross compiled functions and types
  SCOPED_TIMER(codegen->profile_.total_time_counter());
  SCOPED_TIMER(codegen->prepare_module_timer_);
  // Get type for StringValue
  codegen->string_val_type_ = codegen->GetType(StringValue::LLVM_CLASS_NAME);
  // Get type for TimestampValue
  codegen->timestamp_val_type_ = codegen->GetType(TimestampValue::LLVM_CLASS_NAME);
  // Verify size is correct
  const DataLayout& data_layout = codegen->execution_engine()->getDataLayout();
  const StructLayout* layout =
      data_layout.getStructLayout(static_cast<StructType*>(codegen->string_val_type_));
  if (layout->getSizeInBytes() != sizeof(StringValue)) {
    DCHECK_EQ(layout->getSizeInBytes(), sizeof(StringValue));
    return Status("Could not create llvm struct type for StringVal");
  }
  // Materialize functions implicitly referenced by the global variables.
  for (const string& fn_name : gv_ref_ir_fns_) {
    Function* fn = codegen->module_->getFunction(fn_name);
    DCHECK(fn != NULL);
    // Propagate materialization failures instead of silently dropping them.
    RETURN_IF_ERROR(codegen->MaterializeFunction(fn));
  }
  return Status::OK();
}
// Takes ownership of 'module' (which must be non-NULL), creates the JIT execution
// engine for it using the host CPU name and attributes, and initializes the commonly
// used types and constants. Returns an error if the execution engine cannot be
// created or if LoadIntrinsics() fails.
Status LlvmCodeGen::Init(unique_ptr<Module> module) {
DCHECK(module != NULL);
llvm::CodeGenOpt::Level opt_level = CodeGenOpt::Aggressive;
#ifndef NDEBUG
// For debug builds, don't generate JIT compiled optimized assembly.
// This takes a non-neglible amount of time (~.5 ms per function) and
// blows up the fe tests (which take ~10-20 ms each).
opt_level = CodeGenOpt::None;
#endif
// Keep a raw pointer for later use; ownership is handed to the builder below.
module_ = module.get();
EngineBuilder builder(std::move(module));
builder.setEngineKind(EngineKind::JIT);
builder.setOptLevel(opt_level);
unique_ptr<ImpalaMCJITMemoryManager> memory_manager(new ImpalaMCJITMemoryManager);
// Same pattern: remember the raw pointer before the builder takes ownership.
memory_manager_ = memory_manager.get();
builder.setMCJITMemoryManager(move(memory_manager));
builder.setMCPU(cpu_name_);
builder.setMAttrs(cpu_attrs_);
builder.setErrorStr(&error_string_);
execution_engine_.reset(builder.create());
if (execution_engine_ == NULL) {
module_ = NULL; // module_ was owned by builder.
stringstream ss;
ss << "Could not create ExecutionEngine: " << error_string_;
return Status(ss.str());
}
// The module data layout must match the one selected by the execution engine.
module_->setDataLayout(execution_engine_->getDataLayout());
void_type_ = Type::getVoidTy(context());
ptr_type_ = PointerType::get(GetType(TYPE_TINYINT), 0);
// 1-bit integer constants used as boolean true/false.
true_value_ = ConstantInt::get(context(), APInt(1, true, true));
false_value_ = ConstantInt::get(context(), APInt(1, false, true));
SetupJITListeners();
RETURN_IF_ERROR(LoadIntrinsics());
return Status::OK();
}
void LlvmCodeGen::SetupJITListeners() {
bool need_symbol_emitter = !FLAGS_asm_module_dir.empty() || FLAGS_perf_map;
if (!need_symbol_emitter) return;
symbol_emitter_.reset(new CodegenSymbolEmitter(id_));
execution_engine_->RegisterJITEventListener(symbol_emitter_.get());
symbol_emitter_->set_emit_perf_map(FLAGS_perf_map);
if (!FLAGS_asm_module_dir.empty()) {
symbol_emitter_->set_asm_path(Substitute("$0/$1.asm", FLAGS_asm_module_dir, id_));
}
}
// Releases the memory tracked on behalf of the JIT memory manager, detaches and
// destroys the MemTracker, then destroys the execution engine before the symbol
// emitter.
LlvmCodeGen::~LlvmCodeGen() {
  if (memory_manager_ != NULL) {
    mem_tracker_->Release(memory_manager_->bytes_tracked());
  }
  const bool has_parent_tracker = mem_tracker_->parent() != NULL;
  if (has_parent_tracker) mem_tracker_->UnregisterFromParent();
  mem_tracker_.reset();
  // Execution engine executes callback on event listener, so tear down engine first.
  execution_engine_.reset();
  symbol_emitter_.reset();
}
void LlvmCodeGen::EnableOptimizations(bool enable) {
optimizations_enabled_ = enable;
}
// Appends one entry per host CPU feature reported by LLVM to 'attrs', formatted as
// "+<feature>" when the feature is enabled and "-<feature>" when it is not.
void LlvmCodeGen::GetHostCPUAttrs(vector<string>* attrs) {
  // LLVM's ExecutionEngine expects features to be enabled or disabled with a list
  // of strings like ["+feature1", "-feature2"].
  StringMap<bool> host_features;
  llvm::sys::getHostCPUFeatures(host_features);
  for (const StringMapEntry<bool>& feature : host_features) {
    const char* prefix = feature.second ? "+" : "-";
    attrs->emplace_back(Substitute("$0$1", prefix, feature.first().data()));
  }
}
// Returns the textual IR of the whole module if 'full_module' is true, otherwise the
// IR of only the functions recorded in 'codegend_functions_'.
string LlvmCodeGen::GetIR(bool full_module) const {
  string str;
  raw_string_ostream stream(str);
  if (full_module) {
    module_->print(stream, NULL);
  } else {
    for (Function* fn : codegend_functions_) fn->print(stream, true);
  }
  // raw_string_ostream buffers its output. Flush explicitly so 'str' is complete
  // before it is returned, rather than relying on the stream's destructor running
  // against the return slot (i.e. on NRVO).
  stream.flush();
  return str;
}
// Returns the LLVM type used to represent a value of column type 'type'. Returns
// NULL (after a DCHECK) for TYPE_CHAR, which is not implemented, and for any type
// not handled here.
Type* LlvmCodeGen::GetType(const ColumnType& type) {
  Type* llvm_type = NULL;
  switch (type.type) {
    case TYPE_NULL:
    case TYPE_BOOLEAN:
      llvm_type = Type::getInt1Ty(context());
      break;
    case TYPE_TINYINT:
      llvm_type = Type::getInt8Ty(context());
      break;
    case TYPE_SMALLINT:
      llvm_type = Type::getInt16Ty(context());
      break;
    case TYPE_INT:
      llvm_type = Type::getInt32Ty(context());
      break;
    case TYPE_BIGINT:
      llvm_type = Type::getInt64Ty(context());
      break;
    case TYPE_FLOAT:
      llvm_type = Type::getFloatTy(context());
      break;
    case TYPE_DOUBLE:
      llvm_type = Type::getDoubleTy(context());
      break;
    case TYPE_STRING:
    case TYPE_VARCHAR:
      llvm_type = string_val_type_;
      break;
    case TYPE_CHAR:
      // IMPALA-3207: Codegen for CHAR is not yet implemented, this should not
      // be called for TYPE_CHAR.
      DCHECK(false) << "NYI";
      break;
    case TYPE_TIMESTAMP:
      llvm_type = timestamp_val_type_;
      break;
    case TYPE_DECIMAL:
      // An integer as wide as the decimal's storage size in bits.
      llvm_type = Type::getIntNTy(context(), type.GetByteSize() * 8);
      break;
    default:
      DCHECK(false) << "Invalid type: " << type;
      break;
  }
  return llvm_type;
}
// Returns a pointer type (address space 0) to the LLVM type for column type 'type'.
PointerType* LlvmCodeGen::GetPtrType(const ColumnType& type) {
  Type* pointee = GetType(type);
  return PointerType::get(pointee, 0);
}
// Looks up the type called 'name' in 'module_'. DCHECKs that it exists; returns the
// lookup result (NULL if not found in builds where DCHECKs are disabled).
Type* LlvmCodeGen::GetType(const string& name) {
  Type* named_type = module_->getTypeByName(name);
  DCHECK(named_type != NULL) << name;
  return named_type;
}
// Returns a pointer type (address space 0) to the module type called 'name'.
PointerType* LlvmCodeGen::GetPtrType(const string& name) {
  Type* pointee = GetType(name);
  DCHECK(pointee != NULL) << name;
  return PointerType::get(pointee, 0);
}
// Returns a pointer type (address space 0) to 'type'.
PointerType* LlvmCodeGen::GetPtrType(Type* type) {
  PointerType* ptr_to_type = PointerType::get(type, 0);
  return ptr_to_type;
}
// Returns a pointer-to-pointer type (both in address space 0) to 'type'.
PointerType* LlvmCodeGen::GetPtrPtrType(Type* type) {
  PointerType* ptr_to_type = PointerType::get(type, 0);
  return PointerType::get(ptr_to_type, 0);
}
// LLVM has no direct way to build a pointer constant from a native pointer, so the
// address in 'ptr' is first turned into a 64-bit integer constant and then converted
// to 'type' with an inttoptr constant expression.
Value* LlvmCodeGen::CastPtrToLlvmPtr(Type* type, const void* ptr) {
  const int64_t address = reinterpret_cast<int64_t>(ptr);
  Type* int64_type = Type::getInt64Ty(context());
  Constant* address_const = ConstantInt::get(int64_type, address);
  return ConstantExpr::getIntToPtr(address_const, type);
}
// Returns an integer constant holding 'val' whose bit width matches the integer
// column type 'type' (8, 16, 32 or 64 bits). Returns NULL (after a DCHECK) for any
// other type.
Constant* LlvmCodeGen::GetIntConstant(PrimitiveType type, uint64_t val) {
  unsigned num_bits = 0;
  switch (type) {
    case TYPE_TINYINT:
      num_bits = 8;
      break;
    case TYPE_SMALLINT:
      num_bits = 16;
      break;
    case TYPE_INT:
      num_bits = 32;
      break;
    case TYPE_BIGINT:
      num_bits = 64;
      break;
    default:
      DCHECK(false);
      return NULL;
  }
  return ConstantInt::get(context(), APInt(num_bits, val));
}
// Returns an integer constant that is 'num_bytes' wide, built from the 64-bit words
// 'low_bits' and 'high_bits'. 'num_bytes' must be a power of two between 1 and 16.
Constant* LlvmCodeGen::GetIntConstant(int num_bytes, uint64_t low_bits, uint64_t high_bits) {
  DCHECK_GE(num_bytes, 1);
  DCHECK_LE(num_bytes, 16);
  DCHECK(BitUtil::IsPowerOf2(num_bytes));
  // Least significant word first.
  vector<uint64_t> words;
  words.push_back(low_bits);
  words.push_back(high_bits);
  const APInt value(8 * num_bytes, words);
  return ConstantInt::get(context(), value);
}
// Adds a private constant global to 'module_' holding the 'len' bytes at 'data'
// (no terminating null is appended) and returns a pointer to its first byte,
// emitted through 'builder'.
Value* LlvmCodeGen::GetStringConstant(LlvmBuilder* builder, char* data, int len) {
  // Create a global string with private linkage.
  const StringRef contents(data, len);
  Constant* string_const = ConstantDataArray::getString(context(), contents, false);
  GlobalVariable* string_global = new GlobalVariable(*module_, string_const->getType(),
      true, GlobalValue::PrivateLinkage, string_const);
  // Get a pointer to the first element of the string.
  return builder->CreateConstInBoundsGEP2_32(NULL, string_global, 0, 0, "");
}
// Inserts an alloca for 'var' at the start of the entry block of 'f' and returns it.
// Allocas of the DecimalVal type get 16-byte alignment.
AllocaInst* LlvmCodeGen::CreateEntryBlockAlloca(Function* f, const NamedVariable& var) {
  BasicBlock& entry_block = f->getEntryBlock();
  IRBuilder<> entry_builder(&entry_block, entry_block.begin());
  AllocaInst* alloca_inst = entry_builder.CreateAlloca(var.type, NULL, var.name.c_str());
  const bool is_decimal_val = var.type == GetType(CodegenAnyVal::LLVM_DECIMALVAL_NAME);
  if (is_decimal_val) {
    // Generated functions may manipulate DecimalVal arguments via SIMD instructions
    // such as 'movaps' that require 16-byte memory alignment. LLVM uses 8-byte
    // alignment by default, so explicitly set the alignment for DecimalVals.
    alloca_inst->setAlignment(16);
  }
  return alloca_inst;
}
// Convenience overload: allocates a variable called 'name' of type 'type' in the
// entry block of the function 'builder' is currently inserting into.
AllocaInst* LlvmCodeGen::CreateEntryBlockAlloca(
    const LlvmBuilder& builder, Type* type, const char* name) {
  Function* current_fn = builder.GetInsertBlock()->getParent();
  return CreateEntryBlockAlloca(current_fn, NamedVariable(name, type));
}
// Allocates an array of 'num_entries' elements of 'type' with the given 'alignment'
// at the start of the entry block of the function 'builder' is inserting into.
AllocaInst* LlvmCodeGen::CreateEntryBlockAlloca(const LlvmBuilder& builder, Type* type,
    int num_entries, int alignment, const char* name) {
  BasicBlock& entry_block = builder.GetInsertBlock()->getParent()->getEntryBlock();
  IRBuilder<> entry_builder(&entry_block, entry_block.begin());
  Constant* array_size = GetIntConstant(TYPE_INT, num_entries);
  AllocaInst* array_alloca = entry_builder.CreateAlloca(type, array_size, name);
  array_alloca->setAlignment(alignment);
  return array_alloca;
}
// Creates two basic blocks in 'fn', named 'if_name' and 'else_name', both placed
// before 'insert_before', and returns them in '*if_block' and '*else_block'.
void LlvmCodeGen::CreateIfElseBlocks(Function* fn, const string& if_name,
    const string& else_name, BasicBlock** if_block, BasicBlock** else_block,
    BasicBlock* insert_before) {
  BasicBlock* then_bb = BasicBlock::Create(context(), if_name, fn, insert_before);
  *if_block = then_bb;
  BasicBlock* else_bb = BasicBlock::Create(context(), else_name, fn, insert_before);
  *else_block = else_bb;
}
// Materializes every function that 'fn' calls directly through a call or invoke
// instruction. Calls without a statically known callee are skipped. Returns the
// first materialization error encountered, if any.
Status LlvmCodeGen::MaterializeCallees(Function* fn) {
  for (inst_iterator it = inst_begin(fn); it != inst_end(fn); ++it) {
    Instruction* instr = &*it;
    Function* callee = NULL;
    if (CallInst* call = dyn_cast<CallInst>(instr)) {
      callee = call->getCalledFunction();
    } else if (InvokeInst* invoke = dyn_cast<InvokeInst>(instr)) {
      callee = invoke->getCalledFunction();
    }
    if (callee == NULL) continue;
    RETURN_IF_ERROR(MaterializeFunctionHelper(callee));
  }
  return Status::OK();
}
// Materializes 'fn' and, recursively, the functions it calls. Intrinsics and
// functions that are not materializable are left untouched. Must not be called
// after the module has been compiled.
Status LlvmCodeGen::MaterializeFunctionHelper(Function *fn) {
  DCHECK(!is_compiled_);
  if (fn->isIntrinsic()) return Status::OK();
  if (!fn->isMaterializable()) return Status::OK();
  const std::error_code materialize_err = module_->materialize(fn);
  if (UNLIKELY(materialize_err)) {
    return Status(Substitute("Failed to materialize $0: $1",
        fn->getName().str(), materialize_err.message()));
  }
  // Materialized functions are marked as not materializable by LLVM.
  DCHECK(!fn->isMaterializable());
  return MaterializeCallees(fn);
}
// Materializes 'fn' and its callees, charging the time to the total and
// module-preparation timers.
Status LlvmCodeGen::MaterializeFunction(Function *fn) {
  SCOPED_TIMER(profile_.total_time_counter());
  SCOPED_TIMER(prepare_module_timer_);
  Status materialize_status = MaterializeFunctionHelper(fn);
  return materialize_status;
}
// Returns the materialized function called 'symbol' from 'module_', or a clone of it
// if 'clone' is true. Returns NULL if the function does not exist or cannot be
// materialized.
Function* LlvmCodeGen::GetFunction(const string& symbol, bool clone) {
  Function* found_fn = module_->getFunction(symbol.c_str());
  if (found_fn == NULL) {
    LOG(ERROR) << "Unable to locate function " << symbol;
    return NULL;
  }
  const Status materialize_status = MaterializeFunction(found_fn);
  if (UNLIKELY(!materialize_status.ok())) return NULL;
  return clone ? CloneFunction(found_fn) : found_fn;
}
// Returns the materialized cross-compiled function identified by 'ir_type', or a
// clone of it if 'clone' is true. The lookup result is cached in
// 'loaded_functions_'. Returns NULL if the function does not exist or cannot be
// materialized.
Function* LlvmCodeGen::GetFunction(IRFunction::Type ir_type, bool clone) {
  Function* ir_fn = loaded_functions_[ir_type];
  if (ir_fn == NULL) {
    // First request for this function: resolve it by name and cache it.
    DCHECK(FN_MAPPINGS[ir_type].fn == ir_type);
    const string& ir_fn_name = FN_MAPPINGS[ir_type].fn_name;
    ir_fn = module_->getFunction(ir_fn_name);
    if (ir_fn == NULL) {
      LOG(ERROR) << "Unable to locate function " << ir_fn_name;
      return NULL;
    }
    // Mixing "NoInline" with "AlwaysInline" will lead to compilation failure.
    const bool no_inline = ir_fn->hasFnAttribute(Attribute::NoInline);
    if (!no_inline) ir_fn->addFnAttr(Attribute::AlwaysInline);
    loaded_functions_[ir_type] = ir_fn;
  }
  const Status materialize_status = MaterializeFunction(ir_fn);
  if (UNLIKELY(!materialize_status.ok())) return NULL;
  return clone ? CloneFunction(ir_fn) : ir_fn;
}
// TODO: this should return a Status
// Verifies that 'fn' is well formed. On failure, logs the problem, sets
// 'is_corrupt_' and returns false. Once 'is_corrupt_' is set, every later call
// returns false without examining 'fn'.
bool LlvmCodeGen::VerifyFunction(Function* fn) {
  if (is_corrupt_) return false;
  // Check that there are no calls to Expr::GetConstant(). These should all have been
  // inlined via Expr::InlineConstants().
  for (inst_iterator iter = inst_begin(fn); iter != inst_end(fn); ++iter) {
    Instruction* instr = &*iter;
    if (!isa<CallInst>(instr)) continue;
    CallInst* call_instr = cast<CallInst>(instr);
    Function* called_fn = call_instr->getCalledFunction();
    // look for call to Expr::GetConstant()
    if (called_fn != NULL &&
        called_fn->getName().find(Expr::GET_CONSTANT_INT_SYMBOL_PREFIX) != string::npos) {
      LOG(ERROR) << "Found call to Expr::GetConstant*(): " << Print(call_instr);
      is_corrupt_ = true;
      break;
    }
  }
  // There is an llvm bug (#10957) that causes the first step of the verifier to always
  // abort the process if it runs into an issue and ignores ReturnStatusAction. This
  // would cause impalad to go down if one query has a problem. To work around this, we
  // will copy that step here and not abort on error. Adapted from the pre-verifier
  // function pass.
  // TODO: doesn't seem there is much traction in getting this fixed but we'll see
  for (Function::iterator i = fn->begin(), e = fn->end(); i != e; ++i) {
    if (i->empty() || !i->back().isTerminator()) {
      LOG(ERROR) << "Basic block must end with terminator: \n" << Print(&(*i));
      is_corrupt_ = true;
      break;
    }
  }
  if (!is_corrupt_) {
    string str;
    raw_string_ostream stream(str);
    is_corrupt_ = verifyFunction(*fn, &stream);
    // raw_string_ostream buffers its output; str() flushes it first. Reading 'str'
    // directly could log an empty or truncated verifier message.
    if (is_corrupt_) LOG(ERROR) << stream.str();
  }
  if (is_corrupt_) {
    string fn_name = fn->getName(); // llvm has some fancy operator overloading
    LOG(ERROR) << "Function corrupt: " << fn_name;
    fn->dump();
    return false;
  }
  return true;
}
// Marks 'function' as NoInline, first dropping AlwaysInline since the two
// attributes must not be combined.
void LlvmCodeGen::SetNoInline(llvm::Function* function) const {
  using llvm::Attribute;
  function->removeFnAttr(Attribute::AlwaysInline);
  function->addFnAttr(Attribute::NoInline);
}
// Starts the prototype of a function called 'name' that returns 'ret_type' and will
// be added to the module of 'codegen'. Not valid once the module has been compiled.
LlvmCodeGen::FnPrototype::FnPrototype(
    LlvmCodeGen* codegen, const string& name, Type* ret_type)
  : codegen_(codegen),
    name_(name),
    ret_type_(ret_type) {
  DCHECK(!codegen_->is_compiled_) << "Not valid to add additional functions";
}
// Creates the function described by this prototype in the codegen's module, with
// external linkage, and names its arguments. If 'params' is non-NULL it receives the
// argument values in order. If 'builder' is non-NULL an "entry" block is created and
// the builder is positioned at it. If 'print_ir' is true the function is added to
// the list of functions printed by GetIR(false).
Function* LlvmCodeGen::FnPrototype::GeneratePrototype(
    LlvmBuilder* builder, Value** params, bool print_ir) {
  vector<Type*> arg_types;
  for (const auto& arg : args_) arg_types.push_back(arg.type);
  FunctionType* fn_type = FunctionType::get(ret_type_, arg_types, false);
  Function* new_fn = Function::Create(
      fn_type, GlobalValue::ExternalLinkage, name_, codegen_->module_);
  DCHECK(new_fn != NULL);
  // Name the arguments
  int arg_idx = 0;
  for (Argument& fn_arg : new_fn->args()) {
    fn_arg.setName(args_[arg_idx].name);
    if (params != NULL) params[arg_idx] = &fn_arg;
    ++arg_idx;
  }
  if (builder != NULL) {
    BasicBlock* entry_block = BasicBlock::Create(codegen_->context(), "entry", new_fn);
    builder->SetInsertPoint(entry_block);
  }
  if (print_ir) codegen_->codegend_functions_.push_back(new_fn);
  return new_fn;
}
// Redirects every call instruction in 'caller' whose callee name contains
// 'target_name' so that it calls 'new_fn' instead. Returns the number of call sites
// replaced. 'caller' must belong to 'module_' and the module must not be compiled.
int LlvmCodeGen::ReplaceCallSites(Function* caller, Function* new_fn,
    const string& target_name) {
  DCHECK(!is_compiled_);
  // Check for NULL before 'caller' is dereferenced below.
  DCHECK(caller != NULL);
  DCHECK(new_fn != NULL);
  DCHECK(caller->getParent() == module_);
  vector<CallInst*> call_sites;
  FindCallSites(caller, target_name, &call_sites);
  for (CallInst* call_instr : call_sites) {
    // Replace the called function
    call_instr->setCalledFunction(new_fn);
  }
  return static_cast<int>(call_sites.size());
}
// Replaces all uses of the result of every call instruction in 'caller' whose callee
// name contains 'target_name' with 'replacement'. The call instructions themselves
// are left in place. Returns the number of call sites processed. 'caller' must
// belong to 'module_' and the module must not be compiled.
int LlvmCodeGen::ReplaceCallSitesWithValue(Function* caller, Value* replacement,
    const string& target_name) {
  DCHECK(!is_compiled_);
  // Check for NULL before 'caller' is dereferenced below.
  DCHECK(caller != NULL);
  DCHECK(replacement != NULL);
  DCHECK(caller->getParent() == module_);
  vector<CallInst*> call_sites;
  FindCallSites(caller, target_name, &call_sites);
  for (CallInst* call_instr : call_sites) {
    call_instr->replaceAllUsesWith(replacement);
  }
  return static_cast<int>(call_sites.size());
}
// Same as ReplaceCallSitesWithValue(), with a 1-bit integer constant equal to
// 'constant' as the replacement value.
int LlvmCodeGen::ReplaceCallSitesWithBoolConst(llvm::Function* caller, bool constant,
    const string& target_name) {
  Type* bool_type = Type::getInt1Ty(context());
  Value* bool_const = ConstantInt::get(bool_type, constant);
  return ReplaceCallSitesWithValue(caller, bool_const, target_name);
}
// Appends to 'results' every call instruction in 'caller' whose statically known
// callee has a name containing 'target_name' as a substring.
void LlvmCodeGen::FindCallSites(Function* caller, const string& target_name,
    vector<CallInst*>* results) {
  for (inst_iterator it = inst_begin(caller); it != inst_end(caller); ++it) {
    // Look for call instructions. Note that we'll ignore invoke and other related
    // instructions that are not a plain function call.
    CallInst* call = dyn_cast<CallInst>(&*it);
    if (call == NULL) continue;
    Function* callee = call->getCalledFunction();
    if (callee == NULL) continue;
    // Check for substring match.
    const bool name_matches = callee->getName().find(target_name) != string::npos;
    if (name_matches) results->push_back(call);
  }
}
// Returns a copy of 'fn', with the same attributes, added to 'module_'. 'fn' must
// already be materialized and the module must not be compiled.
Function* LlvmCodeGen::CloneFunction(Function* fn) {
  DCHECK(!is_compiled_);
  // Verifies that 'fn' has been materialized already. Callers are expected to use
  // GetFunction() to obtain the Function object.
  DCHECK(!fn->isMaterializable());
  ValueToValueMapTy unused_value_map;
  // CloneFunction() automatically gives the new function a unique name
  Function* cloned_fn = llvm::CloneFunction(fn, unused_value_map, false);
  cloned_fn->copyAttributesFrom(fn);
  module_->getFunctionList().push_back(cloned_fn);
  return cloned_fn;
}
// Marks 'function' AlwaysInline unless it is NoInline, then verifies it. Returns
// 'function' on success. If verification fails, the function is erased from its
// module and NULL is returned.
Function* LlvmCodeGen::FinalizeFunction(Function* function) {
  const bool no_inline = function->hasFnAttribute(llvm::Attribute::NoInline);
  if (LIKELY(!no_inline)) function->addFnAttr(llvm::Attribute::AlwaysInline);
  const bool verified = VerifyFunction(function);
  if (!verified) {
    function->eraseFromParent(); // deletes function
    return NULL;
  }
  if (FLAGS_dump_ir) function->dump();
  return function;
}
// Materializes everything that is still lazily loaded in 'module'. Returns an error
// naming the module if LLVM reports a failure.
Status LlvmCodeGen::MaterializeModule(Module* module) {
  const std::error_code materialize_err = module->materializeAll();
  if (LIKELY(!materialize_err)) return Status::OK();
  return Status(Substitute("Failed to complete materialization of module $0: $1",
      module->getName().str(), materialize_err.message()));
}
// Turns every function of 'module_' that is still unmaterialized into a plain
// external declaration and then completes materialization of the module.
// It's okay to call this function even if the module has been materialized.
Status LlvmCodeGen::FinalizeLazyMaterialization() {
  SCOPED_TIMER(prepare_module_timer_);
  for (Function& unused_fn : module_->functions()) {
    if (!unused_fn.isMaterializable()) continue;
    DCHECK(!module_->isMaterialized());
    // Unmaterialized functions can still have their declarations around. LLVM asserts
    // these unmaterialized functions' linkage types are external / external weak.
    unused_fn.setLinkage(Function::ExternalLinkage);
    // DCE may claim the personality function is still referenced by unmaterialized
    // functions when it is deleted by DCE. Similarly, LLVM may complain if comdats
    // reference unmaterialized functions but their definition cannot be found.
    // Since the unmaterialized functions are not used anyway, just remove their
    // personality functions and comdats.
    unused_fn.setPersonalityFn(NULL);
    unused_fn.setComdat(NULL);
    unused_fn.setIsMaterializable(false);
  }
  // All unused functions are now not materializable so it should be quick to call
  // materializeAll(). We need to call this function in order to destroy the
  // materializer so that DCE will not assert fail.
  return MaterializeModule(module_);
}
// Compiles the module: optionally dumps the unoptimized IR, finishes lazy
// materialization, runs the optimization passes (if enabled), optionally dumps the
// optimized IR, JITs the module and stores the resulting function pointers through the
// pointers registered in fns_to_jit_compile_. Finally the module is destroyed and the
// memory used by the compiled code is charged to mem_tracker_.
// Returns an error if the module is corrupt, if materialization or optimization fails,
// or if the memory for the compiled code cannot be consumed from the tracker.
Status LlvmCodeGen::FinalizeModule() {
  DCHECK(!is_compiled_);
  is_compiled_ = true;

  if (!FLAGS_unopt_module_dir.empty()) {
    const string path = FLAGS_unopt_module_dir + "/" + id_ + "_unopt.ll";
    fstream f(path.c_str(), fstream::out | fstream::trunc);
    if (!f.fail()) {
      f << GetIR(true);
      f.close();
    } else {
      LOG(ERROR) << "Could not save IR to: " << path;
    }
  }

  if (is_corrupt_) return Status("Module is corrupt.");
  SCOPED_TIMER(profile_.total_time_counter());

  // Don't waste time optimizing the module if there is nothing to JIT. This can happen
  // if the codegen object is created but no functions are successfully codegen'd.
  if (fns_to_jit_compile_.empty()) {
    DestroyModule();
    return Status::OK();
  }

  RETURN_IF_ERROR(FinalizeLazyMaterialization());
  if (optimizations_enabled_ && !FLAGS_disable_optimization_passes) {
    RETURN_IF_ERROR(OptimizeModule());
  }

  if (!FLAGS_opt_module_dir.empty()) {
    const string path = FLAGS_opt_module_dir + "/" + id_ + "_opt.ll";
    fstream f(path.c_str(), fstream::out | fstream::trunc);
    if (!f.fail()) {
      f << GetIR(true);
      f.close();
    } else {
      LOG(ERROR) << "Could not save IR to: " << path;
    }
  }

  {
    SCOPED_TIMER(compile_timer_);
    // Finalizing the object compiles all functions.
    execution_engine_->finalizeObject();
  }

  // Hand the address of each compiled function to whoever registered it.
  for (auto& entry : fns_to_jit_compile_) {
    Function* function = entry.first;
    void* jitted_function = execution_engine_->getPointerToFunction(function);
    DCHECK(jitted_function != NULL) << "Failed to jit " << function->getName().data();
    *entry.second = jitted_function;
  }

  DestroyModule();

  // Track the memory consumed by the compiled code.
  const int64_t bytes_allocated = memory_manager_->bytes_allocated();
  if (!mem_tracker_->TryConsume(bytes_allocated)) {
    const string msg = Substitute(
        "Failed to allocate '$0' bytes for compiled code module", bytes_allocated);
    return mem_tracker_->MemLimitExceeded(NULL, msg, bytes_allocated);
  }
  memory_manager_->set_bytes_tracked(bytes_allocated);
  return Status::OK();
}
// Runs the optimization pipeline over module_:
//   1. internalize everything except the functions registered for JIT, then run
//      global DCE to drop unused code;
//   2. update the function/instruction counters and reserve an estimated amount of
//      memory from mem_tracker_ for the optimizer;
//   3. run the function passes and then the module passes chosen by
//      PassManagerBuilder;
//   4. release the reserved memory.
// Returns an error if the memory reservation fails.
Status LlvmCodeGen::OptimizeModule() {
  SCOPED_TIMER(optimization_timer_);

  // This pass manager builder constructs optimization passes that are "typical" for
  // c/c++ programs. We're relying on llvm to pick the best passes for us.
  // TODO: we can likely muck with this to get better compile speeds or write
  // our own passes. Our subexpression elimination optimization can be rolled into
  // a pass.
  PassManagerBuilder pass_builder;
  // 2 maps to -O2
  // TODO: should we switch to 3? (3 may not produce different IR than 2 while taking
  // longer, but we should check)
  pass_builder.OptLevel = 2;
  // Don't optimize for code size (this corresponds to -O2/-O3)
  pass_builder.SizeLevel = 0;
  pass_builder.Inliner = createFunctionInliningPass();

  // The TargetIRAnalysis pass is required to provide information about the target
  // machine to optimisation passes, e.g. the cost model.
  TargetIRAnalysis target_analysis =
      execution_engine_->getTargetMachine()->getTargetIRAnalysis();

  // Before running any other optimization passes, run the internalize pass, giving it
  // the names of all functions registered by AddFunctionToJit(), followed by the
  // global dead code elimination pass. Every function that is not registered to be
  // JIT'd becomes internal, and unused internal functions are deleted by the DCE pass.
  // This greatly decreases compile time by removing unused code.
  vector<const char*> exported_fn_names;
  for (auto& entry : fns_to_jit_compile_) {
    exported_fn_names.push_back(entry.first->getName().data());
  }

  unique_ptr<legacy::PassManager> module_pass_manager(new legacy::PassManager());
  module_pass_manager->add(createTargetTransformInfoWrapperPass(target_analysis));
  module_pass_manager->add(createInternalizePass(exported_fn_names));
  module_pass_manager->add(createGlobalDCEPass());
  module_pass_manager->run(*module_);

  // Update counters before final optimization, but after removing unused functions.
  // This gives a rough measure of how much work optimization and compilation must do.
  InstructionCounter module_counter;
  module_counter.visit(*module_);
  COUNTER_SET(num_functions_,
      module_counter.GetCount(InstructionCounter::TOTAL_FUNCTIONS));
  COUNTER_SET(num_instructions_,
      module_counter.GetCount(InstructionCounter::TOTAL_INSTS));
  const int64_t estimated_memory = ESTIMATED_OPTIMIZER_BYTES_PER_INST
      * module_counter.GetCount(InstructionCounter::TOTAL_INSTS);
  if (!mem_tracker_->TryConsume(estimated_memory)) {
    const string msg = Substitute(
        "Codegen failed to reserve '$0' bytes for optimization", estimated_memory);
    return mem_tracker_->MemLimitExceeded(NULL, msg, estimated_memory);
  }

  // Create and run the function pass manager over every function that has a body.
  unique_ptr<legacy::FunctionPassManager> fn_pass_manager(
      new legacy::FunctionPassManager(module_));
  fn_pass_manager->add(createTargetTransformInfoWrapperPass(target_analysis));
  pass_builder.populateFunctionPassManager(*fn_pass_manager);
  fn_pass_manager->doInitialization();
  for (Function& fn : module_->functions()) {
    if (fn.isDeclaration()) continue;
    fn_pass_manager->run(fn);
  }
  fn_pass_manager->doFinalization();

  // Create and run the module pass manager.
  module_pass_manager.reset(new legacy::PassManager());
  module_pass_manager->add(createTargetTransformInfoWrapperPass(target_analysis));
  pass_builder.populateModulePassManager(*module_pass_manager);
  module_pass_manager->run(*module_);

  if (FLAGS_print_llvm_ir_instruction_count) {
    for (auto& entry : fns_to_jit_compile_) {
      InstructionCounter fn_counter;
      fn_counter.visit(*entry.first);
      VLOG(1) << entry.first->getName().str();
      VLOG(1) << fn_counter.PrintCounters();
    }
  }

  mem_tracker_->Release(estimated_memory);
  return Status::OK();
}
// Drops every cached reference to LLVM objects owned by the module, removes the module
// from the execution engine and resets module_.
void LlvmCodeGen::DestroyModule() {
  // Function caches.
  loaded_functions_.clear();
  codegend_functions_.clear();

  // Registered expressions.
  registered_exprs_map_.clear();
  registered_exprs_.clear();

  // Intrinsics, generated hash functions and the pending JIT list.
  llvm_intrinsics_.clear();
  hash_fns_.clear();
  fns_to_jit_compile_.clear();

  execution_engine_->removeModule(module_);
  module_ = nullptr;
}
// Registers 'fn' to be JIT-compiled; the compiled function's address is later written
// to '*fn_ptr'.
//   fn:     function to compile.
//   fn_ptr: location that receives the pointer to the compiled code.
// If 'fn' returns a DecimalVal, an ABI-compliant wrapper named "<fn name>ABIWrapper" is
// generated and registered in place of 'fn'.
void LlvmCodeGen::AddFunctionToJit(Function* fn, void** fn_ptr) {
  Type* decimal_val_type = GetType(CodegenAnyVal::LLVM_DECIMALVAL_NAME);
  if (fn->getReturnType() == decimal_val_type) {
    // Per the x86 calling convention ABI, DecimalVals should be returned via an extra
    // first DecimalVal* argument. We generate non-compliant functions that return the
    // DecimalVal directly, which we can call from generated code, but not from compiled
    // native code. To avoid accidentally calling a non-compliant function from native
    // code, call 'function' from an ABI-compliant wrapper.
    stringstream name;
    name << fn->getName().str() << "ABIWrapper";
    LlvmCodeGen::FnPrototype prototype(this, name.str(), void_type_);
    // Add return argument
    prototype.AddArgument(NamedVariable("result", decimal_val_type->getPointerTo()));
    // Add regular arguments
    for (Function::arg_iterator arg = fn->arg_begin(); arg != fn->arg_end(); ++arg) {
      prototype.AddArgument(NamedVariable(arg->getName(), arg->getType()));
    }
    LlvmBuilder builder(context());
    // args[0] is the result pointer, args[1..] are the arguments forwarded to 'fn'.
    // A vector is used because a runtime-sized stack array is a compiler extension and
    // not standard C++.
    vector<Value*> args(fn->arg_size() + 1);
    Function* fn_wrapper = prototype.GeneratePrototype(&builder, args.data());
    fn_wrapper->addFnAttr(llvm::Attribute::AlwaysInline);
    // Mark first argument as sret (not sure if this is necessary but it can't hurt)
    fn_wrapper->addAttribute(1, Attribute::StructRet);
    // Call 'fn' and store the result in the result argument
    Value* result = builder.CreateCall(
        fn, ArrayRef<Value*>(args.data() + 1, fn->arg_size()), "result");
    builder.CreateStore(result, args[0]);
    builder.CreateRetVoid();
    fn = FinalizeFunction(fn_wrapper);
    DCHECK(fn != NULL);
  }
  AddFunctionToJitInternal(fn, fn_ptr);
}
// Appends the ('fn', 'fn_ptr') pair to fns_to_jit_compile_. Must be called before the
// module is compiled.
void LlvmCodeGen::AddFunctionToJitInternal(Function* fn, void** fn_ptr) {
  DCHECK(!is_compiled_);
  fns_to_jit_compile_.emplace_back(fn, fn_ptr);
}
// Emits a call to printf() that prints "LLVM Trace: <str>", passing 'v1' as an extra
// printf argument when it is not NULL. Debugging aid only: an error is logged on every
// use as a reminder to remove the trace before checking in.
void LlvmCodeGen::CodegenDebugTrace(LlvmBuilder* builder, const char* str,
    Value* v1) {
  LOG(ERROR) << "Remove IR codegen debug traces before checking in.";

  // Keep a copy of the message in memory owned by this object. There is no guarantee
  // that 'str' is still around when the debug printf is executed.
  debug_strings_.push_back(Substitute("LLVM Trace: $0", str));
  const char* owned_str = debug_strings_.back().c_str();

  Function* printf_fn = module_->getFunction("printf");
  DCHECK(printf_fn != NULL);

  // The message is passed to printf as a constant pointer value.
  vector<Value*> printf_args;
  printf_args.push_back(CastPtrToLlvmPtr(ptr_type_, const_cast<char*>(owned_str)));
  if (v1 != NULL) printf_args.push_back(v1);
  builder->CreateCall(printf_fn, printf_args);
}
// Inserts into 'symbols' the name of every function in module_ that is still
// materializable.
void LlvmCodeGen::GetSymbols(unordered_set<string>* symbols) {
  for (const Function& fn: module_->functions()) {
    if (!fn.isMaterializable()) continue;
    symbols->insert(fn.getName().str());
  }
}
// Generates a function "Min" or "Max" (depending on 'min') that takes two values of
// 'type' and returns one of them. For booleans the result is computed directly as
// v1 && v2 (min) or v1 || v2 (max); for all other supported types the arguments are
// compared and the function branches to return either v1 or v2.
// TODO: cache this function (e.g. all min(int, int) are identical).
// we probably want some more global IR function cache, or, implement this
// in c and precompile it with clang.
// Example IR for an i32 min:
// define i32 @Min(i32 %v1, i32 %v2) {
// entry:
//   %0 = icmp slt i32 %v1, %v2
//   br i1 %0, label %ret_v1, label %ret_v2
//
// ret_v1:                                           ; preds = %entry
//   ret i32 %v1
//
// ret_v2:                                           ; preds = %entry
//   ret i32 %v2
// }
Function* LlvmCodeGen::CodegenMinMax(const ColumnType& type, bool min) {
  LlvmCodeGen::FnPrototype prototype(this, min ? "Min" : "Max", GetType(type));
  prototype.AddArgument(LlvmCodeGen::NamedVariable("v1", GetType(type)));
  prototype.AddArgument(LlvmCodeGen::NamedVariable("v2", GetType(type)));

  Value* params[2];
  LlvmBuilder builder(context());
  Function* fn = prototype.GeneratePrototype(&builder, &params[0]);
  Value* const v1 = params[0];
  Value* const v2 = params[1];

  Value* compare = NULL;
  switch (type.type) {
    case TYPE_NULL:
      compare = false_value();
      break;
    case TYPE_BOOLEAN:
      // min is x && y, max is x || y
      compare = min ? builder.CreateAnd(v1, v2) : builder.CreateOr(v1, v2);
      break;
    case TYPE_TINYINT:
    case TYPE_SMALLINT:
    case TYPE_INT:
    case TYPE_BIGINT:
    case TYPE_DECIMAL:
      compare = min ? builder.CreateICmpSLT(v1, v2) : builder.CreateICmpSGT(v1, v2);
      break;
    case TYPE_FLOAT:
    case TYPE_DOUBLE:
      compare = min ? builder.CreateFCmpULT(v1, v2) : builder.CreateFCmpUGT(v1, v2);
      break;
    default:
      DCHECK(false);
  }

  if (type.type == TYPE_BOOLEAN) {
    // The and/or result already is the answer.
    builder.CreateRet(compare);
    return FinalizeFunction(fn);
  }

  // Branch on the comparison and return the selected argument.
  BasicBlock* ret_v1 = NULL;
  BasicBlock* ret_v2 = NULL;
  CreateIfElseBlocks(fn, "ret_v1", "ret_v2", &ret_v1, &ret_v2);
  builder.CreateCondBr(compare, ret_v1, ret_v2);
  builder.SetInsertPoint(ret_v1);
  builder.CreateRet(v1);
  builder.SetInsertPoint(ret_v2);
  builder.CreateRet(v2);
  return FinalizeFunction(fn);
}
// Looks up the LLVM intrinsics used by codegen and records them in llvm_intrinsics_.
// Intrinsics are loaded one by one. Some are overloaded (e.g. memcpy) and the types
// must be specified. Returns an error naming the first intrinsic that is not found.
// TODO: is there a better way to do this?
Status LlvmCodeGen::LoadIntrinsics() {
  // memcpy is overloaded, so its declaration is requested with explicit types.
  Type* memcpy_types[] = { ptr_type(), ptr_type(), GetType(TYPE_INT) };
  Function* memcpy_fn =
      Intrinsic::getDeclaration(module_, Intrinsic::memcpy, memcpy_types);
  if (memcpy_fn == NULL) return Status("Could not find memcpy intrinsic.");
  llvm_intrinsics_[Intrinsic::memcpy] = memcpy_fn;

  // TODO: where is the best place to put this?
  struct {
    Intrinsic::ID id;
    const char* error;
  } non_overloaded_intrinsics[] = {
    { Intrinsic::x86_sse42_crc32_32_8, "sse4.2 crc32_u8" },
    { Intrinsic::x86_sse42_crc32_32_16, "sse4.2 crc32_u16" },
    { Intrinsic::x86_sse42_crc32_32_32, "sse4.2 crc32_u32" },
    { Intrinsic::x86_sse42_crc32_64_64, "sse4.2 crc32_u64" },
  };

  for (const auto& intrinsic : non_overloaded_intrinsics) {
    Function* fn = Intrinsic::getDeclaration(module_, intrinsic.id);
    if (fn == NULL) {
      stringstream ss;
      ss << "Could not find " << intrinsic.error << " intrinsic";
      return Status(ss.str());
    }
    llvm_intrinsics_[intrinsic.id] = fn;
  }
  return Status::OK();
}
// Emits a memcpy of a compile-time constant number of bytes from 'src' to 'dst'.
// 'size' must not be negative; nothing is emitted when it is 0.
void LlvmCodeGen::CodegenMemcpy(LlvmBuilder* builder, Value* dst, Value* src, int size) {
  DCHECK_GE(size, 0);
  if (size != 0) {
    // Delegate to the overload that takes the size as an IR value.
    Value* size_val = GetIntConstant(TYPE_BIGINT, size);
    CodegenMemcpy(builder, dst, src, size_val);
  }
}
// Emits a memcpy of 'size' bytes from 'src' to 'dst', where 'size' is an IR value.
// Both 'dst' and 'src' must be of pointer type.
void LlvmCodeGen::CodegenMemcpy(LlvmBuilder* builder, Value* dst, Value* src,
    Value* size) {
  DCHECK(dst->getType()->isPointerTy()) << Print(dst);
  DCHECK(src->getType()->isPointerTy()) << Print(src);
  const unsigned no_alignment = 0;
  builder->CreateMemCpy(dst, src, size, no_alignment);
}
// Emits a memset that writes 'size' bytes of 'value' starting at 'dst'. 'dst' must be
// of pointer type and 'size' must not be negative; nothing is emitted when it is 0.
void LlvmCodeGen::CodegenMemset(LlvmBuilder* builder, Value* dst, int value, int size) {
  DCHECK(dst->getType()->isPointerTy()) << Print(dst);
  DCHECK_GE(size, 0);
  if (size != 0) {
    Value* value_const = GetIntConstant(TYPE_TINYINT, value);
    const unsigned no_alignment = 0;
    builder->CreateMemSet(dst, value_const, size, no_alignment);
  }
}
// Emits code that zeroes the null-indicator bytes of the tuple at 'tuple_ptr'. The
// offset and number of those bytes are taken from 'tuple_desc'.
void LlvmCodeGen::CodegenClearNullBits(LlvmBuilder* builder, Value* tuple_ptr,
    const TupleDescriptor& tuple_desc) {
  // Address the tuple as raw bytes so the offset can be applied directly.
  Value* tuple_bytes = builder->CreateBitCast(tuple_ptr, ptr_type(), "int8_ptr");
  Value* offset = ConstantInt::get(int_type(), tuple_desc.null_bytes_offset());
  Value* null_bytes_start =
      builder->CreateInBoundsGEP(tuple_bytes, offset, "null_bytes_ptr");
  CodegenMemset(builder, null_bytes_start, 0, tuple_desc.num_null_bytes());
}
// Emits a call to the MEMPOOL_ALLOCATE IR function to allocate 'size' bytes from
// 'pool' with MemPool::DEFAULT_ALIGNMENT, and returns the value of that call.
//   pool: must not be NULL; its address is embedded in the generated code.
//   size: integer IR value of at most 64 bits; narrower values are sign-extended.
//   name: name given to the resulting call instruction.
Value* LlvmCodeGen::CodegenAllocate(LlvmBuilder* builder, MemPool* pool, Value* size,
    const char* name) {
  DCHECK(pool != NULL);
  DCHECK(size->getType()->isIntegerTy());
  DCHECK_LE(size->getType()->getIntegerBitWidth(), 64);

  // The allocation function takes an i64 size; widen if necessary.
  const bool needs_widening = size->getType()->getIntegerBitWidth() < 64;
  if (needs_widening) size = builder->CreateSExt(size, bigint_type());

  Function* allocate_fn = GetFunction(IRFunction::MEMPOOL_ALLOCATE, false);
  PointerType* pool_type = GetPtrType(MemPool::LLVM_CLASS_NAME);
  Value* pool_val = CastPtrToLlvmPtr(pool_type, pool);
  Value* alignment = GetIntConstant(TYPE_INT, MemPool::DEFAULT_ALIGNMENT);

  Value* fn_args[] = {pool_val, size, alignment};
  return builder->CreateCall(allocate_fn, fn_args, name);
}
// Emits a load of the element at constant index 'idx' of 'array' and returns the
// loaded value, which is named 'name'. 'array' must be of pointer or array type.
Value* LlvmCodeGen::CodegenArrayAt(LlvmBuilder* builder, Value* array, int idx,
    const char* name) {
  Type* array_type = array->getType();
  DCHECK(array_type->isPointerTy() || array_type->isArrayTy()) << Print(array_type);
  Value* element_ptr = builder->CreateConstGEP1_32(array, idx);
  return builder->CreateLoad(element_ptr, name);
}
// Empties hash_fns_, the map in which GetHashFunction() stores the hash functions it
// generates.
void LlvmCodeGen::ClearHashFns() {
hash_fns_.clear();
}
// Returns a function that hashes 'num_bytes' bytes.
//   num_bytes: number of bytes to hash, or -1 for variable length.
// Without SSE4.2 support this forwards to GetMurmurHashFunction(). With SSE4.2,
// -1 returns the generic loop based HASH_CRC function; otherwise a function
// specialised for 'num_bytes' is generated and cached in hash_fns_ under 'num_bytes'.
// Returns NULL if the generated function fails verification.
//
// Codegen to compute hash for a particular byte size. Loops are unrolled in this
// process. For the case where num_bytes == 11, we'd do this by calling
//   1. crc64 (for first 8 bytes)
//   2. crc16 (for bytes 9, 10)
//   3. crc8 (for byte 11)
// The resulting IR looks like:
// define i32 @CrcHash11(i8* %data, i32 %len, i32 %seed) {
// entry:
//   %0 = zext i32 %seed to i64
//   %1 = bitcast i8* %data to i64*
//   %2 = getelementptr i64* %1, i32 0
//   %3 = load i64* %2
//   %4 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %0, i64 %3)
//   %5 = trunc i64 %4 to i32
//   %6 = getelementptr i8* %data, i32 8
//   %7 = bitcast i8* %6 to i16*
//   %8 = load i16* %7
//   %9 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %5, i16 %8)
//   %10 = getelementptr i8* %6, i32 2
//   %11 = load i8* %10
//   %12 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %9, i8 %11)
//   ret i32 %12
// }
// (The listing omits the final step that swaps the upper and lower 16 bits of the
// result before it is returned.)
Function* LlvmCodeGen::GetHashFunction(int num_bytes) {
  if (!CpuInfo::IsSupported(CpuInfo::SSE4_2)) {
    return GetMurmurHashFunction(num_bytes);
  }

  if (num_bytes == -1) {
    // -1 indicates variable length, just return the generic loop based
    // hash fn.
    return GetFunction(IRFunction::HASH_CRC, false);
  }

  map<int, Function*>::iterator cached_fn = hash_fns_.find(num_bytes);
  if (cached_fn != hash_fns_.end()) {
    return cached_fn->second;
  }

  // 'num_bytes' is counted down to 0 while the instructions are emitted below, so the
  // requested size has to be saved now to be usable as the cache key afterwards.
  const int requested_num_bytes = num_bytes;

  // Generate a function to hash these bytes
  stringstream ss;
  ss << "CrcHash" << num_bytes;
  FnPrototype prototype(this, ss.str(), GetType(TYPE_INT));
  prototype.AddArgument(LlvmCodeGen::NamedVariable("data", ptr_type()));
  prototype.AddArgument(LlvmCodeGen::NamedVariable("len", GetType(TYPE_INT)));
  prototype.AddArgument(LlvmCodeGen::NamedVariable("seed", GetType(TYPE_INT)));
  Value* args[3];
  LlvmBuilder builder(context());
  Function* fn = prototype.GeneratePrototype(&builder, &args[0]);
  Value* data = args[0];
  // The running hash starts out as the seed.
  Value* result = args[2];

  Function* crc8_fn = llvm_intrinsics_[Intrinsic::x86_sse42_crc32_32_8];
  Function* crc16_fn = llvm_intrinsics_[Intrinsic::x86_sse42_crc32_32_16];
  Function* crc32_fn = llvm_intrinsics_[Intrinsic::x86_sse42_crc32_32_32];
  Function* crc64_fn = llvm_intrinsics_[Intrinsic::x86_sse42_crc32_64_64];

  // Generate the crc instructions starting with the highest number of bytes
  if (num_bytes >= 8) {
    Value* result_64 = builder.CreateZExt(result, GetType(TYPE_BIGINT));
    Value* ptr = builder.CreateBitCast(data, GetPtrType(TYPE_BIGINT));
    int i = 0;
    while (num_bytes >= 8) {
      Value* index[] = {GetIntConstant(TYPE_INT, i++)};
      Value* d = builder.CreateLoad(builder.CreateInBoundsGEP(ptr, index));
      result_64 = builder.CreateCall(crc64_fn, ArrayRef<Value*>({result_64, d}));
      num_bytes -= 8;
    }
    result = builder.CreateTrunc(result_64, GetType(TYPE_INT));
    Value* index[] = {GetIntConstant(TYPE_INT, i * 8)};
    // Update data to past the 8-byte chunks
    data = builder.CreateInBoundsGEP(data, index);
  }

  if (num_bytes >= 4) {
    DCHECK_LT(num_bytes, 8);
    Value* ptr = builder.CreateBitCast(data, GetPtrType(TYPE_INT));
    Value* d = builder.CreateLoad(ptr);
    result = builder.CreateCall(crc32_fn, ArrayRef<Value*>({result, d}));
    Value* index[] = {GetIntConstant(TYPE_INT, 4)};
    data = builder.CreateInBoundsGEP(data, index);
    num_bytes -= 4;
  }

  if (num_bytes >= 2) {
    DCHECK_LT(num_bytes, 4);
    Value* ptr = builder.CreateBitCast(data, GetPtrType(TYPE_SMALLINT));
    Value* d = builder.CreateLoad(ptr);
    result = builder.CreateCall(crc16_fn, ArrayRef<Value*>({result, d}));
    Value* index[] = {GetIntConstant(TYPE_INT, 2)};
    data = builder.CreateInBoundsGEP(data, index);
    num_bytes -= 2;
  }

  if (num_bytes > 0) {
    DCHECK_EQ(num_bytes, 1);
    Value* d = builder.CreateLoad(data);
    result = builder.CreateCall(crc8_fn, ArrayRef<Value*>({result, d}));
    --num_bytes;
  }
  DCHECK_EQ(num_bytes, 0);

  // Swap the upper and lower 16 bits of the hash.
  Value* shift_16 = GetIntConstant(TYPE_INT, 16);
  Value* upper_bits = builder.CreateShl(result, shift_16);
  Value* lower_bits = builder.CreateLShr(result, shift_16);
  result = builder.CreateOr(upper_bits, lower_bits);
  builder.CreateRet(result);

  fn = FinalizeFunction(fn);
  if (fn != NULL) {
    hash_fns_[requested_num_bytes] = fn;
  }
  return fn;
}
// Returns the finalized IR function 'f'. When 'len' is not -1 the function is cloned
// first and every use of the clone's second argument (the length) is replaced with
// the constant 'len'.
static Function* GetLenOptimizedHashFn(
    LlvmCodeGen* codegen, IRFunction::Type f, int len) {
  Function* hash_fn = codegen->GetFunction(f, false);
  DCHECK(hash_fn != NULL);

  const bool has_fixed_len = (len != -1);
  if (has_fixed_len) {
    // Work on a clone because the function is modified by substituting the length.
    hash_fn = codegen->CloneFunction(hash_fn);
    Value* len_arg = codegen->GetArgument(hash_fn, 1);
    Value* len_const = codegen->GetIntConstant(TYPE_INT, len);
    len_arg->replaceAllUsesWith(len_const);
  }
  return codegen->FinalizeFunction(hash_fn);
}
// Returns the HASH_FNV IR function as produced by GetLenOptimizedHashFn() for 'len'
// (a 'len' of -1 leaves the length argument in place).
Function* LlvmCodeGen::GetFnvHashFunction(int len) {
return GetLenOptimizedHashFn(this, IRFunction::HASH_FNV, len);
}
// Returns the HASH_MURMUR IR function as produced by GetLenOptimizedHashFn() for 'len'
// (a 'len' of -1 leaves the length argument in place).
Function* LlvmCodeGen::GetMurmurHashFunction(int len) {
return GetLenOptimizedHashFn(this, IRFunction::HASH_MURMUR, len);
}
// Replaces the instruction 'from' with the value 'to' inside the basic block that
// contains 'from', by forwarding to llvm::ReplaceInstWithValue().
void LlvmCodeGen::ReplaceInstWithValue(Instruction* from, Value* to) {
  BasicBlock* parent_block = from->getParent();
  BasicBlock::iterator inst_pos(from);
  llvm::ReplaceInstWithValue(parent_block->getInstList(), inst_pos, to);
}
// Returns the i-th argument (0-based) of 'fn'.
// 'i' must satisfy 0 <= i < fn->arg_size(). An index equal to arg_size() would
// dereference the end iterator, so it is rejected by the checks below.
Argument* LlvmCodeGen::GetArgument(Function* fn, int i) {
  DCHECK_GE(i, 0);
  DCHECK_LT(static_cast<size_t>(i), fn->arg_size());
  Function::arg_iterator iter = fn->arg_begin();
  for (int j = 0; j < i; ++j) ++iter;
  return &*iter;
}
// Stores 'v' into a new alloca created by CreateEntryBlockAlloca() (named 'name', of
// v's type) and returns the pointer to that slot.
Value* LlvmCodeGen::GetPtrTo(LlvmBuilder* builder, Value* v, const char* name) {
  Value* slot = CreateEntryBlockAlloca(*builder, v->getType(), name);
  builder->CreateStore(v, slot);
  return slot;
}
// Creates a constant global variable with private linkage in module_, named 'name',
// of type 'type' and initialised with 'ir_constant'. Returns a constant GEP
// expression with a single zero index into that global.
Constant* LlvmCodeGen::ConstantToGVPtr(Type* type, Constant* ir_constant,
    const string& name) {
  // The module takes the new global; 'true' marks it as constant.
  GlobalVariable* global = new GlobalVariable(
      *module_, type, true, GlobalValue::PrivateLinkage, ir_constant, name);
  Constant* zero_index[] = {GetIntConstant(TYPE_INT, 0)};
  return ConstantExpr::getGetElementPtr(NULL, global, ArrayRef<Constant*>(zero_index));
}
}
namespace boost {
/// Handler for exceptions in cross-compiled functions.
/// When boost is configured with BOOST_NO_EXCEPTIONS, it calls this handler instead of
/// throwing the exception.
/// 'e' is the exception that would have been thrown; its what() text is appended to
/// the fatal log message.
/// NOTE(review): [[noreturn]] relies on LOG(FATAL) never returning -- confirm against
/// the logging library in use.
[[noreturn]] void throw_exception(std::exception const& e) {
LOG(FATAL) << "Cannot handle exceptions in codegen'd code " << e.what();
}
}