| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| |
| #ifndef IMPALA_CODEGEN_LLVM_CODEGEN_H |
| #define IMPALA_CODEGEN_LLVM_CODEGEN_H |
| |
| #include "common/status.h" |
| |
| #include <map> |
| #include <memory> |
| #include <string> |
| #include <vector> |
| #include <unordered_set> |
| #include <boost/scoped_ptr.hpp> |
| |
| #include <boost/unordered_set.hpp> |
| |
| #include <llvm/IR/DerivedTypes.h> |
| #include <llvm/IR/IRBuilder.h> |
| #include <llvm/IR/Intrinsics.h> |
| #include <llvm/IR/LLVMContext.h> |
| #include <llvm/IR/Module.h> |
| #include <llvm/Support/MemoryBuffer.h> |
| #include <llvm/Support/raw_ostream.h> |
| |
| #include "exprs/scalar-expr.h" |
| #include "impala-ir/impala-ir-functions.h" |
| #include "runtime/types.h" |
| #include "util/runtime-profile.h" |
| |
| /// Forward declare the llvm classes named in this header to avoid namespace pollution. |
| /// NOTE(review): several llvm headers are also included directly above, so some of |
| /// these declarations are presumably redundant -- confirm before removing any. |
| namespace llvm { |
| class AllocaInst; |
| class BasicBlock; |
| class ConstantFolder; |
| class DiagnosticInfo; |
| class ExecutionEngine; |
| class Function; |
| class LLVMContext; |
| class Module; |
| class NoFolder; |
| class PointerType; |
| class StructType; |
| class TargetData; |
| class Type; |
| class Value; |
| namespace legacy { |
| class FunctionPassManager; |
| class PassManager; |
| } |
| |
| /// IRBuilder is a class template with two type parameters; it is declared here |
| /// without default template arguments. |
| template<typename T, typename I> |
| class IRBuilder; |
| |
| class IRBuilderDefaultInserter; |
| } |
| |
| // The number of function calls replaced is not knowable when UBSAN is enabled, since it |
| // can double the number of references to a function. To fix, we replaced |
| // "DCHECK_EQ(replaced" with "DCHECK_REPLACE_COUNT(replaced": |
| // |
| // find be/src -type f -execdir sed -i s/DCHECK_EQ\(replaced,\ /DCHECK_REPLACE_COUNT\(replaced,\ /g {} \; |
| // |
| // With UNDEFINED_SANITIZER defined, the check accepts any count 'p' in [q, 2*q]; |
| // otherwise 'p' must equal 'q' exactly. |
| // NOTE: the UNDEFINED_SANITIZER variant expands to two statements and evaluates both |
| // arguments twice. Do not use it as the unbraced body of an if/else/loop and only pass |
| // side-effect-free arguments. |
| #if defined(UNDEFINED_SANITIZER) |
| #define DCHECK_REPLACE_COUNT(p, q) DCHECK_GE(p, q); DCHECK_LE(p, 2*(q)) |
| #else |
| #define DCHECK_REPLACE_COUNT(p, q) DCHECK_EQ(p, q) |
| #endif |
| |
| namespace impala { |
| |
| /// Forward declarations of impala classes. |
| class CodegenCallGraph; |
| class CodegenSymbolEmitter; |
| class ImpalaMCJITMemoryManager; |
| class SubExprElimination; |
| class TupleDescriptor; |
| |
| /// Define builder subclass in case we want to change the template arguments later. |
| /// The using-declaration inherits the constructors of llvm::IRBuilder<>; the class |
| /// adds no members of its own. |
| class LlvmBuilder : public llvm::IRBuilder<> { |
| using llvm::IRBuilder<>::IRBuilder; |
| }; |
| |
| /// LLVM code generator. This is the top level object to generate jitted code. |
| // |
| /// LLVM provides a c++ IR builder interface so IR does not need to be written |
| /// manually. The interface is very low level so each line of IR that needs to |
| /// be output maps 1:1 with calls to the interface. |
| /// The llvm documentation is not fantastic and a lot of this was figured out |
| /// by experimenting. Thankfully, their API is pretty well designed so it's |
| /// possible to get by without great documentation. The llvm tutorial is very |
| /// helpful, https://llvm.org/docs/tutorial/LangImpl01.html. In this tutorial, they |
| /// go over how to JIT an AST for a toy language they create. |
| /// It is also helpful to use their online app that lets you compile c/c++ to IR. |
| /// http://llvm.org/demo/index.cgi. |
| // |
| /// This class provides two interfaces, one for testing and one for the query |
| /// engine. The interface for the query engine will load the cross-compiled |
| /// IR module (output during the build) and extract all of the functions that will |
| /// be called directly. The test interface can be used to load any precompiled |
| /// module or none at all (but this class will not validate the module). |
| // |
| /// There are two classes of functions defined based on how they are generated: |
| /// 1. Handcrafted functions - These functions are built from scratch using the IRBuilder |
| /// interface. |
| /// 2. Cross-compiled functions - These functions are loaded directly from a |
| /// cross-compiled IR module and are either directly used or are cloned and modified |
| /// before use. |
| // |
| /// This class is not threadsafe. During the Prepare() phase of the fragment execution, |
| /// nodes should codegen functions, and register those functions with AddFunctionToJit(). |
| /// Afterward, FinalizeModule() should be called at which point all codegened functions |
| /// are optimized and compiled. After FinalizeModule() returns, all function pointers |
| /// registered with AddFunctionToJit() will be pointing to the appropriate JIT'd function. |
| // |
| /// Currently, each fragment instance will create and initialize one of these |
| /// objects. This requires loading and parsing the cross compiled modules. |
| /// TODO: we should be able to do this once per process and let llvm compile |
| /// functions from across modules. |
| // |
| /// LLVM has a nontrivial memory management scheme and objects will take |
| /// ownership of others. The LLVM documentation is pretty good about being explicit |
| /// about this, but it is not very intuitive. |
| /// TODO: look into diagnostic output and debuggability |
| /// TODO: confirm that the multi-threaded usage is correct |
| // |
| /// Function objects in the module are materialized lazily to save the cost of |
| /// parsing IR of functions which are dead code. An unmaterialized function is similar |
| /// to a function declaration which only contains the function signature and needs to |
| /// be materialized before optimization and compilation happen if it's not dead code. |
| /// Materializing a function means parsing the bitcode to populate the basic blocks and |
| /// instructions attached to the function object. Functions reachable by the function |
| /// are also materialized recursively. |
| // |
| /// Memory used for codegen is tracked via the MemTracker hierarchy. Codegen can use |
| /// significant memory for the IR module and for the optimization and compilation |
| /// algorithms. LLVM provides no way to directly track this transient memory - instead |
| /// the memory consumption is estimated based on the size of the IR module and released |
| /// once compilation finishes. Once compilation finishes, the size of the compiled |
| /// machine code is obtained from LLVM and is tracked until the LlvmCodeGen object |
| /// is torn down and the compiled code is freed. |
| // |
| class LlvmCodeGen { |
| public: |
| /// This function must be called once per process before any llvm API calls are |
| /// made. It is not valid to call it multiple times. LLVM needs to allocate data |
| /// structures for multi-threading support and to enable dynamic linking of jitted code. |
| /// If 'load_backend' is true, load the backend static object for llvm. This is needed |
| /// when libfesupport.so is loaded from java: llvm will by default only look in |
| /// the current object and will not be able to find the backend symbols. |
| /// TODO: this can probably be removed after impalad refactor where the java |
| /// side is not loading the be explicitly anymore. |
| static Status InitializeLlvm(bool load_backend = false); |
| |
| /// Creates a codegen instance for Impala initialized with the cross-compiled Impala IR. |
| /// 'codegen' will contain the created object on success. |
| /// 'parent_mem_tracker' - if non-NULL, the CodeGen MemTracker is created under this. |
| /// 'id' is used for outputting the IR module for debugging. |
| /// NOTE(review): the role of 'state' and the contents of 'codegen' on failure are not |
| /// described here -- confirm against the implementation. |
| static Status CreateImpalaCodegen(RuntimeState* state, MemTracker* parent_mem_tracker, |
| const std::string& id, boost::scoped_ptr<LlvmCodeGen>* codegen); |
| |
| /// Destructor. NOTE(review): whether Close() must have been called before destruction |
| /// is not visible from this header -- confirm against the implementation. |
| ~LlvmCodeGen(); |
| |
| /// Releases all resources associated with the codegen object. It is invalid to call |
| /// any other API methods after calling Close(). |
| void Close(); |
| |
| /// Accessors for the runtime profile, the IR generation timer and the LLVM thread |
| /// counters held by this object. Each one returns the corresponding member as-is. |
| RuntimeProfile* runtime_profile() { return profile_; } |
| RuntimeProfile::Counter* ir_generation_timer() { return ir_generation_timer_; } |
| RuntimeProfile::ThreadCounters* llvm_thread_counters() { return llvm_thread_counters_; } |
| |
| /// Turns optimization passes on (if 'enable' is true) or off (if it is false). |
| void EnableOptimizations(bool enable); |
| |
| /// For debugging. Returns the IR that was generated. If 'full_module' is true, the |
| /// entire module is dumped, including what was loaded from precompiled IR. |
| /// If it is false, only the IR for functions which were handcrafted is output. |
| std::string GetIR(bool full_module) const; |
| |
| /// Utility struct that wraps a variable name and llvm type. |
| struct NamedVariable { |
| std::string name; |
| llvm::Type* type; |
| |
| /// Both arguments are optional: by default the name is empty and the type is null. |
| /// The members are initialized directly in the initializer list rather than being |
| /// default-constructed and then assigned. The constructor stays non-explicit so that |
| /// existing implicit conversions from a name keep compiling. |
| NamedVariable(const std::string& name = "", llvm::Type* type = nullptr) |
| : name(name), type(type) {} |
| }; |
| |
| /// Describes the prototype of a function: its name, return type and arguments. |
| /// Contains helpers to assemble the prototype and to generate the IR for it. |
| class FnPrototype { |
| public: |
| /// Builds a prototype for a function called 'name' that returns 'ret_type'. |
| /// Arguments are appended afterwards with AddArgument(). |
| FnPrototype(LlvmCodeGen* codegen, const std::string& name, llvm::Type* ret_type); |
| |
| /// The current name of the function. |
| const std::string& name() const { return name_; } |
| |
| /// Replaces the name of the function with 'name'. |
| void SetName(const std::string& name) { name_ = name; } |
| |
| /// Appends 'var' to the end of the argument list. |
| void AddArgument(const NamedVariable& var) { args_.push_back(var); } |
| |
| /// Appends an argument called 'name' of type 'type' to the end of the argument list. |
| void AddArgument(const std::string& name, llvm::Type* type) { |
| args_.emplace_back(name, type); |
| } |
| |
| /// Generate LLVM function prototype. |
| /// This is the canonical way to start generating a handcrafted codegen'd function. |
| /// When 'builder' is non-null, this function additionally creates the entry block, |
| /// adds it to the llvm module via the builder by setting the builder's insert point |
| /// to the entry block, and adds the function to the list of functions handcrafted by |
| /// impala. FinalizeFunction() must be called for any function generated this way, |
| /// otherwise it will be deleted during FinalizeModule(). |
| /// |
| /// When 'params' is non-null, the argument values are also returned through it |
| /// (params[0] is the first arg, etc). In that case 'params' must be preallocated |
| /// with room for as many entries as there are arguments. |
| llvm::Function* GeneratePrototype( |
| LlvmBuilder* builder = nullptr, llvm::Value** params = nullptr); |
| |
| private: |
| friend class LlvmCodeGen; |
| |
| LlvmCodeGen* codegen_; |
| std::string name_; |
| llvm::Type* ret_type_; |
| std::vector<NamedVariable> args_; |
| }; |
| |
| /// Get host cpu attributes in format expected by EngineBuilder. The result is returned |
| /// through 'attrs'. NOTE(review): whether existing contents of '*attrs' are kept or |
| /// cleared is not visible from this header. |
| static void GetHostCPUAttrs(std::unordered_set<std::string>* attrs); |
| |
| /// Returns whether or not the cpu feature identified by 'flag' is supported. |
| static bool IsCPUFeatureEnabled(int64_t flag); |
| |
| /// Returns the pointer type to 'type', i.e. type*. |
| llvm::PointerType* GetPtrType(llvm::Type* type); |
| |
| /// Returns the pointer-to-pointer type to 'type', i.e. type**. |
| llvm::PointerType* GetPtrPtrType(llvm::Type* type); |
| |
| /// Returns the pointer-to-pointer type for the type called 'name'. |
| llvm::PointerType* GetNamedPtrPtrType(const std::string& name); |
| |
| /// Returns llvm type for Impala's internal representation of this column type, |
| /// i.e. the way Impala represents this type in a Tuple. |
| llvm::Type* GetSlotType(const ColumnType& type); |
| |
| /// Returns a pointer type to 'type' (e.g. int16_t*). |
| /// NOTE(review): presumably the pointer type to GetSlotType(type) -- confirm. |
| llvm::PointerType* GetSlotPtrType(const ColumnType& type); |
| |
| /// Returns the type with 'name'. This is used to pull types from clang |
| /// compiled IR. The types we generate at runtime are unnamed. |
| /// The name is generated by the clang compiler in this form: |
| /// <class/struct>.<namespace>::<class name>. For example: |
| /// "class.impala::AggregationNode" |
| llvm::Type* GetNamedType(const std::string& name); |
| |
| /// Returns the pointer type to the type returned by GetNamedType(name). |
| llvm::PointerType* GetNamedPtrType(const std::string& name); |
| |
| /// Templated counterparts of the GetNamed*Type functions. The llvm name of type T is |
| /// taken from T::LLVM_CLASS_NAME. T must be a struct/class, which lets GetStructType |
| /// hand back an llvm::StructType* so that callers do not have to cast. |
| template<class T> |
| llvm::StructType* GetStructType() { |
| llvm::Type* named_type = GetNamedType(T::LLVM_CLASS_NAME); |
| return llvm::cast<llvm::StructType>(named_type); |
| } |
| |
| /// Pointer type to the struct type named T::LLVM_CLASS_NAME. |
| template<class T> |
| llvm::PointerType* GetStructPtrType() { |
| return GetNamedPtrType(T::LLVM_CLASS_NAME); |
| } |
| |
| /// Pointer-to-pointer type to the struct type named T::LLVM_CLASS_NAME. |
| template<class T> |
| llvm::PointerType* GetStructPtrPtrType() { |
| llvm::PointerType* ptr_ptr_type = GetNamedPtrPtrType(T::LLVM_CLASS_NAME); |
| return ptr_ptr_type; |
| } |
| |
| /// Alloca's an instance of the appropriate pointer type and sets it to point at 'v'. |
| /// 'name' is optional and defaults to the empty string. |
| llvm::Value* GetPtrTo(LlvmBuilder* builder, llvm::Value* v, const char* name = ""); |
| |
| /// Creates a global value 'name' using constant 'ir_constant' and returns |
| /// a pointer to the global value. Useful for creating constant function arguments |
| /// which cannot be represented with primitive types (e.g. struct). |
| llvm::Constant* ConstantToGVPtr(llvm::Type* type, llvm::Constant* ir_constant, |
| const std::string& name); |
| |
| /// Creates a global value 'name' that is an array with element type 'element_type' |
| /// containing 'ir_constants'. Returns a pointer to the global value, i.e. a pointer |
| /// to a constant array of 'element_type'. |
| llvm::Constant* ConstantsToGVArrayPtr(llvm::Type* element_type, |
| llvm::ArrayRef<llvm::Constant*> ir_constants, const std::string& name); |
| |
| /// Returns reference to llvm context object. Each LlvmCodeGen has its own |
| /// context to allow multiple threads to be calling into llvm at the same time. |
| /// Dereferences 'context_' without a null check. |
| llvm::LLVMContext& context() { return *context_.get(); } |
| |
| /// Returns the execution engine interface held by 'execution_engine_'. |
| llvm::ExecutionEngine* execution_engine() { return execution_engine_.get(); } |
| |
| /// Registers the expr function 'function' under the unique id 'id' so that it can |
| /// later be looked up with GetRegisteredExprFn(id). The id must not have been |
| /// registered before; this is DCHECKed. |
| void RegisterExprFn(int64_t id, llvm::Function* function) { |
| DCHECK(registered_exprs_map_.find(id) == registered_exprs_map_.end()); |
| // The two containers are independent of each other: one records the function |
| // itself, the other maps the id to it. |
| registered_exprs_.insert(function); |
| registered_exprs_map_[id] = function; |
| } |
| |
| /// Looks up the expr function registered under 'id'. Returns NULL if no function |
| /// was registered with that id. |
| llvm::Function* GetRegisteredExprFn(int64_t id) { |
| const auto entry = registered_exprs_map_.find(id); |
| return entry != registered_exprs_map_.end() ? entry->second : nullptr; |
| } |
| |
| /// Optimize and compile the module. This should be called after all functions to JIT |
| /// have been added to the module via AddFunctionToJit(). If optimizations_enabled_ is |
| /// false, the module will not be optimized before compilation. After FinalizeModule() |
| /// is called, the LLVM module is destroyed and it is invalid to call any LlvmCodeGen |
| /// functions. |
| Status FinalizeModule(); |
| |
| /// Loads a native or IR function 'fn' with symbol 'symbol' from the builtins or |
| /// an external library and puts the result in *llvm_fn. *llvm_fn can be safely |
| /// modified in place, because it is either newly generated or cloned. The caller must |
| /// call FinalizeFunction() on 'llvm_fn' once it is done modifying it. |
| /// |
| /// The function has return type 'return_type' (void if 'return_type' is NULL) and |
| /// input argument types 'arg_types'. The first 'num_fixed_args' arguments are fixed |
| /// arguments, and the remaining arguments are varargs. 'has_varargs' indicates whether |
| /// the function accepts varargs. If 'has_varargs' is true, there must be at least one |
| /// vararg. |
| /// |
| /// If the function is loaded from a library, 'cache_entry' is updated to point to the |
| /// library containing the function. If 'cache_entry' is set to a non-NULL value by |
| /// this function, the caller must call LibCache::DecrementUseCount() on it when done |
| /// using the function. |
| Status LoadFunction(const TFunction& fn, const std::string& symbol, |
| const ColumnType* return_type, const std::vector<ColumnType>& arg_types, |
| int num_fixed_args, bool has_varargs, llvm::Function** llvm_fn, |
| LibCacheEntry** cache_entry); |
| |
| /// Replaces all instructions in 'caller' that call 'target_name' with a call |
| /// instruction to 'new_fn'. The argument types of 'new_fn' must exactly match |
| /// the argument types of the function to be replaced. Returns the number of |
| /// call sites updated. |
| /// |
| /// 'target_name' must be a substring of the mangled symbol of the function to be |
| /// replaced. This usually means that the unmangled function name is sufficient. |
| /// |
| /// Note that this modifies 'caller' in-place, so this should only be called on cloned |
| /// functions. |
| int ReplaceCallSites(llvm::Function* caller, llvm::Function* new_fn, |
| const std::string& target_name); |
| |
| /// Same as ReplaceCallSites(), except replaces the function call instructions with the |
| /// boolean value 'constant'. |
| int ReplaceCallSitesWithBoolConst(llvm::Function* caller, bool constant, |
| const std::string& target_name); |
| |
| /// Replaces calls to functions in 'caller' where the callee's name has 'target_name' |
| /// as a substring. Calls to functions are replaced with the value 'replacement'. The |
| /// return value is the number of calls replaced. |
| int ReplaceCallSitesWithValue(llvm::Function* caller, llvm::Value* replacement, |
| const std::string& target_name); |
| |
| /// This function replaces calls to FunctionContextImpl::GetConstFnAttr() with constants |
| /// derived from 'return_type', 'arg_types' and the runtime state 'state_'. Please note |
| /// that this function only replaces call instructions inside 'fn'. To replace the |
| /// calls to FunctionContextImpl::GetConstFnAttr() made inside callee functions, |
| /// inline the callee functions (by annotating them with IR_ALWAYS_INLINE). |
| /// NOTE(review): the int return value is presumably the number of calls replaced -- |
| /// confirm against the implementation. |
| /// |
| /// TODO: implement a loop unroller (or use LLVM's) so we can use |
| /// FunctionContextImpl::GetConstFnAttr() in loops |
| int InlineConstFnAttrs(const FunctionContext::TypeDesc& return_type, |
| const std::vector<FunctionContext::TypeDesc>& arg_types, llvm::Function* fn); |
| |
| /// Returns a copy of 'fn'. The copy is added to the module. |
| llvm::Function* CloneFunction(llvm::Function* fn); |
| |
| /// Replace all uses of the instruction 'from' with the value 'to', and delete |
| /// 'from'. This is a wrapper around llvm::ReplaceInstWithValue(). |
| void ReplaceInstWithValue(llvm::Instruction* from, llvm::Value* to); |
| |
| /// Returns the i-th argument of 'fn'. |
| /// NOTE(review): behavior for an out-of-range 'i' is not visible from this header. |
| llvm::Argument* GetArgument(llvm::Function* fn, int i); |
| |
| /// Verifies 'function'. All handcrafted functions need to be finalized before being |
| /// passed to AddFunctionToJit(), otherwise the functions will be deleted from the |
| /// module when the module is finalized. All loaded functions that need to be JIT |
| /// compiled after modification need to be finalized as well. |
| /// If the function does not verify, NULL is returned and the function will eventually |
| /// be deleted in FinalizeModule(); otherwise the function object is returned. |
| llvm::Function* FinalizeFunction(llvm::Function* function); |
| |
| /// Adds the function to be automatically jit compiled when the codegen object is |
| /// finalized. FinalizeModule() will set 'fn_ptr' to point to the jitted function. |
| /// |
| /// Pre-condition: FinalizeFunction() must have been called on the function passed to |
| /// this method. |
| /// |
| /// Only functions registered with AddFunctionToJit() and their dependencies are |
| /// compiled by FinalizeModule(): other functions are considered dead code and will |
| /// be removed during optimization. |
| /// |
| /// This will also wrap functions returning DecimalVals in an ABI-compliant wrapper (see |
| /// the comment in the .cc file for details). This is so we don't accidentally try to |
| /// call non-compliant code from native code. |
| void AddFunctionToJit(llvm::Function* fn, void** fn_ptr); |
| |
| /// Generates a printf call instruction that outputs 'message' at the builder's |
| /// insert point. If 'v1' is non-NULL, it will also be passed to the printf call. |
| /// For debugging only. |
| void CodegenDebugTrace(LlvmBuilder* builder, const char* message, |
| llvm::Value* v1 = NULL); |
| |
| /// Returns the string representation of a llvm::Value* or llvm::Type*, as produced by |
| /// its print() method. 'value_or_type' is dereferenced, so it must not be NULL. |
| template <typename T> static std::string Print(T* value_or_type) { |
| std::string str; |
| llvm::raw_string_ostream stream(str); |
| value_or_type->print(stream); |
| // raw_string_ostream may buffer its output. Flush explicitly so that 'str' is |
| // complete before it is returned, instead of relying on the stream's destructor |
| // running while 'str' is still the returned object (which only holds under NRVO). |
| stream.flush(); |
| return str; |
| } |
| |
| /// Returns the cross compiled function. 'ir_type' is an enum which is generated |
| /// by gen_ir_descriptions.py. The returned function and its callees will be |
| /// materialized recursively. Returns NULL if there is any error. |
| /// |
| /// If 'clone' is true, a clone of the function will be returned. Clones should be used |
| /// iff the caller will modify the returned function so that the original unmodified |
| /// function remains available. Avoid cloning if possible to reduce compilation time. |
| /// |
| /// TODO: Return Status instead. |
| llvm::Function* GetFunction(IRFunction::Type ir_type, bool clone); |
| |
| /// Returns the function with the symbol name 'symbol' from the module. The returned |
| /// function and its callees will be recursively materialized. Returns NULL if there is |
| /// any error. |
| /// |
| /// If 'clone' is true, a clone of the function will be returned. Clones should be used |
| /// iff the caller will modify the returned function so that the original unmodified |
| /// function remains available. Avoid cloning if possible to reduce compilation time. |
| /// |
| /// TODO: Return Status instead. |
| llvm::Function* GetFunction(const std::string& symbol, bool clone); |
| |
| /// Returns the hash function with signature: |
| /// int32_t Hash(int8_t* data, int len, int32_t seed); |
| /// If num_bytes is non-zero, the returned function will be codegen'd to only |
| /// work for that number of bytes. It is invalid to call that function with a |
| /// different 'len'. Functions returned by these methods have already been finalized. |
| /// NOTE(review): the default 'num_bytes' is -1, which does not match the "non-zero" |
| /// wording above -- presumably -1 requests the variable-length version; confirm. |
| llvm::Function* GetHashFunction(int num_bytes = -1); |
| llvm::Function* GetFnvHashFunction(int num_bytes = -1); |
| llvm::Function* GetMurmurHashFunction(int num_bytes = -1); |
| |
| /// Sets the NoInline attribute on 'function' and removes the AlwaysInline and |
| /// InlineHint attributes if present. |
| void SetNoInline(llvm::Function* function) const; |
| |
| /// Allocate stack storage for local variables. This is similar to traditional c, where |
| /// all the variables must be declared at the top of the function. This helper can be |
| /// called from anywhere and will add a stack allocation for 'var' at the beginning of |
| /// the function. This would be used, for example, if a function needed a temporary |
| /// struct allocated. The allocated variable is scoped to the function. |
| /// |
| /// This should always be used instead of calling LlvmBuilder::CreateAlloca directly. |
| /// LLVM doesn't optimize alloca's occurring in the middle of functions very well (e.g. |
| /// an alloca may end up in a loop, potentially blowing the stack). |
| llvm::AllocaInst* CreateEntryBlockAlloca(llvm::Function* f, const NamedVariable& var); |
| /// Overload that takes the type and name of the variable separately and a builder |
| /// instead of the function. |
| llvm::AllocaInst* CreateEntryBlockAlloca( |
| const LlvmBuilder& builder, llvm::Type* type, const char* name = ""); |
| |
| /// Same as above, except allocates an array of 'type' with 'num_entries' entries |
| /// and alignment 'alignment'. |
| llvm::AllocaInst* CreateEntryBlockAlloca(const LlvmBuilder& builder, llvm::Type* type, |
| int num_entries, int alignment, const char* name = ""); |
| |
| /// Utility to create two blocks in 'fn' for if/else codegen. 'if_block' and |
| /// 'else_block' are return parameters; the blocks are named 'if_name' and 'else_name'. |
| /// 'insert_before' is optional: if set, the two blocks will be inserted before that |
| /// block, otherwise they will be inserted at the end of 'fn'. Being able to place |
| /// blocks is useful for debugging so the IR has a better looking control flow. |
| void CreateIfElseBlocks(llvm::Function* fn, const std::string& if_name, |
| const std::string& else_name, |
| llvm::BasicBlock** if_block, llvm::BasicBlock** else_block, |
| llvm::BasicBlock* insert_before = NULL); |
| |
| /// Creates an llvm pointer value from 'ptr'. This is used to pass pointers between |
| /// c-code and code-generated IR. The resulting value will be of 'type'. |
| llvm::Value* CastPtrToLlvmPtr(llvm::Type* type, const void* ptr); |
| |
| /// Returns a constant int of 'byte_size' bytes based on 'low_bits' and 'high_bits' |
| /// which stand for the lower and upper 64-bits of the constant respectively. For |
| /// values less than or equal to 64-bits, 'high_bits' is not used. This function |
| /// can generate constants up to 128-bit wide. 'byte_size' must be a power of 2. |
| llvm::Constant* GetIntConstant(int byte_size, uint64_t low_bits, uint64_t high_bits); |
| |
| /// Initialises a constant global string from the 'len' bytes at 'data' and returns an |
| /// i8* pointer to it. |
| llvm::Value* GetStringConstant(LlvmBuilder* builder, char* data, int len); |
| |
| /// Returns true/false constants (bool type). null_ptr_value() returns the null |
| /// pointer constant of type ptr_type(). |
| llvm::Constant* true_value() { return true_value_; } |
| llvm::Constant* false_value() { return false_value_; } |
| llvm::Constant* null_ptr_value() { return llvm::ConstantPointerNull::get(ptr_type()); } |
| |
| /// Simple wrappers to reduce code verbosity. The integer and floating point types |
| /// are looked up in this object's context(); ptr_type() and void_type() return the |
| /// 'ptr_type_' and 'void_type_' members. |
| llvm::Type* bool_type() { return llvm::Type::getInt1Ty(context()); } |
| llvm::Type* i8_type() { return llvm::Type::getInt8Ty(context()); } |
| llvm::Type* i16_type() { return llvm::Type::getInt16Ty(context()); } |
| llvm::Type* i32_type() { return llvm::Type::getInt32Ty(context()); } |
| llvm::Type* i64_type() { return llvm::Type::getInt64Ty(context()); } |
| llvm::Type* i128_type() { return llvm::Type::getIntNTy(context(), 128); } |
| llvm::Type* float_type() { return llvm::Type::getFloatTy(context()); } |
| llvm::Type* double_type() { return llvm::Type::getDoubleTy(context()); } |
| llvm::PointerType* ptr_type() { return ptr_type_; } |
| llvm::Type* void_type() { return void_type_; } |
| |
| /// Pointer types to the types above, each built with GetPtrType(). |
| llvm::PointerType* i8_ptr_type() { return GetPtrType(i8_type()); } |
| llvm::PointerType* i16_ptr_type() { return GetPtrType(i16_type()); } |
| llvm::PointerType* i32_ptr_type() { return GetPtrType(i32_type()); } |
| llvm::PointerType* i64_ptr_type() { return GetPtrType(i64_type()); } |
| llvm::PointerType* float_ptr_type() { return GetPtrType(float_type()); } |
| llvm::PointerType* double_ptr_type() { return GetPtrType(double_type()); } |
| llvm::PointerType* ptr_ptr_type() { return GetPtrType(ptr_type_); } |
| |
| /// Returns the bool constant that corresponds to 'val'. |
| llvm::Constant* GetBoolConstant(bool val) { |
| if (val) return true_value_; |
| return false_value_; |
| } |
| /// Each of the following returns an integer constant of the width given in its name, |
| /// built from an llvm::APInt of that width holding 'val'. |
| llvm::Constant* GetI8Constant(uint64_t val) { |
| const llvm::APInt bits(8, val); |
| return llvm::ConstantInt::get(context(), bits); |
| } |
| llvm::Constant* GetI16Constant(uint64_t val) { |
| const llvm::APInt bits(16, val); |
| return llvm::ConstantInt::get(context(), bits); |
| } |
| llvm::Constant* GetI32Constant(uint64_t val) { |
| const llvm::APInt bits(32, val); |
| return llvm::ConstantInt::get(context(), bits); |
| } |
| llvm::Constant* GetI64Constant(uint64_t val) { |
| const llvm::APInt bits(64, val); |
| return llvm::ConstantInt::get(context(), bits); |
| } |
| |
| /// Loads the module at 'file' temporarily and populates 'symbols' with the symbols in |
| /// the module. 'module_id' identifies the temporarily loaded module. |
| /// NOTE(review): presumably 'module_id' is only used for debugging/error reporting, as |
| /// with the other factory functions -- confirm against the implementation. |
| static Status GetSymbols(const std::string& file, const std::string& module_id, |
| boost::unordered_set<std::string>* symbols); |
| |
| /// Codegen at the current builder location in function 'fn' to store the |
| /// max/min('src', value in 'dst_slot_ptr') in 'dst_slot_ptr'. 'type' is the column |
| /// type of the values being compared. |
| /// NOTE(review): presumably 'min' == true selects min and false selects max -- confirm. |
| void CodegenMinMax(LlvmBuilder* builder, const ColumnType& type, |
| llvm::Value* dst_slot_ptr, llvm::Value* src, bool min, llvm::Function* fn); |
| |
| /// Codegen to call llvm memcpy intrinsic at the current builder location. |
| /// 'dst' and 'src' must be pointer types. 'size' is the number of bytes to copy, |
| /// given either as a compile-time int or as an llvm value. |
| /// No-op if size is zero. |
| void CodegenMemcpy(LlvmBuilder* builder, llvm::Value* dst, llvm::Value* src, int size); |
| void CodegenMemcpy(LlvmBuilder* builder, llvm::Value* dst, llvm::Value* src, |
| llvm::Value* size); |
| |
| /// Codegen to call llvm memset intrinsic at the current builder location. 'dst' should |
| /// be a pointer. No-op if size is zero. |
| void CodegenMemset(LlvmBuilder* builder, llvm::Value* dst, int value, int size); |
| |
| /// Codegen to set all null bytes of the tuple pointed to by 'tuple_ptr' to 0. |
| /// 'tuple_desc' describes that tuple. |
| void CodegenClearNullBits(LlvmBuilder* builder, llvm::Value* tuple_ptr, |
| const TupleDescriptor& tuple_desc); |
| |
| /// Codegen to call pool_val->Allocate(size_val). |
| /// 'pool_val' has to be of type MemPool*. |
| /// NOTE(review): the returned value is presumably the result of the Allocate() call. |
| llvm::Value* CodegenMemPoolAllocate(LlvmBuilder* builder, llvm::Value* pool_val, |
| llvm::Value* size_val, const char* name = ""); |
| |
| /// Codegens IR to load array[idx] and returns the loaded value. 'array' should be a |
| /// C-style array (e.g. i32*) or an IR array (e.g. [10 x i32]). This function does not |
| /// do bounds checking. |
| llvm::Value* CodegenArrayAt( |
| LlvmBuilder*, llvm::Value* array, int idx, const char* name = ""); |
| |
| /// Codegens IR to call the function corresponding to 'ir_type' with arguments 'args' |
| /// and returns the value. Unlike the helpers above, 'name' has no default here. |
| llvm::Value* CodegenCallFunction(LlvmBuilder* builder, IRFunction::Type ir_type, |
| llvm::ArrayRef<llvm::Value*> args, const char* name); |
| |
| /// If there are more than this number of expr trees (or functions that evaluate |
| /// expressions), avoid inlining for the exprs exceeding this threshold. |
| static const int CODEGEN_INLINE_EXPRS_THRESHOLD = 100; |
| |
| /// If there are more than this number of expr trees (or functions that evaluate |
| /// expressions), avoid inlining the function that evaluates the expression batch |
| /// into the calling function. |
| static const int CODEGEN_INLINE_EXPR_BATCH_THRESHOLD = 25; |
| |
| private: |
| /// The classes below are granted access to the private members of LlvmCodeGen. |
| friend class ExprCodegenTest; |
| friend class LlvmCodeGenTest; |
| friend class LlvmCodeGenTest_CpuAttrWhitelist_Test; |
| friend class LlvmCodeGenTest_HashTest_Test; |
| friend class SubExprElimination; |
| |
| /// Top level codegen object. 'module_id' is used for debugging when outputting the IR. |
| /// The constructor is private; instances are obtained through the static Create*() |
| /// functions. |
| LlvmCodeGen(RuntimeState* state, ObjectPool* pool, MemTracker* parent_mem_tracker, |
| const std::string& module_id); |
| |
| /// Initializes the jitter and execution engine with the given module. Ownership of |
| /// 'module' is passed in by the caller. |
| Status Init(std::unique_ptr<llvm::Module> module); |
| |
| /// Creates a LlvmCodeGen instance initialized with the module bitcode from 'file'. |
| /// 'codegen' will contain the created object on success. The functions in the module |
| /// are materialized lazily. Getting a reference to a function via GetFunction() will |
| /// materialize the function and its callees recursively. |
| static Status CreateFromFile(RuntimeState* state, ObjectPool* pool, |
| MemTracker* parent_mem_tracker, const std::string& file, |
| const std::string& id, boost::scoped_ptr<LlvmCodeGen>* codegen); |
| |
| /// Creates a LlvmCodeGen instance initialized with the module bitcode in memory. |
| /// 'codegen' will contain the created object on success. The functions in the module |
| /// are materialized lazily. Getting a reference to a function via GetFunction() will |
| /// materialize the function and its callees recursively. |
| /// NOTE(review): no buffer is passed in, so the bitcode presumably comes from a copy |
| /// embedded in the process -- confirm against the implementation. |
| static Status CreateFromMemory(RuntimeState* state, ObjectPool* pool, |
| MemTracker* parent_mem_tracker, const std::string& id, |
| boost::scoped_ptr<LlvmCodeGen>* codegen); |
| |
| /// Loads an LLVM module from 'file' which is the local path to the LLVM bitcode file. |
| /// The functions in the module are materialized lazily. Getting a reference to the |
| /// function via GetFunction() will materialize the function and its callees |
| /// recursively. The caller is responsible for cleaning up the module. |
| Status LoadModuleFromFile(const std::string& file, std::unique_ptr<llvm::Module>* module); |
| |
| /// Loads an LLVM module. 'module_ir_buf' is the memory buffer containing LLVM bitcode. |
| /// 'module_name' is the name of the module to use when reporting errors. The caller is |
| /// responsible for cleaning up 'module'. The functions in the module aren't |
| /// materialized. Getting a reference to the function via GetFunction() will materialize |
| /// the function and its callees recursively. |
| Status LoadModuleFromMemory(std::unique_ptr<llvm::MemoryBuffer> module_ir_buf, |
| std::string module_name, std::unique_ptr<llvm::Module>* module); |
| |
| /// Loads a module at 'file' and links it to the module associated with this |
| /// LlvmCodeGen object. The 'file' must be on the local filesystem. |
| Status LinkModuleFromLocalFs(const std::string& file); |
| |
| /// Same as 'LinkModuleFromLocalFs', but takes an hdfs file location instead and makes |
| /// sure that the same hdfs file is not linked twice. The mtime is used to ensure that |
| /// the cached hdfs_file that's used is the most recent. |
| Status LinkModuleFromHdfs(const std::string& hdfs_file, const time_t mtime); |
| |
| /// Strip global constructors and destructors from an LLVM module. We never run them |
| /// anyway (they must be explicitly invoked) so it is dead code. |
| static void StripGlobalCtorsDtors(llvm::Module* module); |
| |
| /// Set the "target-cpu" and "target-features" of 'function' to match the host's CPU's |
| /// features. Having consistent attributes for all materialized functions allows |
| /// generated IR to be inlined into cross-compiled functions' IR and vice versa. |
| static void SetCPUAttrs(llvm::Function* function); |
| |
| /// Set up any JIT listeners to process generated machine code objects, e.g. to |
| /// generate a perf symbol map or disassembly. |
| void SetupJITListeners(); |
| |
| /// Load the intrinsics impala needs. This is a one time initialization. |
| /// Values are stored in 'llvm_intrinsics_' |
| Status LoadIntrinsics(); |
| |
| /// Internal function for unit tests: skips Impala-specific wrapper generation logic. |
| void AddFunctionToJitInternal(llvm::Function* fn, void** fn_ptr); |
| |
| /// Verifies the function, e.g., checks that the IR is well-formed. Returns false if |
| /// function is invalid. |
| bool VerifyFunction(llvm::Function* function); |
| |
| /// Test-only helper: clears the 'is_corrupt_' flag. |
| void ResetVerification() { this->is_corrupt_ = false; } |
| |
| /// Optimizes the module. This includes pruning the module of any unused functions. |
| Status OptimizeModule(); |
| |
| /// Clears generated hash fns. This is only used for testing. |
| void ClearHashFns(); |
| |
| /// Finds call instructions in 'caller' where 'target_name' is a substring of the |
| /// callee's name. Found instructions are appended to the 'results' vector. |
| static void FindCallSites(llvm::Function* caller, const std::string& target_name, |
| std::vector<llvm::CallInst*>* results); |
| |
| /// This function parses the function body of the given function 'fn' and materializes |
| /// any functions called by it. |
| Status MaterializeCallees(llvm::Function* fn); |
| |
| /// This is the workhorse for materializing function 'fn'. It's invoked by |
| /// MaterializeFunction(). Calls LLVM to materialize 'fn' if it's materializable |
| /// (i.e. the function has a definition in the module and it's not materialized yet). |
| /// This function parses the bitcode of 'fn' to populate basic blocks, instructions |
| /// and other data structures attached to the function object. Return error status |
| /// for any error. |
| Status MaterializeFunction(llvm::Function* fn); |
| |
| /// Materialize the module owned by this codegen object. This will materialize all |
| /// functions and delete the module's materializer. Returns error status for any error. |
| Status MaterializeModule(); |
| |
| /// With lazy materialization, functions which haven't been materialized when the module |
| /// is finalized must be dead code or referenced only by global variables (e.g. boost |
| /// library functions or virtual function (e.g. IfExpr::GetBooleanVal())), in which case |
| /// the function is not inlined so the native version can be used and the IR version is |
| /// dead code. Mark them as not materializable, change their linkage types to external |
| /// (so linking will happen to the native version) and strip their personality functions |
| /// and comdats. DCE may complain if the above are not done. Return error status if |
| /// there is error in materializing the module. |
| Status FinalizeLazyMaterialization(); |
| |
| /// Destroy the IR module, freeing memory used by the IR. Any machine code that was |
| /// generated is retained by the execution engine. |
| void DestroyModule(); |
| |
| /// Disable CPU attributes in 'cpu_attrs' that are not present in |
| /// the '--llvm_cpu_attr_whitelist' flag. The same attributes in the input are |
| /// always present in the output, except "+" is flipped to "-" for the disabled |
| /// attributes. E.g. if 'cpu_attrs' is {"+x", "+y", "-z"} and the whitelist is |
| /// {"x", "z"}, returns {"+x", "-y", "-z"}. |
| static std::unordered_set<std::string> ApplyCpuAttrWhitelist( |
| const std::unordered_set<std::string>& cpu_attrs); |
| |
| /// Whether InitializeLlvm() has been called. |
| static bool llvm_initialized_; |
| |
| /// Host CPU name and attributes, filled in by InitializeLlvm(). |
| static std::string cpu_name_; |
| /// The cpu_attrs_ should not be modified during the execution except for tests. |
| static std::unordered_set<std::string> cpu_attrs_; |
| |
| /// Value of "target-features" attribute to be set on all IR functions. Derived from |
| /// 'cpu_attrs_'. Using a consistent value for this attribute among |
| /// hand-crafted IR and cross-compiled functions allows them to be inlined into each |
| /// other. |
| static std::string target_features_attr_; |
| |
| /// Mapping between CpuInfo flags and the corresponding strings. |
| /// The key is mapped to the string as follows: |
| /// CpuInfo flag -> enabled feature. |
| /// Bitwise negation of CpuInfo flag -> disabled feature. |
| const static std::map<int64_t, std::string> cpu_flag_mappings_; |
| |
| /// A global shared call graph for all IR functions in the main module. |
| /// Used for determining dependencies when materializing IR functions. |
| static CodegenCallGraph shared_call_graph_; |
| |
| /// Pointer to the RuntimeState which owns this codegen object. Needed in |
| /// InlineConstFnAttr() to access the query options. |
| const RuntimeState* state_; |
| |
| /// ID used for debugging (can be e.g. the fragment instance ID) |
| std::string id_; |
| |
| /// Codegen counters |
| RuntimeProfile* const profile_; |
| |
| /// MemTracker used for tracking memory consumed by codegen. Connected to a parent |
| /// MemTracker if one was provided during initialization. Owned by the ObjectPool |
| /// provided in the constructor. |
| MemTracker* mem_tracker_; |
| |
| /// Time spent reading the .ir file from the file system. |
| RuntimeProfile::Counter* load_module_timer_; |
| |
| /// Time spent creating the initial module with the cross-compiled Impala IR. |
| RuntimeProfile::Counter* prepare_module_timer_; |
| |
| /// Time spent by ExecNodes while adding IR to the module. Updated by |
| /// FragmentInstanceState during its 'CODEGEN_START' state. |
| RuntimeProfile::Counter* ir_generation_timer_; |
| |
| /// Time spent optimizing the module. |
| RuntimeProfile::Counter* optimization_timer_; |
| |
| /// Time spent compiling the module. |
| RuntimeProfile::Counter* compile_timer_; |
| |
| /// Total size of bitcode modules loaded in bytes. |
| RuntimeProfile::Counter* module_bitcode_size_; |
| |
| /// Number of functions and instructions that are optimized and compiled after pruning |
| /// unused functions from the module. |
| RuntimeProfile::Counter* num_functions_; |
| RuntimeProfile::Counter* num_instructions_; |
| |
| /// Aggregated llvm thread counters. Also includes the phase represented by |
| /// 'ir_generation_timer_' and hence is also updated by FragmentInstanceState. |
| RuntimeProfile::ThreadCounters* llvm_thread_counters_; |
| |
| /// whether or not optimizations are enabled |
| bool optimizations_enabled_; |
| |
| /// If true, the module is corrupt and we cannot codegen this query. |
| /// TODO: we could consider just removing the offending function and attempting to |
| /// codegen the rest of the query. This requires more testing though to make sure |
| /// that the error is recoverable. |
| bool is_corrupt_; |
| |
| /// If true, the module has been compiled. It is not valid to add additional |
| /// functions after this point. |
| bool is_compiled_; |
| |
| /// Error string that llvm will write to |
| std::string error_string_; |
| |
| /// Top level llvm object. Objects from different contexts do not share anything. |
| /// We can have multiple instances of the LlvmCodeGen object in different threads |
| std::unique_ptr<llvm::LLVMContext> context_; |
| |
| /// Top level codegen object. Contains everything to jit one 'unit' of code. |
| /// module_ is set by Init(). module_ is owned by execution_engine_. |
| llvm::Module* module_; |
| |
| /// Execution/Jitting engine. |
| std::unique_ptr<llvm::ExecutionEngine> execution_engine_; |
| |
| /// The memory manager used by 'execution_engine_'. Owned by 'execution_engine_'. |
| ImpalaMCJITMemoryManager* memory_manager_; |
| |
| /// Functions parsed from pre-compiled module. Indexed by ImpalaIR::Function enum. |
| std::vector<llvm::Function*> cross_compiled_functions_; |
| |
| /// Stores functions handcrafted by impala. This does not contain cross compiled |
| /// functions, only functions that were generated from scratch at runtime. Does not |
| /// overlap with cross_compiled_functions_. |
| std::vector<llvm::Function*> handcrafted_functions_; |
| |
| /// Stores the functions that have been finalized. |
| std::unordered_set<llvm::Function*> finalized_functions_; |
| |
| /// A mapping of unique id to registered expr functions |
| std::map<int64_t, llvm::Function*> registered_exprs_map_; |
| |
| /// A set of all the functions in 'registered_exprs_map_' for quick lookup. |
| std::set<llvm::Function*> registered_exprs_; |
| |
| /// A cache of loaded llvm intrinsics |
| std::map<llvm::Intrinsic::ID, llvm::Function*> llvm_intrinsics_; |
| |
| /// This is a cache of generated hash functions by byte size. It is common |
| /// for the caller to know the number of bytes to hash (e.g. tuple width) and |
| /// we can codegen a loop unrolled hash function. |
| std::map<int, llvm::Function*> hash_fns_; |
| |
| /// The locations of modules that have been linked. Uses hdfs file location as the key. |
| /// Used to avoid linking the same module twice, which causes symbol collision errors. |
| std::set<std::string> linked_modules_; |
| |
| /// The vector of functions to automatically JIT compile after FinalizeModule(). |
| std::vector<std::pair<llvm::Function*, void**>> fns_to_jit_compile_; |
| |
| /// Debug strings that will be outputted by jitted code. This is a copy of all |
| /// strings passed to CodegenDebugTrace. |
| std::vector<std::string> debug_strings_; |
| |
| /// llvm representation of a few common types. Owned by context. |
| llvm::PointerType* ptr_type_; // int8_t* |
| llvm::Type* void_type_; // void |
| llvm::Type* string_value_type_; // StringValue |
| llvm::Type* timestamp_value_type_; // TimestampValue |
| llvm::Type* collection_value_type_; // CollectionValue |
| |
| /// llvm constants to help with code gen verbosity |
| llvm::Constant* true_value_; |
| llvm::Constant* false_value_; |
| |
| /// The symbol emitter associated with 'execution_engine_'. Methods on |
| /// 'symbol_emitter_' are called by 'execution_engine_' when code is emitted or freed. |
| /// The lifetime of the symbol emitter must be longer than 'execution_engine_'. |
| boost::scoped_ptr<CodegenSymbolEmitter> symbol_emitter_; |
| |
| /// Provides an implementation of a LLVM diagnostic handler and maintains the error |
| /// information from its callbacks. |
| class DiagnosticHandler { |
| public: |
| /// Returns the last error that was reported via DiagnosticHandlerFn() and then |
| /// clears it. Returns an empty string otherwise. This should be called after any |
| /// LLVM API call that can fail but returns error info via this mechanism. |
| /// TODO: IMPALA-6038: use this to check and handle errors wherever needed. |
| std::string GetErrorString(); |
| |
| /// Handler function that sets the state on an instance of this class which is |
| /// accessible via the LlvmCodeGen object passed to it using the 'context' |
| /// input parameter. |
| static void DiagnosticHandlerFn(const llvm::DiagnosticInfo &info, void *context); |
| |
| private: |
| /// Contains the last error that was reported via DiagnosticHandlerFn(). |
| /// Is cleared by a call to GetErrorString(). |
| std::string error_str_; |
| }; |
| |
| DiagnosticHandler diagnostic_handler_; |
| |
| /// Very rough estimate of memory in bytes that the IR and the intermediate data |
| /// structures used by the optimizer may consume per LLVM IR instruction to be |
| /// optimized (after dead code is removed). The number is chosen to avoid pathological |
| /// behaviour at either extreme: failing queries unnecessarily because the memory |
| /// estimate is too high versus having large amounts of untracked memory because the |
| /// estimate is too low. |
| /// |
| /// This was chosen by looking at the behaviour of TPC-H queries. Using the heap growth |
| /// profile from gperftools reveal that LLVM allocated ~9mb of memory for fragments with |
| /// ~17k total instructions in TPC-H Q2. Inspection of other TPC-H queries revealed |
| /// that a typical fragment from a TPC-H query is < 5,000 instructions, which translates |
| /// to 2.5MB, which is almost always lower than the runtime memory requirement of the |
| /// fragment - so we are unlikely to fail queries unnecessarily. |
| /// |
| /// This assumes optimizer memory usage scales linearly with instruction count. This is |
| /// true only if the size of functions is bounded, because some optimization passes |
| /// (e.g. global value numbering) use time and memory that is super-linear in relation |
| /// to the # of instructions in a function. So codegen should avoid generating |
| /// arbitrarily large functions. |
| static constexpr int64_t ESTIMATED_OPTIMIZER_BYTES_PER_INST = 512; |
| }; |
| } |
| |
| #endif |