blob: 82efc242b7428a7cb88439848766fe0d30485bd2 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_RUNTIME_TUPLE_H
#define IMPALA_RUNTIME_TUPLE_H
#include "codegen/impala-ir.h"
#include "common/logging.h"
#include "gutil/macros.h"
#include "runtime/descriptors.h"
#include "runtime/mem-pool.h"
namespace llvm {
class Function;
}
namespace impala {
struct CollectionValue;
struct StringValue;
class TupleDescriptor;
class TupleRow;
/// A tuple is stored as a contiguous sequence of bytes containing a fixed number
/// of fixed-size slots. The slots are arranged in order of increasing byte length;
/// the tuple might contain padding between slots in order to align them according
/// to their type.
//
/// The contents of a tuple:
/// 1) a number of bytes holding a bitvector of null indicators
/// 2) bool slots
/// 3) tinyint slots
/// 4) smallint slots
/// 5) int slots
/// 6) float slots
/// 7) bigint slots
/// 8) double slots
/// 9) string slots
//
/// A tuple with 0 materialised slots is represented as NULL.
///
/// TODO: Our projection of collection-typed slots breaks/augments the conventional
/// semantics of the null bits, because we rely on producers of array values to also
/// set the slot value in addition to the null bit. We should address this issue with
/// a proper projection that restores the intended (original) null bit semantics.
/// See also UnnestNode for details on the projection.
class Tuple {
public:
/// Allocates 'size' bytes from 'pool', zero-fills them via Init() and returns the
/// memory as a Tuple. Returns NULL (without allocating anything) when 'size' is 0.
static Tuple* Create(int size, MemPool* pool) {
  if (size != 0) {
    Tuple* new_tuple = reinterpret_cast<Tuple*>(pool->Allocate(size));
    new_tuple->Init(size);
    return new_tuple;
  }
  return NULL;
}
/// Zeroes 'size' bytes starting at the address of this tuple.
void Init(int size) {
  void* const tuple_bytes = this;
  memset(tuple_bytes, 0, size);
}
/// Turns off every null indicator bit of this tuple by zeroing the
/// 'tuple_desc.num_null_bytes()' bytes that start 'tuple_desc.null_bytes_offset()'
/// bytes past the beginning of the tuple.
void ClearNullBits(const TupleDescriptor& tuple_desc) {
  uint8_t* const tuple_base = reinterpret_cast<uint8_t*>(this);
  uint8_t* const null_bytes = tuple_base + tuple_desc.null_bytes_offset();
  memset(null_bytes, 0, tuple_desc.num_null_bytes());
}
/// Returns the total size in bytes of all data represented in this tuple: the tuple
/// data itself plus all referenced string and collection data.
/// NOTE(review): 'desc' is presumably the descriptor this tuple was laid out with -
/// confirm against callers.
int64_t TotalByteSize(const TupleDescriptor& desc) const;
/// Returns the size in bytes of all string and collection data referenced by this
/// tuple.
/// NOTE(review): presumably excludes the tuple's own fixed-size bytes, which
/// TotalByteSize() lists as a separate component - confirm against the implementation.
int64_t VarlenByteSize(const TupleDescriptor& desc) const;
/// Creates a copy of 'this', including all of its referenced variable-length data
/// (i.e. strings and collections), using 'pool' to allocate memory. Returns the copy.
/// Use the DeepCopy(Tuple* dst, ...) overload instead when the destination tuple has
/// already been allocated.
Tuple* DeepCopy(const TupleDescriptor& desc, MemPool* pool);
/// Creates a copy of 'this', including all its referenced variable-length data
/// (i.e. strings and collections), using 'pool' to allocate memory. This version does
/// not allocate a tuple, instead copying to 'dst'. 'dst' must already be allocated to
/// the correct size (i.e. TotalByteSize()).
/// NOTE(review): since memory is allocated from 'pool' here, 'dst' may only need room
/// for the fixed-size tuple bytes rather than the full TotalByteSize() - confirm
/// against the implementation before relying on the size stated above.
void DeepCopy(Tuple* dst, const TupleDescriptor& desc, MemPool* pool);
/// Serializes a copy of 'this', including all referenced variable-length data (i.e.
/// strings and collections), into the buffer at '*data'. The tuple is written first,
/// followed by any variable-length data. Both '*data' and '*offset' are incremented by
/// the total number of bytes written. The buffer must already be allocated to the
/// correct size (i.e. TotalByteSize()).
///
/// If 'convert_ptrs' is true, pointers that are part of the tuple are rewritten as
/// offsets into the buffer; otherwise (the default) they remain pointers directly into
/// the buffer. The offsets are determined by 'offset', where '*offset' corresponds to
/// address '*data'. Tuples written with 'convert_ptrs' = true can be turned back into
/// pointer form with ConvertOffsetsToPointers().
void DeepCopy(const TupleDescriptor& desc, char** data, int* offset,
bool convert_ptrs = false);
/// Inverse of the pointer-to-offset rewriting done by DeepCopy() with
/// 'convert_ptrs' = true; it should only be called on tuples created that way.
///
/// Takes all pointers contained in this tuple (i.e. in StringValues and
/// CollectionValues, including those contained within other CollectionValues) and
/// converts the offset values stored in them into pointers into 'tuple_data'.
///
/// 'tuple_data' should be the serialized tuple buffer created by DeepCopy(). Note that
/// 'tuple_data' should always be the beginning of this buffer, regardless of this
/// tuple's offset in 'tuple_data'.
void ConvertOffsetsToPointers(const TupleDescriptor& desc, uint8_t* tuple_data);
/// Materializes 'this' by evaluating the expressions in 'materialize_expr_ctxs' over
/// the specified 'row'. (Memory for this tuple itself must already be allocated.)
///
/// When 'pool' is non-NULL it is used to allocate var-length data; when it is NULL,
/// var-length data isn't copied. 'NULL_POOL' should be true if 'pool' is NULL and false
/// otherwise. The template parameter serves only to differentiate the NULL vs.
/// non-NULL pool cases when the function calls are replaced during codegen; the
/// parameter means there are two different function symbols.
///
/// If 'COLLECT_STRING_VALS' is true, the materialized non-NULL string value slots and
/// the total length of the string slots are returned in 'non_null_string_values' and
/// 'total_string_lengths'; both must be non-NULL in this case. 'non_null_string_values'
/// does not need to be empty; its original contents will be overwritten.
/// TODO: this function does not collect other var-len types such as collections.
template <bool COLLECT_STRING_VALS, bool NULL_POOL>
inline void IR_ALWAYS_INLINE MaterializeExprs(TupleRow* row,
    const TupleDescriptor& desc, const std::vector<ExprContext*>& materialize_expr_ctxs,
    MemPool* pool, std::vector<StringValue*>* non_null_string_values = NULL,
    int* total_string_lengths = NULL) {
  DCHECK_EQ(NULL_POOL, pool == NULL);
  DCHECK_EQ(materialize_expr_ctxs.size(), desc.slots().size());

  // Output array and element count handed to the implementation below. They stay
  // NULL / 0 when string values are not being collected.
  int num_collected = 0;
  StringValue** collected_vals = NULL;
  if (COLLECT_STRING_VALS) {
    DCHECK(non_null_string_values != NULL);
    DCHECK(total_string_lengths != NULL);
    // std::vector::resize() zero-initializes any newly added elements, so grow the
    // vector to the largest possible size up front and let the implementation write
    // into the raw array. The vector is truncated again below once the actual count
    // is known; truncating preserves the values that were already written.
    non_null_string_values->resize(desc.string_slots().size());
    collected_vals = non_null_string_values->data();
    *total_string_lengths = 0;
  }

  MaterializeExprs<COLLECT_STRING_VALS, NULL_POOL>(row, desc,
      materialize_expr_ctxs.data(), pool, collected_vals, total_string_lengths,
      &num_collected);

  if (COLLECT_STRING_VALS) {
    non_null_string_values->resize(num_collected);
  }
}
/// Symbols (or substrings of the symbols) of MaterializeExprs(). These can be passed to
/// LlvmCodeGen::ReplaceCallSites(). Only declared here; the values are defined outside
/// this header.
/// NOTE(review): judging by the names, MATERIALIZE_EXPRS_NULL_POOL_SYMBOL presumably
/// identifies the NULL_POOL = true instantiation and MATERIALIZE_EXPRS_SYMBOL the
/// other one - confirm against the definitions.
static const char* MATERIALIZE_EXPRS_SYMBOL;
static const char* MATERIALIZE_EXPRS_NULL_POOL_SYMBOL;
/// Generates an IR version of MaterializeExprs(), returned in 'fn'. Currently only
/// 'collect_string_vals' = false is implemented.
///
/// 'pool' may be NULL, in which case no pool-related code is generated. Otherwise
/// 'pool's address is used directly in the IR. Note that this requires generating
/// separate functions for the non-NULL and NULL cases, i.e., the 'pool' argument of the
/// generated function is ignored. There are two different MaterializeExprs symbols to
/// differentiate these cases when we replace the function calls during codegen.
///
/// 'desc' and 'materialize_expr_ctxs' correspond to the arguments of the same names
/// taken by MaterializeExprs(). The vector type is spelled as std::vector, like the
/// MaterializeExprs() declaration in this header, so that this header does not depend
/// on a using-declaration for 'vector' being visible wherever it is included.
static Status CodegenMaterializeExprs(LlvmCodeGen* codegen, bool collect_string_vals,
    const TupleDescriptor& desc, const std::vector<ExprContext*>& materialize_expr_ctxs,
    MemPool* pool, llvm::Function** fn);
/// Turns the null indicator bit described by 'offset' on. For non-nullable slots the
/// mask will be 0, which makes this a no-op (so there is no need to branch to check
/// whether a slot is nullable).
void SetNull(const NullIndicatorOffset& offset) {
  char* const tuple_base = reinterpret_cast<char*>(this);
  tuple_base[offset.byte_offset] |= offset.bit_mask;
}
/// Turns the null indicator bit described by 'offset' off, leaving the other bits of
/// the same null indicator byte unchanged.
void SetNotNull(const NullIndicatorOffset& offset) {
  char* const tuple_base = reinterpret_cast<char*>(this);
  tuple_base[offset.byte_offset] &= ~offset.bit_mask;
}
/// Returns true if the null indicator bit described by 'offset' is set, i.e. if any
/// bit of 'offset.bit_mask' is on in the byte 'offset.byte_offset' bytes into the tuple.
bool IsNull(const NullIndicatorOffset& offset) const {
  const char* const tuple_base = reinterpret_cast<const char*>(this);
  const char indicator_byte = tuple_base[offset.byte_offset];
  return (indicator_byte & offset.bit_mask) != 0;
}
/// Returns the untyped address of the slot located 'offset' bytes from the start of
/// this tuple. An offset of -1 indicates a non-materialized slot and must not be passed.
void* GetSlot(int offset) {
  DCHECK(offset != -1);
  char* const tuple_base = reinterpret_cast<char*>(this);
  return tuple_base + offset;
}
/// Const overload of GetSlot(): same address computation, read-only result.
const void* GetSlot(int offset) const {
  DCHECK(offset != -1);
  const char* const tuple_base = reinterpret_cast<const char*>(this);
  return tuple_base + offset;
}
/// Returns the slot located 'offset' bytes from the start of this tuple, viewed as a
/// StringValue. An offset of -1 indicates a non-materialized slot and must not be
/// passed.
StringValue* GetStringSlot(int offset) {
  DCHECK(offset != -1);
  char* const slot_addr = reinterpret_cast<char*>(this) + offset;
  return reinterpret_cast<StringValue*>(slot_addr);
}
/// Const overload of GetStringSlot(): same address computation, read-only result.
const StringValue* GetStringSlot(int offset) const {
  DCHECK(offset != -1);
  const char* const slot_addr = reinterpret_cast<const char*>(this) + offset;
  return reinterpret_cast<const StringValue*>(slot_addr);
}
/// Returns the slot located 'offset' bytes from the start of this tuple, viewed as a
/// CollectionValue. An offset of -1 indicates a non-materialized slot and must not be
/// passed.
CollectionValue* GetCollectionSlot(int offset) {
  DCHECK(offset != -1);
  char* const slot_addr = reinterpret_cast<char*>(this) + offset;
  return reinterpret_cast<CollectionValue*>(slot_addr);
}
/// Const overload of GetCollectionSlot(): same address computation, read-only result.
const CollectionValue* GetCollectionSlot(int offset) const {
  DCHECK(offset != -1);
  const char* const slot_addr = reinterpret_cast<const char*>(this) + offset;
  return reinterpret_cast<const CollectionValue*>(slot_addr);
}
/// For C++/IR interop, we need to be able to look up types by name. This is the name
/// used to look up the Tuple type; it is only declared here and is defined outside
/// this header.
static const char* LLVM_CLASS_NAME;
private:
/// Tuple objects must not be copied or assigned as C++ values: the class declares no
/// data members and is only ever used as a view over raw tuple bytes (see Create()),
/// so a C++-level copy would not copy any tuple data. Use the DeepCopy() overloads to
/// copy tuple contents.
DISALLOW_COPY_AND_ASSIGN(Tuple);
/// Copies all string and collection data referenced by this tuple: allocates memory
/// from 'pool', copies the data into it, then updates the pointers in this tuple to
/// reference the copied data.
/// NOTE(review): presumably the var-len half of the pool-based DeepCopy() overloads -
/// confirm against the implementation.
void DeepCopyVarlenData(const TupleDescriptor& desc, MemPool* pool);
/// Copies all string and collection data referenced by this tuple into the buffer at
/// '*data'. Increments '*data' and '*offset' by the number of bytes written.
/// Recursively writes collection tuple data and the collection and string data it
/// references.
/// NOTE(review): 'convert_ptrs' presumably has the same meaning as in the public
/// DeepCopy(desc, data, offset, convert_ptrs) overload (rewrite pointers as offsets
/// into the buffer) - confirm against the implementation.
void DeepCopyVarlenData(const TupleDescriptor& desc, char** data, int* offset,
bool convert_ptrs);
/// Implementation of MaterializeExprs(). This function is replaced during
/// codegen. 'num_non_null_string_values' must be initialized by the caller.
///
/// Unlike the public wrapper, this overload takes raw arrays: 'materialize_expr_ctxs'
/// is the data() of the wrapper's ExprContext vector, and 'non_null_string_values' is
/// the data() of the wrapper's output vector (NULL when string values are not being
/// collected). The wrapper resizes its output vector to '*num_non_null_string_values'
/// after this call returns.
template <bool COLLECT_STRING_VALS, bool NULL_POOL>
void IR_NO_INLINE MaterializeExprs(TupleRow* row, const TupleDescriptor& desc,
ExprContext* const* materialize_expr_ctxs, MemPool* pool,
StringValue** non_null_string_values, int* total_string_lengths,
int* num_non_null_string_values);
};
}
#endif