blob: 0f66d049854731a151085cc5bfc6aad6a991ab23 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <iosfwd>
#include <limits>
#include <string>
#include <utility>
#include <vector>

#include "codegen/impala-ir.h"
#include "runtime/collection-value.h"
#include "runtime/types.h"
namespace impala {
class MemPool;
class SlotDescriptor;
class StringValue;
class Tuple;
/// Useful utility functions for runtime values (which are passed around as void*).
/// Every member is static; the class only serves as a scope for these helpers and
/// constants.
class RawValue {
 public:
  /// Ascii output precision for double/float, print 16 digits.
  static const int ASCII_PRECISION = 16;

  /// Single NaN values to ensure all NaN values can be assigned one bit pattern
  /// that will always compare and hash the same way. Allows for all NaN values
  /// to be put into the same "group by" bucket.
  static constexpr double CANONICAL_DOUBLE_NAN =
      std::numeric_limits<double>::quiet_NaN();
  static constexpr float CANONICAL_FLOAT_NAN = std::numeric_limits<float>::quiet_NaN();

  /// The canonical zero values when comparing negative and positive zeros.
  static constexpr double CANONICAL_DOUBLE_ZERO = 0.0;
  static constexpr float CANONICAL_FLOAT_ZERO = 0.0f;

  /// Convert 'value' into ascii and write to 'stream'. NULL turns into "NULL". 'scale'
  /// determines how many digits after the decimal are printed for floating point numbers,
  /// -1 indicates to use the stream's current formatting. Doesn't support complex types.
  /// If 'quote_val' is true, write STRING, VARCHAR, CHAR, DATE, TIMESTAMP values in
  /// quoted form surrounded by double quotes.
  /// TODO: for string types, we just print the result regardless of whether or not it
  /// is ascii. This could be undesirable.
  static void PrintValue(const void* value, const ColumnType& type, int scale,
      std::stringstream* stream, bool quote_val = false);

  /// Write ascii value to string instead of stringstream.
  static void PrintValue(const void* value, const ColumnType& type, int scale,
      std::string* str);

  /// Return ascii value string. Convenience wrapper that forwards to the
  /// std::string* overload and returns the filled-in string by value.
  static std::string PrintValue(const void* value, const ColumnType& type, int scale) {
    std::string str;
    PrintValue(value, type, scale, &str);
    return str;
  }

  /// Writes the byte representation of a value to a stringstream character-by-character
  static void PrintValueAsBytes(const void* value, const ColumnType& type,
      std::stringstream* stream);

  /// Returns hash value for 'v' interpreted as 'type'. The resulting hash value
  /// is combined with the seed value. Inlined in IR so that the constant 'type' can be
  /// propagated.
  static uint32_t IR_ALWAYS_INLINE GetHashValue(
      const void* v, const ColumnType& type, uint32_t seed = 0) noexcept;

  /// Templatized version of GetHashValue, use if type is known ahead. GetHashValue
  /// handles nulls. Inlined in IR so that the constant 'type' can be propagated.
  template <typename T>
  static inline uint32_t IR_ALWAYS_INLINE GetHashValue(
      const T* v, const ColumnType& type, uint32_t seed = 0) noexcept;

  /// Returns hash value for non-nullable 'v' for type T. GetHashValueNonNull doesn't
  /// handle nulls.
  template <typename T>
  static inline uint32_t GetHashValueNonNull(const T* v, const ColumnType& type,
      uint32_t seed = 0);

  /// Get a 64-bit hash value using the FastHash function.
  /// https://code.google.com/archive/p/fast-hash/
  static uint64_t GetHashValueFastHash(const void* v, const ColumnType& type,
      uint64_t seed);

  /// Templatized version of GetHashValueFastHash, use if type is known ahead.
  /// GetHashValueFastHash handles nulls. Inlined in IR so that the constant
  /// 'type' can be propagated.
  template <typename T>
  static inline uint64_t IR_ALWAYS_INLINE GetHashValueFastHash(
      const T* v, const ColumnType& type, uint64_t seed);

  /// Returns hash value for non-nullable 'v' for type T. GetHashValueFastHashNonNull
  /// doesn't handle nulls.
  template <typename T>
  static inline uint64_t GetHashValueFastHashNonNull(
      const T* v, const ColumnType& type, uint64_t seed);

  /// Get a 32-bit hash value using the FastHash algorithm.
  static uint32_t IR_ALWAYS_INLINE GetHashValueFastHash32(
      const void* v, const ColumnType& type, uint32_t seed = 0) noexcept;

  /// Compares both values.
  /// Return value is < 0 if v1 < v2, 0 if v1 == v2, > 0 if v1 > v2.
  /// Inlined in IR so that the constant 'type' can be propagated.
  static int IR_ALWAYS_INLINE Compare(
      const void* v1, const void* v2, const ColumnType& type) noexcept;

  /// Writes the bytes of a given value into the slot of a tuple. Supports primitive and
  /// complex types. 'value' is allowed to be NULL. For string and collection values, the
  /// data is deep-copied into memory allocated from 'pool' if pool is non-NULL, otherwise
  /// the data is not copied.
  ///
  /// If COLLECT_VAR_LEN_VALS is true, gathers the non-NULL non-smallified string slots of
  /// the slot tree into 'string_values' and the non-NULL collection slots along with
  /// their byte sizes into 'collection_values' recursively. Smallified strings (see Small
  /// String Optimization, IMPALA-12373) are not collected. Children are placed before
  /// their parents in the vectors (post-order traversal) - see Tuple::MaterializeExprs()
  /// and Sorter::Run::CollectNonNullVarSlots() for the reason. If COLLECT_VAR_LEN_VALS is
  /// true, 'string_values' and 'collection_values' must be non-NULL.
  template <bool COLLECT_VAR_LEN_VALS>
  static void Write(const void* value, Tuple* tuple, const SlotDescriptor* slot_desc,
      MemPool* pool, std::vector<StringValue*>* string_values,
      std::vector<std::pair<CollectionValue*, int64_t>>* collection_values);

  /// Convenience wrapper for the templated version with COLLECT_VAR_LEN_VALS=false.
  static void Write(const void* value, Tuple* tuple, const SlotDescriptor* slot_desc,
      MemPool* pool);

  /// Writes 'src' into 'dst' for the given primitive type. Does not support complex
  /// types. 'src' must be non-NULL. For string values, the string data is copied into
  /// 'pool' if pool is non-NULL.
  static void WriteNonNullPrimitive(const void* src, void* dst, const ColumnType& type,
      MemPool* pool);

  /// Returns true if v1 == v2.
  /// This is more performant than Compare() == 0 for string equality, mostly because of
  /// the length comparison check.
  static inline bool Eq(const void* v1, const void* v2, const ColumnType& type);

  /// Returns true if val/type correspond to a NaN floating point value.
  static inline bool IsNaN(const void* val, const ColumnType& type);

  /// Returns true if val/type correspond to a +0/-0 floating point value.
  static inline bool IsFloatingZero(const void* val, const ColumnType& type);

  /// Returns the canonical form of the given value. Currently this means a unified NaN
  /// value in case of NaN and +0 in case of +0/-0.
  static inline const void* CanonicalValue(const void* val, const ColumnType& type);

  /// Returns a canonical NaN value for a floating point type
  /// (which will always have the same bit-pattern to maintain consistency in hashing).
  static inline const void* CanonicalNaNValue(const ColumnType& type);

  /// Returns positive zero for floating point types.
  static inline const void* PositiveFloatingZero(const ColumnType& type);

  /// Top level null values are printed as "NULL"; collections and structs are printed in
  /// JSON format, which requires "null".
  static constexpr const char* NullLiteral(bool top_level) {
    return top_level ? "NULL" : "null";
  }

 private:
  /// Like Write() but 'value' must be non-NULL.
  template <bool COLLECT_VAR_LEN_VALS>
  static void WriteNonNull(const void* value, Tuple* tuple,
      const SlotDescriptor* slot_desc, MemPool* pool,
      std::vector<StringValue*>* string_values,
      std::vector<std::pair<CollectionValue*, int64_t>>* collection_values);

  /// Recursive helper function for Write() to handle struct slots.
  template <bool COLLECT_VAR_LEN_VALS>
  static void WriteStruct(const void* value, Tuple* tuple,
      const SlotDescriptor* slot_desc, MemPool* pool,
      std::vector<StringValue*>* string_values,
      std::vector<std::pair<CollectionValue*, int64_t>>* collection_values);

  /// Recursive helper function for Write() to handle collection slots.
  template <bool COLLECT_VAR_LEN_VALS>
  static void WriteCollection(const void* value, Tuple* tuple,
      const SlotDescriptor* slot_desc, MemPool* pool,
      std::vector<StringValue*>* string_values,
      std::vector<std::pair<CollectionValue*, int64_t>>* collection_values);

  /// Helper operating on a destination collection 'dest' and a source collection 'src'
  /// that are described by 'collection_slot_desc'.
  /// NOTE(review): the definition is not visible from this header; presumably this
  /// writes the children of 'src' into 'dest' on behalf of WriteCollection() - confirm
  /// against the implementation.
  template <bool COLLECT_VAR_LEN_VALS>
  static void WriteCollectionChildren(const CollectionValue& dest,
      const CollectionValue& src, const SlotDescriptor& collection_slot_desc,
      MemPool* pool, std::vector<StringValue*>* string_values,
      std::vector<std::pair<CollectionValue*, int64_t>>* collection_values);

  /// Helper operating on the slot described by 'slot_desc' in a source child tuple
  /// 'child_src_tuple' and a destination child tuple 'child_dest_tuple'.
  /// NOTE(review): the definition is not visible from this header; presumably handles
  /// a single var-len (string or collection) child slot of a collection item - confirm
  /// against the implementation.
  template <bool COLLECT_VAR_LEN_VALS>
  static void WriteCollectionVarlenChild(Tuple* child_dest_tuple, Tuple* child_src_tuple,
      const SlotDescriptor* slot_desc, MemPool* pool,
      std::vector<StringValue*>* string_values,
      std::vector<std::pair<CollectionValue*, int64_t>>* collection_values);

  /// Gets the destination slot from 'tuple' and 'slot_desc' and writes 'value' to this
  /// slot. 'value' must be primitive and non-NULL. If COLLECT_VAR_LEN_VALS is true,
  /// collects the pointers of string slots to 'string_values'.
  template <bool COLLECT_VAR_LEN_VALS>
  static void WritePrimitiveCollectVarlen(const void* value, Tuple* tuple,
      const SlotDescriptor* slot_desc, MemPool* pool,
      std::vector<StringValue*>* string_values);
};
}