// be/src/runtime/raw-value.h - impala - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #pragma once

 #include <cstdint>
 #include <iosfwd>
 #include <limits>
 #include <string>
 #include <utility>
 #include <vector>

 #include "codegen/impala-ir.h"
 #include "runtime/collection-value.h"
 #include "runtime/types.h"

 namespace impala {

 // Forward declarations: this header only names these types in function signatures,
 // so their full definitions are not required here.
 class MemPool;
 class SlotDescriptor;
 class StringValue;
 class Tuple;

 /// Useful utility functions for runtime values (which are passed around as void*).
 class RawValue {
  public:
   /// Ascii output precision for double/float, print 16 digits.
   static const int ASCII_PRECISION = 16;

   /// Single NaN values to ensure all NaN values can be assigned one bit pattern
   /// that will always compare and hash the same way.  Allows for all NaN values
   /// to be put into the same "group by" bucket.
   static constexpr double CANONICAL_DOUBLE_NAN = std::numeric_limits<double>::quiet_NaN();
   static constexpr float CANONICAL_FLOAT_NAN = std::numeric_limits<float>::quiet_NaN();
   /// The canonical zero values when comparing negative and positive zeros.
   static constexpr double CANONICAL_DOUBLE_ZERO = 0.0;
   static constexpr float CANONICAL_FLOAT_ZERO = 0.0f;

   /// Convert 'value' into ascii and write to 'stream'. NULL turns into "NULL". 'scale'
   /// determines how many digits after the decimal are printed for floating point numbers,
   /// -1 indicates to use the stream's current formatting. Doesn't support complex types.
   /// If 'quote_val' is true, write STRING, VARCHAR, CHAR, DATE, TIMESTAMP values in
   /// quoted form surrounded by double quotes.
   /// TODO: for string types, we just print the result regardless of whether or not it
   /// ascii. This could be undesirable.
   static void PrintValue(const void* value, const ColumnType& type, int scale,
                          std::stringstream* stream, bool quote_val=false);

   /// Write ascii value to string instead of stringstream.
   static void PrintValue(const void* value, const ColumnType& type, int scale,
                          std::string* str);

   /// Return ascii value string.
   static std::string PrintValue(const void* value, const ColumnType& type, int scale) {
     std::string str;
     PrintValue(value, type, scale, &str);
     return str;
   }

   /// Writes the byte representation of a value to a stringstream character-by-character
   static void PrintValueAsBytes(const void* value, const ColumnType& type,
                                 std::stringstream* stream);

   /// Returns hash value for 'v' interpreted as 'type'.  The resulting hash value
   /// is combined with the seed value. Inlined in IR so that the constant 'type' can be
   /// propagated.
   static uint32_t IR_ALWAYS_INLINE GetHashValue(
       const void* v, const ColumnType& type, uint32_t seed = 0) noexcept;

   /// Templatized version of GetHashValue, use if type is known ahead. GetHashValue
   /// handles nulls. Inlined in IR so that the constant 'type' can be propagated.
   template<typename T>
   static inline uint32_t IR_ALWAYS_INLINE GetHashValue(
       const T* v, const ColumnType& type, uint32_t seed = 0) noexcept;

   /// Returns hash value for non-nullable 'v' for type T. GetHashValueNonNull doesn't
   /// handle nulls.
   template<typename T>
   static inline uint32_t GetHashValueNonNull(const T* v, const ColumnType& type,
       uint32_t seed = 0);

   /// Get a 64-bit hash value using the FastHash function.
   /// https://code.google.com/archive/p/fast-hash/
   static uint64_t GetHashValueFastHash(const void* v, const ColumnType& type,
       uint64_t seed);

   /// Templatized version of GetHashValueFastHash, use if type is known ahead.
   /// GetHashValueFastHash handles nulls. Inlined in IR so that the constant
   /// 'type' can be propagated.
   template <typename T>
   static inline uint64_t IR_ALWAYS_INLINE GetHashValueFastHash(
       const T* v, const ColumnType& type, uint64_t seed);

   /// Returns hash value for non-nullable 'v' for type T. GetHashValueFastHashNonNull
   /// doesn't handle nulls.
   template <typename T>
   static inline uint64_t GetHashValueFastHashNonNull(
       const T* v, const ColumnType& type, uint64_t seed);

   // Get a 32-bit hash value using the FastHash algorithm.
   static uint32_t IR_ALWAYS_INLINE GetHashValueFastHash32(
       const void* v, const ColumnType& type, uint32_t seed = 0) noexcept;

   /// Compares both values.
   /// Return value is < 0  if v1 < v2, 0 if v1 == v2, > 0 if v1 > v2.
   /// Inlined in IR so that the constant 'type' can be propagated.
   static int IR_ALWAYS_INLINE Compare(
       const void* v1, const void* v2, const ColumnType& type) noexcept;

   /// Writes the bytes of a given value into the slot of a tuple. Supports primitive and
   /// complex types. 'value' is allowed to be NULL. For string and collection values, the
   /// data is deep-copied into memory allocated from 'pool' if pool is non-NULL, otherwise
   /// the data is not copied.
   ///
   /// If COLLECT_VAR_LEN_VALS is true, gathers the non-NULL non-smallified string slots of
   /// the slot tree into 'string_values' and the non-NULL collection slots along with
   /// their byte sizes into 'collection_values' recursively. Smallified strings (see Small
   /// String Optimization, IMPALA-12373) are not collected. Children are placed before
   /// their parents in the vectors (post-order traversal) - see Tuple::MaterializeExprs()
   /// and Sorter::Run::CollectNonNullVarSlots() for the reason. If COLLECT_VAR_LEN_VALS is
   /// true, 'string_values' and 'collection_values' must be non-NULL.
   template <bool COLLECT_VAR_LEN_VALS>
   static void Write(const void* value, Tuple* tuple, const SlotDescriptor* slot_desc,
       MemPool* pool, std::vector<StringValue*>* string_values,
       std::vector<std::pair<CollectionValue*, int64_t>>* collection_values);

   /// Convenience wrapper for the templated version with COLLECT_VAR_LEN_VALS=false.
   static void Write(const void* value, Tuple* tuple, const SlotDescriptor* slot_desc,
       MemPool* pool);

   /// Writes 'src' into 'dst' for the given primitive type. Does not support complex
   /// types. 'src' must be non-NULL. For string values, the string data is copied into
   /// 'pool' if pool is non-NULL.
   static void WriteNonNullPrimitive(const void* src, void* dst, const ColumnType& type,
       MemPool* pool);

   /// Returns true if v1 == v2.
   /// This is more performant than Compare() == 0 for string equality, mostly because of
   /// the length comparison check.
   static inline bool Eq(const void* v1, const void* v2, const ColumnType& type);

   /// Returns true if val/type correspond to a NaN floating point value.
   static inline bool IsNaN(const void* val, const ColumnType& type);

   /// Returns true if val/type correspond to a +0/-0 floating point value.
   static inline bool IsFloatingZero(const void* val, const ColumnType& type);

   /// Returns the canonical form of the given value. Currently this means a unified NaN
   /// value in case of NaN and +0 in case of +0/-0.
   static inline const void* CanonicalValue(const void* val, const ColumnType& type);

   /// Returns a canonical NaN value for a floating point type
   /// (which will always have the same bit-pattern to maintain consistency in hashing).
   static inline const void* CanonicalNaNValue(const ColumnType& type);

   // Returns positive zero for floating point types.
   static inline const void* PositiveFloatingZero(const ColumnType& type);

   // Top level null values are printed as "NULL"; collections and structs are printed in
   // JSON format, which requires "null".
   static constexpr const char* NullLiteral(bool top_level) {
     return top_level ? "NULL" : "null";
   }

 private:
   /// Like Write() but 'value' must be non-NULL.
   template <bool COLLECT_VAR_LEN_VALS>
   static void WriteNonNull(const void* value, Tuple* tuple,
       const SlotDescriptor* slot_desc, MemPool* pool,
       std::vector<StringValue*>* string_values,
       std::vector<std::pair<CollectionValue*, int64_t>>* collection_values);

   /// Recursive helper function for Write() to handle struct slots.
   template <bool COLLECT_VAR_LEN_VALS>
   static void WriteStruct(const void* value, Tuple* tuple,
       const SlotDescriptor* slot_desc, MemPool* pool,
       std::vector<StringValue*>* string_values,
       std::vector<std::pair<CollectionValue*, int64_t>>* collection_values);

   /// Recursive helper function for Write() to handle collection slots.
   template <bool COLLECT_VAR_LEN_VALS>
   static void WriteCollection(const void* value, Tuple* tuple,
       const SlotDescriptor* slot_desc, MemPool* pool, vector<StringValue*>* string_values,
       vector<pair<CollectionValue*, int64_t>>* collection_values);

   template <bool COLLECT_VAR_LEN_VALS>
   static void WriteCollectionChildren(const CollectionValue& dest,
       const CollectionValue& src, const SlotDescriptor& collection_slot_desc,
       MemPool* pool, vector<StringValue*>* string_values,
       vector<pair<CollectionValue*, int64_t>>* collection_values);

   template <bool COLLECT_VAR_LEN_VALS>
   static void WriteCollectionVarlenChild(Tuple* child_dest_tuple, Tuple* child_src_tuple,
       const SlotDescriptor* slot_desc, MemPool* pool, vector<StringValue*>* string_values,
       vector<pair<CollectionValue*, int64_t>>* collection_values );

   /// Gets the destination slot from 'tuple' and 'slot_desc' and writes 'value' to this
   /// slot. 'value' must be primitive and non-NULL. If COLLECT_VAR_LEN_VALS is true,
   /// collects the pointers of string slots to 'string_values'.
   template <bool COLLECT_VAR_LEN_VALS>
   static void WritePrimitiveCollectVarlen(const void* value, Tuple* tuple,
       const SlotDescriptor* slot_desc, MemPool* pool,
       std::vector<StringValue*>* string_values);
 };
 }
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.

	#pragma once

	#include <iosfwd>
	#include <string>

	#include "codegen/impala-ir.h"
	#include "runtime/collection-value.h"
	#include "runtime/types.h"

	namespace impala {

	// Forward declarations: this header only names these types in function signatures,
	// so their full definitions are not required here.
	class MemPool;
	class SlotDescriptor;
	class StringValue;
	class Tuple;

	/// Useful utility functions for runtime values (which are passed around as void*).
	class RawValue {
	public:
	/// Ascii output precision for double/float, print 16 digits.
	static const int ASCII_PRECISION = 16;

	/// Single NaN values to ensure all NaN values can be assigned one bit pattern
	/// that will always compare and hash the same way. Allows for all NaN values
	/// to be put into the same "group by" bucket.
	static constexpr double CANONICAL_DOUBLE_NAN = std::numeric_limits<double>::quiet_NaN();
	static constexpr float CANONICAL_FLOAT_NAN = std::numeric_limits<float>::quiet_NaN();
	/// The canonical zero values when comparing negative and positive zeros.
	static constexpr double CANONICAL_DOUBLE_ZERO = 0.0;
	static constexpr float CANONICAL_FLOAT_ZERO = 0.0f;

	/// Convert 'value' into ascii and write to 'stream'. NULL turns into "NULL". 'scale'
	/// determines how many digits after the decimal are printed for floating point numbers,
	/// -1 indicates to use the stream's current formatting. Doesn't support complex types.
	/// If 'quote_val' is true, write STRING, VARCHAR, CHAR, DATE, TIMESTAMP values in
	/// quoted form surrounded by double quotes.
	/// TODO: for string types, we just print the result regardless of whether or not it
	/// ascii. This could be undesirable.
	static void PrintValue(const void* value, const ColumnType& type, int scale,
	std::stringstream* stream, bool quote_val=false);

	/// Write ascii value to string instead of stringstream.
	static void PrintValue(const void* value, const ColumnType& type, int scale,
	std::string* str);

	/// Return ascii value string.
	static std::string PrintValue(const void* value, const ColumnType& type, int scale) {
	std::string str;
	PrintValue(value, type, scale, &str);
	return str;
	}

	/// Writes the byte representation of a value to a stringstream character-by-character
	static void PrintValueAsBytes(const void* value, const ColumnType& type,
	std::stringstream* stream);

	/// Returns hash value for 'v' interpreted as 'type'. The resulting hash value
	/// is combined with the seed value. Inlined in IR so that the constant 'type' can be
	/// propagated.
	static uint32_t IR_ALWAYS_INLINE GetHashValue(
	const void* v, const ColumnType& type, uint32_t seed = 0) noexcept;

	/// Templatized version of GetHashValue, use if type is known ahead. GetHashValue
	/// handles nulls. Inlined in IR so that the constant 'type' can be propagated.
	template<typename T>
	static inline uint32_t IR_ALWAYS_INLINE GetHashValue(
	const T* v, const ColumnType& type, uint32_t seed = 0) noexcept;

	/// Returns hash value for non-nullable 'v' for type T. GetHashValueNonNull doesn't
	/// handle nulls.
	template<typename T>
	static inline uint32_t GetHashValueNonNull(const T* v, const ColumnType& type,
	uint32_t seed = 0);

	/// Get a 64-bit hash value using the FastHash function.
	/// https://code.google.com/archive/p/fast-hash/
	static uint64_t GetHashValueFastHash(const void* v, const ColumnType& type,
	uint64_t seed);

	/// Templatized version of GetHashValueFastHash, use if type is known ahead.
	/// GetHashValueFastHash handles nulls. Inlined in IR so that the constant
	/// 'type' can be propagated.
	template <typename T>
	static inline uint64_t IR_ALWAYS_INLINE GetHashValueFastHash(
	const T* v, const ColumnType& type, uint64_t seed);

	/// Returns hash value for non-nullable 'v' for type T. GetHashValueFastHashNonNull
	/// doesn't handle nulls.
	template <typename T>
	static inline uint64_t GetHashValueFastHashNonNull(
	const T* v, const ColumnType& type, uint64_t seed);

	// Get a 32-bit hash value using the FastHash algorithm.
	static uint32_t IR_ALWAYS_INLINE GetHashValueFastHash32(
	const void* v, const ColumnType& type, uint32_t seed = 0) noexcept;

	/// Compares both values.
	/// Return value is < 0 if v1 < v2, 0 if v1 == v2, > 0 if v1 > v2.
	/// Inlined in IR so that the constant 'type' can be propagated.
	static int IR_ALWAYS_INLINE Compare(
	const void* v1, const void* v2, const ColumnType& type) noexcept;

	/// Writes the bytes of a given value into the slot of a tuple. Supports primitive and
	/// complex types. 'value' is allowed to be NULL. For string and collection values, the
	/// data is deep-copied into memory allocated from 'pool' if pool is non-NULL, otherwise
	/// the data is not copied.
	///
	/// If COLLECT_VAR_LEN_VALS is true, gathers the non-NULL non-smallified string slots of
	/// the slot tree into 'string_values' and the non-NULL collection slots along with
	/// their byte sizes into 'collection_values' recursively. Smallified strings (see Small
	/// String Optimization, IMPALA-12373) are not collected. Children are placed before
	/// their parents in the vectors (post-order traversal) - see Tuple::MaterializeExprs()
	/// and Sorter::Run::CollectNonNullVarSlots() for the reason. If COLLECT_VAR_LEN_VALS is
	/// true, 'string_values' and 'collection_values' must be non-NULL.
	template <bool COLLECT_VAR_LEN_VALS>
	static void Write(const void* value, Tuple* tuple, const SlotDescriptor* slot_desc,
	MemPool* pool, std::vector<StringValue> string_values,
	std::vector<std::pair<CollectionValue, int64_t>> collection_values);

	/// Convenience wrapper for the templated version with COLLECT_VAR_LEN_VALS=false.
	static void Write(const void* value, Tuple* tuple, const SlotDescriptor* slot_desc,
	MemPool* pool);

	/// Writes 'src' into 'dst' for the given primitive type. Does not support complex
	/// types. 'src' must be non-NULL. For string values, the string data is copied into
	/// 'pool' if pool is non-NULL.
	static void WriteNonNullPrimitive(const void* src, void* dst, const ColumnType& type,
	MemPool* pool);

	/// Returns true if v1 == v2.
	/// This is more performant than Compare() == 0 for string equality, mostly because of
	/// the length comparison check.
	static inline bool Eq(const void* v1, const void* v2, const ColumnType& type);

	/// Returns true if val/type correspond to a NaN floating point value.
	static inline bool IsNaN(const void* val, const ColumnType& type);

	/// Returns true if val/type correspond to a +0/-0 floating point value.
	static inline bool IsFloatingZero(const void* val, const ColumnType& type);

	/// Returns the canonical form of the given value. Currently this means a unified NaN
	/// value in case of NaN and +0 in case of +0/-0.
	static inline const void* CanonicalValue(const void* val, const ColumnType& type);

	/// Returns a canonical NaN value for a floating point type
	/// (which will always have the same bit-pattern to maintain consistency in hashing).
	static inline const void* CanonicalNaNValue(const ColumnType& type);

	// Returns positive zero for floating point types.
	static inline const void* PositiveFloatingZero(const ColumnType& type);

	// Top level null values are printed as "NULL"; collections and structs are printed in
	// JSON format, which requires "null".
	static constexpr const char* NullLiteral(bool top_level) {
	return top_level ? "NULL" : "null";
	}

	private:
	/// Like Write() but 'value' must be non-NULL.
	template <bool COLLECT_VAR_LEN_VALS>
	static void WriteNonNull(const void* value, Tuple* tuple,
	const SlotDescriptor* slot_desc, MemPool* pool,
	std::vector<StringValue> string_values,
	std::vector<std::pair<CollectionValue, int64_t>> collection_values);

	/// Recursive helper function for Write() to handle struct slots.
	template <bool COLLECT_VAR_LEN_VALS>
	static void WriteStruct(const void* value, Tuple* tuple,
	const SlotDescriptor* slot_desc, MemPool* pool,
	std::vector<StringValue> string_values,
	std::vector<std::pair<CollectionValue, int64_t>> collection_values);

	/// Recursive helper function for Write() to handle collection slots.
	template <bool COLLECT_VAR_LEN_VALS>
	static void WriteCollection(const void* value, Tuple* tuple,
	const SlotDescriptor* slot_desc, MemPool* pool, vector<StringValue> string_values,
	vector<pair<CollectionValue, int64_t>> collection_values);

	template <bool COLLECT_VAR_LEN_VALS>
	static void WriteCollectionChildren(const CollectionValue& dest,
	const CollectionValue& src, const SlotDescriptor& collection_slot_desc,
	MemPool* pool, vector<StringValue> string_values,
	vector<pair<CollectionValue, int64_t>> collection_values);

	template <bool COLLECT_VAR_LEN_VALS>
	static void WriteCollectionVarlenChild(Tuple* child_dest_tuple, Tuple* child_src_tuple,
	const SlotDescriptor* slot_desc, MemPool* pool, vector<StringValue> string_values,
	vector<pair<CollectionValue, int64_t>> collection_values );

	/// Gets the destination slot from 'tuple' and 'slot_desc' and writes 'value' to this
	/// slot. 'value' must be primitive and non-NULL. If COLLECT_VAR_LEN_VALS is true,
	/// collects the pointers of string slots to 'string_values'.
	template <bool COLLECT_VAR_LEN_VALS>
	static void WritePrimitiveCollectVarlen(const void* value, Tuple* tuple,
	const SlotDescriptor* slot_desc, MemPool* pool,
	std::vector<StringValue> string_values);
	};
	}