| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| |
| #ifndef IMPALA_RUNTIME_RAW_VALUE_INLINE_H |
| #define IMPALA_RUNTIME_RAW_VALUE_INLINE_H |
| |
| #include "runtime/raw-value.h" |
| |
| #include <cmath> |
| |
| #include <boost/functional/hash.hpp> |
| |
| #include "common/logging.h" |
| #include "runtime/date-value.h" |
| #include "runtime/decimal-value.inline.h" |
| #include "runtime/string-value.inline.h" |
| #include "runtime/timestamp-value.h" |
| #include "util/hash-util.h" |
| |
| namespace impala { |
| |
| /// Arbitrary constants used to compute hash values for special cases. Constants were |
| /// obtained by taking lower bytes of generated UUID. NULL and empty strings should |
| /// hash to different values. |
| static const uint32_t HASH_VAL_NULL = 0x58081667; |
| static const uint32_t HASH_VAL_EMPTY = 0x7dca7eee; |
| |
| inline bool RawValue::IsNaN(const void* val, const ColumnType& type) { |
| switch(type.type) { |
| case TYPE_FLOAT: |
| return std::isnan(*reinterpret_cast<const float*>(val)); |
| case TYPE_DOUBLE: |
| return std::isnan(*reinterpret_cast<const double*>(val)); |
| default: |
| return false; |
| } |
| } |
| |
| inline const void* RawValue::CanonicalNaNValue(const ColumnType& type) { |
| switch(type.type) { |
| case TYPE_FLOAT: |
| return &CANONICAL_FLOAT_NAN; |
| case TYPE_DOUBLE: |
| return &CANONICAL_DOUBLE_NAN; |
| default: |
| DCHECK(false); |
| return nullptr; |
| } |
| } |
| |
| inline bool RawValue::Eq(const void* v1, const void* v2, const ColumnType& type) { |
| const StringValue* string_value1; |
| const StringValue* string_value2; |
| switch (type.type) { |
| case TYPE_BOOLEAN: |
| return *reinterpret_cast<const bool*>(v1) |
| == *reinterpret_cast<const bool*>(v2); |
| case TYPE_TINYINT: |
| return *reinterpret_cast<const int8_t*>(v1) |
| == *reinterpret_cast<const int8_t*>(v2); |
| case TYPE_SMALLINT: |
| return *reinterpret_cast<const int16_t*>(v1) |
| == *reinterpret_cast<const int16_t*>(v2); |
| case TYPE_INT: |
| return *reinterpret_cast<const int32_t*>(v1) |
| == *reinterpret_cast<const int32_t*>(v2); |
| case TYPE_DATE: |
| return *reinterpret_cast<const DateValue*>(v1) |
| == *reinterpret_cast<const DateValue*>(v2); |
| case TYPE_BIGINT: |
| return *reinterpret_cast<const int64_t*>(v1) |
| == *reinterpret_cast<const int64_t*>(v2); |
| case TYPE_FLOAT: |
| return *reinterpret_cast<const float*>(v1) |
| == *reinterpret_cast<const float*>(v2); |
| case TYPE_DOUBLE: |
| return *reinterpret_cast<const double*>(v1) |
| == *reinterpret_cast<const double*>(v2); |
| case TYPE_STRING: |
| case TYPE_VARCHAR: |
| string_value1 = reinterpret_cast<const StringValue*>(v1); |
| string_value2 = reinterpret_cast<const StringValue*>(v2); |
| return string_value1->Eq(*string_value2); |
| case TYPE_TIMESTAMP: |
| return *reinterpret_cast<const TimestampValue*>(v1) == |
| *reinterpret_cast<const TimestampValue*>(v2); |
| case TYPE_CHAR: { |
| const char* v1ptr = reinterpret_cast<const char*>(v1); |
| const char* v2ptr = reinterpret_cast<const char*>(v2); |
| int64_t l1 = StringValue::UnpaddedCharLength(v1ptr, type.len); |
| int64_t l2 = StringValue::UnpaddedCharLength(v2ptr, type.len); |
| return StringCompare(v1ptr, l1, v2ptr, l2, std::min(l1, l2)) == 0; |
| } |
| case TYPE_DECIMAL: |
| switch (type.GetByteSize()) { |
| case 4: |
| return reinterpret_cast<const Decimal4Value*>(v1)->value() |
| == reinterpret_cast<const Decimal4Value*>(v2)->value(); |
| case 8: |
| return reinterpret_cast<const Decimal8Value*>(v1)->value() |
| == reinterpret_cast<const Decimal8Value*>(v2)->value(); |
| case 16: |
| return reinterpret_cast<const Decimal16Value*>(v1)->value() |
| == reinterpret_cast<const Decimal16Value*>(v2)->value(); |
| default: |
| break; |
| } |
| default: |
| DCHECK(false) << type; |
| return 0; |
| } |
| } |
| |
| template<> |
| inline uint32_t RawValue::GetHashValueNonNull<bool>(const bool* v, |
| const ColumnType& type, uint32_t seed) { |
| DCHECK_EQ(type.type, TYPE_BOOLEAN); |
| DCHECK(v != NULL); |
| return HashUtil::HashCombine32(*reinterpret_cast<const bool*>(v), seed); |
| } |
| |
| template<> |
| inline uint32_t RawValue::GetHashValueNonNull<int8_t>(const int8_t* v, |
| const ColumnType& type, uint32_t seed) { |
| DCHECK_EQ(type.type, TYPE_TINYINT); |
| DCHECK(v != NULL); |
| return HashUtil::MurmurHash2_64(v, 1, seed); |
| } |
| |
| template<> |
| inline uint32_t RawValue::GetHashValueNonNull<int16_t>(const int16_t* v, |
| const ColumnType& type, uint32_t seed) { |
| DCHECK_EQ(type.type, TYPE_SMALLINT); |
| DCHECK(v != NULL); |
| return HashUtil::MurmurHash2_64(v, 2, seed); |
| } |
| |
| template<> |
| inline uint32_t RawValue::GetHashValueNonNull<int32_t>(const int32_t* v, |
| const ColumnType& type, uint32_t seed) { |
| DCHECK_EQ(type.type, TYPE_INT); |
| DCHECK(v != NULL); |
| return HashUtil::MurmurHash2_64(v, 4, seed); |
| } |
| |
| template<> |
| inline uint32_t RawValue::GetHashValueNonNull<int64_t>(const int64_t* v, |
| const ColumnType& type, uint32_t seed) { |
| DCHECK_EQ(type.type, TYPE_BIGINT); |
| DCHECK(v != NULL); |
| return HashUtil::MurmurHash2_64(v, 8, seed); |
| } |
| |
| template<> |
| inline uint32_t RawValue::GetHashValueNonNull<float>(const float* v, |
| const ColumnType& type, uint32_t seed) { |
| DCHECK_EQ(type.type, TYPE_FLOAT); |
| DCHECK(v != NULL); |
| if (std::isnan(*v)) v = &RawValue::CANONICAL_FLOAT_NAN; |
| return HashUtil::MurmurHash2_64(v, 4, seed); |
| } |
| |
| template<> |
| inline uint32_t RawValue::GetHashValueNonNull<double>(const double* v, |
| const ColumnType& type, uint32_t seed) { |
| DCHECK_EQ(type.type, TYPE_DOUBLE); |
| DCHECK(v != NULL); |
| if (std::isnan(*v)) v = &RawValue::CANONICAL_DOUBLE_NAN; |
| return HashUtil::MurmurHash2_64(v, 8, seed); |
| } |
| |
| template<> |
| inline uint32_t RawValue::GetHashValueNonNull<impala::StringValue>( |
| const impala::StringValue* v,const ColumnType& type, uint32_t seed) { |
| DCHECK(v != NULL); |
| if (type.type == TYPE_CHAR) { |
| // This is a inlined CHAR(n) slot. |
| // TODO: this is a bit wonky since it's not really a StringValue*. Handle CHAR(n) |
| // in a separate function. |
| return HashUtil::MurmurHash2_64(v, type.len, seed); |
| } else { |
| DCHECK(type.type == TYPE_STRING || type.type == TYPE_VARCHAR); |
| if (v->len == 0) { |
| return HashUtil::HashCombine32(HASH_VAL_EMPTY, seed); |
| } |
| return HashUtil::MurmurHash2_64(v->ptr, v->len, seed); |
| } |
| } |
| |
| template<> |
| inline uint32_t RawValue::GetHashValueNonNull<TimestampValue>( |
| const TimestampValue* v, const ColumnType& type, uint32_t seed) { |
| DCHECK_EQ(type.type, TYPE_TIMESTAMP); |
| DCHECK(v != NULL); |
| return HashUtil::MurmurHash2_64(v, 12, seed); |
| } |
| |
| template<> |
| inline uint32_t RawValue::GetHashValueNonNull<DateValue>(const DateValue* v, |
| const ColumnType& type, uint32_t seed) { |
| DCHECK_EQ(type.type, TYPE_DATE); |
| DCHECK(v != NULL); |
| return HashUtil::MurmurHash2_64(v, 4, seed); |
| } |
| |
| template<> |
| inline uint32_t RawValue::GetHashValueNonNull<Decimal4Value>( |
| const Decimal4Value* v, const ColumnType& type, uint32_t seed) { |
| DCHECK_EQ(type.type, TYPE_DECIMAL); |
| DCHECK(v != NULL); |
| return HashUtil::MurmurHash2_64(v, 4, seed); |
| } |
| |
| template<> |
| inline uint32_t RawValue::GetHashValueNonNull<Decimal8Value>( |
| const Decimal8Value* v, const ColumnType& type, uint32_t seed) { |
| DCHECK_EQ(type.type, TYPE_DECIMAL); |
| DCHECK(v != NULL); |
| return HashUtil::MurmurHash2_64(v, 8, seed); |
| } |
| |
| template<> |
| inline uint32_t RawValue::GetHashValueNonNull<Decimal16Value>( |
| const Decimal16Value* v, const ColumnType& type, uint32_t seed) { |
| DCHECK_EQ(type.type, TYPE_DECIMAL); |
| DCHECK(v != NULL); |
| return HashUtil::MurmurHash2_64(v, 16, seed); |
| } |
| |
| template<typename T> |
| inline uint32_t RawValue::GetHashValue(const T* v, const ColumnType& type, |
| uint32_t seed) noexcept { |
| // Use HashCombine with arbitrary constant to ensure we don't return seed. |
| if (UNLIKELY(v == NULL)) return HashUtil::HashCombine32(HASH_VAL_NULL, seed); |
| return RawValue::GetHashValueNonNull<T>(v, type, seed); |
| } |
| |
| } |
| |
| #endif |