blob: 916d96c4829255146f01fddab04ddad9fad4b2ae [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "runtime/raw-value.h"
#include <cmath>
#include "runtime/decimal-value.inline.h"
#include "runtime/raw-value.inline.h"
#include "runtime/string-value.inline.h"
#include "runtime/timestamp-value.h"
#include "util/hash-util.h"
using namespace impala;
// Three-way comparison of the two values pointed to by 'v1' and 'v2', both of which
// are interpreted according to 'type'.
//
// Returns 0 when the values compare equal, a negative number when *v1 orders before
// *v2 and a positive number when *v1 orders after *v2. Only the sign is meaningful:
// the narrow integer types return the raw difference, the remaining types return
// -1 / 0 / 1 or whatever the type's own Compare() yields.
//
// Floating point NaN handling: two NaNs compare equal, and a NaN orders before every
// non-NaN value.
//
// Neither pointer is checked for null here; both are dereferenced unconditionally
// for every type other than TYPE_NULL.
int IR_ALWAYS_INLINE RawValue::Compare(
    const void* v1, const void* v2, const ColumnType& type) noexcept {
  switch (type.type) {
    case TYPE_NULL:
      return 0;
    case TYPE_BOOLEAN: {
      // Both operands are promoted to int, so the subtraction cannot overflow.
      const bool lhs = *reinterpret_cast<const bool*>(v1);
      const bool rhs = *reinterpret_cast<const bool*>(v2);
      return lhs - rhs;
    }
    case TYPE_TINYINT: {
      const int8_t lhs = *reinterpret_cast<const int8_t*>(v1);
      const int8_t rhs = *reinterpret_cast<const int8_t*>(v2);
      return lhs - rhs;
    }
    case TYPE_SMALLINT: {
      const int16_t lhs = *reinterpret_cast<const int16_t*>(v1);
      const int16_t rhs = *reinterpret_cast<const int16_t*>(v2);
      return lhs - rhs;
    }
    case TYPE_INT: {
      // A plain difference could overflow int here, hence the explicit comparisons.
      const int32_t lhs = *reinterpret_cast<const int32_t*>(v1);
      const int32_t rhs = *reinterpret_cast<const int32_t*>(v2);
      if (lhs > rhs) return 1;
      if (lhs < rhs) return -1;
      return 0;
    }
    case TYPE_DATE: {
      const DateValue& lhs = *reinterpret_cast<const DateValue*>(v1);
      const DateValue& rhs = *reinterpret_cast<const DateValue*>(v2);
      if (lhs > rhs) return 1;
      if (lhs < rhs) return -1;
      return 0;
    }
    case TYPE_BIGINT: {
      const int64_t lhs = *reinterpret_cast<const int64_t*>(v1);
      const int64_t rhs = *reinterpret_cast<const int64_t*>(v2);
      if (lhs > rhs) return 1;
      if (lhs < rhs) return -1;
      return 0;
    }
    case TYPE_FLOAT: {
      // TODO: can this be faster? (just returning the difference has underflow problems)
      const float lhs = *reinterpret_cast<const float*>(v1);
      const float rhs = *reinterpret_cast<const float*>(v2);
      // NaN == NaN, and NaN sorts before everything else.
      if (UNLIKELY(std::isnan(lhs))) return std::isnan(rhs) ? 0 : -1;
      if (UNLIKELY(std::isnan(rhs))) return 1;
      if (lhs > rhs) return 1;
      if (lhs < rhs) return -1;
      return 0;
    }
    case TYPE_DOUBLE: {
      // TODO: can this be faster?
      const double lhs = *reinterpret_cast<const double*>(v1);
      const double rhs = *reinterpret_cast<const double*>(v2);
      // Same NaN ordering as TYPE_FLOAT.
      if (std::isnan(lhs)) return std::isnan(rhs) ? 0 : -1;
      if (std::isnan(rhs)) return 1;
      if (lhs > rhs) return 1;
      if (lhs < rhs) return -1;
      return 0;
    }
    case TYPE_STRING:
    case TYPE_VARCHAR: {
      const StringValue& lhs = *reinterpret_cast<const StringValue*>(v1);
      const StringValue& rhs = *reinterpret_cast<const StringValue*>(v2);
      return lhs.Compare(rhs);
    }
    case TYPE_TIMESTAMP: {
      const TimestampValue& lhs = *reinterpret_cast<const TimestampValue*>(v1);
      const TimestampValue& rhs = *reinterpret_cast<const TimestampValue*>(v2);
      if (lhs > rhs) return 1;
      if (lhs < rhs) return -1;
      return 0;
    }
    case TYPE_CHAR: {
      // CHAR values are read as raw character buffers of 'type.len' bytes; the
      // comparison uses the lengths reported by UnpaddedCharLength().
      const char* lhs_chars = reinterpret_cast<const char*>(v1);
      const char* rhs_chars = reinterpret_cast<const char*>(v2);
      const int64_t lhs_len = StringValue::UnpaddedCharLength(lhs_chars, type.len);
      const int64_t rhs_len = StringValue::UnpaddedCharLength(rhs_chars, type.len);
      const int64_t common_len = std::min(lhs_len, rhs_len);
      return StringCompare(lhs_chars, lhs_len, rhs_chars, rhs_len, common_len);
    }
    case TYPE_DECIMAL:
      // The storage width selects which decimal representation is in use.
      switch (type.GetByteSize()) {
        case 4: {
          const Decimal4Value& lhs = *reinterpret_cast<const Decimal4Value*>(v1);
          const Decimal4Value& rhs = *reinterpret_cast<const Decimal4Value*>(v2);
          return lhs.Compare(rhs);
        }
        case 8: {
          const Decimal8Value& lhs = *reinterpret_cast<const Decimal8Value*>(v1);
          const Decimal8Value& rhs = *reinterpret_cast<const Decimal8Value*>(v2);
          return lhs.Compare(rhs);
        }
        case 16: {
          const Decimal16Value& lhs = *reinterpret_cast<const Decimal16Value*>(v1);
          const Decimal16Value& rhs = *reinterpret_cast<const Decimal16Value*>(v2);
          return lhs.Compare(rhs);
        }
        default:
          DCHECK(false) << type;
          return 0;
      }
    default:
      DCHECK(false) << "invalid type: " << type.DebugString();
      return 0;
  }
}
// Computes a 32-bit hash of the value pointed to by 'v', interpreted according to
// 'type', mixed with 'seed'.
//
// A null 'v' is hashed by combining HASH_VAL_NULL with 'seed'. Every non-null value
// is dispatched to the GetHashValueNonNull<T>() specialization matching its type;
// decimals are dispatched on their storage width (4, 8 or 16 bytes).
//
// An unsupported type, or a decimal with an unexpected byte size, trips a DCHECK and
// yields 0.
uint32_t IR_ALWAYS_INLINE RawValue::GetHashValue(
    const void* v, const ColumnType& type, uint32_t seed) noexcept {
  // The choice of hash function needs to be consistent across all hosts of the cluster.
  // Use HashCombine with arbitrary constant to ensure we don't return seed.
  if (v == nullptr) return HashUtil::HashCombine32(HASH_VAL_NULL, seed);
  switch (type.type) {
    // NOTE(review): TYPE_CHAR is routed through the StringValue specialization even
    // though 'v' is cast, not converted; presumably that specialization inspects 'type'
    // to handle CHAR(n) storage - confirm against GetHashValueNonNull<StringValue>.
    case TYPE_CHAR:
    case TYPE_STRING:
    case TYPE_VARCHAR:
      return RawValue::GetHashValueNonNull<impala::StringValue>(
          reinterpret_cast<const StringValue*>(v), type, seed);
    case TYPE_BOOLEAN:
      return RawValue::GetHashValueNonNull<bool>(
          reinterpret_cast<const bool*>(v), type, seed);
    case TYPE_TINYINT:
      return RawValue::GetHashValueNonNull<int8_t>(
          reinterpret_cast<const int8_t*>(v), type, seed);
    case TYPE_SMALLINT:
      return RawValue::GetHashValueNonNull<int16_t>(
          reinterpret_cast<const int16_t*>(v), type, seed);
    case TYPE_INT:
      return RawValue::GetHashValueNonNull<int32_t>(
          reinterpret_cast<const int32_t*>(v), type, seed);
    case TYPE_DATE:
      return RawValue::GetHashValueNonNull<DateValue>(
          reinterpret_cast<const DateValue*>(v), type, seed);
    case TYPE_BIGINT:
      return RawValue::GetHashValueNonNull<int64_t>(
          reinterpret_cast<const int64_t*>(v), type, seed);
    case TYPE_FLOAT:
      return RawValue::GetHashValueNonNull<float>(
          reinterpret_cast<const float*>(v), type, seed);
    case TYPE_DOUBLE:
      return RawValue::GetHashValueNonNull<double>(
          reinterpret_cast<const double*>(v), type, seed);
    case TYPE_TIMESTAMP:
      return RawValue::GetHashValueNonNull<TimestampValue>(
          reinterpret_cast<const TimestampValue*>(v), type, seed);
    case TYPE_DECIMAL:
      switch (type.GetByteSize()) {
        case 4:
          return RawValue::GetHashValueNonNull<Decimal4Value>(
              reinterpret_cast<const impala::Decimal4Value*>(v), type, seed);
        case 8:
          return RawValue::GetHashValueNonNull<Decimal8Value>(
              reinterpret_cast<const Decimal8Value*>(v), type, seed);
        case 16:
          return RawValue::GetHashValueNonNull<Decimal16Value>(
              reinterpret_cast<const Decimal16Value*>(v), type, seed);
        default:
          // Explicit default: every path out of the nested switch returns, so an
          // unexpected decimal width is reported here instead of relying on
          // fallthrough into the outer default label.
          DCHECK(false) << type;
          return 0;
      }
    default:
      DCHECK(false) << "invalid type: " << type.DebugString();
      return 0;
  }
}
// Computes a 64-bit hash of the value pointed to by 'v', interpreted according to
// 'type', using HashUtil::FastHash64() seeded with 'seed'.
//
// A null 'v' hashes the bytes of HASH_VAL_NULL. STRING/VARCHAR values hash the
// character data referenced by the StringValue; every other supported type hashes a
// fixed number of bytes starting at 'v'. An unsupported type trips a DCHECK and
// yields 0.
uint64_t IR_ALWAYS_INLINE RawValue::GetHashValueFastHash(const void* v,
    const ColumnType& type, uint64_t seed) {
  // Hash with an arbitrary constant to ensure we don't return seed.
  if (v == nullptr) {
    return HashUtil::FastHash64(&HASH_VAL_NULL, sizeof(HASH_VAL_NULL), seed);
  }

  switch (type.type) {
    // Variable-length strings: hash the referenced bytes, not the StringValue itself.
    case TYPE_STRING:
    case TYPE_VARCHAR: {
      const StringValue* str = reinterpret_cast<const StringValue*>(v);
      return HashUtil::FastHash64(str->ptr, static_cast<size_t>(str->len), seed);
    }

    // Fixed-width values, grouped by the number of bytes that get hashed.
    case TYPE_BOOLEAN:
    case TYPE_TINYINT:
      return HashUtil::FastHash64(v, 1, seed);
    case TYPE_SMALLINT:
      return HashUtil::FastHash64(v, 2, seed);
    case TYPE_INT:
    case TYPE_FLOAT:
    case TYPE_DATE:
      return HashUtil::FastHash64(v, 4, seed);
    case TYPE_BIGINT:
    case TYPE_DOUBLE:
      return HashUtil::FastHash64(v, 8, seed);
    case TYPE_TIMESTAMP:
      return HashUtil::FastHash64(v, 12, seed);

    // Widths that come from the column type itself.
    case TYPE_CHAR:
      return HashUtil::FastHash64(v, type.len, seed);
    case TYPE_DECIMAL:
      return HashUtil::FastHash64(v, type.GetByteSize(), seed);

    default:
      DCHECK(false);
      return 0;
  }
}