blob: 27c5f53a97165706c734e7f59bf526d9a2efe357 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <sstream>
#include <boost/functional/hash.hpp>
#include "runtime/collection-value.h"
#include "runtime/date-value.h"
#include "runtime/raw-value.inline.h"
#include "runtime/string-value.inline.h"
#include "runtime/tuple.h"
#include "util/ubsan.h"
#include "common/names.h"
namespace impala {
// Namespace-scope definitions of RawValue's static constants. None of them carries an
// initializer here, so the values come from the in-class declarations (not visible in
// this file). Definitions like these give the constants storage so that they can be
// odr-used, e.g. bound to a const reference.
// NOTE(review): if this file is built as C++17 or later, the constexpr members are
// implicitly inline and their redeclarations here are redundant -- confirm before
// removing.
const int RawValue::ASCII_PRECISION;
constexpr double RawValue::CANONICAL_DOUBLE_NAN;
constexpr float RawValue::CANONICAL_FLOAT_NAN;
constexpr double RawValue::CANONICAL_DOUBLE_ZERO;
constexpr float RawValue::CANONICAL_FLOAT_ZERO;
// Appends the raw bytes of 'value', interpreted as 'type', to 'stream'.
//
// Nothing is written when 'value' is NULL. For fixed-width types the bytes stored at
// 'value' are written verbatim. For STRING/VARCHAR, 'value' is treated as a
// StringValue and the 'len' bytes starting at its 'ptr' are written. CHAR writes
// 'type.len' bytes and DECIMAL writes 'type.GetByteSize()' bytes.
// Any other type hits a DCHECK.
void RawValue::PrintValueAsBytes(const void* value, const ColumnType& type,
    stringstream* stream) {
  if (value == NULL) return;

  const char* chars = reinterpret_cast<const char*>(value);
  switch (type.type) {
    case TYPE_BOOLEAN:
      stream->write(chars, sizeof(bool));
      break;
    case TYPE_TINYINT:
      stream->write(chars, sizeof(int8_t));
      break;
    case TYPE_SMALLINT:
      stream->write(chars, sizeof(int16_t));
      break;
    case TYPE_INT:
      stream->write(chars, sizeof(int32_t));
      break;
    case TYPE_DATE:
      stream->write(chars, sizeof(DateValue));
      break;
    case TYPE_BIGINT:
      stream->write(chars, sizeof(int64_t));
      break;
    case TYPE_FLOAT:
      stream->write(chars, sizeof(float));
      break;
    case TYPE_DOUBLE:
      stream->write(chars, sizeof(double));
      break;
    case TYPE_STRING:
    case TYPE_VARCHAR: {
      // The slot holds a StringValue; the payload lives in the buffer it points to.
      const StringValue* string_val = reinterpret_cast<const StringValue*>(value);
      stream->write(string_val->ptr, string_val->len);
      break;
    }
    case TYPE_TIMESTAMP:
      stream->write(chars, TimestampValue::Size());
      break;
    case TYPE_CHAR:
      stream->write(chars, type.len);
      break;
    case TYPE_DECIMAL:
      stream->write(chars, type.GetByteSize());
      break;
    default:
      // Name this function (not PrintValue()) so a failure points at the right place.
      DCHECK(false) << "bad RawValue::PrintValueAsBytes() type: "
                    << type.DebugString();
  }
}
// Stores a human-readable representation of 'value', interpreted as 'type', in '*str'.
//
// A NULL 'value' yields "NULL". BOOLEAN, STRING/VARCHAR, CHAR and
// FIXED_UDA_INTERMEDIATE are handled directly without a stringstream. Every other
// type is delegated to the stringstream overload of PrintValue(), with the stream
// precision set to ASCII_PRECISION and 'scale' passed through unchanged.
void RawValue::PrintValue(const void* value, const ColumnType& type, int scale,
    string* str) {
  if (value == NULL) {
    *str = "NULL";
    return;
  }

  // Special case types that we can print more efficiently without using a stringstream.
  switch (type.type) {
    case TYPE_BOOLEAN: {
      const bool val = *reinterpret_cast<const bool*>(value);
      *str = (val ? "true" : "false");
      return;
    }
    case TYPE_STRING:
    case TYPE_VARCHAR: {
      const StringValue* string_val = reinterpret_cast<const StringValue*>(value);
      // Build into a temporary and swap, so '*str' is only replaced once the copy is
      // complete.
      string tmp(string_val->ptr, string_val->len);
      str->swap(tmp);
      return;
    }
    case TYPE_CHAR:
      *str = string(reinterpret_cast<const char*>(value), type.len);
      return;
    case TYPE_FIXED_UDA_INTERMEDIATE:
      *str = "Intermediate UDA step, no value printed";
      return;
    default: {
      // Only the generic path needs a stringstream, so it is constructed here rather
      // than up front; the fast paths above never pay for it.
      stringstream out;
      out.precision(ASCII_PRECISION);
      PrintValue(value, type, scale, &out);
      *str = out.str();
    }
  }
}
void RawValue::Write(const void* value, void* dst, const ColumnType& type,
MemPool* pool) {
DCHECK(value != NULL);
switch (type.type) {
case TYPE_NULL:
break;
case TYPE_BOOLEAN:
// Unlike the other scalar types, bool has a limited set of valid values, so if
// 'dst' is uninitialized memory and happens to point to a value that is not a valid
// bool, then dereferencing it via *reinterpret_cast<bool*>(dst) is undefined
// behavior.
memcpy(dst, value, sizeof(bool));
break;
case TYPE_TINYINT:
*reinterpret_cast<int8_t*>(dst) = *reinterpret_cast<const int8_t*>(value);
break;
case TYPE_SMALLINT:
*reinterpret_cast<int16_t*>(dst) = *reinterpret_cast<const int16_t*>(value);
break;
case TYPE_INT:
*reinterpret_cast<int32_t*>(dst) = *reinterpret_cast<const int32_t*>(value);
break;
case TYPE_DATE:
*reinterpret_cast<DateValue*>(dst) = *reinterpret_cast<const DateValue*>(value);
break;
case TYPE_BIGINT:
*reinterpret_cast<int64_t*>(dst) = *reinterpret_cast<const int64_t*>(value);
break;
case TYPE_FLOAT:
*reinterpret_cast<float*>(dst) = *reinterpret_cast<const float*>(value);
break;
case TYPE_DOUBLE:
*reinterpret_cast<double*>(dst) = *reinterpret_cast<const double*>(value);
break;
case TYPE_TIMESTAMP:
*reinterpret_cast<TimestampValue*>(dst) =
*reinterpret_cast<const TimestampValue*>(value);
break;
case TYPE_STRING:
case TYPE_VARCHAR: {
const StringValue* src = reinterpret_cast<const StringValue*>(value);
StringValue* dest = reinterpret_cast<StringValue*>(dst);
dest->len = src->len;
if (type.type == TYPE_VARCHAR) DCHECK_LE(dest->len, type.len);
if (pool != NULL) {
// Note: if this changes to TryAllocate(), CodegenAnyVal::WriteToSlot() will need
// to reflect this change as well (the codegen'd Allocate() call is actually
// generated in CodegenAnyVal::StoreToNativePtr()).
dest->ptr = reinterpret_cast<char*>(pool->Allocate(dest->len));
Ubsan::MemCpy(dest->ptr, src->ptr, dest->len);
} else {
dest->ptr = src->ptr;
}
break;
}
case TYPE_CHAR:
DCHECK_EQ(type.type, TYPE_CHAR);
memcpy(dst, value, type.len);
break;
case TYPE_DECIMAL:
memcpy(dst, value, type.GetByteSize());
break;
case TYPE_ARRAY:
case TYPE_MAP: {
DCHECK(pool == NULL) << "RawValue::Write(): deep copy of CollectionValues NYI";
const CollectionValue* src = reinterpret_cast<const CollectionValue*>(value);
CollectionValue* dest = reinterpret_cast<CollectionValue*>(dst);
dest->num_tuples = src->num_tuples;
dest->ptr = src->ptr;
break;
}
default:
DCHECK(false) << "RawValue::Write(): bad type: " << type.DebugString();
}
}
// Writes 'value' into the slot of 'tuple' described by 'slot_desc'.
//
// A NULL 'value' marks the slot as null via its null-indicator offset and writes
// nothing else. Otherwise the value is written into the slot at the descriptor's
// tuple offset by the (value, dst, type, pool) overload of Write(), with 'pool'
// forwarded unchanged.
void RawValue::Write(const void* value, Tuple* tuple, const SlotDescriptor* slot_desc,
    MemPool* pool) {
  if (value == NULL) {
    tuple->SetNull(slot_desc->null_indicator_offset());
    return;
  }
  void* target_slot = tuple->GetSlot(slot_desc->tuple_offset());
  RawValue::Write(value, target_slot, slot_desc->type(), pool);
}
void RawValue::PrintValue(
const void* value, const ColumnType& type, int scale, std::stringstream* stream) {
if (value == NULL) {
*stream << "NULL";
return;
}
int old_precision = stream->precision();
std::ios_base::fmtflags old_flags = stream->flags();
if (scale > -1) {
stream->precision(scale);
// Setting 'fixed' causes precision to set the number of digits printed after the
// decimal (by default it sets the maximum number of digits total).
*stream << std::fixed;
}
const StringValue* string_val = NULL;
switch (type.type) {
case TYPE_BOOLEAN: {
bool val = *reinterpret_cast<const bool*>(value);
*stream << (val ? "true" : "false");
return;
}
case TYPE_TINYINT:
// Extra casting for chars since they should not be interpreted as ASCII.
*stream << static_cast<int>(*reinterpret_cast<const int8_t*>(value));
break;
case TYPE_SMALLINT: *stream << *reinterpret_cast<const int16_t*>(value); break;
case TYPE_INT: *stream << *reinterpret_cast<const int32_t*>(value); break;
case TYPE_BIGINT: *stream << *reinterpret_cast<const int64_t*>(value); break;
case TYPE_FLOAT: {
float val = *reinterpret_cast<const float*>(value);
if (LIKELY(std::isfinite(val))) {
*stream << val;
} else if (std::isinf(val)) {
// 'Infinity' is Java's text representation of inf. By staying close to Java, we
// allow Hive to read text tables containing non-finite values produced by
// Impala. (The same logic applies to 'NaN', below).
*stream << (val < 0 ? "-Infinity" : "Infinity");
} else if (std::isnan(val)) {
*stream << "NaN";
}
} break;
case TYPE_DOUBLE: {
double val = *reinterpret_cast<const double*>(value);
if (LIKELY(std::isfinite(val))) {
*stream << val;
} else if (std::isinf(val)) {
// See TYPE_FLOAT for rationale.
*stream << (val < 0 ? "-Infinity" : "Infinity");
} else if (std::isnan(val)) {
*stream << "NaN";
}
} break;
case TYPE_VARCHAR:
case TYPE_STRING:
string_val = reinterpret_cast<const StringValue*>(value);
if (type.type == TYPE_VARCHAR) DCHECK(string_val->len <= type.len);
stream->write(string_val->ptr, string_val->len);
break;
case TYPE_TIMESTAMP:
*stream << *reinterpret_cast<const TimestampValue*>(value);
break;
case TYPE_CHAR:
stream->write(reinterpret_cast<const char*>(value), type.len);
break;
case TYPE_DECIMAL:
switch (type.GetByteSize()) {
case 4:
*stream << reinterpret_cast<const Decimal4Value*>(value)->ToString(type);
break;
case 8:
*stream << reinterpret_cast<const Decimal8Value*>(value)->ToString(type);
break;
case 16:
*stream << reinterpret_cast<const Decimal16Value*>(value)->ToString(type);
break;
default: DCHECK(false) << type;
}
break;
case TYPE_DATE: {
*stream << *reinterpret_cast<const DateValue*>(value);
}
break;
default: DCHECK(false) << "Unknown type: " << type;
}
stream->precision(old_precision);
// Undo setting stream to fixed
stream->flags(old_flags);
}
}