be/src/runtime/raw-value.cc - impala - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #include <sstream>
 #include <boost/functional/hash.hpp>

 #include "runtime/collection-value.h"
 #include "runtime/date-value.h"
 #include "runtime/raw-value.inline.h"
 #include "runtime/string-value.inline.h"
 #include "runtime/tuple.h"
 #include "util/ubsan.h"

 #include "common/names.h"

 namespace impala {

 // Out-of-class definitions of RawValue's static constants. No initializers appear
 // here, so the values themselves must come from the in-class declarations (not visible
 // in this file); these lines only give the members namespace-scope definitions.
 // NOTE(review): presumably needed so the constants can be odr-used (e.g. bound to a
 // const reference) under pre-C++17 rules -- confirm against the build's language level.
 const int RawValue::ASCII_PRECISION;
 constexpr double RawValue::CANONICAL_DOUBLE_NAN;
 constexpr float RawValue::CANONICAL_FLOAT_NAN;
 constexpr double RawValue::CANONICAL_DOUBLE_ZERO;
 constexpr float RawValue::CANONICAL_FLOAT_ZERO;

 // Appends the raw in-memory bytes of 'value' to 'stream', using 'type' to decide how
 // many bytes to emit. A NULL 'value' writes nothing.
 //  - Fixed-size types write sizeof() of their C++ representation.
 //  - TIMESTAMP writes TimestampValue::Size() bytes, CHAR writes type.len bytes and
 //    DECIMAL writes type.GetByteSize() bytes.
 //  - STRING/VARCHAR write the character data referenced by the StringValue (ptr/len),
 //    not the StringValue struct itself.
 // Types without a case below trip a DCHECK and write nothing.
 void RawValue::PrintValueAsBytes(const void* value, const ColumnType& type,
                                  stringstream* stream) {
   if (value == NULL) return;

   const char* chars = reinterpret_cast<const char*>(value);
   switch (type.type) {
     case TYPE_BOOLEAN:
       stream->write(chars, sizeof(bool));
       break;
     case TYPE_TINYINT:
       stream->write(chars, sizeof(int8_t));
       break;
     case TYPE_SMALLINT:
       stream->write(chars, sizeof(int16_t));
       break;
     case TYPE_INT:
       stream->write(chars, sizeof(int32_t));
       break;
     case TYPE_DATE:
       stream->write(chars, sizeof(DateValue));
       break;
     case TYPE_BIGINT:
       stream->write(chars, sizeof(int64_t));
       break;
     case TYPE_FLOAT:
       stream->write(chars, sizeof(float));
       break;
     case TYPE_DOUBLE:
       stream->write(chars, sizeof(double));
       break;
     case TYPE_STRING:
     case TYPE_VARCHAR: {
       // The slot holds a StringValue; emit the bytes it points at.
       const StringValue* string_val = reinterpret_cast<const StringValue*>(value);
       stream->write(string_val->ptr, string_val->len);
       break;
     }
     case TYPE_TIMESTAMP:
       stream->write(chars, TimestampValue::Size());
       break;
     case TYPE_CHAR:
       stream->write(chars, type.len);
       break;
     case TYPE_DECIMAL:
       stream->write(chars, type.GetByteSize());
       break;
     default:
       // Name this function (not PrintValue()) so a failure points at the right place.
       DCHECK(false) << "bad RawValue::PrintValueAsBytes() type: "
                     << type.DebugString();
   }
 }

 // Renders 'value' (interpreted as 'type') as text into '*str', replacing its previous
 // contents. A NULL 'value' yields "NULL". BOOLEAN, STRING, VARCHAR, CHAR and
 // FIXED_UDA_INTERMEDIATE are written straight into '*str'; every other type is
 // formatted by the stringstream overload of PrintValue(), which receives 'scale' and a
 // stream whose precision has been set to ASCII_PRECISION.
 void RawValue::PrintValue(const void* value, const ColumnType& type, int scale,
                           string* str) {
   if (value == NULL) {
     *str = "NULL";
     return;
   }

   // Special case types that we can print more efficiently without using a stringstream.
   switch (type.type) {
     case TYPE_BOOLEAN: {
       bool val = *reinterpret_cast<const bool*>(value);
       *str = (val ? "true" : "false");
       return;
     }
     case TYPE_STRING:
     case TYPE_VARCHAR: {
       const StringValue* string_val = reinterpret_cast<const StringValue*>(value);
       // Build the result in a temporary and swap it in.
       string tmp;
       tmp.assign(string_val->ptr, string_val->len);
       str->swap(tmp);
       return;
     }
     case TYPE_CHAR:
       // CHAR slots hold exactly type.len bytes inline.
       *str = string(reinterpret_cast<const char*>(value), type.len);
       return;
     case TYPE_FIXED_UDA_INTERMEDIATE:
       *str = "Intermediate UDA step, no value printed";
       return;
     default:
       break;
   }

   // Generic path. The stringstream is created only here so that the special cases
   // above never pay for constructing one.
   stringstream out;
   out.precision(ASCII_PRECISION);
   PrintValue(value, type, scale, &out);
   *str = out.str();
 }

 // Copies one value of column type 'type' from 'value' into the slot memory at 'dst'.
 // 'value' is DCHECKed to be non-NULL.
 //  - TYPE_NULL: nothing is written.
 //  - Integer/floating-point scalars, DATE and TIMESTAMP: assigned through pointers of
 //    the corresponding C++ type.
 //  - BOOLEAN, CHAR, DECIMAL: byte-copied (sizeof(bool), type.len and
 //    type.GetByteSize() bytes respectively).
 //  - STRING/VARCHAR: the length is always copied. With a non-NULL 'pool' the bytes are
 //    duplicated into memory obtained from pool->Allocate(); without one, 'dst' ends up
 //    pointing at the source's buffer.
 //  - ARRAY/MAP: only num_tuples and ptr are copied; a non-NULL 'pool' is DCHECKed
 //    against because deep copies are not implemented.
 // Any other type trips a DCHECK.
 void RawValue::Write(const void* value, void* dst, const ColumnType& type,
     MemPool* pool) {
   DCHECK(value != NULL);
   switch (type.type) {
     case TYPE_NULL:
       break;

     case TYPE_BOOLEAN:
       // A bool object may only hold a valid bool representation. 'dst' can be
       // uninitialized memory holding some other bit pattern, and touching it through a
       // bool* would then be undefined behavior, so the byte is copied with memcpy.
       memcpy(dst, value, sizeof(bool));
       break;

     case TYPE_TINYINT:
       *static_cast<int8_t*>(dst) = *static_cast<const int8_t*>(value);
       break;

     case TYPE_SMALLINT:
       *static_cast<int16_t*>(dst) = *static_cast<const int16_t*>(value);
       break;

     case TYPE_INT:
       *static_cast<int32_t*>(dst) = *static_cast<const int32_t*>(value);
       break;

     case TYPE_DATE:
       *static_cast<DateValue*>(dst) = *static_cast<const DateValue*>(value);
       break;

     case TYPE_BIGINT:
       *static_cast<int64_t*>(dst) = *static_cast<const int64_t*>(value);
       break;

     case TYPE_FLOAT:
       *static_cast<float*>(dst) = *static_cast<const float*>(value);
       break;

     case TYPE_DOUBLE:
       *static_cast<double*>(dst) = *static_cast<const double*>(value);
       break;

     case TYPE_TIMESTAMP:
       *static_cast<TimestampValue*>(dst) = *static_cast<const TimestampValue*>(value);
       break;

     case TYPE_STRING:
     case TYPE_VARCHAR: {
       const StringValue* src = static_cast<const StringValue*>(value);
       StringValue* dest = static_cast<StringValue*>(dst);
       dest->len = src->len;
       if (type.type == TYPE_VARCHAR) {
         DCHECK_LE(dest->len, type.len);
       }
       if (pool == nullptr) {
         // No pool supplied: share the source buffer instead of copying it.
         dest->ptr = src->ptr;
         break;
       }
       // Keep this on Allocate(): if it ever becomes TryAllocate(),
       // CodegenAnyVal::WriteToSlot() has to change too (its codegen'd Allocate() call
       // is emitted by CodegenAnyVal::StoreToNativePtr()).
       dest->ptr = reinterpret_cast<char*>(pool->Allocate(dest->len));
       Ubsan::MemCpy(dest->ptr, src->ptr, dest->len);
       break;
     }

     case TYPE_CHAR:
       DCHECK_EQ(type.type, TYPE_CHAR);
       // CHAR values are stored inline and occupy exactly type.len bytes.
       memcpy(dst, value, type.len);
       break;

     case TYPE_DECIMAL:
       memcpy(dst, value, type.GetByteSize());
       break;

     case TYPE_ARRAY:
     case TYPE_MAP: {
       DCHECK(pool == NULL) << "RawValue::Write(): deep copy of CollectionValues NYI";
       const CollectionValue* from = static_cast<const CollectionValue*>(value);
       CollectionValue* to = static_cast<CollectionValue*>(dst);
       // Shallow copy: both collections refer to the same tuple data afterwards.
       to->num_tuples = from->num_tuples;
       to->ptr = from->ptr;
       break;
     }

     default:
       DCHECK(false) << "RawValue::Write(): bad type: " << type.DebugString();
   }
 }

 // Stores 'value' into the slot of 'tuple' described by 'slot_desc'. A NULL 'value'
 // sets the slot's null indicator and writes nothing else; otherwise the slot-level
 // Write() overload performs the copy, with 'pool' passed through unchanged.
 void RawValue::Write(const void* value, Tuple* tuple, const SlotDescriptor* slot_desc,
                      MemPool* pool) {
   if (value == nullptr) {
     tuple->SetNull(slot_desc->null_indicator_offset());
     return;
   }
   void* const target = tuple->GetSlot(slot_desc->tuple_offset());
   RawValue::Write(value, target, slot_desc->type(), pool);
 }

 void RawValue::PrintValue(
     const void* value, const ColumnType& type, int scale, std::stringstream* stream) {
   if (value == NULL) {
     *stream << "NULL";
     return;
   }

   int old_precision = stream->precision();
   std::ios_base::fmtflags old_flags = stream->flags();
   if (scale > -1) {
     stream->precision(scale);
     // Setting 'fixed' causes precision to set the number of digits printed after the
     // decimal (by default it sets the maximum number of digits total).
     *stream << std::fixed;
   }

   const StringValue* string_val = NULL;
   switch (type.type) {
     case TYPE_BOOLEAN: {
       bool val = *reinterpret_cast<const bool*>(value);
       *stream << (val ? "true" : "false");
       return;
     }
     case TYPE_TINYINT:
       // Extra casting for chars since they should not be interpreted as ASCII.
       *stream << static_cast<int>(*reinterpret_cast<const int8_t*>(value));
       break;
     case TYPE_SMALLINT: *stream << *reinterpret_cast<const int16_t*>(value); break;
     case TYPE_INT: *stream << *reinterpret_cast<const int32_t*>(value); break;
     case TYPE_BIGINT: *stream << *reinterpret_cast<const int64_t*>(value); break;
     case TYPE_FLOAT: {
       float val = *reinterpret_cast<const float*>(value);
       if (LIKELY(std::isfinite(val))) {
         *stream << val;
       } else if (std::isinf(val)) {
         // 'Infinity' is Java's text representation of inf. By staying close to Java, we
         // allow Hive to read text tables containing non-finite values produced by
         // Impala. (The same logic applies to 'NaN', below).
         *stream << (val < 0 ? "-Infinity" : "Infinity");
       } else if (std::isnan(val)) {
         *stream << "NaN";
       }
     } break;
     case TYPE_DOUBLE: {
       double val = *reinterpret_cast<const double*>(value);
       if (LIKELY(std::isfinite(val))) {
         *stream << val;
       } else if (std::isinf(val)) {
         // See TYPE_FLOAT for rationale.
         *stream << (val < 0 ? "-Infinity" : "Infinity");
       } else if (std::isnan(val)) {
         *stream << "NaN";
       }
     } break;
     case TYPE_VARCHAR:
     case TYPE_STRING:
       string_val = reinterpret_cast<const StringValue*>(value);
       if (type.type == TYPE_VARCHAR) DCHECK(string_val->len <= type.len);
       stream->write(string_val->ptr, string_val->len);
       break;
     case TYPE_TIMESTAMP:
       *stream << *reinterpret_cast<const TimestampValue*>(value);
       break;
     case TYPE_CHAR:
       stream->write(reinterpret_cast<const char*>(value), type.len);
       break;
     case TYPE_DECIMAL:
       switch (type.GetByteSize()) {
         case 4:
           *stream << reinterpret_cast<const Decimal4Value*>(value)->ToString(type);
           break;
         case 8:
           *stream << reinterpret_cast<const Decimal8Value*>(value)->ToString(type);
           break;
         case 16:
           *stream << reinterpret_cast<const Decimal16Value*>(value)->ToString(type);
           break;
         default: DCHECK(false) << type;
       }
       break;
     case TYPE_DATE: {
         *stream << *reinterpret_cast<const DateValue*>(value);
       }
       break;
     default: DCHECK(false) << "Unknown type: " << type;
   }
   stream->precision(old_precision);
   // Undo setting stream to fixed
   stream->flags(old_flags);
 }
 }
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.

	#include <sstream>
	#include <boost/functional/hash.hpp>

	#include "runtime/collection-value.h"
	#include "runtime/date-value.h"
	#include "runtime/raw-value.inline.h"
	#include "runtime/string-value.inline.h"
	#include "runtime/tuple.h"
	#include "util/ubsan.h"

	#include "common/names.h"

	namespace impala {

	// Out-of-class definitions of RawValue's static constants. No initializers appear
	// here, so the values themselves must come from the in-class declarations (not visible
	// in this file); these lines only give the members namespace-scope definitions.
	// NOTE(review): presumably needed so the constants can be odr-used (e.g. bound to a
	// const reference) under pre-C++17 rules -- confirm against the build's language level.
	const int RawValue::ASCII_PRECISION;
	constexpr double RawValue::CANONICAL_DOUBLE_NAN;
	constexpr float RawValue::CANONICAL_FLOAT_NAN;
	constexpr double RawValue::CANONICAL_DOUBLE_ZERO;
	constexpr float RawValue::CANONICAL_FLOAT_ZERO;

	// Appends the raw in-memory bytes of 'value' to 'stream', using 'type' to decide how
	// many bytes to emit. A NULL 'value' writes nothing.
	//  - Fixed-size types write sizeof() of their C++ representation.
	//  - TIMESTAMP writes TimestampValue::Size() bytes, CHAR writes type.len bytes and
	//    DECIMAL writes type.GetByteSize() bytes.
	//  - STRING/VARCHAR write the character data referenced by the StringValue (ptr/len),
	//    not the StringValue struct itself.
	// Types without a case below trip a DCHECK and write nothing.
	void RawValue::PrintValueAsBytes(const void* value, const ColumnType& type,
	                                 stringstream* stream) {
	  if (value == NULL) return;

	  const char* chars = reinterpret_cast<const char*>(value);
	  switch (type.type) {
	    case TYPE_BOOLEAN:
	      stream->write(chars, sizeof(bool));
	      break;
	    case TYPE_TINYINT:
	      stream->write(chars, sizeof(int8_t));
	      break;
	    case TYPE_SMALLINT:
	      stream->write(chars, sizeof(int16_t));
	      break;
	    case TYPE_INT:
	      stream->write(chars, sizeof(int32_t));
	      break;
	    case TYPE_DATE:
	      stream->write(chars, sizeof(DateValue));
	      break;
	    case TYPE_BIGINT:
	      stream->write(chars, sizeof(int64_t));
	      break;
	    case TYPE_FLOAT:
	      stream->write(chars, sizeof(float));
	      break;
	    case TYPE_DOUBLE:
	      stream->write(chars, sizeof(double));
	      break;
	    case TYPE_STRING:
	    case TYPE_VARCHAR: {
	      // The slot holds a StringValue; emit the bytes it points at.
	      const StringValue* string_val = reinterpret_cast<const StringValue*>(value);
	      stream->write(string_val->ptr, string_val->len);
	      break;
	    }
	    case TYPE_TIMESTAMP:
	      stream->write(chars, TimestampValue::Size());
	      break;
	    case TYPE_CHAR:
	      stream->write(chars, type.len);
	      break;
	    case TYPE_DECIMAL:
	      stream->write(chars, type.GetByteSize());
	      break;
	    default:
	      // Name this function (not PrintValue()) so a failure points at the right place.
	      DCHECK(false) << "bad RawValue::PrintValueAsBytes() type: "
	                    << type.DebugString();
	  }
	}

	// Renders 'value' (interpreted as 'type') as text into '*str', replacing its previous
	// contents. A NULL 'value' yields "NULL". BOOLEAN, STRING, VARCHAR, CHAR and
	// FIXED_UDA_INTERMEDIATE are written straight into '*str'; every other type is
	// formatted by the stringstream overload of PrintValue(), which receives 'scale' and a
	// stream whose precision has been set to ASCII_PRECISION.
	void RawValue::PrintValue(const void* value, const ColumnType& type, int scale,
	                          string* str) {
	  if (value == NULL) {
	    *str = "NULL";
	    return;
	  }

	  // Special case types that we can print more efficiently without using a stringstream.
	  switch (type.type) {
	    case TYPE_BOOLEAN: {
	      // Read the bool stored at 'value'; the pointer itself is not the value.
	      bool val = *reinterpret_cast<const bool*>(value);
	      *str = (val ? "true" : "false");
	      return;
	    }
	    case TYPE_STRING:
	    case TYPE_VARCHAR: {
	      const StringValue* string_val = reinterpret_cast<const StringValue*>(value);
	      // Build the result in a temporary and swap it in.
	      string tmp;
	      tmp.assign(string_val->ptr, string_val->len);
	      str->swap(tmp);
	      return;
	    }
	    case TYPE_CHAR:
	      // CHAR slots hold exactly type.len bytes inline. Assign through '*str' so the
	      // caller's string is updated.
	      *str = string(reinterpret_cast<const char*>(value), type.len);
	      return;
	    case TYPE_FIXED_UDA_INTERMEDIATE:
	      *str = "Intermediate UDA step, no value printed";
	      return;
	    default:
	      break;
	  }

	  // Generic path. The stringstream is created only here so that the special cases
	  // above never pay for constructing one.
	  stringstream out;
	  out.precision(ASCII_PRECISION);
	  PrintValue(value, type, scale, &out);
	  *str = out.str();
	}

	void RawValue::Write(const void* value, void* dst, const ColumnType& type,
	MemPool* pool) {
	DCHECK(value != NULL);
	switch (type.type) {
	case TYPE_NULL:
	break;
	case TYPE_BOOLEAN:
	// Unlike the other scalar types, bool has a limited set of valid values, so if
	// 'dst' is uninitialized memory and happens to point to a value that is not a valid
	// bool, then dereferencing it via reinterpret_cast<bool>(dst) is undefined
	// behavior.
	memcpy(dst, value, sizeof(bool));
	break;
	case TYPE_TINYINT:
	reinterpret_cast<int8_t>(dst) = reinterpret_cast<const int8_t>(value);
	break;
	case TYPE_SMALLINT:
	reinterpret_cast<int16_t>(dst) = reinterpret_cast<const int16_t>(value);
	break;
	case TYPE_INT:
	reinterpret_cast<int32_t>(dst) = reinterpret_cast<const int32_t>(value);
	break;
	case TYPE_DATE:
	reinterpret_cast<DateValue>(dst) = reinterpret_cast<const DateValue>(value);
	break;
	case TYPE_BIGINT:
	reinterpret_cast<int64_t>(dst) = reinterpret_cast<const int64_t>(value);
	break;
	case TYPE_FLOAT:
	reinterpret_cast<float>(dst) = reinterpret_cast<const float>(value);
	break;
	case TYPE_DOUBLE:
	reinterpret_cast<double>(dst) = reinterpret_cast<const double>(value);
	break;
	case TYPE_TIMESTAMP:
	reinterpret_cast<TimestampValue>(dst) =
	reinterpret_cast<const TimestampValue>(value);
	break;
	case TYPE_STRING:
	case TYPE_VARCHAR: {
	const StringValue* src = reinterpret_cast<const StringValue*>(value);
	StringValue* dest = reinterpret_cast<StringValue*>(dst);
	dest->len = src->len;
	if (type.type == TYPE_VARCHAR) DCHECK_LE(dest->len, type.len);
	if (pool != NULL) {
	// Note: if this changes to TryAllocate(), CodegenAnyVal::WriteToSlot() will need
	// to reflect this change as well (the codegen'd Allocate() call is actually
	// generated in CodegenAnyVal::StoreToNativePtr()).
	dest->ptr = reinterpret_cast<char*>(pool->Allocate(dest->len));
	Ubsan::MemCpy(dest->ptr, src->ptr, dest->len);
	} else {
	dest->ptr = src->ptr;
	}
	break;
	}
	case TYPE_CHAR:
	DCHECK_EQ(type.type, TYPE_CHAR);
	memcpy(dst, value, type.len);
	break;
	case TYPE_DECIMAL:
	memcpy(dst, value, type.GetByteSize());
	break;
	case TYPE_ARRAY:
	case TYPE_MAP: {
	DCHECK(pool == NULL) << "RawValue::Write(): deep copy of CollectionValues NYI";
	const CollectionValue* src = reinterpret_cast<const CollectionValue*>(value);
	CollectionValue* dest = reinterpret_cast<CollectionValue*>(dst);
	dest->num_tuples = src->num_tuples;
	dest->ptr = src->ptr;
	break;
	}
	default:
	DCHECK(false) << "RawValue::Write(): bad type: " << type.DebugString();
	}
	}

	// Stores 'value' into the slot of 'tuple' described by 'slot_desc'. A NULL 'value'
	// sets the slot's null indicator and writes nothing else; otherwise the slot-level
	// Write() overload performs the copy, with 'pool' passed through unchanged.
	void RawValue::Write(const void* value, Tuple* tuple, const SlotDescriptor* slot_desc,
	                     MemPool* pool) {
	  if (value == nullptr) {
	    tuple->SetNull(slot_desc->null_indicator_offset());
	    return;
	  }
	  void* const target = tuple->GetSlot(slot_desc->tuple_offset());
	  RawValue::Write(value, target, slot_desc->type(), pool);
	}

	void RawValue::PrintValue(
	const void* value, const ColumnType& type, int scale, std::stringstream* stream) {
	if (value == NULL) {
	*stream << "NULL";
	return;
	}

	int old_precision = stream->precision();
	std::ios_base::fmtflags old_flags = stream->flags();
	if (scale > -1) {
	stream->precision(scale);
	// Setting 'fixed' causes precision to set the number of digits printed after the
	// decimal (by default it sets the maximum number of digits total).
	*stream << std::fixed;
	}

	const StringValue* string_val = NULL;
	switch (type.type) {
	case TYPE_BOOLEAN: {
	bool val = reinterpret_cast<const bool>(value);
	*stream << (val ? "true" : "false");
	return;
	}
	case TYPE_TINYINT:
	// Extra casting for chars since they should not be interpreted as ASCII.
	stream << static_cast<int>(reinterpret_cast<const int8_t*>(value));
	break;
	case TYPE_SMALLINT: stream << reinterpret_cast<const int16_t*>(value); break;
	case TYPE_INT: stream << reinterpret_cast<const int32_t*>(value); break;
	case TYPE_BIGINT: stream << reinterpret_cast<const int64_t*>(value); break;
	case TYPE_FLOAT: {
	float val = reinterpret_cast<const float>(value);
	if (LIKELY(std::isfinite(val))) {
	*stream << val;
	} else if (std::isinf(val)) {
	// 'Infinity' is Java's text representation of inf. By staying close to Java, we
	// allow Hive to read text tables containing non-finite values produced by
	// Impala. (The same logic applies to 'NaN', below).
	*stream << (val < 0 ? "-Infinity" : "Infinity");
	} else if (std::isnan(val)) {
	*stream << "NaN";
	}
	} break;
	case TYPE_DOUBLE: {
	double val = reinterpret_cast<const double>(value);
	if (LIKELY(std::isfinite(val))) {
	*stream << val;
	} else if (std::isinf(val)) {
	// See TYPE_FLOAT for rationale.
	*stream << (val < 0 ? "-Infinity" : "Infinity");
	} else if (std::isnan(val)) {
	*stream << "NaN";
	}
	} break;
	case TYPE_VARCHAR:
	case TYPE_STRING:
	string_val = reinterpret_cast<const StringValue*>(value);
	if (type.type == TYPE_VARCHAR) DCHECK(string_val->len <= type.len);
	stream->write(string_val->ptr, string_val->len);
	break;
	case TYPE_TIMESTAMP:
	stream << reinterpret_cast<const TimestampValue*>(value);
	break;
	case TYPE_CHAR:
	stream->write(reinterpret_cast<const char*>(value), type.len);
	break;
	case TYPE_DECIMAL:
	switch (type.GetByteSize()) {
	case 4:
	stream << reinterpret_cast<const Decimal4Value>(value)->ToString(type);
	break;
	case 8:
	stream << reinterpret_cast<const Decimal8Value>(value)->ToString(type);
	break;
	case 16:
	stream << reinterpret_cast<const Decimal16Value>(value)->ToString(type);
	break;
	default: DCHECK(false) << type;
	}
	break;
	case TYPE_DATE: {
	stream << reinterpret_cast<const DateValue*>(value);
	}
	break;
	default: DCHECK(false) << "Unknown type: " << type;
	}
	stream->precision(old_precision);
	// Undo setting stream to fixed
	stream->flags(old_flags);
	}
	}