be/src/runtime/types.h - impala - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.


 #ifndef IMPALA_RUNTIME_TYPE_H
 #define IMPALA_RUNTIME_TYPE_H

 #include <string>

 #include "common/logging.h"
 #include "gen-cpp/Types_types.h"  // for TPrimitiveType
 #include "gen-cpp/TCLIService_types.h"  // for HiveServer2 Type

 namespace llvm {
   class ConstantStruct;
 }

 namespace impala {

 class LlvmCodeGen;

 // TODO for 2.3: move into ColumnType, rename to Type, and remove TYPE_ prefix
 enum PrimitiveType {
   INVALID_TYPE = 0,
   TYPE_NULL,
   TYPE_BOOLEAN,
   TYPE_TINYINT,
   TYPE_SMALLINT,
   TYPE_INT,
   TYPE_BIGINT,
   TYPE_FLOAT,
   TYPE_DOUBLE,
   TYPE_TIMESTAMP,
   TYPE_STRING,
   TYPE_DATE,
   TYPE_DATETIME,    // Not implemented
   TYPE_BINARY,      // Not implemented
   TYPE_DECIMAL,
   TYPE_CHAR,
   TYPE_VARCHAR,
   TYPE_FIXED_UDA_INTERMEDIATE,

   TYPE_STRUCT,
   TYPE_ARRAY,
   TYPE_MAP
 };

 PrimitiveType ThriftToType(TPrimitiveType::type ttype);
 TPrimitiveType::type ToThrift(PrimitiveType ptype);
 std::string TypeToString(PrimitiveType t);
 std::string TypeToOdbcString(PrimitiveType t);

 // Describes a type. Includes the enum, children types, and any type-specific metadata
 // (e.g. precision and scale for decimals).
 // TODO for 2.3: rename to TypeDescriptor
 struct ColumnType {
   PrimitiveType type;
   /// Only set if type one of TYPE_CHAR, TYPE_VARCHAR, TYPE_FIXED_UDA_INTERMEDIATE.
   int len;
   static const int MAX_VARCHAR_LENGTH = (1 << 16) - 1; // 65535
   static const int MAX_CHAR_LENGTH = (1 << 8) - 1; // 255

   /// Only set if type == TYPE_DECIMAL
   int precision, scale;

   /// Must be kept in sync with FE's max precision/scale.
   static const int MAX_PRECISION = 38;
   static const int MAX_SCALE = MAX_PRECISION;
   static const int MIN_ADJUSTED_SCALE = 6;

   /// The maximum precision representable by a 4-byte decimal (Decimal4Value)
   static const int MAX_DECIMAL4_PRECISION = 9;
   /// The maximum precision representable by a 8-byte decimal (Decimal8Value)
   static const int MAX_DECIMAL8_PRECISION = 18;

   /// Empty for scalar types
   std::vector<ColumnType> children;

   /// Only set if type == TYPE_STRUCT. The field name of each child.
   std::vector<std::string> field_names;

   static const char* LLVM_CLASS_NAME;

   ColumnType(PrimitiveType type = INVALID_TYPE)
     : type(type), len(-1), precision(-1), scale(-1) {
     DCHECK_NE(type, TYPE_CHAR);
     DCHECK_NE(type, TYPE_VARCHAR);
     DCHECK_NE(type, TYPE_DECIMAL);
     DCHECK_NE(type, TYPE_STRUCT);
     DCHECK_NE(type, TYPE_ARRAY);
     DCHECK_NE(type, TYPE_MAP);
     DCHECK_NE(type, TYPE_FIXED_UDA_INTERMEDIATE);
   }

   static ColumnType CreateCharType(int len) {
     DCHECK_GE(len, 1);
     DCHECK_LE(len, MAX_CHAR_LENGTH);
     ColumnType ret;
     ret.type = TYPE_CHAR;
     ret.len = len;
     return ret;
   }

   static ColumnType CreateVarcharType(int len) {
     DCHECK_GE(len, 1);
     DCHECK_LE(len, MAX_VARCHAR_LENGTH);
     ColumnType ret;
     ret.type = TYPE_VARCHAR;
     ret.len = len;
     return ret;
   }

   static ColumnType CreateFixedUdaIntermediateType(int len) {
     DCHECK_GE(len, 1);
     ColumnType ret;
     ret.type = TYPE_FIXED_UDA_INTERMEDIATE;
     ret.len = len;
     return ret;
   }

   static bool ValidateDecimalParams(int precision, int scale) {
     return precision >= 1 && precision <= MAX_PRECISION && scale >= 0
         && scale <= MAX_SCALE && scale <= precision;
   }

   static ColumnType CreateDecimalType(int precision, int scale) {
     DCHECK(ValidateDecimalParams(precision, scale)) << precision << ", " << scale;
     ColumnType ret;
     ret.type = TYPE_DECIMAL;
     ret.precision = precision;
     ret.scale = scale;
     return ret;
   }

   // Matches the results of createAdjustedDecimalType in front-end code.
   static ColumnType CreateAdjustedDecimalType(int precision, int scale) {
     if (precision > MAX_PRECISION) {
       int min_scale = std::min(scale, MIN_ADJUSTED_SCALE);
       int delta = precision - MAX_PRECISION;
       precision = MAX_PRECISION;
       scale = std::max(scale - delta, min_scale);
     }
     return CreateDecimalType(precision, scale);
   }

   static ColumnType FromThrift(const TColumnType& t) {
     int idx = 0;
     ColumnType result(t.types, &idx);
     DCHECK_EQ(idx, t.types.size() - 1);
     return result;
   }

   static std::vector<ColumnType> FromThrift(const std::vector<TColumnType>& ttypes);

   bool operator==(const ColumnType& o) const {
     if (type != o.type) return false;
     if (children != o.children) return false;
     if (type == TYPE_CHAR || type == TYPE_FIXED_UDA_INTERMEDIATE) return len == o.len;
     if (type == TYPE_DECIMAL) return precision == o.precision && scale == o.scale;
     return true;
   }

   bool operator!=(const ColumnType& other) const {
     return !(*this == other);
   }

   TColumnType ToThrift() const {
     TColumnType thrift_type;
     ToThrift(&thrift_type);
     return thrift_type;
   }

   inline bool IsBooleanType() const { return type == TYPE_BOOLEAN; }

   inline bool IsIntegerType() const {
     return type == TYPE_TINYINT || type == TYPE_SMALLINT || type == TYPE_INT
         || type == TYPE_BIGINT;
   }

   inline bool IsFloatingPointType() const {
     return type == TYPE_FLOAT || type == TYPE_DOUBLE;
   }

   inline bool IsDecimalType() const { return type == TYPE_DECIMAL; }

   inline bool IsStringType() const {
     return type == TYPE_STRING || type == TYPE_VARCHAR || type == TYPE_CHAR;
   }

   inline bool IsTimestampType() const { return type == TYPE_TIMESTAMP; }

   inline bool IsDateType() const { return type == TYPE_DATE; }

   inline bool IsVarLenStringType() const {
     return type == TYPE_STRING || type == TYPE_VARCHAR;
   }

   inline bool IsComplexType() const {
     return type == TYPE_STRUCT || type == TYPE_ARRAY || type == TYPE_MAP;
   }

   inline bool IsCollectionType() const {
     return type == TYPE_ARRAY || type == TYPE_MAP;
   }

   inline bool IsVarLenType() const {
     return IsVarLenStringType() || IsCollectionType();
   }

   /// Returns the byte size of this type.  Returns 0 for variable length types.
   inline int GetByteSize() const {
     switch (type) {
       case TYPE_ARRAY:
       case TYPE_MAP:
       case TYPE_STRING:
       case TYPE_VARCHAR:
         return 0;
       case TYPE_CHAR:
       case TYPE_FIXED_UDA_INTERMEDIATE:
         return len;
       case TYPE_NULL:
       case TYPE_BOOLEAN:
       case TYPE_TINYINT:
         return 1;
       case TYPE_SMALLINT:
         return 2;
       case TYPE_INT:
       case TYPE_DATE:
       case TYPE_FLOAT:
         return 4;
       case TYPE_BIGINT:
       case TYPE_DOUBLE:
         return 8;
       case TYPE_TIMESTAMP:
         // This is the size of the slot, the actual size of the data is 12.
         return 16;
       case TYPE_DECIMAL:
         return GetDecimalByteSize(precision);
       case INVALID_TYPE:
       default:
         DCHECK(false) << "NYI: " << type;
     }
     return 0;
   }

   /// Returns the size of a slot for this type.
   inline int GetSlotSize() const {
     switch (type) {
       case TYPE_STRING:
       case TYPE_VARCHAR:
         return 12;
       case TYPE_CHAR:
       case TYPE_FIXED_UDA_INTERMEDIATE:
         return len;
       case TYPE_ARRAY:
       case TYPE_MAP:
         return 12;
       case TYPE_STRUCT:
         DCHECK(false) << "TYPE_STRUCT slot not possible";
       default:
         return GetByteSize();
     }
   }

   static inline int GetDecimalByteSize(int precision) {
     DCHECK_GT(precision, 0);
     if (precision <= MAX_DECIMAL4_PRECISION) return 4;
     if (precision <= MAX_DECIMAL8_PRECISION) return 8;
     return 16;
   }

   /// Returns the IR version of this ColumnType. Only implemented for scalar types. LLVM
   /// optimizer can pull out fields of the returned ConstantStruct for constant folding.
   llvm::ConstantStruct* ToIR(LlvmCodeGen* codegen) const;

   apache::hive::service::cli::thrift::TTypeEntry ToHs2Type() const;
   std::string DebugString() const;

  private:
   /// Used to create a possibly nested type from the flattened Thrift representation.
   ///
   /// 'idx' is an in/out parameter that is initially set to the index of the type in
   /// 'types' being constructed, and is set to the index of the next type in 'types' that
   /// needs to be processed (or the size 'types' if all nodes have been processed).
   ColumnType(const std::vector<TTypeNode>& types, int* idx);

   /// Recursive implementation of ToThrift() that populates 'thrift_type' with the
   /// TTypeNodes for this type and its children.
   void ToThrift(TColumnType* thrift_type) const;
 };

 std::ostream& operator<<(std::ostream& os, const ColumnType& type);

 }

 #endif
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.


	#ifndef IMPALA_RUNTIME_TYPE_H
	#define IMPALA_RUNTIME_TYPE_H

	#include <string>

	#include "common/logging.h"
	#include "gen-cpp/Types_types.h" // for TPrimitiveType
	#include "gen-cpp/TCLIService_types.h" // for HiveServer2 Type

	namespace llvm {
	class ConstantStruct;
	}

	namespace impala {

	class LlvmCodeGen;

	// TODO for 2.3: move into ColumnType, rename to Type, and remove TYPE_ prefix
	enum PrimitiveType {
	INVALID_TYPE = 0,
	TYPE_NULL,
	TYPE_BOOLEAN,
	TYPE_TINYINT,
	TYPE_SMALLINT,
	TYPE_INT,
	TYPE_BIGINT,
	TYPE_FLOAT,
	TYPE_DOUBLE,
	TYPE_TIMESTAMP,
	TYPE_STRING,
	TYPE_DATE,
	TYPE_DATETIME, // Not implemented
	TYPE_BINARY, // Not implemented
	TYPE_DECIMAL,
	TYPE_CHAR,
	TYPE_VARCHAR,
	TYPE_FIXED_UDA_INTERMEDIATE,

	TYPE_STRUCT,
	TYPE_ARRAY,
	TYPE_MAP
	};

	PrimitiveType ThriftToType(TPrimitiveType::type ttype);
	TPrimitiveType::type ToThrift(PrimitiveType ptype);
	std::string TypeToString(PrimitiveType t);
	std::string TypeToOdbcString(PrimitiveType t);

	// Describes a type. Includes the enum, children types, and any type-specific metadata
	// (e.g. precision and scale for decimals).
	// TODO for 2.3: rename to TypeDescriptor
	struct ColumnType {
	PrimitiveType type;
	/// Only set if type one of TYPE_CHAR, TYPE_VARCHAR, TYPE_FIXED_UDA_INTERMEDIATE.
	int len;
	static const int MAX_VARCHAR_LENGTH = (1 << 16) - 1; // 65535
	static const int MAX_CHAR_LENGTH = (1 << 8) - 1; // 255

	/// Only set if type == TYPE_DECIMAL
	int precision, scale;

	/// Must be kept in sync with FE's max precision/scale.
	static const int MAX_PRECISION = 38;
	static const int MAX_SCALE = MAX_PRECISION;
	static const int MIN_ADJUSTED_SCALE = 6;

	/// The maximum precision representable by a 4-byte decimal (Decimal4Value)
	static const int MAX_DECIMAL4_PRECISION = 9;
	/// The maximum precision representable by a 8-byte decimal (Decimal8Value)
	static const int MAX_DECIMAL8_PRECISION = 18;

	/// Empty for scalar types
	std::vector<ColumnType> children;

	/// Only set if type == TYPE_STRUCT. The field name of each child.
	std::vector<std::string> field_names;

	static const char* LLVM_CLASS_NAME;

	ColumnType(PrimitiveType type = INVALID_TYPE)
	: type(type), len(-1), precision(-1), scale(-1) {
	DCHECK_NE(type, TYPE_CHAR);
	DCHECK_NE(type, TYPE_VARCHAR);
	DCHECK_NE(type, TYPE_DECIMAL);
	DCHECK_NE(type, TYPE_STRUCT);
	DCHECK_NE(type, TYPE_ARRAY);
	DCHECK_NE(type, TYPE_MAP);
	DCHECK_NE(type, TYPE_FIXED_UDA_INTERMEDIATE);
	}

	static ColumnType CreateCharType(int len) {
	DCHECK_GE(len, 1);
	DCHECK_LE(len, MAX_CHAR_LENGTH);
	ColumnType ret;
	ret.type = TYPE_CHAR;
	ret.len = len;
	return ret;
	}

	static ColumnType CreateVarcharType(int len) {
	DCHECK_GE(len, 1);
	DCHECK_LE(len, MAX_VARCHAR_LENGTH);
	ColumnType ret;
	ret.type = TYPE_VARCHAR;
	ret.len = len;
	return ret;
	}

	static ColumnType CreateFixedUdaIntermediateType(int len) {
	DCHECK_GE(len, 1);
	ColumnType ret;
	ret.type = TYPE_FIXED_UDA_INTERMEDIATE;
	ret.len = len;
	return ret;
	}

	static bool ValidateDecimalParams(int precision, int scale) {
	return precision >= 1 && precision <= MAX_PRECISION && scale >= 0
	&& scale <= MAX_SCALE && scale <= precision;
	}

	static ColumnType CreateDecimalType(int precision, int scale) {
	DCHECK(ValidateDecimalParams(precision, scale)) << precision << ", " << scale;
	ColumnType ret;
	ret.type = TYPE_DECIMAL;
	ret.precision = precision;
	ret.scale = scale;
	return ret;
	}

	// Matches the results of createAdjustedDecimalType in front-end code.
	static ColumnType CreateAdjustedDecimalType(int precision, int scale) {
	if (precision > MAX_PRECISION) {
	int min_scale = std::min(scale, MIN_ADJUSTED_SCALE);
	int delta = precision - MAX_PRECISION;
	precision = MAX_PRECISION;
	scale = std::max(scale - delta, min_scale);
	}
	return CreateDecimalType(precision, scale);
	}

	static ColumnType FromThrift(const TColumnType& t) {
	int idx = 0;
	ColumnType result(t.types, &idx);
	DCHECK_EQ(idx, t.types.size() - 1);
	return result;
	}

	static std::vector<ColumnType> FromThrift(const std::vector<TColumnType>& ttypes);

	bool operator==(const ColumnType& o) const {
	if (type != o.type) return false;
	if (children != o.children) return false;
	if (type == TYPE_CHAR \|\| type == TYPE_FIXED_UDA_INTERMEDIATE) return len == o.len;
	if (type == TYPE_DECIMAL) return precision == o.precision && scale == o.scale;
	return true;
	}

	bool operator!=(const ColumnType& other) const {
	return !(*this == other);
	}

	TColumnType ToThrift() const {
	TColumnType thrift_type;
	ToThrift(&thrift_type);
	return thrift_type;
	}

	inline bool IsBooleanType() const { return type == TYPE_BOOLEAN; }

	inline bool IsIntegerType() const {
	return type == TYPE_TINYINT \|\| type == TYPE_SMALLINT \|\| type == TYPE_INT
	\|\| type == TYPE_BIGINT;
	}

	inline bool IsFloatingPointType() const {
	return type == TYPE_FLOAT \|\| type == TYPE_DOUBLE;
	}

	inline bool IsDecimalType() const { return type == TYPE_DECIMAL; }

	inline bool IsStringType() const {
	return type == TYPE_STRING \|\| type == TYPE_VARCHAR \|\| type == TYPE_CHAR;
	}

	inline bool IsTimestampType() const { return type == TYPE_TIMESTAMP; }

	inline bool IsDateType() const { return type == TYPE_DATE; }

	inline bool IsVarLenStringType() const {
	return type == TYPE_STRING \|\| type == TYPE_VARCHAR;
	}

	inline bool IsComplexType() const {
	return type == TYPE_STRUCT \|\| type == TYPE_ARRAY \|\| type == TYPE_MAP;
	}

	inline bool IsCollectionType() const {
	return type == TYPE_ARRAY \|\| type == TYPE_MAP;
	}

	inline bool IsVarLenType() const {
	return IsVarLenStringType() \|\| IsCollectionType();
	}

	/// Returns the byte size of this type. Returns 0 for variable length types.
	inline int GetByteSize() const {
	switch (type) {
	case TYPE_ARRAY:
	case TYPE_MAP:
	case TYPE_STRING:
	case TYPE_VARCHAR:
	return 0;
	case TYPE_CHAR:
	case TYPE_FIXED_UDA_INTERMEDIATE:
	return len;
	case TYPE_NULL:
	case TYPE_BOOLEAN:
	case TYPE_TINYINT:
	return 1;
	case TYPE_SMALLINT:
	return 2;
	case TYPE_INT:
	case TYPE_DATE:
	case TYPE_FLOAT:
	return 4;
	case TYPE_BIGINT:
	case TYPE_DOUBLE:
	return 8;
	case TYPE_TIMESTAMP:
	// This is the size of the slot, the actual size of the data is 12.
	return 16;
	case TYPE_DECIMAL:
	return GetDecimalByteSize(precision);
	case INVALID_TYPE:
	default:
	DCHECK(false) << "NYI: " << type;
	}
	return 0;
	}

	/// Returns the size of a slot for this type.
	inline int GetSlotSize() const {
	switch (type) {
	case TYPE_STRING:
	case TYPE_VARCHAR:
	return 12;
	case TYPE_CHAR:
	case TYPE_FIXED_UDA_INTERMEDIATE:
	return len;
	case TYPE_ARRAY:
	case TYPE_MAP:
	return 12;
	case TYPE_STRUCT:
	DCHECK(false) << "TYPE_STRUCT slot not possible";
	default:
	return GetByteSize();
	}
	}

	static inline int GetDecimalByteSize(int precision) {
	DCHECK_GT(precision, 0);
	if (precision <= MAX_DECIMAL4_PRECISION) return 4;
	if (precision <= MAX_DECIMAL8_PRECISION) return 8;
	return 16;
	}

	/// Returns the IR version of this ColumnType. Only implemented for scalar types. LLVM
	/// optimizer can pull out fields of the returned ConstantStruct for constant folding.
	llvm::ConstantStruct* ToIR(LlvmCodeGen* codegen) const;

	apache::hive::service::cli::thrift::TTypeEntry ToHs2Type() const;
	std::string DebugString() const;

	private:
	/// Used to create a possibly nested type from the flattened Thrift representation.
	///
	/// 'idx' is an in/out parameter that is initially set to the index of the type in
	/// 'types' being constructed, and is set to the index of the next type in 'types' that
	/// needs to be processed (or the size 'types' if all nodes have been processed).
	ColumnType(const std::vector<TTypeNode>& types, int* idx);

	/// Recursive implementation of ToThrift() that populates 'thrift_type' with the
	/// TTypeNodes for this type and its children.
	void ToThrift(TColumnType* thrift_type) const;
	};

	std::ostream& operator<<(std::ostream& os, const ColumnType& type);

	}

	#endif