blob: 87f0e6869bd140e3e372cec033f5fc5ab4d47f2a [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_RUNTIME_TYPE_H
#define IMPALA_RUNTIME_TYPE_H
#include <string>
#include "common/logging.h"
#include "gen-cpp/Types_types.h" // for TPrimitiveType
#include "gen-cpp/TCLIService_types.h" // for HiveServer2 Type
namespace llvm {
class ConstantStruct;
}
namespace impala {
class LlvmCodeGen;
// TODO for 2.3: move into ColumnType, rename to Type, and remove TYPE_ prefix
enum PrimitiveType {
INVALID_TYPE = 0,
TYPE_NULL,
TYPE_BOOLEAN,
TYPE_TINYINT,
TYPE_SMALLINT,
TYPE_INT,
TYPE_BIGINT,
TYPE_FLOAT,
TYPE_DOUBLE,
TYPE_TIMESTAMP,
TYPE_STRING,
TYPE_DATE,
TYPE_DATETIME, // Not implemented
TYPE_BINARY, // Not implemented
TYPE_DECIMAL,
TYPE_CHAR,
TYPE_VARCHAR,
TYPE_FIXED_UDA_INTERMEDIATE,
TYPE_STRUCT,
TYPE_ARRAY,
TYPE_MAP
};
PrimitiveType ThriftToType(TPrimitiveType::type ttype);
TPrimitiveType::type ToThrift(PrimitiveType ptype);
std::string TypeToString(PrimitiveType t);
std::string TypeToOdbcString(PrimitiveType t);
// Describes a type. Includes the enum, children types, and any type-specific metadata
// (e.g. precision and scale for decimals).
// TODO for 2.3: rename to TypeDescriptor
struct ColumnType {
PrimitiveType type;
/// Only set if type one of TYPE_CHAR, TYPE_VARCHAR, TYPE_FIXED_UDA_INTERMEDIATE.
int len;
static const int MAX_VARCHAR_LENGTH = (1 << 16) - 1; // 65535
static const int MAX_CHAR_LENGTH = (1 << 8) - 1; // 255
/// Only set if type == TYPE_DECIMAL
int precision, scale;
/// Must be kept in sync with FE's max precision/scale.
static const int MAX_PRECISION = 38;
static const int MAX_SCALE = MAX_PRECISION;
static const int MIN_ADJUSTED_SCALE = 6;
/// The maximum precision representable by a 4-byte decimal (Decimal4Value)
static const int MAX_DECIMAL4_PRECISION = 9;
/// The maximum precision representable by a 8-byte decimal (Decimal8Value)
static const int MAX_DECIMAL8_PRECISION = 18;
/// Empty for scalar types
std::vector<ColumnType> children;
/// Only set if type == TYPE_STRUCT. The field name of each child.
std::vector<std::string> field_names;
static const char* LLVM_CLASS_NAME;
ColumnType(PrimitiveType type = INVALID_TYPE)
: type(type), len(-1), precision(-1), scale(-1) {
DCHECK_NE(type, TYPE_CHAR);
DCHECK_NE(type, TYPE_VARCHAR);
DCHECK_NE(type, TYPE_DECIMAL);
DCHECK_NE(type, TYPE_STRUCT);
DCHECK_NE(type, TYPE_ARRAY);
DCHECK_NE(type, TYPE_MAP);
DCHECK_NE(type, TYPE_FIXED_UDA_INTERMEDIATE);
}
static ColumnType CreateCharType(int len) {
DCHECK_GE(len, 1);
DCHECK_LE(len, MAX_CHAR_LENGTH);
ColumnType ret;
ret.type = TYPE_CHAR;
ret.len = len;
return ret;
}
static ColumnType CreateVarcharType(int len) {
DCHECK_GE(len, 1);
DCHECK_LE(len, MAX_VARCHAR_LENGTH);
ColumnType ret;
ret.type = TYPE_VARCHAR;
ret.len = len;
return ret;
}
static ColumnType CreateFixedUdaIntermediateType(int len) {
DCHECK_GE(len, 1);
ColumnType ret;
ret.type = TYPE_FIXED_UDA_INTERMEDIATE;
ret.len = len;
return ret;
}
static bool ValidateDecimalParams(int precision, int scale) {
return precision >= 1 && precision <= MAX_PRECISION && scale >= 0
&& scale <= MAX_SCALE && scale <= precision;
}
static ColumnType CreateDecimalType(int precision, int scale) {
DCHECK(ValidateDecimalParams(precision, scale)) << precision << ", " << scale;
ColumnType ret;
ret.type = TYPE_DECIMAL;
ret.precision = precision;
ret.scale = scale;
return ret;
}
// Matches the results of createAdjustedDecimalType in front-end code.
static ColumnType CreateAdjustedDecimalType(int precision, int scale) {
if (precision > MAX_PRECISION) {
int min_scale = std::min(scale, MIN_ADJUSTED_SCALE);
int delta = precision - MAX_PRECISION;
precision = MAX_PRECISION;
scale = std::max(scale - delta, min_scale);
}
return CreateDecimalType(precision, scale);
}
static ColumnType FromThrift(const TColumnType& t) {
int idx = 0;
ColumnType result(t.types, &idx);
DCHECK_EQ(idx, t.types.size() - 1);
return result;
}
static std::vector<ColumnType> FromThrift(const std::vector<TColumnType>& ttypes);
bool operator==(const ColumnType& o) const {
if (type != o.type) return false;
if (children != o.children) return false;
if (type == TYPE_CHAR || type == TYPE_FIXED_UDA_INTERMEDIATE) return len == o.len;
if (type == TYPE_DECIMAL) return precision == o.precision && scale == o.scale;
return true;
}
bool operator!=(const ColumnType& other) const {
return !(*this == other);
}
TColumnType ToThrift() const {
TColumnType thrift_type;
ToThrift(&thrift_type);
return thrift_type;
}
inline bool IsBooleanType() const { return type == TYPE_BOOLEAN; }
inline bool IsIntegerType() const {
return type == TYPE_TINYINT || type == TYPE_SMALLINT || type == TYPE_INT
|| type == TYPE_BIGINT;
}
inline bool IsFloatingPointType() const {
return type == TYPE_FLOAT || type == TYPE_DOUBLE;
}
inline bool IsDecimalType() const { return type == TYPE_DECIMAL; }
inline bool IsStringType() const {
return type == TYPE_STRING || type == TYPE_VARCHAR || type == TYPE_CHAR;
}
inline bool IsTimestampType() const { return type == TYPE_TIMESTAMP; }
inline bool IsDateType() const { return type == TYPE_DATE; }
inline bool IsVarLenStringType() const {
return type == TYPE_STRING || type == TYPE_VARCHAR;
}
inline bool IsComplexType() const {
return type == TYPE_STRUCT || type == TYPE_ARRAY || type == TYPE_MAP;
}
inline bool IsCollectionType() const {
return type == TYPE_ARRAY || type == TYPE_MAP;
}
inline bool IsVarLenType() const {
return IsVarLenStringType() || IsCollectionType();
}
/// Returns the byte size of this type. Returns 0 for variable length types.
inline int GetByteSize() const {
switch (type) {
case TYPE_ARRAY:
case TYPE_MAP:
case TYPE_STRING:
case TYPE_VARCHAR:
return 0;
case TYPE_CHAR:
case TYPE_FIXED_UDA_INTERMEDIATE:
return len;
case TYPE_NULL:
case TYPE_BOOLEAN:
case TYPE_TINYINT:
return 1;
case TYPE_SMALLINT:
return 2;
case TYPE_INT:
case TYPE_DATE:
case TYPE_FLOAT:
return 4;
case TYPE_BIGINT:
case TYPE_DOUBLE:
return 8;
case TYPE_TIMESTAMP:
// This is the size of the slot, the actual size of the data is 12.
return 16;
case TYPE_DECIMAL:
return GetDecimalByteSize(precision);
case INVALID_TYPE:
default:
DCHECK(false) << "NYI: " << type;
}
return 0;
}
/// Returns the size of a slot for this type.
inline int GetSlotSize() const {
switch (type) {
case TYPE_STRING:
case TYPE_VARCHAR:
return 12;
case TYPE_CHAR:
case TYPE_FIXED_UDA_INTERMEDIATE:
return len;
case TYPE_ARRAY:
case TYPE_MAP:
return 12;
case TYPE_STRUCT:
DCHECK(false) << "TYPE_STRUCT slot not possible";
default:
return GetByteSize();
}
}
static inline int GetDecimalByteSize(int precision) {
DCHECK_GT(precision, 0);
if (precision <= MAX_DECIMAL4_PRECISION) return 4;
if (precision <= MAX_DECIMAL8_PRECISION) return 8;
return 16;
}
/// Returns the IR version of this ColumnType. Only implemented for scalar types. LLVM
/// optimizer can pull out fields of the returned ConstantStruct for constant folding.
llvm::ConstantStruct* ToIR(LlvmCodeGen* codegen) const;
apache::hive::service::cli::thrift::TTypeEntry ToHs2Type() const;
std::string DebugString() const;
private:
/// Used to create a possibly nested type from the flattened Thrift representation.
///
/// 'idx' is an in/out parameter that is initially set to the index of the type in
/// 'types' being constructed, and is set to the index of the next type in 'types' that
/// needs to be processed (or the size 'types' if all nodes have been processed).
ColumnType(const std::vector<TTypeNode>& types, int* idx);
/// Recursive implementation of ToThrift() that populates 'thrift_type' with the
/// TTypeNodes for this type and its children.
void ToThrift(TColumnType* thrift_type) const;
};
std::ostream& operator<<(std::ostream& os, const ColumnType& type);
}
#endif