blob: c43808d433034ccc416db6fe7965d5a01a72084f [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "runtime/types.h"
#include <ostream>
#include <sstream>
#include "gen-cpp/TCLIService_constants.h"
#include "codegen/llvm-codegen.h"
#include "common/names.h"
using namespace apache::hive::service::cli::thrift;
namespace impala {
// Out-of-line definitions for ColumnType's static constants. The int constants are
// defined here without an initializer, so their values have to come from the in-class
// declarations (not visible in this file).
const int ColumnType::MAX_PRECISION;
const int ColumnType::MAX_SCALE;
const int ColumnType::MIN_ADJUSTED_SCALE;
const int ColumnType::MAX_DECIMAL4_PRECISION;
const int ColumnType::MAX_DECIMAL8_PRECISION;
// NOTE(review): presumably the name under which the ColumnType struct appears in the
// generated LLVM IR module - confirm against the codegen lookup code.
const char* ColumnType::LLVM_CLASS_NAME = "struct.impala::ColumnType";
// Builds a ColumnType from the flattened list of thrift type nodes in 'types',
// starting at position '*idx'.
//
// A scalar node consumes exactly one entry. A nested node (struct, array, map) consumes
// its own entry and then recursively consumes the entries of its children, which
// directly follow it in 'types'. On return '*idx' refers to the last node consumed by
// this type, so the caller advances it before reading the next sibling.
//
// 'len', 'precision' and 'scale' stay at -1 unless the scalar type carries them.
ColumnType::ColumnType(const std::vector<TTypeNode>& types, int* idx)
  : len(-1), precision(-1), scale(-1) {
  DCHECK_GE(*idx, 0);
  DCHECK_LT(*idx, types.size());
  const TTypeNode& node = types[*idx];
  switch (node.type) {
    case TTypeNodeType::SCALAR: {
      DCHECK(node.__isset.scalar_type);
      // Bind by reference: 'types' is const and outlives this constructor, so there is
      // no need to copy the thrift struct.
      const TScalarType& scalar_type = node.scalar_type;
      type = ThriftToType(scalar_type.type);
      if (type == TYPE_CHAR || type == TYPE_VARCHAR
          || type == TYPE_FIXED_UDA_INTERMEDIATE) {
        DCHECK(scalar_type.__isset.len);
        len = scalar_type.len;
      } else if (type == TYPE_DECIMAL) {
        DCHECK(scalar_type.__isset.precision);
        DCHECK(scalar_type.__isset.scale);
        precision = scalar_type.precision;
        scale = scalar_type.scale;
      }
      break;
    }
    case TTypeNodeType::STRUCT:
      type = TYPE_STRUCT;
      // One child type per struct field; the children follow this node in 'types'.
      for (const TStructField& field : node.struct_fields) {
        ++(*idx);
        children.push_back(ColumnType(types, idx));
        field_names.push_back(field.name);
      }
      break;
    case TTypeNodeType::ARRAY:
      DCHECK(!node.__isset.scalar_type);
      DCHECK_LT(*idx, types.size() - 1);
      type = TYPE_ARRAY;
      // Single child: the item type.
      ++(*idx);
      children.push_back(ColumnType(types, idx));
      break;
    case TTypeNodeType::MAP:
      DCHECK(!node.__isset.scalar_type);
      DCHECK_LT(*idx, types.size() - 2);
      type = TYPE_MAP;
      // Two children, consumed in the order they appear in 'types'.
      ++(*idx);
      children.push_back(ColumnType(types, idx));
      ++(*idx);
      children.push_back(ColumnType(types, idx));
      break;
    default:
      DCHECK(false) << node.type;
      // Do not leave 'type' indeterminate if the DCHECK above is compiled out.
      type = INVALID_TYPE;
      break;
  }
}
// Translates a thrift primitive type id into the corresponding PrimitiveType.
// Any thrift value without an explicit mapping yields INVALID_TYPE.
PrimitiveType ThriftToType(TPrimitiveType::type ttype) {
  PrimitiveType result = INVALID_TYPE;
  switch (ttype) {
    case TPrimitiveType::INVALID_TYPE: break;
    case TPrimitiveType::NULL_TYPE: result = TYPE_NULL; break;
    case TPrimitiveType::BOOLEAN: result = TYPE_BOOLEAN; break;
    case TPrimitiveType::TINYINT: result = TYPE_TINYINT; break;
    case TPrimitiveType::SMALLINT: result = TYPE_SMALLINT; break;
    case TPrimitiveType::INT: result = TYPE_INT; break;
    case TPrimitiveType::BIGINT: result = TYPE_BIGINT; break;
    case TPrimitiveType::FLOAT: result = TYPE_FLOAT; break;
    case TPrimitiveType::DOUBLE: result = TYPE_DOUBLE; break;
    case TPrimitiveType::DATE: result = TYPE_DATE; break;
    case TPrimitiveType::DATETIME: result = TYPE_DATETIME; break;
    case TPrimitiveType::TIMESTAMP: result = TYPE_TIMESTAMP; break;
    case TPrimitiveType::STRING: result = TYPE_STRING; break;
    case TPrimitiveType::VARCHAR: result = TYPE_VARCHAR; break;
    case TPrimitiveType::BINARY: result = TYPE_BINARY; break;
    case TPrimitiveType::DECIMAL: result = TYPE_DECIMAL; break;
    case TPrimitiveType::CHAR: result = TYPE_CHAR; break;
    case TPrimitiveType::FIXED_UDA_INTERMEDIATE:
      result = TYPE_FIXED_UDA_INTERMEDIATE;
      break;
    default: break;
  }
  return result;
}
// Translates a PrimitiveType into the corresponding thrift primitive type id.
// Nested types (struct, array, map) have no thrift primitive equivalent: they trip a
// DCHECK and, like any other unmapped value, yield TPrimitiveType::INVALID_TYPE.
TPrimitiveType::type ToThrift(PrimitiveType ptype) {
  TPrimitiveType::type result = TPrimitiveType::INVALID_TYPE;
  switch (ptype) {
    case INVALID_TYPE: break;
    case TYPE_NULL: result = TPrimitiveType::NULL_TYPE; break;
    case TYPE_BOOLEAN: result = TPrimitiveType::BOOLEAN; break;
    case TYPE_TINYINT: result = TPrimitiveType::TINYINT; break;
    case TYPE_SMALLINT: result = TPrimitiveType::SMALLINT; break;
    case TYPE_INT: result = TPrimitiveType::INT; break;
    case TYPE_BIGINT: result = TPrimitiveType::BIGINT; break;
    case TYPE_FLOAT: result = TPrimitiveType::FLOAT; break;
    case TYPE_DOUBLE: result = TPrimitiveType::DOUBLE; break;
    case TYPE_DATE: result = TPrimitiveType::DATE; break;
    case TYPE_DATETIME: result = TPrimitiveType::DATETIME; break;
    case TYPE_TIMESTAMP: result = TPrimitiveType::TIMESTAMP; break;
    case TYPE_STRING: result = TPrimitiveType::STRING; break;
    case TYPE_VARCHAR: result = TPrimitiveType::VARCHAR; break;
    case TYPE_BINARY: result = TPrimitiveType::BINARY; break;
    case TYPE_DECIMAL: result = TPrimitiveType::DECIMAL; break;
    case TYPE_CHAR: result = TPrimitiveType::CHAR; break;
    case TYPE_FIXED_UDA_INTERMEDIATE:
      result = TPrimitiveType::FIXED_UDA_INTERMEDIATE;
      break;
    case TYPE_STRUCT:
    case TYPE_ARRAY:
    case TYPE_MAP:
      DCHECK(false) << "NYI: " << ptype;
      break;
    default: break;
  }
  return result;
}
// Returns the upper-case name of primitive type 't', or an empty string when 't' is
// not one of the enumerators listed below.
string TypeToString(PrimitiveType t) {
  const char* name = "";
  switch (t) {
    case INVALID_TYPE: name = "INVALID"; break;
    case TYPE_NULL: name = "NULL"; break;
    case TYPE_BOOLEAN: name = "BOOLEAN"; break;
    case TYPE_TINYINT: name = "TINYINT"; break;
    case TYPE_SMALLINT: name = "SMALLINT"; break;
    case TYPE_INT: name = "INT"; break;
    case TYPE_BIGINT: name = "BIGINT"; break;
    case TYPE_FLOAT: name = "FLOAT"; break;
    case TYPE_DOUBLE: name = "DOUBLE"; break;
    case TYPE_DATE: name = "DATE"; break;
    case TYPE_DATETIME: name = "DATETIME"; break;
    case TYPE_TIMESTAMP: name = "TIMESTAMP"; break;
    case TYPE_STRING: name = "STRING"; break;
    case TYPE_VARCHAR: name = "VARCHAR"; break;
    case TYPE_BINARY: name = "BINARY"; break;
    case TYPE_DECIMAL: name = "DECIMAL"; break;
    case TYPE_CHAR: name = "CHAR"; break;
    case TYPE_FIXED_UDA_INTERMEDIATE: name = "FIXED_UDA_INTERMEDIATE"; break;
    case TYPE_STRUCT: name = "STRUCT"; break;
    case TYPE_ARRAY: name = "ARRAY"; break;
    case TYPE_MAP: name = "MAP"; break;
  }
  return name;
}
// Returns the lower-case type name handed to ODBC clients (the ODBC driver requires
// lower case). VARCHAR is reported as "string". Values without a mapping, including
// TYPE_FIXED_UDA_INTERMEDIATE, produce "unknown".
string TypeToOdbcString(PrimitiveType t) {
  const char* odbc_name = "unknown";
  switch (t) {
    case INVALID_TYPE: odbc_name = "invalid"; break;
    case TYPE_NULL: odbc_name = "null"; break;
    case TYPE_BOOLEAN: odbc_name = "boolean"; break;
    case TYPE_TINYINT: odbc_name = "tinyint"; break;
    case TYPE_SMALLINT: odbc_name = "smallint"; break;
    case TYPE_INT: odbc_name = "int"; break;
    case TYPE_BIGINT: odbc_name = "bigint"; break;
    case TYPE_FLOAT: odbc_name = "float"; break;
    case TYPE_DOUBLE: odbc_name = "double"; break;
    case TYPE_DATE: odbc_name = "date"; break;
    case TYPE_DATETIME: odbc_name = "datetime"; break;
    case TYPE_TIMESTAMP: odbc_name = "timestamp"; break;
    case TYPE_STRING: odbc_name = "string"; break;
    case TYPE_VARCHAR: odbc_name = "string"; break;
    case TYPE_BINARY: odbc_name = "binary"; break;
    case TYPE_DECIMAL: odbc_name = "decimal"; break;
    case TYPE_CHAR: odbc_name = "char"; break;
    case TYPE_STRUCT: odbc_name = "struct"; break;
    case TYPE_ARRAY: odbc_name = "array"; break;
    case TYPE_MAP: odbc_name = "map"; break;
    case TYPE_FIXED_UDA_INTERMEDIATE:
      // This type is not exposed to clients and should never be requested here.
      DCHECK(false);
      break;
  }
  return odbc_name;
}
// Appends the thrift representation of this type to 'thrift_type->types'.
//
// One node is appended for this type itself; for nested types the nodes of all
// children are appended right after it by recursing over 'children' in order.
// Scalar nodes carry 'len' for CHAR/VARCHAR/FIXED_UDA_INTERMEDIATE and
// 'precision'/'scale' for DECIMAL.
void ColumnType::ToThrift(TColumnType* thrift_type) const {
  thrift_type->types.push_back(TTypeNode());
  TTypeNode& node = thrift_type->types.back();

  if (!IsComplexType()) {
    node.type = TTypeNodeType::SCALAR;
    node.__set_scalar_type(TScalarType());
    TScalarType& scalar = node.scalar_type;
    scalar.__set_type(impala::ToThrift(type));
    const bool carries_len = type == TYPE_CHAR || type == TYPE_VARCHAR
        || type == TYPE_FIXED_UDA_INTERMEDIATE;
    if (carries_len) {
      DCHECK_NE(len, -1);
      scalar.__set_len(len);
    } else if (type == TYPE_DECIMAL) {
      DCHECK_NE(precision, -1);
      DCHECK_NE(scale, -1);
      scalar.__set_precision(precision);
      scalar.__set_scale(scale);
    }
    return;
  }

  switch (type) {
    case TYPE_ARRAY:
      node.type = TTypeNodeType::ARRAY;
      break;
    case TYPE_MAP:
      node.type = TTypeNodeType::MAP;
      break;
    default:
      DCHECK_EQ(type, TYPE_STRUCT);
      node.type = TTypeNodeType::STRUCT;
      node.__set_struct_fields(vector<TStructField>());
      for (const string& field_name : field_names) {
        TStructField field;
        field.name = field_name;
        node.struct_fields.push_back(field);
      }
      break;
  }

  // 'node' must not be touched from here on: each recursive call appends to
  // 'thrift_type->types', which may reallocate and invalidate the reference.
  for (const ColumnType& child_type : children) child_type.ToThrift(thrift_type);
}
// Converts this type into a HiveServer2 TTypeEntry holding a primitive entry.
//
// DECIMAL additionally carries precision and scale qualifiers; CHAR and VARCHAR carry
// a maximum-length qualifier. Types that are not handled explicitly below trip a
// DCHECK and are reported as STRING_TYPE.
TTypeEntry ColumnType::ToHs2Type() const {
  TPrimitiveTypeEntry primitive_entry;
  TTypeId::type hs2_type_id = TTypeId::STRING_TYPE;
  switch (type) {
    // NULL_TYPE is mapped to BOOLEAN, otherwise Hive's JDBC driver won't
    // work for queries like "SELECT NULL" (IMPALA-914).
    case TYPE_NULL:
    case TYPE_BOOLEAN:
      hs2_type_id = TTypeId::BOOLEAN_TYPE;
      break;
    case TYPE_TINYINT:
      hs2_type_id = TTypeId::TINYINT_TYPE;
      break;
    case TYPE_SMALLINT:
      hs2_type_id = TTypeId::SMALLINT_TYPE;
      break;
    case TYPE_INT:
      hs2_type_id = TTypeId::INT_TYPE;
      break;
    case TYPE_BIGINT:
      hs2_type_id = TTypeId::BIGINT_TYPE;
      break;
    case TYPE_FLOAT:
      hs2_type_id = TTypeId::FLOAT_TYPE;
      break;
    case TYPE_DOUBLE:
      hs2_type_id = TTypeId::DOUBLE_TYPE;
      break;
    case TYPE_DATE:
      hs2_type_id = TTypeId::DATE_TYPE;
      break;
    case TYPE_TIMESTAMP:
      hs2_type_id = TTypeId::TIMESTAMP_TYPE;
      break;
    case TYPE_STRING:
      hs2_type_id = TTypeId::STRING_TYPE;
      break;
    case TYPE_BINARY:
      hs2_type_id = TTypeId::BINARY_TYPE;
      break;
    case TYPE_DECIMAL: {
      TTypeQualifierValue precision_value;
      precision_value.__set_i32Value(precision);
      TTypeQualifierValue scale_value;
      scale_value.__set_i32Value(scale);
      TTypeQualifiers decimal_quals;
      decimal_quals.qualifiers[g_TCLIService_constants.PRECISION] = precision_value;
      decimal_quals.qualifiers[g_TCLIService_constants.SCALE] = scale_value;
      primitive_entry.__set_typeQualifiers(decimal_quals);
      hs2_type_id = TTypeId::DECIMAL_TYPE;
      break;
    }
    case TYPE_CHAR:
    case TYPE_VARCHAR: {
      TTypeQualifierValue max_len_value;
      max_len_value.__set_i32Value(len);
      TTypeQualifiers char_quals;
      char_quals.qualifiers[g_TCLIService_constants.CHARACTER_MAXIMUM_LENGTH] =
          max_len_value;
      primitive_entry.__set_typeQualifiers(char_quals);
      if (type == TYPE_CHAR) {
        hs2_type_id = TTypeId::CHAR_TYPE;
      } else {
        hs2_type_id = TTypeId::VARCHAR_TYPE;
      }
      break;
    }
    default:
      // No explicit HiveServer2 mapping is provided for the remaining types (invalid,
      // datetime, fixed_uda_intermediate and the nested types); fall back to STRING.
      DCHECK(false) << "bad TypeToTValueType() type: " << DebugString();
      hs2_type_id = TTypeId::STRING_TYPE;
      break;
  }
  primitive_entry.__set_type(hs2_type_id);

  TTypeEntry result;
  result.__set_primitiveEntry(primitive_entry);
  return result;
}
// Returns a human-readable description of this type. Parameterized types include
// their parameters, e.g. "CHAR(<len>)" or "DECIMAL(<precision>,<scale>)"; every other
// type is rendered through TypeToString().
string ColumnType::DebugString() const {
  stringstream out;
  if (type == TYPE_CHAR) {
    out << "CHAR(" << len << ")";
  } else if (type == TYPE_DECIMAL) {
    out << "DECIMAL(" << precision << "," << scale << ")";
  } else if (type == TYPE_VARCHAR) {
    out << "VARCHAR(" << len << ")";
  } else if (type == TYPE_FIXED_UDA_INTERMEDIATE) {
    out << "FIXED_UDA_INTERMEDIATE(" << len << ")";
  } else {
    return TypeToString(type);
  }
  return out.str();
}
// Converts each thrift column type in 'ttypes' into a ColumnType via the
// single-element FromThrift() overload. The result has the same length and order as
// the input.
vector<ColumnType> ColumnType::FromThrift(const vector<TColumnType>& ttypes) {
  vector<ColumnType> types;
  // The final size is known up front; reserve once instead of growing repeatedly.
  types.reserve(ttypes.size());
  for (const TColumnType& ttype : ttypes) types.push_back(FromThrift(ttype));
  return types;
}
// Streams the DebugString() representation of 'type' into 'os' and returns 'os' so
// that insertions can be chained.
ostream& operator<<(ostream& os, const ColumnType& type) {
  return os << type.DebugString();
}
// Builds an LLVM constant struct mirroring this ColumnType.
//
// The IR struct layout is { i32, i32, i32, i32, <vector>, <vector> }: 'type', 'len',
// 'precision' and 'scale' are emitted as i32 constants, while 'children' and
// 'field_names' are emitted as null values. Nested types are therefore not supported
// here and both vectors are expected to be empty.
llvm::ConstantStruct* ColumnType::ToIR(LlvmCodeGen* codegen) const {
  // Each of the four leading fields is emitted as an i32, so it must be 4 bytes wide.
  DCHECK_EQ(sizeof(type), sizeof(int32_t));
  DCHECK_EQ(sizeof(len), sizeof(int32_t));
  DCHECK_EQ(sizeof(precision), sizeof(int32_t));
  DCHECK_EQ(sizeof(scale), sizeof(int32_t));
  DCHECK(children.empty()) << "Nested types NYI";
  DCHECK(field_names.empty()) << "Nested types NYI";

  llvm::StructType* struct_type = codegen->GetStructType<ColumnType>();

  llvm::Constant* type_const = codegen->GetI32Constant(type);
  llvm::Constant* len_const = codegen->GetI32Constant(len);
  llvm::Constant* precision_const = codegen->GetI32Constant(precision);
  llvm::Constant* scale_const = codegen->GetI32Constant(scale);

  // Null values stand in for the (empty) 'children' and 'field_names' vectors.
  llvm::Constant* children_const =
      llvm::Constant::getNullValue(struct_type->getElementType(4));
  llvm::Constant* field_names_const =
      llvm::Constant::getNullValue(struct_type->getElementType(5));

  llvm::Constant* ir_struct = llvm::ConstantStruct::get(struct_type, type_const,
      len_const, precision_const, scale_const, children_const, field_names_const);
  return llvm::cast<llvm::ConstantStruct>(ir_struct);
}
}