blob: 535ce97fc7469108eb196149aa1f281e53ba2d88 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "runtime/types.h"
#include <ostream>
#include "codegen/llvm-codegen.h"
#include "gutil/strings/substitute.h"
#include "common/names.h"
using namespace apache::hive::service::cli::thrift;
namespace impala {
// Out-of-class definitions of ColumnType's static constant members. The integer
// constants carry no initializer here, so their values presumably come from the
// in-class declarations (not visible in this file) -- TODO confirm against types.h.
const int ColumnType::MAX_PRECISION;
const int ColumnType::MAX_SCALE;
const int ColumnType::MIN_ADJUSTED_SCALE;
const int ColumnType::MAX_DECIMAL4_PRECISION;
const int ColumnType::MAX_DECIMAL8_PRECISION;
// Name string for the ColumnType struct; presumably the name the struct has in the
// cross-compiled LLVM IR module -- verify against the codegen lookup code.
const char* ColumnType::LLVM_CLASS_NAME = "struct.impala::ColumnType";
// Builds a (possibly nested) ColumnType from the flattened Thrift node list 'types',
// starting with the node at position '*idx'.
//
// 'idx' is an in/out parameter: on entry it is the index of the node to convert. It is
// advanced once for every child node that is consumed, so on return it refers to the
// last node that belongs to this type (for a scalar it is left unchanged).
//
// Malformed input is only detected through DCHECKs. If the node kind is not recognised
// the resulting type is INVALID_TYPE.
ColumnType::ColumnType(const std::vector<TTypeNode>& types, int* idx)
  : len(-1), precision(-1), scale(-1), is_binary_(false) {
  DCHECK_GE(*idx, 0);
  DCHECK_LT(*idx, types.size());
  const TTypeNode& node = types[*idx];
  switch (node.type) {
    case TTypeNodeType::SCALAR: {
      DCHECK(node.__isset.scalar_type);
      // The scalar descriptor is only read, so bind it by reference instead of copying.
      const TScalarType& scalar_type = node.scalar_type;
      type = ThriftToType(scalar_type.type);
      if (type == TYPE_CHAR || type == TYPE_VARCHAR
          || type == TYPE_FIXED_UDA_INTERMEDIATE) {
        DCHECK(scalar_type.__isset.len);
        len = scalar_type.len;
      } else if (type == TYPE_STRING) {
        // ThriftToType() folds BINARY into TYPE_STRING; remember the distinction here.
        is_binary_ = scalar_type.type == TPrimitiveType::BINARY;
      } else if (type == TYPE_DECIMAL) {
        DCHECK(scalar_type.__isset.precision);
        DCHECK(scalar_type.__isset.scale);
        precision = scalar_type.precision;
        scale = scalar_type.scale;
      }
      break;
    }
    case TTypeNodeType::STRUCT:
      type = TYPE_STRUCT;
      // Each struct field is followed in 'types' by the node(s) describing its type.
      for (const TStructField& field : node.struct_fields) {
        ++(*idx);
        children.push_back(ColumnType(types, idx));
        field_names.push_back(field.name);
        field_ids.push_back(field.field_id);
      }
      break;
    case TTypeNodeType::ARRAY:
      DCHECK(!node.__isset.scalar_type);
      // At least one more node (the item type) must follow. Written as an addition on
      // the index so that the unsigned size is never decremented.
      DCHECK_LT(*idx + 1, types.size());
      type = TYPE_ARRAY;
      ++(*idx);
      children.push_back(ColumnType(types, idx));
      break;
    case TTypeNodeType::MAP:
      DCHECK(!node.__isset.scalar_type);
      // At least two more nodes (the two child types) must follow. 'types.size() - 2'
      // would wrap around for a size below 2 and make the check pass vacuously.
      DCHECK_LT(*idx + 2, types.size());
      type = TYPE_MAP;
      ++(*idx);
      children.push_back(ColumnType(types, idx));
      ++(*idx);
      children.push_back(ColumnType(types, idx));
      break;
    default:
      DCHECK(false) << node.type;
      // 'type' is not set by the initializer list; give it a defined value so that
      // builds without DCHECKs do not read an indeterminate enum.
      type = INVALID_TYPE;
  }
}
// Translates a Thrift primitive type into the backend's PrimitiveType.
// BINARY has no dedicated backend type and is reported as TYPE_STRING. Any value that
// is not listed below yields INVALID_TYPE.
PrimitiveType ThriftToType(TPrimitiveType::type ttype) {
  PrimitiveType result = INVALID_TYPE;
  switch (ttype) {
    case TPrimitiveType::NULL_TYPE:
      result = TYPE_NULL;
      break;
    case TPrimitiveType::BOOLEAN:
      result = TYPE_BOOLEAN;
      break;
    case TPrimitiveType::TINYINT:
      result = TYPE_TINYINT;
      break;
    case TPrimitiveType::SMALLINT:
      result = TYPE_SMALLINT;
      break;
    case TPrimitiveType::INT:
      result = TYPE_INT;
      break;
    case TPrimitiveType::BIGINT:
      result = TYPE_BIGINT;
      break;
    case TPrimitiveType::FLOAT:
      result = TYPE_FLOAT;
      break;
    case TPrimitiveType::DOUBLE:
      result = TYPE_DOUBLE;
      break;
    case TPrimitiveType::DATE:
      result = TYPE_DATE;
      break;
    case TPrimitiveType::DATETIME:
      result = TYPE_DATETIME;
      break;
    case TPrimitiveType::TIMESTAMP:
      result = TYPE_TIMESTAMP;
      break;
    case TPrimitiveType::STRING:
    // BINARY is generally handled the same way as STRING by the backend.
    case TPrimitiveType::BINARY:
      result = TYPE_STRING;
      break;
    case TPrimitiveType::VARCHAR:
      result = TYPE_VARCHAR;
      break;
    case TPrimitiveType::DECIMAL:
      result = TYPE_DECIMAL;
      break;
    case TPrimitiveType::CHAR:
      result = TYPE_CHAR;
      break;
    case TPrimitiveType::FIXED_UDA_INTERMEDIATE:
      result = TYPE_FIXED_UDA_INTERMEDIATE;
      break;
    case TPrimitiveType::INVALID_TYPE:
    default:
      // Keep the INVALID_TYPE initial value.
      break;
  }
  return result;
}
// Translates a backend PrimitiveType into its Thrift counterpart.
// 'is_binary' only matters for TYPE_STRING, where it selects BINARY over STRING.
// TYPE_BINARY and the complex types (STRUCT/ARRAY/MAP) are not expected here: they trip
// a DCHECK and otherwise produce INVALID_TYPE, as does any unlisted value.
TPrimitiveType::type ToThrift(PrimitiveType ptype, bool is_binary) {
  TPrimitiveType::type result = TPrimitiveType::INVALID_TYPE;
  switch (ptype) {
    case INVALID_TYPE:
      break;
    case TYPE_NULL:
      result = TPrimitiveType::NULL_TYPE;
      break;
    case TYPE_BOOLEAN:
      result = TPrimitiveType::BOOLEAN;
      break;
    case TYPE_TINYINT:
      result = TPrimitiveType::TINYINT;
      break;
    case TYPE_SMALLINT:
      result = TPrimitiveType::SMALLINT;
      break;
    case TYPE_INT:
      result = TPrimitiveType::INT;
      break;
    case TYPE_BIGINT:
      result = TPrimitiveType::BIGINT;
      break;
    case TYPE_FLOAT:
      result = TPrimitiveType::FLOAT;
      break;
    case TYPE_DOUBLE:
      result = TPrimitiveType::DOUBLE;
      break;
    case TYPE_DATE:
      result = TPrimitiveType::DATE;
      break;
    case TYPE_DATETIME:
      result = TPrimitiveType::DATETIME;
      break;
    case TYPE_TIMESTAMP:
      result = TPrimitiveType::TIMESTAMP;
      break;
    case TYPE_STRING:
      if (is_binary) {
        result = TPrimitiveType::BINARY;
      } else {
        result = TPrimitiveType::STRING;
      }
      break;
    case TYPE_VARCHAR:
      result = TPrimitiveType::VARCHAR;
      break;
    case TYPE_BINARY:
      DCHECK(false) << "STRING should be used instead of BINARY in the backend.";
      break;
    case TYPE_DECIMAL:
      result = TPrimitiveType::DECIMAL;
      break;
    case TYPE_CHAR:
      result = TPrimitiveType::CHAR;
      break;
    case TYPE_FIXED_UDA_INTERMEDIATE:
      result = TPrimitiveType::FIXED_UDA_INTERMEDIATE;
      break;
    case TYPE_STRUCT:
    case TYPE_ARRAY:
    case TYPE_MAP:
      DCHECK(false) << "NYI: " << ptype;
      break;
    default:
      break;
  }
  return result;
}
// Returns the upper-case name of primitive type 't'. A value that matches none of the
// enumerators listed below yields the empty string.
string TypeToString(PrimitiveType t) {
  const char* name = "";
  switch (t) {
    case INVALID_TYPE:
      name = "INVALID";
      break;
    case TYPE_NULL:
      name = "NULL";
      break;
    case TYPE_BOOLEAN:
      name = "BOOLEAN";
      break;
    case TYPE_TINYINT:
      name = "TINYINT";
      break;
    case TYPE_SMALLINT:
      name = "SMALLINT";
      break;
    case TYPE_INT:
      name = "INT";
      break;
    case TYPE_BIGINT:
      name = "BIGINT";
      break;
    case TYPE_FLOAT:
      name = "FLOAT";
      break;
    case TYPE_DOUBLE:
      name = "DOUBLE";
      break;
    case TYPE_DATE:
      name = "DATE";
      break;
    case TYPE_DATETIME:
      name = "DATETIME";
      break;
    case TYPE_TIMESTAMP:
      name = "TIMESTAMP";
      break;
    case TYPE_STRING:
      name = "STRING";
      break;
    case TYPE_VARCHAR:
      name = "VARCHAR";
      break;
    case TYPE_BINARY:
      name = "BINARY";
      break;
    case TYPE_DECIMAL:
      name = "DECIMAL";
      break;
    case TYPE_CHAR:
      name = "CHAR";
      break;
    case TYPE_FIXED_UDA_INTERMEDIATE:
      name = "FIXED_UDA_INTERMEDIATE";
      break;
    case TYPE_STRUCT:
      name = "STRUCT";
      break;
    case TYPE_ARRAY:
      name = "ARRAY";
      break;
    case TYPE_MAP:
      name = "MAP";
      break;
  }
  return name;
}
// Returns the lower-case type name reported to ODBC clients for 'type', which is
// expected (DCHECKed) to consist of exactly one scalar node.
// Notes on the mapping:
//  - a Thrift BINARY column is reported as "binary", although ThriftToType() folds it
//    into TYPE_STRING;
//  - VARCHAR is reported as "string";
//  - types that must never reach a client fall through to "unknown".
string TypeToOdbcString(const TColumnType& type) {
  DCHECK_EQ(1, type.types.size());
  DCHECK_EQ(TTypeNodeType::SCALAR, type.types[0].type);
  DCHECK(type.types[0].__isset.scalar_type);
  const TPrimitiveType::type thrift_primitive = type.types[0].scalar_type.type;
  // ODBC driver requires types in lower case
  switch (ThriftToType(thrift_primitive)) {
    case INVALID_TYPE:
      return "invalid";
    case TYPE_NULL:
      return "null";
    case TYPE_BOOLEAN:
      return "boolean";
    case TYPE_TINYINT:
      return "tinyint";
    case TYPE_SMALLINT:
      return "smallint";
    case TYPE_INT:
      return "int";
    case TYPE_BIGINT:
      return "bigint";
    case TYPE_FLOAT:
      return "float";
    case TYPE_DOUBLE:
      return "double";
    case TYPE_DATE:
      return "date";
    case TYPE_DATETIME:
      return "datetime";
    case TYPE_TIMESTAMP:
      return "timestamp";
    case TYPE_STRING:
      return thrift_primitive == TPrimitiveType::BINARY ? "binary" : "string";
    case TYPE_VARCHAR:
      return "string";
    case TYPE_DECIMAL:
      return "decimal";
    case TYPE_CHAR:
      return "char";
    case TYPE_STRUCT:
      return "struct";
    case TYPE_ARRAY:
      return "array";
    case TYPE_MAP:
      return "map";
    case TYPE_BINARY:
    case TYPE_FIXED_UDA_INTERMEDIATE:
      // This type is not exposed to clients and should not be returned.
      DCHECK(false);
      break;
  }
  return "unknown";
}
// Appends the flattened Thrift representation of this type to 'thrift_type->types':
// first one node for this type itself, then (for complex types) the nodes of every
// child, in order. Existing entries of 'thrift_type->types' are left in place.
void ColumnType::ToThrift(TColumnType* thrift_type) const {
  thrift_type->types.push_back(TTypeNode());
  TTypeNode& node = thrift_type->types.back();

  if (!IsComplexType()) {
    // Scalar: a single node carrying the primitive type and its parameters.
    node.type = TTypeNodeType::SCALAR;
    node.__set_scalar_type(TScalarType());
    TScalarType& scalar = node.scalar_type;
    scalar.__set_type(impala::ToThrift(type, is_binary_));
    switch (type) {
      case TYPE_CHAR:
      case TYPE_VARCHAR:
      case TYPE_FIXED_UDA_INTERMEDIATE:
        DCHECK_NE(len, -1);
        scalar.__set_len(len);
        break;
      case TYPE_DECIMAL:
        DCHECK_NE(precision, -1);
        DCHECK_NE(scale, -1);
        scalar.__set_precision(precision);
        scalar.__set_scale(scale);
        break;
      default:
        // No extra parameters for the remaining scalar types.
        break;
    }
    return;
  }

  switch (type) {
    case TYPE_ARRAY:
      node.type = TTypeNodeType::ARRAY;
      break;
    case TYPE_MAP:
      node.type = TTypeNodeType::MAP;
      break;
    default: {
      DCHECK_EQ(type, TYPE_STRUCT);
      node.type = TTypeNodeType::STRUCT;
      node.__set_struct_fields(vector<TStructField>());
      DCHECK_EQ(field_names.size(), field_ids.size());
      for (size_t pos = 0; pos < field_names.size(); ++pos) {
        node.struct_fields.emplace_back();
        TStructField& field = node.struct_fields.back();
        field.name = field_names[pos];
        field.field_id = field_ids[pos];
      }
      break;
    }
  }

  // 'node' must not be touched past this point: every recursive call appends to
  // 'thrift_type->types', which can reallocate and invalidate the reference.
  for (const ColumnType& child_type : children) child_type.ToThrift(thrift_type);
}
// Returns a human-readable name for this type. Parameterised types include their
// parameters (e.g. "CHAR(<len>)", "DECIMAL(<precision>,<scale>)"), a string type
// flagged as binary prints as "BINARY", and everything else uses TypeToString().
string ColumnType::DebugString() const {
  if (type == TYPE_STRING) {
    if (is_binary_) return "BINARY";
    return "STRING";
  }
  if (type == TYPE_CHAR) return Substitute("CHAR($0)", len);
  if (type == TYPE_DECIMAL) return Substitute("DECIMAL($0,$1)", precision, scale);
  if (type == TYPE_VARCHAR) return Substitute("VARCHAR($0)", len);
  if (type == TYPE_FIXED_UDA_INTERMEDIATE) {
    return Substitute("FIXED_UDA_INTERMEDIATE($0)", len);
  }
  return TypeToString(type);
}
// Converts every element of 'ttypes' with the single-type FromThrift() overload and
// returns the results in the same order.
vector<ColumnType> ColumnType::FromThrift(const vector<TColumnType>& ttypes) {
  vector<ColumnType> converted;
  converted.reserve(ttypes.size());
  for (const auto& thrift_col_type : ttypes) {
    converted.push_back(FromThrift(thrift_col_type));
  }
  return converted;
}
// Writes the DebugString() form of 'type' to 'os' and returns the stream.
ostream& operator<<(ostream& os, const ColumnType& type) {
  return os << type.DebugString();
}
// Emits this ColumnType as an LLVM constant struct of the IR type that 'codegen'
// reports for ColumnType. Nested types are not supported (DCHECKed): the three vector
// members are always emitted as null values.
llvm::ConstantStruct* ColumnType::ToIR(LlvmCodeGen* codegen) const {
  // Order in which the fields are passed to ConstantStruct::get() below:
  //   { i32 type, i32 len, i32 precision, i32 scale,
  //     <vector> children, <vector> field_names, <vector> field_ids,
  //     i8 is_binary_, <padding> }
  llvm::StructType* ir_struct = codegen->GetStructType<ColumnType>();

  // Scalar members; the size checks guard the constant widths chosen here.
  DCHECK_EQ(sizeof(type), sizeof(int32_t));
  llvm::Constant* type_ir = codegen->GetI32Constant(type);
  DCHECK_EQ(sizeof(len), sizeof(int32_t));
  llvm::Constant* len_ir = codegen->GetI32Constant(len);
  DCHECK_EQ(sizeof(precision), sizeof(int32_t));
  llvm::Constant* precision_ir = codegen->GetI32Constant(precision);
  DCHECK_EQ(sizeof(scale), sizeof(int32_t));
  llvm::Constant* scale_ir = codegen->GetI32Constant(scale);

  // Create empty 'children', 'field_names' and 'field_ids' vectors
  DCHECK(children.empty()) << "Nested types NYI";
  DCHECK(field_names.empty()) << "Nested types NYI";
  DCHECK(field_ids.empty()) << "Nested types NYI";
  llvm::Constant* children_ir =
      llvm::Constant::getNullValue(ir_struct->getElementType(4));
  llvm::Constant* field_names_ir =
      llvm::Constant::getNullValue(ir_struct->getElementType(5));
  llvm::Constant* field_ids_ir =
      llvm::Constant::getNullValue(ir_struct->getElementType(6));

  DCHECK_EQ(sizeof(is_binary_), sizeof(uint8_t));
  llvm::Constant* is_binary_ir = codegen->GetI8Constant(is_binary_);

  // Trailing element of the IR struct (index 8), emitted as a null value.
  llvm::Constant* padding_ir =
      llvm::Constant::getNullValue(ir_struct->getElementType(8));

  llvm::Constant* column_type_ir = llvm::ConstantStruct::get(ir_struct, type_ir, len_ir,
      precision_ir, scale_ir, children_ir, field_names_ir, field_ids_ir, is_binary_ir,
      padding_ir);
  return llvm::cast<llvm::ConstantStruct>(column_type_ir);
}
}