blob: 8424e6996ae633a6f24f38c990a04b0e52c86945 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "service/hs2-util.h"
#include <sstream>
#include <rapidjson/rapidjson.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#include "common/logging.h"
#include "exprs/scalar-expr.h"
#include "exprs/scalar-expr-evaluator.h"
#include "exprs/slot-ref.h"
#include "gen-cpp/TCLIService_constants.h"
#include "runtime/date-value.h"
#include "runtime/complex-value-writer.inline.h"
#include "runtime/decimal-value.inline.h"
#include "runtime/raw-value.inline.h"
#include "runtime/row-batch.h"
#include "runtime/types.h"
#include "udf/udf-internal.h"
#include "util/bit-util.h"
#include <gutil/strings/substitute.h>
#include "common/names.h"
using namespace apache::hive::service::cli;
using namespace impala;
using namespace strings;
// Set the null indicator bit for row 'row_idx', assuming this will be called for
// successive increasing values of row_idx. If 'is_null' is true, the row_idx'th bit will
// be set in 'nulls' (taking the LSB as bit 0). If 'is_null' is false, the row_idx'th bit
// will be unchanged. If 'nulls' does not contain 'row_idx' bits, it will be extended by
// one byte.
inline void SetNullBit(uint32_t row_idx, bool is_null, string* nulls) {
DCHECK_LE(row_idx / 8, nulls->size());
int16_t mod_8 = row_idx % 8;
if (mod_8 == 0) (*nulls) += '\0';
(*nulls)[row_idx / 8] |= (1 << mod_8) * is_null;
}
inline bool GetNullBit(const string& nulls, uint32_t row_idx) {
DCHECK_LE(row_idx / 8, nulls.size());
return nulls[row_idx / 8] & (1 << row_idx % 8);
}
// Appends 'num_rows_added' null indicator bits to the bitmap 'to', which already
// describes 'num_rows_before' rows. The bits are read from 'from', starting at bit
// 'start_idx'.
void impala::StitchNulls(uint32_t num_rows_before, uint32_t num_rows_added,
    uint32_t start_idx, const string& from, string* to) {
  // Round up to power-of-two to avoid accidentally quadratic behaviour from repeated
  // small increases in size.
  to->reserve(BitUtil::RoundUpToPowerOfTwo((num_rows_before + num_rows_added + 7) / 8));

  // TODO: This is very inefficient, since we could conceivably go one byte at a time
  // (although the operands should stay live in registers in the loop). However doing this
  // more efficiently leads to very complex code: we have to deal with the fact that
  // 'start_idx' and 'num_rows_before' might both lead to offsets into the null bitset
  // that don't start on a byte boundary. We should revisit this, ideally with a good
  // bitset implementation.
  //
  // The index is unsigned to match 'num_rows_added'; a signed index would be compared
  // against an unsigned bound and could overflow for very large row counts.
  for (uint32_t i = 0; i < num_rows_added; ++i) {
    SetNullBit(num_rows_before + i, GetNullBit(from, start_idx + i), to);
  }
}
// For V6 and above
void impala::TColumnValueToHS2TColumn(const TColumnValue& col_val,
const TColumnType& type, uint32_t row_idx, thrift::TColumn* column) {
string* nulls;
bool is_null;
switch (type.types[0].scalar_type.type) {
case TPrimitiveType::BOOLEAN:
is_null = !col_val.__isset.bool_val;
column->boolVal.values.push_back(col_val.bool_val);
nulls = &column->boolVal.nulls;
break;
case TPrimitiveType::TINYINT:
is_null = !col_val.__isset.byte_val;
column->byteVal.values.push_back(col_val.byte_val);
nulls = &column->byteVal.nulls;
break;
case TPrimitiveType::SMALLINT:
is_null = !col_val.__isset.short_val;
column->i16Val.values.push_back(col_val.short_val);
nulls = &column->i16Val.nulls;
break;
case TPrimitiveType::INT:
is_null = !col_val.__isset.int_val;
column->i32Val.values.push_back(col_val.int_val);
nulls = &column->i32Val.nulls;
break;
case TPrimitiveType::BIGINT:
is_null = !col_val.__isset.long_val;
column->i64Val.values.push_back(col_val.long_val);
nulls = &column->i64Val.nulls;
break;
case TPrimitiveType::FLOAT:
case TPrimitiveType::DOUBLE:
is_null = !col_val.__isset.double_val;
column->doubleVal.values.push_back(col_val.double_val);
nulls = &column->doubleVal.nulls;
break;
case TPrimitiveType::NULL_TYPE:
case TPrimitiveType::TIMESTAMP:
case TPrimitiveType::DATE:
case TPrimitiveType::STRING:
case TPrimitiveType::CHAR:
case TPrimitiveType::VARCHAR:
case TPrimitiveType::DECIMAL:
case TPrimitiveType::BINARY:
is_null = !col_val.__isset.string_val;
column->stringVal.values.push_back(col_val.string_val);
nulls = &column->stringVal.nulls;
break;
default:
DCHECK(false) << "Unhandled type: "
<< TypeToString(ThriftToType(type.types[0].scalar_type.type));
return;
}
SetNullBit(row_idx, is_null, nulls);
}
// Specialised per-type implementations of ExprValuesToHS2TColumn.

// Pre-allocates capacity in hs2Vals->values and hs2Vals->nulls for 'reserve_count'
// entries, which the per-type implementations of ExprValuesToHS2TColumn then fill in.
// Both capacities are rounded up to a power of two so that repeated small increases in
// size do not become accidentally quadratic.
template <typename T>
void ReserveSpace(int reserve_count, T* hs2Vals) {
  DCHECK_GE(reserve_count, 0);
  hs2Vals->values.reserve(BitUtil::RoundUpToPowerOfTwo(reserve_count));
  // One null indicator bit per value, expressed in whole bytes.
  const int64_t null_bytes = BitUtil::RoundUpNumBytes(reserve_count);
  hs2Vals->nulls.reserve(BitUtil::RoundUpToPowerOfTwo(null_bytes));
}
// Implementation for NULL.
// Internally, Impala implements NULL expressions using a nullable BooleanVal
// (IMPALA-914). To match HiveServer2 behavior, IMPALA-14027 changed the result mapping
// to use TColumn.stringVal rather than TColumn.boolVal.
static void NullExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
    int start_idx, int num_rows, uint32_t output_row_idx,
    apache::hive::service::cli::thrift::TColumn* column) {
  auto& out = column->stringVal;
  FOREACH_ROW_LIMIT(batch, start_idx, num_rows, row_it) {
    // Evaluating the expression is not strictly necessary here; it is done anyway, and
    // the result DCHECKed, to stay consistent with the other per-type functions.
    const BooleanVal result = expr_eval->GetBooleanVal(row_it.Get());
    DCHECK(result.is_null);
    // Append an empty placeholder string and record the null bit.
    out.values.emplace_back();
    SetNullBit(output_row_idx++, result.is_null, &out.nulls);
  }
}
// Implementation for BOOL: evaluates 'expr_eval' on up to 'num_rows' rows of 'batch'
// starting at 'start_idx' and appends values and null bits to column->boolVal, numbering
// output rows from 'output_row_idx'.
static void BoolExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
    int start_idx, int num_rows, uint32_t output_row_idx,
    apache::hive::service::cli::thrift::TColumn* column) {
  auto& out = column->boolVal;
  FOREACH_ROW_LIMIT(batch, start_idx, num_rows, row_it) {
    const BooleanVal result = expr_eval->GetBooleanVal(row_it.Get());
    out.values.push_back(result.val);
    SetNullBit(output_row_idx++, result.is_null, &out.nulls);
  }
}
// Implementation for TINYINT: evaluates 'expr_eval' on up to 'num_rows' rows of 'batch'
// starting at 'start_idx' and appends values and null bits to column->byteVal, numbering
// output rows from 'output_row_idx'.
static void TinyIntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
    int start_idx, int num_rows, uint32_t output_row_idx,
    apache::hive::service::cli::thrift::TColumn* column) {
  auto& out = column->byteVal;
  FOREACH_ROW_LIMIT(batch, start_idx, num_rows, row_it) {
    const TinyIntVal result = expr_eval->GetTinyIntVal(row_it.Get());
    out.values.push_back(result.val);
    SetNullBit(output_row_idx++, result.is_null, &out.nulls);
  }
}
// Implementation for SMALLINT: evaluates 'expr_eval' on up to 'num_rows' rows of 'batch'
// starting at 'start_idx' and appends values and null bits to column->i16Val, numbering
// output rows from 'output_row_idx'.
static void SmallIntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
    RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx,
    apache::hive::service::cli::thrift::TColumn* column) {
  auto& out = column->i16Val;
  FOREACH_ROW_LIMIT(batch, start_idx, num_rows, row_it) {
    const SmallIntVal result = expr_eval->GetSmallIntVal(row_it.Get());
    out.values.push_back(result.val);
    SetNullBit(output_row_idx++, result.is_null, &out.nulls);
  }
}
// Implementation for INT: evaluates 'expr_eval' on up to 'num_rows' rows of 'batch'
// starting at 'start_idx' and appends values and null bits to column->i32Val, numbering
// output rows from 'output_row_idx'.
static void IntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
    int start_idx, int num_rows, uint32_t output_row_idx,
    apache::hive::service::cli::thrift::TColumn* column) {
  auto& out = column->i32Val;
  FOREACH_ROW_LIMIT(batch, start_idx, num_rows, row_it) {
    // The output row index must track the number of values appended so far.
    DCHECK_EQ(output_row_idx, out.values.size());
    const IntVal result = expr_eval->GetIntVal(row_it.Get());
    out.values.push_back(result.val);
    SetNullBit(output_row_idx++, result.is_null, &out.nulls);
  }
}
// Implementation for BIGINT: evaluates 'expr_eval' on up to 'num_rows' rows of 'batch'
// starting at 'start_idx' and appends values and null bits to column->i64Val, numbering
// output rows from 'output_row_idx'.
static void BigIntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
    int start_idx, int num_rows, uint32_t output_row_idx,
    apache::hive::service::cli::thrift::TColumn* column) {
  auto& out = column->i64Val;
  FOREACH_ROW_LIMIT(batch, start_idx, num_rows, row_it) {
    const BigIntVal result = expr_eval->GetBigIntVal(row_it.Get());
    out.values.push_back(result.val);
    SetNullBit(output_row_idx++, result.is_null, &out.nulls);
  }
}
// Implementation for FLOAT: evaluates 'expr_eval' on up to 'num_rows' rows of 'batch'
// starting at 'start_idx' and appends values and null bits to column->doubleVal (the
// same Thrift field that DOUBLE uses), numbering output rows from 'output_row_idx'.
static void FloatExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
    int start_idx, int num_rows, uint32_t output_row_idx,
    apache::hive::service::cli::thrift::TColumn* column) {
  auto& out = column->doubleVal;
  FOREACH_ROW_LIMIT(batch, start_idx, num_rows, row_it) {
    const FloatVal result = expr_eval->GetFloatVal(row_it.Get());
    out.values.push_back(result.val);
    SetNullBit(output_row_idx++, result.is_null, &out.nulls);
  }
}
// Implementation for DOUBLE: evaluates 'expr_eval' on up to 'num_rows' rows of 'batch'
// starting at 'start_idx' and appends values and null bits to column->doubleVal,
// numbering output rows from 'output_row_idx'.
static void DoubleExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
    int start_idx, int num_rows, uint32_t output_row_idx,
    apache::hive::service::cli::thrift::TColumn* column) {
  auto& out = column->doubleVal;
  FOREACH_ROW_LIMIT(batch, start_idx, num_rows, row_it) {
    const DoubleVal result = expr_eval->GetDoubleVal(row_it.Get());
    out.values.push_back(result.val);
    SetNullBit(output_row_idx++, result.is_null, &out.nulls);
  }
}
// Implementation for TIMESTAMP: each non-NULL value is rendered with
// TimestampValue::ToString() and appended to column->stringVal; NULL rows get an empty
// placeholder string plus a set null bit.
static void TimestampExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
    RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx,
    apache::hive::service::cli::thrift::TColumn* column) {
  auto& out = column->stringVal;
  FOREACH_ROW_LIMIT(batch, start_idx, num_rows, row_it) {
    const TimestampVal result = expr_eval->GetTimestampVal(row_it.Get());
    // Always append a slot first so 'values' stays aligned with the null bitmap.
    out.values.emplace_back();
    if (!result.is_null) {
      TimestampValue ts = TimestampValue::FromTimestampVal(result);
      out.values.back() = ts.ToString();
    }
    SetNullBit(output_row_idx++, result.is_null, &out.nulls);
  }
}
// Implementation for DATE: each non-NULL value is rendered with DateValue::ToString()
// and appended to column->stringVal; NULL rows get an empty placeholder string plus a
// set null bit.
static void DateExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
    RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx,
    apache::hive::service::cli::thrift::TColumn* column) {
  auto& out = column->stringVal;
  FOREACH_ROW_LIMIT(batch, start_idx, num_rows, row_it) {
    const DateVal result = expr_eval->GetDateVal(row_it.Get());
    // Always append a slot first so 'values' stays aligned with the null bitmap.
    out.values.emplace_back();
    if (!result.is_null) {
      DateValue date = DateValue::FromDateVal(result);
      out.values.back() = date.ToString();
    }
    SetNullBit(output_row_idx++, result.is_null, &out.nulls);
  }
}
// Common logic for BINARY, STRING and VARCHAR. Appends one entry per evaluated row to
// 'values' (an empty string for NULL, otherwise a copy of the 'len' bytes at 'ptr') and
// records the matching null bit in 'nulls', numbering output rows from
// 'output_row_idx'.
static void StringExprValuesToHS2TColumnHelper(ScalarExprEvaluator* expr_eval,
    RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx,
    vector<string>& values, string& nulls) {
  FOREACH_ROW_LIMIT(batch, start_idx, num_rows, row_it) {
    StringVal result = expr_eval->GetStringVal(row_it.Get());
    if (!result.is_null) {
      values.emplace_back(reinterpret_cast<char*>(result.ptr), result.len);
    } else {
      values.emplace_back();
    }
    SetNullBit(output_row_idx++, result.is_null, &nulls);
  }
}
// Implementation for STRING and VARCHAR: delegates to the shared helper, targeting the
// stringVal field of 'column'.
static void StringExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
    int start_idx, int num_rows, uint32_t output_row_idx,
    apache::hive::service::cli::thrift::TColumn* column) {
  auto& out = column->stringVal;
  StringExprValuesToHS2TColumnHelper(
      expr_eval, batch, start_idx, num_rows, output_row_idx, out.values, out.nulls);
}
// Implementation for BINARY. Identical to STRING except that the results are written to
// the binaryVal Thrift field instead of stringVal.
static void BinaryExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
    int start_idx, int num_rows, uint32_t output_row_idx,
    apache::hive::service::cli::thrift::TColumn* column) {
  auto& out = column->binaryVal;
  StringExprValuesToHS2TColumnHelper(
      expr_eval, batch, start_idx, num_rows, output_row_idx, out.values, out.nulls);
}
// Implementation for CHAR. Non-NULL values are copied using the length declared by the
// CHAR type in 'type' rather than the 'len' field of the evaluated StringVal.
static void CharExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
    const TColumnType& type, RowBatch* batch, int start_idx, int num_rows,
    uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* column) {
  const ColumnType char_type = ColumnType::CreateCharType(type.types[0].scalar_type.len);
  auto& out = column->stringVal;
  FOREACH_ROW_LIMIT(batch, start_idx, num_rows, row_it) {
    StringVal result = expr_eval->GetStringVal(row_it.Get());
    if (!result.is_null) {
      out.values.emplace_back(reinterpret_cast<const char*>(result.ptr), char_type.len);
    } else {
      out.values.emplace_back();
    }
    SetNullBit(output_row_idx++, result.is_null, &out.nulls);
  }
}
// Implementation for DECIMAL. Each non-NULL value is rendered as a string according to
// the precision/scale carried by 'type' and appended to column->stringVal; NULL rows get
// an empty placeholder string plus a set null bit.
static void DecimalExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
    const TColumnType& type, RowBatch* batch, int start_idx, int num_rows,
    uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* column) {
  // The decimal type and its storage size do not depend on the row, so derive them once
  // instead of once per row.
  const ColumnType& decimalType = ColumnType::FromThrift(type);
  const auto byte_size = decimalType.GetByteSize();
  auto& out = column->stringVal;
  FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
    DecimalVal val = expr_eval->GetDecimalVal(it.Get());
    if (val.is_null) {
      out.values.emplace_back();
    } else {
      switch (byte_size) {
        case 4:
          out.values.emplace_back(Decimal4Value(val.val4).ToString(decimalType));
          break;
        case 8:
          out.values.emplace_back(Decimal8Value(val.val8).ToString(decimalType));
          break;
        case 16:
          out.values.emplace_back(Decimal16Value(val.val16).ToString(decimalType));
          break;
        default:
          DCHECK(false) << "bad type: " << decimalType;
          // Still append a placeholder so that 'values' stays aligned with the null
          // bitmap in builds where DCHECK is compiled out.
          out.values.emplace_back();
      }
    }
    SetNullBit(output_row_idx, val.is_null, &out.nulls);
    ++output_row_idx;
  }
}
// Implementation for STRUCT. Each non-NULL struct is serialised to JSON through
// ComplexValueWriter and appended to column->stringVal; NULL rows get an empty
// placeholder string plus a set null bit. 'stringify_map_keys' is passed through to the
// ComplexValueWriter.
static void StructExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
    const TColumnType& type, RowBatch* batch, int start_idx, int num_rows,
    uint32_t output_row_idx, bool stringify_map_keys,
    apache::hive::service::cli::thrift::TColumn* column) {
  DCHECK(type.types.size() > 1);
  auto& out = column->stringVal;
  // Output buffer for rapidjson::Writer, shared by all rows to eliminate allocations.
  rapidjson::StringBuffer json_buf;
  FOREACH_ROW_LIMIT(batch, start_idx, num_rows, row_it) {
    StructVal struct_val = expr_eval->GetStructVal(row_it.Get());
    if (!struct_val.is_null) {
      const impala::ScalarExpr& root_expr = expr_eval->root();
      // Currently the root expr can be only a slot ref as no functions return arrays.
      DCHECK(root_expr.IsSlotRef());
      const SlotDescriptor* slot_desc =
          static_cast<const SlotRef&>(root_expr).GetSlotDescriptor();
      DCHECK(slot_desc != nullptr);
      json_buf.Clear();
      rapidjson::Writer<rapidjson::StringBuffer> json_writer(json_buf);
      ComplexValueWriter<rapidjson::StringBuffer> value_writer(
          &json_writer, stringify_map_keys);
      value_writer.StructValToJSON(struct_val, *slot_desc);
      out.values.emplace_back(json_buf.GetString());
    } else {
      out.values.emplace_back();
    }
    SetNullBit(output_row_idx++, struct_val.is_null, &out.nulls);
  }
}
// Implementation for ARRAY and MAP. Each non-NULL collection is serialised to JSON
// through ComplexValueWriter and appended to column->stringVal; NULL rows get an empty
// placeholder string plus a set null bit. 'stringify_map_keys' is passed through to the
// ComplexValueWriter.
static void CollectionExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
    const TColumnType& type, RowBatch* batch, int start_idx, int num_rows,
    uint32_t output_row_idx, bool stringify_map_keys,
    apache::hive::service::cli::thrift::TColumn* column) {
  DCHECK(type.types.size() > 1);
  const TTypeNodeType::type coll_thrift_type = type.types[0].type;
  const bool is_array = (coll_thrift_type == TTypeNodeType::ARRAY);
  DCHECK(is_array || coll_thrift_type == TTypeNodeType::MAP);
  const PrimitiveType coll_impala_type =
      is_array ? PrimitiveType::TYPE_ARRAY : PrimitiveType::TYPE_MAP;
  auto& out = column->stringVal;
  // Output buffer for rapidjson::Writer, shared by all rows to eliminate allocations.
  rapidjson::StringBuffer json_buf;
  FOREACH_ROW_LIMIT(batch, start_idx, num_rows, row_it) {
    CollectionVal coll_val = expr_eval->GetCollectionVal(row_it.Get());
    if (!coll_val.is_null) {
      const impala::ScalarExpr& root_expr = expr_eval->root();
      // Currently the root expr can be only a slot ref as no functions return arrays.
      DCHECK(root_expr.IsSlotRef());
      const TupleDescriptor* item_tuple_desc = root_expr.GetCollectionTupleDesc();
      DCHECK(item_tuple_desc != nullptr);
      CollectionValue coll_value(coll_val);
      json_buf.Clear();
      rapidjson::Writer<rapidjson::StringBuffer> json_writer(json_buf);
      ComplexValueWriter<rapidjson::StringBuffer> value_writer(
          &json_writer, stringify_map_keys);
      value_writer.CollectionValueToJSON(coll_value, coll_impala_type, item_tuple_desc);
      out.values.emplace_back(json_buf.GetString());
    } else {
      out.values.emplace_back();
    }
    SetNullBit(output_row_idx++, coll_val.is_null, &out.nulls);
  }
}
// For V6 and above
// Evaluates 'expr_eval' over up to 'num_rows' rows of 'batch' starting at 'start_idx'
// and appends the results to the column-oriented HS2 result 'column', numbering output
// rows from 'output_row_idx'. 'expected_result_count' is used to reserve space in the
// target Thrift field before the rows are written; 'stringify_map_keys' is forwarded to
// the JSON writer used for complex types. Every case returns right after its per-type
// implementation has run so that exactly one Thrift field is populated per call.
void impala::ExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
    const TColumnType& type, RowBatch* batch, int start_idx, int num_rows,
    uint32_t output_row_idx, int expected_result_count, bool stringify_map_keys,
    apache::hive::service::cli::thrift::TColumn* column) {
  // Dispatch to a templated function for the loop over rows. This avoids branching on
  // the type for every row.
  // TODO: instead of relying on stamped out implementations, we could codegen this loop
  // to inline the expression evaluation into the loop body.
  switch (type.types[0].type) {
    case TTypeNodeType::STRUCT:
      ReserveSpace(expected_result_count, &column->stringVal);
      StructExprValuesToHS2TColumn(expr_eval, type, batch, start_idx, num_rows,
          output_row_idx, stringify_map_keys, column);
      return;
    case TTypeNodeType::ARRAY:
    case TTypeNodeType::MAP:
      ReserveSpace(expected_result_count, &column->stringVal);
      CollectionExprValuesToHS2TColumn(expr_eval, type, batch, start_idx, num_rows,
          output_row_idx, stringify_map_keys, column);
      return;
    default:
      // Scalar types are handled by the switch below.
      break;
  }
  switch (type.types[0].scalar_type.type) {
    case TPrimitiveType::NULL_TYPE:
      ReserveSpace(expected_result_count, &column->stringVal);
      NullExprValuesToHS2TColumn(
          expr_eval, batch, start_idx, num_rows, output_row_idx, column);
      // Must not fall through into BOOLEAN: that would evaluate the rows a second time
      // and additionally populate column->boolVal.
      return;
    case TPrimitiveType::BOOLEAN:
      ReserveSpace(expected_result_count, &column->boolVal);
      BoolExprValuesToHS2TColumn(
          expr_eval, batch, start_idx, num_rows, output_row_idx, column);
      return;
    case TPrimitiveType::TINYINT:
      ReserveSpace(expected_result_count, &column->byteVal);
      TinyIntExprValuesToHS2TColumn(
          expr_eval, batch, start_idx, num_rows, output_row_idx, column);
      return;
    case TPrimitiveType::SMALLINT:
      ReserveSpace(expected_result_count, &column->i16Val);
      SmallIntExprValuesToHS2TColumn(
          expr_eval, batch, start_idx, num_rows, output_row_idx, column);
      return;
    case TPrimitiveType::INT:
      ReserveSpace(expected_result_count, &column->i32Val);
      IntExprValuesToHS2TColumn(
          expr_eval, batch, start_idx, num_rows, output_row_idx, column);
      return;
    case TPrimitiveType::BIGINT:
      ReserveSpace(expected_result_count, &column->i64Val);
      BigIntExprValuesToHS2TColumn(
          expr_eval, batch, start_idx, num_rows, output_row_idx, column);
      return;
    case TPrimitiveType::FLOAT:
      ReserveSpace(expected_result_count, &column->doubleVal);
      FloatExprValuesToHS2TColumn(
          expr_eval, batch, start_idx, num_rows, output_row_idx, column);
      return;
    case TPrimitiveType::DOUBLE:
      ReserveSpace(expected_result_count, &column->doubleVal);
      DoubleExprValuesToHS2TColumn(
          expr_eval, batch, start_idx, num_rows, output_row_idx, column);
      return;
    case TPrimitiveType::DATE:
      ReserveSpace(expected_result_count, &column->stringVal);
      DateExprValuesToHS2TColumn(
          expr_eval, batch, start_idx, num_rows, output_row_idx, column);
      return;
    case TPrimitiveType::TIMESTAMP:
      ReserveSpace(expected_result_count, &column->stringVal);
      TimestampExprValuesToHS2TColumn(
          expr_eval, batch, start_idx, num_rows, output_row_idx, column);
      return;
    case TPrimitiveType::STRING:
    case TPrimitiveType::VARCHAR:
      ReserveSpace(expected_result_count, &column->stringVal);
      StringExprValuesToHS2TColumn(
          expr_eval, batch, start_idx, num_rows, output_row_idx, column);
      return;
    case TPrimitiveType::BINARY:
      ReserveSpace(expected_result_count, &column->binaryVal);
      BinaryExprValuesToHS2TColumn(
          expr_eval, batch, start_idx, num_rows, output_row_idx, column);
      return;
    case TPrimitiveType::CHAR:
      ReserveSpace(expected_result_count, &column->stringVal);
      CharExprValuesToHS2TColumn(
          expr_eval, type, batch, start_idx, num_rows, output_row_idx, column);
      return;
    case TPrimitiveType::DECIMAL: {
      ReserveSpace(expected_result_count, &column->stringVal);
      DecimalExprValuesToHS2TColumn(
          expr_eval, type, batch, start_idx, num_rows, output_row_idx, column);
      return;
    }
    default:
      DCHECK(false) << "Unhandled type: "
                    << TypeToString(ThriftToType(type.types[0].scalar_type.type));
  }
}
// For V1 -> V5
// Converts the Impala value 'col_val' of scalar type 'type' into the row-oriented HS2
// value 'hs2_col_val'. The HS2 field matching the type is always marked as present; its
// inner 'value' is marked as set only when the corresponding field of 'col_val' is set,
// so an unset source field is reported as NULL.
void impala::TColumnValueToHS2TColumnValue(const TColumnValue& col_val,
    const TColumnType& type, thrift::TColumnValue* hs2_col_val) {
  // TODO: Handle complex types.
  DCHECK_EQ(1, type.types.size());
  DCHECK_EQ(TTypeNodeType::SCALAR, type.types[0].type);
  DCHECK_EQ(true, type.types[0].__isset.scalar_type);
  switch (type.types[0].scalar_type.type) {
    case TPrimitiveType::BOOLEAN:
      hs2_col_val->__isset.boolVal = true;
      hs2_col_val->boolVal.value = col_val.bool_val;
      hs2_col_val->boolVal.__isset.value = col_val.__isset.bool_val;
      break;
    case TPrimitiveType::TINYINT:
      hs2_col_val->__isset.byteVal = true;
      hs2_col_val->byteVal.value = col_val.byte_val;
      hs2_col_val->byteVal.__isset.value = col_val.__isset.byte_val;
      break;
    case TPrimitiveType::SMALLINT:
      hs2_col_val->__isset.i16Val = true;
      hs2_col_val->i16Val.value = col_val.short_val;
      hs2_col_val->i16Val.__isset.value = col_val.__isset.short_val;
      break;
    case TPrimitiveType::INT:
      hs2_col_val->__isset.i32Val = true;
      hs2_col_val->i32Val.value = col_val.int_val;
      hs2_col_val->i32Val.__isset.value = col_val.__isset.int_val;
      break;
    case TPrimitiveType::BIGINT:
      hs2_col_val->__isset.i64Val = true;
      hs2_col_val->i64Val.value = col_val.long_val;
      hs2_col_val->i64Val.__isset.value = col_val.__isset.long_val;
      break;
    case TPrimitiveType::FLOAT:
    case TPrimitiveType::DOUBLE:
      hs2_col_val->__isset.doubleVal = true;
      hs2_col_val->doubleVal.value = col_val.double_val;
      hs2_col_val->doubleVal.__isset.value = col_val.__isset.double_val;
      break;
    // NULL_TYPE is reported through stringVal with no value set, matching
    // TColumnValueToHS2TColumn() and ExprValueToHS2TColumnValue().
    case TPrimitiveType::NULL_TYPE:
    case TPrimitiveType::DECIMAL:
    case TPrimitiveType::STRING:
    case TPrimitiveType::TIMESTAMP:
    case TPrimitiveType::DATE:
    case TPrimitiveType::VARCHAR:
    case TPrimitiveType::CHAR:
      // HiveServer2 requires timestamp to be presented as string. Note that the .thrift
      // spec says it should be a BIGINT; AFAICT Hive ignores that and produces a string.
      hs2_col_val->__isset.stringVal = true;
      hs2_col_val->stringVal.__isset.value = col_val.__isset.string_val;
      if (col_val.__isset.string_val) {
        hs2_col_val->stringVal.value = col_val.string_val;
      }
      break;
    case TPrimitiveType::BINARY: {
      // The HS2 TColumnValue has no dedicated binary field, so BINARY is returned through
      // stringVal. Prefer the dedicated 'binary_val' source field and fall back to
      // 'string_val' for producers that only populate the string field.
      // NOTE(review): confirm which field the producers of 'col_val' set for BINARY.
      const bool has_binary = col_val.__isset.binary_val;
      const bool has_value = has_binary || col_val.__isset.string_val;
      hs2_col_val->__isset.stringVal = true;
      hs2_col_val->stringVal.__isset.value = has_value;
      if (has_value) {
        hs2_col_val->stringVal.value =
            has_binary ? col_val.binary_val : col_val.string_val;
      }
      break;
    }
    default:
      DCHECK(false) << "bad type: "
                    << TypeToString(ThriftToType(type.types[0].scalar_type.type));
      break;
  }
}
// For V1 -> V5
void impala::ExprValueToHS2TColumnValue(const void* value, const TColumnType& type,
thrift::TColumnValue* hs2_col_val) {
bool not_null = (value != NULL);
// TODO: Handle complex types.
DCHECK_EQ(1, type.types.size());
DCHECK_EQ(TTypeNodeType::SCALAR, type.types[0].type);
DCHECK_EQ(1, type.types[0].__isset.scalar_type);
switch (type.types[0].scalar_type.type) {
case TPrimitiveType::NULL_TYPE:
// Set NULLs in the stringVal, but don't set the value itself.
hs2_col_val->__isset.stringVal = true;
hs2_col_val->stringVal.__isset.value = false;
break;
case TPrimitiveType::BOOLEAN:
hs2_col_val->__isset.boolVal = true;
if (not_null) hs2_col_val->boolVal.value = *reinterpret_cast<const bool*>(value);
hs2_col_val->boolVal.__isset.value = not_null;
break;
case TPrimitiveType::TINYINT:
hs2_col_val->__isset.byteVal = true;
if (not_null) hs2_col_val->byteVal.value = *reinterpret_cast<const int8_t*>(value);
hs2_col_val->byteVal.__isset.value = not_null;
break;
case TPrimitiveType::SMALLINT:
hs2_col_val->__isset.i16Val = true;
if (not_null) hs2_col_val->i16Val.value = *reinterpret_cast<const int16_t*>(value);
hs2_col_val->i16Val.__isset.value = not_null;
break;
case TPrimitiveType::INT:
hs2_col_val->__isset.i32Val = true;
if (not_null) hs2_col_val->i32Val.value = *reinterpret_cast<const int32_t*>(value);
hs2_col_val->i32Val.__isset.value = not_null;
break;
case TPrimitiveType::BIGINT:
hs2_col_val->__isset.i64Val = true;
if (not_null) hs2_col_val->i64Val.value = *reinterpret_cast<const int64_t*>(value);
hs2_col_val->i64Val.__isset.value = not_null;
break;
case TPrimitiveType::FLOAT:
hs2_col_val->__isset.doubleVal = true;
if (not_null) hs2_col_val->doubleVal.value = *reinterpret_cast<const float*>(value);
hs2_col_val->doubleVal.__isset.value = not_null;
break;
case TPrimitiveType::DOUBLE:
hs2_col_val->__isset.doubleVal = true;
if (not_null) {
hs2_col_val->doubleVal.value = *reinterpret_cast<const double*>(value);
}
hs2_col_val->doubleVal.__isset.value = not_null;
break;
case TPrimitiveType::STRING:
case TPrimitiveType::VARCHAR:
// Unlike TColumn, TColumnValue does not differentiate between STRING and BINARY.
case TPrimitiveType::BINARY:
hs2_col_val->__isset.stringVal = true;
hs2_col_val->stringVal.__isset.value = not_null;
if (not_null) {
const StringValue* string_val = reinterpret_cast<const StringValue*>(value);
hs2_col_val->stringVal.value.assign(string_val->Ptr(), string_val->Len());
}
break;
case TPrimitiveType::CHAR:
hs2_col_val->__isset.stringVal = true;
hs2_col_val->stringVal.__isset.value = not_null;
if (not_null) {
ColumnType char_type = ColumnType::CreateCharType(type.types[0].scalar_type.len);
hs2_col_val->stringVal.value.assign(
reinterpret_cast<const char*>(value), char_type.len);
}
break;
case TPrimitiveType::DATE:
// HiveServer2 requires date to be presented as string.
hs2_col_val->__isset.stringVal = true;
hs2_col_val->stringVal.__isset.value = not_null;
if (not_null) {
hs2_col_val->stringVal.value =
reinterpret_cast<const DateValue*>(value)->ToString();
}
break;
case TPrimitiveType::TIMESTAMP:
// HiveServer2 requires timestamp to be presented as string.
hs2_col_val->__isset.stringVal = true;
hs2_col_val->stringVal.__isset.value = not_null;
if (not_null) {
RawValue::PrintValue(
value, ColumnType(TYPE_TIMESTAMP), -1, &(hs2_col_val->stringVal.value));
}
break;
case TPrimitiveType::DECIMAL: {
// HiveServer2 requires decimal to be presented as string.
hs2_col_val->__isset.stringVal = true;
hs2_col_val->stringVal.__isset.value = not_null;
const ColumnType& decimalType = ColumnType::FromThrift(type);
if (not_null) {
switch (decimalType.GetByteSize()) {
case 4:
hs2_col_val->stringVal.value =
reinterpret_cast<const Decimal4Value*>(value)->ToString(decimalType);
break;
case 8:
hs2_col_val->stringVal.value =
reinterpret_cast<const Decimal8Value*>(value)->ToString(decimalType);
break;
case 16:
hs2_col_val->stringVal.value =
reinterpret_cast<const Decimal16Value*>(value)->ToString(decimalType);
break;
default:
DCHECK(false) << "bad type: " << decimalType;
}
}
break;
}
default:
DCHECK(false) << "bad type: "
<< TypeToString(ThriftToType(type.types[0].scalar_type.type));
break;
}
}
// Writes val.value to 'ss' when the value is marked as set in val.__isset, otherwise
// writes the literal "NULL".
template<typename T>
void PrintVal(const T& val, ostream* ss) {
  if (!val.__isset.value) {
    (*ss) << "NULL";
    return;
  }
  (*ss) << val.value;
}
// Specialisation for byte values, which would otherwise be streamed as characters
// rather than integers. The value is widened to int16_t before printing.
template<>
void PrintVal(const apache::hive::service::cli::thrift::TByteValue& val, ostream* ss) {
  if (!val.__isset.value) {
    (*ss) << "NULL";
    return;
  }
  (*ss) << static_cast<int16_t>(val.value);
}
// Prints the HS2 value 'colval' to 'out'. The first field found set, checked in the
// order bool, double, byte, i32, i16, i64, string, is printed. Booleans are printed as
// "true"/"false"; an unset inner value, or no field being set at all, prints "NULL".
void impala::PrintTColumnValue(const thrift::TColumnValue& colval, stringstream* out) {
  const auto& present = colval.__isset;
  if (present.boolVal) {
    if (!colval.boolVal.__isset.value) {
      (*out) << "NULL";
    } else {
      (*out) << ((colval.boolVal.value) ? "true" : "false");
    }
    return;
  }
  if (present.doubleVal) {
    PrintVal(colval.doubleVal, out);
    return;
  }
  if (present.byteVal) {
    PrintVal(colval.byteVal, out);
    return;
  }
  if (present.i32Val) {
    PrintVal(colval.i32Val, out);
    return;
  }
  if (present.i16Val) {
    PrintVal(colval.i16Val, out);
    return;
  }
  if (present.i64Val) {
    PrintVal(colval.i64Val, out);
    return;
  }
  if (present.stringVal) {
    PrintVal(colval.stringVal, out);
    return;
  }
  (*out) << "NULL";
}
// Converts the HS2 value 'hive_colval', described by 'desc', into an Impala
// TColumnValue. The first HS2 field that is present and has its value set (checked in
// the order bool, double, byte, i32, i16, i64, string) is copied over; if none
// qualifies, the returned value has no field set. String-encoded values are interpreted
// according to the Hive type in 'desc': DATE strings are parsed into a day count,
// DECIMAL and STRING values are copied as-is.
TColumnValue impala::ConvertToTColumnValue(
    const thrift::TColumnDesc& desc, const thrift::TColumnValue& hive_colval) {
  // By default, all values in Impala TColumnValue are unset. To set a value,
  // it must be present in a particular field in the Hive version and not null.
  TColumnValue colval;
  if (hive_colval.__isset.boolVal && hive_colval.boolVal.__isset.value) {
    colval.__set_bool_val(hive_colval.boolVal.value);
  } else if (hive_colval.__isset.doubleVal && hive_colval.doubleVal.__isset.value) {
    colval.__set_double_val(hive_colval.doubleVal.value);
  } else if (hive_colval.__isset.byteVal && hive_colval.byteVal.__isset.value) {
    colval.__set_byte_val(hive_colval.byteVal.value);
  } else if (hive_colval.__isset.i32Val && hive_colval.i32Val.__isset.value) {
    colval.__set_int_val(hive_colval.i32Val.value);
  } else if (hive_colval.__isset.i16Val && hive_colval.i16Val.__isset.value) {
    colval.__set_short_val(hive_colval.i16Val.value);
  } else if (hive_colval.__isset.i64Val && hive_colval.i64Val.__isset.value) {
    colval.__set_long_val(hive_colval.i64Val.value);
  } else if (hive_colval.__isset.stringVal && hive_colval.stringVal.__isset.value) {
    switch (desc.typeDesc.types[0].primitiveEntry.type) {
      // For Hive date type, the value is represented as a string, such as '2020-01-01'.
      // Convert the string to Epoch days.
      case thrift::TTypeId::DATE_TYPE: {
        DateValue d =
            DateValue::ParseSimpleDateFormat(hive_colval.stringVal.value, false);
        colval.__set_date_val(d.Value());
        break;
      }
      // For Hive decimal type, the value is represented as a string, such as '1.234567'.
      // Its precision and scale is contained in desc as type qualifiers.
      case thrift::TTypeId::DECIMAL_TYPE: {
        const std::map<std::string, thrift::TTypeQualifierValue>& qualifiers =
            desc.typeDesc.types[0].primitiveEntry.typeQualifiers.qualifiers;
        const auto precision_it = qualifiers.find("precision");
        const auto scale_it = qualifiers.find("scale");
        DCHECK(precision_it != qualifiers.end()) << "Unable to find precision";
        DCHECK(scale_it != qualifiers.end()) << "Unable to find scale";
        // Precision and scale are only used for logging. Never dereference an end()
        // iterator: in builds where DCHECK is compiled out that would be undefined
        // behaviour, so the log line is simply skipped when a qualifier is missing.
        if (precision_it != qualifiers.end() && scale_it != qualifiers.end()) {
          VLOG(3) << "Decimal in hive_colval: value=" << hive_colval.stringVal.value
                  << ", precision=" << precision_it->second.i32Value
                  << ", scale=" << scale_it->second.i32Value;
        }
        colval.__set_decimal_val(hive_colval.stringVal.value);
        break;
      }
      case thrift::TTypeId::STRING_TYPE:
        colval.__set_string_val(hive_colval.stringVal.value);
        break;
      default:
        DCHECK(false) << "Unsupported conversion for hive type "
                      << desc.typeDesc.types[0];
    }
  }
  return colval;
}
// Prints the first field of 'value' that is set to 'out', checking the fields in the
// order bool, double, byte, int, short, long, string, binary, timestamp, decimal, date.
// Nothing is written when no field is set.
void impala::PrintTColumnValue(const impala::TColumnValue& value, stringstream* out) {
  if (value.__isset.bool_val) {
    *out << value.bool_val;
  } else if (value.__isset.double_val) {
    *out << value.double_val;
  } else if (value.__isset.byte_val) {
    // Widen the byte before streaming it; otherwise it is printed as a character rather
    // than as an integer (same reasoning as the TByteValue specialisation of PrintVal).
    *out << static_cast<int16_t>(value.byte_val);
  } else if (value.__isset.int_val) {
    *out << value.int_val;
  } else if (value.__isset.short_val) {
    *out << value.short_val;
  } else if (value.__isset.long_val) {
    *out << value.long_val;
  } else if (value.__isset.string_val) {
    *out << value.string_val;
  } else if (value.__isset.binary_val) {
    *out << value.binary_val;
  } else if (value.__isset.timestamp_val) {
    *out << value.timestamp_val;
  } else if (value.__isset.decimal_val) {
    *out << value.decimal_val;
  } else if (value.__isset.date_val) {
    *out << value.date_val;
  }
}
// Convenience overload: returns what the stream-based PrintTColumnValue() writes for
// 'value' as a string.
string impala::PrintTColumnValue(const impala::TColumnValue& value) {
  std::stringstream printed;
  PrintTColumnValue(value, &printed);
  return printed.str();
}
// Returns true if at least one of the value fields of 'value' is marked as set.
bool impala::isOneFieldSet(const impala::TColumnValue& value) {
  const auto& present = value.__isset;
  return present.bool_val || present.double_val || present.byte_val
      || present.int_val || present.short_val || present.long_val
      || present.string_val || present.binary_val || present.timestamp_val
      || present.decimal_val || present.date_val;
}
// Maps the Impala column type 'columnType' to the HS2 type entry reported to clients.
// DECIMAL carries precision and scale as type qualifiers, CHAR/VARCHAR carry their
// maximum length, and complex types (STRUCT, ARRAY, MAP) are reported as STRING. Types
// without an HS2 counterpart trigger a DCHECK and are reported as STRING.
thrift::TTypeEntry impala::ColumnToHs2Type(
    const TColumnType& columnType) {
  const ColumnType& type = ColumnType::FromThrift(columnType);
  thrift::TPrimitiveTypeEntry type_entry;
  switch (type.type) {
    case TYPE_NULL:
      type_entry.__set_type(thrift::TTypeId::NULL_TYPE);
      break;
    case TYPE_BOOLEAN:
      type_entry.__set_type(thrift::TTypeId::BOOLEAN_TYPE);
      break;
    case TYPE_TINYINT:
      type_entry.__set_type(thrift::TTypeId::TINYINT_TYPE);
      break;
    case TYPE_SMALLINT:
      type_entry.__set_type(thrift::TTypeId::SMALLINT_TYPE);
      break;
    case TYPE_INT:
      type_entry.__set_type(thrift::TTypeId::INT_TYPE);
      break;
    case TYPE_BIGINT:
      type_entry.__set_type(thrift::TTypeId::BIGINT_TYPE);
      break;
    case TYPE_FLOAT:
      type_entry.__set_type(thrift::TTypeId::FLOAT_TYPE);
      break;
    case TYPE_DOUBLE:
      type_entry.__set_type(thrift::TTypeId::DOUBLE_TYPE);
      break;
    case TYPE_DATE:
      type_entry.__set_type(thrift::TTypeId::DATE_TYPE);
      break;
    case TYPE_TIMESTAMP:
      type_entry.__set_type(thrift::TTypeId::TIMESTAMP_TYPE);
      break;
    case TYPE_STRING:
      // TYPE_STRING covers both STRING and BINARY columns.
      if (!type.IsBinaryType()) {
        type_entry.__set_type(thrift::TTypeId::STRING_TYPE);
      } else {
        type_entry.__set_type(thrift::TTypeId::BINARY_TYPE);
      }
      break;
    case TYPE_DECIMAL: {
      thrift::TTypeQualifierValue precision_qual;
      precision_qual.__set_i32Value(type.precision);
      thrift::TTypeQualifierValue scale_qual;
      scale_qual.__set_i32Value(type.scale);
      thrift::TTypeQualifiers decimal_quals;
      decimal_quals.qualifiers[thrift::g_TCLIService_constants.PRECISION] =
          precision_qual;
      decimal_quals.qualifiers[thrift::g_TCLIService_constants.SCALE] = scale_qual;
      type_entry.__set_typeQualifiers(decimal_quals);
      type_entry.__set_type(thrift::TTypeId::DECIMAL_TYPE);
      break;
    }
    case TYPE_CHAR:
    case TYPE_VARCHAR: {
      thrift::TTypeQualifierValue max_len_qual;
      max_len_qual.__set_i32Value(type.len);
      thrift::TTypeQualifiers char_quals;
      char_quals.qualifiers[thrift::g_TCLIService_constants.CHARACTER_MAXIMUM_LENGTH] =
          max_len_qual;
      type_entry.__set_typeQualifiers(char_quals);
      if (type.type == TYPE_CHAR) {
        type_entry.__set_type(thrift::TTypeId::CHAR_TYPE);
      } else {
        type_entry.__set_type(thrift::TTypeId::VARCHAR_TYPE);
      }
      break;
    }
    case TYPE_STRUCT:
    case TYPE_ARRAY:
    case TYPE_MAP:
      type_entry.__set_type(thrift::TTypeId::STRING_TYPE);
      break;
    case TYPE_BINARY:
    default:
      // HiveServer2 does not have a type for invalid, datetime or
      // fixed_uda_intermediate. Binary should be stored as TYPE_STRING, not
      // TYPE_BINARY in the backend.
      DCHECK(false) << "bad TypeToTValueType() type: " << type.DebugString();
      type_entry.__set_type(thrift::TTypeId::STRING_TYPE);
  }
  thrift::TTypeEntry hs2_type;
  hs2_type.__set_primitiveEntry(type_entry);
  return hs2_type;
}