blob: b856556810daa0c97964f5250945e3703b656ea8 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "service/hs2-util.h"
#include "common/logging.h"
#include "runtime/decimal-value.inline.h"
#include "runtime/raw-value.inline.h"
#include "runtime/types.h"
#include <gutil/strings/substitute.h>
#include "common/names.h"
using namespace apache::hive::service::cli;
using namespace impala;
using namespace strings;
// Set the null indicator bit for row 'row_idx', assuming this will be called for
// successive increasing values of row_idx. If 'is_null' is true, the row_idx'th bit will
// be set in 'nulls' (taking the LSB as bit 0). If 'is_null' is false, the row_idx'th bit
// will be unchanged. If 'nulls' does not contain 'row_idx' bits, it will be extended by
// one byte.
inline void SetNullBit(uint32_t row_idx, bool is_null, string* nulls) {
DCHECK_LE(row_idx / 8, nulls->size());
int16_t mod_8 = row_idx % 8;
if (mod_8 == 0) (*nulls) += '\0';
(*nulls)[row_idx / 8] |= (1 << mod_8) * is_null;
}
// Returns true if the null indicator bit for row 'row_idx' is set in the bitmap 'nulls'.
// Rows are packed eight to a byte, with the least significant bit of each byte holding
// the lowest-numbered row of that byte.
inline bool GetNullBit(const string& nulls, uint32_t row_idx) {
  DCHECK_LE(row_idx / 8, nulls.size());
  const int bit_mask = 1 << (row_idx % 8);
  return (nulls[row_idx / 8] & bit_mask) != 0;
}
// Copies 'num_rows_added' null indicator bits out of the bitmap 'from', starting at bit
// 'start_idx', and writes them into the bitmap 'to' at bit positions 'num_rows_before',
// 'num_rows_before' + 1, and so on. Bits are written through SetNullBit(), which appends
// a new byte to 'to' each time the destination position reaches a byte boundary.
// NOTE(review): this presumably relies on 'to' already holding exactly the bytes needed
// for 'num_rows_before' rows - confirm against callers.
void impala::StitchNulls(uint32_t num_rows_before, uint32_t num_rows_added,
    uint32_t start_idx, const string& from, string* to) {
  to->reserve((num_rows_before + num_rows_added + 7) / 8);

  // TODO: This is very inefficient, since we could conceivably go one byte at a time
  // (although the operands should stay live in registers in the loop). However doing this
  // more efficiently leads to very complex code: we have to deal with the fact that
  // 'start_idx' and 'num_rows_before' might both lead to offsets into the null bitset
  // that don't start on a byte boundary. We should revisit this, ideally with a good
  // bitset implementation.
  //
  // The counter has the same unsigned type as 'num_rows_added' so that the loop condition
  // does not mix signed and unsigned operands and the increment can never overflow a
  // signed int.
  for (uint32_t i = 0; i < num_rows_added; ++i) {
    SetNullBit(num_rows_before + i, GetNullBit(from, i + start_idx), to);
  }
}
// For V6 and above
void impala::TColumnValueToHS2TColumn(const TColumnValue& col_val,
const TColumnType& type, uint32_t row_idx, thrift::TColumn* column) {
string* nulls;
bool is_null;
switch (type.types[0].scalar_type.type) {
case TPrimitiveType::NULL_TYPE:
case TPrimitiveType::BOOLEAN:
is_null = !col_val.__isset.bool_val;
column->boolVal.values.push_back(col_val.bool_val);
nulls = &column->boolVal.nulls;
break;
case TPrimitiveType::TINYINT:
is_null = !col_val.__isset.byte_val;
column->byteVal.values.push_back(col_val.byte_val);
nulls = &column->byteVal.nulls;
break;
case TPrimitiveType::SMALLINT:
is_null = !col_val.__isset.short_val;
column->i16Val.values.push_back(col_val.short_val);
nulls = &column->i16Val.nulls;
break;
case TPrimitiveType::INT:
is_null = !col_val.__isset.int_val;
column->i32Val.values.push_back(col_val.int_val);
nulls = &column->i32Val.nulls;
break;
case TPrimitiveType::BIGINT:
is_null = !col_val.__isset.long_val;
column->i64Val.values.push_back(col_val.long_val);
nulls = &column->i64Val.nulls;
break;
case TPrimitiveType::FLOAT:
case TPrimitiveType::DOUBLE:
is_null = !col_val.__isset.double_val;
column->doubleVal.values.push_back(col_val.double_val);
nulls = &column->doubleVal.nulls;
break;
case TPrimitiveType::TIMESTAMP:
case TPrimitiveType::STRING:
case TPrimitiveType::CHAR:
case TPrimitiveType::VARCHAR:
case TPrimitiveType::DECIMAL:
is_null = !col_val.__isset.string_val;
column->stringVal.values.push_back(col_val.string_val);
nulls = &column->stringVal.nulls;
break;
default:
DCHECK(false) << "Unhandled type: "
<< TypeToString(ThriftToType(type.types[0].scalar_type.type));
return;
}
SetNullBit(row_idx, is_null, nulls);
}
// For V6 and above
void impala::ExprValueToHS2TColumn(const void* value, const TColumnType& type,
uint32_t row_idx, thrift::TColumn* column) {
string* nulls;
switch (type.types[0].scalar_type.type) {
case TPrimitiveType::NULL_TYPE:
case TPrimitiveType::BOOLEAN:
column->boolVal.values.push_back(
value == NULL ? false : *reinterpret_cast<const bool*>(value));
nulls = &column->boolVal.nulls;
break;
case TPrimitiveType::TINYINT:
column->byteVal.values.push_back(
value == NULL ? 0 : *reinterpret_cast<const int8_t*>(value));
nulls = &column->byteVal.nulls;
break;
case TPrimitiveType::SMALLINT:
column->i16Val.values.push_back(
value == NULL ? 0 : *reinterpret_cast<const int16_t*>(value));
nulls = &column->i16Val.nulls;
break;
case TPrimitiveType::INT:
column->i32Val.values.push_back(
value == NULL ? 0 : *reinterpret_cast<const int32_t*>(value));
nulls = &column->i32Val.nulls;
break;
case TPrimitiveType::BIGINT:
column->i64Val.values.push_back(
value == NULL ? 0 : *reinterpret_cast<const int64_t*>(value));
nulls = &column->i64Val.nulls;
break;
case TPrimitiveType::FLOAT:
column->doubleVal.values.push_back(
value == NULL ? 0.f : *reinterpret_cast<const float*>(value));
nulls = &column->doubleVal.nulls;
break;
case TPrimitiveType::DOUBLE:
column->doubleVal.values.push_back(
value == NULL ? 0.0 : *reinterpret_cast<const double*>(value));
nulls = &column->doubleVal.nulls;
break;
case TPrimitiveType::TIMESTAMP:
column->stringVal.values.push_back("");
if (value != NULL) {
RawValue::PrintValue(value, TYPE_TIMESTAMP, -1,
&(column->stringVal.values.back()));
}
nulls = &column->stringVal.nulls;
break;
case TPrimitiveType::STRING:
case TPrimitiveType::VARCHAR:
column->stringVal.values.push_back("");
if (value != NULL) {
const StringValue* str_val = reinterpret_cast<const StringValue*>(value);
column->stringVal.values.back().assign(
static_cast<char*>(str_val->ptr), str_val->len);
}
nulls = &column->stringVal.nulls;
break;
case TPrimitiveType::CHAR:
column->stringVal.values.push_back("");
if (value != NULL) {
ColumnType char_type = ColumnType::CreateCharType(type.types[0].scalar_type.len);
column->stringVal.values.back().assign(
reinterpret_cast<const char*>(value), char_type.len);
}
nulls = &column->stringVal.nulls;
break;
case TPrimitiveType::DECIMAL: {
// HiveServer2 requires decimal to be presented as string.
column->stringVal.values.push_back("");
const ColumnType& decimalType = ColumnType::FromThrift(type);
if (value != NULL) {
switch (decimalType.GetByteSize()) {
case 4:
column->stringVal.values.back() =
reinterpret_cast<const Decimal4Value*>(value)->ToString(decimalType);
break;
case 8:
column->stringVal.values.back() =
reinterpret_cast<const Decimal8Value*>(value)->ToString(decimalType);
break;
case 16:
column->stringVal.values.back() =
reinterpret_cast<const Decimal16Value*>(value)->ToString(decimalType);
break;
default:
DCHECK(false) << "bad type: " << decimalType;
}
}
nulls = &column->stringVal.nulls;
break;
}
default:
DCHECK(false) << "Unhandled type: "
<< TypeToString(ThriftToType(type.types[0].scalar_type.type));
return;
}
SetNullBit(row_idx, (value == NULL), nulls);
}
// For V1 -> V5
// Converts the single value 'col_val' into the HiveServer2 row-oriented value
// 'hs2_col_val'. The scalar type in 'type.types[0]' selects which member of
// 'hs2_col_val' is populated; that member's '__isset.value' flag mirrors the matching
// '__isset' flag of 'col_val', so an unset source field is reported as NULL. NULL_TYPE
// is routed through the boolean member, matching how the other converters in this file
// treat it. FLOAT shares the double member, and DECIMAL, STRING, TIMESTAMP, VARCHAR and
// CHAR all use the string member. Any other type trips a DCHECK and leaves 'hs2_col_val'
// unmodified.
void impala::TColumnValueToHS2TColumnValue(const TColumnValue& col_val,
    const TColumnType& type, thrift::TColumnValue* hs2_col_val) {
  // TODO: Handle complex types.
  DCHECK_EQ(1, type.types.size());
  DCHECK_EQ(TTypeNodeType::SCALAR, type.types[0].type);
  DCHECK_EQ(true, type.types[0].__isset.scalar_type);
  switch (type.types[0].scalar_type.type) {
    case TPrimitiveType::NULL_TYPE:
      // NULL_TYPE values are carried in the boolean member; fall through.
    case TPrimitiveType::BOOLEAN:
      hs2_col_val->__isset.boolVal = true;
      hs2_col_val->boolVal.value = col_val.bool_val;
      hs2_col_val->boolVal.__isset.value = col_val.__isset.bool_val;
      break;
    case TPrimitiveType::TINYINT:
      hs2_col_val->__isset.byteVal = true;
      hs2_col_val->byteVal.value = col_val.byte_val;
      hs2_col_val->byteVal.__isset.value = col_val.__isset.byte_val;
      break;
    case TPrimitiveType::SMALLINT:
      hs2_col_val->__isset.i16Val = true;
      hs2_col_val->i16Val.value = col_val.short_val;
      hs2_col_val->i16Val.__isset.value = col_val.__isset.short_val;
      break;
    case TPrimitiveType::INT:
      hs2_col_val->__isset.i32Val = true;
      hs2_col_val->i32Val.value = col_val.int_val;
      hs2_col_val->i32Val.__isset.value = col_val.__isset.int_val;
      break;
    case TPrimitiveType::BIGINT:
      hs2_col_val->__isset.i64Val = true;
      hs2_col_val->i64Val.value = col_val.long_val;
      hs2_col_val->i64Val.__isset.value = col_val.__isset.long_val;
      break;
    case TPrimitiveType::FLOAT:
    case TPrimitiveType::DOUBLE:
      hs2_col_val->__isset.doubleVal = true;
      hs2_col_val->doubleVal.value = col_val.double_val;
      hs2_col_val->doubleVal.__isset.value = col_val.__isset.double_val;
      break;
    case TPrimitiveType::DECIMAL:
    case TPrimitiveType::STRING:
    case TPrimitiveType::TIMESTAMP:
    case TPrimitiveType::VARCHAR:
    case TPrimitiveType::CHAR:
      // HiveServer2 requires timestamp to be presented as string. Note that the .thrift
      // spec says it should be a BIGINT; AFAICT Hive ignores that and produces a string.
      hs2_col_val->__isset.stringVal = true;
      hs2_col_val->stringVal.__isset.value = col_val.__isset.string_val;
      // Only copy the payload when there is one.
      if (col_val.__isset.string_val) {
        hs2_col_val->stringVal.value = col_val.string_val;
      }
      break;
    default:
      DCHECK(false) << "bad type: "
                    << TypeToString(ThriftToType(type.types[0].scalar_type.type));
      break;
  }
}
// For V1 -> V5
void impala::ExprValueToHS2TColumnValue(const void* value, const TColumnType& type,
thrift::TColumnValue* hs2_col_val) {
bool not_null = (value != NULL);
// TODO: Handle complex types.
DCHECK_EQ(1, type.types.size());
DCHECK_EQ(TTypeNodeType::SCALAR, type.types[0].type);
DCHECK_EQ(1, type.types[0].__isset.scalar_type);
switch (type.types[0].scalar_type.type) {
case TPrimitiveType::NULL_TYPE:
// Set NULLs in the bool_val.
hs2_col_val->__isset.boolVal = true;
hs2_col_val->boolVal.__isset.value = false;
break;
case TPrimitiveType::BOOLEAN:
hs2_col_val->__isset.boolVal = true;
if (not_null) hs2_col_val->boolVal.value = *reinterpret_cast<const bool*>(value);
hs2_col_val->boolVal.__isset.value = not_null;
break;
case TPrimitiveType::TINYINT:
hs2_col_val->__isset.byteVal = true;
if (not_null) hs2_col_val->byteVal.value = *reinterpret_cast<const int8_t*>(value);
hs2_col_val->byteVal.__isset.value = not_null;
break;
case TPrimitiveType::SMALLINT:
hs2_col_val->__isset.i16Val = true;
if (not_null) hs2_col_val->i16Val.value = *reinterpret_cast<const int16_t*>(value);
hs2_col_val->i16Val.__isset.value = not_null;
break;
case TPrimitiveType::INT:
hs2_col_val->__isset.i32Val = true;
if (not_null) hs2_col_val->i32Val.value = *reinterpret_cast<const int32_t*>(value);
hs2_col_val->i32Val.__isset.value = not_null;
break;
case TPrimitiveType::BIGINT:
hs2_col_val->__isset.i64Val = true;
if (not_null) hs2_col_val->i64Val.value = *reinterpret_cast<const int64_t*>(value);
hs2_col_val->i64Val.__isset.value = not_null;
break;
case TPrimitiveType::FLOAT:
hs2_col_val->__isset.doubleVal = true;
if (not_null) hs2_col_val->doubleVal.value = *reinterpret_cast<const float*>(value);
hs2_col_val->doubleVal.__isset.value = not_null;
break;
case TPrimitiveType::DOUBLE:
hs2_col_val->__isset.doubleVal = true;
if (not_null) {
hs2_col_val->doubleVal.value = *reinterpret_cast<const double*>(value);
}
hs2_col_val->doubleVal.__isset.value = not_null;
break;
case TPrimitiveType::STRING:
case TPrimitiveType::VARCHAR:
hs2_col_val->__isset.stringVal = true;
hs2_col_val->stringVal.__isset.value = not_null;
if (not_null) {
const StringValue* string_val = reinterpret_cast<const StringValue*>(value);
hs2_col_val->stringVal.value.assign(static_cast<char*>(string_val->ptr),
string_val->len);
}
break;
case TPrimitiveType::CHAR:
hs2_col_val->__isset.stringVal = true;
hs2_col_val->stringVal.__isset.value = not_null;
if (not_null) {
ColumnType char_type = ColumnType::CreateCharType(type.types[0].scalar_type.len);
hs2_col_val->stringVal.value.assign(
reinterpret_cast<const char*>(value), char_type.len);
}
break;
case TPrimitiveType::TIMESTAMP:
// HiveServer2 requires timestamp to be presented as string.
hs2_col_val->__isset.stringVal = true;
hs2_col_val->stringVal.__isset.value = not_null;
if (not_null) {
RawValue::PrintValue(value, TYPE_TIMESTAMP, -1, &(hs2_col_val->stringVal.value));
}
break;
case TPrimitiveType::DECIMAL: {
// HiveServer2 requires decimal to be presented as string.
hs2_col_val->__isset.stringVal = true;
hs2_col_val->stringVal.__isset.value = not_null;
const ColumnType& decimalType = ColumnType::FromThrift(type);
if (not_null) {
switch (decimalType.GetByteSize()) {
case 4:
hs2_col_val->stringVal.value =
reinterpret_cast<const Decimal4Value*>(value)->ToString(decimalType);
break;
case 8:
hs2_col_val->stringVal.value =
reinterpret_cast<const Decimal8Value*>(value)->ToString(decimalType);
break;
case 16:
hs2_col_val->stringVal.value =
reinterpret_cast<const Decimal16Value*>(value)->ToString(decimalType);
break;
default:
DCHECK(false) << "bad type: " << decimalType;
}
}
break;
}
default:
DCHECK(false) << "bad type: "
<< TypeToString(ThriftToType(type.types[0].scalar_type.type));
break;
}
}
// Writes the payload of the optional value wrapper 'val' to the stream 'ss', or the
// literal text "NULL" when the wrapper's '__isset.value' flag is not set.
template<typename T>
void PrintVal(const T& val, ostream* ss) {
  if (!val.__isset.value) {
    (*ss) << "NULL";
    return;
  }
  (*ss) << val.value;
}
// Specialisation for byte values: streaming the raw byte would print it as a character,
// so it is converted to a 16-bit integer first and therefore printed as a number. An
// unset value is printed as the literal text "NULL".
template<>
void PrintVal(const apache::hive::service::cli::thrift::TByteValue& val, ostream* ss) {
  if (!val.__isset.value) {
    (*ss) << "NULL";
    return;
  }
  (*ss) << static_cast<int16_t>(val.value);
}
// Writes a textual rendering of the HiveServer2 value 'colval' to 'out'. The first member
// of 'colval' found to be present, checked in the order boolean, double, byte, 32-bit,
// 16-bit, 64-bit integer and string, is the one printed. Booleans are printed as "true"
// or "false"; a present member with no value, or a 'colval' with no member present at
// all, is printed as "NULL".
void impala::PrintTColumnValue(
    const apache::hive::service::cli::thrift::TColumnValue& colval, stringstream* out) {
  if (colval.__isset.boolVal) {
    const char* text = "NULL";
    if (colval.boolVal.__isset.value) {
      text = colval.boolVal.value ? "true" : "false";
    }
    (*out) << text;
    return;
  }
  if (colval.__isset.doubleVal) {
    PrintVal(colval.doubleVal, out);
    return;
  }
  if (colval.__isset.byteVal) {
    PrintVal(colval.byteVal, out);
    return;
  }
  if (colval.__isset.i32Val) {
    PrintVal(colval.i32Val, out);
    return;
  }
  if (colval.__isset.i16Val) {
    PrintVal(colval.i16Val, out);
    return;
  }
  if (colval.__isset.i64Val) {
    PrintVal(colval.i64Val, out);
    return;
  }
  if (colval.__isset.stringVal) {
    PrintVal(colval.stringVal, out);
    return;
  }
  // No member is present at all.
  (*out) << "NULL";
}