be/src/service/hs2-util.cc - impala - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #include "service/hs2-util.h"

 #include <sstream>

 #include <rapidjson/rapidjson.h>
 #include <rapidjson/stringbuffer.h>
 #include <rapidjson/writer.h>

 #include "common/logging.h"
 #include "exprs/scalar-expr.h"
 #include "exprs/scalar-expr-evaluator.h"
 #include "exprs/slot-ref.h"
 #include "gen-cpp/TCLIService_constants.h"
 #include "runtime/date-value.h"
 #include "runtime/complex-value-writer.inline.h"
 #include "runtime/decimal-value.inline.h"
 #include "runtime/raw-value.inline.h"
 #include "runtime/row-batch.h"
 #include "runtime/types.h"
 #include "udf/udf-internal.h"
 #include "util/bit-util.h"

 #include <gutil/strings/substitute.h>

 #include "common/names.h"

 using namespace apache::hive::service::cli;
 using namespace impala;
 using namespace strings;

 // Records the null-ness of row 'row_idx' in the bitmap 'nulls', where bit 0 is the LSB
 // of byte 0. Callers are expected to invoke this for successive, increasing values of
 // 'row_idx': whenever 'row_idx' is the first bit of a byte, a zeroed byte is appended to
 // 'nulls' before the bit is written. The bit is set if 'is_null' is true and is left
 // unchanged otherwise.
 inline void SetNullBit(uint32_t row_idx, bool is_null, string* nulls) {
   const uint32_t byte_idx = row_idx / 8;
   DCHECK_LE(byte_idx, nulls->size());
   const int16_t bit_idx = row_idx % 8;
   // Starting a new byte: grow the bitmap by one cleared byte.
   if (bit_idx == 0) nulls->push_back('\0');
   // OR-ing with 0 leaves the byte untouched for non-null rows.
   (*nulls)[byte_idx] |= is_null ? (1 << bit_idx) : 0;
 }

 inline bool GetNullBit(const string& nulls, uint32_t row_idx) {
   DCHECK_LE(row_idx / 8, nulls.size());
   return nulls[row_idx / 8] & (1 << row_idx % 8);
 }

 // Appends 'num_rows_added' null bits to the bitmap 'to', which is taken to already
 // describe 'num_rows_before' rows. The bits are copied from 'from', starting at bit
 // 'start_idx' of that bitmap.
 void impala::StitchNulls(uint32_t num_rows_before, uint32_t num_rows_added,
     uint32_t start_idx, const string& from, string* to) {
   // Round up to power-of-two to avoid accidentally quadratic behaviour from repeated
   // small increases in size.
   to->reserve(BitUtil::RoundUpToPowerOfTwo((num_rows_before + num_rows_added + 7) / 8));

   // TODO: This is very inefficient, since we could conceivably go one byte at a time
   // (although the operands should stay live in registers in the loop). However doing this
   // more efficiently leads to very complex code: we have to deal with the fact that
   // 'start_idx' and 'num_rows_before' might both lead to offsets into the null bitset
   // that don't start on a byte boundary. We should revisit this, ideally with a good
   // bitset implementation.
   //
   // The loop index is unsigned to match 'num_rows_added'; a signed index would be
   // compared against an unsigned bound and could overflow for very large row counts.
   for (uint32_t i = 0; i < num_rows_added; ++i) {
     SetNullBit(num_rows_before + i, GetNullBit(from, i + start_idx), to);
   }
 }

 // For V6 and above.
 // Appends the single Impala value 'col_val' to the HS2 columnar result 'column' as row
 // 'row_idx'. The primitive type in 'type' selects which member of 'column' receives the
 // value; the row is flagged as NULL in that member's bitmap when the matching field of
 // 'col_val' is not set.
 // NOTE(review): BINARY is written to stringVal here, whereas ExprValuesToHS2TColumn()
 // writes BINARY to binaryVal - confirm this difference is intended.
 void impala::TColumnValueToHS2TColumn(const TColumnValue& col_val,
     const TColumnType& type, uint32_t row_idx, thrift::TColumn* column) {
   // Pushes 'value' onto 'hs2_vals' and records whether the source field was unset.
   auto append = [row_idx](auto* hs2_vals, const auto& value, bool is_set) {
     hs2_vals->values.push_back(value);
     SetNullBit(row_idx, !is_set, &hs2_vals->nulls);
   };

   const auto primitive_type = type.types[0].scalar_type.type;
   switch (primitive_type) {
     case TPrimitiveType::BOOLEAN:
       append(&column->boolVal, col_val.bool_val, col_val.__isset.bool_val);
       return;
     case TPrimitiveType::TINYINT:
       append(&column->byteVal, col_val.byte_val, col_val.__isset.byte_val);
       return;
     case TPrimitiveType::SMALLINT:
       append(&column->i16Val, col_val.short_val, col_val.__isset.short_val);
       return;
     case TPrimitiveType::INT:
       append(&column->i32Val, col_val.int_val, col_val.__isset.int_val);
       return;
     case TPrimitiveType::BIGINT:
       append(&column->i64Val, col_val.long_val, col_val.__isset.long_val);
       return;
     case TPrimitiveType::FLOAT:
     case TPrimitiveType::DOUBLE:
       append(&column->doubleVal, col_val.double_val, col_val.__isset.double_val);
       return;
     // All of the following types are carried as strings.
     case TPrimitiveType::NULL_TYPE:
     case TPrimitiveType::TIMESTAMP:
     case TPrimitiveType::DATE:
     case TPrimitiveType::STRING:
     case TPrimitiveType::CHAR:
     case TPrimitiveType::VARCHAR:
     case TPrimitiveType::DECIMAL:
     case TPrimitiveType::BINARY:
       append(&column->stringVal, col_val.string_val, col_val.__isset.string_val);
       return;

     default:
       // Nothing is appended for an unhandled type.
       DCHECK(false) << "Unhandled type: "
                     << TypeToString(ThriftToType(primitive_type));
       return;
   }
 }

 // Specialised per-type implementations of ExprValuesToHS2TColumn.

 // Helper to reserve space in hs2Vals->values and hs2Vals->nulls for the values that the
 // different implementations of ExprValuesToHS2TColumn will write.
 template <typename T>
 void ReserveSpace(int reserve_count, T* hs2Vals) {
   DCHECK_GE(reserve_count, 0);
   int64_t num_null_bytes = BitUtil::RoundUpNumBytes(reserve_count);
   // Round up reserve() arguments to power-of-two to avoid accidentally quadratic
   // behaviour from repeated small increases in size.
   hs2Vals->values.reserve(BitUtil::RoundUpToPowerOfTwo(reserve_count));
   hs2Vals->nulls.reserve(BitUtil::RoundUpToPowerOfTwo(num_null_bytes));
 }

 // Implementation for NULL.
 // Internally, Impala implements a NULL expression as a nullable BooleanVal (IMPALA-914).
 // To match HiveServer2 behavior, IMPALA-14027 changed the result mapping to use
 // TColumn.stringVal rather than TColumn.boolVal.
 static void NullExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
     int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
   auto& out = column->stringVal;
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     // Evaluating the expression is not strictly necessary here; it is done, and the
     // result DCHECKed, to stay consistent with the other per-type functions.
     const BooleanVal null_val = expr_eval->GetBooleanVal(it.Get());
     DCHECK(null_val.is_null);
     // Append an empty string placeholder and flag the row as NULL.
     out.values.emplace_back();
     SetNullBit(output_row_idx++, null_val.is_null, &out.nulls);
   }
 }

 // Implementation for BOOL.
 // Evaluates 'expr_eval' over up to 'num_rows' rows of 'batch' starting at 'start_idx'
 // and appends each result to column->boolVal, beginning at row 'output_row_idx'.
 static void BoolExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
     int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
   auto& out = column->boolVal;
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     const BooleanVal result = expr_eval->GetBooleanVal(it.Get());
     out.values.push_back(result.val);
     SetNullBit(output_row_idx++, result.is_null, &out.nulls);
   }
 }

 // Implementation for TINYINT.
 // Evaluates 'expr_eval' over up to 'num_rows' rows of 'batch' starting at 'start_idx'
 // and appends each result to column->byteVal, beginning at row 'output_row_idx'.
 static void TinyIntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
     int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
   auto& out = column->byteVal;
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     const TinyIntVal result = expr_eval->GetTinyIntVal(it.Get());
     out.values.push_back(result.val);
     SetNullBit(output_row_idx++, result.is_null, &out.nulls);
   }
 }

 // Implementation for SMALLINT.
 // Evaluates 'expr_eval' over up to 'num_rows' rows of 'batch' starting at 'start_idx'
 // and appends each result to column->i16Val, beginning at row 'output_row_idx'.
 static void SmallIntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
     RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
   auto& out = column->i16Val;
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     const SmallIntVal result = expr_eval->GetSmallIntVal(it.Get());
     out.values.push_back(result.val);
     SetNullBit(output_row_idx++, result.is_null, &out.nulls);
   }
 }

 // Implementation for INT.
 // Evaluates 'expr_eval' over up to 'num_rows' rows of 'batch' starting at 'start_idx'
 // and appends each result to column->i32Val, beginning at row 'output_row_idx'.
 static void IntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
     int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
   auto& out = column->i32Val;
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     // The output row index must track the number of values already in the column.
     DCHECK_EQ(output_row_idx, out.values.size());
     const IntVal result = expr_eval->GetIntVal(it.Get());
     out.values.push_back(result.val);
     SetNullBit(output_row_idx++, result.is_null, &out.nulls);
   }
 }

 // Implementation for BIGINT.
 // Evaluates 'expr_eval' over up to 'num_rows' rows of 'batch' starting at 'start_idx'
 // and appends each result to column->i64Val, beginning at row 'output_row_idx'.
 static void BigIntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
     int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
   auto& out = column->i64Val;
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     const BigIntVal result = expr_eval->GetBigIntVal(it.Get());
     out.values.push_back(result.val);
     SetNullBit(output_row_idx++, result.is_null, &out.nulls);
   }
 }

 // Implementation for FLOAT.
 // Evaluates 'expr_eval' over up to 'num_rows' rows of 'batch' starting at 'start_idx'
 // and appends each result to column->doubleVal (the same member used for DOUBLE),
 // beginning at row 'output_row_idx'.
 static void FloatExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
     int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
   auto& out = column->doubleVal;
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     const FloatVal result = expr_eval->GetFloatVal(it.Get());
     out.values.push_back(result.val);
     SetNullBit(output_row_idx++, result.is_null, &out.nulls);
   }
 }

 // Implementation for DOUBLE.
 // Evaluates 'expr_eval' over up to 'num_rows' rows of 'batch' starting at 'start_idx'
 // and appends each result to column->doubleVal, beginning at row 'output_row_idx'.
 static void DoubleExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
     int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
   auto& out = column->doubleVal;
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     const DoubleVal result = expr_eval->GetDoubleVal(it.Get());
     out.values.push_back(result.val);
     SetNullBit(output_row_idx++, result.is_null, &out.nulls);
   }
 }

 // Implementation for TIMESTAMP.
 // Evaluates 'expr_eval' over up to 'num_rows' rows of 'batch' starting at 'start_idx'
 // and appends each result, rendered as a string, to column->stringVal beginning at row
 // 'output_row_idx'. NULL results are stored as an empty string with the null bit set.
 static void TimestampExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
     RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
   auto& out = column->stringVal;
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     TimestampVal ts_val = expr_eval->GetTimestampVal(it.Get());
     // Always add a slot for the row; it is only filled in for non-NULL results.
     out.values.emplace_back();
     if (!ts_val.is_null) {
       out.values.back() = TimestampValue::FromTimestampVal(ts_val).ToString();
     }
     SetNullBit(output_row_idx++, ts_val.is_null, &out.nulls);
   }
 }

 // Implementation for DATE.
 // Evaluates 'expr_eval' over up to 'num_rows' rows of 'batch' starting at 'start_idx'
 // and appends each result, rendered as a string, to column->stringVal beginning at row
 // 'output_row_idx'. NULL results are stored as an empty string with the null bit set.
 static void DateExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
     RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
   auto& out = column->stringVal;
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     DateVal date_val = expr_eval->GetDateVal(it.Get());
     // Always add a slot for the row; it is only filled in for non-NULL results.
     out.values.emplace_back();
     if (!date_val.is_null) {
       out.values.back() = DateValue::FromDateVal(date_val).ToString();
     }
     SetNullBit(output_row_idx++, date_val.is_null, &out.nulls);
   }
 }

 // Common logic for BINARY, STRING and VARCHAR.
 // Evaluates 'expr_eval' as a StringVal over up to 'num_rows' rows of 'batch' starting
 // at 'start_idx'. Each result is appended to 'values' (an empty string for NULL) and its
 // null-ness is recorded in 'nulls', beginning at row 'output_row_idx'.
 static void StringExprValuesToHS2TColumnHelper(ScalarExprEvaluator* expr_eval,
     RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx,
     vector<string>&  values, string& nulls) {
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     StringVal str_val = expr_eval->GetStringVal(it.Get());
     if (!str_val.is_null) {
       // Copy exactly 'len' bytes; the payload is not treated as NUL-terminated.
       values.emplace_back(reinterpret_cast<char*>(str_val.ptr), str_val.len);
     } else {
       values.emplace_back();
     }
     SetNullBit(output_row_idx++, str_val.is_null, &nulls);
   }
 }

 // Implementation for STRING and VARCHAR. Delegates to the shared string helper, with
 // column->stringVal as the destination.
 static void StringExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
     int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
   auto& out = column->stringVal;
   StringExprValuesToHS2TColumnHelper(
       expr_eval, batch, start_idx, num_rows, output_row_idx, out.values, out.nulls);
 }

 // Implementation for BINARY. Identical to the STRING implementation except that the
 // results go to the Thrift field column->binaryVal.
 static void BinaryExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
     int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
   auto& out = column->binaryVal;
   StringExprValuesToHS2TColumnHelper(
       expr_eval, batch, start_idx, num_rows, output_row_idx, out.values, out.nulls);
 }


 // Implementation for CHAR.
 // Evaluates 'expr_eval' over up to 'num_rows' rows of 'batch' starting at 'start_idx'
 // and appends each result to column->stringVal, beginning at row 'output_row_idx'. The
 // number of bytes copied per value is the CHAR length declared in 'type', not the
 // length reported by the evaluated StringVal.
 static void CharExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
     const TColumnType& type, RowBatch* batch, int start_idx, int num_rows,
     uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* column) {
   const auto char_len = ColumnType::CreateCharType(type.types[0].scalar_type.len).len;
   auto& out = column->stringVal;
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     StringVal char_val = expr_eval->GetStringVal(it.Get());
     if (!char_val.is_null) {
       out.values.emplace_back(reinterpret_cast<const char*>(char_val.ptr), char_len);
     } else {
       out.values.emplace_back();
     }
     SetNullBit(output_row_idx++, char_val.is_null, &out.nulls);
   }
 }

 // Implementation for DECIMAL.
 // Evaluates 'expr_eval' over up to 'num_rows' rows of 'batch' starting at 'start_idx'
 // and appends each result, rendered as a string, to column->stringVal beginning at row
 // 'output_row_idx'. NULL results are stored as an empty string with the null bit set.
 static void DecimalExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
     const TColumnType& type, RowBatch* batch, int start_idx, int num_rows,
     uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* column) {
   // The decimal type and its storage width depend only on 'type', so compute them once
   // instead of once per row.
   const ColumnType& decimalType = ColumnType::FromThrift(type);
   const auto byte_size = decimalType.GetByteSize();
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     DecimalVal val = expr_eval->GetDecimalVal(it.Get());
     if (val.is_null) {
       column->stringVal.values.emplace_back();
     } else {
       // The byte size selects which member of DecimalVal holds the value.
       switch (byte_size) {
         case 4:
           column->stringVal.values.emplace_back(
               Decimal4Value(val.val4).ToString(decimalType));
           break;
         case 8:
           column->stringVal.values.emplace_back(
               Decimal8Value(val.val8).ToString(decimalType));
           break;
         case 16:
           column->stringVal.values.emplace_back(
               Decimal16Value(val.val16).ToString(decimalType));
           break;
         default:
           DCHECK(false) << "bad type: " << decimalType;
       }
     }
     SetNullBit(output_row_idx, val.is_null, &column->stringVal.nulls);
     ++output_row_idx;
   }
 }

 // Implementation for STRUCT.
 // Evaluates 'expr_eval' over up to 'num_rows' rows of 'batch' starting at 'start_idx'
 // and appends each struct, serialized as JSON text, to column->stringVal beginning at
 // row 'output_row_idx'. NULL structs are stored as an empty string with the null bit
 // set. 'stringify_map_keys' is passed through to the ComplexValueWriter.
 static void StructExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
     const TColumnType& type, RowBatch* batch, int start_idx, int num_rows,
     uint32_t output_row_idx, bool stringify_map_keys,
     apache::hive::service::cli::thrift::TColumn* column) {
   DCHECK(type.types.size() > 1);
   auto& out = column->stringVal;
   // Output buffer for rapidjson::Writer, shared by all rows to avoid re-allocating it.
   rapidjson::StringBuffer json_buf;
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     StructVal row_val = expr_eval->GetStructVal(it.Get());
     if (!row_val.is_null) {
       const impala::ScalarExpr& root_expr = expr_eval->root();
       // The root expression is expected to be a slot ref, which provides the slot
       // descriptor needed to interpret the struct.
       DCHECK(root_expr.IsSlotRef());
       const SlotDescriptor* slot_desc =
           static_cast<const SlotRef&>(root_expr).GetSlotDescriptor();
       DCHECK(slot_desc != nullptr);

       // Start from an empty buffer for every row.
       json_buf.Clear();
       rapidjson::Writer<rapidjson::StringBuffer> json_writer(json_buf);
       ComplexValueWriter<rapidjson::StringBuffer> value_writer(
           &json_writer, stringify_map_keys);
       value_writer.StructValToJSON(row_val, *slot_desc);

       out.values.emplace_back(json_buf.GetString());
     } else {
       out.values.emplace_back();
     }
     SetNullBit(output_row_idx++, row_val.is_null, &out.nulls);
   }
 }

 // Implementation for ARRAY and MAP.
 // Evaluates 'expr_eval' over up to 'num_rows' rows of 'batch' starting at 'start_idx'
 // and appends each collection, serialized as JSON text, to column->stringVal beginning
 // at row 'output_row_idx'. NULL collections are stored as an empty string with the null
 // bit set. 'stringify_map_keys' is passed through to the ComplexValueWriter.
 static void CollectionExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
     const TColumnType& type, RowBatch* batch, int start_idx, int num_rows,
     uint32_t output_row_idx, bool stringify_map_keys,
     apache::hive::service::cli::thrift::TColumn* column) {
   DCHECK(type.types.size() > 1);
   const TTypeNodeType::type thrift_coll_type = type.types[0].type;
   const bool is_array = (thrift_coll_type == TTypeNodeType::ARRAY);
   DCHECK(is_array || thrift_coll_type == TTypeNodeType::MAP);
   const PrimitiveType impala_coll_type =
       is_array ? PrimitiveType::TYPE_ARRAY : PrimitiveType::TYPE_MAP;

   auto& out = column->stringVal;
   // Output buffer for rapidjson::Writer, shared by all rows to avoid re-allocating it.
   rapidjson::StringBuffer json_buf;
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     CollectionVal row_val = expr_eval->GetCollectionVal(it.Get());
     if (!row_val.is_null) {
       const impala::ScalarExpr& root_expr = expr_eval->root();
       // The root expression is expected to be a slot ref, which provides the tuple
       // descriptor of the collection's items.
       DCHECK(root_expr.IsSlotRef());
       const TupleDescriptor* item_tuple_desc = root_expr.GetCollectionTupleDesc();
       DCHECK(item_tuple_desc != nullptr);
       CollectionValue coll_value(row_val);

       // Start from an empty buffer for every row.
       json_buf.Clear();
       rapidjson::Writer<rapidjson::StringBuffer> json_writer(json_buf);
       ComplexValueWriter<rapidjson::StringBuffer> value_writer(
           &json_writer, stringify_map_keys);
       value_writer.CollectionValueToJSON(coll_value, impala_coll_type, item_tuple_desc);

       out.values.emplace_back(json_buf.GetString());
     } else {
       out.values.emplace_back();
     }
     SetNullBit(output_row_idx++, row_val.is_null, &out.nulls);
   }
 }

 // For V6 and above.
 // Evaluates 'expr_eval' over up to 'num_rows' rows of 'batch' starting at 'start_idx'
 // and appends the results to the HS2 columnar result 'column', beginning at row
 // 'output_row_idx'. 'type' selects the per-type implementation and therefore which
 // member of 'column' is written. Before any value is appended, space for
 // 'expected_result_count' values is reserved in that member. 'stringify_map_keys' is
 // only used for complex types (STRUCT, ARRAY, MAP), which are emitted as JSON strings.
 void impala::ExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
     const TColumnType& type, RowBatch* batch, int start_idx, int num_rows,
     uint32_t output_row_idx, int expected_result_count, bool stringify_map_keys,
     apache::hive::service::cli::thrift::TColumn* column) {
   // Dispatch to a type-specific function for the loop over rows. This avoids branching
   // on the type for every row.
   // TODO: instead of relying on stamped out implementations, we could codegen this loop
   // to inline the expression evaluation into the loop body.
   switch (type.types[0].type) {
     case TTypeNodeType::STRUCT:
       ReserveSpace(expected_result_count, &column->stringVal);
       StructExprValuesToHS2TColumn(expr_eval, type, batch, start_idx, num_rows,
           output_row_idx, stringify_map_keys, column);
       return;
     case TTypeNodeType::ARRAY:
     case TTypeNodeType::MAP:
       ReserveSpace(expected_result_count, &column->stringVal);
       CollectionExprValuesToHS2TColumn(expr_eval, type, batch, start_idx, num_rows,
           output_row_idx, stringify_map_keys, column);
       return;
     default:
       // Scalar type: handled by the switch below.
       break;
   }

   switch (type.types[0].scalar_type.type) {
     case TPrimitiveType::NULL_TYPE:
       ReserveSpace(expected_result_count, &column->stringVal);
       NullExprValuesToHS2TColumn(
           expr_eval, batch, start_idx, num_rows, output_row_idx, column);
       // Must return here: falling through into the BOOLEAN case would evaluate the rows
       // a second time and also append them to column->boolVal.
       return;
     case TPrimitiveType::BOOLEAN:
       ReserveSpace(expected_result_count, &column->boolVal);
       BoolExprValuesToHS2TColumn(
           expr_eval, batch, start_idx, num_rows, output_row_idx, column);
       return;
     case TPrimitiveType::TINYINT:
       ReserveSpace(expected_result_count, &column->byteVal);
       TinyIntExprValuesToHS2TColumn(
           expr_eval, batch, start_idx, num_rows, output_row_idx, column);
       return;
     case TPrimitiveType::SMALLINT:
       ReserveSpace(expected_result_count, &column->i16Val);
       SmallIntExprValuesToHS2TColumn(
           expr_eval, batch, start_idx, num_rows, output_row_idx, column);
       return;
     case TPrimitiveType::INT:
       ReserveSpace(expected_result_count, &column->i32Val);
       IntExprValuesToHS2TColumn(
           expr_eval, batch, start_idx, num_rows, output_row_idx, column);
       return;
     case TPrimitiveType::BIGINT:
       ReserveSpace(expected_result_count, &column->i64Val);
       BigIntExprValuesToHS2TColumn(
           expr_eval, batch, start_idx, num_rows, output_row_idx, column);
       return;
     case TPrimitiveType::FLOAT:
       ReserveSpace(expected_result_count, &column->doubleVal);
       FloatExprValuesToHS2TColumn(
           expr_eval, batch, start_idx, num_rows, output_row_idx, column);
       return;
     case TPrimitiveType::DOUBLE:
       ReserveSpace(expected_result_count, &column->doubleVal);
       DoubleExprValuesToHS2TColumn(
           expr_eval, batch, start_idx, num_rows, output_row_idx, column);
       return;
     case TPrimitiveType::DATE:
       ReserveSpace(expected_result_count, &column->stringVal);
       DateExprValuesToHS2TColumn(
           expr_eval, batch, start_idx, num_rows, output_row_idx, column);
       return;
     case TPrimitiveType::TIMESTAMP:
       ReserveSpace(expected_result_count, &column->stringVal);
       TimestampExprValuesToHS2TColumn(
           expr_eval, batch, start_idx, num_rows, output_row_idx, column);
       return;
     case TPrimitiveType::STRING:
     case TPrimitiveType::VARCHAR:
       ReserveSpace(expected_result_count, &column->stringVal);
       StringExprValuesToHS2TColumn(
           expr_eval, batch, start_idx, num_rows, output_row_idx, column);
       return;
     case TPrimitiveType::BINARY:
       ReserveSpace(expected_result_count, &column->binaryVal);
       BinaryExprValuesToHS2TColumn(
           expr_eval, batch, start_idx, num_rows, output_row_idx, column);
       return;
     case TPrimitiveType::CHAR:
       ReserveSpace(expected_result_count, &column->stringVal);
       CharExprValuesToHS2TColumn(
           expr_eval, type, batch, start_idx, num_rows, output_row_idx, column);
       return;
     case TPrimitiveType::DECIMAL: {
       ReserveSpace(expected_result_count, &column->stringVal);
       DecimalExprValuesToHS2TColumn(
           expr_eval, type, batch, start_idx, num_rows, output_row_idx, column);
       return;
     }
     default:
       DCHECK(false) << "Unhandled type: "
           << TypeToString(ThriftToType(type.types[0].scalar_type.type));
   }
 }

 // For V1 -> V5
 void impala::TColumnValueToHS2TColumnValue(const TColumnValue& col_val,
     const TColumnType& type, thrift::TColumnValue* hs2_col_val) {
   // TODO: Handle complex types.
   DCHECK_EQ(1, type.types.size());
   DCHECK_EQ(TTypeNodeType::SCALAR, type.types[0].type);
   DCHECK_EQ(true, type.types[0].__isset.scalar_type);
   switch (type.types[0].scalar_type.type) {
     case TPrimitiveType::BOOLEAN:
       hs2_col_val->__isset.boolVal = true;
       hs2_col_val->boolVal.value = col_val.bool_val;
       hs2_col_val->boolVal.__isset.value = col_val.__isset.bool_val;
       break;
     case TPrimitiveType::TINYINT:
       hs2_col_val->__isset.byteVal = true;
       hs2_col_val->byteVal.value = col_val.byte_val;
       hs2_col_val->byteVal.__isset.value = col_val.__isset.byte_val;
       break;
     case TPrimitiveType::SMALLINT:
       hs2_col_val->__isset.i16Val = true;
       hs2_col_val->i16Val.value = col_val.short_val;
       hs2_col_val->i16Val.__isset.value = col_val.__isset.short_val;
       break;
     case TPrimitiveType::INT:
       hs2_col_val->__isset.i32Val = true;
       hs2_col_val->i32Val.value = col_val.int_val;
       hs2_col_val->i32Val.__isset.value = col_val.__isset.int_val;
       break;
     case TPrimitiveType::BIGINT:
       hs2_col_val->__isset.i64Val = true;
       hs2_col_val->i64Val.value = col_val.long_val;
       hs2_col_val->i64Val.__isset.value = col_val.__isset.long_val;
       break;
     case TPrimitiveType::FLOAT:
     case TPrimitiveType::DOUBLE:
       hs2_col_val->__isset.doubleVal = true;
       hs2_col_val->doubleVal.value = col_val.double_val;
       hs2_col_val->doubleVal.__isset.value = col_val.__isset.double_val;
       break;
     case TPrimitiveType::DECIMAL:
     case TPrimitiveType::STRING:
     case TPrimitiveType::TIMESTAMP:
     case TPrimitiveType::DATE:
     case TPrimitiveType::VARCHAR:
     case TPrimitiveType::CHAR:
     case TPrimitiveType::BINARY:
       // HiveServer2 requires timestamp to be presented as string. Note that the .thrift
       // spec says it should be a BIGINT; AFAICT Hive ignores that and produces a string.
       hs2_col_val->__isset.stringVal = true;
       hs2_col_val->stringVal.__isset.value = col_val.__isset.string_val;
       if (col_val.__isset.string_val) {
         hs2_col_val->stringVal.value = col_val.string_val;
       }
       break;
     default:
       DCHECK(false) << "bad type: "
                      << TypeToString(ThriftToType(type.types[0].scalar_type.type));
       break;
   }
 }

 // For V1 -> V5
 void impala::ExprValueToHS2TColumnValue(const void* value, const TColumnType& type,
     thrift::TColumnValue* hs2_col_val) {
   bool not_null = (value != NULL);
   // TODO: Handle complex types.
   DCHECK_EQ(1, type.types.size());
   DCHECK_EQ(TTypeNodeType::SCALAR, type.types[0].type);
   DCHECK_EQ(1, type.types[0].__isset.scalar_type);
   switch (type.types[0].scalar_type.type) {
     case TPrimitiveType::NULL_TYPE:
       // Set NULLs in the stringVal, but don't set the value itself.
       hs2_col_val->__isset.stringVal = true;
       hs2_col_val->stringVal.__isset.value = false;
       break;
     case TPrimitiveType::BOOLEAN:
       hs2_col_val->__isset.boolVal = true;
       if (not_null) hs2_col_val->boolVal.value = *reinterpret_cast<const bool*>(value);
       hs2_col_val->boolVal.__isset.value = not_null;
       break;
     case TPrimitiveType::TINYINT:
       hs2_col_val->__isset.byteVal = true;
       if (not_null) hs2_col_val->byteVal.value = *reinterpret_cast<const int8_t*>(value);
       hs2_col_val->byteVal.__isset.value = not_null;
       break;
     case TPrimitiveType::SMALLINT:
       hs2_col_val->__isset.i16Val = true;
       if (not_null) hs2_col_val->i16Val.value = *reinterpret_cast<const int16_t*>(value);
       hs2_col_val->i16Val.__isset.value = not_null;
       break;
     case TPrimitiveType::INT:
       hs2_col_val->__isset.i32Val = true;
       if (not_null) hs2_col_val->i32Val.value = *reinterpret_cast<const int32_t*>(value);
       hs2_col_val->i32Val.__isset.value = not_null;
       break;
     case TPrimitiveType::BIGINT:
       hs2_col_val->__isset.i64Val = true;
       if (not_null) hs2_col_val->i64Val.value = *reinterpret_cast<const int64_t*>(value);
       hs2_col_val->i64Val.__isset.value = not_null;
       break;
     case TPrimitiveType::FLOAT:
       hs2_col_val->__isset.doubleVal = true;
       if (not_null) hs2_col_val->doubleVal.value = *reinterpret_cast<const float*>(value);
       hs2_col_val->doubleVal.__isset.value = not_null;
       break;
     case TPrimitiveType::DOUBLE:
       hs2_col_val->__isset.doubleVal = true;
       if (not_null) {
         hs2_col_val->doubleVal.value = *reinterpret_cast<const double*>(value);
       }
       hs2_col_val->doubleVal.__isset.value = not_null;
       break;
     case TPrimitiveType::STRING:
     case TPrimitiveType::VARCHAR:
     // Unlike TColumn, TColumnValue does not differentiate between STRING and BINARY.
     case TPrimitiveType::BINARY:
       hs2_col_val->__isset.stringVal = true;
       hs2_col_val->stringVal.__isset.value = not_null;
       if (not_null) {
         const StringValue* string_val = reinterpret_cast<const StringValue*>(value);
         hs2_col_val->stringVal.value.assign(string_val->Ptr(), string_val->Len());
       }
       break;
     case TPrimitiveType::CHAR:
       hs2_col_val->__isset.stringVal = true;
       hs2_col_val->stringVal.__isset.value = not_null;
       if (not_null) {
         ColumnType char_type = ColumnType::CreateCharType(type.types[0].scalar_type.len);
         hs2_col_val->stringVal.value.assign(
            reinterpret_cast<const char*>(value), char_type.len);
       }
       break;
     case TPrimitiveType::DATE:
       // HiveServer2 requires date to be presented as string.
       hs2_col_val->__isset.stringVal = true;
       hs2_col_val->stringVal.__isset.value = not_null;
       if (not_null) {
         hs2_col_val->stringVal.value =
             reinterpret_cast<const DateValue*>(value)->ToString();
       }
       break;
     case TPrimitiveType::TIMESTAMP:
       // HiveServer2 requires timestamp to be presented as string.
       hs2_col_val->__isset.stringVal = true;
       hs2_col_val->stringVal.__isset.value = not_null;
       if (not_null) {
         RawValue::PrintValue(
             value, ColumnType(TYPE_TIMESTAMP), -1, &(hs2_col_val->stringVal.value));
       }
       break;
     case TPrimitiveType::DECIMAL: {
       // HiveServer2 requires decimal to be presented as string.
       hs2_col_val->__isset.stringVal = true;
       hs2_col_val->stringVal.__isset.value = not_null;
       const ColumnType& decimalType = ColumnType::FromThrift(type);
       if (not_null) {
         switch (decimalType.GetByteSize()) {
           case 4:
             hs2_col_val->stringVal.value =
               reinterpret_cast<const Decimal4Value*>(value)->ToString(decimalType);
             break;
           case 8:
             hs2_col_val->stringVal.value =
               reinterpret_cast<const Decimal8Value*>(value)->ToString(decimalType);
             break;
           case 16:
             hs2_col_val->stringVal.value =
               reinterpret_cast<const Decimal16Value*>(value)->ToString(decimalType);
             break;
           default:
             DCHECK(false) << "bad type: " << decimalType;
         }
       }
       break;
     }
     default:
       DCHECK(false) << "bad type: "
                      << TypeToString(ThriftToType(type.types[0].scalar_type.type));
       break;
   }
 }

 // Writes 'val' to the stream 'ss': the wrapped value if it is set, otherwise the
 // literal text "NULL". T must expose 'value' and '__isset.value'.
 template<typename T>
 void PrintVal(const T& val, ostream* ss) {
   if (!val.__isset.value) {
     (*ss) << "NULL";
     return;
   }
   (*ss) << val.value;
 }

 // Specialisation for byte values. Streaming the byte directly would print it as a
 // character, so it is widened to a 16-bit integer first to print as a number.
 template<>
 void PrintVal(const apache::hive::service::cli::thrift::TByteValue& val, ostream* ss) {
   if (!val.__isset.value) {
     (*ss) << "NULL";
     return;
   }
   (*ss) << static_cast<int16_t>(val.value);
 }

 // Writes a textual form of 'colval' to 'out'. The first member of 'colval' that is
 // marked as set is printed, checked in the order: bool, double, byte, i32, i16, i64,
 // string. Booleans print as "true"/"false"; an unset inner value, or no set member at
 // all, prints as "NULL".
 void impala::PrintTColumnValue(const thrift::TColumnValue& colval, stringstream* out) {
   const auto& present = colval.__isset;
   if (present.boolVal) {
     const auto& bool_val = colval.boolVal;
     if (!bool_val.__isset.value) {
       (*out) << "NULL";
       return;
     }
     (*out) << (bool_val.value ? "true" : "false");
     return;
   }
   if (present.doubleVal) {
     PrintVal(colval.doubleVal, out);
     return;
   }
   if (present.byteVal) {
     PrintVal(colval.byteVal, out);
     return;
   }
   if (present.i32Val) {
     PrintVal(colval.i32Val, out);
     return;
   }
   if (present.i16Val) {
     PrintVal(colval.i16Val, out);
     return;
   }
   if (present.i64Val) {
     PrintVal(colval.i64Val, out);
     return;
   }
   if (present.stringVal) {
     PrintVal(colval.stringVal, out);
     return;
   }
   // No member is set at all.
   (*out) << "NULL";
 }

 // Converts the HiveServer2 column value 'hive_colval', described by 'desc', into an
 // Impala TColumnValue. At most one field of the result is set: the one matching the
 // first Hive member that is both present and non-null. If no member qualifies, the
 // returned value has no field set.
 //
 // String-encoded Hive values are dispatched on the primitive type of the first type
 // entry in 'desc':
 //  - DATE: the string is parsed and stored in 'date_val'.
 //  - DECIMAL: the string is stored unchanged in 'decimal_val'.
 //  - STRING: the string is stored unchanged in 'string_val'.
 // Any other type hits a DCHECK in debug builds and yields an unset value otherwise.
 TColumnValue impala::ConvertToTColumnValue(
     const thrift::TColumnDesc& desc, const thrift::TColumnValue& hive_colval) {
   // By default, all values in Impala TColumnValue are unset. To set a value,
   // it must be present in a particular field in the Hive version and not null.
   TColumnValue colval;
   if (hive_colval.__isset.boolVal && hive_colval.boolVal.__isset.value) {
     colval.__set_bool_val(hive_colval.boolVal.value);
   } else if (hive_colval.__isset.doubleVal && hive_colval.doubleVal.__isset.value) {
     colval.__set_double_val(hive_colval.doubleVal.value);
   } else if (hive_colval.__isset.byteVal && hive_colval.byteVal.__isset.value) {
     colval.__set_byte_val(hive_colval.byteVal.value);
   } else if (hive_colval.__isset.i32Val && hive_colval.i32Val.__isset.value) {
     colval.__set_int_val(hive_colval.i32Val.value);
   } else if (hive_colval.__isset.i16Val && hive_colval.i16Val.__isset.value) {
     colval.__set_short_val(hive_colval.i16Val.value);
   } else if (hive_colval.__isset.i64Val && hive_colval.i64Val.__isset.value) {
     colval.__set_long_val(hive_colval.i64Val.value);
   } else if (hive_colval.__isset.stringVal && hive_colval.stringVal.__isset.value) {
     // The type of the string-encoded value is taken from the first type entry.
     DCHECK(!desc.typeDesc.types.empty());
     switch (desc.typeDesc.types[0].primitiveEntry.type) {
       // For Hive date type, the value is represented as a string, such as '2020-01-01'.
       // Convert the string to Epoch days.
       case thrift::TTypeId::DATE_TYPE:
         {
           DateValue d =
               DateValue::ParseSimpleDateFormat(hive_colval.stringVal.value, false);
           colval.__set_date_val(d.Value());
         }
         break;
       // For Hive decimal type, the value is represented as a string, such as '1.234567'.
       // Its precision and scale is contained in desc as type qualifiers.
       case thrift::TTypeId::DECIMAL_TYPE:
         {
           const std::map<std::string, thrift::TTypeQualifierValue>& qualifiers =
               desc.typeDesc.types[0].primitiveEntry.typeQualifiers.qualifiers;
           // Precision and scale are only reported in the log line below. A missing
           // qualifier still trips the DCHECK in debug builds, but the end iterator
           // must never be dereferenced when DCHECKs are compiled out, so -1 is
           // reported instead.
           const auto qualifier_or_unknown = [&qualifiers](const char* name) -> int {
             const auto it = qualifiers.find(name);
             DCHECK(it != qualifiers.end()) << "Unable to find " << name;
             return it == qualifiers.end() ? -1 : it->second.i32Value;
           };
           const int precision = qualifier_or_unknown("precision");
           const int scale = qualifier_or_unknown("scale");

           VLOG(3) << "Decimal in hive_colval: value=" << hive_colval.stringVal.value
                   << ", precision=" << precision
                   << ", scale=" << scale;

           colval.__set_decimal_val(hive_colval.stringVal.value);
         }
         break;
       case thrift::TTypeId::STRING_TYPE:
         colval.__set_string_val(hive_colval.stringVal.value);
         break;
       default:
         DCHECK(false) << "Unsupported conversion for hive type "
                       << desc.typeDesc.types[0];
     }
   }
   return colval;
 }

 // Streams the field of 'value' that is set into 'out'. The fields are checked in the
 // order bool, double, byte, int, short, long, string, binary, timestamp, decimal,
 // date, and only the first one found to be set is written. Nothing is written when
 // no field is set.
 void impala::PrintTColumnValue(const impala::TColumnValue& value, stringstream* out) {
   if (value.__isset.bool_val) {
     *out << value.bool_val;
   } else if (value.__isset.double_val) {
     *out << value.double_val;
   } else if (value.__isset.byte_val) {
     // Widen before streaming: a byte would otherwise be interpreted as a character
     // value, not an integer, when printed to the stringstream (same treatment as the
     // TByteValue specialisation of PrintVal()).
     *out << static_cast<int16_t>(value.byte_val);
   } else if (value.__isset.int_val) {
     *out << value.int_val;
   } else if (value.__isset.short_val) {
     *out << value.short_val;
   } else if (value.__isset.long_val) {
     *out << value.long_val;
   } else if (value.__isset.string_val) {
     *out << value.string_val;
   } else if (value.__isset.binary_val) {
     *out << value.binary_val;
   } else if (value.__isset.timestamp_val) {
     *out << value.timestamp_val;
   } else if (value.__isset.decimal_val) {
     *out << value.decimal_val;
   } else if (value.__isset.date_val) {
     *out << value.date_val;
   }
 }

 // Convenience overload: returns the text that the stream-based PrintTColumnValue()
 // produces for 'value'.
 string impala::PrintTColumnValue(const impala::TColumnValue& value) {
   std::stringstream rendered;
   PrintTColumnValue(value, &rendered);
   return rendered.str();
 }

 // Returns true if at least one of the value fields of 'value' is marked as set, and
 // false if none is. Note that this does not verify that exactly one field is set.
 bool impala::isOneFieldSet(const impala::TColumnValue& value) {
   const auto& present = value.__isset;
   if (present.bool_val || present.double_val) return true;
   if (present.byte_val || present.int_val) return true;
   if (present.short_val || present.long_val) return true;
   if (present.string_val || present.binary_val) return true;
   if (present.timestamp_val || present.decimal_val) return true;
   return present.date_val;
 }

 // Translates the Impala column type 'columnType' into a HiveServer2 type entry holding
 // a single primitive entry.
 //  - DECIMAL additionally carries its precision and scale as type qualifiers.
 //  - CHAR and VARCHAR additionally carry their maximum length as a type qualifier.
 //  - TYPE_STRING is reported as BINARY when the column type is a binary type, and as
 //    STRING otherwise.
 //  - STRUCT, ARRAY and MAP are reported as STRING.
 //  - Every other type hits a DCHECK in debug builds and is reported as STRING
 //    otherwise.
 thrift::TTypeEntry impala::ColumnToHs2Type(
     const TColumnType& columnType) {
   const ColumnType& type = ColumnType::FromThrift(columnType);

   // Wraps a 32-bit integer into a HS2 type qualifier value.
   const auto make_i32_qualifier = [](int32_t number) {
     thrift::TTypeQualifierValue qualifier;
     qualifier.__set_i32Value(number);
     return qualifier;
   };

   thrift::TPrimitiveTypeEntry type_entry;
   // The switch below only selects the HS2 type id (and attaches qualifiers where
   // needed); the id is stored into 'type_entry' once, after the switch.
   auto hs2_type_id = thrift::TTypeId::STRING_TYPE;
   switch (type.type) {
     case TYPE_NULL:
       hs2_type_id = thrift::TTypeId::NULL_TYPE;
       break;
     case TYPE_BOOLEAN:
       hs2_type_id = thrift::TTypeId::BOOLEAN_TYPE;
       break;
     case TYPE_TINYINT:
       hs2_type_id = thrift::TTypeId::TINYINT_TYPE;
       break;
     case TYPE_SMALLINT:
       hs2_type_id = thrift::TTypeId::SMALLINT_TYPE;
       break;
     case TYPE_INT:
       hs2_type_id = thrift::TTypeId::INT_TYPE;
       break;
     case TYPE_BIGINT:
       hs2_type_id = thrift::TTypeId::BIGINT_TYPE;
       break;
     case TYPE_FLOAT:
       hs2_type_id = thrift::TTypeId::FLOAT_TYPE;
       break;
     case TYPE_DOUBLE:
       hs2_type_id = thrift::TTypeId::DOUBLE_TYPE;
       break;
     case TYPE_DATE:
       hs2_type_id = thrift::TTypeId::DATE_TYPE;
       break;
     case TYPE_TIMESTAMP:
       hs2_type_id = thrift::TTypeId::TIMESTAMP_TYPE;
       break;
     case TYPE_STRING:
       hs2_type_id = type.IsBinaryType()
           ? thrift::TTypeId::BINARY_TYPE : thrift::TTypeId::STRING_TYPE;
       break;
     case TYPE_DECIMAL: {
       thrift::TTypeQualifiers decimal_quals;
       decimal_quals.qualifiers[thrift::g_TCLIService_constants.PRECISION] =
           make_i32_qualifier(type.precision);
       decimal_quals.qualifiers[thrift::g_TCLIService_constants.SCALE] =
           make_i32_qualifier(type.scale);
       type_entry.__set_typeQualifiers(decimal_quals);
       hs2_type_id = thrift::TTypeId::DECIMAL_TYPE;
       break;
     }
     case TYPE_CHAR:
     case TYPE_VARCHAR: {
       thrift::TTypeQualifiers length_quals;
       length_quals.qualifiers[thrift::g_TCLIService_constants.CHARACTER_MAXIMUM_LENGTH]
           = make_i32_qualifier(type.len);
       type_entry.__set_typeQualifiers(length_quals);
       if (type.type == TYPE_CHAR) {
         hs2_type_id = thrift::TTypeId::CHAR_TYPE;
       } else {
         hs2_type_id = thrift::TTypeId::VARCHAR_TYPE;
       }
       break;
     }
     case TYPE_STRUCT:
     case TYPE_ARRAY:
     case TYPE_MAP:
       hs2_type_id = thrift::TTypeId::STRING_TYPE;
       break;
     case TYPE_BINARY:
     default:
       // HiveServer2 does not have a type for invalid, datetime or
       // fixed_uda_intermediate. Binary should be stored as TYPE_STRING, not
       // TYPE_BINARY in the backend.
       DCHECK(false) << "bad TypeToTValueType() type: " << type.DebugString();
       hs2_type_id = thrift::TTypeId::STRING_TYPE;
   }
   type_entry.__set_type(hs2_type_id);

   thrift::TTypeEntry result;
   result.__set_primitiveEntry(type_entry);
   return result;
 }