blob: e22dc6bf18a41a0eac3997a69175593e4fa50be1 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "runtime/complex-value-writer.h"
#include <string>
#include "runtime/raw-value.inline.h"
#include "util/coding-util.h"
namespace impala {
using impala_udf::StructVal;
namespace {

// Returns true if a primitive value of 'type' is emitted as a quoted JSON string, i.e.
// if 'type' is a string type, a date or a timestamp.
bool IsPrimitiveTypePrintedAsString(const ColumnType& type) {
  if (type.IsStringType()) return true;
  if (type.IsDateType()) return true;
  return type.IsTimestampType();
}

} // anonymous namespace
// Writes a primitive value. If 'map_key' is true, treats the value as a
// map key. Maps are printed as JSON objects, but in Impala maps can have non-string keys
// while JSON objects can't. We circumvent this rule by writing values in the following
// way:
//  - if 'stringify_map_keys_' is false, we write raw values with the
//    'rapidjson::kStringType' type when printing map keys; these non-string key values
//    will not have quotes around them in the printed text.
//  - if 'stringify_map_keys_' is true, we convert the map keys to strings and write them
//    with quotes.
// If 'map_key' is false, 'stringify_map_keys_' has no effect.
//
// 'value' must point to a non-NULL value of type 'type'. Values printed as strings are
// passed to the writer together with their length, so embedded NUL characters do not
// truncate the output. Binary values are base64 encoded before being written.
template <class JsonStream>
void ComplexValueWriter<JsonStream>::PrimitiveValueToJSON(void* value,
    const ColumnType& type, bool map_key) {
  // Booleans that are not map keys are written through the writer's Bool() call.
  // Boolean map keys fall through to the textual path below.
  if (type.IsBooleanType() && !map_key) {
    writer_->Bool(*(reinterpret_cast<bool*>(value)));
    return;
  }

  string tmp;
  // Passing -1 as 'scale' means we do not modify the precision setting of the stream
  // (used when printing floating point values).
  constexpr int scale = -1;
  RawValue::PrintValue(value, type, scale, &tmp);

  const bool should_convert_to_string = map_key && stringify_map_keys_;
  if (IsPrimitiveTypePrintedAsString(type) || should_convert_to_string) {
    if (type.IsBinaryType()) {
      int64_t base64_max_len;
      bool succ = Base64EncodeBufLen(tmp.size(), &base64_max_len);
      DCHECK(succ);
      // 'base64_max_len' includes the null terminator.
      string buf(base64_max_len - 1, '\0');
      unsigned base64_len = 0;
      succ = Base64Encode(tmp.c_str(), tmp.size(), base64_max_len, buf.data(),
          &base64_len);
      DCHECK(succ);
      DCHECK(base64_len <= buf.size());
      // Keep only the bytes reported by the encoder so that the explicit length passed
      // to the writer below never covers trailing padding of the buffer.
      buf.resize(base64_len);
      tmp = std::move(buf);
    }
    // Pass the length explicitly: the single-argument String() overload stops at the
    // first NUL character and would silently truncate values containing '\0'.
    writer_->String(tmp.c_str(), static_cast<rapidjson::SizeType>(tmp.size()));
  } else {
    // Written verbatim without quotes. Map keys must be declared as kStringType, see the
    // comment above the function.
    writer_->RawValue(tmp.c_str(), tmp.size(),
        map_key ? rapidjson::kStringType : rapidjson::kNumberType);
  }
}
// Writes a NULL value. 'map_key' tells whether the NULL is written in map key position.
template <class JsonStream>
void ComplexValueWriter<JsonStream>::WriteNull(bool map_key) {
  // writer_->Null() is not used because it does not work for map keys: maps are modelled
  // as JSON objects and JSON object keys have to be strings, they cannot be nulls. To
  // get around this, the null literal is written as a quoted string for map keys when
  // 'stringify_map_keys_' is true, and as a raw unquoted value in every other case.
  constexpr char const* null_literal = RawValue::NullLiteral(/*top_level*/ false);
  const bool quote_null = map_key && stringify_map_keys_;
  if (!quote_null) {
    const int null_literal_len = std::char_traits<char>::length(null_literal);
    writer_->RawValue(null_literal, null_literal_len, rapidjson::kStringType);
    return;
  }
  writer_->String(null_literal);
}
// Structs in collections are not converted to StructVal but are left as they are in the
// tuple.
//
// Writes the struct described by 'struct_slot_desc' as a JSON object. The children of
// the struct are read from 'item_tuple' using the slots of the children tuple
// descriptor of 'struct_slot_desc'; the object keys are the field names of the struct
// type. Nested structs are read from the same 'item_tuple'.
template <class JsonStream>
void ComplexValueWriter<JsonStream>::StructInCollectionToJSON(Tuple* item_tuple,
    const SlotDescriptor& struct_slot_desc) {
  const TupleDescriptor* children_tuple_desc =
      struct_slot_desc.children_tuple_descriptor();
  DCHECK(children_tuple_desc != nullptr);

  const ColumnType& struct_type = struct_slot_desc.type();
  const std::vector<SlotDescriptor*>& child_slots = children_tuple_desc->slots();

  DCHECK(struct_type.type == TYPE_STRUCT);
  DCHECK_EQ(child_slots.size(), struct_type.children.size());

  writer_->StartObject();
  for (size_t i = 0; i < child_slots.size(); ++i) {
    writer_->String(struct_type.field_names[i].c_str());

    const SlotDescriptor& child_slot_desc = *child_slots[i];
    bool element_is_null = item_tuple->IsNull(child_slot_desc.null_indicator_offset());
    if (element_is_null) {
      // A struct field value is never a map key. Note that passing 'writer_' here would
      // compile (a pointer converts implicitly to bool) but would be treated as
      // 'map_key == true', so the NULL would be quoted when 'stringify_map_keys_' is set.
      WriteNull(false);
      continue;
    }

    const ColumnType& child_type = child_slot_desc.type();
    void* child = item_tuple->GetSlot(child_slot_desc.tuple_offset());
    if (child_type.IsStructType()) {
      StructInCollectionToJSON(item_tuple, child_slot_desc);
    } else if (child_type.IsCollectionType()) {
      const CollectionValue* nested_collection_val =
          reinterpret_cast<CollectionValue*>(child);
      const TupleDescriptor* child_item_tuple_desc =
          child_slot_desc.children_tuple_descriptor();
      DCHECK(child_item_tuple_desc != nullptr);
      CollectionValueToJSON(*nested_collection_val, child_type.type,
          child_item_tuple_desc);
    } else {
      PrimitiveValueToJSON(child, child_type, false);
    }
  }
  writer_->EndObject();
}
// Writes a single collection element, i.e. the slot described by 'slot_desc' within
// 'item_tuple'. 'map_key' tells whether the element is in map key position; it is
// forwarded when writing NULLs and primitive values.
template <class JsonStream>
void ComplexValueWriter<JsonStream>::CollectionElementToJSON(Tuple* item_tuple,
    const SlotDescriptor& slot_desc, bool map_key) {
  void* element = item_tuple->GetSlot(slot_desc.tuple_offset());
  DCHECK(element != nullptr);
  const ColumnType& element_type = slot_desc.type();

  // NULL elements take precedence over any type-based dispatch.
  if (item_tuple->IsNull(slot_desc.null_indicator_offset())) {
    WriteNull(map_key);
    return;
  }

  if (element_type.IsStructType()) {
    DCHECK(!map_key) << "Structs cannot be map keys.";
    StructInCollectionToJSON(item_tuple, slot_desc);
    return;
  }

  if (!element_type.IsCollectionType()) {
    PrimitiveValueToJSON(element, element_type, map_key);
    return;
  }

  // Nested collection: the slot holds a CollectionValue whose items are laid out
  // according to the children tuple descriptor of the slot.
  const CollectionValue* nested_collection =
      reinterpret_cast<CollectionValue*>(element);
  const TupleDescriptor* nested_item_tuple_desc = slot_desc.children_tuple_descriptor();
  DCHECK(nested_item_tuple_desc != nullptr);
  ComplexValueWriter::CollectionValueToJSON(*nested_collection, element_type.type,
      nested_item_tuple_desc);
}
// Writes 'array_value' as a JSON array. The item tuples are expected to have exactly
// one slot, which holds the array element.
template <class JsonStream>
void ComplexValueWriter<JsonStream>::ArrayValueToJSON(const CollectionValue& array_value,
    const TupleDescriptor* item_tuple_desc) {
  DCHECK(item_tuple_desc != nullptr);
  const vector<SlotDescriptor*>& item_slots = item_tuple_desc->slots();
  DCHECK(item_slots.size() == 1);
  DCHECK(item_slots[0] != nullptr);
  const int tuple_size = item_tuple_desc->byte_size();

  writer_->StartArray();
  for (int idx = 0; idx < array_value.num_tuples; ++idx) {
    // Item tuples are stored back to back, 'tuple_size' bytes apart.
    Tuple* current_tuple =
        reinterpret_cast<Tuple*>(array_value.ptr + idx * tuple_size);
    CollectionElementToJSON(current_tuple, *item_slots[0], false);
  }
  writer_->EndArray();
}
// Writes 'map_value' as a JSON object. The item tuples are expected to have exactly two
// slots: the first one holds the key and the second one holds the value.
template <class JsonStream>
void ComplexValueWriter<JsonStream>::MapValueToJSON(const CollectionValue& map_value,
    const TupleDescriptor* item_tuple_desc) {
  DCHECK(item_tuple_desc != nullptr);
  const vector<SlotDescriptor*>& item_slots = item_tuple_desc->slots();
  DCHECK(item_slots.size() == 2);
  DCHECK(item_slots[0] != nullptr);
  DCHECK(item_slots[1] != nullptr);
  const int tuple_size = item_tuple_desc->byte_size();

  writer_->StartObject();
  for (int idx = 0; idx < map_value.num_tuples; ++idx) {
    // Item tuples are stored back to back, 'tuple_size' bytes apart.
    Tuple* current_tuple = reinterpret_cast<Tuple*>(map_value.ptr + idx * tuple_size);

    // The key comes first and is written in map key mode, then the value follows.
    CollectionElementToJSON(current_tuple, *item_slots[0], true);
    CollectionElementToJSON(current_tuple, *item_slots[1], false);
  }
  writer_->EndObject();
}
template <class JsonStream>
ComplexValueWriter<JsonStream>::ComplexValueWriter(rapidjson::Writer<JsonStream>* writer,
bool stringify_map_keys)
: writer_(writer),
stringify_map_keys_(stringify_map_keys) {}
// Writes 'collection_value' in JSON format: as a JSON object if 'collection_type' is
// TYPE_MAP, otherwise as a JSON array ('collection_type' is then expected to be
// TYPE_ARRAY). 'item_tuple_desc' describes the item tuples of the collection.
template <class JsonStream>
void ComplexValueWriter<JsonStream>::CollectionValueToJSON(
    const CollectionValue& collection_value, PrimitiveType collection_type,
    const TupleDescriptor* item_tuple_desc) {
  if (collection_type == PrimitiveType::TYPE_MAP) {
    MapValueToJSON(collection_value, item_tuple_desc);
    return;
  }
  DCHECK_EQ(collection_type, PrimitiveType::TYPE_ARRAY);
  ArrayValueToJSON(collection_value, item_tuple_desc);
}
// Writes 'struct_val' as a JSON object. 'slot_desc' describes the struct: the object
// keys are the field names of its type and the children are written according to the
// slots of its children tuple descriptor. A child whose pointer is null is written as
// NULL.
template <class JsonStream>
void ComplexValueWriter<JsonStream>::StructValToJSON(const StructVal& struct_val,
    const SlotDescriptor& slot_desc) {
  const ColumnType& struct_type = slot_desc.type();
  DCHECK(struct_type.type == TYPE_STRUCT);
  DCHECK_EQ(struct_val.num_children, struct_type.children.size());

  const TupleDescriptor* children_item_tuple_desc = slot_desc.children_tuple_descriptor();
  DCHECK(children_item_tuple_desc != nullptr);
  const std::vector<SlotDescriptor*>& field_slot_descs =
      children_item_tuple_desc->slots();
  DCHECK_EQ(struct_val.num_children, field_slot_descs.size());

  writer_->StartObject();
  for (int field_idx = 0; field_idx < struct_val.num_children; ++field_idx) {
    writer_->String(struct_type.field_names[field_idx].c_str());

    void* field = (void*)(struct_val.ptr[field_idx]);
    const SlotDescriptor& field_slot_desc = *field_slot_descs[field_idx];
    const ColumnType& field_type = struct_type.children[field_idx];
    DCHECK_EQ(field_type, field_slot_desc.type());

    if (field == nullptr) {
      // Struct fields are never map keys.
      WriteNull(false);
      continue;
    }
    if (field_type.IsStructType()) {
      StructValToJSON(*static_cast<StructVal*>(field), field_slot_desc);
      continue;
    }
    if (field_type.IsCollectionType()) {
      CollectionValue* collection_field = reinterpret_cast<CollectionValue*>(field);
      ComplexValueWriter::CollectionValueToJSON(*collection_field, field_type.type,
          field_slot_desc.children_tuple_descriptor());
      continue;
    }
    PrimitiveValueToJSON(field, field_type, false);
  }
  writer_->EndObject();
}
} // namespace impala