blob: 4a0acb92c0b198fbd6f197d28bf1798d767cc595 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <sstream>
#include <iomanip>
#include <boost/algorithm/string/replace.hpp>
#include "NodeImpl.hh"
using std::string;
namespace avro {
namespace {
// Escape string for serialization.
string escape(const string &unescaped) {
string s;
s.reserve(unescaped.length());
for (std::string::const_iterator it = unescaped.begin(); it != unescaped.end(); ++it) {
char c = *it;
switch (c) {
case '\\':
case '"':
case '/':
s += '\\';
s += c;
break;
case '\b':
s += '\\';
s += 'b';
break;
case '\f':
s += '\f';
break;
case '\n':
s += '\\';
s += 'n';
break;
case '\r':
s += '\\';
s += 'r';
break;
case '\t':
s += '\\';
s += 't';
break;
default:
if (!std::iscntrl(c, std::locale::classic())) {
s += c;
continue;
}
s += intToHex(static_cast<unsigned int>(c));
break;
}
}
return s;
}
// Wrap an indentation depth in a struct so that it can be written with the
// ostream operator<<, e.g. `os << indent(depth) << "text"`.
struct indent {
    indent(int depth) :
        d(depth)
    { }
    int d;  // number of indentation units to write
};

/// ostream operator for indent.
/// Writes the indentation unit `x.d` times to `os` and returns `os`.
/// A zero or negative depth writes nothing (a negative value used to keep
/// the countdown loop running until the counter overflowed).
std::ostream& operator <<(std::ostream &os, indent x)
{
    static const std::string spaces(" ");
    for (int written = 0; written < x.d; ++written) {
        os << spaces;
    }
    return os;
}
} // anonymous namespace
// Width, in characters, of the slot reserved for each byte when a bytes or
// fixed default value is rendered as text: every byte's intToHex() result is
// written into its own slot of this size.
// NOTE(review): presumably 6 == strlen("\uXXXX"); confirm against intToHex().
const int kByteStringSize = 6;
/// Resolve this primitive schema against `reader`.
///
/// Identical types give RESOLVE_MATCH. Otherwise the numeric widenings are
/// tried from narrowest to widest target:
///   int              -> long   : RESOLVE_PROMOTABLE_TO_LONG
///   int, long        -> float  : RESOLVE_PROMOTABLE_TO_FLOAT
///   int, long, float -> double : RESOLVE_PROMOTABLE_TO_DOUBLE
/// Anything else is handed to furtherResolution(reader).
SchemaResolution
NodePrimitive::resolve(const Node &reader) const
{
    if (type() == reader.type()) {
        return RESOLVE_MATCH;
    }

    // Position of this type in the int < long < float widening chain.
    int rank = 0;
    switch (type()) {
        case AVRO_INT:
            rank = 0;
            break;
        case AVRO_LONG:
            rank = 1;
            break;
        case AVRO_FLOAT:
            rank = 2;
            break;
        default:
            // Not a promotable numeric type.
            return furtherResolution(reader);
    }

    if (rank == 0 && reader.type() == AVRO_LONG) {
        return RESOLVE_PROMOTABLE_TO_LONG;
    }
    if (rank <= 1 && reader.type() == AVRO_FLOAT) {
        return RESOLVE_PROMOTABLE_TO_FLOAT;
    }
    if (reader.type() == AVRO_DOUBLE) {
        return RESOLVE_PROMOTABLE_TO_DOUBLE;
    }
    return furtherResolution(reader);
}
/// Resolve this record schema against `reader`.
/// A reader that is also a record with an equal name is a RESOLVE_MATCH;
/// every other case is handed to furtherResolution(reader).
SchemaResolution
NodeRecord::resolve(const Node &reader) const
{
    if (reader.type() != AVRO_RECORD) {
        return furtherResolution(reader);
    }
    if (!(name() == reader.name())) {
        return furtherResolution(reader);
    }
    return RESOLVE_MATCH;
}
/// Resolve this enum schema against `reader`.
/// When the reader is an enum the result depends only on the names:
/// equal names give RESOLVE_MATCH, different names RESOLVE_NO_MATCH.
/// A non-enum reader is handed to furtherResolution(reader).
SchemaResolution
NodeEnum::resolve(const Node &reader) const
{
    if (reader.type() != AVRO_ENUM) {
        return furtherResolution(reader);
    }
    if (name() == reader.name()) {
        return RESOLVE_MATCH;
    }
    return RESOLVE_NO_MATCH;
}
/// Resolve this array schema against `reader`.
/// Two arrays resolve exactly as their item schemas (leaf 0) do; a non-array
/// reader is handed to furtherResolution(reader).
SchemaResolution
NodeArray::resolve(const Node &reader) const
{
    if (reader.type() != AVRO_ARRAY) {
        return furtherResolution(reader);
    }
    const NodePtr &writerItems = leafAt(0);
    const NodePtr &readerItems = reader.leafAt(0);
    return writerItems->resolve(*readerItems);
}
/// Resolve this map schema against `reader`.
/// Two maps resolve exactly as the schemas stored at leaf 1 of each do;
/// a non-map reader is handed to furtherResolution(reader).
SchemaResolution
NodeMap::resolve(const Node &reader) const
{
    if (reader.type() != AVRO_MAP) {
        return furtherResolution(reader);
    }
    const NodePtr &writerValues = leafAt(1);
    const NodePtr &readerValues = reader.leafAt(1);
    return writerValues->resolve(*readerValues);
}
/// Resolve this union schema against `reader`.
///
/// For a union the real resolution can only happen once the branch that was
/// actually written is known, so this only reports whether some branch could
/// match. Branches are tried in order:
///  - the first branch resolving to RESOLVE_MATCH ends the search;
///  - otherwise the result of the first branch that is not RESOLVE_NO_MATCH
///    is kept;
///  - RESOLVE_NO_MATCH is returned when no branch resolves at all.
SchemaResolution
NodeUnion::resolve(const Node &reader) const
{
    SchemaResolution best = RESOLVE_NO_MATCH;
    for (size_t branch = 0; branch != leaves(); ++branch) {
        const SchemaResolution candidate = leafAt(branch)->resolve(reader);
        if (candidate == RESOLVE_MATCH) {
            return RESOLVE_MATCH;
        }
        // Keep only the first result that is better than "no match".
        if (best == RESOLVE_NO_MATCH) {
            best = candidate;
        }
    }
    return best;
}
/// Resolve this fixed schema against `reader`.
/// When the reader is a fixed type, both the size and the name must be equal
/// for RESOLVE_MATCH; otherwise the result is RESOLVE_NO_MATCH. A non-fixed
/// reader is handed to furtherResolution(reader).
SchemaResolution
NodeFixed::resolve(const Node &reader) const
{
    if (reader.type() != AVRO_FIXED) {
        return furtherResolution(reader);
    }
    if ((reader.fixedSize() == fixedSize()) && (reader.name() == name())) {
        return RESOLVE_MATCH;
    }
    return RESOLVE_NO_MATCH;
}
/// Resolve against `reader` by forwarding to the node stored at leaf 0.
SchemaResolution
NodeSymbolic::resolve(const Node &reader) const
{
    return leafAt(0)->resolve(reader);
}
void
NodePrimitive::printJson(std::ostream &os, int depth) const
{
bool hasLogicalType = logicalType().type() != LogicalType::NONE;
if (hasLogicalType) {
os << "{\n" << indent(depth) << "\"type\": ";
}
os << '\"' << type() << '\"';
if (hasLogicalType) {
os << ",\n" << indent(depth);
logicalType().printJson(os);
os << "\n}";
}
if (getDoc().size()) {
os << ",\n" << indent(depth) << "\"doc\": \""
<< escape(getDoc()) << "\"";
}
}
/// Write this symbolic reference to `os` as its quoted name, followed by a
/// ",\n"-separated "doc" key (indented by `depth`) when the doc string is
/// not empty.
void
NodeSymbolic::printJson(std::ostream &os, int depth) const
{
    os << '\"' << nameAttribute_.get() << '\"';

    const bool documented = getDoc().size() != 0;
    if (!documented) {
        return;
    }
    os << ",\n" << indent(depth) << "\"doc\": \""
       << escape(getDoc()) << "\"";
}
// Write the "namespace" (only when it is not empty) and "name" keys for `n`
// to `os`. Each key goes on its own line, indented by `depth` and terminated
// with ",\n".
static void printName(std::ostream& os, const Name& n, int depth)
{
    const bool hasNamespace = !n.ns().empty();
    if (hasNamespace) {
        os << indent(depth) << "\"namespace\": \"" << n.ns() << "\",\n";
    }

    os << indent(depth) << "\"name\": \"" << n.simpleName() << "\",\n";
}
/// Write this record schema to `os` as a JSON object.
///
/// The object holds "type", the name keys written by printName(), an
/// optional "doc" and a "fields" array. Each field is an object with "name",
/// "type" and, when default values are present, a "default" key. The
/// "default" key is omitted for a default that is a non-union null.
/// `depth` is the indentation level of the closing brace.
void
NodeRecord::printJson(std::ostream &os, int depth) const
{
    const int memberDepth = depth + 1;       // keys of the record object
    const int fieldDepth = depth + 2;        // braces of each field object
    const int fieldMemberDepth = depth + 3;  // keys inside a field object

    os << "{\n";
    os << indent(memberDepth) << "\"type\": \"record\",\n";
    printName(os, nameAttribute_.get(), memberDepth);
    if (getDoc().size() != 0) {
        os << indent(memberDepth) << "\"doc\": \""
           << escape(getDoc()) << "\",\n";
    }
    os << indent(memberDepth) << "\"fields\": [";

    const size_t fieldCount = leafAttributes_.size();
    // Defaults are either absent altogether or given for every field.
    assert(defaultValues.empty() || (defaultValues.size() == fieldCount));

    for (size_t idx = 0; idx < fieldCount; ++idx) {
        if (idx != 0) {
            os << ',';
        }
        os << '\n' << indent(fieldDepth) << "{\n";
        os << indent(fieldMemberDepth) << "\"name\": \"" << leafNameAttributes_.get(idx) << "\",\n";
        os << indent(fieldMemberDepth) << "\"type\": ";
        leafAttributes_.get(idx)->printJson(os, fieldMemberDepth);

        if (!defaultValues.empty()) {
            // A non-union null default means "no default": skip the key.
            const bool plainNull = !defaultValues[idx].isUnion() &&
                defaultValues[idx].type() == AVRO_NULL;
            if (!plainNull) {
                os << ",\n" << indent(fieldMemberDepth) << "\"default\": ";
                leafAttributes_.get(idx)->printDefaultToJson(defaultValues[idx], os,
                                                             fieldMemberDepth);
            }
        }

        os << '\n';
        os << indent(fieldDepth) << '}';
    }

    os << '\n' << indent(memberDepth) << "]\n";
    os << indent(depth) << '}';
}
void NodePrimitive::printDefaultToJson(const GenericDatum &g, std::ostream &os,
int depth) const {
assert(isPrimitive(g.type()));
switch (g.type()) {
case AVRO_NULL:
os << "null";
break;
case AVRO_BOOL:
os << (g.value<bool>() ? "true" : "false");
break;
case AVRO_INT:
os << g.value<int32_t>();
break;
case AVRO_LONG:
os << g.value<int64_t>();
break;
case AVRO_FLOAT:
os << g.value<float>();
break;
case AVRO_DOUBLE:
os << g.value<double>();
break;
case AVRO_STRING:
os << "\"" << escape(g.value<string>()) << "\"";
break;
case AVRO_BYTES: {
// Convert to a string:
const std::vector<uint8_t> &vg = g.value<std::vector<uint8_t> >();
string s;
s.resize(vg.size() * kByteStringSize);
for (unsigned int i = 0; i < vg.size(); i++) {
string hex_string = intToHex(static_cast<int>(vg[i]));
s.replace(i*kByteStringSize, kByteStringSize, hex_string);
}
os << "\"" << s << "\"";
} break;
default:
break;
}
}
void NodeEnum::printDefaultToJson(const GenericDatum &g, std::ostream &os,
int depth) const {
assert(g.type() == AVRO_ENUM);
os << "\"" << g.value<GenericEnum>().symbol() << "\"";
}
void NodeFixed::printDefaultToJson(const GenericDatum &g, std::ostream &os,
int depth) const {
assert(g.type() == AVRO_FIXED);
// ex: "\uOOff"
// Convert to a string
const std::vector<uint8_t> &vg = g.value<GenericFixed>().value();
string s;
s.resize(vg.size() * kByteStringSize);
for (unsigned int i = 0; i < vg.size(); i++) {
string hex_string = intToHex(static_cast<int>(vg[i]));
s.replace(i*kByteStringSize, kByteStringSize, hex_string);
}
os << "\"" << s << "\"";
}
void NodeUnion::printDefaultToJson(const GenericDatum &g, std::ostream &os,
int depth) const {
leafAt(0)->printDefaultToJson(g, os, depth);
}
void NodeArray::printDefaultToJson(const GenericDatum &g, std::ostream &os,
int depth) const {
assert(g.type() == AVRO_ARRAY);
// ex: "default": [1]
if (g.value<GenericArray>().value().empty()) {
os << "[]";
} else {
os << "[\n";
depth++;
// Serialize all values of the array with recursive calls:
for (unsigned int i = 0; i < g.value<GenericArray>().value().size(); i++) {
if (i > 0) {
os << ",\n";
}
os << indent(depth);
leafAt(0)->printDefaultToJson(g.value<GenericArray>().value()[i], os,
depth);
}
os << "\n" << indent(--depth) << "]";
}
}
void NodeSymbolic::printDefaultToJson(const GenericDatum &g, std::ostream &os,
int depth) const {
getNode()->printDefaultToJson(g, os, depth);
}
void NodeRecord::printDefaultToJson(const GenericDatum &g, std::ostream &os,
int depth) const {
assert(g.type() == AVRO_RECORD);
if (g.value<GenericRecord>().fieldCount() == 0) {
os << "{}";
} else {
os << "{\n";
// Serialize all fields of the record with recursive calls:
for (unsigned int i = 0; i < g.value<GenericRecord>().fieldCount(); i++) {
if (i == 0) {
++depth;
} else { // i > 0
os << ",\n";
}
os << indent(depth) << "\"";
assert(i < leaves());
os << leafNameAttributes_.get(i);
os << "\": ";
// Recursive call on child node to be able to get the name attribute
// (In case of a record we need the name of the leaves (contained in
// 'this'))
leafAt(i)->printDefaultToJson(g.value<GenericRecord>().fieldAt(i), os,
depth);
}
os << "\n" << indent(--depth) << "}";
}
}
void NodeMap::printDefaultToJson(const GenericDatum &g, std::ostream &os,
int depth) const {
assert(g.type() == AVRO_MAP);
//{"a": 1}
if (g.value<GenericMap>().value().empty()) {
os << "{}";
} else {
os << "{\n";
for (unsigned int i = 0; i < g.value<GenericMap>().value().size(); i++) {
if (i == 0) {
++depth;
} else {
os << ",\n";
}
os << indent(depth) << "\"" << g.value<GenericMap>().value()[i].first
<< "\": ";
leafAt(i)->printDefaultToJson(g.value<GenericMap>().value()[i].second, os,
depth);
}
os << "\n" << indent(--depth) << "}";
}
}
/// Write this enum schema to `os` as a JSON object holding "type", an
/// optional "doc", the name keys written by printName() and a "symbols"
/// array with one quoted symbol per line. `depth` is the indentation level
/// of the closing brace.
void
NodeEnum::printJson(std::ostream &os, int depth) const
{
    const int memberDepth = depth + 1;  // keys of the enum object
    const int symbolDepth = depth + 2;  // entries of the "symbols" array

    os << "{\n";
    os << indent(memberDepth) << "\"type\": \"enum\",\n";
    if (getDoc().size() != 0) {
        os << indent(memberDepth) << "\"doc\": \""
           << escape(getDoc()) << "\",\n";
    }
    printName(os, nameAttribute_.get(), memberDepth);
    os << indent(memberDepth) << "\"symbols\": [\n";

    const int symbolCount = leafNameAttributes_.size();
    for (int pos = 0; pos < symbolCount; ++pos) {
        if (pos != 0) {
            os << ",\n";
        }
        os << indent(symbolDepth) << '\"' << leafNameAttributes_.get(pos) << '\"';
    }

    os << '\n';
    os << indent(memberDepth) << "]\n";
    os << indent(depth) << '}';
}
/// Write this array schema to `os` as a JSON object holding "type", an
/// optional "doc" and "items", where the items schema prints itself one
/// level deeper. `depth` is the indentation level of the closing brace.
void
NodeArray::printJson(std::ostream &os, int depth) const
{
    const int memberDepth = depth + 1;

    os << "{\n";
    os << indent(memberDepth) << "\"type\": \"array\",\n";
    if (getDoc().size() != 0) {
        os << indent(memberDepth) << "\"doc\": \""
           << escape(getDoc()) << "\",\n";
    }
    os << indent(memberDepth) << "\"items\": ";
    leafAttributes_.get()->printJson(os, memberDepth);
    os << '\n';
    os << indent(depth) << '}';
}
/// Write this map schema to `os` as a JSON object holding "type", an
/// optional "doc" and "values", where the schema stored at leaf 1 prints
/// itself one level deeper. `depth` is the indentation level of the closing
/// brace.
void
NodeMap::printJson(std::ostream &os, int depth) const
{
    const int memberDepth = depth + 1;

    os << "{\n";
    os << indent(memberDepth) <<"\"type\": \"map\",\n";
    if (getDoc().size() != 0) {
        os << indent(memberDepth) << "\"doc\": \""
           << escape(getDoc()) << "\",\n";
    }
    os << indent(memberDepth) << "\"values\": ";
    leafAttributes_.get(1)->printJson(os, memberDepth);
    os << '\n';
    os << indent(depth) << '}';
}
/// Write this union schema to `os` as a JSON array with one branch schema
/// per line, each printed one level deeper than `depth`. The closing bracket
/// is indented by `depth`.
void
NodeUnion::printJson(std::ostream &os, int depth) const
{
    const int branchDepth = depth + 1;
    const int branchCount = leafAttributes_.size();

    os << "[\n";
    for (int pos = 0; pos < branchCount; ++pos) {
        if (pos != 0) {
            os << ",\n";
        }
        os << indent(branchDepth);
        leafAttributes_.get(pos)->printJson(os, branchDepth);
    }
    os << '\n';
    os << indent(depth) << ']';
}
void
NodeFixed::printJson(std::ostream &os, int depth) const
{
os << "{\n";
os << indent(++depth) << "\"type\": \"fixed\",\n";
if (getDoc().size()) {
os << indent(depth) << "\"doc\": \""
<< escape(getDoc()) << "\",\n";
}
printName(os, nameAttribute_.get(), depth);
os << indent(depth) << "\"size\": " << sizeAttribute_.get();
if (logicalType().type() != LogicalType::NONE) {
os << ",\n" << indent(depth);
logicalType().printJson(os);
}
os << "\n" << indent(--depth) << '}';
}
} // namespace avro