blob: 8286567f9ff3d2cdaab1506edd25ce21f854e123 [file] [log] [blame]
#!/usr/bin/env python3
##
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test the schema parsing logic."""
import json
import unittest
import warnings
from typing import List
import avro.errors
import avro.schema
class TestSchema:
    """Hold the JSON text of a schema alongside metadata the tests rely on."""

    def __init__(self, data, name="", comment="", warnings=None):
        """Store the schema text and its metadata.

        Args:
            data: The schema, either already a string or any object that
                ``json.dumps`` can serialize.
            name: Label for the example; falls back to the schema text when empty.
            comment: Free-form note stored as given.
            warnings: Warnings expected while parsing, stored as given.
        """
        serialized = data if isinstance(data, str) else json.dumps(data)
        self.data = serialized
        # An empty name falls back to the schema text itself.
        self.name = name if name else serialized
        self.comment = comment
        self.warnings = warnings

    def parse(self):
        """Return the result of parsing this schema's text with avro.schema.parse."""
        schema_text = str(self)
        return avro.schema.parse(schema_text)

    def __str__(self):
        """Return the schema text."""
        return str(self.data)
class ValidTestSchema(TestSchema):
    """A TestSchema whose text is expected to parse successfully."""

    # Read by the test generators in this module to pick the expected outcome.
    valid = True
class InvalidTestSchema(TestSchema):
    """A TestSchema whose text is expected to be rejected by the parser."""

    # Read by the test generators in this module to pick the expected outcome.
    valid = False
# Primitive schema examples: four malformed inputs, then every primitive type
# written first as a bare JSON string and then as a {"type": ...} object.
PRIMITIVE_EXAMPLES = [
    InvalidTestSchema('"True"'),
    InvalidTestSchema("True"),
    InvalidTestSchema('{"no_type": "test"}'),
    InvalidTestSchema('{"type": "panther"}'),
    *(ValidTestSchema(f'"{t}"') for t in avro.schema.PRIMITIVE_TYPES),
    *(ValidTestSchema({"type": t}) for t in avro.schema.PRIMITIVE_TYPES),
]  # type: List[TestSchema]
# "fixed" schema examples: definitions marked valid first, then ones marked invalid.
FIXED_EXAMPLES = [
    ValidTestSchema({"type": "fixed", "name": "Test", "size": 1}),
    ValidTestSchema(
        {
            "type": "fixed",
            "name": "MyFixed",
            "size": 1,
            "namespace": "org.apache.hadoop.avro",
        }
    ),
    # A namespace given as None or as the empty string is still marked valid.
    ValidTestSchema({"type": "fixed", "name": "NullNamespace", "namespace": None, "size": 1}),
    ValidTestSchema({"type": "fixed", "name": "EmptyStringNamespace", "namespace": "", "size": 1}),
    # No "size" attribute.
    InvalidTestSchema({"type": "fixed", "name": "Missing size"}),
    # No "name" attribute.
    InvalidTestSchema({"type": "fixed", "size": 314}),
    # The name contains a period followed by a space; see AVRO-621.
    InvalidTestSchema({"type": "fixed", "size": 314, "name": "dr. spaceman"}, comment="AVRO-621"),
]
# "enum" schema examples: definitions marked valid first, then ones marked invalid.
ENUM_EXAMPLES = [
    ValidTestSchema({"type": "enum", "name": "Test", "symbols": ["A", "B"]}),
    ValidTestSchema({"type": "enum", "name": "AVRO2174", "symbols": ["nowhitespace"]}),
    # The default "B" is not one of the symbols; see AVRO-3229.
    InvalidTestSchema({"type": "enum", "name": "bad_default", "symbols": ["A"], "default": "B"}, comment="AVRO-3229"),
    # "symbols" is a single string rather than a list.
    InvalidTestSchema({"type": "enum", "name": "Status", "symbols": "Normal Caution Critical"}),
    # "name" is a list rather than a string.
    InvalidTestSchema({"type": "enum", "name": [0, 1, 1, 2, 3, 5, 8], "symbols": ["Golden", "Mean"]}),
    # No "name" attribute.
    InvalidTestSchema({"type": "enum", "symbols": ["I", "will", "fail", "no", "name"]}),
    # The same symbol appears twice.
    InvalidTestSchema({"type": "enum", "name": "Test", "symbols": ["AA", "AA"]}),
    # The symbol contains a space.
    InvalidTestSchema({"type": "enum", "name": "AVRO2174", "symbols": ["white space"]}),
]
# "array" schema examples: items given as a primitive name and as an inline enum.
ARRAY_EXAMPLES = [
    ValidTestSchema({"type": "array", "items": "long"}),
    ValidTestSchema(
        {
            "type": "array",
            "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]},
        }
    ),
]
# "map" schema examples: values given as a primitive name and as an inline enum.
MAP_EXAMPLES = [
    ValidTestSchema({"type": "map", "values": "long"}),
    ValidTestSchema(
        {
            "type": "map",
            "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]},
        }
    ),
]
# Union schema examples, written as JSON arrays of branch schemas.
UNION_EXAMPLES = [
    ValidTestSchema(["string", "null", "long"]),
    # Each invalid example below lists two branches of the same type.
    InvalidTestSchema(["null", "null"]),
    InvalidTestSchema(["long", "long"]),
    InvalidTestSchema([{"type": "array", "items": "long"}, {"type": "array", "items": "string"}]),
]
# Examples whose names coincide with Avro type keywords ("record", "int").
NAME_EXAMPLES = [
    ValidTestSchema({"type": "enum", "name": "record", "symbols": ["A", "B"]}),
    ValidTestSchema({"type": "record", "name": "record", "fields": [{"name": "f", "type": "long"}]}),
    # The bare name "int" is marked invalid, but it is marked valid once a
    # namespace is supplied, either inside the name or as a separate attribute.
    InvalidTestSchema({"type": "enum", "name": "int", "symbols": ["A", "B"]}),
    ValidTestSchema({"type": "enum", "name": "ns.int", "symbols": ["A", "B"]}),
    ValidTestSchema({"type": "enum", "namespace": "ns", "name": "int", "symbols": ["A", "B"]}),
    # Records whose "next" field refers back to the enclosing record by name.
    ValidTestSchema(
        {"type": "record", "name": "LinkedList", "fields": [{"name": "value", "type": "int"}, {"name": "next", "type": ["null", "LinkedList"]}]}
    ),
    ValidTestSchema({"type": "record", "name": "record", "fields": [{"name": "value", "type": "int"}, {"name": "next", "type": ["null", "record"]}]}),
    ValidTestSchema({"type": "record", "name": "ns.int", "fields": [{"name": "value", "type": "int"}, {"name": "next", "type": ["null", "ns.int"]}]}),
]
# A record that defines an enum inline in one field and then refers to that
# enum by name from inside a union in the next field.
NAMED_IN_UNION_EXAMPLES = [
    ValidTestSchema(
        {
            "namespace": "org.apache.avro.test",
            "type": "record",
            "name": "Test",
            "fields": [
                {
                    "type": {
                        "symbols": ["one", "two"],
                        "type": "enum",
                        "name": "NamedEnum",
                    },
                    "name": "thenamedenum",
                },
                {"type": ["null", "NamedEnum"], "name": "unionwithreftoenum"},
            ],
        }
    )
]
# "record" and "error" schema examples: definitions marked valid first, then ones marked invalid.
RECORD_EXAMPLES = [
    ValidTestSchema({"type": "record", "name": "Test", "fields": [{"name": "f", "type": "long"}]}),
    ValidTestSchema({"type": "error", "name": "Test", "fields": [{"name": "f", "type": "long"}]}),
    # Node refers to itself as the item type of its "children" array.
    ValidTestSchema(
        {
            "type": "record",
            "name": "Node",
            "fields": [
                {"name": "label", "type": "string"},
                {"name": "children", "type": {"type": "array", "items": "Node"}},
            ],
        }
    ),
    # The nested record Cons refers back to the enclosing record Lisp.
    ValidTestSchema(
        {
            "type": "record",
            "name": "Lisp",
            "fields": [
                {
                    "name": "value",
                    "type": [
                        "null",
                        "string",
                        {
                            "type": "record",
                            "name": "Cons",
                            "fields": [
                                {"name": "car", "type": "Lisp"},
                                {"name": "cdr", "type": "Lisp"},
                            ],
                        },
                    ],
                }
            ],
        }
    ),
    # MD5 is defined inline by clientHash and then referenced by name by serverHash.
    ValidTestSchema(
        {
            "type": "record",
            "name": "HandshakeRequest",
            "namespace": "org.apache.avro.ipc",
            "fields": [
                {
                    "name": "clientHash",
                    "type": {"type": "fixed", "name": "MD5", "size": 16},
                },
                {"name": "clientProtocol", "type": ["null", "string"]},
                {"name": "serverHash", "type": "MD5"},
                {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]},
            ],
        }
    ),
    ValidTestSchema(
        {
            "type": "record",
            "name": "HandshakeResponse",
            "namespace": "org.apache.avro.ipc",
            "fields": [
                {
                    "name": "match",
                    "type": {
                        "type": "enum",
                        "name": "HandshakeMatch",
                        "symbols": ["BOTH", "CLIENT", "NONE"],
                    },
                },
                {"name": "serverProtocol", "type": ["null", "string"]},
                {
                    "name": "serverHash",
                    "type": ["null", {"name": "MD5", "size": 16, "type": "fixed"}],
                },
                {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]},
            ],
        }
    ),
    # One field for each primitive type, followed by array, map, union, enum,
    # fixed and record fields.
    ValidTestSchema(
        {
            "type": "record",
            "name": "Interop",
            "namespace": "org.apache.avro",
            "fields": [
                {"name": "intField", "type": "int"},
                {"name": "longField", "type": "long"},
                {"name": "stringField", "type": "string"},
                {"name": "boolField", "type": "boolean"},
                {"name": "floatField", "type": "float"},
                {"name": "doubleField", "type": "double"},
                {"name": "bytesField", "type": "bytes"},
                {"name": "nullField", "type": "null"},
                {"name": "arrayField", "type": {"type": "array", "items": "double"}},
                {
                    "name": "mapField",
                    "type": {
                        "type": "map",
                        "values": {
                            "name": "Foo",
                            "type": "record",
                            "fields": [{"name": "label", "type": "string"}],
                        },
                    },
                },
                {
                    "name": "unionField",
                    "type": ["boolean", "double", {"type": "array", "items": "bytes"}],
                },
                {
                    "name": "enumField",
                    "type": {
                        "type": "enum",
                        "name": "Kind",
                        "symbols": ["A", "B", "C"],
                    },
                },
                {
                    "name": "fixedField",
                    "type": {"type": "fixed", "name": "MD5", "size": 16},
                },
                {
                    "name": "recordField",
                    "type": {
                        "type": "record",
                        "name": "Node",
                        "fields": [
                            {"name": "label", "type": "string"},
                            {
                                "name": "children",
                                "type": {"type": "array", "items": "Node"},
                            },
                        ],
                    },
                },
            ],
        }
    ),
    # A union of two distinct named fixed types.
    ValidTestSchema(
        {
            "type": "record",
            "name": "ipAddr",
            "fields": [
                {
                    "name": "addr",
                    "type": [
                        {"name": "IPv6", "type": "fixed", "size": 16},
                        {"name": "IPv4", "type": "fixed", "size": 4},
                    ],
                }
            ],
        }
    ),
    # The first field has no "name".
    InvalidTestSchema(
        {
            "type": "record",
            "name": "Address",
            "fields": [{"type": "string"}, {"type": "string", "name": "City"}],
        }
    ),
    # The first field has no "type".
    InvalidTestSchema(
        {
            "type": "record",
            "name": "Event",
            "fields": [{"name": "Sponsor"}, {"name": "City", "type": "string"}],
        }
    ),
    # "fields" is a string rather than a list.
    InvalidTestSchema(
        {
            "type": "record",
            "name": "Rainer",
            "fields": "His vision, from the constantly passing bars",
        }
    ),
    # "name" is a list rather than a string.
    InvalidTestSchema(
        {
            "name": ["Tom", "Jerry"],
            "type": "record",
            "fields": [{"name": "name", "type": "string"}],
        }
    ),
]
# Schemas carrying "doc" attributes; load_tests feeds these to DocAttributesTestCase
# in addition to the generic parse and round-trip cases.
DOC_EXAMPLES = [
    ValidTestSchema(
        {
            "type": "record",
            "name": "TestDoc",
            "doc": "Doc string",
            "fields": [{"name": "name", "type": "string", "doc": "Doc String"}],
        }
    ),
    ValidTestSchema({"type": "enum", "name": "Test", "symbols": ["A", "B"], "doc": "Doc String"}),
]
# Schemas carrying custom "cp_*" attributes. Each custom key has an entry in
# OtherAttributesTestCase._type_map giving the Python type its value must keep.
# load_tests passes this list only to OtherAttributesTestCase.
OTHER_PROP_EXAMPLES = [
    ValidTestSchema(
        {
            "type": "record",
            "name": "TestRecord",
            "cp_string": "string",
            "cp_int": 1,
            "cp_array": [1, 2, 3, 4],
            "fields": [
                {"name": "f1", "type": "string", "cp_object": {"a": 1, "b": 2}},
                {"name": "f2", "type": "long", "cp_null": None},
            ],
        }
    ),
    ValidTestSchema({"type": "map", "values": "long", "cp_boolean": True}),
    ValidTestSchema(
        {
            "type": "enum",
            "name": "TestEnum",
            "symbols": ["one", "two", "three"],
            "cp_float": 1.0,
        }
    ),
]
# "decimal" logical type examples on fixed and bytes schemas.
DECIMAL_LOGICAL_TYPE = [
    ValidTestSchema(
        {
            "type": "fixed",
            "logicalType": "decimal",
            "name": "TestDecimal",
            "precision": 4,
            "size": 10,
            "scale": 2,
        }
    ),
    ValidTestSchema({"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}),
    # The fixed size is negative.
    InvalidTestSchema(
        {
            "type": "fixed",
            "logicalType": "decimal",
            "name": "TestDecimal2",
            "precision": 2,
            "scale": 2,
            "size": -2,
        }
    ),
]
# One valid example per remaining logical type, each paired with a primitive type.
DATE_LOGICAL_TYPE = [ValidTestSchema({"type": "int", "logicalType": "date"})]
TIMEMILLIS_LOGICAL_TYPE = [ValidTestSchema({"type": "int", "logicalType": "time-millis"})]
TIMEMICROS_LOGICAL_TYPE = [ValidTestSchema({"type": "long", "logicalType": "time-micros"})]
TIMESTAMPMILLIS_LOGICAL_TYPE = [ValidTestSchema({"type": "long", "logicalType": "timestamp-millis"})]
TIMESTAMPMICROS_LOGICAL_TYPE = [ValidTestSchema({"type": "long", "logicalType": "timestamp-micros"})]
UUID_LOGICAL_TYPE = [ValidTestSchema({"type": "string", "logicalType": "uuid"})]
# Schemas marked valid whose "logicalType" is expected to be ignored. Each
# (schema, message) pair becomes a ValidTestSchema expecting one
# IgnoredLogicalType warning carrying exactly that message.
IGNORED_LOGICAL_TYPE = [
    ValidTestSchema(ignored_schema, warnings=[avro.errors.IgnoredLogicalType(expected_message)])
    for ignored_schema, expected_message in (
        (
            {"type": "string", "logicalType": "unknown-logical-type"},
            "Unknown unknown-logical-type, using string.",
        ),
        (
            {"type": "bytes", "logicalType": "decimal", "scale": 0},
            "Invalid decimal precision None. Must be a positive integer.",
        ),
        (
            {"type": "bytes", "logicalType": "decimal", "precision": 2.4, "scale": 0},
            "Invalid decimal precision 2.4. Must be a positive integer.",
        ),
        (
            {"type": "bytes", "logicalType": "decimal", "precision": 2, "scale": -2},
            "Invalid decimal scale -2. Must be a non-negative integer.",
        ),
        (
            {"type": "bytes", "logicalType": "decimal", "precision": -2, "scale": 2},
            "Invalid decimal precision -2. Must be a positive integer.",
        ),
        (
            {"type": "bytes", "logicalType": "decimal", "precision": 2, "scale": 3},
            "Invalid decimal scale 3. Cannot be greater than precision 2.",
        ),
        (
            {
                "type": "fixed",
                "logicalType": "decimal",
                "name": "TestIgnored",
                "precision": -10,
                "scale": 2,
                "size": 5,
            },
            "Invalid decimal precision -10. Must be a positive integer.",
        ),
        (
            {
                "type": "fixed",
                "logicalType": "decimal",
                "name": "TestIgnored",
                "scale": 2,
                "size": 5,
            },
            "Invalid decimal precision None. Must be a positive integer.",
        ),
        (
            {
                "type": "fixed",
                "logicalType": "decimal",
                "name": "TestIgnored",
                "precision": 2,
                "scale": 3,
                "size": 2,
            },
            "Invalid decimal scale 3. Cannot be greater than precision 2.",
        ),
        (
            {
                "type": "fixed",
                "logicalType": "decimal",
                "name": "TestIgnored",
                "precision": 311,
                "size": 129,
            },
            "Invalid decimal precision 311. Max is 310.",
        ),
        (
            {"type": "float", "logicalType": "decimal", "precision": 2, "scale": 0},
            "Logical type decimal requires literal type bytes/fixed, not float.",
        ),
        (
            {"type": "int", "logicalType": "date1"},
            "Unknown date1, using int.",
        ),
        (
            {"type": "long", "logicalType": "date"},
            "Logical type date requires literal type int, not long.",
        ),
        (
            {"type": "int", "logicalType": "time-milis"},
            "Unknown time-milis, using int.",
        ),
        (
            {"type": "long", "logicalType": "time-millis"},
            "Logical type time-millis requires literal type int, not long.",
        ),
        (
            {"type": "long", "logicalType": "time-micro"},
            "Unknown time-micro, using long.",
        ),
        (
            {"type": "int", "logicalType": "time-micros"},
            "Logical type time-micros requires literal type long, not int.",
        ),
        (
            {"type": "long", "logicalType": "timestamp-milis"},
            "Unknown timestamp-milis, using long.",
        ),
        (
            {"type": "int", "logicalType": "timestamp-millis"},
            "Logical type timestamp-millis requires literal type long, not int.",
        ),
        (
            {"type": "long", "logicalType": "timestamp-micro"},
            "Unknown timestamp-micro, using long.",
        ),
        (
            {"type": "int", "logicalType": "timestamp-micros"},
            "Logical type timestamp-micros requires literal type long, not int.",
        ),
    )
]
# Every example fed to SchemaParseTestCase by load_tests, in table order.
# The list is built fresh: binding EXAMPLES to PRIMITIVE_EXAMPLES and extending
# it with += would append every other table to PRIMITIVE_EXAMPLES as well.
# NOTE(review): OTHER_PROP_EXAMPLES is not part of this aggregate, matching the
# previous behavior -- confirm that the omission is intended.
EXAMPLES = [
    *PRIMITIVE_EXAMPLES,
    *FIXED_EXAMPLES,
    *ENUM_EXAMPLES,
    *ARRAY_EXAMPLES,
    *MAP_EXAMPLES,
    *UNION_EXAMPLES,
    *NAME_EXAMPLES,
    *NAMED_IN_UNION_EXAMPLES,
    *RECORD_EXAMPLES,
    *DOC_EXAMPLES,
    *DECIMAL_LOGICAL_TYPE,
    *DATE_LOGICAL_TYPE,
    *TIMEMILLIS_LOGICAL_TYPE,
    *TIMEMICROS_LOGICAL_TYPE,
    *TIMESTAMPMILLIS_LOGICAL_TYPE,
    *TIMESTAMPMICROS_LOGICAL_TYPE,
    *UUID_LOGICAL_TYPE,
    *IGNORED_LOGICAL_TYPE,
]
# Partition by the class-level "valid" flag; an entry without the flag lands in neither list.
VALID_EXAMPLES = [e for e in EXAMPLES if getattr(e, "valid", False)]
INVALID_EXAMPLES = [e for e in EXAMPLES if not getattr(e, "valid", True)]
class TestMisc(unittest.TestCase):
    """Assorted schema tests that are not generated from the example tables."""

    def test_correct_recursive_extraction(self):
        """A recursive reference within a schema should be the same type every time."""
        outer = avro.schema.parse(
            """{
"type": "record",
"name": "X",
"fields": [{
"name": "y",
"type": {
"type": "record",
"name": "Y",
"fields": [{"name": "Z", "type": "X"}]}
}]
}"""
        )
        nested_type = outer.fields[0].type
        reparsed = avro.schema.parse(str(nested_type))
        # Getting here means the subschema stringified well enough that it could be reparsed.
        self.assertEqual("X", reparsed.fields[0].type.name)

    def test_exception_is_not_swallowed_on_parse_error(self):
        """A specific exception message should appear on a json parse error."""
        with self.assertRaisesRegex(avro.errors.SchemaParseException, r"Error parsing JSON: /not/a/real/file"):
            avro.schema.parse("/not/a/real/file")

    def test_decimal_valid_type(self):
        """Decimal properties should be readable from fixed- and bytes-backed schemas."""
        fixed_definition = {
            "type": "fixed",
            "logicalType": "decimal",
            "name": "TestDecimal",
            "precision": 4,
            "scale": 2,
            "size": 2,
        }
        fixed_decimal = ValidTestSchema(fixed_definition).parse()
        for prop_name, expected in (("precision", 4), ("scale", 2), ("size", 2)):
            self.assertEqual(expected, fixed_decimal.get_prop(prop_name))

        bytes_definition = {"type": "bytes", "logicalType": "decimal", "precision": 4}
        bytes_decimal = ValidTestSchema(bytes_definition).parse()
        # No scale is given in the definition; the parsed schema is expected to report 0.
        for prop_name, expected in (("precision", 4), ("scale", 0), ("logicalType", "decimal")):
            self.assertEqual(expected, bytes_decimal.get_prop(prop_name))

    def test_fixed_decimal_valid_max_precision(self):
        """A precision of 18 on an 8-byte fixed should keep the decimal logical type."""
        # An 8 byte number can represent any 18 digit number.
        definition = {
            "type": "fixed",
            "logicalType": "decimal",
            "name": "TestDecimal",
            "precision": 18,
            "scale": 0,
            "size": 8,
        }
        parsed = ValidTestSchema(definition).parse()
        for expected_class in (avro.schema.FixedSchema, avro.schema.DecimalLogicalSchema):
            self.assertIsInstance(parsed, expected_class)

    def test_fixed_decimal_invalid_max_precision(self):
        """A precision of 19 on an 8-byte fixed should fall back to a plain fixed schema."""
        # An 8 byte number can't represent every 19 digit number, so the logical
        # type is not applied.
        definition = {
            "type": "fixed",
            "logicalType": "decimal",
            "name": "TestDecimal",
            "precision": 19,
            "scale": 0,
            "size": 8,
        }
        parsed = ValidTestSchema(definition).parse()
        self.assertIsInstance(parsed, avro.schema.FixedSchema)
        self.assertNotIsInstance(parsed, avro.schema.DecimalLogicalSchema)

    def test_parse_invalid_symbol(self):
        """Disabling enumschema symbol validation should allow invalid symbols to pass."""
        schema_text = json.dumps({"type": "enum", "name": "AVRO2174", "symbols": ["white space"]})

        strict_message = "When enum symbol validation is enabled, an invalid symbol should raise InvalidName."
        with self.assertRaises(avro.errors.InvalidName, msg=strict_message):
            avro.schema.parse(schema_text, validate_enum_symbols=True)

        try:
            avro.schema.parse(schema_text, validate_enum_symbols=False)
        except avro.errors.InvalidName:  # pragma: no coverage
            self.fail("When enum symbol validation is disabled, an invalid symbol should not raise InvalidName.")
class SchemaParseTestCase(unittest.TestCase):
    """Enable generating parse test cases over all the valid and invalid example schema."""

    def __init__(self, test_schema):
        """Ignore the normal signature for unittest.TestCase because we are generating
        many test cases from this one class. This is safe as long as the autoloader
        ignores this class. The autoloader will ignore this class as long as it has
        no methods starting with `test_`.

        Args:
            test_schema: The example to check. Its ``valid`` attribute selects
                whether ``parse_valid`` or ``parse_invalid`` is run.
        """
        super().__init__("parse_valid" if test_schema.valid else "parse_invalid")
        self.test_schema = test_schema
        # Never hide repeated warnings when running this test case.
        warnings.simplefilter("always")

    def parse_valid(self) -> None:
        """Parsing a valid schema should not error, but may contain warnings.

        IgnoredLogicalType warnings are turned into exceptions for the duration
        of the parse so the first one raised can be compared with the warnings
        the example declares. An example that declares warnings but raises none
        fails.
        """
        expected_warnings = self.test_schema.warnings or []
        # catch_warnings() puts the process-wide filter list back on exit, so the
        # temporary "error" filter does not outlive this test.
        with warnings.catch_warnings():
            warnings.filterwarnings(action="error", category=avro.errors.IgnoredLogicalType)
            try:
                self.test_schema.parse()
            except avro.errors.IgnoredLogicalType as e:
                # Lists rather than generators, so a failure message shows the candidates.
                self.assertIn(type(e), [type(w) for w in expected_warnings])
                self.assertIn(str(e), [str(w) for w in expected_warnings])
            except (avro.errors.AvroException, avro.errors.SchemaParseException):  # pragma: no coverage
                self.fail(f"Valid schema failed to parse: {self.test_schema!s}")
            else:
                self.assertEqual([], expected_warnings)

    def parse_invalid(self):
        """Parsing an invalid schema should error."""
        with self.assertRaises(
            (avro.errors.AvroException, avro.errors.SchemaParseException), msg=f"Invalid schema should not have parsed: {self.test_schema!s}"
        ):
            self.test_schema.parse()
class RoundTripParseTestCase(unittest.TestCase):
    """Enable generating round-trip parse test cases over all the valid test schema."""

    def __init__(self, test_schema):
        """Bind this case to a single example schema.

        The usual unittest.TestCase signature is deliberately not honored because
        many cases are generated from this one class. That is safe only while the
        autoloader skips the class, which it does as long as no method name starts
        with `test_`.
        """
        super().__init__("parse_round_trip")
        self.test_schema = test_schema

    def parse_round_trip(self):
        """The string of a Schema should be parseable to the same Schema."""
        original = self.test_schema.parse()
        reparsed = avro.schema.parse(str(original))
        # Shown on failure so both JSON forms can be compared.
        details = {
            "original schema": original.to_json(),
            "round trip schema": reparsed.to_json(),
        }
        self.assertEqual(original, reparsed, details)
class DocAttributesTestCase(unittest.TestCase):
    """Enable generating document attribute test cases over all the document test schema."""

    def __init__(self, test_schema):
        """Bind this case to a single example schema.

        The usual unittest.TestCase signature is deliberately not honored because
        many cases are generated from this one class. That is safe only while the
        autoloader skips the class, which it does as long as no method name starts
        with `test_`.
        """
        super().__init__("check_doc_attributes")
        self.test_schema = test_schema

    def check_doc_attributes(self):
        """Documentation attributes should be preserved."""
        schema = self.test_schema.parse()
        self.assertIsNotNone(schema.doc, f"Failed to preserve 'doc' in schema: {self.test_schema!s}")
        # Only record schemas have fields to inspect.
        if schema.type != "record":
            return
        field_message = f"Failed to preserve 'doc' in fields: {self.test_schema!s}"
        for field in schema.fields:
            self.assertIsNotNone(field.doc, field_message)
class OtherAttributesTestCase(unittest.TestCase):
    """Enable generating attribute test cases over all the other-prop test schema."""

    # Python type each custom attribute's value must have after parsing.
    _type_map = {
        "cp_array": list,
        "cp_boolean": bool,
        "cp_float": float,
        "cp_int": int,
        "cp_null": type(None),
        "cp_object": dict,
        "cp_string": str,
    }

    def __init__(self, test_schema):
        """Bind this case to a single example schema.

        The usual unittest.TestCase signature is deliberately not honored because
        many cases are generated from this one class. That is safe only while the
        autoloader skips the class, which it does as long as no method name starts
        with `test_`.
        """
        super().__init__("check_attributes")
        self.test_schema = test_schema

    def _check_props(self, props):
        """Assert every value in `props` has the type `_type_map` lists for its key."""
        for prop_name, prop_value in props.items():
            expected_type = self._type_map[prop_name]
            self.assertIsInstance(prop_value, expected_type)

    def check_attributes(self):
        """Other attributes and their types on a schema should be preserved."""
        schema = self.test_schema.parse()
        try:
            self.assertNotEqual(schema, object(), "A schema is never equal to a non-schema instance.")
        except AttributeError:  # pragma: no coverage
            self.fail("Comparing a schema to a non-schema should be False, but not error.")

        reparsed = avro.schema.parse(str(schema))
        self.assertEqual(
            schema,
            reparsed,
            "A schema should be equal to another schema parsed from the same json.",
        )
        self.assertEqual(
            schema.other_props,
            reparsed.other_props,
            "Properties were not preserved in a round-trip parse.",
        )
        self._check_props(schema.other_props)

        if schema.type != "record":
            return
        # Every field of a record example is expected to carry custom properties.
        props_per_field = [field.other_props for field in schema.fields if field.other_props]
        self.assertEqual(len(props_per_field), len(schema.fields))
        for single_field_props in props_per_field:
            self._check_props(single_field_props)
class CanonicalFormTestCase(unittest.TestCase):
    r"""Enable generating canonical-form test cases over the valid schema.

    Transforming into Parsing Canonical Form
    Assuming an input schema (in JSON form) that's already UTF-8 text for a valid Avro schema (including all
    quotes as required by JSON), the following transformations will produce its Parsing Canonical Form:
    - [PRIMITIVES] Convert primitive schemas to their simple form (e.g., int instead of {"type":"int"}).
    - [FULLNAMES] Replace short names with fullnames, using applicable namespaces to do so. Then eliminate
    namespace attributes, which are now redundant.
    - [STRIP] Keep only attributes that are relevant to parsing data, which are: type, name, fields, symbols,
    items, values, size. Strip all others (e.g., doc and aliases).
    - [ORDER] Order the appearance of fields of JSON objects as follows: name, type, fields, symbols, items,
    values, size. For example, if an object has type, name, and size fields, then the name field should
    appear first, followed by the type and then the size fields.
    - [STRINGS] For all JSON string literals in the schema text, replace any escaped characters
    (e.g., \uXXXX escapes) with their UTF-8 equivalents.
    - [INTEGERS] Eliminate quotes around and any leading zeros in front of JSON integer literals
    (which appear in the size attributes of fixed schemas).
    - [WHITESPACE] Eliminate all whitespace in JSON outside of string literals.
    We depend on the Python json parser to properly handle the STRINGS and INTEGERS rules, so
    we don't test them here.
    """

    def compact_json_string(self, json_doc):
        """Returns compact-encoded JSON string representation for supplied document.

        Args:
            json_doc (json): JSON Document

        Returns:
            str: Compact-encoded, stringified JSON document
        """
        return json.dumps(json_doc, separators=(",", ":"))

    def _assert_primitive_canonical_form(self, type_name):
        """Assert both spellings of a primitive schema canonicalize to its quoted name.

        Args:
            type_name (str): Primitive type name, checked as the bare string
                and as a ``{"type": type_name}`` object.
        """
        expected = f'"{type_name}"'
        for schema_json in (type_name, {"type": type_name}):
            s = avro.schema.parse(json.dumps(schema_json))
            self.assertEqual(s.canonical_form, expected)

    def test_primitive_int(self):
        """
        Convert primitive schemas to their simple form (e.g., int instead of {"type":"int"}).
        """
        self._assert_primitive_canonical_form("int")

    def test_primitive_float(self):
        """Both spellings of float canonicalize to the simple form."""
        self._assert_primitive_canonical_form("float")

    def test_primitive_double(self):
        """Both spellings of double canonicalize to the simple form."""
        self._assert_primitive_canonical_form("double")

    def test_primitive_null(self):
        """Both spellings of null canonicalize to the simple form."""
        self._assert_primitive_canonical_form("null")

    def test_primitive_bytes(self):
        """Both spellings of bytes canonicalize to the simple form."""
        self._assert_primitive_canonical_form("bytes")

    def test_primitive_long(self):
        """Both spellings of long canonicalize to the simple form."""
        self._assert_primitive_canonical_form("long")

    def test_primitive_boolean(self):
        """Both spellings of boolean canonicalize to the simple form."""
        self._assert_primitive_canonical_form("boolean")

    def test_primitive_string(self):
        """Both spellings of string canonicalize to the simple form."""
        self._assert_primitive_canonical_form("string")

    def test_integer_canonical_form(self):
        """
        Integer literals starting with 0 are illegal in python, because of ambiguity. This is a placeholder test
        for INTEGERS canonical form, which should generally succeed provided a valid integer has been supplied.
        """
        s = avro.schema.parse('{"name":"md5","type":"fixed","size":16}')
        self.assertEqual(
            s.canonical_form,
            self.compact_json_string({"name": "md5", "type": "fixed", "size": 16}),
        )

    def test_string_with_escaped_characters(self):
        r"""
        Replace any escaped characters (e.g., \u0031 escapes) with their UTF-8 equivalents.
        """
        # Raw literal: the JSON parser, not the Python compiler, has to be the
        # one that decodes the \u0041 escape, otherwise nothing is being tested.
        s = avro.schema.parse(r'{"name":"\u0041","type":"fixed","size":16}')
        self.assertEqual(
            s.canonical_form,
            self.compact_json_string({"name": "A", "type": "fixed", "size": 16}),
        )

    def test_fullname(self):
        """
        Replace short names with fullnames, using applicable namespaces to do so. Then eliminate namespace attributes, which are now redundant.
        """
        s = avro.schema.parse(
            json.dumps(
                {
                    "namespace": "avro",
                    "name": "example",
                    "type": "enum",
                    "symbols": ["a", "b"],
                }
            )
        )
        self.assertEqual(
            s.canonical_form,
            self.compact_json_string({"name": "avro.example", "type": "enum", "symbols": ["a", "b"]}),
        )

    def test_strip(self):
        """
        Keep only attributes that are relevant to parsing data, which are: type, name, fields, symbols, items, values,
        size. Strip all others (e.g., doc and aliases).
        """
        s = avro.schema.parse(
            json.dumps(
                {
                    "name": "foo",
                    "type": "enum",
                    "doc": "test",
                    "aliases": ["bar"],
                    "symbols": ["a", "b"],
                }
            )
        )
        self.assertEqual(
            s.canonical_form,
            self.compact_json_string({"name": "foo", "type": "enum", "symbols": ["a", "b"]}),
        )

    def test_order(self):
        """
        Order the appearance of fields of JSON objects as follows: name, type, fields, symbols, items, values, size.
        For example, if an object has type, name, and size fields, then the name field should appear first, followed
        by the type and then the size fields.
        """
        s = avro.schema.parse(json.dumps({"symbols": ["a", "b"], "type": "enum", "name": "example"}))
        self.assertEqual(
            s.canonical_form,
            self.compact_json_string({"name": "example", "type": "enum", "symbols": ["a", "b"]}),
        )

    def test_whitespace(self):
        """
        Eliminate all whitespace in JSON outside of string literals.
        """
        s = avro.schema.parse(
            """{"type": "fixed",
"size": 16,
"name": "md5"}
"""
        )
        self.assertEqual(
            s.canonical_form,
            self.compact_json_string({"name": "md5", "type": "fixed", "size": 16}),
        )

    def test_record_field(self):
        """
        Ensure that record fields produce the correct parsing canonical form.
        """
        s = avro.schema.parse(
            json.dumps(
                {
                    "type": "record",
                    "name": "Test",
                    "doc": "This is a test schema",
                    "aliases": ["also", "known", "as"],
                    "fields": [
                        {
                            "type": {
                                "symbols": ["one", "two"],
                                "type": "enum",
                                "name": "NamedEnum",
                            },
                            "name": "thenamedenum",
                            "doc": "This is a named enum",
                        },
                        {"type": ["null", "NamedEnum"], "name": "unionwithreftoenum"},
                    ],
                }
            )
        )
        expected = self.compact_json_string(
            {
                "name": "Test",
                "type": "record",
                "fields": [
                    {
                        "name": "thenamedenum",
                        "type": {
                            "name": "NamedEnum",
                            "type": "enum",
                            "symbols": ["one", "two"],
                        },
                    },
                    {"name": "unionwithreftoenum", "type": ["null", "NamedEnum"]},
                ],
            }
        )
        self.assertEqual(s.canonical_form, expected)

    def test_array(self):
        """
        Ensure that array schema produce the correct parsing canonical form.
        """
        s = avro.schema.parse(json.dumps({"items": "long", "type": "array"}))
        self.assertEqual(
            s.canonical_form,
            self.compact_json_string({"type": "array", "items": "long"}),
        )

    def test_map(self):
        """
        Ensure that map schema produce the correct parsing canonical form.
        """
        s = avro.schema.parse(json.dumps({"values": "long", "type": "map"}))
        self.assertEqual(
            s.canonical_form,
            self.compact_json_string({"type": "map", "values": "long"}),
        )

    def test_union(self):
        """
        Ensure that a union schema produces the correct parsing canonical form.
        """
        s = avro.schema.parse(json.dumps(["string", "null", "long"]))
        self.assertEqual(s.canonical_form, '["string","null","long"]')

    def test_large_record_handshake_request(self):
        """Canonical form of the HandshakeRequest record, with names fully qualified."""
        s = avro.schema.parse(
            """
{
"type": "record",
"name": "HandshakeRequest",
"namespace": "org.apache.avro.ipc",
"fields": [
{
"name": "clientHash",
"type": {"type": "fixed", "name": "MD5", "size": 16}
},
{"name": "clientProtocol", "type": ["null", "string"]},
{"name": "serverHash", "type": "MD5"},
{
"name": "meta",
"type": ["null", {"type": "map", "values": "bytes"}]
}
]
}
"""
        )
        self.assertEqual(
            s.canonical_form,
            (
                '{"name":"org.apache.avro.ipc.HandshakeRequest","type":"record",'
                '"fields":[{"name":"clientHash","type":{"name":"org.apache.avro.ipc.MD5",'
                '"type":"fixed","size":16}},{"name":"clientProtocol","type":["null","string"]},'
                '{"name":"serverHash","type":{"name":"org.apache.avro.ipc.MD5","type":"fixed","size":16}},'
                '{"name":"meta","type":["null",{"type":"map","values":"bytes"}]}]}'
            ),
        )

    def test_large_record_handshake_response(self):
        """Canonical form of the HandshakeResponse record, with names fully qualified."""
        s = avro.schema.parse(
            """
{
"type": "record",
"name": "HandshakeResponse",
"namespace": "org.apache.avro.ipc",
"fields": [
{
"name": "match",
"type": {
"type": "enum",
"name": "HandshakeMatch",
"symbols": ["BOTH", "CLIENT", "NONE"]
}
},
{"name": "serverProtocol", "type": ["null", "string"]},
{
"name": "serverHash",
"type": ["null", {"name": "MD5", "size": 16, "type": "fixed"}]
},
{
"name": "meta",
"type": ["null", {"type": "map", "values": "bytes"}]}]
}
"""
        )
        self.assertEqual(
            s.canonical_form,
            (
                '{"name":"org.apache.avro.ipc.HandshakeResponse","type":"rec'
                'ord","fields":[{"name":"match","type":{"name":"org.apache.a'
                'vro.ipc.HandshakeMatch","type":"enum","symbols":["BOTH","CL'
                'IENT","NONE"]}},{"name":"serverProtocol","type":["null","st'
                'ring"]},{"name":"serverHash","type":["null",{"name":"org.ap'
                'ache.avro.ipc.MD5","type":"fixed","size":16}]},{"name":"met'
                'a","type":["null",{"type":"map","values":"bytes"}]}]}'
            ),
        )

    def test_large_record_interop(self):
        """Canonical form of the Interop record, with names fully qualified."""
        s = avro.schema.parse(
            """
{
"type": "record",
"name": "Interop",
"namespace": "org.apache.avro",
"fields": [
{"name": "intField", "type": "int"},
{"name": "longField", "type": "long"},
{"name": "stringField", "type": "string"},
{"name": "boolField", "type": "boolean"},
{"name": "floatField", "type": "float"},
{"name": "doubleField", "type": "double"},
{"name": "bytesField", "type": "bytes"},
{"name": "nullField", "type": "null"},
{"name": "arrayField", "type": {"type": "array", "items": "double"}},
{
"name": "mapField",
"type": {
"type": "map",
"values": {"name": "Foo",
"type": "record",
"fields": [{"name": "label", "type": "string"}]}
}
},
{
"name": "unionField",
"type": ["boolean", "double", {"type": "array", "items": "bytes"}]
},
{
"name": "enumField",
"type": {"type": "enum", "name": "Kind", "symbols": ["A", "B", "C"]}
},
{
"name": "fixedField",
"type": {"type": "fixed", "name": "MD5", "size": 16}
},
{
"name": "recordField",
"type": {"type": "record",
"name": "Node",
"fields": [{"name": "label", "type": "string"},
{"name": "children",
"type": {"type": "array",
"items": "Node"}}]}
}
]
}
"""
        )
        self.assertEqual(
            s.canonical_form,
            (
                '{"name":"org.apache.avro.Interop","type":"record","fields":[{"na'
                'me":"intField","type":"int"},{"name":"longField","type":"long"},'
                '{"name":"stringField","type":"string"},{"name":"boolField","type'
                '":"boolean"},{"name":"floatField","type":"float"},{"name":"doubl'
                'eField","type":"double"},{"name":"bytesField","type":"bytes"},{"'
                'name":"nullField","type":"null"},{"name":"arrayField","type":{"t'
                'ype":"array","items":"double"}},{"name":"mapField","type":{"type'
                '":"map","values":{"name":"org.apache.avro.Foo","type":"record","'
                'fields":[{"name":"label","type":"string"}]}}},{"name":"unionFiel'
                'd","type":["boolean","double",{"type":"array","items":"bytes"}]}'
                ',{"name":"enumField","type":{"name":"org.apache.avro.Kind","type'
                '":"enum","symbols":["A","B","C"]}},{"name":"fixedField","type":{'
                '"name":"org.apache.avro.MD5","type":"fixed","size":16}},{"name":'
                '"recordField","type":{"name":"org.apache.avro.Node","type":"reco'
                'rd","fields":[{"name":"label","type":"string"},{"name":"children'
                '","type":{"type":"array","items":"org.apache.avro.Node"}}]}}]}'
            ),
        )
def load_tests(loader, default_tests, pattern):
    """Generate test cases across many test schema.

    Builds the suite unittest runs for this module: the two conventional test
    classes are loaded through `loader`, and one generated case is created per
    example for each of the four per-schema case classes. `default_tests` and
    `pattern` are accepted but not used.
    """
    generated = unittest.TestSuite()
    generated.addTests(loader.loadTestsFromTestCase(TestMisc))
    per_schema_cases = (
        (SchemaParseTestCase, EXAMPLES),
        (RoundTripParseTestCase, VALID_EXAMPLES),
        (DocAttributesTestCase, DOC_EXAMPLES),
        (OtherAttributesTestCase, OTHER_PROP_EXAMPLES),
    )
    for case_class, examples in per_schema_cases:
        generated.addTests([case_class(example) for example in examples])
    generated.addTests(loader.loadTestsFromTestCase(CanonicalFormTestCase))
    return generated
# Run this module's tests when the file is executed directly.
if __name__ == "__main__":  # pragma: no coverage
    unittest.main()