blob: 00e2a05de8132f58e557c6e560a7142931ed7650 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Test the schema parsing logic.
"""
import unittest
import set_avro_test_path
from avro import schema
def print_test_name(test_name):
    """Print ``test_name`` as a banner on standard output.

    The banner is four lines: an empty line, the name itself, a row of '='
    characters as long as the name, and another empty line.

    Every ``print`` is given exactly one parenthesised argument. That form
    produces the same output whether ``print`` is a statement (Python 2) or
    a function (Python 3), so the helper no longer depends on the
    statement-only syntax.
    """
    print('')
    print(test_name)
    print('=' * len(test_name))
    print('')
class ExampleSchema(object):
    """One schema text together with the parse outcome the tests expect.

    schema_string -- the text that the tests hand to ``schema.parse``.
    valid -- whether that text is expected to parse.
    name -- label used in messages; an empty value falls back to
        ``schema_string``.
    comment -- free-form note, stored unchanged as a plain attribute.
    """

    def __init__(self, schema_string, valid, name='', comment=''):
        self.comment = comment
        self._valid = valid
        self._schema_string = schema_string
        if name:
            self._name = name
        else:
            # No label supplied: identify the example by its schema text.
            self._name = schema_string

    # Read-only views of the values captured by the constructor.
    schema_string = property(lambda self: self._schema_string)
    valid = property(lambda self: self._valid)
    name = property(lambda self: self._name)
#
# Example Schemas
#
def make_primitive_examples():
    """Return two valid examples for each entry of ``schema.PRIMITIVE_TYPES``.

    For every primitive type name the result holds, in this order, the bare
    quoted name and the ``{"type": ...}`` object form, both marked valid.
    """
    templates = ('"%s"', '{"type": "%s"}')
    return [ExampleSchema(template % primitive, True)
            for primitive in schema.PRIMITIVE_TYPES
            for template in templates]
# Hand-written primitive cases marked invalid, followed by the generated
# valid cases from make_primitive_examples().
PRIMITIVE_EXAMPLES = [
    ExampleSchema(schema_string='"True"', valid=False),
    ExampleSchema(schema_string='True', valid=False),
    ExampleSchema(schema_string='{"no_type": "test"}', valid=False),
    ExampleSchema(schema_string='{"type": "panther"}', valid=False),
]
PRIMITIVE_EXAMPLES.extend(make_primitive_examples())
# "fixed" schemas: two complete definitions, then one without "size" and
# one without "name", both marked invalid.
FIXED_EXAMPLES = [
    ExampleSchema('{"type": "fixed", "name": "Test", "size": 1}', True),
    ExampleSchema(
        '{"type": "fixed",\n'
        '"name": "MyFixed",\n'
        '"namespace": "org.apache.hadoop.avro",\n'
        '"size": 1}\n',
        True),
    ExampleSchema(
        '{"type": "fixed",\n'
        '"name": "Missing size"}\n',
        False),
    ExampleSchema(
        '{"type": "fixed",\n'
        '"size": 314}\n',
        False),
]
# "enum" schemas: one valid definition followed by four marked invalid.
ENUM_EXAMPLES = [
    ExampleSchema(
        '{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', True),
    # "symbols" is a single string rather than a JSON array.
    ExampleSchema(
        '{"type": "enum",\n'
        '"name": "Status",\n'
        '"symbols": "Normal Caution Critical"}\n',
        False),
    # "name" is a JSON array rather than a string.
    ExampleSchema(
        '{"type": "enum",\n'
        '"name": [ 0, 1, 1, 2, 3, 5, 8 ],\n'
        '"symbols": ["Golden", "Mean"]}\n',
        False),
    # No "name" at all.
    ExampleSchema(
        '{"type": "enum",\n'
        '"symbols" : ["I", "will", "fail", "no", "name"]}\n',
        False),
    # Duplicate symbols. The comma after "Test" used to be missing, which
    # made this text malformed JSON: it was rejected by the JSON decoder and
    # never reached the enum's own validation. With the comma in place the
    # duplicated "AA" is the only thing wrong with it.
    # NOTE(review): relies on the parser rejecting duplicate enum symbols,
    # as the Avro specification requires -- confirm against avro.schema.
    ExampleSchema(
        '{"type": "enum",\n'
        '"name": "Test",\n'
        '"symbols" : ["AA", "AA"]}\n',
        False),
]
# "array" schemas, both marked valid: primitive items and enum items.
ARRAY_EXAMPLES = [
    ExampleSchema('{"type": "array", "items": "long"}', True),
    ExampleSchema(
        '{"type": "array",\n'
        '"items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}\n',
        True),
]
# "map" schemas, both marked valid: primitive values and enum values.
MAP_EXAMPLES = [
    ExampleSchema('{"type": "map", "values": "long"}', True),
    ExampleSchema(
        '{"type": "map",\n'
        '"values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}\n',
        True),
]
# Union schemas: one valid union followed by three marked invalid, each of
# which repeats a type among its branches.
UNION_EXAMPLES = [
    ExampleSchema('["string", "null", "long"]', True),
    ExampleSchema('["null", "null"]', False),
    ExampleSchema('["long", "long"]', False),
    # Two array branches. The comma between them used to be missing, which
    # made this text malformed JSON: it was rejected by the JSON decoder and
    # never reached union validation. With the comma in place the repeated
    # "array" type is the only thing wrong with it.
    # NOTE(review): relies on the parser rejecting a second array branch,
    # as the Avro specification requires -- confirm against avro.schema.
    ExampleSchema(
        '[{"type": "array", "items": "long"},\n'
        '{"type": "array", "items": "string"}]\n',
        False),
]
# "record" / "error" schemas: nine examples marked valid followed by four
# marked invalid.
RECORD_EXAMPLES = [
# Smallest record: a single "long" field.
ExampleSchema("""\
{"type": "record",
"name": "Test",
"fields": [{"name": "f",
"type": "long"}]}
""", True),
# Same shape, declared with type "error" instead of "record".
ExampleSchema("""\
{"type": "error",
"name": "Test",
"fields": [{"name": "f",
"type": "long"}]}
""", True),
# Self-reference: the array items name the enclosing record "Node".
ExampleSchema("""\
{"type": "record",
"name": "Node",
"fields": [{"name": "label", "type": "string"},
{"name": "children",
"type": {"type": "array", "items": "Node"}}]}
""", True),
# "Cons" is declared inside a union within "Lisp" and refers back to "Lisp".
ExampleSchema("""\
{"type": "record",
"name": "Lisp",
"fields": [{"name": "value",
"type": ["null", "string",
{"type": "record",
"name": "Cons",
"fields": [{"name": "car", "type": "Lisp"},
{"name": "cdr", "type": "Lisp"}]}]}]}
""", True),
# Namespaced record; the fixed type "MD5" is defined for clientHash and then
# referenced by name for serverHash.
ExampleSchema("""\
{"type": "record",
"name": "HandshakeRequest",
"namespace": "org.apache.avro.ipc",
"fields": [{"name": "clientHash",
"type": {"type": "fixed", "name": "MD5", "size": 16}},
{"name": "clientProtocol", "type": ["null", "string"]},
{"name": "serverHash", "type": "MD5"},
{"name": "meta",
"type": ["null", {"type": "map", "values": "bytes"}]}]}
""", True),
# Namespaced record with an enum field and unions of "null" with string,
# fixed and map types.
ExampleSchema("""\
{"type": "record",
"name": "HandshakeResponse",
"namespace": "org.apache.avro.ipc",
"fields": [{"name": "match",
"type": {"type": "enum",
"name": "HandshakeMatch",
"symbols": ["BOTH", "CLIENT", "NONE"]}},
{"name": "serverProtocol", "type": ["null", "string"]},
{"name": "serverHash",
"type": ["null",
{"name": "MD5", "size": 16, "type": "fixed"}]},
{"name": "meta",
"type": ["null", {"type": "map", "values": "bytes"}]}]}
""", True),
# Primitive-typed fields followed by array, map, union, enum, fixed and
# nested record fields.
ExampleSchema("""\
{"type": "record",
"name": "Interop",
"namespace": "org.apache.avro",
"fields": [{"name": "intField", "type": "int"},
{"name": "longField", "type": "long"},
{"name": "stringField", "type": "string"},
{"name": "boolField", "type": "boolean"},
{"name": "floatField", "type": "float"},
{"name": "doubleField", "type": "double"},
{"name": "bytesField", "type": "bytes"},
{"name": "nullField", "type": "null"},
{"name": "arrayField",
"type": {"type": "array", "items": "double"}},
{"name": "mapField",
"type": {"type": "map",
"values": {"name": "Foo",
"type": "record",
"fields": [{"name": "label",
"type": "string"}]}}},
{"name": "unionField",
"type": ["boolean",
"double",
{"type": "array", "items": "bytes"}]},
{"name": "enumField",
"type": {"type": "enum",
"name": "Kind",
"symbols": ["A", "B", "C"]}},
{"name": "fixedField",
"type": {"type": "fixed", "name": "MD5", "size": 16}},
{"name": "recordField",
"type": {"type": "record",
"name": "Node",
"fields": [{"name": "label", "type": "string"},
{"name": "children",
"type": {"type": "array",
"items": "Node"}}]}}]}
""", True),
# A union of two fixed types that differ in name and size.
ExampleSchema("""\
{"type": "record",
"name": "ipAddr",
"fields": [{"name": "addr",
"type": [{"name": "IPv6", "type": "fixed", "size": 16},
{"name": "IPv4", "type": "fixed", "size": 4}]}]}
""", True),
# Marked invalid: the first field has no "name".
ExampleSchema("""\
{"type": "record",
"name": "Address",
"fields": [{"type": "string"},
{"type": "string", "name": "City"}]}
""", False),
# Marked invalid: the "Sponsor" field has no "type".
ExampleSchema("""\
{"type": "record",
"name": "Event",
"fields": [{"name": "Sponsor"},
{"name": "City", "type": "string"}]}
""", False),
# Marked invalid: "fields" is a string rather than an array.
# NOTE(review): this text is also malformed JSON -- there is no comma after
# the "fields" value, and "name", "Rainer" has a comma where a colon would be
# expected -- so it presumably fails in the JSON decoder before any record
# validation runs. Confirm whether that is intended.
ExampleSchema("""\
{"type": "record",
"fields": "His vision, from the constantly passing bars,"
"name", "Rainer"}
""", False),
# Marked invalid: "name" is a JSON array rather than a string.
ExampleSchema("""\
{"name": ["Tom", "Jerry"],
"type": "record",
"fields": [{"name": "name", "type": "string"}]}
""", False),
]
# Schemas carrying a "doc" attribute, both marked valid: a record whose
# field is documented too, and an enum.
DOC_EXAMPLES = [
    ExampleSchema(
        '{"type": "record",\n'
        '"name": "TestDoc",\n'
        '"doc": "Doc string",\n'
        '"fields": [{"name": "name", "type": "string",\n'
        '"doc" : "Doc String"}]}\n',
        True),
    ExampleSchema(
        '{"type": "enum", "name": "Test", "symbols": ["A", "B"],\n'
        '"doc": "Doc String"}\n',
        True),
]
# Schemas carrying extra, non-standard attributes ("cp_*", "date") of
# several JSON value types; all four are marked valid.
OTHER_PROP_EXAMPLES = [
    ExampleSchema(
        '{"type": "record",\n'
        '"name": "TestRecord",\n'
        '"cp_string": "string",\n'
        '"cp_int": 1,\n'
        '"cp_array": [ 1, 2, 3, 4],\n'
        '"fields": [ {"name": "f1", "type": "string", "cp_object": {"a":1,"b":2} },\n'
        '{"name": "f2", "type": "long", "cp_null": null} ]}\n',
        True),
    ExampleSchema(
        '{"type": "map", "values": "long", "cp_boolean": true}\n',
        True),
    ExampleSchema(
        '{"type": "enum",\n'
        '"name": "TestEnum",\n'
        '"symbols": [ "one", "two", "three" ],\n'
        '"cp_float" : 1.0 }\n',
        True),
    ExampleSchema(
        '{"type": "long",\n'
        '"date": "true"}\n',
        True),
]
# Every example that test_parse checks, in category order.
#
# The list is built with "+" so that it is a new object. Binding EXAMPLES
# directly to PRIMITIVE_EXAMPLES and then using "+=" would extend that one
# list in place, leaving PRIMITIVE_EXAMPLES holding every category.
#
# OTHER_PROP_EXAMPLES is not part of this list.
EXAMPLES = (
    PRIMITIVE_EXAMPLES
    + FIXED_EXAMPLES
    + ENUM_EXAMPLES
    + ARRAY_EXAMPLES
    + MAP_EXAMPLES
    + UNION_EXAMPLES
    + RECORD_EXAMPLES
    + DOC_EXAMPLES
)
# The subset expected to parse; used by the stringify and round-trip tests.
VALID_EXAMPLES = [e for e in EXAMPLES if e.valid]
# TODO(hammer): refactor into harness for examples
# TODO(hammer): pretty-print detailed output
# TODO(hammer): make verbose flag
# TODO(hammer): show stack trace to user
# TODO(hammer): use logging module?
class TestSchema(unittest.TestCase):
    """Tests for parsing, stringifying and naming in ``avro.schema``."""

    def test_correct_recursive_extraction(self):
        """Stringify a nested record that refers to its parent and re-parse it."""
        s = schema.parse('{"type": "record", "name": "X", "fields": [{"name": "y", "type": {"type": "record", "name": "Y", "fields": [{"name": "Z", "type": "X"}]}}]}')
        t = schema.parse(str(s.fields[0].type))
        # If we've made it this far, the subschema was reasonably stringified;
        # it could be reparsed.
        self.assertEqual("X", t.fields[0].type.name)

    def test_parse(self):
        """Check that each example parses, or fails to, as its ``valid`` flag says."""
        correct = 0
        for example in EXAMPLES:
            try:
                schema.parse(example.schema_string)
            except Exception:
                parsed = False
            else:
                parsed = True
            # The verdict is checked outside the try block. self.fail()
            # raises an exception itself; when it was called inside the try,
            # the handler caught the "Invalid schema was parsed" failure and
            # counted the example as correctly rejected.
            if parsed and not example.valid:
                self.fail("Invalid schema was parsed: " + example.schema_string)
            if example.valid and not parsed:
                self.fail("Valid schema failed to parse: " + example.schema_string)
            correct += 1
        fail_msg = "Parse behavior correct on %d out of %d schemas." % \
            (correct, len(EXAMPLES))
        self.assertEqual(correct, len(EXAMPLES), fail_msg)

    def test_valid_cast_to_string_after_parse(self):
        """
        Test that the string generated by an Avro Schema object
        is, in fact, a valid Avro schema.
        """
        print_test_name('TEST CAST TO STRING AFTER PARSE')
        correct = 0
        for example in VALID_EXAMPLES:
            schema_data = schema.parse(example.schema_string)
            # An exception from this second parse fails the test.
            schema.parse(str(schema_data))
            correct += 1
        fail_msg = "Cast to string success on %d out of %d schemas" % \
            (correct, len(VALID_EXAMPLES))
        self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg)

    def test_equivalence_after_round_trip(self):
        """
        1. Given a string, parse it to get Avro schema "original".
        2. Serialize "original" to a string and parse that string
           to generate Avro schema "round trip".
        3. Ensure "original" and "round trip" schemas are equivalent.
        """
        print_test_name('TEST ROUND TRIP')
        correct = 0
        for example in VALID_EXAMPLES:
            original_schema = schema.parse(example.schema_string)
            round_trip_schema = schema.parse(str(original_schema))
            # Compared with "==" on purpose: "!=" is a separate operator and
            # is not guaranteed to be defined consistently on these objects.
            if original_schema == round_trip_schema:
                correct += 1
            else:
                self.fail("Round trip failure: %s, %s, %s" % (example.name, original_schema, str(original_schema)))
        fail_msg = "Round trip success on %d out of %d schemas" % \
            (correct, len(VALID_EXAMPLES))
        self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg)

    # TODO(hammer): more tests
    def test_fullname(self):
        """
        The fullname is determined in one of the following ways:
         * A name and namespace are both specified. For example,
           one might use "name": "X", "namespace": "org.foo"
           to indicate the fullname "org.foo.X".
         * A fullname is specified. If the name specified contains
           a dot, then it is assumed to be a fullname, and any
           namespace also specified is ignored. For example,
           use "name": "org.foo.X" to indicate the
           fullname "org.foo.X".
         * A name only is specified, i.e., a name that contains no
           dots. In this case the namespace is taken from the most
           tightly enclosing schema or protocol. For example,
           if "name": "X" is specified, and this occurs
           within a field of the record definition
           of "org.foo.Y", then the fullname is "org.foo.X".

        References to previously defined names are as in the latter
        two cases above: if they contain a dot they are a fullname, if
        they do not contain a dot, the namespace is the namespace of
        the enclosing definition.

        Primitive type names have no namespace and their names may
        not be defined in any namespace. A schema may only contain
        multiple definitions of a fullname if the definitions are
        equivalent.
        """
        print_test_name('TEST FULLNAME')
        # name and namespace specified
        fullname = schema.Name('a', 'o.a.h', None).fullname
        self.assertEqual(fullname, 'o.a.h.a')
        # fullname and namespace specified
        fullname = schema.Name('a.b.c.d', 'o.a.h', None).fullname
        self.assertEqual(fullname, 'a.b.c.d')
        # name and default namespace specified
        fullname = schema.Name('a', None, 'b.c.d').fullname
        self.assertEqual(fullname, 'b.c.d.a')
        # fullname and default namespace specified
        fullname = schema.Name('a.b.c.d', None, 'o.a.h').fullname
        self.assertEqual(fullname, 'a.b.c.d')
        # fullname, namespace, default namespace specified
        fullname = schema.Name('a.b.c.d', 'o.a.a', 'o.a.h').fullname
        self.assertEqual(fullname, 'a.b.c.d')
        # name, namespace, default namespace specified
        fullname = schema.Name('a', 'o.a.a', 'o.a.h').fullname
        self.assertEqual(fullname, 'o.a.a.a')

    def test_doc_attributes(self):
        """Check that ``doc`` is kept on each DOC_EXAMPLES schema and on record fields."""
        print_test_name('TEST DOC ATTRIBUTES')
        correct = 0
        for example in DOC_EXAMPLES:
            original_schema = schema.parse(example.schema_string)
            if original_schema.doc is not None:
                correct += 1
            if original_schema.type == 'record':
                for f in original_schema.fields:
                    if f.doc is None:
                        self.fail("Failed to preserve 'doc' in fields: " + example.schema_string)
        self.assertEqual(correct, len(DOC_EXAMPLES))

    def test_other_attributes(self):
        """Check that extra attributes survive a round trip and keep their Python types."""
        print_test_name('TEST OTHER ATTRIBUTES')
        correct = 0
        props = {}
        for example in OTHER_PROP_EXAMPLES:
            original_schema = schema.parse(example.schema_string)
            round_trip_schema = schema.parse(str(original_schema))
            self.assertEqual(original_schema.other_props, round_trip_schema.other_props)
            if original_schema.type == "record":
                field_props = 0
                for f in original_schema.fields:
                    if f.other_props:
                        props.update(f.other_props)
                        field_props += 1
                # Every field of the record example must carry extra props.
                self.assertEqual(field_props, len(original_schema.fields))
            if original_schema.other_props:
                props.update(original_schema.other_props)
                correct += 1
        # Exact type expected for each collected property that is checked;
        # properties not listed here are not type-checked.
        expected_types = {
            "cp_boolean": bool,
            "cp_int": int,
            "cp_object": dict,
            "cp_float": float,
            "cp_array": list,
        }
        for k, v in props.items():
            if k in expected_types:
                self.assertEqual(type(v), expected_types[k])
        self.assertEqual(correct, len(OTHER_PROP_EXAMPLES))

    def test_exception_is_not_swallowed_on_parse_error(self):
        """Check that unparseable text raises SchemaParseException with the decoder's message."""
        print_test_name('TEST EXCEPTION NOT SWALLOWED ON PARSE ERROR')
        try:
            schema.parse('/not/a/real/file')
            caught_exception = False
        # "as" binds the exception on Python 2.6+ and Python 3; the older
        # "except X, e" spelling is a syntax error on Python 3.
        except schema.SchemaParseException as e:
            expected_message = 'Error parsing JSON: /not/a/real/file, error = ' \
                               'No JSON object could be decoded'
            self.assertEqual(expected_message, e.args[0])
            caught_exception = True
        self.assertTrue(caught_exception, 'Exception was not caught')
# Run every test in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()