python/pyarrow/tests/test_schema.py - arrow - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 from collections import OrderedDict
 import pickle

 import pytest
 import numpy as np
 import pyarrow as pa


 def test_schema_constructor_errors():
     msg = ("Do not call Schema's constructor directly, use `pyarrow.schema` "
            "instead")
     with pytest.raises(TypeError, match=msg):
         pa.Schema()


 def test_type_integers():
     dtypes = ['int8', 'int16', 'int32', 'int64',
               'uint8', 'uint16', 'uint32', 'uint64']

     for name in dtypes:
         factory = getattr(pa, name)
         t = factory()
         assert str(t) == name


 def test_type_to_pandas_dtype():
     M8_ns = np.dtype('datetime64[ns]')
     cases = [
         (pa.null(), np.float64),
         (pa.bool_(), np.bool_),
         (pa.int8(), np.int8),
         (pa.int16(), np.int16),
         (pa.int32(), np.int32),
         (pa.int64(), np.int64),
         (pa.uint8(), np.uint8),
         (pa.uint16(), np.uint16),
         (pa.uint32(), np.uint32),
         (pa.uint64(), np.uint64),
         (pa.float16(), np.float16),
         (pa.float32(), np.float32),
         (pa.float64(), np.float64),
         (pa.date32(), M8_ns),
         (pa.date64(), M8_ns),
         (pa.timestamp('ms'), M8_ns),
         (pa.binary(), np.object_),
         (pa.binary(12), np.object_),
         (pa.string(), np.object_),
         (pa.list_(pa.int8()), np.object_),
     ]
     for arrow_type, numpy_type in cases:
         assert arrow_type.to_pandas_dtype() == numpy_type


 def test_type_list():
     value_type = pa.int32()
     list_type = pa.list_(value_type)
     assert str(list_type) == 'list<item: int32>'

     field = pa.field('my_item', pa.string())
     l2 = pa.list_(field)
     assert str(l2) == 'list<my_item: string>'


 def test_type_comparisons():
     val = pa.int32()
     assert val == pa.int32()
     assert val == 'int32'
     assert val != 5


 def test_type_for_alias():
     cases = [
         ('i1', pa.int8()),
         ('int8', pa.int8()),
         ('i2', pa.int16()),
         ('int16', pa.int16()),
         ('i4', pa.int32()),
         ('int32', pa.int32()),
         ('i8', pa.int64()),
         ('int64', pa.int64()),
         ('u1', pa.uint8()),
         ('uint8', pa.uint8()),
         ('u2', pa.uint16()),
         ('uint16', pa.uint16()),
         ('u4', pa.uint32()),
         ('uint32', pa.uint32()),
         ('u8', pa.uint64()),
         ('uint64', pa.uint64()),
         ('f4', pa.float32()),
         ('float32', pa.float32()),
         ('f8', pa.float64()),
         ('float64', pa.float64()),
         ('date32', pa.date32()),
         ('date64', pa.date64()),
         ('string', pa.string()),
         ('str', pa.string()),
         ('binary', pa.binary()),
         ('time32[s]', pa.time32('s')),
         ('time32[ms]', pa.time32('ms')),
         ('time64[us]', pa.time64('us')),
         ('time64[ns]', pa.time64('ns')),
         ('timestamp[s]', pa.timestamp('s')),
         ('timestamp[ms]', pa.timestamp('ms')),
         ('timestamp[us]', pa.timestamp('us')),
         ('timestamp[ns]', pa.timestamp('ns')),
     ]

     for val, expected in cases:
         assert pa.type_for_alias(val) == expected


 def test_type_string():
     t = pa.string()
     assert str(t) == 'string'


 def test_type_timestamp_with_tz():
     tz = 'America/Los_Angeles'
     t = pa.timestamp('ns', tz=tz)
     assert t.unit == 'ns'
     assert t.tz == tz


 def test_time_types():
     t1 = pa.time32('s')
     t2 = pa.time32('ms')
     t3 = pa.time64('us')
     t4 = pa.time64('ns')

     assert t1.unit == 's'
     assert t2.unit == 'ms'
     assert t3.unit == 'us'
     assert t4.unit == 'ns'

     assert str(t1) == 'time32[s]'
     assert str(t4) == 'time64[ns]'

     with pytest.raises(ValueError):
         pa.time32('us')

     with pytest.raises(ValueError):
         pa.time64('s')


 def test_from_numpy_dtype():
     cases = [
         (np.dtype('bool'), pa.bool_()),
         (np.dtype('int8'), pa.int8()),
         (np.dtype('int16'), pa.int16()),
         (np.dtype('int32'), pa.int32()),
         (np.dtype('int64'), pa.int64()),
         (np.dtype('uint8'), pa.uint8()),
         (np.dtype('uint16'), pa.uint16()),
         (np.dtype('uint32'), pa.uint32()),
         (np.dtype('float16'), pa.float16()),
         (np.dtype('float32'), pa.float32()),
         (np.dtype('float64'), pa.float64()),
         (np.dtype('U'), pa.string()),
         (np.dtype('S'), pa.binary()),
         (np.dtype('datetime64[s]'), pa.timestamp('s')),
         (np.dtype('datetime64[ms]'), pa.timestamp('ms')),
         (np.dtype('datetime64[us]'), pa.timestamp('us')),
         (np.dtype('datetime64[ns]'), pa.timestamp('ns'))
     ]

     for dt, pt in cases:
         result = pa.from_numpy_dtype(dt)
         assert result == pt

     # Things convertible to numpy dtypes work
     assert pa.from_numpy_dtype('U') == pa.string()
     assert pa.from_numpy_dtype(np.unicode) == pa.string()
     assert pa.from_numpy_dtype('int32') == pa.int32()
     assert pa.from_numpy_dtype(bool) == pa.bool_()

     with pytest.raises(NotImplementedError):
         pa.from_numpy_dtype(np.dtype('O'))

     with pytest.raises(TypeError):
         pa.from_numpy_dtype('not_convertible_to_dtype')


 def test_schema():
     fields = [
         pa.field('foo', pa.int32()),
         pa.field('bar', pa.string()),
         pa.field('baz', pa.list_(pa.int8()))
     ]
     sch = pa.schema(fields)

     assert sch.names == ['foo', 'bar', 'baz']
     assert sch.types == [pa.int32(), pa.string(), pa.list_(pa.int8())]

     assert len(sch) == 3
     assert sch[0].name == 'foo'
     assert sch[0].type == fields[0].type
     assert sch.field_by_name('foo').name == 'foo'
     assert sch.field_by_name('foo').type == fields[0].type

     assert repr(sch) == """\
 foo: int32
 bar: string
 baz: list<item: int8>
   child 0, item: int8"""

     with pytest.raises(TypeError):
         pa.schema([None])


 def test_schema_from_tuples():
     fields = [
         ('foo', pa.int32()),
         ('bar', pa.string()),
         ('baz', pa.list_(pa.int8())),
     ]
     sch = pa.schema(fields)
     assert sch.names == ['foo', 'bar', 'baz']
     assert sch.types == [pa.int32(), pa.string(), pa.list_(pa.int8())]
     assert len(sch) == 3
     assert repr(sch) == """\
 foo: int32
 bar: string
 baz: list<item: int8>
   child 0, item: int8"""

     with pytest.raises(TypeError):
         pa.schema([('foo', None)])


 def test_schema_from_mapping():
     fields = OrderedDict([
         ('foo', pa.int32()),
         ('bar', pa.string()),
         ('baz', pa.list_(pa.int8())),
     ])
     sch = pa.schema(fields)
     assert sch.names == ['foo', 'bar', 'baz']
     assert sch.types == [pa.int32(), pa.string(), pa.list_(pa.int8())]
     assert len(sch) == 3
     assert repr(sch) == """\
 foo: int32
 bar: string
 baz: list<item: int8>
   child 0, item: int8"""

     fields = OrderedDict([('foo', None)])
     with pytest.raises(TypeError):
         pa.schema(fields)


 def test_schema_duplicate_fields():
     fields = [
         pa.field('foo', pa.int32()),
         pa.field('bar', pa.string()),
         pa.field('foo', pa.list_(pa.int8())),
     ]
     sch = pa.schema(fields)
     assert sch.names == ['foo', 'bar', 'foo']
     assert sch.types == [pa.int32(), pa.string(), pa.list_(pa.int8())]
     assert len(sch) == 3
     assert repr(sch) == """\
 foo: int32
 bar: string
 foo: list<item: int8>
   child 0, item: int8"""

     assert sch[0].name == 'foo'
     assert sch[0].type == fields[0].type
     assert sch.field_by_name('bar') == fields[1]
     assert sch.field_by_name('xxx') is None
     with pytest.warns(UserWarning):
         assert sch.field_by_name('foo') is None


 def test_field_flatten():
     f0 = pa.field('foo', pa.int32()).add_metadata({b'foo': b'bar'})
     assert f0.flatten() == [f0]

     f1 = pa.field('bar', pa.float64(), nullable=False)
     ff = pa.field('ff', pa.struct([f0, f1]), nullable=False)
     assert ff.flatten() == [
         pa.field('ff.foo', pa.int32()).add_metadata({b'foo': b'bar'}),
         pa.field('ff.bar', pa.float64(), nullable=False)]  # XXX

     # Nullable parent makes flattened child nullable
     ff = pa.field('ff', pa.struct([f0, f1]))
     assert ff.flatten() == [
         pa.field('ff.foo', pa.int32()).add_metadata({b'foo': b'bar'}),
         pa.field('ff.bar', pa.float64())]

     fff = pa.field('fff', pa.struct([ff]))
     assert fff.flatten() == [pa.field('fff.ff', pa.struct([f0, f1]))]


 def test_schema_add_remove_metadata():
     fields = [
         pa.field('foo', pa.int32()),
         pa.field('bar', pa.string()),
         pa.field('baz', pa.list_(pa.int8()))
     ]

     s1 = pa.schema(fields)

     assert s1.metadata is None

     metadata = {b'foo': b'bar', b'pandas': b'badger'}

     s2 = s1.add_metadata(metadata)
     assert s2.metadata == metadata

     s3 = s2.remove_metadata()
     assert s3.metadata is None

     # idempotent
     s4 = s3.remove_metadata()
     assert s4.metadata is None


 def test_schema_equals():
     fields = [
         pa.field('foo', pa.int32()),
         pa.field('bar', pa.string()),
         pa.field('baz', pa.list_(pa.int8()))
     ]
     metadata = {b'foo': b'bar', b'pandas': b'badger'}

     sch1 = pa.schema(fields)
     sch2 = pa.schema(fields)
     sch3 = pa.schema(fields, metadata=metadata)
     sch4 = pa.schema(fields, metadata=metadata)

     assert sch1.equals(sch2)
     assert sch3.equals(sch4)
     assert sch1.equals(sch3, check_metadata=False)
     assert not sch1.equals(sch3, check_metadata=True)
     assert not sch1.equals(sch3)

     del fields[-1]
     sch3 = pa.schema(fields)
     assert not sch1.equals(sch3)


 def test_schema_equals_propagates_check_metadata():
     # ARROW-4088
     schema1 = pa.schema([
         pa.field('foo', pa.int32()),
         pa.field('bar', pa.string())
     ])
     schema2 = pa.schema([
         pa.field('foo', pa.int32()),
         pa.field('bar', pa.string(), metadata={'a': 'alpha'}),
     ])
     assert not schema1.equals(schema2)
     assert schema1.equals(schema2, check_metadata=False)


 def test_schema_equals_invalid_type():
     # ARROW-5873
     schema = pa.schema([pa.field("a", pa.int64())])

     for val in [None, 'string', pa.array([1, 2])]:
         with pytest.raises(TypeError):
             schema.equals(val)


 def test_schema_equality_operators():
     fields = [
         pa.field('foo', pa.int32()),
         pa.field('bar', pa.string()),
         pa.field('baz', pa.list_(pa.int8()))
     ]
     metadata = {b'foo': b'bar', b'pandas': b'badger'}

     sch1 = pa.schema(fields)
     sch2 = pa.schema(fields)
     sch3 = pa.schema(fields, metadata=metadata)
     sch4 = pa.schema(fields, metadata=metadata)

     assert sch1 == sch2
     assert sch3 == sch4
     assert sch1 != sch3
     assert sch2 != sch4

     # comparison with other types doesn't raise
     assert sch1 != []
     assert sch3 != 'foo'


 def test_schema_negative_indexing():
     fields = [
         pa.field('foo', pa.int32()),
         pa.field('bar', pa.string()),
         pa.field('baz', pa.list_(pa.int8()))
     ]

     schema = pa.schema(fields)

     assert schema[-1].equals(schema[2])
     assert schema[-2].equals(schema[1])
     assert schema[-3].equals(schema[0])

     with pytest.raises(IndexError):
         schema[-4]

     with pytest.raises(IndexError):
         schema[3]


 def test_schema_repr_with_dictionaries():
     fields = [
         pa.field('one', pa.dictionary(pa.int16(), pa.string())),
         pa.field('two', pa.int32())
     ]
     sch = pa.schema(fields)

     expected = (
         """\
 one: dictionary<values=string, indices=int16, ordered=0>
 two: int32""")

     assert repr(sch) == expected


 def test_type_schema_pickling():
     cases = [
         pa.int8(),
         pa.string(),
         pa.binary(),
         pa.binary(10),
         pa.list_(pa.string()),
         pa.struct([
             pa.field('a', 'int8'),
             pa.field('b', 'string')
         ]),
         pa.union([
             pa.field('a', pa.int8()),
             pa.field('b', pa.int16())
         ], pa.lib.UnionMode_SPARSE),
         pa.union([
             pa.field('a', pa.int8()),
             pa.field('b', pa.int16())
         ], pa.lib.UnionMode_DENSE),
         pa.time32('s'),
         pa.time64('us'),
         pa.date32(),
         pa.date64(),
         pa.timestamp('ms'),
         pa.timestamp('ns'),
         pa.decimal128(12, 2),
         pa.field('a', 'string', metadata={b'foo': b'bar'})
     ]

     for val in cases:
         roundtripped = pickle.loads(pickle.dumps(val))
         assert val == roundtripped

     fields = []
     for i, f in enumerate(cases):
         if isinstance(f, pa.Field):
             fields.append(f)
         else:
             fields.append(pa.field('_f{}'.format(i), f))

     schema = pa.schema(fields, metadata={b'foo': b'bar'})
     roundtripped = pickle.loads(pickle.dumps(schema))
     assert schema == roundtripped


 def test_empty_table():
     schema = pa.schema([
         pa.field('oneField', pa.int64())
     ])
     table = schema.empty_table()
     assert isinstance(table, pa.Table)
     assert table.num_rows == 0
     assert table.schema == schema


 @pytest.mark.pandas
 def test_schema_from_pandas():
     import pandas as pd
     inputs = [
         list(range(10)),
         pd.Categorical(list(range(10))),
         ['foo', 'bar', None, 'baz', 'qux'],
         np.array([
             '2007-07-13T01:23:34.123456789',
             '2006-01-13T12:34:56.432539784',
             '2010-08-13T05:46:57.437699912'
         ], dtype='datetime64[ns]')
     ]
     for data in inputs:
         df = pd.DataFrame({'a': data})
         schema = pa.Schema.from_pandas(df)
         expected = pa.Table.from_pandas(df).schema
         assert schema == expected
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	from collections import OrderedDict
	import pickle

	import pytest
	import numpy as np
	import pyarrow as pa


	def test_schema_constructor_errors():
	msg = ("Do not call Schema's constructor directly, use `pyarrow.schema` "
	"instead")
	with pytest.raises(TypeError, match=msg):
	pa.Schema()


	def test_type_integers():
	dtypes = ['int8', 'int16', 'int32', 'int64',
	'uint8', 'uint16', 'uint32', 'uint64']

	for name in dtypes:
	factory = getattr(pa, name)
	t = factory()
	assert str(t) == name


	def test_type_to_pandas_dtype():
	M8_ns = np.dtype('datetime64[ns]')
	cases = [
	(pa.null(), np.float64),
	(pa.bool_(), np.bool_),
	(pa.int8(), np.int8),
	(pa.int16(), np.int16),
	(pa.int32(), np.int32),
	(pa.int64(), np.int64),
	(pa.uint8(), np.uint8),
	(pa.uint16(), np.uint16),
	(pa.uint32(), np.uint32),
	(pa.uint64(), np.uint64),
	(pa.float16(), np.float16),
	(pa.float32(), np.float32),
	(pa.float64(), np.float64),
	(pa.date32(), M8_ns),
	(pa.date64(), M8_ns),
	(pa.timestamp('ms'), M8_ns),
	(pa.binary(), np.object_),
	(pa.binary(12), np.object_),
	(pa.string(), np.object_),
	(pa.list_(pa.int8()), np.object_),
	]
	for arrow_type, numpy_type in cases:
	assert arrow_type.to_pandas_dtype() == numpy_type


	def test_type_list():
	value_type = pa.int32()
	list_type = pa.list_(value_type)
	assert str(list_type) == 'list<item: int32>'

	field = pa.field('my_item', pa.string())
	l2 = pa.list_(field)
	assert str(l2) == 'list<my_item: string>'


	def test_type_comparisons():
	val = pa.int32()
	assert val == pa.int32()
	assert val == 'int32'
	assert val != 5


	def test_type_for_alias():
	cases = [
	('i1', pa.int8()),
	('int8', pa.int8()),
	('i2', pa.int16()),
	('int16', pa.int16()),
	('i4', pa.int32()),
	('int32', pa.int32()),
	('i8', pa.int64()),
	('int64', pa.int64()),
	('u1', pa.uint8()),
	('uint8', pa.uint8()),
	('u2', pa.uint16()),
	('uint16', pa.uint16()),
	('u4', pa.uint32()),
	('uint32', pa.uint32()),
	('u8', pa.uint64()),
	('uint64', pa.uint64()),
	('f4', pa.float32()),
	('float32', pa.float32()),
	('f8', pa.float64()),
	('float64', pa.float64()),
	('date32', pa.date32()),
	('date64', pa.date64()),
	('string', pa.string()),
	('str', pa.string()),
	('binary', pa.binary()),
	('time32[s]', pa.time32('s')),
	('time32[ms]', pa.time32('ms')),
	('time64[us]', pa.time64('us')),
	('time64[ns]', pa.time64('ns')),
	('timestamp[s]', pa.timestamp('s')),
	('timestamp[ms]', pa.timestamp('ms')),
	('timestamp[us]', pa.timestamp('us')),
	('timestamp[ns]', pa.timestamp('ns')),
	]

	for val, expected in cases:
	assert pa.type_for_alias(val) == expected


	def test_type_string():
	t = pa.string()
	assert str(t) == 'string'


	def test_type_timestamp_with_tz():
	tz = 'America/Los_Angeles'
	t = pa.timestamp('ns', tz=tz)
	assert t.unit == 'ns'
	assert t.tz == tz


	def test_time_types():
	t1 = pa.time32('s')
	t2 = pa.time32('ms')
	t3 = pa.time64('us')
	t4 = pa.time64('ns')

	assert t1.unit == 's'
	assert t2.unit == 'ms'
	assert t3.unit == 'us'
	assert t4.unit == 'ns'

	assert str(t1) == 'time32[s]'
	assert str(t4) == 'time64[ns]'

	with pytest.raises(ValueError):
	pa.time32('us')

	with pytest.raises(ValueError):
	pa.time64('s')


	def test_from_numpy_dtype():
	cases = [
	(np.dtype('bool'), pa.bool_()),
	(np.dtype('int8'), pa.int8()),
	(np.dtype('int16'), pa.int16()),
	(np.dtype('int32'), pa.int32()),
	(np.dtype('int64'), pa.int64()),
	(np.dtype('uint8'), pa.uint8()),
	(np.dtype('uint16'), pa.uint16()),
	(np.dtype('uint32'), pa.uint32()),
	(np.dtype('float16'), pa.float16()),
	(np.dtype('float32'), pa.float32()),
	(np.dtype('float64'), pa.float64()),
	(np.dtype('U'), pa.string()),
	(np.dtype('S'), pa.binary()),
	(np.dtype('datetime64[s]'), pa.timestamp('s')),
	(np.dtype('datetime64[ms]'), pa.timestamp('ms')),
	(np.dtype('datetime64[us]'), pa.timestamp('us')),
	(np.dtype('datetime64[ns]'), pa.timestamp('ns'))
	]

	for dt, pt in cases:
	result = pa.from_numpy_dtype(dt)
	assert result == pt

	# Things convertible to numpy dtypes work
	assert pa.from_numpy_dtype('U') == pa.string()
	assert pa.from_numpy_dtype(np.unicode) == pa.string()
	assert pa.from_numpy_dtype('int32') == pa.int32()
	assert pa.from_numpy_dtype(bool) == pa.bool_()

	with pytest.raises(NotImplementedError):
	pa.from_numpy_dtype(np.dtype('O'))

	with pytest.raises(TypeError):
	pa.from_numpy_dtype('not_convertible_to_dtype')


	def test_schema():
	fields = [
	pa.field('foo', pa.int32()),
	pa.field('bar', pa.string()),
	pa.field('baz', pa.list_(pa.int8()))
	]
	sch = pa.schema(fields)

	assert sch.names == ['foo', 'bar', 'baz']
	assert sch.types == [pa.int32(), pa.string(), pa.list_(pa.int8())]

	assert len(sch) == 3
	assert sch[0].name == 'foo'
	assert sch[0].type == fields[0].type
	assert sch.field_by_name('foo').name == 'foo'
	assert sch.field_by_name('foo').type == fields[0].type

	assert repr(sch) == """\
	foo: int32
	bar: string
	baz: list<item: int8>
	child 0, item: int8"""

	with pytest.raises(TypeError):
	pa.schema([None])


	def test_schema_from_tuples():
	fields = [
	('foo', pa.int32()),
	('bar', pa.string()),
	('baz', pa.list_(pa.int8())),
	]
	sch = pa.schema(fields)
	assert sch.names == ['foo', 'bar', 'baz']
	assert sch.types == [pa.int32(), pa.string(), pa.list_(pa.int8())]
	assert len(sch) == 3
	assert repr(sch) == """\
	foo: int32
	bar: string
	baz: list<item: int8>
	child 0, item: int8"""

	with pytest.raises(TypeError):
	pa.schema([('foo', None)])


	def test_schema_from_mapping():
	fields = OrderedDict([
	('foo', pa.int32()),
	('bar', pa.string()),
	('baz', pa.list_(pa.int8())),
	])
	sch = pa.schema(fields)
	assert sch.names == ['foo', 'bar', 'baz']
	assert sch.types == [pa.int32(), pa.string(), pa.list_(pa.int8())]
	assert len(sch) == 3
	assert repr(sch) == """\
	foo: int32
	bar: string
	baz: list<item: int8>
	child 0, item: int8"""

	fields = OrderedDict([('foo', None)])
	with pytest.raises(TypeError):
	pa.schema(fields)


	def test_schema_duplicate_fields():
	fields = [
	pa.field('foo', pa.int32()),
	pa.field('bar', pa.string()),
	pa.field('foo', pa.list_(pa.int8())),
	]
	sch = pa.schema(fields)
	assert sch.names == ['foo', 'bar', 'foo']
	assert sch.types == [pa.int32(), pa.string(), pa.list_(pa.int8())]
	assert len(sch) == 3
	assert repr(sch) == """\
	foo: int32
	bar: string
	foo: list<item: int8>
	child 0, item: int8"""

	assert sch[0].name == 'foo'
	assert sch[0].type == fields[0].type
	assert sch.field_by_name('bar') == fields[1]
	assert sch.field_by_name('xxx') is None
	with pytest.warns(UserWarning):
	assert sch.field_by_name('foo') is None


	def test_field_flatten():
	f0 = pa.field('foo', pa.int32()).add_metadata({b'foo': b'bar'})
	assert f0.flatten() == [f0]

	f1 = pa.field('bar', pa.float64(), nullable=False)
	ff = pa.field('ff', pa.struct([f0, f1]), nullable=False)
	assert ff.flatten() == [
	pa.field('ff.foo', pa.int32()).add_metadata({b'foo': b'bar'}),
	pa.field('ff.bar', pa.float64(), nullable=False)] # XXX

	# Nullable parent makes flattened child nullable
	ff = pa.field('ff', pa.struct([f0, f1]))
	assert ff.flatten() == [
	pa.field('ff.foo', pa.int32()).add_metadata({b'foo': b'bar'}),
	pa.field('ff.bar', pa.float64())]

	fff = pa.field('fff', pa.struct([ff]))
	assert fff.flatten() == [pa.field('fff.ff', pa.struct([f0, f1]))]


	def test_schema_add_remove_metadata():
	fields = [
	pa.field('foo', pa.int32()),
	pa.field('bar', pa.string()),
	pa.field('baz', pa.list_(pa.int8()))
	]

	s1 = pa.schema(fields)

	assert s1.metadata is None

	metadata = {b'foo': b'bar', b'pandas': b'badger'}

	s2 = s1.add_metadata(metadata)
	assert s2.metadata == metadata

	s3 = s2.remove_metadata()
	assert s3.metadata is None

	# idempotent
	s4 = s3.remove_metadata()
	assert s4.metadata is None


	def test_schema_equals():
	fields = [
	pa.field('foo', pa.int32()),
	pa.field('bar', pa.string()),
	pa.field('baz', pa.list_(pa.int8()))
	]
	metadata = {b'foo': b'bar', b'pandas': b'badger'}

	sch1 = pa.schema(fields)
	sch2 = pa.schema(fields)
	sch3 = pa.schema(fields, metadata=metadata)
	sch4 = pa.schema(fields, metadata=metadata)

	assert sch1.equals(sch2)
	assert sch3.equals(sch4)
	assert sch1.equals(sch3, check_metadata=False)
	assert not sch1.equals(sch3, check_metadata=True)
	assert not sch1.equals(sch3)

	del fields[-1]
	sch3 = pa.schema(fields)
	assert not sch1.equals(sch3)


	def test_schema_equals_propagates_check_metadata():
	# ARROW-4088
	schema1 = pa.schema([
	pa.field('foo', pa.int32()),
	pa.field('bar', pa.string())
	])
	schema2 = pa.schema([
	pa.field('foo', pa.int32()),
	pa.field('bar', pa.string(), metadata={'a': 'alpha'}),
	])
	assert not schema1.equals(schema2)
	assert schema1.equals(schema2, check_metadata=False)


	def test_schema_equals_invalid_type():
	# ARROW-5873
	schema = pa.schema([pa.field("a", pa.int64())])

	for val in [None, 'string', pa.array([1, 2])]:
	with pytest.raises(TypeError):
	schema.equals(val)


	def test_schema_equality_operators():
	fields = [
	pa.field('foo', pa.int32()),
	pa.field('bar', pa.string()),
	pa.field('baz', pa.list_(pa.int8()))
	]
	metadata = {b'foo': b'bar', b'pandas': b'badger'}

	sch1 = pa.schema(fields)
	sch2 = pa.schema(fields)
	sch3 = pa.schema(fields, metadata=metadata)
	sch4 = pa.schema(fields, metadata=metadata)

	assert sch1 == sch2
	assert sch3 == sch4
	assert sch1 != sch3
	assert sch2 != sch4

	# comparison with other types doesn't raise
	assert sch1 != []
	assert sch3 != 'foo'


	def test_schema_negative_indexing():
	fields = [
	pa.field('foo', pa.int32()),
	pa.field('bar', pa.string()),
	pa.field('baz', pa.list_(pa.int8()))
	]

	schema = pa.schema(fields)

	assert schema[-1].equals(schema[2])
	assert schema[-2].equals(schema[1])
	assert schema[-3].equals(schema[0])

	with pytest.raises(IndexError):
	schema[-4]

	with pytest.raises(IndexError):
	schema[3]


	def test_schema_repr_with_dictionaries():
	fields = [
	pa.field('one', pa.dictionary(pa.int16(), pa.string())),
	pa.field('two', pa.int32())
	]
	sch = pa.schema(fields)

	expected = (
	"""\
	one: dictionary<values=string, indices=int16, ordered=0>
	two: int32""")

	assert repr(sch) == expected


	def test_type_schema_pickling():
	cases = [
	pa.int8(),
	pa.string(),
	pa.binary(),
	pa.binary(10),
	pa.list_(pa.string()),
	pa.struct([
	pa.field('a', 'int8'),
	pa.field('b', 'string')
	]),
	pa.union([
	pa.field('a', pa.int8()),
	pa.field('b', pa.int16())
	], pa.lib.UnionMode_SPARSE),
	pa.union([
	pa.field('a', pa.int8()),
	pa.field('b', pa.int16())
	], pa.lib.UnionMode_DENSE),
	pa.time32('s'),
	pa.time64('us'),
	pa.date32(),
	pa.date64(),
	pa.timestamp('ms'),
	pa.timestamp('ns'),
	pa.decimal128(12, 2),
	pa.field('a', 'string', metadata={b'foo': b'bar'})
	]

	for val in cases:
	roundtripped = pickle.loads(pickle.dumps(val))
	assert val == roundtripped

	fields = []
	for i, f in enumerate(cases):
	if isinstance(f, pa.Field):
	fields.append(f)
	else:
	fields.append(pa.field('_f{}'.format(i), f))

	schema = pa.schema(fields, metadata={b'foo': b'bar'})
	roundtripped = pickle.loads(pickle.dumps(schema))
	assert schema == roundtripped


	def test_empty_table():
	schema = pa.schema([
	pa.field('oneField', pa.int64())
	])
	table = schema.empty_table()
	assert isinstance(table, pa.Table)
	assert table.num_rows == 0
	assert table.schema == schema


	@pytest.mark.pandas
	def test_schema_from_pandas():
	import pandas as pd
	inputs = [
	list(range(10)),
	pd.Categorical(list(range(10))),
	['foo', 'bar', None, 'baz', 'qux'],
	np.array([
	'2007-07-13T01:23:34.123456789',
	'2006-01-13T12:34:56.432539784',
	'2010-08-13T05:46:57.437699912'
	], dtype='datetime64[ns]')
	]
	for data in inputs:
	df = pd.DataFrame({'a': data})
	schema = pa.Schema.from_pandas(df)
	expected = pa.Table.from_pandas(df).schema
	assert schema == expected