python/pyarrow/conftest.py - arrow - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 import pytest
 import pyarrow as pa
 from pyarrow import Codec
 from pyarrow import fs

 # Names of every optional test group this conftest knows about. Each name
 # has a matching key in ``defaults``, which records whether the group is
 # enabled by default.
 # NOTE(review): the consumer of this list is not visible in this file;
 # presumably the test suite turns each name into a pytest marker and/or
 # command-line switch -- confirm before renaming or reordering entries.
 groups = [
     'acero',
     'brotli',
     'bz2',
     'cython',
     'dataset',
     'hypothesis',
     'fastparquet',
     'gandiva',
     'gcs',
     'gdb',
     'gzip',
     'hdfs',
     'large_memory',
     'lz4',
     'memory_leak',
     'nopandas',
     'orc',
     'pandas',
     'parquet',
     'parquet_encryption',
     's3',
     'snappy',
     'substrait',
     'flight',
     'slow',
     'requires_testing_data',
     'zstd',
 ]

 # Default enabled/disabled state of each test group, keyed by group name.
 #
 # * Compression groups (brotli, bz2, gzip, lz4, snappy, zstd) are enabled
 #   exactly when ``Codec.is_available`` reports the codec as available.
 # * 'gdb' and 'requires_testing_data' start out enabled.
 # * Everything else starts out disabled; several of these entries are
 #   switched to True further down once the matching import succeeds.
 defaults = {
     'acero': False,
     'brotli': Codec.is_available('brotli'),
     'bz2': Codec.is_available('bz2'),
     'cython': False,
     'dataset': False,
     'fastparquet': False,
     'flight': False,
     'gandiva': False,
     'gcs': False,
     'gdb': True,
     'gzip': Codec.is_available('gzip'),
     'hdfs': False,
     'hypothesis': False,
     'large_memory': False,
     'lz4': Codec.is_available('lz4'),
     'memory_leak': False,
     'nopandas': False,
     'orc': False,
     'pandas': False,
     'parquet': False,
     'parquet_encryption': False,
     'requires_testing_data': True,
     's3': False,
     'slow': False,
     'snappy': Codec.is_available('snappy'),
     'substrait': False,
     'zstd': Codec.is_available('zstd'),
 }

 # Import probes: each block below tries to import one optional dependency
 # or optional pyarrow component. On success the matching ``defaults`` entry
 # is switched to True; on ImportError the entry keeps its initial value.

 # Third-party packages.
 try:
     import cython  # noqa
     defaults['cython'] = True
 except ImportError:
     pass

 try:
     import fastparquet  # noqa
     defaults['fastparquet'] = True
 except ImportError:
     pass

 # Optional pyarrow submodules.
 try:
     import pyarrow.gandiva  # noqa
     defaults['gandiva'] = True
 except ImportError:
     pass

 try:
     import pyarrow.acero  # noqa
     defaults['acero'] = True
 except ImportError:
     pass

 try:
     import pyarrow.dataset  # noqa
     defaults['dataset'] = True
 except ImportError:
     pass

 try:
     import pyarrow.orc  # noqa
     defaults['orc'] = True
 except ImportError:
     pass

 # pandas is the one probe with an "else" side: when it cannot be imported
 # the complementary 'nopandas' group is enabled instead.
 try:
     import pandas  # noqa
     defaults['pandas'] = True
 except ImportError:
     defaults['nopandas'] = True

 try:
     import pyarrow.parquet  # noqa
     defaults['parquet'] = True
 except ImportError:
     pass

 try:
     import pyarrow.parquet.encryption  # noqa
     defaults['parquet_encryption'] = True
 except ImportError:
     pass

 try:
     import pyarrow.flight  # noqa
     defaults['flight'] = True
 except ImportError:
     pass

 # Filesystem implementations: these are probed by importing the class
 # from ``pyarrow.fs`` rather than by importing a submodule.
 try:
     from pyarrow.fs import GcsFileSystem  # noqa
     defaults['gcs'] = True
 except ImportError:
     pass


 try:
     from pyarrow.fs import S3FileSystem  # noqa
     defaults['s3'] = True
 except ImportError:
     pass

 try:
     from pyarrow.fs import HadoopFileSystem  # noqa
     defaults['hdfs'] = True
 except ImportError:
     pass

 try:
     import pyarrow.substrait  # noqa
     defaults['substrait'] = True
 except ImportError:
     pass


 # Doctest should ignore files for the modules that are not built
 def pytest_ignore_collect(path, config):
     """
     Tell pytest which paths to skip while collecting doctests.

     Nothing is decided here unless the run has ``doctestmodules`` or the
     ``doctest_cython`` option enabled.

     Parameters
     ----------
     path : path-like
         Candidate path handed over by pytest; only its ``str()`` form is
         inspected.
     config : pytest config object
         Only ``config.option.doctestmodules`` and the optional
         ``config.option.doctest_cython`` are read.

     Returns
     -------
     True or None
         ``True`` when the path must be ignored. ``None`` otherwise, so that
         pytest's built-in rules and other plugins (``--ignore``,
         ``collect_ignore``, ...) still get a say. ``False`` is deliberately
         never returned: pytest treats it as "forcibly collect this path"
         and stops consulting anyone else.
     """
     path_str = str(path)

     if config.option.doctestmodules:
         # don't try to run doctests on the /tests directory
         if "/pyarrow/tests/" in path_str:
             return True

         # Components that have an entry in ``defaults``: their modules are
         # skipped when the component was not importable at start-up.
         doctest_groups = [
             'dataset',
             'orc',
             'parquet',
             'flight',
             'substrait',
         ]
         for group in doctest_groups:
             if 'pyarrow/{}'.format(group) in path_str:
                 if not defaults[group]:
                     return True

         if 'pyarrow/parquet/encryption' in path_str:
             if not defaults['parquet_encryption']:
                 return True

         # cuda has no entry in ``defaults``, so probe the import directly.
         if 'pyarrow/cuda' in path_str:
             try:
                 import pyarrow.cuda  # noqa
             except ImportError:
                 return True
             return None

         # fs modules are skipped when S3FileSystem cannot be imported.
         if 'pyarrow/fs' in path_str:
             try:
                 from pyarrow.fs import S3FileSystem  # noqa
             except ImportError:
                 return True
             return None

     if getattr(config.option, "doctest_cython", False):
         if "/pyarrow/tests/" in path_str:
             return True
         if "/pyarrow/_parquet_encryption" in path_str:
             return True

     return None


 # Save output files from doctest examples into temp dir
 @pytest.fixture(autouse=True)
 def _docdir(request):
     """
     Run every doctest from inside a temporary directory.

     Outside of doctest runs (neither ``doctestmodules`` nor
     ``doctest_cython`` is set) the fixture does nothing.
     """
     options = request.config.option
     modules_mode = options.doctestmodules
     cython_mode = getattr(options, "doctest_cython", False)

     if not (modules_mode or cython_mode):
         # Regular test run: leave the working directory alone.
         yield
         return

     # Look ``tmpdir`` up by name so it is only requested for doctests.
     scratch_dir = request.getfixturevalue('tmpdir')

     # The working directory is switched back once the test is over.
     with scratch_dir.as_cwd():
         yield


 # Define doctest_namespace for fs module docstring import
 @pytest.fixture(autouse=True)
 def add_fs(doctest_namespace, request, tmp_path):
     """
     Populate the doctest namespace used by the fs docstrings.

     Only active for doctest runs. It exposes ``fs`` (the module),
     ``local`` (a ``LocalFileSystem``), ``local_path`` (the temporary
     directory as a string) and ``path`` (a file in that directory that
     contains the bytes ``b'data'``).
     """
     options = request.config.option
     modules_mode = options.doctestmodules
     cython_mode = getattr(options, "doctest_cython", False)

     if modules_mode or cython_mode:
         # fs import
         doctest_namespace["fs"] = fs

         # Create a filesystem object and write a small example file
         local_fs = fs.LocalFileSystem()
         example_file = str(tmp_path / 'pyarrow-fs-example.dat')
         with local_fs.open_output_stream(example_file) as sink:
             sink.write(b'data')

         exposed = (
             ("local", local_fs),
             ("local_path", str(tmp_path)),
             ("path", example_file),
         )
         for key, value in exposed:
             doctest_namespace[key] = value
     yield


 # Define udf fixture for test_udf.py and test_substrait.py
 @pytest.fixture(scope="session")
 def unary_func_fixture():
     """
     Register the scalar function ``"y=x+1"`` and hand it to the tests.

     Returns
     -------
     tuple
         ``(unary_function, func_name)``: the Python callable and the name
         it was registered under.
     """
     from pyarrow import compute as pc

     func_name = "y=x+1"

     def unary_function(ctx, x):
         # Delegate to the "add" function, using the memory pool carried
         # by the context object passed in by the caller.
         return pc.call_function("add", [x, 1],
                                 memory_pool=ctx.memory_pool)

     unary_doc = {
         "summary": "add function",
         "description": "test add function",
     }
     in_types = {"array": pa.int64()}
     out_type = pa.int64()
     pc.register_scalar_function(
         unary_function, func_name, unary_doc, in_types, out_type)
     return unary_function, func_name
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	import pytest
	import pyarrow as pa
	from pyarrow import Codec
	from pyarrow import fs

	# Names of every optional test group this conftest knows about. Each name
	# has a matching key in ``defaults``.
	# NOTE(review): the consumer of this list is not visible in this file;
	# presumably each name becomes a pytest marker / option -- confirm before
	# renaming or reordering entries.
	groups = [
	'acero',
	'brotli',
	'bz2',
	'cython',
	'dataset',
	'hypothesis',
	'fastparquet',
	'gandiva',
	'gcs',
	'gdb',
	'gzip',
	'hdfs',
	'large_memory',
	'lz4',
	'memory_leak',
	'nopandas',
	'orc',
	'pandas',
	'parquet',
	'parquet_encryption',
	's3',
	'snappy',
	'substrait',
	'flight',
	'slow',
	'requires_testing_data',
	'zstd',
	]

	# Default enabled/disabled state of each test group, keyed by group name.
	# Compression groups follow ``Codec.is_available``; 'gdb' and
	# 'requires_testing_data' start out True; all other entries start out
	# False and may be switched on by the import probes that follow.
	defaults = {
	'acero': False,
	'brotli': Codec.is_available('brotli'),
	'bz2': Codec.is_available('bz2'),
	'cython': False,
	'dataset': False,
	'fastparquet': False,
	'flight': False,
	'gandiva': False,
	'gcs': False,
	'gdb': True,
	'gzip': Codec.is_available('gzip'),
	'hdfs': False,
	'hypothesis': False,
	'large_memory': False,
	'lz4': Codec.is_available('lz4'),
	'memory_leak': False,
	'nopandas': False,
	'orc': False,
	'pandas': False,
	'parquet': False,
	'parquet_encryption': False,
	'requires_testing_data': True,
	's3': False,
	'slow': False,
	'snappy': Codec.is_available('snappy'),
	'substrait': False,
	'zstd': Codec.is_available('zstd'),
	}

	# Import probes: each block tries to import one optional dependency or
	# optional pyarrow component. On success the matching ``defaults`` entry
	# becomes True; on ImportError it keeps its initial value.
	try:
	    import cython  # noqa
	    defaults['cython'] = True
	except ImportError:
	    pass

	try:
	    import fastparquet  # noqa
	    defaults['fastparquet'] = True
	except ImportError:
	    pass

	try:
	    import pyarrow.gandiva  # noqa
	    defaults['gandiva'] = True
	except ImportError:
	    pass

	try:
	    import pyarrow.acero  # noqa
	    defaults['acero'] = True
	except ImportError:
	    pass

	try:
	    import pyarrow.dataset  # noqa
	    defaults['dataset'] = True
	except ImportError:
	    pass

	try:
	    import pyarrow.orc  # noqa
	    defaults['orc'] = True
	except ImportError:
	    pass

	# pandas is the one probe with an "else" side: when it is missing the
	# complementary 'nopandas' group is enabled instead.
	try:
	    import pandas  # noqa
	    defaults['pandas'] = True
	except ImportError:
	    defaults['nopandas'] = True

	try:
	    import pyarrow.parquet  # noqa
	    defaults['parquet'] = True
	except ImportError:
	    pass

	try:
	    import pyarrow.parquet.encryption  # noqa
	    defaults['parquet_encryption'] = True
	except ImportError:
	    pass

	try:
	    import pyarrow.flight  # noqa
	    defaults['flight'] = True
	except ImportError:
	    pass

	# Filesystem implementations are probed by importing the class from
	# ``pyarrow.fs`` rather than by importing a submodule.
	try:
	    from pyarrow.fs import GcsFileSystem  # noqa
	    defaults['gcs'] = True
	except ImportError:
	    pass


	try:
	    from pyarrow.fs import S3FileSystem  # noqa
	    defaults['s3'] = True
	except ImportError:
	    pass

	try:
	    from pyarrow.fs import HadoopFileSystem  # noqa
	    defaults['hdfs'] = True
	except ImportError:
	    pass

	try:
	    import pyarrow.substrait  # noqa
	    defaults['substrait'] = True
	except ImportError:
	    pass


	# Doctest should ignore files for the modules that are not built
	def pytest_ignore_collect(path, config):
	    """
	    Tell pytest which paths to skip while collecting doctests.

	    Parameters
	    ----------
	    path : path-like
	        Candidate path handed over by pytest; only ``str(path)`` is used.
	    config : pytest config object
	        Only ``config.option.doctestmodules`` and the optional
	        ``config.option.doctest_cython`` are read.

	    Returns
	    -------
	    True or None
	        ``True`` when the path must be ignored, ``None`` when this hook
	        has no opinion. ``False`` is never returned because pytest
	        treats it as "forcibly collect" and stops asking other plugins.
	    """
	    path_str = str(path)

	    if config.option.doctestmodules:
	        # don't try to run doctests on the /tests directory
	        if "/pyarrow/tests/" in path_str:
	            return True

	        doctest_groups = [
	            'dataset',
	            'orc',
	            'parquet',
	            'flight',
	            'substrait',
	        ]

	        # Skip modules of components that were not importable.
	        for group in doctest_groups:
	            if 'pyarrow/{}'.format(group) in path_str:
	                if not defaults[group]:
	                    return True

	        if 'pyarrow/parquet/encryption' in path_str:
	            if not defaults['parquet_encryption']:
	                return True

	        # cuda has no entry in ``defaults``; probe the import directly.
	        if 'pyarrow/cuda' in path_str:
	            try:
	                import pyarrow.cuda  # noqa
	            except ImportError:
	                return True
	            return None

	        # fs modules are skipped when S3FileSystem cannot be imported.
	        if 'pyarrow/fs' in path_str:
	            try:
	                from pyarrow.fs import S3FileSystem  # noqa
	            except ImportError:
	                return True
	            return None

	    if getattr(config.option, "doctest_cython", False):
	        if "/pyarrow/tests/" in path_str:
	            return True
	        if "/pyarrow/_parquet_encryption" in path_str:
	            return True

	    return None


	# Save output files from doctest examples into temp dir
	@pytest.fixture(autouse=True)
	def _docdir(request):
	    """
	    Run doctests with a temporary directory as the working directory.

	    For any other kind of run the fixture simply yields.
	    """
	    # Trigger ONLY for the doctests
	    doctest_m = request.config.option.doctestmodules
	    doctest_c = getattr(request.config.option, "doctest_cython", False)

	    if doctest_m or doctest_c:

	        # Get the fixture dynamically by its name.
	        tmpdir = request.getfixturevalue('tmpdir')

	        # Chdir only for the duration of the test.
	        with tmpdir.as_cwd():
	            yield

	    else:
	        yield


	# Define doctest_namespace for fs module docstring import
	@pytest.fixture(autouse=True)
	def add_fs(doctest_namespace, request, tmp_path):
	    """
	    Populate the doctest namespace used by the fs docstrings.

	    For doctest runs the names ``fs``, ``local``, ``local_path`` and
	    ``path`` are added; ``path`` points at a file inside ``tmp_path``
	    that contains the bytes ``b'data'``.
	    """
	    # Trigger ONLY for the doctests
	    doctest_m = request.config.option.doctestmodules
	    doctest_c = getattr(request.config.option, "doctest_cython", False)

	    if doctest_m or doctest_c:
	        # fs import
	        doctest_namespace["fs"] = fs

	        # Creation of an object and file with data
	        local = fs.LocalFileSystem()
	        path = tmp_path / 'pyarrow-fs-example.dat'
	        with local.open_output_stream(str(path)) as stream:
	            stream.write(b'data')
	        doctest_namespace["local"] = local
	        doctest_namespace["local_path"] = str(tmp_path)
	        doctest_namespace["path"] = str(path)
	    yield


	# Define udf fixture for test_udf.py and test_substrait.py
	@pytest.fixture(scope="session")
	def unary_func_fixture():
	    """
	    Register a unary scalar function.

	    Returns
	    -------
	    tuple
	        ``(unary_function, func_name)``: the Python callable and the
	        name (``"y=x+1"``) it was registered under.
	    """
	    from pyarrow import compute as pc

	    def unary_function(ctx, x):
	        # Add 1 via the "add" function, using the context's memory pool.
	        return pc.call_function("add", [x, 1],
	                                memory_pool=ctx.memory_pool)
	    func_name = "y=x+1"
	    unary_doc = {"summary": "add function",
	                 "description": "test add function"}
	    pc.register_scalar_function(unary_function,
	                                func_name,
	                                unary_doc,
	                                {"array": pa.int64()},
	                                pa.int64())
	    return unary_function, func_name