# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# flake8: noqa
from __future__ import absolute_import
import os as _os
import sys as _sys

try:
    from ._generated_version import version as __version__
except ImportError:
    # Package is not installed, parse git tag at runtime
    try:
        import setuptools_scm

        # Code duplicated from setup.py to avoid a dependency on each other
        def parse_git(root, **kwargs):
            """
            Parse function for setuptools_scm that ignores tags for non-C++
            subprojects, e.g. apache-arrow-js-XXX tags.
            """
            from setuptools_scm.git import parse
            kwargs['describe_command'] = \
                "git describe --dirty --tags --long --match 'apache-arrow-[0-9].*'"
            return parse(root, **kwargs)

        __version__ = setuptools_scm.get_version('../',
                                                 parse=parse_git)
    except ImportError:
        __version__ = None

import pyarrow.compat as compat
from pyarrow.lib import cpu_count, set_cpu_count
from pyarrow.lib import (null, bool_,
                         int8, int16, int32, int64,
                         uint8, uint16, uint32, uint64,
                         time32, time64, timestamp, date32, date64,
                         float16, float32, float64,
                         binary, string, utf8, decimal128,
                         list_, struct, union, dictionary, field,
                         type_for_alias,
                         DataType, DictionaryType, ListType, StructType,
                         UnionType, TimestampType, Time32Type, Time64Type,
                         FixedSizeBinaryType, Decimal128Type,
                         BaseExtensionType, ExtensionType,
                         UnknownExtensionType,
                         DictionaryMemo,
                         Field,
                         Schema,
                         schema,
                         Array, Tensor,
                         array, chunked_array, column, table,
                         infer_type, from_numpy_dtype,
                         NullArray,
                         NumericArray, IntegerArray, FloatingPointArray,
                         BooleanArray,
                         Int8Array, UInt8Array,
                         Int16Array, UInt16Array,
                         Int32Array, UInt32Array,
                         Int64Array, UInt64Array,
                         ListArray, UnionArray,
                         BinaryArray, StringArray,
                         FixedSizeBinaryArray,
                         DictionaryArray,
                         Date32Array, Date64Array,
                         TimestampArray, Time32Array, Time64Array,
                         Decimal128Array, StructArray, ExtensionArray,
                         ArrayValue, Scalar, NA, _NULL as NULL,
                         BooleanValue,
                         Int8Value, Int16Value, Int32Value, Int64Value,
                         UInt8Value, UInt16Value, UInt32Value, UInt64Value,
                         HalfFloatValue, FloatValue, DoubleValue, ListValue,
                         BinaryValue, StringValue, FixedSizeBinaryValue,
                         DecimalValue, UnionValue, StructValue,
                         DictionaryValue,
                         Date32Value, Date64Value,
                         Time32Value, Time64Value,
                         TimestampValue)

# Buffers, allocation
from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
                         compress, decompress, allocate_buffer)

from pyarrow.lib import (MemoryPool, LoggingMemoryPool, ProxyMemoryPool,
                         total_allocated_bytes, set_memory_pool,
                         default_memory_pool, logging_memory_pool,
                         proxy_memory_pool, log_memory_allocations)

# I/O
from pyarrow.lib import (HdfsFile, NativeFile, PythonFile,
                         CompressedInputStream, CompressedOutputStream,
                         FixedSizeBufferWriter,
                         BufferReader, BufferOutputStream,
                         OSFile, MemoryMappedFile, memory_map,
                         create_memory_map, have_libhdfs, have_libhdfs3,
                         MockOutputStream, input_stream, output_stream)

from pyarrow.lib import (ChunkedArray, Column, RecordBatch, Table,
                         concat_arrays, concat_tables)

# Exceptions
from pyarrow.lib import (ArrowException,
                         ArrowKeyError,
                         ArrowInvalid,
                         ArrowIOError,
                         ArrowMemoryError,
                         ArrowNotImplementedError,
                         ArrowTypeError,
                         ArrowSerializationError,
                         PlasmaObjectExists)

# Serialization
from pyarrow.lib import (deserialize_from, deserialize,
                         deserialize_components,
                         serialize, serialize_to, read_serialized,
                         SerializedPyObject, SerializationContext,
                         SerializationCallbackError,
                         DeserializationCallbackError)

from pyarrow.filesystem import FileSystem, LocalFileSystem
from pyarrow.hdfs import HadoopFileSystem
import pyarrow.hdfs as hdfs
from pyarrow.ipc import (Message, MessageReader,
                         RecordBatchFileReader, RecordBatchFileWriter,
                         RecordBatchStreamReader, RecordBatchStreamWriter,
                         read_message, read_record_batch, read_schema,
                         read_tensor, write_tensor,
                         get_record_batch_size, get_tensor_size,
                         open_stream,
                         open_file,
                         serialize_pandas, deserialize_pandas)
import pyarrow.ipc as ipc


def open_stream(source):
    """
    pyarrow.open_stream is deprecated as of 0.12; use pyarrow.ipc.open_stream
    instead.
    """
    import warnings
    warnings.warn("pyarrow.open_stream is deprecated, please use "
                  "pyarrow.ipc.open_stream")
    return ipc.open_stream(source)


def open_file(source):
    """
    pyarrow.open_file is deprecated as of 0.12; use pyarrow.ipc.open_file
    instead.
    """
    import warnings
    warnings.warn("pyarrow.open_file is deprecated, please use "
                  "pyarrow.ipc.open_file")
    return ipc.open_file(source)
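
# The non-deprecated entry points live in pyarrow.ipc. A minimal usage sketch
# of the replacement API (the file name "data.arrow" is hypothetical):
#
#   import pyarrow as pa
#   with pa.OSFile("data.arrow", "rb") as f:
#       reader = pa.ipc.open_file(f)
#       table = reader.read_all()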


localfs = LocalFileSystem.get_instance()

from pyarrow.serialization import (default_serialization_context,
                                   register_default_serialization_handlers,
                                   register_torch_serialization_handlers)

import pyarrow.types as types


# Entry point for starting the plasma store
def _plasma_store_entry_point():
    """Entry point for starting the plasma store.

    This can be used by invoking e.g.
    ``plasma_store -s /tmp/plasma -m 1000000000``
    from the command line and will start the plasma_store executable with the
    given arguments.
    """
    import pyarrow
    plasma_store_executable = _os.path.join(pyarrow.__path__[0],
                                            "plasma_store_server")
    _os.execv(plasma_store_executable, _sys.argv)


# ----------------------------------------------------------------------
# Deprecations

from pyarrow.util import _deprecate_api  # noqa


# ----------------------------------------------------------------------
# Returning absolute path to the pyarrow include directory (if bundled, e.g. in
# wheels)


def get_include():
    """
    Return absolute path to directory containing Arrow C++ include
    headers. Similar to numpy.get_include
    """
    return _os.path.join(_os.path.dirname(__file__), 'include')
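
# A quick check of what this resolves to (the path shown is illustrative
# only, not an actual install location):
#
#   >>> import pyarrow as pa
#   >>> pa.get_include()
#   '/.../site-packages/pyarrow/include'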


def _get_pkg_config_executable():
    return _os.environ.get('PKG_CONFIG', 'pkg-config')


def _has_pkg_config(pkgname):
    import subprocess
    try:
        return subprocess.call([_get_pkg_config_executable(),
                                '--exists', pkgname]) == 0
    except OSError:
        # TODO: replace with FileNotFoundError once we ditch 2.7
        return False


def _read_pkg_config_variable(pkgname, cli_args):
    import subprocess
    cmd = [_get_pkg_config_executable(), pkgname] + cli_args
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    out, err = proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError("pkg-config failed: " + err.decode('utf8'))
    return out.rstrip().decode('utf8')
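
# For example, when a system-wide libarrow ships a pkg-config file, the helper
# above can be queried like this (the output shown is only an assumed value):
#
#   _read_pkg_config_variable("arrow", ["--libs-only-L"])
#   # e.g. "-L/usr/local/lib"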


def get_libraries():
    """
    Return list of library names to include in the `libraries` argument for C
    or Cython extensions using pyarrow
    """
    return ['arrow', 'arrow_python']


def get_library_dirs():
    """
    Return list of directories likely to contain Arrow C++ libraries for
    linking C or Cython extensions using pyarrow
    """
    package_cwd = _os.path.dirname(__file__)
    library_dirs = [package_cwd]

    def append_library_dir(library_dir):
        if library_dir not in library_dirs:
            library_dirs.append(library_dir)

    # Search library paths via pkg-config. This is necessary if the user
    # installed libarrow and the other shared libraries manually and they
    # are not shipped inside the pyarrow package (see also ARROW-2976).
    for pkgname in ["arrow", "arrow_python"]:
        if _has_pkg_config(pkgname):
            library_dir = _read_pkg_config_variable(pkgname,
                                                    ["--libs-only-L"])
            assert library_dir.startswith("-L")
            append_library_dir(library_dir[2:])

    if _sys.platform == 'win32':
        # TODO(wesm): Is this necessary, or does setuptools within a conda
        # installation add Library\lib to the linker path for MSVC?
        python_base_install = _os.path.dirname(_sys.executable)
        library_dir = _os.path.join(python_base_install, 'Library', 'lib')

        if _os.path.exists(_os.path.join(library_dir, 'arrow.lib')):
            append_library_dir(library_dir)

    # ARROW-4074: Allow for ARROW_HOME to be set to some other directory
    if _os.environ.get('ARROW_HOME'):
        append_library_dir(_os.path.join(_os.environ['ARROW_HOME'], 'lib'))
    else:
        # Python wheels bundle the Arrow libraries in the pyarrow directory.
        append_library_dir(_os.path.dirname(_os.path.abspath(__file__)))

    return library_dirs
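

# Usage sketch combining the helpers above to build a C++/Cython extension
# against pyarrow (the module name "myext" and its source file are
# hypothetical):
#
#   from setuptools import Extension
#   import pyarrow as pa
#
#   ext = Extension("myext", sources=["myext.cpp"], language="c++",
#                   include_dirs=[pa.get_include()],
#                   libraries=pa.get_libraries(),
#                   library_dirs=pa.get_library_dirs())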