python/src/nanoarrow/c_buffer.py - arrow-nanoarrow - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 from nanoarrow._lib import CArrowType, CBuffer, CBufferBuilder, _obj_is_buffer
 from nanoarrow.c_schema import c_schema_view


 def c_buffer(obj, schema=None) -> CBuffer:
     """Wrap or build an owning, read-only ArrowBuffer

     Objects implementing the Python buffer protocol are wrapped in
     nanoarrow's owning buffer structure (the ArrowBuffer) so that they
     can be used to construct arrays. Ownership of the underlying memory
     is managed through the buffer protocol itself
     (i.e., ``PyObject_GetBuffer()`` and ``PyBuffer_Release()``).

     Any other iterable is copied: a new buffer is allocated and filled
     with the elements of obj, encoded according to ``schema``, which is
     mandatory in this case. The ``struct`` module is currently used to
     encode values from obj into binary form.

     Unlike with :func:`c_array`, ``schema`` is explicitly
     honoured (or an error will be raised).

     Parameters
     ----------

     obj : buffer-like or iterable
         Either a Python object that supports the Python buffer protocol
         (built-in types such as bytes, memoryview and bytearray, as well
         as numpy arrays) or an iterable of values to encode.
     schema : schema-like, optional
         The data type of the desired buffer as sanitized by
         :func:`c_schema`. Only values that make sense as buffer types are
         allowed (e.g., integer types, floating-point types, interval types,
         decimal types, binary, string, fixed-size binary).

     Returns
     -------

     CBuffer
         ``obj`` itself when it is already a CBuffer and no schema was
         given; otherwise a new CBuffer.

     Raises
     ------

     NotImplementedError
         If obj supports the buffer protocol and ``schema`` is given.
     TypeError
         If obj neither supports the buffer protocol nor is iterable.

     Examples
     --------

     >>> import nanoarrow as na
     >>> na.c_buffer(b"1234")
     nanoarrow.c_lib.CBuffer(uint8[4 b] 49 50 51 52)
     >>> na.c_buffer([1, 2, 3], na.int32())
     nanoarrow.c_lib.CBuffer(int32[12 b] 1 2 3)
     """
     schema_given = schema is not None

     # An existing CBuffer is passed through untouched when no
     # reinterpretation was requested.
     if isinstance(obj, CBuffer) and not schema_given:
         return obj

     if _obj_is_buffer(obj):
         if schema_given:
             raise NotImplementedError(
                 "c_buffer() with schema for pybuffer is not implemented"
             )
         return CBuffer.from_pybuffer(obj)

     if not _obj_is_iterable(obj):
         raise TypeError(
             f"Can't convert object of type {type(obj).__name__} to nanoarrow.c_buffer"
         )

     # The helper also reports how many values it wrote; only the buffer
     # is of interest here.
     built, _n_values = _c_buffer_from_iterable(obj, schema)
     return built


 def _c_buffer_from_iterable(obj, schema=None) -> CBuffer:
     import array

     # array.typecodes is not available in all PyPy versions.
     # Rather than guess, just don't use the array constructor if
     # this attribute is not available.
     if hasattr(array, "typecodes"):
         array_typecodes = array.typecodes
     else:
         array_typecodes = []

     if schema is None:
         raise ValueError("CBuffer from iterable requires schema")

     schema_view = c_schema_view(schema)
     if (
         schema_view.extension_name is not None
         or schema_view.storage_type_id != schema_view.type_id
     ):
         raise ValueError(
             f"Can't create buffer from iterable for type {schema_view.type}"
         )

     builder = CBufferBuilder()

     if schema_view.storage_type_id == CArrowType.FIXED_SIZE_BINARY:
         builder.set_data_type(CArrowType.BINARY, schema_view.fixed_size * 8)
     else:
         builder.set_data_type(schema_view.storage_type_id)

     # If we are using a typecode supported by the array module, it has much
     # faster implementations of safely building buffers from iterables
     if (
         builder.format in array_typecodes
         and schema_view.storage_type_id != CArrowType.BOOL
     ):
         buf = array.array(builder.format, obj)
         return CBuffer.from_pybuffer(buf), len(buf)

     n_values = builder.write_elements(obj)
     return builder.finish(), n_values


 def _obj_is_iterable(obj):
     return hasattr(obj, "__iter__")
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	from nanoarrow._lib import CArrowType, CBuffer, CBufferBuilder, _obj_is_buffer
	from nanoarrow.c_schema import c_schema_view


	def c_buffer(obj, schema=None) -> CBuffer:
	    """Owning, read-only ArrowBuffer wrapper

	    If obj implements the Python buffer protocol, ``c_buffer()`` wraps
	    obj in nanoarrow's owning buffer structure, the ArrowBuffer,
	    such that it can be used to construct arrays. The ownership of the
	    underlying buffer is handled by the Python buffer protocol
	    (i.e., ``PyObject_GetBuffer()`` and ``PyBuffer_Release()``).

	    If obj is iterable, a buffer will be allocated and populated with
	    the contents of obj according to ``schema``. The
	    ``schema`` parameter is required to create a buffer from
	    a Python iterable. The ``struct`` module is currently used to encode
	    values from obj into binary form.

	    Unlike with :func:`c_array`, ``schema`` is explicitly
	    honoured (or an error will be raised).

	    Parameters
	    ----------

	    obj : buffer-like or iterable
	        A Python object that supports the Python buffer protocol (this
	        includes built-in types such as bytes, memoryview and bytearray,
	        as well as numpy arrays), or an iterable of values to encode.
	    schema : schema-like, optional
	        The data type of the desired buffer as sanitized by
	        :func:`c_schema`. Only values that make sense as buffer types are
	        allowed (e.g., integer types, floating-point types, interval types,
	        decimal types, binary, string, fixed-size binary).

	    Returns
	    -------

	    CBuffer
	        ``obj`` itself if it is already a CBuffer and ``schema`` is None;
	        otherwise a newly created CBuffer.

	    Raises
	    ------

	    NotImplementedError
	        If obj supports the buffer protocol and ``schema`` is given.
	    TypeError
	        If obj neither supports the buffer protocol nor is iterable.

	    Examples
	    --------

	    >>> import nanoarrow as na
	    >>> na.c_buffer(b"1234")
	    nanoarrow.c_lib.CBuffer(uint8[4 b] 49 50 51 52)
	    >>> na.c_buffer([1, 2, 3], na.int32())
	    nanoarrow.c_lib.CBuffer(int32[12 b] 1 2 3)
	    """
	    # Nothing to do for an existing CBuffer when no schema was requested.
	    if isinstance(obj, CBuffer) and schema is None:
	        return obj

	    if _obj_is_buffer(obj):
	        if schema is not None:
	            raise NotImplementedError(
	                "c_buffer() with schema for pybuffer is not implemented"
	            )
	        return CBuffer.from_pybuffer(obj)

	    if _obj_is_iterable(obj):
	        # The helper returns (buffer, n_values); the count is not needed.
	        buffer, _ = _c_buffer_from_iterable(obj, schema)
	        return buffer

	    raise TypeError(
	        f"Can't convert object of type {type(obj).__name__} to nanoarrow.c_buffer"
	    )


	def _c_buffer_from_iterable(obj, schema=None) -> CBuffer:
	import array

	# array.typecodes is not available in all PyPy versions.
	# Rather than guess, just don't use the array constructor if
	# this attribute is not available.
	if hasattr(array, "typecodes"):
	array_typecodes = array.typecodes
	else:
	array_typecodes = []

	if schema is None:
	raise ValueError("CBuffer from iterable requires schema")

	schema_view = c_schema_view(schema)
	if (
	schema_view.extension_name is not None
	or schema_view.storage_type_id != schema_view.type_id
	):
	raise ValueError(
	f"Can't create buffer from iterable for type {schema_view.type}"
	)

	builder = CBufferBuilder()

	if schema_view.storage_type_id == CArrowType.FIXED_SIZE_BINARY:
	builder.set_data_type(CArrowType.BINARY, schema_view.fixed_size * 8)
	else:
	builder.set_data_type(schema_view.storage_type_id)

	# If we are using a typecode supported by the array module, it has much
	# faster implementations of safely building buffers from iterables
	if (
	builder.format in array_typecodes
	and schema_view.storage_type_id != CArrowType.BOOL
	):
	buf = array.array(builder.format, obj)
	return CBuffer.from_pybuffer(buf), len(buf)

	n_values = builder.write_elements(obj)
	return builder.finish(), n_values


	def _obj_is_iterable(obj):
	return hasattr(obj, "__iter__")