python/pyarrow/compute.py - arrow - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 from pyarrow._compute import (  # noqa
     Function,
     FunctionOptions,
     FunctionRegistry,
     HashAggregateFunction,
     HashAggregateKernel,
     Kernel,
     ScalarAggregateFunction,
     ScalarAggregateKernel,
     ScalarFunction,
     ScalarKernel,
     VectorFunction,
     VectorKernel,
     # Option classes
     ArraySortOptions,
     AssumeTimezoneOptions,
     CastOptions,
     CountOptions,
     DayOfWeekOptions,
     DictionaryEncodeOptions,
     ElementWiseAggregateOptions,
     ExtractRegexOptions,
     FilterOptions,
     IndexOptions,
     JoinOptions,
     MakeStructOptions,
     MatchSubstringOptions,
     ModeOptions,
     NullOptions,
     PadOptions,
     PartitionNthOptions,
     QuantileOptions,
     ReplaceSliceOptions,
     ReplaceSubstringOptions,
     RoundOptions,
     RoundToMultipleOptions,
     ScalarAggregateOptions,
     SelectKOptions,
     SetLookupOptions,
     SliceOptions,
     SortOptions,
     SplitOptions,
     SplitPatternOptions,
     StrftimeOptions,
     StrptimeOptions,
     TakeOptions,
     TDigestOptions,
     TrimOptions,
     VarianceOptions,
     WeekOptions,
     # Functions
     call_function,
     function_registry,
     get_function,
     list_functions,
 )

 import inspect
 from textwrap import dedent
 import warnings

 import pyarrow as pa


 def _get_arg_names(func):
     """Return the argument names stored in ``func._doc``."""
     doc = func._doc
     return doc.arg_names


 def _decorate_compute_function(wrapper, exposed_name, func, option_class):
     """
     Attach metadata and a generated docstring to a compute function wrapper.

     Parameters
     ----------
     wrapper : callable
         The Python wrapper to decorate; it is modified in place.
     exposed_name : str
         Value assigned to ``__name__`` and ``__qualname__``.
     func : compute function object
         Its ``name``, ``arity``, ``kind`` and ``_doc`` attributes are read.
     option_class : type or None
         When given, its name and constructor parameters are listed in the
         generated docstring.

     Returns
     -------
     wrapper : the same callable that was passed in
     """
     wrapper.__arrow_compute_function__ = {'name': func.name,
                                           'arity': func.arity}
     wrapper.__name__ = exposed_name
     wrapper.__qualname__ = exposed_name

     doc = func._doc
     summary = doc.summary
     if not summary:
         # No summary available: fall back to a generic one-liner.
         plural = "arguments" if func.arity > 1 else "argument"
         summary = ("Call compute function {!r} with the given {}"
                    .format(func.name, plural))
     description = doc.description
     arg_names = _get_arg_names(func)

     # Every piece is dedented on its own before joining, so only the
     # relative indentation inside each literal matters.
     pieces = ["""\
         {}.

         """.format(summary)]

     if description:
         pieces.append("{}\n\n".format(description))

     pieces.append("""\
         Parameters
         ----------
         """)

     for arg_name in arg_names:
         arg_type = ('Array-like'
                     if func.kind in ('vector', 'scalar_aggregate')
                     else 'Array-like or scalar-like')
         pieces.append("""\
             {} : {}
                 Argument to compute function
             """.format(arg_name, arg_type))

     pieces.append("""\
         memory_pool : pyarrow.MemoryPool, optional
             If not passed, will allocate memory from the default memory pool.
         """)

     if option_class is not None:
         options_name = option_class.__name__
         pieces.append("""\
             options : pyarrow.compute.{0}, optional
                 Parameters altering compute function semantics.
             """.format(options_name))
         # One entry per constructor parameter of the options class.
         for param in inspect.signature(option_class).parameters.values():
             pieces.append("""\
             {0} : optional
                 Parameter for {1} constructor. Either `options`
                 or `{0}` can be passed, but not both at the same time.
             """.format(param.name, options_name))

     wrapper.__doc__ = "".join(map(dedent, pieces))
     return wrapper


 def _get_options_class(func):
     """
     Look up the options class declared for a compute function.

     Returns the module-level object named by ``func._doc.options_class``,
     or None when no class name is declared. If a name is declared but not
     found in this module's globals, a RuntimeWarning is emitted and None
     is returned.
     """
     class_name = func._doc.options_class
     if not class_name:
         return None
     namespace = globals()
     if class_name in namespace:
         return namespace[class_name]
     warnings.warn("Python binding for {} not exposed".format(class_name),
                   RuntimeWarning)
     return None


 def _handle_options(name, option_class, options, kwargs):
     """
     Resolve the options object to pass to a compute function.

     Parameters
     ----------
     name : str
         Function name, used only in error messages.
     option_class : type
         Options class used to build or validate the result.
     options : option_class instance, dict or None
         Explicit options; a dict is expanded into ``option_class(**options)``.
     kwargs : dict
         Extra keyword arguments; when non-empty they are expanded into
         ``option_class(**kwargs)``.

     Returns
     -------
     An ``option_class`` instance, or None when neither *options* nor
     *kwargs* was supplied.

     Raises
     ------
     TypeError
         If both *options* and *kwargs* are given, or if *options* is
         neither a dict nor an instance of *option_class*.
     """
     if kwargs:
         if options is not None:
             raise TypeError(
                 "Function {!r} called with both an 'options' argument "
                 "and additional named arguments"
                 .format(name))
         return option_class(**kwargs)

     if options is None:
         return None
     if isinstance(options, dict):
         return option_class(**options)
     if isinstance(options, option_class):
         return options
     raise TypeError(
         "Function {!r} expected a {} parameter, got {}"
         .format(name, option_class, type(options)))


 def _make_generic_wrapper(func_name, func, option_class):
     """
     Build a Python callable that forwards to ``func.call``.

     Without an *option_class* the wrapper accepts positional arguments and
     ``memory_pool`` only. With one, it also accepts ``options`` and
     arbitrary keyword arguments, which are resolved through
     ``_handle_options`` before the call.
     """
     if option_class is not None:
         def wrapper(*args, memory_pool=None, options=None, **kwargs):
             resolved = _handle_options(func_name, option_class, options,
                                        kwargs)
             return func.call(args, resolved, memory_pool)
         return wrapper

     def wrapper(*args, memory_pool=None):
         return func.call(args, None, memory_pool)
     return wrapper


 def _make_signature(arg_names, var_arg_names, option_class):
     """
     Build the ``inspect.Signature`` advertised by a generated wrapper.

     The signature lists *arg_names* as positional-or-keyword parameters,
     *var_arg_names* as var-positional parameters, then keyword-only
     ``memory_pool`` and, when *option_class* is given, keyword-only
     ``options`` followed by the option class constructor parameters.
     """
     Parameter = inspect.Parameter
     params = [Parameter(name, Parameter.POSITIONAL_OR_KEYWORD)
               for name in arg_names]
     params.extend(Parameter(name, Parameter.VAR_POSITIONAL)
                   for name in var_arg_names)
     params.append(
         Parameter("memory_pool", Parameter.KEYWORD_ONLY, default=None))
     if option_class is not None:
         params.append(
             Parameter("options", Parameter.KEYWORD_ONLY, default=None))
         # XXX for now, our generic wrappers don't allow positional
         # option arguments
         option_params = inspect.signature(option_class).parameters
         params.extend(p.replace(kind=Parameter.KEYWORD_ONLY)
                       for p in option_params.values())
     return inspect.Signature(params)


 def _wrap_function(name, func):
     """
     Create the documented Python wrapper for one compute function.

     The wrapper is built by ``_make_generic_wrapper``, given a
     ``__signature__`` from ``_make_signature`` and then decorated by
     ``_decorate_compute_function``.
     """
     option_class = _get_options_class(func)
     arg_names = _get_arg_names(func)

     var_arg_names = []
     if arg_names and arg_names[-1].startswith('*'):
         # A trailing '*name' entry is split off as the var-positional name.
         var_arg_names.append(arg_names.pop().lstrip('*'))

     signature = _make_signature(arg_names, var_arg_names, option_class)
     wrapper = _make_generic_wrapper(name, func, option_class)
     wrapper.__signature__ = signature
     return _decorate_compute_function(wrapper, name, func, option_class)


 def _make_global_functions():
     """
     Create a module-level wrapper for every registered compute function.

     Note that some of the automatically-generated wrappers may be
     overridden by custom versions below.
     """
     namespace = globals()
     registry = function_registry()

     # Names that clash with Python keywords get a trailing underscore.
     keyword_rewrites = {'and': 'and_', 'or': 'or_'}

     for cpp_name in registry.list_functions():
         exposed_name = keyword_rewrites.get(cpp_name, cpp_name)
         func = registry.get_function(cpp_name)
         assert exposed_name not in namespace, exposed_name
         wrapped = _wrap_function(exposed_name, func)
         # The wrapper is stored under both the original and rewritten name.
         namespace[cpp_name] = wrapped
         namespace[exposed_name] = wrapped


 # Generate the wrappers at import time; the definitions that follow may
 # replace some of them.
 _make_global_functions()


 def cast(arr, target_type, safe=True):
     """
     Convert the values of an array to another data type.

     This is also available as a method on array instances.

     Parameters
     ----------
     arr : Array or ChunkedArray
         Values to convert.
     target_type : DataType or type string alias
         Type to cast to; must not be None.
     safe : bool, default True
         Check for overflows or other unsafe conversions. Selects between
         ``CastOptions.safe`` and ``CastOptions.unsafe``.

     Returns
     -------
     casted : Array

     Raises
     ------
     ValueError
         If *target_type* is None.

     Examples
     --------
     >>> from datetime import datetime
     >>> import pyarrow as pa
     >>> arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)])
     >>> arr.type
     TimestampType(timestamp[us])

     You can use ``pyarrow.DataType`` objects to specify the target type:

     >>> cast(arr, pa.timestamp('ms'))
     <pyarrow.lib.TimestampArray object at 0x7fe93c0f6910>
     [
       2010-01-01 00:00:00.000,
       2015-01-01 00:00:00.000
     ]

     >>> cast(arr, pa.timestamp('ms')).type
     TimestampType(timestamp[ms])

     Alternatively, it is also supported to use the string aliases for these
     types:

     >>> arr.cast('timestamp[ms]')
     <pyarrow.lib.TimestampArray object at 0x10420eb88>
     [
       1262304000000,
       1420070400000
     ]
     >>> arr.cast('timestamp[ms]').type
     TimestampType(timestamp[ms])
     """
     if target_type is None:
         raise ValueError("Cast target type must not be None")
     make_options = CastOptions.safe if safe else CastOptions.unsafe
     return call_function("cast", [arr], make_options(target_type))


 def count_substring(array, pattern, *, ignore_case=False):
     """
     Count how many times substring *pattern* occurs in each value of a
     string array.

     Parameters
     ----------
     array : pyarrow.Array or pyarrow.ChunkedArray
     pattern : str
         Substring to look for (exact match).
     ignore_case : bool, default False
         Ignore case while searching.

     Returns
     -------
     result : pyarrow.Array or pyarrow.ChunkedArray
     """
     options = MatchSubstringOptions(pattern, ignore_case=ignore_case)
     return call_function("count_substring", [array], options)


 def count_substring_regex(array, pattern, *, ignore_case=False):
     """
     Count the non-overlapping matches of regex *pattern* in each value
     of a string array.

     Parameters
     ----------
     array : pyarrow.Array or pyarrow.ChunkedArray
     pattern : str
         Regex pattern whose matches are counted.
     ignore_case : bool, default False
         Ignore case while searching.

     Returns
     -------
     result : pyarrow.Array or pyarrow.ChunkedArray
     """
     options = MatchSubstringOptions(pattern, ignore_case=ignore_case)
     return call_function("count_substring_regex", [array], options)


 def find_substring(array, pattern, *, ignore_case=False):
     """
     Locate the first occurrence of substring *pattern* in each value of
     a string array and return its index.

     Parameters
     ----------
     array : pyarrow.Array or pyarrow.ChunkedArray
     pattern : str
         Substring to look for (exact match).
     ignore_case : bool, default False
         Ignore case while searching.

     Returns
     -------
     result : pyarrow.Array or pyarrow.ChunkedArray
     """
     options = MatchSubstringOptions(pattern, ignore_case=ignore_case)
     return call_function("find_substring", [array], options)


 def find_substring_regex(array, pattern, *, ignore_case=False):
     """
     Locate the first match of regex *pattern* in each value of a string
     array and return its index.

     Parameters
     ----------
     array : pyarrow.Array or pyarrow.ChunkedArray
     pattern : str
         Regex pattern to search for.
     ignore_case : bool, default False
         Ignore case while searching.

     Returns
     -------
     result : pyarrow.Array or pyarrow.ChunkedArray
     """
     options = MatchSubstringOptions(pattern, ignore_case=ignore_case)
     return call_function("find_substring_regex", [array], options)


 def match_like(array, pattern, *, ignore_case=False):
     """
     Check each value of a string array against the SQL-style LIKE
     pattern *pattern*.

     Parameters
     ----------
     array : pyarrow.Array or pyarrow.ChunkedArray
     pattern : str
         SQL-style LIKE pattern. '%' will match any number of
         characters, '_' will match exactly one character, and all
         other characters match themselves. To match a literal percent
         sign or underscore, precede the character with a backslash.
     ignore_case : bool, default False
         Ignore case while searching.

     Returns
     -------
     result : pyarrow.Array or pyarrow.ChunkedArray
     """
     options = MatchSubstringOptions(pattern, ignore_case=ignore_case)
     return call_function("match_like", [array], options)


 def match_substring(array, pattern, *, ignore_case=False):
     """
     Check whether each value of a string array contains substring
     *pattern*.

     Parameters
     ----------
     array : pyarrow.Array or pyarrow.ChunkedArray
     pattern : str
         Substring to look for (exact match).
     ignore_case : bool, default False
         Ignore case while searching.

     Returns
     -------
     result : pyarrow.Array or pyarrow.ChunkedArray
     """
     options = MatchSubstringOptions(pattern, ignore_case=ignore_case)
     return call_function("match_substring", [array], options)


 def match_substring_regex(array, pattern, *, ignore_case=False):
     """
     Check whether regex *pattern* matches at any position in each value
     of a string array.

     Parameters
     ----------
     array : pyarrow.Array or pyarrow.ChunkedArray
     pattern : str
         Regex pattern to search for.
     ignore_case : bool, default False
         Ignore case while searching.

     Returns
     -------
     result : pyarrow.Array or pyarrow.ChunkedArray
     """
     options = MatchSubstringOptions(pattern, ignore_case=ignore_case)
     return call_function("match_substring_regex", [array], options)


 def mode(array, n=1, *, skip_nulls=True, min_count=0):
     """
     Compute the top-n most common values of a numerical (chunked) array
     together with their occurrence counts.

     Results are ordered by descending count; among values with the same
     count, the smaller one comes first.

     Parameters
     ----------
     array : pyarrow.Array or pyarrow.ChunkedArray
     n : int, default 1
         Specify the top-n values.
     skip_nulls : bool, default True
         If True, ignore nulls in the input. Else return an empty array
         if any input is null.
     min_count : int, default 0
         If there are fewer than this many values in the input, return
         an empty array.

     Returns
     -------
     An array of <input type "Mode", int64_t "Count"> structs

     Examples
     --------
     >>> import pyarrow as pa
     >>> import pyarrow.compute as pc
     >>> arr = pa.array([1, 1, 2, 2, 3, 2, 2, 2])
     >>> modes = pc.mode(arr, 2)
     >>> modes[0]
     <pyarrow.StructScalar: {'mode': 2, 'count': 5}>
     >>> modes[1]
     <pyarrow.StructScalar: {'mode': 1, 'count': 2}>
     """
     return call_function(
         "mode", [array],
         ModeOptions(n, skip_nulls=skip_nulls, min_count=min_count))


 def filter(data, mask, null_selection_behavior='drop'):
     """
     Keep the values (or records) of array- or table-like data for which
     a boolean mask is true.

     Parameters
     ----------
     data : Array, ChunkedArray, RecordBatch, or Table
     mask : Array, ChunkedArray
         Must be of boolean type
     null_selection_behavior : str, default 'drop'
         Configure the behavior on encountering a null slot in the mask.
         Allowed values are 'drop' and 'emit_null'.

         - 'drop': nulls will be treated as equivalent to False.
         - 'emit_null': nulls will result in a null in the output.

     Returns
     -------
     result : depends on inputs

     Examples
     --------
     >>> import pyarrow as pa
     >>> arr = pa.array(["a", "b", "c", None, "e"])
     >>> mask = pa.array([True, False, None, False, True])
     >>> arr.filter(mask)
     <pyarrow.lib.StringArray object at 0x7fa826df9200>
     [
       "a",
       "e"
     ]
     >>> arr.filter(mask, null_selection_behavior='emit_null')
     <pyarrow.lib.StringArray object at 0x7fa826df9200>
     [
       "a",
       null,
       "e"
     ]
     """
     return call_function('filter', [data, mask],
                          FilterOptions(null_selection_behavior))


 def index(data, value, start=None, end=None, *, memory_pool=None):
     """
     Return the index of the first occurrence of a value.

     Parameters
     ----------
     data : Array or ChunkedArray
     value : Scalar-like object
         Converted to a scalar of ``data.type`` when it is not already one.
     start : int, optional
         When given, the search runs on ``data`` sliced from this offset
         and a found index is shifted back by *start*.
     end : int, optional
         When given, the search runs on ``data`` sliced up to this offset.
     memory_pool : MemoryPool, optional
         If not passed, will allocate memory from the default memory pool.

     Returns
     -------
     index : the index, or -1 if not found
     """
     # Restrict the search window first.
     if start is None:
         if end is not None:
             data = data.slice(0, end)
     elif end is None:
         data = data.slice(start)
     else:
         data = data.slice(start, end - start)

     # Make sure the needle is a scalar of the same type as the data.
     if isinstance(value, pa.Scalar):
         if data.type != value.type:
             value = pa.scalar(value.as_py(), type=data.type)
     else:
         value = pa.scalar(value, type=data.type)

     result = call_function('index', [data], IndexOptions(value=value),
                            memory_pool)
     # The result is relative to the slice; re-base a found index on start.
     if start is not None and result.as_py() >= 0:
         result = pa.scalar(result.as_py() + start, type=pa.int64())
     return result


 def take(data, indices, *, boundscheck=True, memory_pool=None):
     """
     Pick values (or records) out of array- or table-like data by integer
     position.

     The result will be of the same type(s) as the input, with elements taken
     from the input array (or record batch / table fields) at the given
     indices. If an index is null then the corresponding value in the output
     will be null.

     Parameters
     ----------
     data : Array, ChunkedArray, RecordBatch, or Table
     indices : Array, ChunkedArray
         Must be of integer type
     boundscheck : boolean, default True
         Whether to boundscheck the indices. If False and there is an out of
         bounds index, will likely cause the process to crash.
     memory_pool : MemoryPool, optional
         If not passed, will allocate memory from the default memory pool.

     Returns
     -------
     result : depends on inputs

     Examples
     --------
     >>> import pyarrow as pa
     >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
     >>> indices = pa.array([0, None, 4, 3])
     >>> arr.take(indices)
     <pyarrow.lib.StringArray object at 0x7ffa4fc7d368>
     [
       "a",
       null,
       "e",
       null
     ]
     """
     return call_function('take', [data, indices],
                          TakeOptions(boundscheck=boundscheck), memory_pool)


 def fill_null(values, fill_value):
     """
     Replace each null element in values with fill_value. The fill_value must be
     the same type as values or able to be implicitly casted to the array's
     type.

     This is an alias for :func:`coalesce`.

     Parameters
     ----------
     values : Array, ChunkedArray, or Scalar-like object
         Each null element is replaced with the corresponding value
         from fill_value.
     fill_value : Array, ChunkedArray, or Scalar-like object
         If not same type as data will attempt to cast. Plain Python values
         and scalars are rebuilt as a scalar of ``values.type``; arrays and
         chunked arrays are cast with their ``cast`` method.

     Returns
     -------
     result : depends on inputs

     Examples
     --------
     >>> import pyarrow as pa
     >>> arr = pa.array([1, 2, None, 3], type=pa.int8())
     >>> fill_value = pa.scalar(5, type=pa.int8())
     >>> arr.fill_null(fill_value)
     <pyarrow.lib.Int8Array object at 0x7f95437f01a0>
     [
       1,
       2,
       5,
       3
     ]
     """
     if not isinstance(fill_value, (pa.Array, pa.ChunkedArray, pa.Scalar)):
         fill_value = pa.scalar(fill_value, type=values.type)
     elif values.type != fill_value.type:
         if isinstance(fill_value, pa.Scalar):
             fill_value = pa.scalar(fill_value.as_py(), type=values.type)
         else:
             # as_py() is a Scalar method; array-like fill values have to be
             # cast as a whole instead of round-tripping through Python.
             fill_value = fill_value.cast(values.type)

     return call_function("coalesce", [values, fill_value])


 def top_k_unstable(values, k, sort_keys=None, *, memory_pool=None):
     """
     Select the indices of the top-k ordered elements from array- or table-like
     data.

     This is a specialization for :func:`select_k_unstable`. Output is not
     guaranteed to be stable.

     Parameters
     ----------
     values : Array, ChunkedArray, RecordBatch, or Table
         Data to sort and get top indices from.
     k : int
         The number of `k` elements to keep.
     sort_keys : List-like, optional
         Column key names to order by when input is table-like data.
         The passed object is never modified.
     memory_pool : MemoryPool, optional
         If not passed, will allocate memory from the default memory pool.

     Returns
     -------
     result : Array of indices

     Examples
     --------
     >>> import pyarrow as pa
     >>> import pyarrow.compute as pc
     >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
     >>> pc.top_k_unstable(arr, k=3)
     <pyarrow.lib.UInt64Array object at 0x7fdcb19d7f30>
     [
       5,
       4,
       2
     ]
     """
     if sort_keys is None:
         sort_keys = ()
     if isinstance(values, (pa.Array, pa.ChunkedArray)):
         # Build a new list rather than appending to the caller's object;
         # array inputs get a placeholder key carrying the sort order.
         keys = list(sort_keys) + [("dummy", "descending")]
     else:
         # A concrete list (not a one-shot map iterator) pairs every column
         # name with the sort order.
         keys = [(key_name, "descending") for key_name in sort_keys]
     options = SelectKOptions(k, keys)
     return call_function("select_k_unstable", [values], options, memory_pool)


 def bottom_k_unstable(values, k, sort_keys=None, *, memory_pool=None):
     """
     Select the indices of the bottom-k ordered elements from
     array- or table-like data.

     This is a specialization for :func:`select_k_unstable`. Output is not
     guaranteed to be stable.

     Parameters
     ----------
     values : Array, ChunkedArray, RecordBatch, or Table
         Data to sort and get bottom indices from.
     k : int
         The number of `k` elements to keep.
     sort_keys : List-like, optional
         Column key names to order by when input is table-like data.
         The passed object is never modified.
     memory_pool : MemoryPool, optional
         If not passed, will allocate memory from the default memory pool.

     Returns
     -------
     result : Array of indices

     Examples
     --------
     >>> import pyarrow as pa
     >>> import pyarrow.compute as pc
     >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
     >>> pc.bottom_k_unstable(arr, k=3)
     <pyarrow.lib.UInt64Array object at 0x7fdcb19d7fa0>
     [
       0,
       1,
       2
     ]
     """
     if sort_keys is None:
         sort_keys = ()
     if isinstance(values, (pa.Array, pa.ChunkedArray)):
         # Build a new list rather than appending to the caller's object;
         # array inputs get a placeholder key carrying the sort order.
         keys = list(sort_keys) + [("dummy", "ascending")]
     else:
         # A concrete list (not a one-shot map iterator) pairs every column
         # name with the sort order.
         keys = [(key_name, "ascending") for key_name in sort_keys]
     options = SelectKOptions(k, keys)
     return call_function("select_k_unstable", [values], options, memory_pool)
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	from pyarrow._compute import ( # noqa
	Function,
	FunctionOptions,
	FunctionRegistry,
	HashAggregateFunction,
	HashAggregateKernel,
	Kernel,
	ScalarAggregateFunction,
	ScalarAggregateKernel,
	ScalarFunction,
	ScalarKernel,
	VectorFunction,
	VectorKernel,
	# Option classes
	ArraySortOptions,
	AssumeTimezoneOptions,
	CastOptions,
	CountOptions,
	DayOfWeekOptions,
	DictionaryEncodeOptions,
	ElementWiseAggregateOptions,
	ExtractRegexOptions,
	FilterOptions,
	IndexOptions,
	JoinOptions,
	MakeStructOptions,
	MatchSubstringOptions,
	ModeOptions,
	NullOptions,
	PadOptions,
	PartitionNthOptions,
	QuantileOptions,
	ReplaceSliceOptions,
	ReplaceSubstringOptions,
	RoundOptions,
	RoundToMultipleOptions,
	ScalarAggregateOptions,
	SelectKOptions,
	SetLookupOptions,
	SliceOptions,
	SortOptions,
	SplitOptions,
	SplitPatternOptions,
	StrftimeOptions,
	StrptimeOptions,
	TakeOptions,
	TDigestOptions,
	TrimOptions,
	VarianceOptions,
	WeekOptions,
	# Functions
	call_function,
	function_registry,
	get_function,
	list_functions,
	)

	import inspect
	from textwrap import dedent
	import warnings

	import pyarrow as pa


	def _get_arg_names(func):
	    """Return the argument names stored in ``func._doc``."""
	    return func._doc.arg_names


	def _decorate_compute_function(wrapper, exposed_name, func, option_class):
	    """
	    Attach metadata and a generated docstring to a compute function wrapper.

	    Sets ``__arrow_compute_function__``, ``__name__`` and ``__qualname__``
	    on *wrapper*, builds ``__doc__`` from ``func._doc`` plus the constructor
	    parameters of *option_class* (when given), and returns *wrapper*.
	    """
	    wrapper.__arrow_compute_function__ = dict(name=func.name,
	                                              arity=func.arity)
	    wrapper.__name__ = exposed_name
	    wrapper.__qualname__ = exposed_name

	    doc_pieces = []

	    cpp_doc = func._doc
	    summary = cpp_doc.summary
	    if not summary:
	        # No summary available: fall back to a generic one-liner.
	        arg_str = "arguments" if func.arity > 1 else "argument"
	        summary = ("Call compute function {!r} with the given {}"
	                   .format(func.name, arg_str))

	    description = cpp_doc.description
	    arg_names = _get_arg_names(func)

	    # Every piece is dedented on its own before joining, so the relative
	    # indentation inside each literal defines the docstring layout.
	    doc_pieces.append("""\
	        {}.

	        """.format(summary))

	    if description:
	        doc_pieces.append("{}\n\n".format(description))

	    doc_pieces.append("""\
	        Parameters
	        ----------
	        """)

	    for arg_name in arg_names:
	        if func.kind in ('vector', 'scalar_aggregate'):
	            arg_type = 'Array-like'
	        else:
	            arg_type = 'Array-like or scalar-like'
	        doc_pieces.append("""\
	            {} : {}
	                Argument to compute function
	            """.format(arg_name, arg_type))

	    doc_pieces.append("""\
	        memory_pool : pyarrow.MemoryPool, optional
	            If not passed, will allocate memory from the default memory pool.
	        """)
	    if option_class is not None:
	        doc_pieces.append("""\
	            options : pyarrow.compute.{0}, optional
	                Parameters altering compute function semantics.
	            """.format(option_class.__name__))
	        options_sig = inspect.signature(option_class)
	        for p in options_sig.parameters.values():
	            doc_pieces.append("""\
	            {0} : optional
	                Parameter for {1} constructor. Either `options`
	                or `{0}` can be passed, but not both at the same time.
	            """.format(p.name, option_class.__name__))

	    wrapper.__doc__ = "".join(dedent(s) for s in doc_pieces)
	    return wrapper


	def _get_options_class(func):
	    """
	    Look up the options class declared for a compute function.

	    Returns the module-level object named by ``func._doc.options_class``,
	    or None when no name is declared. A declared name that is missing
	    from this module's globals triggers a RuntimeWarning and yields None.
	    """
	    class_name = func._doc.options_class
	    if not class_name:
	        return None
	    try:
	        return globals()[class_name]
	    except KeyError:
	        warnings.warn("Python binding for {} not exposed"
	                      .format(class_name), RuntimeWarning)
	        return None


	def _handle_options(name, option_class, options, kwargs):
	    """
	    Resolve the options object to pass to a compute function.

	    Non-empty *kwargs* are expanded into ``option_class(**kwargs)``; a dict
	    *options* is expanded the same way; an *option_class* instance is
	    returned unchanged; None is returned when nothing was supplied.

	    Raises
	    ------
	    TypeError
	        If both *options* and *kwargs* are given, or if *options* is
	        neither a dict nor an instance of *option_class*.
	    """
	    if kwargs:
	        if options is None:
	            return option_class(**kwargs)
	        raise TypeError(
	            "Function {!r} called with both an 'options' argument "
	            "and additional named arguments"
	            .format(name))

	    if options is not None:
	        if isinstance(options, dict):
	            return option_class(**options)
	        elif isinstance(options, option_class):
	            return options
	        raise TypeError(
	            "Function {!r} expected a {} parameter, got {}"
	            .format(name, option_class, type(options)))

	    return options


	def _make_generic_wrapper(func_name, func, option_class):
	    """
	    Build a Python callable that forwards to ``func.call``.

	    Without an *option_class* the wrapper accepts positional arguments and
	    ``memory_pool`` only. With one, it also accepts ``options`` and
	    arbitrary keyword arguments, which ``_handle_options`` turns into the
	    options object passed to the call.
	    """
	    if option_class is None:
	        def wrapper(*args, memory_pool=None):
	            return func.call(args, None, memory_pool)
	    else:
	        # *args / **kwargs are required here: the positional arguments are
	        # forwarded as a tuple and the keywords feed the options class.
	        def wrapper(*args, memory_pool=None, options=None, **kwargs):
	            options = _handle_options(func_name, option_class, options,
	                                      kwargs)
	            return func.call(args, options, memory_pool)
	    return wrapper


	def _make_signature(arg_names, var_arg_names, option_class):
	    """
	    Build the ``inspect.Signature`` advertised by a generated wrapper.

	    The signature lists *arg_names* as positional-or-keyword parameters,
	    *var_arg_names* as var-positional parameters, then keyword-only
	    ``memory_pool`` and, when *option_class* is given, keyword-only
	    ``options`` followed by the option class constructor parameters.
	    """
	    from inspect import Parameter
	    params = []
	    for name in arg_names:
	        params.append(Parameter(name, Parameter.POSITIONAL_OR_KEYWORD))
	    for name in var_arg_names:
	        params.append(Parameter(name, Parameter.VAR_POSITIONAL))
	    params.append(Parameter("memory_pool", Parameter.KEYWORD_ONLY,
	                            default=None))
	    if option_class is not None:
	        params.append(Parameter("options", Parameter.KEYWORD_ONLY,
	                                default=None))
	        options_sig = inspect.signature(option_class)
	        for p in options_sig.parameters.values():
	            # XXX for now, our generic wrappers don't allow positional
	            # option arguments
	            params.append(p.replace(kind=Parameter.KEYWORD_ONLY))
	    return inspect.Signature(params)


	def _wrap_function(name, func):
	    """
	    Create the documented Python wrapper for one compute function.

	    The wrapper is built by ``_make_generic_wrapper``, given a
	    ``__signature__`` from ``_make_signature`` and then decorated by
	    ``_decorate_compute_function``.
	    """
	    option_class = _get_options_class(func)
	    arg_names = _get_arg_names(func)
	    has_vararg = arg_names and arg_names[-1].startswith('*')
	    if has_vararg:
	        # A trailing '*name' entry is split off as the var-positional name.
	        var_arg_names = [arg_names.pop().lstrip('*')]
	    else:
	        var_arg_names = []

	    wrapper = _make_generic_wrapper(name, func, option_class)
	    wrapper.__signature__ = _make_signature(arg_names, var_arg_names,
	                                            option_class)
	    return _decorate_compute_function(wrapper, name, func, option_class)


	def _make_global_functions():
	    """
	    Make global functions wrapping each compute function.

	    Note that some of the automatically-generated wrappers may be
	    overridden by custom versions below.
	    """
	    g = globals()
	    reg = function_registry()

	    # Avoid clashes with Python keywords
	    rewrites = {'and': 'and_',
	                'or': 'or_'}

	    for cpp_name in reg.list_functions():
	        name = rewrites.get(cpp_name, cpp_name)
	        func = reg.get_function(cpp_name)
	        assert name not in g, name
	        # The wrapper is stored under both the original and rewritten name.
	        g[cpp_name] = g[name] = _wrap_function(name, func)


	# Generate the wrappers at import time; the definitions that follow may
	# replace some of them.
	_make_global_functions()


	def cast(arr, target_type, safe=True):
	    """
	    Cast array values to another data type. Can also be invoked as an array
	    instance method.

	    Parameters
	    ----------
	    arr : Array or ChunkedArray
	    target_type : DataType or type string alias
	        Type to cast to
	    safe : bool, default True
	        Check for overflows or other unsafe conversions

	    Returns
	    -------
	    casted : Array

	    Raises
	    ------
	    ValueError
	        If *target_type* is None.

	    Examples
	    --------
	    >>> from datetime import datetime
	    >>> import pyarrow as pa
	    >>> arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)])
	    >>> arr.type
	    TimestampType(timestamp[us])

	    You can use ``pyarrow.DataType`` objects to specify the target type:

	    >>> cast(arr, pa.timestamp('ms'))
	    <pyarrow.lib.TimestampArray object at 0x7fe93c0f6910>
	    [
	      2010-01-01 00:00:00.000,
	      2015-01-01 00:00:00.000
	    ]

	    >>> cast(arr, pa.timestamp('ms')).type
	    TimestampType(timestamp[ms])

	    Alternatively, it is also supported to use the string aliases for these
	    types:

	    >>> arr.cast('timestamp[ms]')
	    <pyarrow.lib.TimestampArray object at 0x10420eb88>
	    [
	      1262304000000,
	      1420070400000
	    ]
	    >>> arr.cast('timestamp[ms]').type
	    TimestampType(timestamp[ms])
	    """
	    if target_type is None:
	        raise ValueError("Cast target type must not be None")
	    if safe:
	        options = CastOptions.safe(target_type)
	    else:
	        options = CastOptions.unsafe(target_type)
	    return call_function("cast", [arr], options)


	def count_substring(array, pattern, *, ignore_case=False):
	    """
	    Count the occurrences of substring *pattern* in each value of a
	    string array.

	    Parameters
	    ----------
	    array : pyarrow.Array or pyarrow.ChunkedArray
	    pattern : str
	        pattern to search for exact matches
	    ignore_case : bool, default False
	        Ignore case while searching.

	    Returns
	    -------
	    result : pyarrow.Array or pyarrow.ChunkedArray
	    """
	    return call_function("count_substring", [array],
	                         MatchSubstringOptions(pattern,
	                                               ignore_case=ignore_case))


	def count_substring_regex(array, pattern, *, ignore_case=False):
	    """
	    Count the non-overlapping matches of regex *pattern* in each value
	    of a string array.

	    Parameters
	    ----------
	    array : pyarrow.Array or pyarrow.ChunkedArray
	    pattern : str
	        regex pattern whose matches are counted
	    ignore_case : bool, default False
	        Ignore case while searching.

	    Returns
	    -------
	    result : pyarrow.Array or pyarrow.ChunkedArray
	    """
	    return call_function("count_substring_regex", [array],
	                         MatchSubstringOptions(pattern,
	                                               ignore_case=ignore_case))


	def find_substring(array, pattern, *, ignore_case=False):
	    """
	    Locate the first occurrence of a literal substring in each value of a
	    string array.

	    Parameters
	    ----------
	    array : pyarrow.Array or pyarrow.ChunkedArray
	        String values to search.
	    pattern : str
	        Substring to look for; matched exactly, not as a regex.
	    ignore_case : bool, default False
	        Ignore case while searching.

	    Returns
	    -------
	    result : pyarrow.Array or pyarrow.ChunkedArray
	    """
	    match_options = MatchSubstringOptions(pattern, ignore_case=ignore_case)
	    return call_function("find_substring", [array], match_options)


	def find_substring_regex(array, pattern, *, ignore_case=False):
	    """
	    Locate the first match of a regex pattern in each value of a string
	    array.

	    Parameters
	    ----------
	    array : pyarrow.Array or pyarrow.ChunkedArray
	        String values to search.
	    pattern : str
	        Regex pattern to search for.
	    ignore_case : bool, default False
	        Ignore case while searching.

	    Returns
	    -------
	    result : pyarrow.Array or pyarrow.ChunkedArray
	    """
	    match_options = MatchSubstringOptions(pattern, ignore_case=ignore_case)
	    return call_function("find_substring_regex", [array], match_options)


	def match_like(array, pattern, *, ignore_case=False):
	    """
	    Test whether the SQL-style LIKE pattern ``pattern`` matches each value
	    of a string array.

	    Parameters
	    ----------
	    array : pyarrow.Array or pyarrow.ChunkedArray
	        String values to test.
	    pattern : str
	        SQL-style LIKE pattern. '%' will match any number of
	        characters, '_' will match exactly one character, and all
	        other characters match themselves. To match a literal percent
	        sign or underscore, precede the character with a backslash.
	    ignore_case : bool, default False
	        Ignore case while searching.

	    Returns
	    -------
	    result : pyarrow.Array or pyarrow.ChunkedArray
	    """
	    match_options = MatchSubstringOptions(pattern, ignore_case=ignore_case)
	    return call_function("match_like", [array], match_options)


	def match_substring(array, pattern, *, ignore_case=False):
	    """
	    Test whether a literal substring is contained in each value of a
	    string array.

	    Parameters
	    ----------
	    array : pyarrow.Array or pyarrow.ChunkedArray
	        String values to test.
	    pattern : str
	        Substring to look for; matched exactly, not as a regex.
	    ignore_case : bool, default False
	        Ignore case while searching.

	    Returns
	    -------
	    result : pyarrow.Array or pyarrow.ChunkedArray
	    """
	    match_options = MatchSubstringOptions(pattern, ignore_case=ignore_case)
	    return call_function("match_substring", [array], match_options)


	def match_substring_regex(array, pattern, *, ignore_case=False):
	    """
	    Test whether a regex pattern matches at any position in each value of
	    a string array.

	    Parameters
	    ----------
	    array : pyarrow.Array or pyarrow.ChunkedArray
	        String values to test.
	    pattern : str
	        Regex pattern to search for.
	    ignore_case : bool, default False
	        Ignore case while searching.

	    Returns
	    -------
	    result : pyarrow.Array or pyarrow.ChunkedArray
	    """
	    match_options = MatchSubstringOptions(pattern, ignore_case=ignore_case)
	    return call_function("match_substring_regex", [array], match_options)


	def mode(array, n=1, *, skip_nulls=True, min_count=0):
	    """
	    Return the top-n most common values of a numerical (chunked) array
	    together with the number of times each occurs.

	    Results are in descending order of occurrence. If several values have
	    the same count, the smaller one is returned first.

	    Parameters
	    ----------
	    array : pyarrow.Array or pyarrow.ChunkedArray
	    n : int, default 1
	        Specify the top-n values.
	    skip_nulls : bool, default True
	        If True, ignore nulls in the input. Else return an empty array
	        if any input is null.
	    min_count : int, default 0
	        If there are fewer than this many values in the input, return
	        an empty array.

	    Returns
	    -------
	    An array of <input type "Mode", int64_t "Count"> structs

	    Examples
	    --------
	    >>> import pyarrow as pa
	    >>> import pyarrow.compute as pc
	    >>> arr = pa.array([1, 1, 2, 2, 3, 2, 2, 2])
	    >>> modes = pc.mode(arr, 2)
	    >>> modes[0]
	    <pyarrow.StructScalar: {'mode': 2, 'count': 5}>
	    >>> modes[1]
	    <pyarrow.StructScalar: {'mode': 1, 'count': 2}>
	    """
	    return call_function(
	        "mode",
	        [array],
	        ModeOptions(n, skip_nulls=skip_nulls, min_count=min_count),
	    )


	def filter(data, mask, null_selection_behavior='drop'):
	    """
	    Keep the values (or records) of array- or table-like data whose
	    corresponding entry in a boolean mask is true.

	    Parameters
	    ----------
	    data : Array, ChunkedArray, RecordBatch, or Table
	    mask : Array, ChunkedArray
	        Must be of boolean type
	    null_selection_behavior : str, default 'drop'
	        Configure the behavior on encountering a null slot in the mask.
	        Allowed values are 'drop' and 'emit_null'.

	        - 'drop': nulls will be treated as equivalent to False.
	        - 'emit_null': nulls will result in a null in the output.

	    Returns
	    -------
	    result : depends on inputs

	    Examples
	    --------
	    >>> import pyarrow as pa
	    >>> arr = pa.array(["a", "b", "c", None, "e"])
	    >>> mask = pa.array([True, False, None, False, True])
	    >>> arr.filter(mask)
	    <pyarrow.lib.StringArray object at 0x7fa826df9200>
	    [
	      "a",
	      "e"
	    ]
	    >>> arr.filter(mask, null_selection_behavior='emit_null')
	    <pyarrow.lib.StringArray object at 0x7fa826df9200>
	    [
	      "a",
	      null,
	      "e"
	    ]
	    """
	    return call_function(
	        'filter', [data, mask], FilterOptions(null_selection_behavior))


	def index(data, value, start=None, end=None, *, memory_pool=None):
	    """
	    Find the index of the first occurrence of a given value.

	    When ``start`` and/or ``end`` are given, only that slice of ``data``
	    is searched; a found position is still reported relative to the
	    beginning of ``data``.

	    Parameters
	    ----------
	    data : Array or ChunkedArray
	    value : Scalar-like object
	        Converted to a scalar of ``data``'s type when it is not one
	        already.
	    start : int, optional
	        Position at which the search begins.
	    end : int, optional
	        Position at which the search stops (exclusive).
	    memory_pool : MemoryPool, optional
	        If not passed, will allocate memory from the default memory pool.

	    Returns
	    -------
	    index : the index, or -1 if not found
	    """
	    # Narrow the data down to the requested search window.
	    if start is None:
	        if end is not None:
	            data = data.slice(0, end)
	    elif end is None:
	        data = data.slice(start)
	    else:
	        data = data.slice(start, end - start)

	    # Make sure the needle is a scalar of the same type as the data.
	    if isinstance(value, pa.Scalar):
	        if data.type != value.type:
	            value = pa.scalar(value.as_py(), type=data.type)
	    else:
	        value = pa.scalar(value, type=data.type)

	    found = call_function(
	        'index', [data], IndexOptions(value=value), memory_pool)
	    if start is None:
	        return found

	    # The kernel saw the sliced data, so shift a hit back by ``start``.
	    position = found.as_py()
	    if position >= 0:
	        return pa.scalar(position + start, type=pa.int64())
	    return found


	def take(data, indices, *, boundscheck=True, memory_pool=None):
	    """
	    Pick values (or records) out of array- or table-like data at the given
	    integer positions.

	    The result will be of the same type(s) as the input, with elements
	    taken from the input array (or record batch / table fields) at the
	    given indices. If an index is null then the corresponding value in the
	    output will be null.

	    Parameters
	    ----------
	    data : Array, ChunkedArray, RecordBatch, or Table
	    indices : Array, ChunkedArray
	        Must be of integer type
	    boundscheck : boolean, default True
	        Whether to boundscheck the indices. If False and there is an out of
	        bounds index, will likely cause the process to crash.
	    memory_pool : MemoryPool, optional
	        If not passed, will allocate memory from the default memory pool.

	    Returns
	    -------
	    result : depends on inputs

	    Examples
	    --------
	    >>> import pyarrow as pa
	    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
	    >>> indices = pa.array([0, None, 4, 3])
	    >>> arr.take(indices)
	    <pyarrow.lib.StringArray object at 0x7ffa4fc7d368>
	    [
	      "a",
	      null,
	      "e",
	      null
	    ]
	    """
	    return call_function(
	        'take',
	        [data, indices],
	        TakeOptions(boundscheck=boundscheck),
	        memory_pool,
	    )


	def fill_null(values, fill_value):
	    """
	    Replace each null element in values with fill_value. The fill_value
	    must be the same type as values or able to be casted to the array's
	    type.

	    This is an alias for :func:`coalesce`.

	    Parameters
	    ----------
	    values : Array, ChunkedArray, or Scalar-like object
	        Each null element is replaced with the corresponding value
	        from fill_value.
	    fill_value : Array, ChunkedArray, or Scalar-like object
	        If not same type as data will attempt to cast. Plain Python
	        values and scalars are converted to a scalar of ``values``'s
	        type; arrays and chunked arrays are cast with their ``cast``
	        method.

	    Returns
	    -------
	    result : depends on inputs

	    Examples
	    --------
	    >>> import pyarrow as pa
	    >>> arr = pa.array([1, 2, None, 3], type=pa.int8())
	    >>> fill_value = pa.scalar(5, type=pa.int8())
	    >>> arr.fill_null(fill_value)
	    <pyarrow.lib.Int8Array object at 0x7f95437f01a0>
	    [
	      1,
	      2,
	      5,
	      3
	    ]
	    """
	    if not isinstance(fill_value, (pa.Array, pa.ChunkedArray, pa.Scalar)):
	        fill_value = pa.scalar(fill_value, type=values.type)
	    elif values.type != fill_value.type:
	        if isinstance(fill_value, pa.Scalar):
	            fill_value = pa.scalar(fill_value.as_py(), type=values.type)
	        else:
	            # Arrays and chunked arrays have no ``as_py``; going through it
	            # raised AttributeError, so cast them to the target type.
	            fill_value = fill_value.cast(values.type)

	    return call_function("coalesce", [values, fill_value])


	def top_k_unstable(values, k, sort_keys=None, *, memory_pool=None):
	    """
	    Select the indices of the top-k ordered elements from array- or
	    table-like data.

	    This is a specialization for :func:`select_k_unstable`. Output is not
	    guaranteed to be stable.

	    Parameters
	    ----------
	    values : Array, ChunkedArray, RecordBatch, or Table
	        Data to sort and get top indices from.
	    k : int
	        The number of `k` elements to keep.
	    sort_keys : List-like, optional
	        Column key names to order by when input is table-like data.
	        The passed object is never modified.
	    memory_pool : MemoryPool, optional
	        If not passed, will allocate memory from the default memory pool.

	    Returns
	    -------
	    result : Array of indices

	    Examples
	    --------
	    >>> import pyarrow as pa
	    >>> import pyarrow.compute as pc
	    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
	    >>> pc.top_k_unstable(arr, k=3)
	    <pyarrow.lib.UInt64Array object at 0x7fdcb19d7f30>
	    [
	      5,
	      4,
	      2
	    ]
	    """
	    # Work on a private copy: appending to the caller's list would leak a
	    # "dummy" entry into it on every call, and tuples cannot be appended to.
	    keys = [] if sort_keys is None else list(sort_keys)
	    if isinstance(values, (pa.Array, pa.ChunkedArray)):
	        # Array-like input has no column names; a placeholder key carries
	        # the sort order.
	        keys.append(("dummy", "descending"))
	    else:
	        keys = [(key_name, "descending") for key_name in keys]
	    options = SelectKOptions(k, keys)
	    return call_function("select_k_unstable", [values], options, memory_pool)


	def bottom_k_unstable(values, k, sort_keys=None, *, memory_pool=None):
	    """
	    Select the indices of the bottom-k ordered elements from
	    array- or table-like data.

	    This is a specialization for :func:`select_k_unstable`. Output is not
	    guaranteed to be stable.

	    Parameters
	    ----------
	    values : Array, ChunkedArray, RecordBatch, or Table
	        Data to sort and get bottom indices from.
	    k : int
	        The number of `k` elements to keep.
	    sort_keys : List-like, optional
	        Column key names to order by when input is table-like data.
	        The passed object is never modified.
	    memory_pool : MemoryPool, optional
	        If not passed, will allocate memory from the default memory pool.

	    Returns
	    -------
	    result : Array of indices

	    Examples
	    --------
	    >>> import pyarrow as pa
	    >>> import pyarrow.compute as pc
	    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
	    >>> pc.bottom_k_unstable(arr, k=3)
	    <pyarrow.lib.UInt64Array object at 0x7fdcb19d7fa0>
	    [
	      0,
	      1,
	      2
	    ]
	    """
	    # Work on a private copy: appending to the caller's list would leak a
	    # "dummy" entry into it on every call, and tuples cannot be appended to.
	    keys = [] if sort_keys is None else list(sort_keys)
	    if isinstance(values, (pa.Array, pa.ChunkedArray)):
	        # Array-like input has no column names; a placeholder key carries
	        # the sort order.
	        keys.append(("dummy", "ascending"))
	    else:
	        keys = [(key_name, "ascending") for key_name in keys]
	    options = SelectKOptions(k, keys)
	    return call_function("select_k_unstable", [values], options, memory_pool)