# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name
"""Unique operator"""
from tvm import te, tir
from ..te import hybrid
from .scan import cumsum
from .sort import sort, argsort


def _calc_adjacent_diff_ir(data, output, binop=tir.Sub):
    """Low level IR to calculate adjacent difference in a 1-D array.
Parameters
----------
data : Buffer
Input 1-D Buffer.
output: Buffer
A buffer to store adjacent difference, of the same shape as data. The adjacent difference
is defined as: output[0] = 0, output[i] = binop(data[i], data[i-1])
where i > 0 and i < len(data).
binop: function, optional
        A binary associative op to use for calculating the adjacent difference. The function takes
        two TIR expressions and produces a new TIR expression. By default it uses tvm.tir.Sub to
        compute the adjacent difference.
"""
ib = tir.ir_builder.create()
data_ptr = ib.buffer_ptr(data)
output_ptr = ib.buffer_ptr(output)
with ib.for_range(0, data.shape[0], kind="parallel") as i:
with ib.if_scope(i == 0):
output_ptr[0] = 0
with ib.else_scope():
output_ptr[i] = tir.Cast(output.dtype, binop(data_ptr[i], data_ptr[i - 1]))
return ib.get()


def _calc_adjacent_diff(data, out_dtype="int32", binop=tir.Sub):
    """Calculate the adjacent difference of a 1-D array.
Parameters
----------
data : tvm.te.Tensor
Input 1-D tensor.
    out_dtype : str
The output tensor data type.
binop: function, optional
        A binary associative op to use for calculating the adjacent difference. The function takes
        two TIR expressions and produces a new TIR expression. By default it uses tvm.tir.Sub to
compute the adjacent difference.
Returns
-------
output : tvm.te.Tensor
1-D tensor storing the adjacent difference of the input tensor. The adjacent difference
is defined as: output[0] = 0, output[i] = binop(data[i], data[i-1])
where i > 0 and i < len(data).
"""
return te.extern(
[data.shape],
[data],
lambda ins, outs: _calc_adjacent_diff_ir(ins[0], outs[0], binop=binop),
dtype=[out_dtype],
name="_calc_adjacent_diff",
tag="_calc_adjacent_diff_cpu",
)


@hybrid.script
def _calc_num_unique(inc_scan):
"""Helper function to get the number of unique elements fron inc_scan tensor"""
output = output_tensor((1,), "int32")
output[0] = inc_scan[inc_scan.shape[0] - 1] + int32(1)
return output


def _calc_unique_ir(
    data, argsorted_indices, inc_scan, index_converter, unique_elements, inverse_indices, counts
):
    """Low level IR to calculate the unique elements, inverse indices, and (optionally) counts
    of the unique elements of a 1-D array.
Parameters
----------
data : Buffer
Input 1-D Buffer.
argsorted_indices : Buffer
A buffer that stores the argsorted indices of the input data.
inc_scan : Buffer
A buffer that stores the inclusive scan of the binary tir.NE adjacent difference
of the sorted data.
index_converter (optional) : Buffer
An optional index converter that transforms the unique element index
such that new_idx = index_converter[old_idx].
unique_elements : Buffer
A buffer that stores the unique elements.
inverse_indices : Buffer
A buffer that stores the index of each input data element in the unique element array.
counts (optional) : Buffer
A buffer that stores the count of each unique element.
"""
ib = tir.ir_builder.create()
data_ptr = ib.buffer_ptr(data)
argsorted_indices_ptr = ib.buffer_ptr(argsorted_indices)
inc_scan_ptr = ib.buffer_ptr(inc_scan)
unique_elements_ptr = ib.buffer_ptr(unique_elements)
inverse_indices_ptr = ib.buffer_ptr(inverse_indices)
index_converter_ptr = None
if isinstance(index_converter, tir.Buffer):
index_converter_ptr = ib.buffer_ptr(index_converter)
if isinstance(counts, tir.Buffer):
counts_ptr = ib.buffer_ptr(counts)
    # reuse the inverse_indices buffer as a temporary buffer to store the positions i
    # where inc_scan[i] != inc_scan[i - 1]
unique_seq_indices_ptr = ib.buffer_ptr(inverse_indices)
data_length = data.shape[0]
    # compute counts of the unique elements, if requested
if isinstance(counts, tir.Buffer):
num_unique = inc_scan_ptr[inc_scan.shape[0] - 1] + 1
num_elements = data.shape[0]
unique_seq_indices_ptr[num_unique - 1] = num_elements
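        # first record, for each unique element, the position one past its last occurrence in
        # the sorted data; counts are then the adjacent differences of these positions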
with ib.new_scope():
with ib.for_range(0, data_length, kind="parallel") as i:
with ib.if_scope(i > 0):
with ib.if_scope(inc_scan_ptr[i] != inc_scan_ptr[i - 1]):
unique_seq_indices_ptr[inc_scan_ptr[i] - 1] = i
with ib.new_scope():
with ib.for_range(0, num_unique, kind="parallel") as i:
unique_idx = i if not index_converter_ptr else index_converter_ptr[i]
with ib.if_scope(i == 0):
counts_ptr[unique_idx] = unique_seq_indices_ptr[i]
with ib.else_scope():
counts_ptr[unique_idx] = (
unique_seq_indices_ptr[i] - unique_seq_indices_ptr[i - 1]
)
# calculate unique elements and inverse indices
with ib.new_scope():
with ib.for_range(0, data_length, kind="parallel") as i:
data_idx = argsorted_indices_ptr[i]
unique_idx = (
inc_scan_ptr[i] if not index_converter_ptr else index_converter_ptr[inc_scan_ptr[i]]
)
inverse_indices_ptr[data_idx] = unique_idx
with ib.if_scope(i == 0):
unique_elements_ptr[unique_idx] = data_ptr[data_idx]
with ib.else_scope():
with ib.if_scope(inc_scan_ptr[i] != inc_scan_ptr[i - 1]):
unique_elements_ptr[unique_idx] = data_ptr[data_idx]
return ib.get()


@hybrid.script
def _calc_first_occurence(argsorted_indices, inc_scan):
"""Hybrid script to calculate the first occurence of each unique element in the input data.
Parameters
----------
argsorted_indices : tvm.te.Tensor
A tensor that stores the argsorted indices of the input data.
inc_scan : tvm.te.Tensor
A tensor that stores the inclusive scan of the binary tir.NE adjacent difference
of the sorted data.
    Returns
    -------
    first_occurence : tvm.te.Tensor
        A tensor that stores the first occurrence of each unique element in the input data.
"""
first_occurence = output_tensor(argsorted_indices.shape, "int32")
for i in parallel(argsorted_indices.shape[0]):
first_occurence[i] = argsorted_indices.shape[0]
for i in parallel(argsorted_indices.shape[0]):
if i == 0 or inc_scan[i] != inc_scan[i - 1]:
first_occurence[inc_scan[i]] = argsorted_indices[i]
return first_occurence


def unique(data, is_sorted=True, return_counts=False):
    """
    Find the unique elements of a 1-D tensor. Note that `output` and `counts` are both padded to
    the same length as `data`, and elements with index >= num_unique[0] have undefined values.
Parameters
----------
data : tvm.te.Tensor
A 1-D tensor of integers.
    is_sorted : bool
Whether to sort the unique elements in ascending order before returning as output.
return_counts : bool
Whether to return the count of each unique element.
Returns
-------
unique : tvm.te.Tensor
A 1-D tensor containing the unique elements of the input data tensor. The same size as
        the input data. If there are fewer unique elements than there are input data, the end of
        the tensor holds undefined values.
indices : tvm.te.Tensor
A 1-D tensor. The same size as output. For each entry in output, it contains
        the index of its first occurrence in the input data. The end of the tensor is padded
with the length of the input data.
inverse_indices : tvm.te.Tensor
A 1-D tensor. For each entry in data, it contains the index of that data element in
the unique array. (Note that inverse_indices is very similar to indices if output is not
sorted.)
num_unique : tvm.te.Tensor
A 1-D tensor with size=1 containing the number of unique elements in the input data tensor.
counts (optional) : tvm.te.Tensor
A 1-D tensor containing the count of each unique element in the output.
Examples
--------
    .. code-block:: python

        data = [4, 5, 1, 2, 3, 3, 4, 5]

        [output, indices, inverse_indices, num_unique] = unique(data, False, False)
        output          = [4, 5, 1, 2, 3, _, _, _]
        indices         = [0, 1, 2, 3, 4, _, _, _]
        inverse_indices = [0, 1, 2, 3, 4, 4, 0, 1]
        num_unique      = [5]

        [output, indices, inverse_indices, num_unique, counts] = unique(data, False, True)
        output          = [4, 5, 1, 2, 3, _, _, _]
        indices         = [0, 1, 2, 3, 4, _, _, _]
        inverse_indices = [0, 1, 2, 3, 4, 4, 0, 1]
        num_unique      = [5]
        counts          = [2, 2, 1, 1, 2, _, _, _]

        [output, indices, inverse_indices, num_unique] = unique(data, True)
        output          = [1, 2, 3, 4, 5, _, _, _]
        indices         = [2, 3, 4, 0, 1, _, _, _]
        inverse_indices = [3, 4, 0, 1, 2, 2, 3, 4]
        num_unique      = [5]
"""
sorted_data = sort(data)
argsorted_indices = argsort(data, dtype="int32")
# adjacent difference
adjacent_diff = _calc_adjacent_diff(sorted_data, "int32", tir.NE)
# inclusive scan
inc_scan = cumsum(adjacent_diff, dtype="int32", exclusive=0)
# total number of unique elements
num_unique_elements = _calc_num_unique(inc_scan)
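    # e.g. for data = [4, 5, 1, 2, 3, 3, 4, 5]:
    #   sorted_data         = [1, 2, 3, 3, 4, 4, 5, 5]
    #   argsorted_indices   = [2, 3, 4, 5, 0, 6, 1, 7]
    #   adjacent_diff (NE)  = [0, 1, 1, 0, 1, 0, 1, 0]
    #   inc_scan            = [0, 1, 2, 2, 3, 3, 4, 4]
    #   num_unique_elements = [5]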
# prepare outputs
if return_counts:
out_data_shape = [data.shape] * 3
out_dtypes = [data.dtype, "int32", "int32"]
else:
out_data_shape = [data.shape] * 2
out_dtypes = [data.dtype, "int32"]
# prepare inputs and fcompute
first_occurence = _calc_first_occurence(argsorted_indices, inc_scan)
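    # for the example above, first_occurence = [2, 3, 4, 0, 1, 8, 8, 8] (8 = len(data) is padding)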
if is_sorted:
in_data = [data, argsorted_indices, inc_scan]
if return_counts:
fcompute = lambda ins, outs: _calc_unique_ir(*ins, None, *outs)
else:
fcompute = lambda ins, outs: _calc_unique_ir(*ins, None, *outs, None)
indices = first_occurence
else:
        # calculate the index converter by sorting unique elements by their first occurrence
argsorted_first_occurence = argsort(first_occurence, dtype="int32")
index_converter = argsort(argsorted_first_occurence, dtype="int32")
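        # e.g. first_occurence = [2, 3, 4, 0, 1, 8, 8, 8] gives
        #   argsorted_first_occurence = [3, 4, 0, 1, 2, 5, 6, 7] and
        #   index_converter           = [2, 3, 4, 0, 1, 5, 6, 7],
        # mapping each sorted-unique index to its position in first-occurrence order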
in_data = [data, argsorted_indices, inc_scan, index_converter]
if return_counts:
fcompute = lambda ins, outs: _calc_unique_ir(*ins, *outs)
else:
fcompute = lambda ins, outs: _calc_unique_ir(*ins, *outs, None)
        # first_occurence is ordered by the sorted unique elements; sorting it yields the
        # first-occurrence index of each element of the (unsorted) output
indices = sort(first_occurence)
outs = te.extern(
out_data_shape,
in_data,
fcompute,
dtype=out_dtypes,
name="_calc_unique",
tag="_calc_unique_cpu",
)
if return_counts:
return [outs[0], indices, outs[1], num_unique_elements, outs[2]]
return [outs[0], indices, outs[1], num_unique_elements]