| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # pylint: disable=import-self, invalid-name, unused-argument, ungrouped-imports, wrong-import-order |
| """ |
TensorFlow testcases
====================
This file is a test script that exercises TensorFlow operators through the Relay frontend.
| """ |
| from __future__ import print_function |
| from distutils.version import LooseVersion |
| |
| import threading |
| import platform |
| import os.path |
| from packaging import version as package_version |
| import numpy as np |
| import pytest |
| |
| from PIL import Image |
| |
| from tensorflow.python.framework import constant_op |
| from tensorflow.python.framework import graph_util |
| from tensorflow.python.ops import nn_ops |
| from tensorflow.python.ops import nn |
| from tensorflow.python.ops import array_ops |
| from tensorflow.python.ops import math_ops |
| from tensorflow.python.ops import variable_scope |
| from tensorflow.python.ops import variables |
| from tensorflow.python.ops import init_ops |
| from tensorflow.python.framework import function |
| from tensorflow.python.framework import ops |
| from tensorflow.python.framework import dtypes |
| from tensorflow.python.ops import gen_functional_ops |
| from tensorflow.python.client import device_lib |
| |
| try: |
| import tensorflow.compat.v1 as tf |
| |
| tf.disable_v2_behavior() |
| except ImportError: |
| import tensorflow as tf |
| |
| import tvm |
| from tvm import relay, ir |
| from tvm.runtime.vm import VirtualMachine |
| from tvm.relay.frontend.tensorflow import from_tensorflow |
| from tvm.contrib import graph_executor |
| from tvm.contrib import utils |
| import tvm.testing |
| import tvm.relay.testing.tf as tf_testing |
| from relay.utils.tag_span import _set_span, _create_span, _verify_structural_equal_with_span |
| |
| |
# Only allow TF to use half of the GPU RAM, leaving the other half
# for TVM.
| gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5) |
| gpu_sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) |
| gpu_sess.close() |
| |
| |
| ####################################################################### |
| # Generic run functions for TVM & tensorflow |
| # ------------------------------------------ |
| |
| |
| def convert_to_list(x): |
| if not isinstance(x, list): |
| x = [x] |
| return x |
| |
| |
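# Map dtype strings used by the tests to the corresponding TF dtype objects.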
| tf_dtypes = { |
| "float32": tf.float32, |
| "float16": tf.float16, |
| "float64": tf.float64, |
| "int32": tf.int32, |
| "uint8": tf.uint8, |
| "int8": tf.int8, |
| "int16": tf.int16, |
| "uint16": tf.uint16, |
| "int64": tf.int64, |
| } |
| |
| |
| def vmobj_to_list(o): |
| """Converts TVM objects returned by VM execution to Python List.""" |
| if isinstance(o, tvm.nd.NDArray): |
| return [o.numpy()] |
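    # ADT objects (e.g. Relay lists and tuples) are flattened recursively.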
| elif isinstance(o, tvm.runtime.container.ADT): |
| result = [] |
| for f in o: |
| result.extend(vmobj_to_list(f)) |
| return result |
| elif isinstance(o, tvm.relay.backend.interpreter.ConstructorValue): |
| if o.constructor.name_hint == "Cons": |
| tl = vmobj_to_list(o.fields[1]) |
| hd = vmobj_to_list(o.fields[0]) |
| hd.extend(tl) |
| return hd |
| elif o.constructor.name_hint == "Nil": |
| return [] |
| elif "tensor_nil" in o.constructor.name_hint: |
| return [0] |
| elif "tensor" in o.constructor.name_hint: |
| return [o.fields[0].numpy()] |
| else: |
| raise RuntimeError(f"Unknown object type: {o.constructor.name_hint}") |
| else: |
| raise RuntimeError(f"Unknown object type: {type(o)}") |
| |
| |
| def run_tvm_graph( |
| graph_def, |
| input_data, |
| input_node, |
| num_output=1, |
| target="llvm", |
| out_names=None, |
| opt_level=3, |
| mode="graph_executor", |
| cuda_layout="NCHW", |
| layout=None, |
| disabled_pass=None, |
| ignore_in_shape=False, |
| serialize=False, |
| convert_config=None, |
| ): |
| """Generic function to compile on relay and execute on tvm""" |
| input_data = convert_to_list(input_data) |
| input_node = convert_to_list(input_node) |
| if target == "cuda": |
| layout = cuda_layout |
| target_host = None |
| if ignore_in_shape: |
| shape_dict = None |
| else: |
| shape_dict = { |
| e: i.shape if hasattr(i, "shape") else () for e, i in zip(input_node, input_data) |
| } |
| with tvm.testing.disable_span_filling(): |
| mod, params = relay.frontend.from_tensorflow( |
| graph_def, |
| layout=layout, |
| shape=shape_dict, |
| outputs=out_names, |
| convert_config=convert_config, |
| ) |
| with tvm.testing.enable_span_filling(): |
| mod_with_span, _ = relay.frontend.from_tensorflow( |
| graph_def, |
| layout=layout, |
| shape=shape_dict, |
| outputs=out_names, |
| convert_config=convert_config, |
| ) |
| assert tvm.ir.structural_equal(mod["main"], mod_with_span["main"], map_free_vars=True) |
| |
| dev = tvm.device(target, 0) |
| if mode == "debug": |
| inputs = [] |
| for param in mod["main"].params: |
| found = False |
| for i, n in enumerate(input_node): |
| if n == param.name_hint: |
| found = True |
| inputs.append(tvm.nd.array(input_data[i])) |
| break |
            # The interpreter doesn't bind constants, so they still need to be
            # looked up in params.
| if not found: |
| inputs.append(tvm.nd.array(params[param.name_hint])) |
| result = relay.create_executor(mode, mod=mod, device=tvm.cpu(), target="llvm").evaluate()( |
| *inputs |
| ) |
| return vmobj_to_list(result) |
| elif mode == "vm": |
| with tvm.transform.PassContext(opt_level=opt_level, disabled_pass=disabled_pass): |
| mod = relay.transform.InferType()(mod) |
| vm_exec = relay.vm.compile(mod, target="llvm", params=params) |
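            # Optionally round-trip the executable through save/load to cover
            # serialization.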
| if serialize: |
| code, lib = vm_exec.save() |
| vm_exec = tvm.runtime.vm.Executable.load_exec(code, lib) |
| vm = VirtualMachine(vm_exec, tvm.cpu()) |
| inputs = {} |
| for e, i in zip(input_node, input_data): |
| inputs[e] = tvm.nd.array(i) |
| result = vm.invoke("main", **inputs) |
| return vmobj_to_list(result) |
| else: |
| with tvm.transform.PassContext(opt_level=opt_level, disabled_pass=disabled_pass): |
| target = tvm.target.Target(target, target_host) |
| graph, lib, params = relay.build(mod, target=target, params=params) |
| |
| m = graph_executor.create(graph, lib, dev) |
| # set inputs |
| for e, i in zip(input_node, input_data): |
| if e != "": |
| m.set_input(e, tvm.nd.array(i)) |
| |
| m.set_input(**params) |
| # execute |
| m.run() |
| # get outputs |
| assert out_names is None or num_output == len( |
| out_names |
| ), f"out_names: {out_names} num_output: {num_output}" |
| tvm_output_list = [m.get_output(i).numpy() for i in range(num_output)] |
| return tvm_output_list |
| |
| |
| def run_tf_graph(sess, input_data, input_node, output_node): |
| """Generic function to execute tensorflow""" |
| input_data = convert_to_list(input_data) |
| input_node = convert_to_list(input_node) |
| output_node = convert_to_list(output_node) |
| |
| tensor = [sess.graph.get_tensor_by_name(output_name) for output_name in output_node] |
| |
| input_dict = {e: input_data[i] for i, e in enumerate(input_node)} |
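    # An empty input name means the graph takes no external inputs.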
| if len(input_node) == 1 and input_node[0] == "": |
| output_data = sess.run(tensor) |
| else: |
| output_data = sess.run(tensor, input_dict) |
| return output_data |
| |
| |
| def compare_tf_with_tvm( |
| in_data, |
| in_name, |
| out_name, |
| init_global_variables=False, |
| no_gpu=False, |
| opt_level=3, |
| mode="graph_executor", |
| cuda_layout="NCHW", |
| add_shapes_to_graph_def=True, |
| targets=None, |
| ignore_in_shape=False, |
| convert_config=None, |
| atol=1e-5, |
| rtol=1e-5, |
| ): |
| """Generic function to generate and compare tensorflow and TVM output""" |
| |
    def name_without_num(name):
        """Strip the output index (e.g. ":0") from a tensor name."""
        return name.split(":")[0] if ":" in name else name
| |
| out_name = convert_to_list(out_name) |
| out_node = [name_without_num(name) for name in out_name] |
| |
| in_data = convert_to_list(in_data) |
| in_name = convert_to_list(in_name) |
| in_node = [name_without_num(name) for name in in_name] |
| with tf.Session() as sess: |
| if init_global_variables: |
| sess.run(variables.global_variables_initializer()) |
| final_graph_def = ( |
| tf_testing.AddShapesToGraphDef(sess, out_node) |
| if add_shapes_to_graph_def |
| else tf.get_default_graph().as_graph_def() |
| ) |
| |
| tf_output = run_tf_graph(sess, in_data, in_name, out_name) |
| |
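    # Run the comparison on every requested target, defaulting to llvm and cuda.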
| devices = targets if targets else ["llvm", "cuda"] |
| |
| for device in devices: |
| _ = tvm.device(device, 0) |
| if not tvm.testing.device_enabled(device): |
| print(f"Skip because {device} is not enabled") |
| continue |
| if no_gpu and device == "cuda": |
| continue |
| if "cublas" in device and not tvm.get_global_func("tvm.contrib.cublas.matmul", True): |
| print(f"Skip because cublas is not enabled: {device}") |
| continue |
| |
| tvm_output = run_tvm_graph( |
| final_graph_def, |
| in_data, |
| in_node, |
| target=device, |
| out_names=out_name, |
| num_output=len(out_name), |
| opt_level=opt_level, |
| mode=mode, |
| cuda_layout=cuda_layout, |
| ignore_in_shape=ignore_in_shape, |
| convert_config=convert_config, |
| ) |
        # Since the names from the TensorFlow and Relay runs are not exactly the
        # same, only the first len(tf_output) outputs are compared.
| for i, tf_out in enumerate(tf_output): |
| if not isinstance(tf_out, np.ndarray): |
| assert len(tvm_output[i].shape) == 0 # pylint: disable=len-as-condition |
| tvm.testing.assert_allclose(tf_out, tvm_output[i], atol=atol, rtol=rtol) |
| |
| sess.close() |
| |
| |
| def is_gpu_available(): |
| """Verify gpu is available""" |
| local_device_protos = device_lib.list_local_devices() |
| gpu_list = [x.name for x in local_device_protos if x.device_type == "GPU"] |
| if gpu_list: |
| print("Tensorflow GPU:", gpu_list) |
| return True |
| else: |
| return False |
| |
| |
| ####################################################################### |
| # Pooling |
| # ------- |
| |
| |
| def _test_pooling_iteration(input_shape, **kwargs): |
| """One iteration of pool operation with given shapes and attributes""" |
| |
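    # Use strictly negative input values; presumably this exposes max-pooling
    # bugs where implicit zero padding would otherwise leak into the output.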
| x = -np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape) - 1 |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=input_shape, dtype="float32") |
| nn_ops.pool(in_data, **kwargs) |
| |
| if kwargs["pooling_type"] == "MAX": |
| out_name = "max_pool:0" |
| else: |
| out_name = "avg_pool:0" |
| |
| compare_tf_with_tvm(x, "Placeholder:0", out_name) |
| |
| |
| def _test_pooling(input_shape, **kwargs): |
| _test_pooling_iteration(input_shape, **kwargs) |
| |
| if is_gpu_available(): |
| if len(input_shape) == 4: |
| input_shape = [input_shape[ii] for ii in (0, 3, 1, 2)] |
| if isinstance(kwargs["padding"], list): |
| kwargs["padding"] = [kwargs["padding"][ii] for ii in (0, 3, 1, 2)] |
| kwargs["data_format"] = "NCHW" |
| _test_pooling_iteration(input_shape, **kwargs) |
| |
| |
| def _test_pooling_dynamic(input_shape, np_shape, **kwargs): |
| """Pooling with dynamic height and width dimensions.""" |
| x = -np.arange(np.prod(np_shape), dtype=np.float32).reshape(np_shape) - 1 |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=input_shape, dtype="float32") |
| nn_ops.pool(in_data, **kwargs) |
| |
| if kwargs["pooling_type"] == "MAX": |
| out_name = "max_pool:0" |
| else: |
| out_name = "avg_pool:0" |
| |
| compare_tf_with_tvm(x, "Placeholder:0", out_name, mode="vm", ignore_in_shape=True) |
| |
| |
| @tvm.testing.uses_gpu |
| def test_forward_pooling(): |
| """Pooling""" |
| # TensorFlow only supports NDHWC for max_pool3d on CPU |
| for pool_type in ["AVG", "MAX"]: |
| # NDHWC is the default layout for max_pool3d and avg_pool3d in TensorFlow |
| _test_pooling( |
| input_shape=[1, 3, 32, 32, 32], |
| window_shape=[2, 2, 2], |
| padding="VALID", |
| pooling_type=pool_type, |
| dilation_rate=[1, 1, 1], |
| strides=[2, 2, 2], |
| ) |
| |
| _test_pooling( |
| input_shape=[1, 3, 32, 32, 32], |
| window_shape=[1, 1, 1], |
| padding="SAME", |
| pooling_type=pool_type, |
| dilation_rate=[1, 1, 1], |
| strides=[1, 1, 1], |
| ) |
| |
| _test_pooling( |
| input_shape=[1, 3, 32, 32, 32], |
| window_shape=[2, 2, 2], |
| padding="SAME", |
| pooling_type=pool_type, |
| dilation_rate=[1, 1, 1], |
| strides=[2, 2, 2], |
| ) |
| |
| _test_pooling_dynamic( |
| input_shape=[1, None, None, 3], |
| np_shape=[1, 32, 32, 3], |
| window_shape=[2, 2], |
| padding="SAME", |
| pooling_type=pool_type, |
| dilation_rate=[1, 1], |
| strides=[1, 1], |
| ) |
| |
        # Test cases for max_pool3d & avg_pool3d with layout NCDHW
        # TensorFlow pool3d doesn't support NCDHW on CPU
| if is_gpu_available(): |
| _test_pooling( |
| input_shape=[1, 3, 32, 32, 32], |
| window_shape=[1, 1, 1], |
| padding="SAME", |
| pooling_type=pool_type, |
| dilation_rate=[1, 1, 1], |
| strides=[1, 1, 1], |
| data_format="NCDHW", |
| ) |
| |
| _test_pooling( |
| input_shape=[1, 3, 32, 32, 32], |
| window_shape=[2, 2, 2], |
| padding="VALID", |
| pooling_type=pool_type, |
| dilation_rate=[1, 1, 1], |
| strides=[2, 2, 2], |
| data_format="NCDHW", |
| ) |
| |
| _test_pooling( |
| input_shape=[2, 9, 10, 2], |
| window_shape=[1, 1], |
| padding="SAME", |
| pooling_type=pool_type, |
| dilation_rate=[1, 1], |
| strides=[1, 1], |
| ) |
| |
| _test_pooling( |
| input_shape=[2, 10, 9, 2], |
| window_shape=[1, 1], |
| padding="SAME", |
| pooling_type=pool_type, |
| dilation_rate=[1, 1], |
| strides=[1, 1], |
| ) |
| |
| _test_pooling( |
| input_shape=[2, 9, 10, 2], |
| window_shape=[2, 1], |
| padding="SAME", |
| pooling_type=pool_type, |
| dilation_rate=[1, 1], |
| strides=[1, 1], |
| ) |
| |
| _test_pooling( |
| input_shape=[2, 10, 9, 2], |
| window_shape=[2, 3], |
| padding="SAME", |
| pooling_type=pool_type, |
| dilation_rate=[1, 1], |
| strides=[2, 1], |
| ) |
| |
| # Tests involving SpaceToBatchND |
| _test_pooling( |
| input_shape=[1, 1, 2, 1], |
| window_shape=[1, 1], |
| padding="VALID", |
| pooling_type=pool_type, |
| dilation_rate=[1, 2], |
| ) |
| |
| _test_pooling( |
| input_shape=[1, 2, 1], |
| window_shape=[1], |
| padding="VALID", |
| pooling_type=pool_type, |
| dilation_rate=[2], |
| ) |
| # Explicit padding |
| if package_version.parse(tf.VERSION) >= package_version.parse("2.4.1"): |
| _test_pooling( |
| input_shape=[2, 9, 10, 2], |
| window_shape=[4, 4], |
| padding=[[0, 0], [0, 1], [2, 3], [0, 0]], |
| pooling_type="MAX", |
| dilation_rate=[1, 1], |
| strides=[1, 1], |
| ) |
| |
| |
| ####################################################################### |
| # Convolution |
| # ----------- |
| |
| |
| def _test_convolution( |
| opname, |
| tensor_in_sizes, |
| filter_in_sizes, |
| dilations, |
| strides, |
| padding, |
| data_format, |
| deconv_output_shape=None, |
| add_shapes_to_graph_def=True, |
| ): |
| """One iteration of convolution with given shapes and attributes""" |
| deconv_output_shape = deconv_output_shape or [] |
| total_size_1 = np.prod(tensor_in_sizes) |
| total_size_2 = np.prod(filter_in_sizes) |
    # Initialize the input tensor with an array of incrementing numbers
    # starting from 1.
| data_array = [f * 1.0 for f in range(1, total_size_1 + 1)] |
| filter_array = [f * 1.0 for f in range(1, total_size_2 + 1)] |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype="float32") |
| in_filter = constant_op.constant(filter_array, shape=filter_in_sizes, dtype="float32") |
| if data_format == "NHWC": |
| strides = [1] + strides + [1] |
| dilations = [1] + dilations + [1] |
| else: |
| strides = [1, 1] + strides |
| dilations = [1, 1] + dilations |
| |
| if opname == "conv": |
| nn_ops.conv2d( |
| in_data, |
| in_filter, |
| strides=strides, |
| dilations=dilations, |
| padding=padding, |
| data_format=data_format, |
| ) |
| |
| compare_tf_with_tvm( |
| np.reshape(data_array, tensor_in_sizes).astype("float32"), |
| "Placeholder:0", |
| "Conv2D:0", |
| add_shapes_to_graph_def=add_shapes_to_graph_def, |
| ) |
| elif opname == "conv_transpose": |
| nn_ops.conv2d_transpose( |
| in_data, |
| in_filter, |
| output_shape=deconv_output_shape, |
| strides=strides, |
| padding=padding, |
| data_format=data_format, |
| ) |
| |
| compare_tf_with_tvm( |
| np.reshape(data_array, tensor_in_sizes).astype("float32"), |
| "Placeholder:0", |
| "conv2d_transpose:0", |
| add_shapes_to_graph_def=add_shapes_to_graph_def, |
| ) |
| else: |
| nn_ops.depthwise_conv2d_native( |
| in_data, |
| in_filter, |
| strides=strides, |
| dilations=dilations, |
| padding=padding, |
| data_format=data_format, |
| ) |
| |
| compare_tf_with_tvm( |
| np.reshape(data_array, tensor_in_sizes).astype("float32"), |
| "Placeholder:0", |
| "DepthwiseConv2dNative:0", |
| add_shapes_to_graph_def=add_shapes_to_graph_def, |
| ) |
| |
| |
| @pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/10275") |
| @tvm.testing.uses_gpu |
| def test_forward_convolution(): |
| """Convolution""" |
| if is_gpu_available(): |
| _test_convolution("conv", [4, 176, 8, 8], [1, 1, 176, 32], [1, 1], [1, 1], "SAME", "NCHW") |
| _test_convolution("conv", [4, 19, 17, 17], [3, 3, 19, 19], [1, 1], [2, 2], "VALID", "NCHW") |
| _test_convolution("conv", [4, 124, 17, 17], [1, 1, 124, 19], [1, 1], [1, 1], "SAME", "NCHW") |
| _test_convolution("conv", [4, 12, 17, 17], [3, 3, 12, 32], [1, 1], [2, 2], "VALID", "NCHW") |
| _test_convolution( |
| "depthwise", [4, 176, 8, 8], [1, 1, 176, 1], [1, 1], [1, 1], "SAME", "NCHW" |
| ) |
| _test_convolution( |
| "depthwise", [4, 19, 17, 17], [3, 3, 19, 1], [1, 1], [2, 2], "VALID", "NCHW" |
| ) |
| _test_convolution( |
| "depthwise", [4, 124, 17, 17], [1, 1, 124, 1], [1, 1], [1, 1], "SAME", "NCHW" |
| ) |
| _test_convolution( |
| "depthwise", [4, 12, 17, 17], [3, 3, 12, 1], [1, 1], [2, 2], "VALID", "NCHW" |
| ) |
| _test_convolution( |
| "depthwise", [4, 12, 17, 17], [3, 3, 12, 2], [1, 1], [2, 2], "VALID", "NCHW" |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 32, 8, 8], |
| [1, 1, 176, 32], |
| [1, 1], |
| [1, 1], |
| "SAME", |
| "NCHW", |
| [4, 176, 8, 8], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 32, 8, 8], |
| [2, 2, 176, 32], |
| [1, 1], |
| [1, 1], |
| "SAME", |
| "NCHW", |
| [4, 176, 8, 8], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 32, 8, 8], |
| [2, 2, 176, 32], |
| [1, 1], |
| [2, 2], |
| "SAME", |
| "NCHW", |
| [4, 176, 15, 15], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 32, 8, 8], |
| [3, 3, 176, 32], |
| [1, 1], |
| [1, 1], |
| "SAME", |
| "NCHW", |
| [4, 176, 8, 8], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 32, 8, 8], |
| [3, 3, 176, 32], |
| [1, 1], |
| [2, 2], |
| "SAME", |
| "NCHW", |
| [4, 176, 15, 15], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 32, 8, 8], |
| [3, 3, 176, 32], |
| [1, 1], |
| [2, 2], |
| "SAME", |
| "NCHW", |
| [4, 176, 16, 16], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 19, 8, 8], |
| [3, 3, 19, 19], |
| [1, 1], |
| [2, 2], |
| "VALID", |
| "NCHW", |
| [4, 19, 17, 17], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 19, 17, 17], |
| [1, 1, 124, 19], |
| [1, 1], |
| [1, 1], |
| "SAME", |
| "NCHW", |
| [4, 124, 17, 17], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 19, 17, 17], |
| [3, 3, 124, 19], |
| [1, 1], |
| [1, 1], |
| "SAME", |
| "NCHW", |
| [4, 124, 17, 17], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 32, 8, 8], |
| [3, 3, 12, 32], |
| [1, 1], |
| [2, 2], |
| "VALID", |
| "NCHW", |
| [4, 12, 17, 17], |
| ) |
| # kernel 2x2, strides (2,2) |
| _test_convolution( |
| "conv_transpose", |
| [4, 19, 8, 8], |
| [2, 2, 19, 19], |
| [1, 1], |
| [2, 2], |
| "VALID", |
| "NCHW", |
| [4, 19, 16, 16], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 32, 8, 8], |
| [2, 2, 12, 32], |
| [1, 1], |
| [2, 2], |
| "VALID", |
| "NCHW", |
| [4, 12, 16, 16], |
| ) |
| # output channel is 1 |
| _test_convolution( |
| "conv_transpose", |
| [1, 19, 8, 8], |
| [1, 1, 1, 19], |
| [1, 1], |
| [1, 1], |
| "VALID", |
| "NCHW", |
| [1, 1, 8, 8], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 19, 8, 8], |
| [2, 2, 66, 19], |
| [1, 1], |
| [2, 2], |
| "VALID", |
| "NCHW", |
| [4, 66, 16, 16], |
| ) |
| _test_convolution("conv", [4, 8, 8, 176], [1, 1, 176, 32], [1, 1], [1, 1], "SAME", "NHWC") |
| _test_convolution("conv", [4, 17, 17, 19], [3, 3, 19, 19], [1, 1], [2, 2], "VALID", "NHWC") |
| _test_convolution("conv", [4, 17, 17, 124], [1, 1, 124, 19], [1, 1], [1, 1], "SAME", "NHWC") |
| _test_convolution("conv", [4, 17, 17, 12], [3, 3, 12, 32], [1, 1], [2, 2], "VALID", "NHWC") |
| _test_convolution( |
| "conv", |
| [4, 17, 17, 12], |
| [3, 3, 12, 32], |
| [1, 1], |
| [2, 2], |
| "VALID", |
| "NHWC", |
| add_shapes_to_graph_def=False, |
| ) |
| _test_convolution("depthwise", [4, 8, 8, 176], [1, 1, 176, 1], [1, 1], [1, 1], "SAME", "NHWC") |
| _test_convolution("depthwise", [4, 17, 17, 19], [3, 3, 19, 1], [1, 1], [2, 2], "VALID", "NHWC") |
| _test_convolution("depthwise", [4, 17, 17, 124], [1, 1, 124, 1], [1, 1], [1, 1], "SAME", "NHWC") |
| _test_convolution("depthwise", [4, 17, 17, 12], [3, 3, 12, 1], [1, 1], [2, 2], "VALID", "NHWC") |
| _test_convolution("depthwise", [4, 17, 17, 12], [3, 3, 12, 2], [1, 1], [2, 2], "VALID", "NHWC") |
| _test_convolution( |
| "depthwise", |
| [4, 17, 17, 12], |
| [3, 3, 12, 2], |
| [1, 1], |
| [2, 2], |
| "VALID", |
| "NHWC", |
| add_shapes_to_graph_def=False, |
| ) |
| |
| _test_convolution( |
| "conv_transpose", |
| [4, 8, 8, 32], |
| [1, 1, 176, 32], |
| [1, 1], |
| [1, 1], |
| "SAME", |
| "NHWC", |
| [4, 8, 8, 176], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 8, 8, 32], |
| [2, 2, 176, 32], |
| [1, 1], |
| [1, 1], |
| "SAME", |
| "NHWC", |
| [4, 8, 8, 176], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 8, 8, 32], |
| [2, 2, 176, 32], |
| [1, 1], |
| [2, 2], |
| "SAME", |
| "NHWC", |
| [4, 15, 15, 176], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 8, 8, 32], |
| [3, 3, 176, 32], |
| [1, 1], |
| [1, 1], |
| "SAME", |
| "NHWC", |
| [4, 8, 8, 176], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 8, 8, 32], |
| [3, 3, 176, 32], |
| [1, 1], |
| [2, 2], |
| "SAME", |
| "NHWC", |
| [4, 15, 15, 176], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 8, 8, 32], |
| [3, 3, 176, 32], |
| [1, 1], |
| [2, 2], |
| "SAME", |
| "NHWC", |
| [4, 16, 16, 176], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 8, 8, 19], |
| [3, 3, 19, 19], |
| [1, 1], |
| [2, 2], |
| "VALID", |
| "NHWC", |
| [4, 17, 17, 19], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 17, 17, 19], |
| [1, 1, 124, 19], |
| [1, 1], |
| [1, 1], |
| "SAME", |
| "NHWC", |
| [4, 17, 17, 124], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 17, 17, 19], |
| [3, 3, 124, 19], |
| [1, 1], |
| [1, 1], |
| "SAME", |
| "NHWC", |
| [4, 17, 17, 124], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 8, 8, 32], |
| [3, 3, 12, 32], |
| [1, 1], |
| [2, 2], |
| "VALID", |
| "NHWC", |
| [4, 17, 17, 12], |
| ) |
| # kernel 2x2, strides (2,2) |
| _test_convolution( |
| "conv_transpose", |
| [4, 8, 8, 19], |
| [2, 2, 19, 19], |
| [1, 1], |
| [2, 2], |
| "VALID", |
| "NHWC", |
| [4, 16, 16, 19], |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 8, 8, 32], |
| [2, 2, 12, 32], |
| [1, 1], |
| [2, 2], |
| "VALID", |
| "NHWC", |
| [4, 16, 16, 12], |
| ) |
| # output channel is 1 |
| _test_convolution( |
| "conv_transpose", |
| [1, 8, 8, 19], |
| [1, 1, 1, 19], |
| [1, 1], |
| [1, 1], |
| "VALID", |
| "NHWC", |
| [1, 8, 8, 1], |
| ) |
| # Test without adding shapes to graph def |
| _test_convolution( |
| "conv_transpose", |
| [4, 8, 8, 32], |
| [1, 1, 176, 32], |
| [1, 1], |
| [1, 1], |
| "SAME", |
| "NHWC", |
| [4, 8, 8, 176], |
| add_shapes_to_graph_def=False, |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 8, 8, 19], |
| [2, 2, 66, 19], |
| [1, 1], |
| [2, 2], |
| "VALID", |
| "NHWC", |
| [4, 16, 16, 66], |
| ) |
| # Explicit padding |
| if package_version.parse(tf.VERSION) >= package_version.parse("2.4.1"): |
| _test_convolution( |
| "conv", |
| [4, 8, 8, 16], |
| [1, 1, 16, 32], |
| [1, 1], |
| [1, 1], |
| [[0, 0], [2, 3], [0, 1], [0, 0]], |
| "NHWC", |
| ) |
| _test_convolution( |
| "depthwise", |
| [4, 8, 8, 16], |
| [1, 1, 16, 1], |
| [1, 1], |
| [1, 1], |
| [[0, 0], [2, 3], [0, 1], [0, 0]], |
| "NHWC", |
| ) |
| _test_convolution( |
| "conv_transpose", |
| [4, 8, 8, 32], |
| [3, 3, 176, 32], |
| [1, 1], |
| [2, 2], |
| [[0, 0], [1, 0], [1, 0], [0, 0]], |
| "NHWC", |
| [4, 16, 16, 176], |
| ) |
| |
| |
| ####################################################################### |
| # Convolution3D |
| # ------------- |
| |
| |
| def _test_convolution3d( |
| opname, |
| tensor_in_sizes, |
| filter_in_sizes, |
| dilations, |
| strides, |
| padding, |
| data_format, |
| deconv_output_shape=None, |
| add_shapes_to_graph_def=True, |
| ): |
| """One iteration of 3D convolution with given shapes and attributes""" |
| deconv_output_shape = deconv_output_shape or [] |
| total_size_1 = np.prod(tensor_in_sizes) |
| total_size_2 = np.prod(filter_in_sizes) |
    # Initialize the input tensor with an array of incrementing numbers
    # starting from 1.
| data_array = [f * 1.0 for f in range(1, total_size_1 + 1)] |
| filter_array = [f * 1.0 for f in range(1, total_size_2 + 1)] |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype="float32") |
| in_filter = constant_op.constant(filter_array, shape=filter_in_sizes, dtype="float32") |
| if data_format == "NDHWC": |
| strides = [1] + strides + [1] |
| dilations = [1] + dilations + [1] |
| else: |
| strides = [1, 1] + strides |
| dilations = [1, 1] + dilations |
| |
| if opname == "conv": |
| nn_ops.conv3d( |
| in_data, |
| in_filter, |
| strides=strides, |
| dilations=dilations, |
| padding=padding, |
| data_format=data_format, |
| ) |
| |
| compare_tf_with_tvm( |
| np.reshape(data_array, tensor_in_sizes).astype("float32"), |
| "Placeholder:0", |
| "Conv3D:0", |
| cuda_layout="NCDHW", |
| add_shapes_to_graph_def=add_shapes_to_graph_def, |
| ) |
| |
| |
| @tvm.testing.uses_gpu |
| def test_forward_convolution3d(): |
| """Convolution3d""" |
| if is_gpu_available(): |
| _test_convolution3d( |
| "conv", [4, 176, 8, 8, 8], [1, 1, 1, 176, 32], [1, 1, 1], [1, 1, 1], "SAME", "NCDHW" |
| ) |
| _test_convolution3d( |
| "conv", [4, 19, 17, 17, 17], [3, 3, 3, 19, 19], [1, 1, 1], [2, 2, 2], "VALID", "NCDHW" |
| ) |
| _test_convolution3d( |
| "conv", [4, 124, 17, 17, 17], [1, 1, 1, 124, 19], [1, 1, 1], [1, 1, 1], "SAME", "NCDHW" |
| ) |
| _test_convolution3d( |
| "conv", [4, 12, 17, 17, 17], [3, 3, 3, 12, 32], [1, 1, 1], [2, 2, 2], "VALID", "NCDHW" |
| ) |
| _test_convolution3d( |
| "conv", [4, 8, 8, 8, 176], [1, 1, 1, 176, 32], [1, 1, 1], [1, 1, 1], "SAME", "NDHWC" |
| ) |
| _test_convolution3d( |
| "conv", [4, 17, 17, 17, 19], [3, 3, 3, 19, 19], [1, 1, 1], [2, 2, 2], "VALID", "NDHWC" |
| ) |
| _test_convolution3d( |
| "conv", [4, 17, 17, 17, 124], [1, 1, 1, 124, 19], [1, 1, 1], [1, 1, 1], "SAME", "NDHWC" |
| ) |
| _test_convolution3d( |
| "conv", [4, 17, 17, 17, 12], [3, 3, 3, 12, 32], [1, 1, 1], [2, 2, 2], "VALID", "NDHWC" |
| ) |
| # Test without adding shapes to graph def |
| _test_convolution3d( |
| "conv", |
| [4, 17, 17, 17, 12], |
| [3, 3, 3, 12, 32], |
| [1, 1, 1], |
| [2, 2, 2], |
| "VALID", |
| "NDHWC", |
| add_shapes_to_graph_def=False, |
| ) |
| |
| |
| ####################################################################### |
| # Convolution3D Transpose |
| # ----------------------- |
| |
| |
| def _test_convolution3d_transpose( |
| data_shape, |
| filter_shape, |
| strides, |
| padding, |
| output_shape, |
| data_format="NCDHW", |
| add_shapes_to_graph_def=True, |
| ): |
| """One iteration of 3D convolution transpose with given shapes and attributes""" |
| |
| dtype = "float32" |
| data_array = np.random.uniform(size=data_shape).astype(dtype) |
| filter_array = np.random.uniform(size=filter_shape).astype(dtype) |
| if data_format == "NDHWC": |
| strides = [1] + strides + [1] |
| else: |
| strides = [1, 1] + strides |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=data_shape, dtype=dtype) |
| in_filter = constant_op.constant(filter_array, shape=filter_shape, dtype=dtype) |
| |
| nn_ops.conv3d_transpose( |
| in_data, |
| in_filter, |
| output_shape=output_shape, |
| strides=strides, |
| padding=padding, |
| data_format=data_format, |
| ) |
| |
| compare_tf_with_tvm( |
| data_array, |
| "Placeholder:0", |
| "conv3d_transpose:0", |
| cuda_layout="NDHWC", |
| add_shapes_to_graph_def=add_shapes_to_graph_def, |
| ) |
| |
| |
| @tvm.testing.uses_gpu |
| def test_forward_convolution3d_transpose(): |
| """Convolution3d transpose""" |
| if is_gpu_available(): |
| _test_convolution3d_transpose( |
| data_shape=[1, 10, 8, 8, 8], |
| filter_shape=[1, 1, 1, 6, 10], |
| strides=[1, 1, 1], |
| padding="VALID", |
| output_shape=[1, 6, 8, 8, 8], |
| ) |
| |
| _test_convolution3d_transpose( |
| data_shape=[4, 9, 8, 8, 8], |
| filter_shape=[1, 1, 1, 6, 9], |
| strides=[1, 1, 1], |
| padding="VALID", |
| output_shape=[4, 6, 8, 8, 8], |
| ) |
| |
| _test_convolution3d_transpose( |
| data_shape=[1, 3, 8, 8, 8], |
| filter_shape=[1, 1, 1, 6, 3], |
| strides=[2, 2, 2], |
| padding="SAME", |
| output_shape=[1, 6, 15, 15, 15], |
| ) |
| |
| _test_convolution3d_transpose( |
| data_shape=[1, 16, 8, 8, 8], |
| filter_shape=[3, 3, 3, 6, 16], |
| strides=[3, 3, 3], |
| padding="VALID", |
| output_shape=[1, 6, 24, 24, 24], |
| ) |
| |
| _test_convolution3d_transpose( |
| data_shape=[1, 8, 8, 8, 10], |
| filter_shape=[1, 1, 1, 6, 10], |
| strides=[1, 1, 1], |
| padding="VALID", |
| output_shape=[1, 8, 8, 8, 6], |
| data_format="NDHWC", |
| ) |
| |
| _test_convolution3d_transpose( |
| data_shape=[4, 8, 8, 8, 9], |
| filter_shape=[1, 1, 1, 6, 9], |
| strides=[1, 1, 1], |
| padding="VALID", |
| output_shape=[4, 8, 8, 8, 6], |
| data_format="NDHWC", |
| ) |
| |
| _test_convolution3d_transpose( |
| data_shape=[1, 8, 8, 8, 3], |
| filter_shape=[1, 1, 1, 6, 3], |
| strides=[2, 2, 2], |
| padding="SAME", |
| output_shape=[1, 15, 15, 15, 6], |
| data_format="NDHWC", |
| ) |
| |
| _test_convolution3d_transpose( |
| data_shape=[1, 8, 8, 8, 16], |
| filter_shape=[3, 3, 3, 6, 16], |
| strides=[3, 3, 3], |
| padding="VALID", |
| output_shape=[1, 24, 24, 24, 6], |
| data_format="NDHWC", |
| ) |
| |
| # Test without adding shapes to graph def |
| _test_convolution3d_transpose( |
| data_shape=[1, 8, 8, 8, 16], |
| filter_shape=[3, 3, 3, 6, 16], |
| strides=[3, 3, 3], |
| padding="VALID", |
| output_shape=[1, 24, 24, 24, 6], |
| data_format="NDHWC", |
| add_shapes_to_graph_def=False, |
| ) |
| |
| |
| ####################################################################### |
| # BiasAdd |
| # ----------- |
| |
| |
| def _test_biasadd(tensor_in_sizes, data_format): |
| """One iteration of biasadd with given shapes and attributes""" |
| |
    total_size_1 = np.prod(tensor_in_sizes)
| tensor_bias_sizes = [tensor_in_sizes[1]] if data_format == "NCHW" else [tensor_in_sizes[3]] |
| total_size_2 = tensor_bias_sizes[0] |
    # Initialize the input tensor with an array of incrementing numbers
    # starting from 1.
| data_array = [f * 1.0 for f in range(1, total_size_1 + 1)] |
| bias_array = [f * 1.0 for f in range(1, total_size_2 + 1)] |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype="float32") |
| in_bias = constant_op.constant(bias_array, shape=tensor_bias_sizes, dtype="float32") |
| nn_ops.bias_add(in_data, in_bias, data_format=data_format) |
| |
| compare_tf_with_tvm( |
| np.reshape(data_array, tensor_in_sizes).astype("float32"), "Placeholder:0", "BiasAdd:0" |
| ) |
| |
| |
| @tvm.testing.uses_gpu |
| def test_forward_biasadd(): |
| """Bias add""" |
| if is_gpu_available(): |
| _test_biasadd([4, 176, 8, 8], "NCHW") |
| _test_biasadd([1, 100, 1, 1], "NCHW") |
| _test_biasadd([4, 19, 17, 17], "NCHW") |
| _test_biasadd([4, 124, 3, 3], "NCHW") |
| |
| _test_biasadd([4, 8, 8, 176], "NHWC") |
| _test_biasadd([1, 1, 1, 100], "NHWC") |
| _test_biasadd([4, 17, 17, 19], "NHWC") |
| _test_biasadd([4, 3, 3, 124], "NHWC") |
| |
| |
| def _test_forward_where(input_shape): |
| with tf.Graph().as_default(): |
| dtype = tf.float32 |
| t = tf.constant( |
| np.random.choice([0, 1, -2, 3, -1, 0.1, -0.2], size=input_shape).astype(dtype.name) |
| ) |
| out = tf.where(t) |
| compare_tf_with_tvm([], [], out.name, mode="debug") |
| compare_tf_with_tvm([], [], out.name, mode="vm") |
| |
| |
def test_forward_argwhere():
    """Argwhere"""
| _test_forward_where((5,)) |
| _test_forward_where((5, 5)) |
| _test_forward_where((5, 5, 5)) |
| _test_forward_where((5, 5, 5, 5)) |
| _test_forward_where((5, 5, 5, 5, 5)) |
| |
| |
| def _test_forward_where_with_broadcast(in_shape, cond_shape): |
| choice_list = list(np.arange(10).astype("float32")) |
| t1 = np.random.choice(choice_list, size=cond_shape) |
| t2 = np.random.choice(choice_list, size=cond_shape) |
| x = np.random.choice(choice_list, size=in_shape) |
| y = np.random.choice(choice_list, size=in_shape) |
| |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(shape=cond_shape, dtype="float32", name="in1") |
| in2 = tf.placeholder(shape=cond_shape, dtype="float32", name="in2") |
| condition = math_ops.less(in1, in2, name="less") |
| lhs = tf.placeholder(shape=in_shape, dtype="float32", name="x") |
| rhs = tf.placeholder(shape=in_shape, dtype="float32", name="y") |
| out = tf.where(condition, lhs, rhs) |
| compare_tf_with_tvm([t1, t2, x, y], ["in1:0", "in2:0", "x:0", "y:0"], out.name) |
| |
| |
def test_forward_where_with_broadcast():
    """Where with broadcast"""
| _test_forward_where_with_broadcast((5, 2), (5,)) |
| _test_forward_where_with_broadcast((5, 7), (5,)) |
| _test_forward_where_with_broadcast((3, 2, 5), (3,)) |
| |
| |
| ####################################################################### |
| # SpaceToBatchND |
| # -------------- |
| |
| |
| def _test_space_to_batch_nd(input_shape, block_shape, paddings, dtype="int32"): |
| data = np.random.uniform(0, 5, size=input_shape).astype(dtype) |
| |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(shape=input_shape, dtype=dtype) |
| out = tf.space_to_batch_nd(in_data, block_shape, paddings) |
| |
| compare_tf_with_tvm(data, in_data.name, out.name) |
| |
| |
| def _test_space_to_batch_nd_infer_paddings(input_shape, block_shape, dtype="int32"): |
| data = np.random.uniform(0, 5, size=input_shape).astype(dtype) |
| padding_np = np.array([0, 1]).astype(np.int32).reshape((1, 2)) |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(shape=input_shape, dtype=dtype) |
| const1 = tf.constant(padding_np, dtype=tf.int32) |
        # Make paddings an input to tf.reverse, but not an input to the graph,
        # so that it can be extracted with infer_value_simulated.
| paddings = tf.reverse(const1, axis=[-1]) |
| out = tf.space_to_batch_nd(in_data, block_shape, paddings) |
| compare_tf_with_tvm(data, in_data.name, out.name) |
| |
| |
| def test_forward_space_to_batch_nd(): |
| """SpaceToBatchNd""" |
| # test cases: https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/space-to-batch-n-d |
| _test_space_to_batch_nd(input_shape=[1, 2, 2, 1], block_shape=[2, 2], paddings=[[0, 0], [0, 0]]) |
| |
| _test_space_to_batch_nd(input_shape=[1, 2, 2, 3], block_shape=[2, 2], paddings=[[0, 0], [0, 0]]) |
| |
| _test_space_to_batch_nd(input_shape=[1, 4, 4, 1], block_shape=[2, 2], paddings=[[0, 0], [0, 0]]) |
| |
| _test_space_to_batch_nd( |
| input_shape=[2, 2, 4, 1], block_shape=[2, 2], paddings=[[0, 0], [2, 0]], dtype="int64" |
| ) |
| |
| # pylint: disable=line-too-long |
| # https://github.com/tensorflow/tensorflow/blob/24f578/tensorflow/python/kernel_tests/spacetobatch_op_test.py |
| _test_space_to_batch_nd(input_shape=[2, 3], block_shape=[2], paddings=[[1, 0]], dtype="float32") |
| |
| _test_space_to_batch_nd( |
| input_shape=[2, 3, 2], block_shape=[2], paddings=[[1, 0]], dtype="float64" |
| ) |
| |
| _test_space_to_batch_nd_infer_paddings(input_shape=[2, 3, 2], block_shape=[2]) |
| |
| |
| ####################################################################### |
| # BatchToSpaceND |
| # -------------- |
| |
| |
| def _test_batch_to_space_nd(input_shape, block_shape, crops, dtype="int32"): |
| data = np.random.uniform(0, 5, size=input_shape).astype(dtype) |
| |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(shape=input_shape, dtype=dtype) |
| out = tf.batch_to_space_nd(in_data, block_shape, crops) |
| |
| compare_tf_with_tvm(data, in_data.name, out.name) |
| |
| |
| def test_forward_batch_to_space_nd(): |
| """BatchToSpaceNd""" |
| # test cases: https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/batch-to-space-n-d |
| _test_batch_to_space_nd(input_shape=[4, 1, 1, 1], block_shape=[2, 2], crops=[[0, 0], [0, 0]]) |
| |
| _test_batch_to_space_nd(input_shape=[4, 1, 1, 3], block_shape=[2, 2], crops=[[0, 0], [0, 0]]) |
| |
| _test_batch_to_space_nd(input_shape=[4, 2, 2, 1], block_shape=[2, 2], crops=[[0, 0], [0, 0]]) |
| |
| _test_batch_to_space_nd( |
| input_shape=[8, 1, 3, 1], block_shape=[2, 2], crops=[[0, 0], [2, 0]], dtype="int64" |
| ) |
| |
| # pylint: disable=line-too-long |
| # https://github.com/tensorflow/tensorflow/blob/24f578/tensorflow/python/kernel_tests/batchtospace_op_test.py |
| _test_batch_to_space_nd( |
| input_shape=[18, 2, 1, 2], block_shape=[2, 3], crops=[[1, 1], [0, 0]], dtype="float32" |
| ) |
| |
| _test_batch_to_space_nd( |
| input_shape=[20, 5, 8, 7], block_shape=[2, 2], crops=[[1, 1], [1, 1]], dtype="float64" |
| ) |
| |
| |
| ####################################################################### |
| # Reshape |
| # ------- |
| |
| |
| def _test_reshape(data, out_shape): |
| """One iteration of reshape operation with given data and out shape""" |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype) |
| array_ops.reshape(in_data, out_shape) |
| |
| compare_tf_with_tvm(data, "Placeholder:0", "Reshape:0") |
| |
| |
| def _test_reshape_with_call(): |
| """relay.expr.Call as shape""" |
| data = np.zeros((6, 4, 2)) |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype) |
| out_shape = tf.constant([1, 2, 3], dtype="int32") |
| out_shape = tf.multiply(out_shape, 2) |
| array_ops.reshape(in_data, out_shape) |
| |
| compare_tf_with_tvm(data, "Placeholder:0", "Reshape:0") |
| |
| |
| def _test_reshape_like(data, shape_like): |
| """A special case for reshape.""" |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype) |
| in_shape_like = array_ops.placeholder(shape=shape_like.shape, dtype=data.dtype) |
| out_shape = array_ops.shape(in_shape_like) |
| array_ops.reshape(in_data, out_shape) |
| |
| compare_tf_with_tvm(data, "Placeholder:0", "Reshape:0") |
| |
| |
| def _test_reshape_symbolic(data, a_data, b_data): |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype) |
| a = array_ops.placeholder(shape=a_data.shape, dtype=a_data.dtype) |
| b = array_ops.placeholder(shape=b_data.shape, dtype=b_data.dtype) |
| newshape = tf.add(a, b) |
| out = array_ops.reshape(in_data, newshape) |
| |
| for mode in ["debug", "vm"]: |
| compare_tf_with_tvm( |
| [data, a_data, b_data], [in_data.name, a.name, b.name], out.name, mode=mode |
| ) |
| |
| |
| def test_forward_reshape(): |
| """Reshape""" |
| _test_reshape(np.arange(6.0), [2, 3]) |
| _test_reshape(np.arange(6), [-1, 2]) |
| _test_reshape(np.arange(6), [3, -1]) |
| _test_reshape(np.arange(6), [-1]) |
| _test_reshape_with_call() |
| _test_reshape_like(np.zeros((3, 6)), np.zeros((9, 2))) |
| _test_reshape_symbolic(np.arange(6.0), np.array([2, 0]), np.array([0, 3])) |
| _test_reshape_symbolic(np.arange(6), np.array([-1, 0]), np.array([0, 2])) |
| _test_reshape_symbolic(np.arange(6), np.array([3, 0]), np.array([3, -1])) |
| _test_reshape_symbolic(np.arange(6), np.array([0]), np.array([-1])) |
| |
| |
| ####################################################################### |
| # DepthToSpace |
| # ------------ |
| |
| |
| def _test_depthtospace(data, block_size): |
| """One iteration of depth_to_space operation with given data and block size""" |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype) |
| array_ops.depth_to_space(in_data, block_size) |
| |
| compare_tf_with_tvm(data, "Placeholder:0", "DepthToSpace:0") |
| |
| |
def test_forward_depthtospace():
    """DepthToSpace"""
| _test_depthtospace(np.random.normal(size=[1, 32, 32, 4]), 2) |
| _test_depthtospace(np.random.normal(size=[1, 16, 8, 32]), 4) |
| |
| |
| ####################################################################### |
| # SpaceToDepth |
| # ------------ |
| |
| |
| def _test_spacetodepth(data, block_size): |
| """One iteration of space_to_depth operation with given data and block size""" |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype) |
| array_ops.space_to_depth(in_data, block_size) |
| |
| compare_tf_with_tvm(data, "Placeholder:0", "SpaceToDepth:0") |
| |
| |
def test_forward_spacetodepth():
    """SpaceToDepth"""
| _test_spacetodepth(np.random.normal(size=[1, 32, 32, 4]), 2) |
| _test_spacetodepth(np.random.normal(size=[1, 16, 8, 32]), 4) |
| |
| |
| ####################################################################### |
| # Squeeze |
| # ------- |
| |
| |
| def _test_squeeze(data, squeeze_dims=None): |
| """One iteration of squeeze""" |
| |
| if squeeze_dims is None: |
| squeeze_dims = [] |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype) |
| |
| if squeeze_dims: |
| array_ops.squeeze(in_data, squeeze_dims) |
| else: |
| array_ops.squeeze(in_data) |
| |
| compare_tf_with_tvm(data, "Placeholder:0", "Squeeze:0") |
| |
| |
| def test_forward_squeeze(): |
| """Squeeze""" |
| |
| # Nothing to squeeze. |
| _test_squeeze(np.arange(2).reshape((2))) |
| _test_squeeze(np.arange(6).reshape((2, 3))) |
| |
| # Squeeze the middle element away. |
| _test_squeeze(np.arange(4).reshape((2, 1, 2))) |
| |
| # Squeeze on both ends. |
| _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1))) |
| |
| # Positive squeeze dim index. |
| _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [0]) |
| _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [2, 4]) |
| _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [0, 4, 2]) |
| |
| # Negative squeeze dim index. |
| _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [-1]) |
| _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [-3, -5]) |
| _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [-3, -5, -1]) |
| |
| |
| ####################################################################### |
| # TensorArray |
| # ----------- |
| def test_tensor_array_write_read(): |
| """Tensor array write read""" |
| |
| def run(dtype_str, infer_shape, element_shape): |
| with tf.Graph().as_default(): |
| dtype = tf_dtypes[dtype_str] |
| np_data = np.array([[1.0, 2.0], [3.0, 4.0]]).astype(dtype_str) |
| _ = [np_data, np_data] |
| t1 = tf.constant(np_data, dtype=dtype) |
| t2 = tf.constant(np_data, dtype=dtype) |
| ta1 = tf.TensorArray( |
| dtype=dtype, size=2, infer_shape=infer_shape, element_shape=element_shape |
| ) |
| ta2 = ta1.write(0, t1) |
| ta3 = ta2.write(1, t2) |
| _ = ta3.read(0) |
| _ = tf.get_default_graph() |
| compare_tf_with_tvm([], [], "TensorArrayReadV3:0", mode="vm") |
| |
| for dtype in ["float32", "int8"]: |
| run(dtype, False, None) |
| run(dtype, False, tf.TensorShape([None, 2])) |
| run(dtype, True, None) |
| |
| |
| def test_tensor_array_scatter(): |
| """Tensor array scatter""" |
| |
| def run(dtype_str, infer_shape): |
| with tf.Graph().as_default(): |
| dtype = tf_dtypes[dtype_str] |
| if infer_shape: |
| element_shape = tf.TensorShape([tf.Dimension(None)]) |
| else: |
| element_shape = None |
| ta0 = _construct_scatter(dtype, dtype_str, element_shape, infer_shape, 3) |
| _ = ta0.read(0) |
| _ = ta0.read(1) |
| _ = ta0.read(2) |
| ta1 = _construct_scatter(dtype, dtype_str, element_shape, infer_shape, 4) |
| out4 = ta1.read(0) |
| _ = tf.get_default_graph() |
| compare_tf_with_tvm([], [], ["TensorArrayReadV3:0"], mode="vm") |
| compare_tf_with_tvm([], [], ["TensorArrayReadV3_1:0"], mode="vm") |
| compare_tf_with_tvm([], [], ["TensorArrayReadV3_2:0"], mode="vm") |
| compare_tf_with_tvm([], [], ["TensorArrayReadV3_2:0", out4.name], mode="vm") |
| |
| def _construct_scatter(dtype, dtype_str, element_shape, infer_shape, size): |
| arr = [[float(i)] for i in range(size)] # pylint: disable=unnecessary-comprehension |
| indices_arr = list(range(size - 1, -1, -1)) |
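        # Scatter the rows in reverse order so that subsequent reads exercise
        # the index mapping rather than an identity copy.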
| |
| t = tf.constant(np.array(arr).astype(dtype_str), dtype=dtype) |
| indices = tf.constant(indices_arr) |
| ta1 = tf.TensorArray( |
| dtype=dtype, size=size, infer_shape=infer_shape, element_shape=element_shape |
| ) |
| ta2 = ta1.scatter(indices, t) |
| return ta2 |
| |
| for dtype in ["float32", "int8"]: |
| run(dtype, False) |
| run(dtype, True) |
| |
| |
| def test_tensor_array_gather(): |
| """tensor array gather""" |
| |
| def run(dtype_str, infer_shape): |
| with tf.Graph().as_default(): |
| dtype = tf_dtypes[dtype_str] |
| t = tf.constant(np.array([[1.0], [2.0], [3.0]]).astype(dtype_str)) |
| scatter_indices = tf.constant([2, 1, 0]) |
| gather_indices = tf.constant([1, 2]) |
| ta1 = tf.TensorArray(dtype=dtype, size=3, infer_shape=infer_shape) |
| ta2 = ta1.scatter(scatter_indices, t) |
| _ = ta2.gather(gather_indices) |
| _ = tf.get_default_graph() |
| compare_tf_with_tvm([], [], ["TensorArrayGatherV3:0"], mode="vm") |
| |
| for dtype in ["float32", "int8"]: |
| run(dtype, True) |
| |
| |
| def test_tensor_array_split(): |
| """tensor array split""" |
| |
| def run(dtype_str, infer_shape): |
| with tf.Graph().as_default(): |
| dtype = tf_dtypes[dtype_str] |
| t = tf.constant( |
| np.array([[1.0], [2.0], [3.0], [4.0], [5.0], [6.0], [7.0], [8.0]]).astype( |
| dtype_str |
| ), |
| dtype=dtype, |
| ) |
| split_length = tf.constant([2, 2, 2, 2], dtype=tf.int32) |
| ta1 = tf.TensorArray(dtype=dtype, size=4, infer_shape=infer_shape) |
| ta2 = ta1.split(t, split_length) |
| _ = ta2.read(0) |
| _ = ta2.read(1) |
| _ = ta2.read(2) |
| _ = ta2.read(3) |
| _ = tf.get_default_graph() |
| compare_tf_with_tvm([], [], ["TensorArrayReadV3:0"], mode="debug") |
| compare_tf_with_tvm([], [], ["TensorArrayReadV3_1:0"], mode="debug") |
| compare_tf_with_tvm([], [], ["TensorArrayReadV3_2:0"], mode="debug") |
| compare_tf_with_tvm([], [], ["TensorArrayReadV3_3:0"], mode="debug") |
| |
| for dtype in ["float32", "int8"]: |
| run(dtype, False) |
| run(dtype, True) |
| |
| |
| def test_tensor_array_concat(): |
| """Tensor array concat""" |
| |
| def run(dtype_str, infer_shape): |
| with tf.Graph().as_default(): |
| dtype = tf_dtypes[dtype_str] |
| t = tf.constant( |
| np.array([[1.0], [2.0], [3.0], [4.0], [5.0], [6.0], [7.0], [8.0]]).astype( |
| dtype_str |
| ), |
| dtype=dtype, |
| ) |
| split_length = tf.constant([2, 2, 2, 2], dtype=tf.int32) |
| ta1 = tf.TensorArray(dtype=dtype, size=4, infer_shape=infer_shape) |
| ta2 = ta1.split(t, split_length) |
| t = ta2.concat() |
| _ = tf.identity(t) |
| compare_tf_with_tvm([], [], ["Identity:0"], mode="debug") |
| |
| for dtype in ["float32", "int8"]: |
| run(dtype, False) |
| run(dtype, True) |
| |
| |
| def test_tensor_array_size(): |
| """Tensor array size""" |
| if package_version.parse(tf.VERSION) >= package_version.parse("1.15.0"): |
        pytest.skip("Needs fixing for tensorflow >= 1.15.0")
| |
| def run(dtype_str, infer_shape): |
| with tf.Graph().as_default(): |
| dtype = tf_dtypes[dtype_str] |
| np_data = np.array([[1.0, 2.0], [3.0, 4.0]]).astype(dtype_str) |
| _ = [np_data, np_data] |
| t1 = tf.constant(np_data, dtype=dtype) |
| t2 = tf.constant(np_data, dtype=dtype) |
| ta1 = tf.TensorArray(dtype=dtype, size=2, infer_shape=infer_shape) |
| ta2 = ta1.write(0, t1) |
| ta3 = ta2.write(1, t2) |
| _ = ta3.size() |
| _ = tf.get_default_graph() |
| compare_tf_with_tvm([], [], "TensorArraySizeV3:0", mode="debug") |
| |
| for dtype in ["float32", "int8"]: |
| run(dtype, False) |
| run(dtype, True) |
| |
| |
| def test_tensor_array_stack(): |
| """Tensor array stack""" |
| |
| def run(dtype_str, infer_shape): |
| if package_version.parse(tf.VERSION) >= package_version.parse("1.15.0"): |
            pytest.skip("Needs fixing for tensorflow >= 1.15.0")
| |
| with tf.Graph().as_default(): |
| dtype = tf_dtypes[dtype_str] |
| t = tf.constant(np.array([[1.0], [2.0], [3.0]]).astype(dtype_str)) |
| scatter_indices = tf.constant([2, 1, 0]) |
| ta1 = tf.TensorArray(dtype=dtype, size=3, infer_shape=infer_shape) |
| ta2 = ta1.scatter(scatter_indices, t) |
            _ = ta2.stack()
| _ = tf.get_default_graph() |
| |
| compare_tf_with_tvm([], [], ["TensorArrayStack/TensorArrayGatherV3:0"], mode="vm") |
| |
| for dtype in ["float32", "int8"]: |
| run(dtype, True) |
| |
| |
| def test_tensor_array_unstack(): |
| """Tensor array unstack""" |
| |
| def run(dtype_str, input_shape, infer_shape): |
| if package_version.parse(tf.VERSION) >= package_version.parse("1.15.0"): |
            pytest.skip("Needs fixing for tensorflow >= 1.15.0")
| |
| with tf.Graph().as_default(): |
| dtype = tf_dtypes[dtype_str] |
| t = tf.constant(np.random.choice([0, 1, 2, 3], size=input_shape).astype(dtype.name)) |
| ta1 = tf.TensorArray(dtype=dtype, infer_shape=infer_shape, size=input_shape[0]) |
| ta2 = ta1.unstack(t) |
| _ = ta2.size() |
| _ = ta2.read(0) |
| compare_tf_with_tvm([], [], "TensorArraySizeV3:0", mode="debug") |
| compare_tf_with_tvm([], [], "TensorArrayReadV3:0", mode="debug") |
| |
| for dtype in ["float32", "int8"]: |
| run(dtype, (5,), False) |
| run(dtype, (5, 5), True) |
| run(dtype, (5, 5, 5), False) |
| run(dtype, (5, 5, 5, 5), True) |
| |
| |
| ####################################################################### |
| # ConcatV2 |
| # -------- |
| |
| |
| def _test_concat_v2(shape1, shape2, dim): |
| """One iteration of ConcatV2""" |
| |
| with tf.Graph().as_default(): |
| dtype = "float32" |
| in1 = tf.placeholder(shape=shape1, dtype=dtype, name="in1") |
| in2 = tf.placeholder(shape=shape2, dtype=dtype, name="in2") |
| array_ops.concat_v2([in1, in2], dim) |
| |
| np_data1 = np.random.uniform(size=shape1).astype(dtype) |
| np_data2 = np.random.uniform(size=shape2).astype(dtype) |
| |
| compare_tf_with_tvm([np_data1, np_data2], ["in1:0", "in2:0"], "ConcatV2:0") |
| |
| |
def test_forward_concat_v2():
    """ConcatV2"""
| if tf.__version__ < LooseVersion("1.4.1"): |
| return |
| |
| _test_concat_v2([2, 3], [2, 3], 0) |
| _test_concat_v2([10, 3, 5], [2, 3, 5], 0) |
| _test_concat_v2([2, 3], [2, 3], 1) |
| _test_concat_v2([5, 8], [5, 4], 1) |
| _test_concat_v2([2, 8, 5], [2, 8, 6], -1) |
| |
| |
| ####################################################################### |
| # Sigmoid |
| # ------- |
| |
| |
| def _test_sigmoid(data): |
| """One iteration of sigmoid""" |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype) |
| _ = math_ops.sigmoid(in_data) |
| |
| compare_tf_with_tvm(data, "Placeholder:0", "Sigmoid:0") |
| |
| |
| def test_forward_sigmoid(): |
| """Sigmoid""" |
| |
| _test_sigmoid(np.random.uniform(size=(3, 4, 4, 3)).astype("float32")) |
| |
| |
| ####################################################################### |
| # Argmin/Argmax |
| # ------------- |
| |
| |
def _test_argx(func, data, **kwargs):
    """One iteration of argmin/argmax with the given function and attributes"""
| with tf.Graph().as_default(): |
| inp = array_ops.placeholder(shape=data.shape, dtype=data.dtype, name="c0") |
| func(inp, name="argx0", **kwargs) |
| compare_tf_with_tvm(data, "c0:0", "argx0:0") |
| |
| |
def test_forward_argminmax():
    """Argmin/Argmax"""
| for output_type in [tf.int64, tf.int32]: |
| for axis in [None, 0, 1, 2]: |
| data = np.random.uniform(size=(8, 4, 9)).astype("float32") |
| _test_argx(tf.argmax, data=data, axis=axis, output_type=output_type) |
| _test_argx(tf.argmin, data=data, axis=axis, output_type=output_type) |
| |
| |
| ####################################################################### |
| # Variable |
| # -------- |
| |
| |
| def _test_variable(data): |
| """One iteration of a variable""" |
| |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| input_op = array_ops.placeholder(shape=data.shape, dtype=data.dtype) |
| input_tensor = array_ops.reshape(input_op, data.shape) |
| |
| size = input_tensor.shape.dims[1] |
| with variable_scope.variable_scope("linear", reuse=None): |
| w = variable_scope.get_variable("w", shape=[size, size], dtype=input_tensor.dtype) |
| math_ops.matmul(input_tensor, w) |
| |
| compare_tf_with_tvm(data, "Placeholder:0", "MatMul:0", init_global_variables=True) |
| |
| |
| def test_forward_variable(): |
| """Variable type op test""" |
| _test_variable(np.random.uniform(size=(32, 100)).astype("float32")) |
| |
| |
| @tvm.testing.parametrize_targets("llvm", "cuda") |
| def test_read_variable_op(target, dev): |
| """Read Variable op test""" |
| |
| tf.reset_default_graph() |
| data = np.random.uniform(size=(32, 100)).astype("float32") |
| input_tensor = array_ops.placeholder(shape=data.shape, dtype=data.dtype) |
| |
| size = input_tensor.shape.dims[1] |
| var_data = np.random.uniform(-5, 5, size=[size, size]).astype(np.float32) |
| input_var = tf.Variable(var_data, name="var1", use_resource=True) |
| math_ops.matmul(input_tensor, input_var) |
| |
| out_name = ["MatMul:0"] |
| out_node = ["MatMul"] |
| in_name = ["Placeholder:0"] |
| in_node = ["Placeholder"] |
| in_data = [data] |
| |
| with tf.Session() as sess: |
| sess.run(variables.global_variables_initializer()) |
| |
| final_graph_def = sess.graph.as_graph_def(add_shapes=True) |
| tf_output = run_tf_graph(sess, in_data, in_name, out_name) |
| |
| shape_dict = {e: i.shape for e, i in zip(in_name, in_data)} |
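        # Importing a graph with unfrozen (resource) variables is expected to
        # fail with a "Graph is not frozen" error.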
| with pytest.raises(Exception) as execinfo: |
| with tvm.testing.disable_span_filling(): |
| mod, _ = relay.frontend.from_tensorflow( |
| final_graph_def, layout=None, shape=shape_dict, outputs=None |
| ) |
| with tvm.testing.enable_span_filling(): |
| mod_with_span, _ = relay.frontend.from_tensorflow( |
| final_graph_def, layout=None, shape=shape_dict, outputs=None |
| ) |
| assert tvm.ir.structural_equal(mod["main"], mod_with_span["main"]) |
| |
| assert execinfo.value.args[0].startswith("Graph is not frozen. Provide a frozen graph") |
| |
        # Now convert the variables to constants and run inference on the
        # converted graph.
| final_graph_def = tf.graph_util.convert_variables_to_constants( |
| sess, |
| sess.graph.as_graph_def(add_shapes=True), |
| out_node, |
| ) |
| |
| tvm_output = run_tvm_graph( |
| final_graph_def, |
| in_data, |
| in_node, |
| target=target, |
| out_names=out_name, |
| num_output=len(out_name), |
| ) |
| for i, tf_out in enumerate(tf_output): |
| tvm.testing.assert_allclose(tf_out, tvm_output[i], atol=1e-4, rtol=1e-5) |
| |
| sess.close() |
| |
| |
| ####################################################################### |
| # MatMul, BatchMatMul, BatchMatMulV2 |
| # ---------------------------------- |
| |
| |
| def _test_matmul(i, j, k, dtype, outer=None): |
| """One iteration of matmul""" |
| |
| A_shape_init = [i, j] |
| B_shape_init = [j, k] |
| |
| for transpose_a in [False, True]: |
| for transpose_b in [False, True]: |
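            # `outer` optionally prepends batch dimensions; transposed operand
            # shapes are built by reversing the base [i, j] / [j, k] shapes.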
| outer = outer or [] |
| A_shape = outer + (A_shape_init[::-1] if transpose_a else A_shape_init) |
| B_shape = outer + (B_shape_init[::-1] if transpose_b else B_shape_init) |
| |
| with tf.Graph().as_default(): |
| A = tf.placeholder(shape=A_shape, dtype=dtype, name="A") |
| B = tf.placeholder(shape=B_shape, dtype=dtype, name="B") |
| result = tf.matmul(A, B, transpose_a=transpose_a, transpose_b=transpose_b) |
| |
| A_np = np.random.uniform(high=5.0, size=A_shape).astype(dtype) |
| B_np = np.random.uniform(high=5.0, size=B_shape).astype(dtype) |
| compare_tf_with_tvm( |
| [A_np, B_np], [A.name, B.name], result.name, convert_config={"use_dense": True} |
| ) |
| compare_tf_with_tvm( |
| [A_np, B_np], [A.name, B.name], result.name, convert_config={"use_dense": False} |
| ) |
| |
| |
| def test_forward_matmul(): |
| """MatMul op test""" |
| _test_matmul(1, 3, 6, "int32") |
| _test_matmul(5, 3, 1, "float64") |
| |
| |
def _test_batch_matmul(A_shape, B_shape, dtype, adjoint_a=False, adjoint_b=False):
    """One iteration of batch matmul"""

| with tf.Graph().as_default(): |
| A = tf.placeholder(shape=A_shape, dtype=dtype, name="A") |
| B = tf.placeholder(shape=B_shape, dtype=dtype, name="B") |
| result = tf.matmul(A, B, adjoint_a=adjoint_a, adjoint_b=adjoint_b, name="batchmatmul") |
| |
| A_np = np.random.uniform(high=5.0, size=A_shape).astype(dtype) |
| B_np = np.random.uniform(high=5.0, size=B_shape).astype(dtype) |
| compare_tf_with_tvm( |
| [A_np, B_np], |
| [A.name, B.name], |
| result.name, |
| convert_config={"use_nt_batch_matmul": True}, |
| ) |
| compare_tf_with_tvm( |
| [A_np, B_np], |
| [A.name, B.name], |
| result.name, |
| convert_config={"use_nt_batch_matmul": False}, |
| ) |
| |
| |
| def _test_batch_matmul_dynamic( |
| A_shape, B_shape, A_np_shape, B_np_shape, dtype, adjoint_a=False, adjoint_b=False |
| ): |
| with tf.Graph().as_default(): |
| A = tf.placeholder(shape=A_shape, dtype=dtype, name="A") |
| B = tf.placeholder(shape=B_shape, dtype=dtype, name="B") |
| result = tf.matmul(A, B, adjoint_a=adjoint_a, adjoint_b=adjoint_b, name="batchmatmul") |
| |
| A_np = np.random.uniform(high=5.0, size=A_np_shape).astype(dtype) |
| B_np = np.random.uniform(high=5.0, size=B_np_shape).astype(dtype) |
| # for now, in TOPI, only llvm & cublas's implementation support dynamic shape |
| # TODO add more backends support in TOPI |
| compare_tf_with_tvm( |
| [A_np, B_np], |
| [A.name, B.name], |
| result.name, |
| mode="vm", |
| targets=["llvm", "cuda -libs=cublas"], |
| convert_config={"use_nt_batch_matmul": True}, |
| ) |
| compare_tf_with_tvm( |
| [A_np, B_np], |
| [A.name, B.name], |
| result.name, |
| mode="vm", |
| targets=["llvm", "cuda -libs=cublas"], |
| convert_config={"use_nt_batch_matmul": False}, |
| ) |
| |
| |
| def test_forward_batch_matmul(): |
| """TF op BatchMatMul, BatchMatMulV2 test""" |
| _test_batch_matmul((3, 5, 4), (3, 4, 5), "int32") |
| _test_batch_matmul((3, 5, 4), (3, 4, 5), "float32", True, True) |
| _test_batch_matmul((3, 5, 4), (3, 5, 4), "int32", True, False) |
| _test_batch_matmul((3, 5, 4), (3, 5, 4), "float32", False, True) |
| _test_batch_matmul((2, 3, 4, 5, 6), (2, 3, 4, 6, 5), "int32") |
| _test_batch_matmul((1, 2, 3, 4, 5, 6), (1, 2, 3, 4, 6, 5), "float32", True, True) |
| _test_batch_matmul((3, 4, 5, 6), (3, 4, 5, 6), "int32", True, False) |
| _test_batch_matmul((2, 3, 4, 2, 3, 4, 5, 6), (2, 3, 4, 2, 3, 4, 5, 6), "float32", False, True) |
| _test_batch_matmul((1, 8, 64, 2), (2, 1), "float32", False, False) |
| _test_batch_matmul((1, 8, 8, 64), (64, 1), "float32", False, False) |
| _test_batch_matmul((1, 8, 64), (64, 1), "float32", False, False) |
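

# A minimal NumPy sketch of the rank-broadcasting behavior covered above:
# BatchMatMulV2 broadcasts leading batch dimensions the same way np.matmul
# does. Illustrative only; not collected by pytest.
def _batch_matmul_broadcast_sketch():
    """Illustrative only: leading batch dims broadcast as in np.matmul."""
    a = np.random.uniform(size=(2, 3, 5, 4)).astype("float32")
    b = np.random.uniform(size=(4, 6)).astype("float32")  # broadcast over (2, 3)
    assert np.matmul(a, b).shape == (2, 3, 5, 6)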
| |
| |
| def test_forward_batch_matmul_dynamic(): |
| """Dynamic batch matmul""" |
| _test_batch_matmul_dynamic((None, 5, 4), (None, 4, 5), (3, 5, 4), (3, 4, 5), "int32") |
| _test_batch_matmul_dynamic( |
| (None, 5, 4), (None, 4, 5), (3, 5, 4), (3, 4, 5), "float32", True, True |
| ) |
| _test_batch_matmul_dynamic( |
| (None, 5, 4), (None, 5, 4), (3, 5, 4), (3, 5, 4), "int32", True, False |
| ) |
| _test_batch_matmul_dynamic( |
| (None, 5, 4), (None, 5, 4), (3, 5, 4), (3, 5, 4), "float32", False, True |
| ) |
| _test_batch_matmul_dynamic( |
| (None, 4, 5, 6), (None, 4, 6, 5), (3, 4, 5, 6), (3, 4, 6, 5), "float32" |
| ) |
| _test_batch_matmul_dynamic( |
| (None, None, 5, 6), (None, None, 6, 5), (3, 4, 5, 6), (3, 4, 6, 5), "float32" |
| ) |
| _test_batch_matmul_dynamic( |
| (None, None, None, 5, 6), |
| (None, None, None, 6, 5), |
| (2, 3, 4, 5, 6), |
| (2, 3, 4, 6, 5), |
| "float32", |
| ) |
| _test_batch_matmul_dynamic( |
| (None, None, None, 5, 6), |
| (6, None), |
| (2, 3, 4, 5, 6), |
| (6, 1), |
| "float32", |
| ) |
| _test_batch_matmul_dynamic( |
| (None, 5, 6), |
| (6, None), |
| (24, 5, 6), |
| (6, 1), |
| "float32", |
| ) |
| |
| |
| ####################################################################### |
| # SparseTensorDenseMatMul |
| # ---------------------------------- |
| |
| |
| def _test_sparse_dense_matmul(indices, values, A_inp_shape, B_inp_shape, dtype, flip=False): |
| """One iteration of sparse_dense_matmul""" |
| |
| for adjoint_a in [False, True]: |
| for adjoint_b in [False, True]: |
| A_shape = A_inp_shape[::-1] if adjoint_a else A_inp_shape |
| B_shape = B_inp_shape[::-1] if adjoint_b else B_inp_shape |
| |
| with tf.Graph().as_default(): |
| A_sp = tf.sparse.SparseTensor(indices=indices, values=values, dense_shape=A_shape) |
| B = tf.placeholder(shape=B_shape, dtype=dtype, name="B") |
| |
| if flip: |
| result = tf.sparse.sparse_dense_matmul( |
| B, A_sp, adjoint_a=adjoint_b, adjoint_b=adjoint_a |
| ) |
| else: |
| result = tf.sparse.sparse_dense_matmul( |
| A_sp, B, adjoint_a=adjoint_a, adjoint_b=adjoint_b |
| ) |
| |
| B_np = np.random.uniform(high=5.0, size=B_shape).astype(dtype) |
| |
| compare_tf_with_tvm([B_np], [B.name], result.name) |
| |
| |
| def test_forward_sparse_dense_matmul(): |
| """sparse_dense_matmul op test""" |
| ################################################################### |
| # |
    # In order to create a SparseTensor, three inputs are required, as below:
    #   SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
    #
    # The sparse tensor above can be represented densely as:
    #   [[1, 0, 0, 0]
    #    [0, 0, 2, 0]
    #    [0, 0, 0, 0]]
| # |
| # ------------------------------------------------------------------ |
| |
| _test_sparse_dense_matmul([[0, 0], [1, 2]], [4.0, 8.0], [3, 4], [4, 3], "float32") |
| _test_sparse_dense_matmul([[0, 0], [1, 2]], [4.0, 8.0], [3, 3], [3, 3], "float32") |
| _test_sparse_dense_matmul([[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [5, 5], [5, 5], "float32") |
| _test_sparse_dense_matmul([[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [7, 9], [9, 5], "float32") |
| _test_sparse_dense_matmul([[0, 0], [1, 2]], [4.0, 8.0], [4, 3], [3, 4], "float32", True) |
| _test_sparse_dense_matmul([[0, 0], [1, 2]], [4.0, 8.0], [3, 3], [3, 3], "float32", True) |
| _test_sparse_dense_matmul( |
| [[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [5, 5], [5, 5], "float32", True |
| ) |
| _test_sparse_dense_matmul( |
| [[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [9, 5], [7, 9], "float32", True |
| ) |
| |
| |
| ####################################################################### |
| # SparseFillEmptyRows |
| # ------------ |
| |
| |
| def _test_sparse_fill_empty_rows(indices_np, values_np, dense_shape_np, default_value_int, use_dyn): |
| with tf.Graph().as_default(): |
| if use_dyn: |
| indices = tf.placeholder(shape=(None, None), dtype=indices_np.dtype, name="indices") |
| values = tf.placeholder(shape=(None), dtype=values_np.dtype, name="values") |
| dense_shape = tf.placeholder( |
| shape=(None), dtype=dense_shape_np.dtype, name="dense_shape" |
| ) |
| else: |
| indices = tf.placeholder(shape=indices_np.shape, dtype=indices_np.dtype, name="indices") |
| values = tf.placeholder(shape=values_np.shape, dtype=values_np.dtype, name="values") |
| dense_shape = tf.placeholder( |
| shape=dense_shape_np.shape, dtype=dense_shape_np.dtype, name="dense_shape" |
| ) |
| |
| default_value = tf.placeholder(shape=(), dtype=values_np.dtype, name="default_value") |
| sp_input = tf.sparse.SparseTensor(indices=indices, values=values, dense_shape=dense_shape) |
| _ = tf.sparse.fill_empty_rows(sp_input, default_value, name="sparse_fill_empty_rows") |
| compare_tf_with_tvm( |
| [indices_np, values_np, dense_shape_np, default_value_int], |
| [indices.name, values.name, dense_shape.name, default_value.name], |
| [ |
| "sparse_fill_empty_rows/SparseFillEmptyRows:0", |
| "sparse_fill_empty_rows/SparseFillEmptyRows:1", |
| "sparse_fill_empty_rows/SparseFillEmptyRows:2", |
| ], |
| mode="vm", |
| ) |
| |
| |
| @pytest.mark.parametrize( |
| "sparse_indices_np, sparse_values_np, dense_shape_np, default_value_int", |
| [ |
| ( |
| np.array([[1, 1], [0, 3], [0, 1], [2, 0], [3, 1]], dtype=np.int64), |
| np.array([1, 2, 3, 4, 5], dtype=np.int64), |
| np.array([5, 6], dtype=np.int64), |
| 10, |
| ), |
| ( |
| np.array([[1, 1], [0, 3], [2, 0], [3, 1]], dtype=np.int64), |
| np.array([1, 2, 3, 4], dtype=np.int64), |
| np.array([5, 6], dtype=np.int64), |
| 10, |
| ), |
| ( |
| np.array([[0, 1], [0, 3], [2, 0], [3, 1]], dtype=np.int64), |
| np.array([1, 2, 3, 4], dtype=np.int64), |
| np.array([5, 6], dtype=np.int64), |
| 10, |
| ), |
| ( |
| np.array([[1, 1, 1], [1, 3, 1], [2, 0, 5], [3, 1, 6]], dtype=np.int64), |
| np.array([1, 2, 3, 4], dtype=np.int64), |
| np.array([7, 7, 7], dtype=np.int64), |
| 5, |
| ), |
| ( |
| np.array([[1], [2]], dtype=np.int64), |
| np.array([7, 8], dtype=np.int64), |
| np.array([5], dtype=np.int64), |
| 4, |
| ), |
| ( |
| np.ones((0, 1), dtype=np.int64), |
| np.array([], dtype=np.int64), |
| np.array([5], dtype=np.int64), |
| 4, |
| ), |
| ( |
| np.ones((0, 3), dtype=np.int64), |
| np.array([], dtype=np.int64), |
| np.array([9, 3, 7], dtype=np.int64), |
| 100, |
| ), |
| ], |
| ) |
| @pytest.mark.parametrize("use_dyn", [True, False]) |
| def test_forward_sparse_fill_empty_rows( |
| sparse_indices_np, sparse_values_np, dense_shape_np, default_value_int, use_dyn |
| ): |
| """sparse_fill_empty_rows op test""" |
| ################################################################### |
| # |
    # In order to create a SparseTensor, three inputs are required, as below:
    #   SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
    #
    # The sparse tensor above can be represented densely as:
    #   [[1, 0, 0, 0]
    #    [0, 0, 2, 0]
    #    [0, 0, 0, 0]]
| # |
| # ------------------------------------------------------------------ |
| _test_sparse_fill_empty_rows( |
| sparse_indices_np, sparse_values_np, dense_shape_np, default_value_int, use_dyn |
| ) |
| |
| |
| ####################################################################### |
| # SparseReshape |
| # ------------ |
| |
| |
| def _test_sparse_reshape(indices_np, values_np, prev_shape_np, new_shape_np, use_dyn=False): |
| with tf.Graph().as_default(): |
| if use_dyn: |
| indices = tf.placeholder(shape=(None, None), dtype=indices_np.dtype, name="indices") |
| values = tf.placeholder(shape=(None), dtype=values_np.dtype, name="values") |
| prev_shape = tf.placeholder(shape=(None), dtype=prev_shape_np.dtype, name="prev_shape") |
| new_shape = tf.placeholder(shape=(None), dtype=new_shape_np.dtype, name="new_shape") |
| else: |
| indices = tf.placeholder(shape=indices_np.shape, dtype=indices_np.dtype, name="indices") |
| values = tf.placeholder(shape=values_np.shape, dtype=values_np.dtype, name="values") |
| prev_shape = tf.placeholder( |
| shape=prev_shape_np.shape, dtype=prev_shape_np.dtype, name="prev_shape" |
| ) |
| new_shape = tf.placeholder( |
| shape=new_shape_np.shape, dtype=new_shape_np.dtype, name="new_shape" |
| ) |
| sp_input = tf.sparse.SparseTensor(indices=indices, values=values, dense_shape=prev_shape) |
| |
| _ = tf.sparse.reshape(sp_input, new_shape, name="sparse_reshape") |
| compare_tf_with_tvm( |
| [indices_np, values_np, prev_shape_np, new_shape_np], |
| [indices.name, values.name, prev_shape.name, new_shape.name], |
| ["sparse_reshape:0", "sparse_reshape:1", "sparse_reshape/Identity:0"], |
| mode="vm", |
| ) |
| |
| |
| @pytest.mark.parametrize( |
| "sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np", |
| [ |
| ( |
| np.ones((0, 1), dtype=np.int64), |
| np.array([], dtype=np.int64), |
| np.array([4], dtype=np.int64), |
| np.array([2, -1], dtype=np.int64), |
| ), |
| ( |
| np.ones((0, 1), dtype=np.int64), |
| np.array([], dtype=np.int64), |
| np.array([4], dtype=np.int64), |
| np.array([2, 2], dtype=np.int64), |
| ), |
| ( |
| np.ones((0, 2), dtype=np.int64), |
| np.array([], dtype=np.int64), |
| np.array([3, 6], dtype=np.int64), |
| np.array([-1, 2], dtype=np.int64), |
| ), |
| ( |
| np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [1, 2, 3]], dtype=np.int64), |
| np.array([7, 5, 6, 3, 9], dtype=np.int64), |
| np.array([2, 3, 6], dtype=np.int64), |
| np.array([-1, 9], dtype=np.int64), |
| ), |
| ( |
| np.array( |
| [ |
| [0, 0, 0, 0, 0], |
| [0, 0, 1, 2, 3], |
| [0, 1, 0, 3, 5], |
| [1, 0, 0, 4, 6], |
| [1, 2, 3, 6, 8], |
| ], |
| dtype=np.int64, |
| ), |
| np.array([7, 5, 6, 3, 9], dtype=np.int64), |
| np.array([2, 3, 6, 7, 9], dtype=np.int64), |
| np.array([9, -1, 7], dtype=np.int64), |
| ), |
| ( |
| np.array([[0, 0], [0, 1], [3, 4], [4, 3], [7, 3]], dtype=np.int64), |
| np.array([7, 5, 6, 3, 9], dtype=np.int64), |
| np.array([9, 4], dtype=np.int64), |
| np.array([-1], dtype=np.int64), |
| ), |
| ( |
| np.array([[0], [5], [10], [20], [24]], dtype=np.int64), |
| np.array([7, 5, 6, 3, 9], dtype=np.int64), |
| np.array([25], dtype=np.int64), |
| np.array([5, 5], dtype=np.int64), |
| ), |
| ( |
| np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64), |
| np.array([7, 5, 6, 3, 9], dtype=np.int64), |
| np.array([500, 20], dtype=np.int64), |
| np.array([500, 20], dtype=np.int64), |
| ), |
| ( |
| np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64), |
| np.array([7, 5, 6, 3, 9], dtype=np.int64), |
| np.array([500, 20], dtype=np.int64), |
| np.array([500, -1], dtype=np.int64), |
| ), |
| ( |
| np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64), |
| np.array([7, 5, 6, 3, 9], dtype=np.int64), |
| np.array([500, 20], dtype=np.int64), |
| np.array([250, 40], dtype=np.int64), |
| ), |
| ], |
| ) |
| @pytest.mark.parametrize("use_dyn", [True, False]) |
| def test_forward_sparse_reshape( |
| sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np, use_dyn |
| ): |
| """sparse_reshape op test""" |
| ################################################################### |
| # |
    # In order to create a SparseTensor, three inputs are required, as below:
    #   SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
    #
    # The sparse tensor above can be represented densely as:
    #   [[1, 0, 0, 0]
    #    [0, 0, 2, 0]
    #    [0, 0, 0, 0]]
| # |
| # ------------------------------------------------------------------ |
| _test_sparse_reshape(sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np, use_dyn) |
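

# A minimal NumPy sketch of how SparseReshape remaps indices: each index is
# linearized against the old shape and then re-divided by the new one.
# Illustrative only; not collected by pytest.
def _sparse_reshape_sketch():
    """Illustrative only: index remapping through the flat offset."""
    prev_shape, new_shape = (2, 3), (3, 2)
    flat = np.ravel_multi_index((1, 2), prev_shape)  # 1 * 3 + 2 == 5
    assert tuple(np.unravel_index(flat, new_shape)) == (2, 1)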
| |
| |
| ####################################################################### |
| # Sparse Segment Variants |
| # ------------ |
| |
| |
| def _test_sparse_segment_variant( |
| tf_op, data_np, indices_np, segment_ids_np, num_segments, use_dyn=False |
| ): |
| with tf.Graph().as_default(): |
| if use_dyn: |
| data = tf.placeholder( |
| shape=[None for _ in data_np.shape], dtype=data_np.dtype, name="data" |
| ) |
| indices = tf.placeholder(shape=[None], dtype=indices_np.dtype, name="indices") |
| segment_ids = tf.placeholder( |
| shape=(None), dtype=segment_ids_np.dtype, name="segment_ids" |
| ) |
| else: |
| data = tf.placeholder(shape=data_np.shape, dtype=data_np.dtype, name="data") |
| indices = tf.placeholder(shape=indices_np.shape, dtype=indices_np.dtype, name="indices") |
| segment_ids = tf.placeholder( |
| shape=segment_ids_np.shape, dtype=segment_ids_np.dtype, name="segment_ids" |
| ) |
| |
| _ = tf_op( |
| data, indices, segment_ids, num_segments=num_segments, name="sparse_segment_variant" |
| ) |
| compare_tf_with_tvm( |
| [data_np, indices_np, segment_ids_np], |
| [data.name, indices.name, segment_ids.name], |
| ["sparse_segment_variant:0"], |
| mode="vm", |
| ) |
| |
| |
| @pytest.mark.parametrize( |
| "data_np, indices_np, segment_ids_np, num_segments", |
| [ |
| ( |
| np.array([5, 1, 7, 2, 3, 4], dtype=np.float32), |
| np.array([0, 3, 4], dtype=np.int32), |
| np.array([0, 1, 1], dtype=np.int32), |
| None, |
| ), |
| ( |
| np.array([[1, 2, 3, 4], [-1, -2, -3, -4], [5, 6, 7, 8]], dtype=np.float64), |
| np.array([0, 1], dtype=np.int32), |
| np.array([0, 2], dtype=np.int32), |
| 4, |
| ), |
| ( |
| np.random.random((6, 4, 5)), |
| np.array([0, 2, 4, 3, 1], dtype=np.int32), |
| np.array([0, 0, 1, 5, 5], dtype=np.int32), |
| 100, |
| ), |
| ( |
| np.random.random((6, 4, 5)), |
| np.array([0, 2, 4, 3, 1], dtype=np.int32), |
| np.array([0, 0, 1, 5, 5], dtype=np.int32), |
| None, |
| ), |
| ( |
| np.array([[[1, 7]], [[3, 8]], [[2, 9]]], dtype=np.float64), |
| np.array([0, 1, 2], dtype=np.int32), |
| np.array([0, 0, 1], dtype=np.int32), |
| None, |
| ), |
| ( |
| np.random.random((9, 4, 5, 7)), |
| np.array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=np.int32), |
| np.array([0, 0, 1, 3, 5, 6, 7, 7, 8], dtype=np.int32), |
| 9, |
| ), |
| ( |
| np.random.random((9, 4, 5, 7)), |
| np.array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=np.int32), |
| np.array([0, 0, 1, 3, 5, 6, 7, 7, 8], dtype=np.int32), |
| None, |
| ), |
| ( |
| np.array([[1, 2, 3, 4], [-1, -2, -3, -4], [5, 6, 7, 8]], dtype=np.float64), |
| np.array([0, 1], dtype=np.int32), |
| np.array([0, 2], dtype=np.int32), |
| None, |
| ), |
| ( |
| np.random.random((9, 4, 5, 7)), |
| np.array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=np.int32), |
| np.array([0, 0, 1, 3, 5, 5, 5, 5, 5], dtype=np.int32), |
| 6, |
| ), |
| ], |
| ) |
| @pytest.mark.parametrize("use_dyn", [True, False]) |
| @pytest.mark.parametrize( |
| "tf_op", |
| [ |
| tf.sparse.segment_sum, |
| tf.sparse.segment_sqrt_n, |
| tf.sparse.segment_mean, |
| ], |
| ) |
| def test_forward_sparse_segment_sum_variants( |
| tf_op, |
| data_np, |
| indices_np, |
| segment_ids_np, |
| num_segments, |
| use_dyn, |
| ): |
| """sparse segment sum variants tests""" |
| _test_sparse_segment_variant(tf_op, data_np, indices_np, segment_ids_np, num_segments, use_dyn) |
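

# A minimal NumPy sketch of the sparse segment reduction tested above: rows of
# `data` selected by `indices` are reduced per segment id. Illustrative only;
# not collected by pytest.
def _sparse_segment_sum_sketch():
    """Illustrative only: gather rows by `indices`, then sum per segment id."""
    data = np.array([5.0, 1.0, 7.0, 2.0, 3.0, 4.0])
    indices = np.array([0, 3, 4])
    segment_ids = np.array([0, 1, 1])
    out = np.zeros(segment_ids.max() + 1)
    np.add.at(out, segment_ids, data[indices])  # unbuffered scatter-add
    assert out.tolist() == [5.0, 5.0]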
| |
| |
| ####################################################################### |
| # Math SegmentSum |
| # ------------ |
| |
| |
| def _test_math_segment_sum(data_np, segment_ids_np, use_dyn=False): |
| with tf.Graph().as_default(): |
| if use_dyn: |
| data = tf.placeholder( |
| shape=[None for _ in data_np.shape], dtype=data_np.dtype, name="data" |
| ) |
| segment_ids = tf.placeholder( |
| shape=(None), dtype=segment_ids_np.dtype, name="segment_ids" |
| ) |
| else: |
| data = tf.placeholder(shape=data_np.shape, dtype=data_np.dtype, name="data") |
| segment_ids = tf.placeholder( |
| shape=segment_ids_np.shape, dtype=segment_ids_np.dtype, name="segment_ids" |
| ) |
| |
| _ = tf.math.segment_sum(data, segment_ids, name="segment_sum") |
| compare_tf_with_tvm( |
| [data_np, segment_ids_np], |
| [data.name, segment_ids.name], |
| ["segment_sum:0"], |
| mode="vm", |
| ) |
| |
| |
| @pytest.mark.parametrize( |
| "data_np, segment_ids_np", |
| [ |
| ( |
| np.array([5, 1, 7, 2, 3, 4], dtype=np.float32), |
| np.array([0, 0, 0, 1, 1, 1], dtype=np.int32), |
| ), |
| ( |
| np.array([[1, 2, 3, 4], [-1, -2, -3, -4], [5, 6, 7, 8]], dtype=np.float64), |
| np.array([0, 0, 1], dtype=np.int32), |
| ), |
| ( |
| np.random.random((6, 4, 5)), |
| np.array([0, 0, 1, 2, 2, 3], dtype=np.int64), |
| ), |
| ( |
| np.array([[[1, 7]], [[3, 8]], [[2, 9]]], dtype=np.float32), |
| np.array([0, 0, 1], dtype=np.int32), |
| ), |
| ( |
| np.random.random((9, 4, 5, 7)), |
| np.array([0, 0, 0, 1, 2, 3, 4, 4, 5], dtype=np.int64), |
| ), |
| ], |
| ) |
| @pytest.mark.parametrize("use_dyn", [True, False]) |
| def test_forward_math_segment_sum(data_np, segment_ids_np, use_dyn): |
| """math segment sum test""" |
| _test_math_segment_sum(data_np, segment_ids_np, use_dyn) |
| |
| |
#######################################################################
# tensorflow.compat.v1.sparse_to_dense
# ------------------------------------
| def _test_sparse_to_dense(sparse_indices, sparse_values, default_value, output_shape): |
| with tf.Graph().as_default(): |
| indices = tf.placeholder( |
| shape=sparse_indices.shape, dtype=str(sparse_indices.dtype), name="indices" |
| ) |
| values = tf.placeholder( |
| shape=sparse_values.shape, dtype=str(sparse_values.dtype), name="values" |
| ) |
| oshape = tf.constant(output_shape, shape=output_shape.shape, dtype=str(output_shape.dtype)) |
| |
| # Output shape depends on a dynamic input, use VM. |
| if default_value is None: |
| output = tf.sparse_to_dense(indices, oshape, values) |
| compare_tf_with_tvm( |
| [sparse_indices, sparse_values], ["indices:0", "values:0"], output.name, mode="vm" |
| ) |
| else: |
| dv = tf.placeholder(shape=(), dtype=str(default_value.dtype), name="default_value") |
| output = tf.sparse_to_dense(indices, oshape, values, dv) |
| compare_tf_with_tvm( |
| [sparse_indices, sparse_values, default_value], |
| ["indices:0", "values:0", "default_value:0"], |
| output.name, |
| mode="vm", |
| ) |
| |
| |
| def test_forward_sparse_to_dense(): |
| """Sparse to dense""" |
| # scalar |
| _test_sparse_to_dense( |
| sparse_indices=np.int32(1), |
| sparse_values=np.int32(3), |
| default_value=np.int32(0), |
| output_shape=np.array([5]).astype("int32"), |
| ) |
| |
| # vector |
| _test_sparse_to_dense( |
| sparse_indices=np.array([0, 1, 4]).astype("int32"), |
| sparse_values=np.array([3, 3, 3]).astype("int32"), |
| default_value=np.int32(0), |
| output_shape=np.array([5]).astype("int32"), |
| ) |
| |
| # vector nXd |
| _test_sparse_to_dense( |
| sparse_indices=np.array([[0, 0], [1, 2]]).astype("int32"), |
| sparse_values=np.array([1, 2]).astype("int32"), |
| default_value=np.int32(0), |
| output_shape=np.array([3, 4]).astype("int32"), |
| ) |
| |
| _test_sparse_to_dense( |
| sparse_indices=np.array([[0, 0, 0], [1, 2, 3]]).astype("int32"), |
| sparse_values=np.array([1, 2]).astype("int32"), |
| default_value=np.int32(4), |
| output_shape=np.array([2, 3, 4]).astype("int32"), |
| ) |
| |
| # floats |
| _test_sparse_to_dense( |
| sparse_indices=np.array([0, 1, 4]).astype("int32"), |
| sparse_values=np.array([3.1, 3.1, 3.1]).astype("float32"), |
| default_value=np.float32(3.5), |
| output_shape=np.array([5]).astype("int32"), |
| ) |
| |
| # default value not specified |
| _test_sparse_to_dense( |
| sparse_indices=np.array([0, 1, 4]).astype("int32"), |
| sparse_values=np.array([3.1, 3.1, 3.1]).astype("float32"), |
| default_value=None, |
| output_shape=np.array([5]).astype("int32"), |
| ) |
| |
| |
| ####################################################################### |
| # tensorflow.sparse.to_dense |
| # --------------- |
def _test_sparse_to_dense_v2(indices, values, A_shape, dtype, default_value=None):
    with tf.Graph().as_default():
        # Cast the values to the requested dtype so the `dtype` argument is honored.
        A_sp = tf.sparse.SparseTensor(
            indices=indices, values=np.array(values).astype(dtype), dense_shape=A_shape
        )
| |
| result = tf.sparse.to_dense(A_sp, default_value=default_value) |
| |
| # The output shape depends on a dynamic input, use VM. |
| compare_tf_with_tvm([], [], result.name, mode="vm") |
| |
| |
def test_forward_sparse_to_dense_v2():
    """tensorflow.sparse.to_dense op test"""
| _test_sparse_to_dense_v2([[1]], [3.0], [5], "float32") |
| _test_sparse_to_dense_v2([[1]], [3.0], [5], "float32", 0.3) |
| _test_sparse_to_dense_v2([[0, 0], [1, 2]], [4.0, 8.0], [3, 4], "float32") |
| _test_sparse_to_dense_v2([[0, 0], [1, 2]], [4.0, 8.0], [3, 4], "float32", 1.3) |
| _test_sparse_to_dense_v2([[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [5, 5], "float32") |
| _test_sparse_to_dense_v2([[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [5, 5], "float32", 1.9) |
| |
| |
| ####################################################################### |
| # tensorflow.sparse.add |
| # ---------------------------------- |
| |
| |
| def _test_sparse_add(indices, values, A_shape, B_shape, dtype, flip=False): |
| """One iteration of tf.sparse.add""" |
| |
| # TODO(ANSHUMAN87): support cuda |
| # TODO(ANSHUMAN87): support both sparse input case |
| |
| with tf.Graph().as_default(): |
| A_sp = tf.sparse.SparseTensor( |
| indices=indices, values=np.array(values).astype(dtype), dense_shape=A_shape |
| ) |
| B = tf.placeholder(shape=B_shape, dtype=dtype, name="B") |
| |
        # TODO(ANSHUMAN87): support user input threshold values
| if flip: |
| if package_version.parse(tf.VERSION) < package_version.parse("1.13.0"): |
| result = tf.sparse.add(B, A_sp, thresh=0) |
| else: |
| result = tf.sparse.add(B, A_sp, threshold=0) |
| else: |
| if package_version.parse(tf.VERSION) < package_version.parse("1.13.0"): |
| result = tf.sparse.add(A_sp, B, thresh=0) |
| else: |
| result = tf.sparse.add(A_sp, B, threshold=0) |
| |
| B_np = np.random.uniform(high=5.0, size=B_shape).astype(dtype) |
| |
| compare_tf_with_tvm([B_np], [B.name], result.name, no_gpu=True) |
| |
| |
| def test_sparse_add(): |
| """sparse.add op test""" |
| ################################################################### |
| # |
    # In order to create a SparseTensor, three inputs are required, as below:
    #   SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
    #
    # The sparse tensor above can be represented densely as:
    #   [[1, 0, 0, 0]
    #    [0, 0, 2, 0]
    #    [0, 0, 0, 0]]
| # |
| # ------------------------------------------------------------------ |
| for dtype_inp in ["float32", "float64", "int32"]: |
| _test_sparse_add([[0, 0], [1, 2]], [4.0, 8.0], [3, 4], [3, 4], dtype_inp) |
| _test_sparse_add([[0, 0], [1, 2]], [4.0, 8.0], [3, 4], [3, 4], dtype_inp, True) |
| _test_sparse_add([[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [5, 5], [5, 5], dtype_inp) |
| _test_sparse_add([[0, 0], [1, 3], [4, 3]], [3.0, 6.0, 9.0], [5, 5], [5, 5], dtype_inp, True) |
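

# A minimal NumPy sketch of sparse + dense addition as tested above: the
# sparse values are scattered into (added onto) the dense operand.
# Illustrative only; not collected by pytest.
def _sparse_add_sketch():
    """Illustrative only: scatter-add of sparse entries into a dense tensor."""
    out = np.full((3, 4), 0.5, dtype="float32")
    for (row, col), val in zip([[0, 0], [1, 2]], [4.0, 8.0]):
        out[row, col] += val
    assert out[0, 0] == 4.5 and out[1, 2] == 8.5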
| |
| |
| ####################################################################### |
| # StridedSlice |
| # ------------ |
| |
| |
| def _test_stridedslice( |
| ip_shape, |
| begin, |
| end, |
| stride, |
| dtype, |
| begin_mask=0, |
| end_mask=0, |
| new_axis_mask=0, |
| shrink_axis_mask=0, |
| ellipsis_mask=0, |
| ): |
| """One iteration of a Stridedslice""" |
| |
| tf.reset_default_graph() |
| np_data = np.random.uniform(size=ip_shape).astype(dtype) |
| with tf.Graph().as_default(): |
| if len(ip_shape) == 0: # pylint: disable=len-as-condition |
| in_data = tf.constant(np_data, dtype) |
| else: |
| in_data = tf.placeholder(dtype, ip_shape, name="in_data") |
| tf.strided_slice( |
| in_data, |
| begin, |
| end, |
| stride, |
| begin_mask=begin_mask, |
| end_mask=end_mask, |
| new_axis_mask=new_axis_mask, |
| shrink_axis_mask=shrink_axis_mask, |
| ellipsis_mask=ellipsis_mask, |
| name="strided_slice", |
| ) |
| if len(ip_shape) == 0: # pylint: disable=len-as-condition |
| compare_tf_with_tvm(None, "", "strided_slice:0") |
| else: |
| compare_tf_with_tvm(np_data, "in_data:0", "strided_slice:0") |
| |
| |
| def test_forward_stridedslice(): |
| """test StridedSlice""" |
| |
| _test_stridedslice([], [0], [0], [1], "float32", new_axis_mask=1) |
| _test_stridedslice([2], [1], [1], [1], "float32", shrink_axis_mask=1) |
| _test_stridedslice([4], [-1], [0], [1], "float32", shrink_axis_mask=1) |
| _test_stridedslice([2, 1], [0], [1], [1], "float32", shrink_axis_mask=1) |
| _test_stridedslice([2, 3, 4], [-2], [0], [1], "float32", shrink_axis_mask=8) |
| _test_stridedslice([2, 3, 4], [0], [1], [1], "float32", shrink_axis_mask=8) |
| _test_stridedslice([3, 4, 3], [1, -1, 0], [4, -5, 3], [2, -1, 1], "float32") |
| _test_stridedslice([3, 4, 3], [1, 0], [4, 3], [2, 1], "float32", ellipsis_mask=8) |
| _test_stridedslice([3, 4, 3], [1, 0], [4, 2], [2, 1], "float32", ellipsis_mask=2) |
| _test_stridedslice([3, 4, 5, 3], [1, 0], [4, 2], [2, 1], "float32", ellipsis_mask=2) |
| _test_stridedslice([3, 4, 5, 3], [1, 0, 1], [4, 2, 2], [2, 1, 1], "float32", ellipsis_mask=2) |
| _test_stridedslice([3, 4, 3], [1, 1, 0], [4, 4, 2], [2, 1, 1], "float32", new_axis_mask=5) |
| _test_stridedslice( |
| [3, 4, 3], [1, 1, 1], [4, 4, 1], [2, 1, 1], "float32", ellipsis_mask=2, new_axis_mask=4 |
| ) |
| _test_stridedslice( |
| [6, 4, 5], [1, 1, 1], [6, 3, 4], [2, 1, 1], "float32", ellipsis_mask=2, new_axis_mask=5 |
| ) |
| _test_stridedslice( |
| [3, 4, 3], [1, 1, 2], [4, 4, 3], [2, 1, 1], "float32", ellipsis_mask=4, new_axis_mask=2 |
| ) |
| _test_stridedslice( |
| [3, 4, 3], [1, 1, 2], [4, 4, 3], [2, 1, 1], "float32", ellipsis_mask=2, new_axis_mask=3 |
| ) |
| _test_stridedslice( |
| [3, 4, 3], [1, 1, 0], [4, 4, 1], [2, 1, 1], "float32", ellipsis_mask=2, new_axis_mask=3 |
| ) |
| _test_stridedslice( |
| [3, 4, 3], [1, 1, 2], [4, 4, 3], [2, 1, 1], "float32", ellipsis_mask=2, new_axis_mask=2 |
| ) |
| _test_stridedslice((3, 4), [1, 0], [4, 4], [1, 1], "float32", shrink_axis_mask=2) |
| _test_stridedslice( |
| [3, 4, 3], [1, 1, 0], [4, 4, 3], [2, 1, 1], "float32", shrink_axis_mask=2, new_axis_mask=2 |
| ) |
| _test_stridedslice( |
| [3, 4, 3], [1, 1, 0], [4, 4, 3], [2, 1, 1], "float32", shrink_axis_mask=1, new_axis_mask=2 |
| ) |
| _test_stridedslice( |
| [3, 4, 3], [1, 1, 0], [4, 4, 3], [2, 1, 1], "float32", shrink_axis_mask=2, new_axis_mask=1 |
| ) |
| _test_stridedslice( |
| [3, 4, 5, 4, 5, 6], [0, 0], [2, 3], [1, 1], "float32", shrink_axis_mask=5, new_axis_mask=1 |
| ) |
| _test_stridedslice( |
| [3, 4, 5, 4, 5, 6], |
| [0, 0, 1, 2, 1], |
| [2, 3, 4, 5, 3], |
| [1, 1, 2, 2, 1], |
| "float32", |
| shrink_axis_mask=5, |
| new_axis_mask=1, |
| ellipsis_mask=2, |
| begin_mask=8, |
| end_mask=8, |
| ) |
| _test_stridedslice( |
| [3, 4, 5, 4, 5, 6], |
| [0, 0, 1, 2, 1], |
| [2, 3, 4, 5, 3], |
| [1, 1, 2, 2, 1], |
| "float32", |
| shrink_axis_mask=8, |
| new_axis_mask=1, |
| ellipsis_mask=2, |
| begin_mask=5, |
| end_mask=5, |
| ) |
| _test_stridedslice( |
| [3, 4, 5, 4, 5, 6], |
| [0, 0, 1, 2, 1], |
| [2, 3, 4, 5, 3], |
| [1, 1, 2, 2, 1], |
| "float32", |
| shrink_axis_mask=16, |
| new_axis_mask=1, |
| ellipsis_mask=2, |
| begin_mask=5, |
| end_mask=5, |
| ) |
| _test_stridedslice( |
| [3, 4, 5, 4, 5, 6], |
| [1, 2, 0, -3], |
| [4, 5, 3, 3], |
| [2, 2, 1, 1], |
| "float32", |
| shrink_axis_mask=8, |
| new_axis_mask=1, |
| ellipsis_mask=2, |
| begin_mask=5, |
| end_mask=8, |
| ) |
| _test_stridedslice( |
| [1, 13, 13, 3, 2], |
| [0, 0], |
| [1, 1], |
| [1, -1], |
| "float32", |
| ellipsis_mask=1, |
| begin_mask=2, |
| end_mask=2, |
| ) |
| |
| |
| ####################################################################### |
| # FloorDiv, RealDiv |
| # ----------------- |
| def _test_forward_divide(ip_shape, dtype): |
| np_numer = np.random.uniform(-100, 100, size=ip_shape).astype(dtype) |
| np_denomin = np.random.uniform(1, 100, size=ip_shape).astype(dtype) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| numerator = tf.placeholder(dtype, ip_shape, name="numer") |
| denominator = tf.placeholder(dtype, ip_shape, name="denomin") |
| tf.math.divide(numerator, denominator, name="RealDiv") |
| compare_tf_with_tvm([np_numer, np_denomin], ["numer:0", "denomin:0"], "RealDiv:0") |
| |
| |
| def _test_forward_floordiv(ip_shape, dtype): |
| np_numer = np.random.uniform(1, 100, size=ip_shape).astype(dtype) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| numerator = tf.placeholder(dtype, ip_shape, name="numer") |
| tf.math.floordiv(numerator, tf.constant(5, dtype=dtype), name="FloorDiv") |
| compare_tf_with_tvm([np_numer], ["numer:0"], "FloorDiv:0") |
| |
| |
| def test_forward_divide(): |
| """test FloorDiv, RealDiv""" |
| _test_forward_divide((4,), "int32") |
| _test_forward_divide((4, 3, 7), "float32") |
| _test_forward_floordiv((4, 3, 7), "float32") |
| _test_forward_floordiv((4, 3, 7), "int32") |
| |
| |
| ####################################################################### |
| # FloorMod |
| # -------- |
| def _test_forward_floormod(in_shape, if_shape, dtype): |
| np_numer = np.random.uniform(1, 100, size=in_shape).astype(dtype) |
| np_factor = np.random.uniform(1, 100, size=if_shape).astype(dtype) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| numerator = tf.placeholder(dtype, in_shape, name="numer") |
| factor = tf.placeholder(dtype, if_shape, name="factor") |
| tf.floormod(numerator, factor, name="FloorMod") |
| compare_tf_with_tvm([np_numer, np_factor], ["numer:0", "factor:0"], "FloorMod:0") |
| |
| |
| def test_forward_floormod(): |
| """test FloorMod""" |
| _test_forward_floormod((10,), (10,), "float32") |
| _test_forward_floormod((8, 2), (1,), "float32") |
| _test_forward_floormod((4, 3, 7), (4, 3, 7), "float32") |
| _test_forward_floormod((4, 3, 7), (4, 3, 7), "int32") |
| |
| |
| ####################################################################### |
| # TruncateMod |
| # ----------- |
| def _test_forward_truncatemod(ip_shape, dtype): |
| np_data_1 = np.random.uniform(-100, 100, size=ip_shape).astype(dtype) |
| np_data_2 = np.random.uniform(1, 10, size=ip_shape).astype(dtype) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data_1 = tf.placeholder(dtype, ip_shape, name="in_data_1") |
| in_data_2 = tf.placeholder(dtype, ip_shape, name="in_data_2") |
| tf.truncatemod(in_data_1, in_data_2, name="truncatemod") |
| compare_tf_with_tvm([np_data_1, np_data_2], ["in_data_1:0", "in_data_2:0"], "truncatemod:0") |
| |
| |
| def test_forward_truncatemod(): |
| """test TruncateMod""" |
| _test_forward_truncatemod((4, 3, 7), "int32") |
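

# A short NumPy sketch contrasting the two modulo ops tested in this file:
# FloorMod takes the sign of the divisor, TruncateMod the sign of the
# dividend. Illustrative only; not collected by pytest.
def _mod_semantics_sketch():
    """Illustrative only: FloorMod vs. TruncateMod on a negative dividend."""
    assert np.mod(-7, 3) == 2    # FloorMod semantics
    assert np.fmod(-7, 3) == -1  # TruncateMod semantics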
| |
| |
| ####################################################################### |
| # Gather, GatherV2 |
| # -------------------------- |
| |
| |
| def _test_gather(ip_shape, indice_shape, indice_value, axis, batch_dims, dtype): |
| """One iteration of a GatherV2""" |
| |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(dtype, ip_shape, name="in_data") |
| indices = tf.placeholder("int32", indice_shape, name="indices") |
| out = tf.gather(in_data, indices, axis=axis, batch_dims=batch_dims) |
| np_data = np.random.uniform(1, 10, size=ip_shape).astype(dtype) |
| |
        def _fill_indices(indice_value):
            # Build int32 indices from either a scalar or a nested list.
            if isinstance(indice_value, int):
                return np.array([indice_value], dtype="int32")
            return np.asarray(indice_value, dtype="int32")
| |
| np_indices = _fill_indices(indice_value) |
| compare_tf_with_tvm([np_data, np_indices], ["in_data:0", "indices:0"], out.name) |
| |
| |
| def test_forward_gather(): |
| """test Gather/GatherV2 layer""" |
| _test_gather((4,), (1,), 1, 0, 1, "int32") |
| _test_gather((4,), (1,), 1, 0, 0, "float32") |
| _test_gather((1, 4), (1,), [0], 0, 0, "int32") |
| _test_gather((4,), (1, 2, 2), [[[1, 0], [0, 1]]], 0, 0, "float32") |
| _test_gather((2, 2), (1, 2, 2), [[[1, 0], [0, 1]]], 0, 0, "int32") |
| _test_gather((2, 2), (1, 2, 2), [[[1, 0], [0, 1]]], 1, 0, "int32") |
| _test_gather((2, 2), (1, 2, 2), [[[1, 0], [0, 1]]], 0, 0, "float32") |
| _test_gather((3, 3, 3), (1, 1, 2), [[[1, 0]]], 0, 0, "int32") |
| _test_gather((3, 3, 3), (1, 1, 2), [[[1, 0]]], 2, 0, "int32") |
| _test_gather((4, 3, 5, 6), (1, 4), [[2, 1, 0, 0]], 0, 0, "float32") |
| _test_gather((2, 2), (2, 2), [[0, 0], [0, 0]], 1, 1, "float32") |
| _test_gather( |
| (2, 2, 3, 6), (2, 2, 3), [[[1, 1, 0], [0, 0, 1]], [[0, 1, 0], [1, 0, 1]]], 2, 2, "float32" |
| ) |
| _test_gather( |
| (2, 2, 3, 6), (2, 2, 3), [[[1, 1, 0], [0, 0, 1]], [[0, 1, 0], [1, 0, 1]]], 3, 1, "float32" |
| ) |
| _test_gather( |
| (2, 2, 3, 6), (2, 2, 3), [[[1, 1, 0], [0, 0, 1]], [[0, 1, 0], [1, 0, 1]]], 3, 2, "float32" |
| ) |
| _test_gather( |
| (2, 2, 3, 6), (2, 2, 3), [[[1, 1, 0], [0, 0, 1]], [[0, 1, 0], [1, 0, 1]]], 3, 0, "float32" |
| ) |
| |
| |
| ####################################################################### |
| # GatherND |
| # -------------------------- |
| |
| |
| def _test_gather_nd(ip_shape, indice_value, dtype): |
| """test operator GatherNd""" |
| np_data = np.random.uniform(1, 100, size=ip_shape).astype(dtype) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(dtype, ip_shape, name="in_data") |
| tf.gather_nd(in_data, indices=indice_value, name="gather_nd") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "gather_nd:0") |
| |
| |
| def test_forward_gather_nd(): |
| """test operator GatherNd""" |
| _test_gather_nd((2, 2), [[0, 0], [1, 1]], "float32") |
| _test_gather_nd((2, 2, 2), [[1, 0, 0], [0, 0, 0]], "float32") |
| _test_gather_nd((4,), [1], "float32") |
| _test_gather_nd((4,), [1], "int32") |
| _test_gather_nd((1, 4), [0, 3], "int32") |
| _test_gather_nd((2, 2), [[[1, 0], [0, 1]]], "int32") |
| _test_gather_nd((2, 2), [[[1, 0], [0, 1]]], "float32") |
    _test_gather_nd((3, 3, 3), [[[1, 0]]], "int32")
| _test_gather_nd((4, 3, 5, 6), [[2, 1, 0, 0]], "float32") |
| _test_gather_nd((3, 3, 3), [[[2, 1]]], "int32") |
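

# A minimal NumPy sketch of GatherNd: the innermost dimension of `indices` is
# interpreted as a coordinate into `params`. Illustrative only; not collected
# by pytest.
def _gather_nd_sketch():
    """Illustrative only: each inner index vector selects one element/slice."""
    params = np.array([[1, 2], [3, 4]])
    out = np.array([params[tuple(idx)] for idx in [[0, 0], [1, 1]]])
    assert out.tolist() == [1, 4]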
| |
| |
| ####################################################################### |
| # BiasAdd |
| # ------- |
| def test_forward_bias_add(): |
| """test Op BiasAdd""" |
| |
    def check_bias_add(lh_shape, rh_shape, dtype):
        tf.reset_default_graph()
        lh_data = np.random.uniform(size=lh_shape).astype(dtype)
| rh_data = np.random.uniform(size=rh_shape).astype(dtype) |
| with tf.Graph().as_default(): |
| lft_data = tf.placeholder(dtype, name="lft_data") |
| rgt_data = tf.placeholder(dtype, name="rgt_data") |
| tf.nn.bias_add(lft_data, rgt_data, name="BiasAdd") |
| compare_tf_with_tvm([lh_data, rh_data], ["lft_data:0", "rgt_data:0"], "BiasAdd:0") |
| |
| check_bias_add((10, 8, 16, 32), (32,), dtype="int32") |
| check_bias_add((10, 20), (20,), dtype="float32") |
| |
| |
| ####################################################################### |
| # Split |
| # ----- |
| |
| |
| def _test_split(in_shape, axis, num_or_size_splits, dtype): |
| """One iteration of a Split""" |
| np_data = np.random.uniform(-5, 5, size=in_shape).astype(dtype) |
| |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(dtype, in_shape, name="in_data") |
| _ = len(num_or_size_splits) if isinstance(num_or_size_splits, list) else num_or_size_splits |
| split = tf.split(in_data, num_or_size_splits, axis=axis) |
| relu = [tf.nn.relu(i) for i in split] |
| |
| compare_tf_with_tvm([np_data], ["in_data:0"], [n.name for n in relu]) |
| |
| # and now test together with concat |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(dtype, in_shape, name="in_data") |
| splitted = tf.split(in_data, num_or_size_splits, axis=axis) |
| concat = tf.concat(splitted, axis) |
| compare_tf_with_tvm([np_data], "in_data:0", concat.name) |
| |
| |
| def test_forward_split(): |
| """test split layer""" |
| # rank 1 |
| _test_split((3,), 0, 1, "float32") |
| _test_split((3,), 0, 3, "float32") |
| _test_split((6,), 0, 3, "float32") |
| # rank 2 |
| _test_split((6, 2), 0, 3, "float32") |
| _test_split((2, 6), 1, 6, "float32") |
| # rank 3 |
| _test_split((6, 2, 4), 0, 2, "int32") |
| _test_split((2, 6, 4), 1, 3, "float32") |
| _test_split((2, 4, 6), 2, 1, "float32") |
| # rank 4 |
| _test_split((6, 1, 3, 5), 0, 3, "float32") |
| _test_split((1, 6, 3, 5), 1, 3, "float32") |
| _test_split((1, 3, 6, 5), 2, 3, "float32") |
| _test_split((1, 3, 5, 6), 3, 3, "float32") |
| # split along negative axis |
| _test_split((6, 1, 3, 5), -4, 3, "float32") |
| _test_split((1, 6, 3, 5), -3, 3, "float32") |
| _test_split((1, 3, 6, 5), -2, 3, "float32") |
| _test_split((1, 3, 5, 6), -1, 3, "float32") |
| # size_splits list |
| _test_split((6,), 0, [1, 2, 3], "int32") |
| _test_split((3, 6, 4), -2, [1, 4, 1], "float32") |
| |
| |
| ###################################################################### |
| # TopKV2 |
| # ------ |
| |
| |
| def _test_forward_top_k_v2(in_shape, k): |
| np_data = np.random.uniform(-100, 100, size=in_shape).astype("float32") |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder("float32", in_shape, name="in_data") |
| tf.math.top_k(in_data, k, name="TopK") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "TopK:0") |
| |
| |
def test_forward_top_k_v2():
    """test TopKV2 layer"""
| _test_forward_top_k_v2((3,), 1) |
| _test_forward_top_k_v2((3,), 3) |
    _test_forward_top_k_v2((3, 5, 7), 3)
| |
| |
| ####################################################################### |
| # Unstack |
| # ------- |
| |
| |
| def _test_unstack(ip_shape, axis, dtype): |
| np_data = np.random.uniform(-5, 5, size=ip_shape).astype(dtype) |
| |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(dtype, ip_shape, name="in_data") |
| unstack = tf.unstack(in_data, axis=axis) |
| |
| compare_tf_with_tvm([np_data], ["in_data:0"], [n.name for n in unstack]) |
| |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(dtype, ip_shape, name="in_data") |
| tf.stack(tf.unstack(in_data, axis=axis), axis=axis) |
| |
| compare_tf_with_tvm([np_data], ["in_data:0"], "stack:0") |
| |
| |
| def test_forward_unstack(): |
| """test unstack layer""" |
| _test_unstack((6,), 0, "int32") |
| _test_unstack((2, 6), 1, "float64") |
| # negative axis |
| _test_unstack((1, 4), -1, "int32") |
| _test_unstack((3, 6, 4), -2, "float32") |
| |
| |
| ####################################################################### |
| # Tile |
| # ---- |
| |
| |
| def _test_tile(in_shape, multiples, dtype): |
| np_data = np.random.uniform(-5, 5, size=in_shape).astype(dtype) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(dtype, in_shape, name="in_data") |
| tf.tile(in_data, multiples=multiples, name="tile") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "tile:0") |
| |
| |
| def test_forward_tile(): |
| """test Tile""" |
| _test_tile((2,), (3,), "int32") |
| _test_tile((2, 2), (2, 3), "float32") |
| _test_tile((2, 4, 6), (6, 7, 8), "float64") |
| |
| |
| ####################################################################### |
| # ClipByValue |
| # ----------- |
| |
| |
| def _test_forward_clip_by_value(ip_shape, clip_value_min, clip_value_max, dtype): |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(dtype, ip_shape, name="in_data") |
| tf.clip_by_value(in_data, clip_value_min, clip_value_max, name="ClipByValue") |
| np_data = np.random.uniform(-100, 100, size=ip_shape).astype(dtype) |
| compare_tf_with_tvm([np_data], ["in_data:0"], "ClipByValue:0") |
| |
| |
| def test_forward_clip_by_value(): |
| """test ClipByValue op""" |
    # tf.clip_by_value lowers to a single ClipByValue node only on TF >= 1.9,
    # so the guard must run these cases on 1.9 and later, not before.
    if tf.__version__ >= LooseVersion("1.9"):
| _test_forward_clip_by_value((4,), 0.1, 5.0, "float32") |
| _test_forward_clip_by_value((4, 4), 1, 5, "int32") |
| |
| |
| ####################################################################### |
| # Multi Input to graph |
| # -------------------- |
| |
| |
| def test_forward_multi_input(): |
| """Multi Input""" |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(tf.int32, shape=[3, 3], name="in1") |
| in2 = tf.placeholder(tf.int32, shape=[3, 3], name="in2") |
| in3 = tf.placeholder(tf.int32, shape=[3, 3], name="in3") |
| in4 = tf.placeholder(tf.int32, shape=[3, 3], name="in4") |
| |
| out1 = tf.add(in1, in2, name="out1") |
| out2 = tf.subtract(in3, in4, name="out2") |
| _ = tf.multiply(out1, out2, name="out") |
| in_data = np.arange(9, dtype="int32").reshape([3, 3]) |
| |
| compare_tf_with_tvm( |
| [in_data, in_data, in_data, in_data], ["in1:0", "in2:0", "in3:0", "in4:0"], "out:0" |
| ) |
| |
| |
| ####################################################################### |
| # Multi Output to Graph |
| # --------------------- |
| |
| |
| def test_forward_multi_output(): |
| """Multi Output""" |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(tf.int32, shape=[3, 3], name="in1") |
| in2 = tf.placeholder(tf.int32, shape=[3, 3], name="in2") |
| in3 = tf.placeholder(tf.int32, shape=[3, 3], name="in3") |
| in4 = tf.placeholder(tf.int32, shape=[3, 3], name="in4") |
| |
| _ = tf.add(in1, in2, name="out1") |
| _ = tf.subtract(in3, in4, name="out2") |
| in_data = np.arange(9, dtype="int32").reshape([3, 3]) |
| in_data = [in_data] * 4 |
| in_name = ["in1:0", "in2:0", "in3:0", "in4:0"] |
| out_name = ["out1:0", "out2:0"] |
        out_node = [out.split(":")[0] for out in out_name]
        in_node = [inp.split(":")[0] for inp in in_name]
| |
| with tf.Session() as sess: |
| final_graph_def = tf.graph_util.convert_variables_to_constants( |
| sess, |
| sess.graph.as_graph_def(add_shapes=True), |
| out_node, |
| ) |
| tf_output = run_tf_graph(sess, in_data, in_name, out_name) |
| tvm_output = run_tvm_graph( |
| final_graph_def, in_data, in_node, target="llvm", out_names=out_node, num_output=2 |
| ) |
| for i, tf_out in enumerate(tf_output): |
| tvm.testing.assert_allclose(tf_out, tvm_output[i], atol=1e-5, rtol=1e-5) |
| |
| |
| ####################################################################### |
| # Resize Bilinear, Nearest_Neighbor |
| # --------------------------------- |
| |
| |
| def _test_resize_bilinear(in_shape, to_shape, align_corners): |
| """One iteration of resize bilinear""" |
| |
| data = np.random.uniform(size=in_shape).astype("float32") |
| shape_data = np.array(to_shape).astype("int32") |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype) |
| shape_data = constant_op.constant( |
| shape_data, shape=shape_data.shape, dtype=shape_data.dtype |
| ) |
| tf.image.resize_bilinear(in_data, shape_data, align_corners=align_corners) |
| |
| compare_tf_with_tvm(data, "Placeholder:0", "ResizeBilinear:0") |
| |
| |
| def _test_resize_bilinear_from_tensor(in_shape, align_corners): |
| """One iteration of resize bilinear with non-constant output shape, requires |
| value inference to get proper output shape.""" |
| |
| data = np.random.uniform(size=in_shape).astype("float32") |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder( |
| shape=[in_shape[0], None, None, in_shape[3]], dtype=data.dtype |
| ) |
| to_shape = tf.shape(in_data)[1:3] |
| tf.image.resize_bilinear(in_data, to_shape, align_corners=align_corners) |
| |
| compare_tf_with_tvm(data, "Placeholder:0", "ResizeBilinear:0") |
| |
| |
| def _test_resize_nearest_neighbor(in_shape, to_shape): |
| """One iteration of resize nearest neighbor""" |
| |
| data = np.random.uniform(size=in_shape).astype("float32") |
| shape_data = np.array(to_shape).astype("int32") |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype) |
| shape_data = constant_op.constant( |
| shape_data, shape=shape_data.shape, dtype=shape_data.dtype |
| ) |
| tf.image.resize_nearest_neighbor(in_data, shape_data, name="resize_nearest_neighbor") |
| |
| compare_tf_with_tvm(data, "Placeholder:0", "resize_nearest_neighbor:0") |
| |
| |
| def _test_resize_nearest_neighbor_dynamic_shape(in_shape, scale): |
| """One iteration of resize nearest neighbor for graph with dynamic input shape""" |
| |
| data = np.random.uniform(size=in_shape).astype("float32") |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=None, dtype=data.dtype) |
| # multiply input shape by scale factor |
| new_shape = tf.shape(in_data)[1:3] * tf.constant(scale, dtype=tf.int32) |
| tf.image.resize_nearest_neighbor(in_data, new_shape, name="resize_nearest_neighbor") |
| |
| compare_tf_with_tvm(data, "Placeholder:0", "resize_nearest_neighbor:0") |
| |
| |
| def test_forward_resize(): |
| """Resize Bilinear, Nearest_Neighbor""" |
| # TF default layout is NHWC |
| _test_resize_bilinear((4, 32, 32, 3), [50, 50], False) |
| _test_resize_bilinear((6, 32, 32, 3), [20, 20], True) |
| _test_resize_bilinear_from_tensor((4, 32, 32, 3), False) |
| _test_resize_bilinear_from_tensor((6, 50, 50, 3), True) |
| _test_resize_nearest_neighbor((6, 32, 32, 3), [20, 20]) |
| _test_resize_nearest_neighbor_dynamic_shape((1, 16, 16, 3), scale=[2, 2]) |
| |
| |
| ####################################################################### |
| # BroadcastArgs |
| # ----------- |
| |
| |
| def _test_broadcast_args(in_shape_1, in_shape_2): |
| """One iteration of broadcast_args""" |
| |
| shape_1 = np.array(in_shape_1).astype("int32") |
| shape_2 = np.array(in_shape_2).astype("int32") |
| |
| with tf.Graph().as_default(): |
| shape_1 = constant_op.constant(shape_1, shape=shape_1.shape, dtype=shape_1.dtype) |
| shape_2 = constant_op.constant(shape_2, shape=shape_2.shape, dtype=shape_2.dtype) |
| tf.raw_ops.BroadcastArgs(s0=shape_1, s1=shape_2) |
| |
| compare_tf_with_tvm(None, "", "BroadcastArgs:0", opt_level=0) |
| |
| |
| def test_forward_broadcast_args(): |
| """Resize Bilinear""" |
| |
| _test_broadcast_args((4, 1, 32, 32), [4, 8, 32, 32]) |
| _test_broadcast_args((6, 32, 32, 1), [6, 32, 32, 16]) |
| _test_broadcast_args((32, 32, 16), [6, 32, 32, 16]) |
| |
| |
| ####################################################################### |
| # BroadcastTo |
| # ----------- |
| |
| |
| def _test_broadcast_to(in_shape, to_shape): |
| """One iteration of broadcast_to""" |
| |
| data = np.random.uniform(size=in_shape).astype("float32") |
| shape_data = np.array(to_shape).astype("int32") |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype) |
| shape_data = constant_op.constant( |
| shape_data, shape=shape_data.shape, dtype=shape_data.dtype |
| ) |
| tf.broadcast_to(in_data, shape_data) |
| |
| compare_tf_with_tvm(data, "Placeholder:0", "BroadcastTo:0", opt_level=0) |
| |
| |
| def _test_broadcast_to_from_tensor(in_shape): |
| """One iteration of broadcast_to with unknown shape at graph build""" |
| |
| data = np.random.uniform(size=in_shape).astype("float32") |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=[None], dtype=data.dtype) |
| |
| shape_data = tf.multiply(tf.shape(in_data), 32) |
| tf.broadcast_to(in_data, shape_data) |
| |
| compare_tf_with_tvm(data, "Placeholder:0", "BroadcastTo:0") |
| |
| |
| def test_forward_broadcast_to(): |
| """Resize Bilinear""" |
| |
| _test_broadcast_to((4, 1, 32, 32), [4, 8, 32, 32]) |
| _test_broadcast_to((6, 32, 32, 1), [6, 32, 32, 16]) |
| _test_broadcast_to_from_tensor((1)) |
| |
| |
| ####################################################################### |
| # Fill |
| # ---- |
| |
| |
| def _test_fill(in_shape): |
| """Use the fill op to create a tensor of ones with non-constant shape.""" |
| |
| with tf.Graph().as_default(): |
| tf.ones(shape=in_shape, dtype="float32") |
| compare_tf_with_tvm(in_shape, [], "ones:0", opt_level=1) |
| |
| |
| def _test_fill_from_tensor(in_shape): |
| """Use the fill op to create a tensor of ones with non-constant shape. |
| Some extra ops need to be added here to prevent the graph from |
| being fully constant and folded away.""" |
| |
| data = np.random.uniform(size=in_shape).astype("float32") |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder( |
| shape=[in_shape[0], in_shape[1], None, None], dtype=data.dtype |
| ) |
| |
| x = tf.ones(shape=2 * tf.shape(in_data), dtype=data.dtype) |
| _ = tf.math.add(in_data, tf.reduce_mean(x), name="out1") |
| compare_tf_with_tvm(data, "Placeholder:0", "out1:0") |
| |
| |
| def _test_fill_symbolic_inputs(in_shape_data, in_value_data, dtype): |
| with tf.Graph().as_default(): |
| in_shape = tf.placeholder(shape=[in_shape_data.shape[0]], dtype=in_shape_data.dtype) |
| in_value = tf.placeholder(shape=(), dtype=dtype) |
| out = tf.fill(in_shape, in_value) |
| for mode in ["debug", "vm"]: |
| compare_tf_with_tvm( |
| [in_shape_data, in_value_data], [in_shape.name, in_value.name], out.name, mode=mode |
| ) |
| |
| |
| def test_forward_fill(): |
| """Resize Bilinear""" |
| |
| _test_fill((32)) |
| _test_fill((6, 32, 64, 64)) |
| _test_fill_from_tensor((6, 32, 64, 64)) |
| _test_fill_symbolic_inputs(np.array((2,)), np.int32(9), tf.int32) |
| _test_fill_symbolic_inputs(np.array((2, 3)), 9, tf.int64) |
| _test_fill_symbolic_inputs(np.array((2, 3, 4)), np.float32(9.0), tf.float32) |
| |
| |
| ####################################################################### |
| # Crop to bounding box |
| # -------------------- |
| |
| |
| def _test_crop(in_shape, off_h, off_w, tar_h, tar_w): |
| """Crop to bounding box""" |
| data = np.random.uniform(size=in_shape).astype("float32") |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype) |
| tf.image.crop_to_bounding_box(in_data, off_h, off_w, tar_h, tar_w) |
| compare_tf_with_tvm(data, "Placeholder:0", "crop_to_bounding_box/Slice:0") |
| |
| |
| def test_forward_crop(): |
| """Crop to bounding box""" |
| _test_crop((1, 224, 224, 3), 20, 20, 120, 120) |
| |
| |
| ####################################################################### |
| # CropAndResize |
| # ------------- |
| |
| |
| def _test_forward_crop_and_resize( |
| img_shape, |
| boxes, |
| box_idx, |
| crop_size, |
| extrapolation_value=0.0, |
| method="bilinear", |
| dtype="float32", |
| atol=1e-4, |
| rtol=1e-4, |
| ): |
| image = np.random.uniform(0, 10, size=img_shape).astype(dtype) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(dtype, image.shape, name="in_data") |
| tf.image.crop_and_resize( |
| in_data, |
| boxes=boxes, |
| box_ind=box_idx, |
| crop_size=crop_size, |
| method=method, |
| extrapolation_value=extrapolation_value, |
| name="crop_and_resize", |
| ) |
| compare_tf_with_tvm([image], ["in_data:0"], "crop_and_resize:0", atol=atol, rtol=rtol) |
| |
| |
| def test_forward_crop_and_resize(): |
| """CropAndResize""" |
| _test_forward_crop_and_resize([1, 6, 6, 3], [[0, 0, 1, 1]], [0], [3, 3]) |
| _test_forward_crop_and_resize([1, 6, 6, 3], [[0, 0, 1, 1]], [0], [3, 3], 0.2) |
| _test_forward_crop_and_resize([1, 6, 6, 3], [[0, 0, 1, 1]], [0], [3, 3], 0.2, "nearest") |
| _test_forward_crop_and_resize([1, 11, 11, 3], [[0.3, 0.3, 1, 1]], [0], [21, 21]) |
| _test_forward_crop_and_resize([1, 41, 41, 3], [[0.2, 0.4, 0.8, 0.8]], [0], [21, 11]) |
| _test_forward_crop_and_resize([1, 100, 100, 3], [[0, 0, 0.9, 0.9]], [0], [30, 30]) |
| _test_forward_crop_and_resize([1, 249, 249, 3], [[0, 0, 1, 1]], [0], [9, 9]) |
| _test_forward_crop_and_resize([1, 201, 301, 3], [[0.2, 0.3, 0.7, 0.8]], [0], [51, 51]) |
| _test_forward_crop_and_resize( |
| img_shape=[10, 11, 11, 3], |
| boxes=[[0, 0, 0.9, 0.9], [0.2, 0.2, 0.8, 0.8]], |
| box_idx=[0, 1], |
| crop_size=[5, 5], |
| ) |
| |
| if platform.machine() == "aarch64": |
| pytest.skip("Currently failing on AArch64") |
| _test_forward_crop_and_resize([1, 224, 224, 3], [[0.1, 0.2, 1, 1]], [0], [9, 9]) |
| _test_forward_crop_and_resize( |
| img_shape=[20, 576, 576, 3], |
| boxes=[[0, 0, 1, 1], [0, 0, 0.8, 0.8], [0.1, 0.2, 0.9, 1], [0.2, 0, 1, 1]], |
| box_idx=[1, 0, 2, 3], |
| crop_size=[24, 24], |
| extrapolation_value=0.3, |
| atol=1e-3, |
| rtol=1e-3, |
| ) |
| _test_forward_crop_and_resize( |
| img_shape=[20, 229, 229, 3], |
| boxes=[[0, 0, 0.9, 0.9], [0.3, 0.3, 1, 1], [0.2, 0.1, 0.7, 0.8], [0, 0, 1, 1]], |
| box_idx=[3, 0, 2, 1], |
| crop_size=[58, 58], |
| extrapolation_value=0.2, |
| method="nearest", |
| atol=1e-3, |
| rtol=1e-3, |
| ) |
| |
| |
| ####################################################################### |
| # Non Max Suppression |
| # ------------------- |
| def _test_forward_nms_v3( |
| bx_shape, score_shape, iou_threshold, score_threshold, out_size, dtype="float32" |
| ): |
| boxes = np.random.uniform(0, 10, size=bx_shape).astype(dtype) |
| scores = np.random.uniform(size=score_shape).astype(dtype) |
| max_output_size = np.int32(out_size) |
| tf.reset_default_graph() |
| in_data_1 = tf.placeholder(dtype, boxes.shape, name="in_data_1") |
| in_data_2 = tf.placeholder(dtype, scores.shape, name="in_data_2") |
| in_data_3 = tf.placeholder(tf.int32, name="in_data_3") |
| tf.image.non_max_suppression( |
| boxes=in_data_1, |
| scores=in_data_2, |
| max_output_size=in_data_3, |
| iou_threshold=iou_threshold, |
| score_threshold=score_threshold, |
| name="nms", |
| ) |
| compare_tf_with_tvm( |
| [boxes, scores, max_output_size], |
| ["in_data_1:0", "in_data_2:0", "in_data_3:0"], |
| "nms/NonMaxSuppressionV3:0", |
| mode="vm", |
| ) |
| compare_tf_with_tvm( |
| [boxes, scores, max_output_size], |
| ["in_data_1:0", "in_data_2:0", "in_data_3:0"], |
| "nms/NonMaxSuppressionV3:0", |
| mode="debug", |
| ) |
| |
| |
| def _test_forward_nms_v4( |
| bx_shape, score_shape, iou_threshold, score_threshold, out_size, dtype="float32" |
| ): |
| boxes = np.random.uniform(0, 10, size=bx_shape).astype(dtype) |
| scores = np.random.uniform(size=score_shape).astype(dtype) |
| max_output_size = np.int32(out_size) |
| tf.reset_default_graph() |
| in_data_1 = tf.placeholder(dtype, boxes.shape, name="in_data_1") |
| in_data_2 = tf.placeholder(dtype, scores.shape, name="in_data_2") |
| in_data_3 = tf.placeholder(tf.int32, name="in_data_3") |
| indices_padded, num_valid = tf.image.non_max_suppression_padded( |
| boxes=in_data_1, |
| scores=in_data_2, |
| max_output_size=in_data_3, |
| iou_threshold=iou_threshold, |
| score_threshold=score_threshold, |
| name="nms", |
| pad_to_max_output_size=True, |
| ) |
| num_valid = tf.reshape(num_valid, shape=(-1,)) |
| indices_padded = tf.reshape(indices_padded, shape=(-1,)) |
| tf.slice(indices_padded, tf.constant([0]), num_valid, name="SlicedIndices") |
| compare_tf_with_tvm( |
| [boxes, scores, max_output_size], |
| ["in_data_1:0", "in_data_2:0", "in_data_3:0"], |
| ["nms/NonMaxSuppressionV4:1", "SlicedIndices:0"], |
| mode="vm", |
| ) |
| compare_tf_with_tvm( |
| [boxes, scores, max_output_size], |
| ["in_data_1:0", "in_data_2:0", "in_data_3:0"], |
| ["nms/NonMaxSuppressionV4:1", "SlicedIndices:0"], |
| mode="debug", |
| ) |
| |
| |
| def _test_forward_nms_v5( |
| bx_shape, score_shape, iou_threshold, score_threshold, out_size, dtype="float32" |
| ): |
| boxes = np.random.uniform(0, 10, size=bx_shape).astype(dtype) |
| scores = np.random.uniform(size=score_shape).astype(dtype) |
| max_output_size = np.int32(out_size) |
| tf.reset_default_graph() |
| in_data_1 = tf.placeholder(dtype, boxes.shape, name="in_data_1") |
| in_data_2 = tf.placeholder(dtype, scores.shape, name="in_data_2") |
| in_data_3 = tf.placeholder(tf.int32, name="in_data_3") |
| tf.image.non_max_suppression_with_scores( |
| boxes=in_data_1, |
| scores=in_data_2, |
| max_output_size=in_data_3, |
| iou_threshold=iou_threshold, |
| score_threshold=score_threshold, |
| name="nms", |
| ) |
| compare_tf_with_tvm( |
| [boxes, scores, max_output_size], |
| ["in_data_1:0", "in_data_2:0", "in_data_3:0"], |
| ["nms/NonMaxSuppressionV5:0", "nms/NonMaxSuppressionV5:1"], |
| mode="vm", |
| ) |
| |
| |
| def test_forward_nms(): |
| """NonMaxSuppressionV3,5""" |
| for _test_forward_nms in [_test_forward_nms_v3, _test_forward_nms_v5]: |
| _test_forward_nms((5, 4), (5,), 0.7, 0.5, 5) |
| _test_forward_nms((20, 4), (20,), 0.5, 0.6, 10) |
| _test_forward_nms((1000, 4), (1000,), 0.3, 0.7, 1000) |
| _test_forward_nms((2000, 4), (2000,), 0.4, 0.6, 7) |
| |
| |
| def _test_forward_combined_nms( |
| bx_shape, |
| score_shape, |
| iou_threshold, |
| score_threshold, |
| out_size, |
| total_size, |
| clip_boxes=False, |
| dtype="float32", |
| ): |
| def get_random_scores(size, dtype): |
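        # A shuffled linspace yields strictly distinct scores; ties would make
        # the selection order, and hence the comparison, ambiguous.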
| size1d = np.prod(size) |
| scores = np.linspace(0, 1, num=size1d) |
| np.random.shuffle(scores) |
| return scores.reshape(size).astype(dtype) |
| |
| boxes = np.random.uniform(-1, 2, size=bx_shape).astype(dtype) |
| scores = get_random_scores(score_shape, dtype) |
| max_output_size = np.int32(out_size) |
| tf.reset_default_graph() |
| in_data_1 = tf.placeholder(dtype, boxes.shape, name="in_data_1") |
| in_data_2 = tf.placeholder(dtype, scores.shape, name="in_data_2") |
| in_data_3 = tf.placeholder(tf.int32, name="in_data_3") |
| tf.image.combined_non_max_suppression( |
| boxes=in_data_1, |
| scores=in_data_2, |
| max_output_size_per_class=in_data_3, |
| max_total_size=total_size, |
| iou_threshold=iou_threshold, |
| score_threshold=score_threshold, |
| pad_per_class=False, |
| clip_boxes=clip_boxes, |
| name="nms", |
| ) |
| compare_tf_with_tvm( |
| [boxes, scores, max_output_size], |
| ["in_data_1:0", "in_data_2:0", "in_data_3:0"], |
| [ |
| "nms/CombinedNonMaxSuppression:0", |
| "nms/CombinedNonMaxSuppression:1", |
| "nms/CombinedNonMaxSuppression:2", |
| "nms/CombinedNonMaxSuppression:3", |
| ], |
| ) |
| |
| |
| def test_forward_combined_nms(): |
| """CombinedNonMaxSuppression""" |
| _test_forward_combined_nms((1, 64, 1, 4), (1, 64, 1), 0.7, 0.5, 64, 64) |
| _test_forward_combined_nms((1, 32, 1, 4), (1, 32, 1), 0.7, 0.5, 10, 64) |
| _test_forward_combined_nms((1, 32, 1, 4), (1, 32, 2), 0.7, 0.5, 32, 64) |
| _test_forward_combined_nms((1, 64, 1, 4), (1, 64, 20), 0.7, 0.5, 64, 10) |
| # This workload seems flaky on CI. |
| # See https://github.com/apache/tvm/issues/8140 |
| # _test_forward_combined_nms((1, 64, 20, 4), (1, 64, 20), 0.7, 0.5, 64, 64, clip_boxes=True) |
| _test_forward_combined_nms((2, 200, 1, 4), (2, 200, 1), 0.4, 0.6, 100, 100) |
| _test_forward_combined_nms((2, 200, 1, 4), (2, 200, 10), 0.4, 0.2, 150, 1000) |
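

# For reference, a minimal NumPy sketch of the greedy selection that the
# NonMaxSuppression ops above perform (boxes assumed [y1, x1, y2, x2];
# score_threshold handling omitted). Illustrative only; the tests compare
# against TensorFlow itself and do not call this helper.
def _reference_nms(boxes, scores, max_output_size, iou_threshold):
    """Illustrative greedy NMS; not used by the tests above."""

    def _iou(a, b):
        inter_h = max(0.0, min(a[2], b[2]) - max(a[0], b[0]))
        inter_w = max(0.0, min(a[3], b[3]) - max(a[1], b[1]))
        inter = inter_h * inter_w
        union = (a[2] - a[0]) * (a[3] - a[1]) + (b[2] - b[0]) * (b[3] - b[1]) - inter
        return inter / union if union > 0 else 0.0

    keep = []
    # Visit boxes in descending score order; keep a box only if it overlaps
    # every previously kept box by less than iou_threshold.
    for idx in np.argsort(scores)[::-1]:
        if len(keep) >= max_output_size:
            break
        if all(_iou(boxes[idx], boxes[k]) < iou_threshold for k in keep):
            keep.append(int(idx))
    return keep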
| |
| |
| ####################################################################### |
| # LSTM |
| # ---- |
| |
| |
| def _test_lstm_cell(batch_size, num_hidden, num_layers, forget_bias, dtype): |
| """One iteration of a LSTM cell""" |
| |
| tf.reset_default_graph() |
| input_size = num_hidden |
| input_data = np.full((batch_size, input_size), 1.0, dtype=dtype) |
| in_state_c = np.full((batch_size, num_hidden), 0.1, dtype=dtype) |
| in_state_h = np.full((batch_size, num_hidden), 0.1, dtype=dtype) |
| |
| def _get_tensorflow_output(): |
| with tf.Session() as sess: |
| with variable_scope.variable_scope( |
| "root", initializer=init_ops.constant_initializer(0.5) |
| ): |
| m0 = tf.placeholder(dtype, [batch_size, num_hidden], name="m0") |
| m1 = tf.placeholder(dtype, [batch_size, num_hidden], name="m1") |
| x = tf.placeholder(shape=(batch_size, input_size), dtype=dtype, name="input") |
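                # `tensorflow` (not `tf`) resolves via the module-level
                # `import tensorflow.contrib` near the PTB tests below;
                # LSTMBlockCell lives in contrib and is gone in TF 2.x.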
                g, (out_m0, out_m1) = tensorflow.contrib.rnn.LSTMBlockCell(
| num_hidden, forget_bias=forget_bias |
| )(x, (m0, m1)) |
| sess.run([variables.global_variables_initializer()]) |
| res = sess.run( |
| [g, out_m0, out_m1], |
| { |
| x.name: np.array([[1.0, 1.0]]), |
| m0.name: in_state_c, |
| m1.name: in_state_h, |
| }, |
| ) |
| graph_def = sess.graph.as_graph_def(add_shapes=True) |
| final_graph_def = graph_util.convert_variables_to_constants( |
| sess, graph_def, ["root/lstm_cell/LSTMBlockCell"] |
| ) |
| |
| return final_graph_def, res |
| |
| graph_def, tf_out = _get_tensorflow_output() |
| tvm_output = run_tvm_graph( |
| graph_def, |
| [input_data, in_state_c, in_state_h], |
| ["root/input", "root/m0", "root/m1"], |
| num_output=7, |
| ) |
| assert isinstance(tvm_output, list) |
| |
| tvm.testing.assert_allclose(tf_out[0], tvm_output[6], rtol=1e-3, atol=1e-3) |
| tvm.testing.assert_allclose(tf_out[1], tvm_output[1], rtol=1e-3, atol=1e-3) |
| |
| |
| def test_forward_lstm(): |
| """test LSTM block cell""" |
| if package_version.parse(tf.VERSION) < package_version.parse("2.0.0"): |
        # tf.contrib.rnn.LSTMBlockCell was removed in TF 2.0
| _test_lstm_cell(1, 2, 1, 0.5, "float32") |
| |
| |
| ####################################################################### |
| # Pack |
| # --- |
| def _test_pack(axis, shape, **kwargs): |
| |
| a = np.arange(np.prod(shape), dtype=np.float32).reshape(shape) |
| b = np.arange(np.prod(shape), dtype=np.float32).reshape(shape) |
| |
| with tf.Graph().as_default(): |
| tf_a = array_ops.placeholder(shape=shape, dtype="float32", name="pl_a") |
| tf_b = array_ops.placeholder(shape=shape, dtype="float32", name="pl_b") |
| tf_c = tf.stack([tf_a, tf_b], axis=axis, **kwargs) |
| assert tf_c.op.op_def.name == "Pack", "tf.stack() is expected to produce 'Pack' operation" |
| |
| compare_tf_with_tvm([a, b], ["pl_a:0", "pl_b:0"], "stack:0") |
| |
| |
| def test_forward_pack(): |
| for axis in range(-3, 3): |
| _test_pack(axis, [3, 2, 1]) |
| for axis in range(-1, 1): |
| _test_pack(axis, [3]) |
| _test_pack(0, []) |
| |
| |
| ####################################################################### |
| # Unpack |
| # ------ |
| def _test_forward_unpack(in_shape, axis, dtype): |
| """test operator Unpack""" |
| np_data = np.random.uniform(-100, 100, size=in_shape).astype(dtype) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(dtype, in_shape, name="in_data") |
| tf.unstack(in_data, axis=axis, name="Unpack") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "Unpack:0") |
| |
| |
| def test_forward_unpack(): |
| _test_forward_unpack((3,), 0, "int32") |
| _test_forward_unpack((3,), -1, "int16") |
| _test_forward_unpack((21, 23, 3), 2, "float32") |
| |
| |
| ####################################################################### |
| # Range |
| # ----- |
| |
| |
| def test_forward_range(): |
| """test operator Range""" |
| for dtype in [tf.int32, tf.int64]: |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| tf.range(1, 18, 3, name="range", dtype=dtype) |
| compare_tf_with_tvm([], [], "range:0") |
| |
| # test type assignment for operator Range |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| tf.range(1, 256 + 1, 1, dtype=tf.float32) |
| compare_tf_with_tvm([], [], "range:0") |
| |
| |
| ####################################################################### |
| # Einsum |
| # ----- |
| |
| |
| def _test_einsum(equation, dtype, *shape_of_input_tensors): |
| """Test Einsum Op""" |
| |
| with tf.Graph().as_default(): |
| inputs_placeholders = [] |
| input_data = [] |
| for idx, shape in enumerate(shape_of_input_tensors): |
| input_name = f"input_{idx}" |
| inputs_placeholders.append(tf.placeholder(shape=shape, dtype=dtype, name=input_name)) |
| input_data.append(np.random.normal(size=shape).astype(dtype)) |
| |
| result = tf.einsum(equation, *inputs_placeholders) |
| |
| compare_tf_with_tvm(input_data, [ph.name for ph in inputs_placeholders], result.name) |
| |
| |
| def test_forward_einsum(): |
| for dtype in ["float32"]: |
| _test_einsum("ij,jk->ik", dtype, [2, 3], [3, 5]) # Matmul |
| _test_einsum("ij,jk", dtype, [2, 3], [3, 5]) # Matmul |
| _test_einsum("i,i->", dtype, [2], [2]) # Dot product |
| _test_einsum("i,j->ij", dtype, [3], [5]) # Outer produce |
| _test_einsum("ij->ji", dtype, [2, 3]) # Transpose |
| _test_einsum("ii->i", dtype, [3, 3]) # Diag |
| _test_einsum("ii", dtype, [3, 3]) # Trace of a square matrix |
| _test_einsum("bij,bjk->bik", dtype, [7, 5, 3], [7, 3, 2]) # Batch matmul |
| |
| |
| ####################################################################### |
| # Pad |
| # --- |
| |
| |
| def _test_pad(input_shape, paddings, mode, **kwargs): |
| """One iteration of pad operation with given shape""" |
| |
| x = np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape) |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=input_shape, dtype="float32") |
| pad_values = constant_op.constant(paddings) |
| _ = tf.pad(in_data, paddings=pad_values, mode=mode, **kwargs) |
| |
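        # The emitted op depends on the arguments: Pad, PadV2 (when
        # constant_values is given), or MirrorPad (REFLECT/SYMMETRIC modes).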
| if mode == "CONSTANT": |
| if "constant_values" in kwargs: |
| out_name = "PadV2:0" |
| else: |
| out_name = "Pad:0" |
| else: |
| out_name = "MirrorPad:0" |
| |
| compare_tf_with_tvm(x, "Placeholder:0", out_name) |
| |
| |
| def test_forward_pad(): |
| """Pad""" |
| _test_pad((2, 3), [[1, 1], [2, 2]], mode="CONSTANT") |
| _test_pad((2, 3), [[1, 1], [2, 2]], mode="CONSTANT", constant_values=1.0) |
| _test_pad((2, 3), [[1, 1], [2, 2]], mode="SYMMETRIC") |
| _test_pad((2, 3), [[1, 1], [2, 2]], mode="REFLECT") |
| |
| |
| ####################################################################### |
| # Logical operators |
| # -------------------- |
| |
| |
| def test_logical_and(): |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in1") |
| in2 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in2") |
| _ = tf.logical_and(in1, in2, name="out") |
| in_data1 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool") |
| in_data2 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool") |
| compare_tf_with_tvm([in_data1, in_data2], ["in1:0", "in2:0"], "out:0") |
| |
| |
| def test_logical_or(): |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in1") |
| in2 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in2") |
| _ = tf.logical_or(in1, in2, name="out") |
| in_data1 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool") |
| in_data2 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool") |
| compare_tf_with_tvm([in_data1, in_data2], ["in1:0", "in2:0"], "out:0") |
| |
| |
| def test_logical_xor(): |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in1") |
| in2 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in2") |
| _ = tf.logical_xor(in1, in2, name="out") |
| in_data1 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool") |
| in_data2 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool") |
| compare_tf_with_tvm([in_data1, in_data2], ["in1:0", "in2:0"], "out:0") |
| |
| |
| def test_logical_not(): |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name="in1") |
| _ = tf.logical_not(in1, name="out") |
| in_data1 = np.random.choice(a=[False, True], size=(1, 4, 4, 3)).astype("bool") |
| compare_tf_with_tvm(in_data1, "in1:0", "out:0") |
| |
| |
| def test_forward_logical(): |
| test_logical_and() |
| test_logical_or() |
| test_logical_xor() |
| test_logical_not() |
| |
| |
| ####################################################################### |
| # Where, Select, SelectV2 |
| # ------------- |
| def test_forward_where(): |
| """Where: return elements depending on conditions""" |
| with tf.Graph().as_default(): |
| with tf.Session() as _: |
| input1 = tf.placeholder(tf.int32, shape=[1, 4, 4, 3], name="input1") |
| input2 = tf.placeholder(tf.int32, shape=[1, 4, 4, 3], name="input2") |
| mask = input1 > input2 |
| tf.where(mask, input1 + 1, input2 * 2) |
| in_data1 = np.random.uniform(0, 10, size=(1, 4, 4, 3)).astype("uint32") |
| in_data2 = np.random.uniform(0, 10, size=(1, 4, 4, 3)).astype("uint32") |
| compare_tf_with_tvm([in_data1, in_data2], ["input1:0", "input2:0"], "Select:0") |
| |
| |
| ####################################################################### |
| # Inception V3 |
| # ------------ |
| @pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/10275") |
| def test_forward_inception_v3(): |
| """test inception V3 model""" |
| with tf.Graph().as_default(): |
| graph_def = tf_testing.get_workload( |
| "InceptionV3/inception_v3_2016_08_28_frozen-with_shapes.pb" |
| ) |
| # Call the utility to import the graph definition into default graph. |
| graph_def = tf_testing.ProcessGraphDefParam(graph_def) |
| |
| data = np.random.uniform(size=(1, 299, 299, 3)).astype("float32") |
| |
| with tf.Session() as sess: |
| tf_output = run_tf_graph(sess, data, "input:0", "InceptionV3/Predictions/Reshape_1:0") |
| tvm_output = run_tvm_graph(graph_def, data, "input") |
| tvm.testing.assert_allclose(tf_output[0], tvm_output[0], rtol=1e-5, atol=1e-5) |
| |
| |
| ####################################################################### |
| # Inception V1 |
| # ------------ |
| |
| |
| def test_forward_inception_v1(): |
| """test inception V1 model""" |
| with tf.Graph().as_default(): |
| graph_def = tf_testing.get_workload("InceptionV1/classify_image_graph_def-with_shapes.pb") |
| # Call the utility to import the graph definition into default graph. |
| graph_def = tf_testing.ProcessGraphDefParam(graph_def) |
| |
| # Build an image from random data. |
| img_array = np.random.uniform(size=(1, 600, 600, 3)).astype("uint8") |
        img = Image.frombuffer("RGB", (600, 600), img_array.tobytes(), "raw", "RGB", 0, 1)
| temp = utils.tempdir() |
| img_path = temp.relpath("tf-test.jpg") |
| img.save(img_path) |
| |
        if not tf.gfile.Exists(img_path):
            tf.logging.fatal("File does not exist %s", img_path)
        data = tf.gfile.FastGFile(img_path, "rb").read()
| |
| temp.remove() |
| |
| # Extract tensorflow decoded image frame for tvm input |
| with tf.Session() as sess: |
| tvm_data = run_tf_graph(sess, data, "DecodeJpeg/contents:0", "DecodeJpeg:0") |
| |
| with tf.Session() as sess: |
| tf_output = run_tf_graph(sess, data, "DecodeJpeg/contents:0", "softmax:0") |
| tvm_output = run_tvm_graph(graph_def, tvm_data, "DecodeJpeg/contents") |
| tvm.testing.assert_allclose(tf_output[0], tvm_output[0], rtol=1e-5, atol=1e-5) |
| |
| |
| ####################################################################### |
| # Mobilenet |
| # --------- |
| |
| |
| def test_forward_mobilenet(): |
| """test mobilenet model""" |
| # MobilenetV2 |
| with tf.Graph().as_default(): |
| graph_def = tf_testing.get_workload( |
| "https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz", |
| "mobilenet_v2_1.4_224_frozen.pb", |
| ) |
| # Call the utility to import the graph definition into default graph. |
| graph_def = tf_testing.ProcessGraphDefParam(graph_def) |
| |
| data = np.random.uniform(size=(1, 224, 224, 3)).astype("float32") |
| out_node = "MobilenetV2/Predictions/Reshape_1" |
| |
| with tf.Session() as sess: |
| # Add shapes to the graph. |
| graph_def = tf_testing.AddShapesToGraphDef(sess, out_node) |
| tf_output = run_tf_graph(sess, data, "input:0", out_node + ":0") |
| tvm_output = run_tvm_graph(graph_def, data, "input") |
| tvm.testing.assert_allclose( |
| np.squeeze(tvm_output[0]), np.squeeze(tf_output[0]), rtol=1e-5, atol=1e-5 |
| ) |
| |
| |
| ####################################################################### |
| # ResnetV2 |
| # -------- |
| |
| |
| @tvm.testing.requires_gpu |
| def test_forward_resnetv2(): |
| """test resnet model""" |
| if is_gpu_available(): |
| with tf.Graph().as_default(): |
| graph_def = tf_testing.get_workload( |
| "ResnetV2/resnet-20180601_resnet_v2_imagenet-shapes.pb" |
| ) |
| # Call the utility to import the graph definition into default graph. |
| graph_def = tf_testing.ProcessGraphDefParam(graph_def) |
| |
| data = np.random.uniform(size=(128, 224, 224, 3)).astype("float32") |
| out_node = "ArgMax" |
| |
| with tf.Session() as sess: |
| tf_output = run_tf_graph(sess, data, "input_tensor:0", out_node + ":0") |
| for device in ["llvm", "cuda"]: |
| _ = tvm.device(device, 0) |
| if not tvm.testing.device_enabled(device): |
| print(f"Skip because {device} is not enabled") |
| continue |
| tvm_output = run_tvm_graph( |
| graph_def, data, "input_tensor", len(tf_output), target=device |
| ) |
| tvm.testing.assert_allclose( |
| np.squeeze(tvm_output[0]), np.squeeze(tf_output[0]), rtol=1e-5, atol=1e-5 |
| ) |
| |
| |
| ####################################################################### |
| # SSD |
| # --- |
| |
| |
| def _test_ssd_impl(): |
| """Test SSD with backbone MobileNet V1""" |
| with tf.Graph().as_default(): |
| graph_def = tf_testing.get_workload( |
| "object_detection/ssd_mobilenet_v1_ppn_shared_" |
| "box_predictor_300x300_coco14_sync_2018_07_03.pb" |
| ) |
| # Call the utility to import the graph definition into default graph. |
| graph_def = tf_testing.ProcessGraphDefParam(graph_def) |
| |
| data = np.random.uniform(0.0, 255.0, size=(1, 512, 512, 3)).astype("uint8") |
| in_node = "image_tensor" |
| out_node = ["detection_boxes", "detection_scores", "detection_classes"] |
| |
| with tf.Session() as sess: |
| tf_output = run_tf_graph( |
| sess, data, f"{in_node}:0", [f"{oname}:0" for oname in out_node] |
| ) |
| # TODO(kevinthesun): enable gpu test when VM heterogeneous execution is ready. |
| for device in ["llvm"]: |
| _ = tvm.device(device, 0) |
| if not tvm.testing.device_enabled(device): |
| print(f"Skip because {device} is not enabled") |
| continue |
| tvm_output = run_tvm_graph( |
| graph_def, |
| data, |
| in_node, |
| len(out_node), |
| target=device, |
| layout="NCHW", |
| out_names=out_node, |
| mode="vm", |
| disabled_pass=["FoldScaleAxis"], |
| serialize=True, |
| ) |
| for i in range(len(out_node)): |
| tvm.testing.assert_allclose(tvm_output[i], tf_output[i], rtol=1e-3, atol=1e-3) |
| |
| |
| @pytest.mark.skip( |
| reason="Use of threading module here hides errors, see https://github.com/apache/tvm/pull/10231" |
| ) |
| def test_forward_ssd(): |
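    # Run the test on a worker thread with a 100 MB stack; importing this
    # model recurses deeply enough to overflow the default stack size.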
| run_thread = threading.Thread(target=_test_ssd_impl, args=()) |
| old_stack_size = threading.stack_size(100 * 1024 * 1024) |
| run_thread.start() |
| run_thread.join() |
| threading.stack_size(old_stack_size) |
| |
| |
| ####################################################################### |
| # Placeholder |
| # ----------- |
| |
| |
| def test_forward_placeholder(): |
| """test a simple pb with Placeholder node in the end of GraphDef""" |
| with tf.Graph().as_default(): |
| graph_def = tf_testing.get_workload("Custom/placeholder.pb") |
| # Call the utility to import the graph definition into default graph. |
| graph_def = tf_testing.ProcessGraphDefParam(graph_def) |
| |
| data = np.random.uniform(size=(1, 224, 224, 3)).astype("float32") |
| out_node = "mul" |
| |
| with tf.Session() as sess: |
| # Add shapes to the graph. |
| graph_def = tf_testing.AddShapesToGraphDef(sess, out_node) |
| tf_output = run_tf_graph(sess, data, "Placeholder:0", out_node + ":0") |
| tvm_output = run_tvm_graph(graph_def, data, "Placeholder") |
| tvm.testing.assert_allclose( |
| np.squeeze(tvm_output[0]), np.squeeze(tf_output[0]), rtol=1e-5, atol=1e-5 |
| ) |
| |
| |
| ####################################################################### |
| # PTB |
| # --- |
| try: |
| # Load contrib for running ptb model in tf version before 2.0 |
| import tensorflow.contrib |
| except ImportError: |
| pass |
| |
| |
| def test_forward_ptb(): |
| """test ptb model""" |
| config = tf_testing.get_config() |
| num_steps = config.num_steps |
| num_hidden = config.hidden_size |
| num_layers = config.num_layers |
| batch_size = config.batch_size |
| vocab_size = config.vocab_size |
| out_sample_shape = (batch_size, vocab_size) |
| out_state_shape = (batch_size, num_hidden) |
| # Sample input |
| inpt = "we have no useful information on" |
| cnt_sample = 20 |
| |
| def _pretty_print(items, is_char_model, id2word): |
| if not is_char_model: |
| return " ".join([id2word[x] for x in items]) |
| else: |
| return "".join([id2word[x] for x in items]).replace("_", " ") |
| |
| def _get_tvm_graph_module(graph_def): |
| # Cell inputs 'c and 'h' consist of all layers values |
| shape_dict = {"Model/Placeholder": (batch_size, num_steps)} |
| |
| with tvm.testing.disable_span_filling(): |
| mod, params = relay.frontend.from_tensorflow( |
| graph_def, |
| shape=shape_dict, |
| outputs=[ |
| "Model/Softmax:0", |
| "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell:1", |
| "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell:6", |
| "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_1:1", |
| "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_1:6", |
| ], |
| ) |
| with tvm.testing.enable_span_filling(): |
| mod_with_span, _ = relay.frontend.from_tensorflow( |
| graph_def, |
| shape=shape_dict, |
| outputs=[ |
| "Model/Softmax:0", |
| "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell:1", |
| "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell:6", |
| "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_1:1", |
| "Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_1:6", |
| ], |
| ) |
| assert tvm.ir.structural_equal(mod["main"], mod_with_span["main"]) |
| |
| target = "llvm" |
| with tvm.transform.PassContext(opt_level=0): |
| graph, lib, params = relay.build(mod, target, params=params) |
| |
| dev = tvm.cpu(0) |
| return params, graph_executor.create(graph, lib, dev) |
| |
| def _do_tvm_sample(model, data, in_states, params, num_samples): |
| """Sampled from the model""" |
| samples = [] |
| state = in_states |
| sample = None |
| |
| def _get_sample(data, state): |
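            # The four state arrays feed the (c, h) zero-state placeholders of
            # the two LSTM layers, in the order set below.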
| input_data = np.full((batch_size, num_steps), data, dtype="int32") |
| |
| model.set_input("Model/Placeholder", tvm.nd.array(input_data.astype("int32"))) |
| model.set_input( |
| "Model/MultiRNNCellZeroState/LSTMBlockCellZeroState/zeros", |
| tvm.nd.array(state[0].astype("float32")), |
| ) |
| model.set_input( |
| "Model/MultiRNNCellZeroState/LSTMBlockCellZeroState/zeros_1", |
| tvm.nd.array(state[1].astype("float32")), |
| ) |
| model.set_input( |
| "Model/MultiRNNCellZeroState/LSTMBlockCellZeroState_1/zeros", |
| tvm.nd.array(state[2].astype("float32")), |
| ) |
| model.set_input( |
| "Model/MultiRNNCellZeroState/LSTMBlockCellZeroState_1/zeros_1", |
| tvm.nd.array(state[3].astype("float32")), |
| ) |
| model.set_input(**params) |
| model.run() |
| tvm_output = model.get_output(0, tvm.nd.empty(out_sample_shape, "float32")).numpy() |
| |
| state_output = [] |
| for i in range(4): |
| state_output.append( |
| model.get_output(i + 1, tvm.nd.empty(out_state_shape, "float32")).numpy() |
| ) |
| sample = tf_testing.pick_from_weight(tvm_output[0]) |
| |
| return sample, state_output |
| |
| for x in data: |
| sample, state = _get_sample(x, state) |
| |
| if sample is not None: |
| samples.append(sample) |
| else: |
| samples.append(0) |
| |
| k = 1 |
| while k < num_samples: |
| sample, state = _get_sample(samples[-1], state) |
| samples.append(sample) |
| k += 1 |
| return samples, state |
| |
| with tf.Graph().as_default(): |
| word_to_id, id_to_word, graph_def = tf_testing.get_workload_ptb() |
| vocab_size = len(word_to_id) |
| # Call the utility to import the graph definition into default graph. |
| graph_def = tf_testing.ProcessGraphDefParam(graph_def) |
| sess = tf.Session() |
| |
| # TVM graph module creation |
| params, m = _get_tvm_graph_module(graph_def) |
| |
        # Create 10 predicted statements of 20 words
| cnt_stm = 0 |
| while cnt_stm < 10: |
| cnt_stm += 1 |
| in_state = [np.full((batch_size, num_hidden), 0, dtype="float32")] * 2 * num_layers |
| seed_for_sample = inpt.split() |
| tvm_samples, _ = _do_tvm_sample( |
| m, [word_to_id[word] for word in seed_for_sample], in_state, params, cnt_sample |
| ) |
| tvm_sample_str = _pretty_print(tvm_samples, False, id_to_word) |
| tf_samples, _ = tf_testing.do_tf_sample( |
| sess, [word_to_id[word] for word in seed_for_sample], in_state, cnt_sample |
| ) |
| tf_sample_str = _pretty_print(tf_samples, False, id_to_word) |
| inpt = tvm_sample_str |
| tvm.testing.assert_allclose(tf_samples, tvm_samples, rtol=1e-5, atol=1e-5) |
| assert tvm_sample_str == tf_sample_str |
| |
| |
| ####################################################################### |
| # LRN (Local Response Normalization) |
| # ---------------------------------- |
| |
| |
| def _test_lrn(ishape, size, axis, bias, alpha, beta): |
| """testing local response normalization""" |
    lrn_depth_radius = size // 2  # depth_radius is an integer attribute: half the window size
| |
| inp_array = np.random.uniform(size=ishape).astype(np.float32) |
| |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype, name="lrn0_data") |
| nn_ops.local_response_normalization( |
| in1, name="lrn", depth_radius=lrn_depth_radius, bias=bias, alpha=alpha, beta=beta |
| ) |
| |
| compare_tf_with_tvm(inp_array, "lrn0_data:0", "lrn:0") |
| |
| |
| def test_forward_lrn(): |
| _test_lrn((1, 3, 20, 20), 3, 1, 1.0, 1.0, 0.5) |
| |
| |
| ####################################################################### |
| # l2_normalize |
| # ------------ |
| |
| |
| def _test_l2_normalize(ishape, eps, axis): |
| """testing l2 normalize (uses max, sum, square, sqrt frontend operators)""" |
| |
| inp_array = np.random.uniform(size=ishape).astype(np.float32) |
| |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) |
| nn.l2_normalize(in1, axis=axis, epsilon=eps, name=None, dim=None) |
| |
| compare_tf_with_tvm(inp_array, "Placeholder:0", "l2_normalize:0") |
| |
| |
| def test_forward_l2_normalize(): |
| _test_l2_normalize((1, 3, 20, 20), 0.001, (0,)) |
| |
| |
| ####################################################################### |
| # transpose |
| # --------- |
| |
| |
| def _test_forward_transpose(ishape, axes=None): |
| data = np.random.uniform(size=ishape).astype(np.float32) |
| |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(shape=data.shape, dtype=data.dtype, name="transpose_data") |
| |
| if axes is None: |
| tf.transpose(in1) |
| else: |
| tf.transpose(in1, perm=axes) |
| |
| compare_tf_with_tvm(data, "transpose_data:0", "transpose:0") |
| |
| |
def _test_forward_transpose_axes_input(ishape, axes):
| data = np.random.uniform(size=ishape).astype(np.float32) |
| axes_np = np.array(axes).astype(np.int32) |
| |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(shape=data.shape, dtype=data.dtype, name="transpose_data") |
| |
| const1 = tf.constant(axes_np, dtype=tf.int32) |
| |
| # make axes an input to tf.transpose, but not an input to the graph, |
| # so it can be extracted with infer_value_simulated |
| axes = tf.reverse(const1, axis=[-1]) |
| tf.transpose(in1, axes) |
| |
| compare_tf_with_tvm([data], ["transpose_data:0"], "transpose:0") |
| |
| |
| def test_forward_transpose(): |
| _test_forward_transpose((2, 3, 4), (1, 2, 0)) |
| _test_forward_transpose((2, 3, 4)) |
| _test_forward_transpose((7, 8, 8, 10)) |
| _test_forward_transpose((2, 3, 4), (1, 2, 0)) |
| _test_forward_transpose((2, 3, 4), (0, 1, 2)) |
| _test_forward_transpose((2, 3, 4, 5), (3, 0, 1, 2)) |
    _test_forward_transpose_axes_input((2, 3, 4), (1, 2, 0))
    _test_forward_transpose_axes_input((2, 3, 4, 5), (3, 0, 1, 2))
| |
| |
| def _test_forward_slice_operation_input(input_value, begin_value, size_value): |
| input_data = np.array(input_value, dtype=np.float32) |
| with tf.Graph().as_default(): |
| input_tensor = tf.placeholder(shape=input_data.shape, dtype=input_data.dtype, name="input") |
| tf.slice(input_tensor, begin_value, size_value, name="slice_output") |
| compare_tf_with_tvm([input_data], ["input:0"], "slice_output:0") |
| |
| |
| def test_forward_slice(): |
| _test_forward_slice_operation_input([1, 1], [0], [2]) |
| _test_forward_slice_operation_input([0, 1, 2, 3], [3], [-1]) |
| _test_forward_slice_operation_input( |
| [[0, 1, 2, 3], [4, 5, 6, 7]], begin_value=[0, 1], size_value=[-1, -1] |
| ) |
| |
| |
| def test_forward_ceil(): |
| ishape = (1, 3, 10, 10) |
| inp_array = np.random.uniform(size=ishape).astype(np.float32) |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) |
| tf.ceil(in1) |
| compare_tf_with_tvm(inp_array, "Placeholder:0", "Ceil:0") |
| |
| |
| def test_forward_floor(): |
| ishape = (1, 3, 10, 10) |
| inp_array = np.random.uniform(size=ishape).astype(np.float32) |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) |
| tf.floor(in1) |
| compare_tf_with_tvm(inp_array, "Placeholder:0", "Floor:0") |
| |
| |
| def test_forward_relu(): |
| ishape = (1, 3, 10, 10) |
| inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32) |
| for mode in ["graph_executor", "vm"]: |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) |
| tf.nn.relu(in1) |
| compare_tf_with_tvm(inp_array, "Placeholder:0", "Relu:0", mode=mode) |
| |
| |
| def test_forward_leaky_relu(): |
| ishape = (1, 3, 10, 10) |
| inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32) |
| for mode in ["graph_executor", "vm"]: |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) |
| tf.nn.leaky_relu(in1, alpha=0.4) |
| compare_tf_with_tvm(inp_array, "Placeholder:0", "LeakyRelu:0", mode=mode) |
| |
| |
| def test_forward_elu(): |
| ishape = (1, 3, 10, 10) |
| inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32) |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) |
| tf.nn.elu(in1) |
| compare_tf_with_tvm(inp_array, "Placeholder:0", "Elu:0") |
| |
| |
| def test_forward_selu(): |
| ishape = (1, 3, 10, 10) |
| inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32) |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) |
| tf.nn.selu(in1) |
| compare_tf_with_tvm(inp_array, "Placeholder:0", "Selu:0") |
| |
| |
| def test_forward_tanh(): |
| ishape = (1, 3, 10, 10) |
| inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32) |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) |
| tf.nn.tanh(in1) |
| compare_tf_with_tvm(inp_array, "Placeholder:0", "Tanh:0") |
| |
| |
| ####################################################################### |
| # Softmax |
| # ------- |
| def test_forward_softmax(): |
| """test operator Softmax""" |
| |
| def check_softmax(in_shape, axis, dtype): |
| np_data = np.random.uniform(-100, 100, size=in_shape).astype(dtype) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(dtype, in_shape, name="in_data") |
| tf.nn.softmax(in_data, axis=axis, name="Softmax") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "Softmax:0") |
| |
| check_softmax((2, 3, 5), 2, "float32") |
| check_softmax((2, 3, 5), -1, "float32") |
| |
| |
| ####################################################################### |
| # Tensor |
| # ------ |
| |
| |
| def test_forward_round(): |
| """test Round""" |
| np_data = np.random.uniform(-10, 10, size=(5, 7)).astype(np.float32) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(tf.float32, (5, 7), name="in_data") |
| tf.round(in_data, name="round") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "round:0") |
| |
| |
| def test_forward_abs(): |
| """test operator Abs""" |
| np_data = np.random.uniform(1, 100, size=(9, 11)).astype(np.float32) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(tf.float32, (9, 11), name="in_data") |
| tf.math.abs(in_data, name="abs") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "abs:0") |
| |
| |
| def _test_forward_zeros_like(in_shape, dtype): |
| np_data = np.random.uniform(-10, 10, size=in_shape).astype(dtype) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(dtype, in_shape, name="in_data") |
| tf.zeros_like(in_data, name="zeros_like") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "zeros_like:0") |
| |
| |
| def test_forward_zeros_like(): |
| if tf.__version__ < LooseVersion("1.2"): |
| _test_forward_zeros_like((2, 3), "int32") |
| _test_forward_zeros_like((2, 3, 5), "int8") |
| _test_forward_zeros_like((2, 3, 5, 7), "uint16") |
| _test_forward_zeros_like((2, 3, 11), "float32") |
| _test_forward_zeros_like((2, 3, 11), "float64") |
| |
| |
| def test_forward_squared_difference(): |
| ishape = (1, 3, 10, 14) |
| inp_array_a = np.random.uniform(-5, 5, size=ishape).astype(np.float32) |
| inp_array_b = np.random.uniform(-5, 5, size=ishape).astype(np.float32) |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(shape=inp_array_a.shape, dtype=inp_array_a.dtype, name="in1") |
| in2 = tf.placeholder(shape=inp_array_b.shape, dtype=inp_array_b.dtype, name="in2") |
| out = tf.math.squared_difference(in1, in2) |
| compare_tf_with_tvm([inp_array_a, inp_array_b], [in1.name, in2.name], out.name) |
| |
| |
| def _test_forward_reverse_v2(in_shape, axis, dtype): |
| np_data = np.random.uniform(-10, 10, size=in_shape).astype(dtype) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(dtype, in_shape, name="in_data") |
| tf.reverse(in_data, axis=[axis], name="reverse") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "reverse:0") |
| |
| |
| def test_forward_reverse_v2(): |
| """test ReverseV2""" |
| _test_forward_reverse_v2((2, 3), 0, "int32") |
| _test_forward_reverse_v2((2, 3, 5), 2, "float32") |
| _test_forward_reverse_v2((2, 3, 5, 7), 1, "float32") |
| _test_forward_reverse_v2((2, 3, 5), -1, "float64") |
| _test_forward_reverse_v2((2, 3, 5), -3, "float64") |
| |
| |
| def test_forward_sign(): |
| """test Sign""" |
| np_data = np.random.uniform(-10, 10, size=(5, 7, 11)).astype(np.float32) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(tf.float32, (5, 7, 11), name="in_data") |
| tf.sign(in_data, name="sign") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "sign:0") |
| |
| |
| def test_forward_square(): |
| """test operator Square""" |
| np_data = np.random.uniform(1, 100, size=(2, 3, 5)).astype(np.float32) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(tf.float32, (2, 3, 5), name="in_data") |
| tf.square(in_data, name="square") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "square:0") |
| |
| |
| def test_forward_pow_exp(): |
| """test Pow and Exp""" |
| np_in1 = np.random.uniform(-2, 2, size=(5, 7, 11)).astype(np.float32) |
| np_in2 = np.random.uniform(-2, 2, size=(5, 7, 11)).astype(np.float32) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(tf.float32, (5, 7, 11), name="in1") |
| in2 = tf.placeholder(tf.float32, (5, 7, 11), name="in2") |
| _ = tf.pow(in1, in2, name="pow") |
| _ = tf.exp(in1, name="exp") |
| compare_tf_with_tvm([np_in1, np_in2], ["in1:0", "in2:0"], "pow:0") |
| compare_tf_with_tvm([np_in1], ["in1:0"], "exp:0") |
| |
| |
| def test_forward_unary(): |
| """Unary""" |
| |
| def _test_forward_unary(op, a_min=1, a_max=5, dtype=np.float32): |
| """test unary operators""" |
| np_data = np.random.uniform(a_min, a_max, size=(2, 3, 5)).astype(dtype) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(dtype, (2, 3, 5), name="in_data") |
| out = op(in_data) |
| compare_tf_with_tvm([np_data], ["in_data:0"], out.name) |
| |
| _test_forward_unary(tf.acos, -1, 1) |
| _test_forward_unary(tf.asin, -1, 1) |
| _test_forward_unary(tf.atanh, -1, 1) |
| _test_forward_unary(tf.sinh) |
| _test_forward_unary(tf.cosh) |
| _test_forward_unary(tf.acosh) |
| _test_forward_unary(tf.asinh) |
| _test_forward_unary(tf.atan) |
| _test_forward_unary(tf.sin) |
| _test_forward_unary(tf.cos) |
| _test_forward_unary(tf.tan) |
| _test_forward_unary(tf.tanh) |
| _test_forward_unary(tf.erf) |
| _test_forward_unary(tf.log) |
| _test_forward_unary(tf.log1p) |
| |
| |
| def test_forward_atan2(): |
| """test operator tan""" |
| tf.disable_eager_execution() |
| np_data_1 = np.random.uniform(1, 100, size=(2, 3, 5)).astype(np.float32) |
| np_data_2 = np.random.uniform(1, 100, size=(2, 3, 5)).astype(np.float32) |
| tf.reset_default_graph() |
| in_data_1 = tf.placeholder(tf.float32, (2, 3, 5), name="in_data_1") |
| in_data_2 = tf.placeholder(tf.float32, (2, 3, 5), name="in_data_2") |
| tf.atan2(in_data_1, in_data_2, name="atan2") |
| compare_tf_with_tvm([np_data_1, np_data_2], ["in_data_1:0", "in_data_2:0"], "atan2:0") |
| |
| |
| def test_forward_expm1(): |
| """test operator expm1""" |
| |
| def _test_forward_expm1(shape): |
| tf.disable_eager_execution() |
| np_data = np.random.uniform(1, 10, size=shape).astype(np.float32) |
| tf.reset_default_graph() |
| in_data = tf.placeholder(tf.float32, shape, name="in_data") |
| tf.expm1(in_data, name="expm1") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "expm1:0") |
| |
| _test_forward_expm1([1, 100]) |
| _test_forward_expm1([1, 10, 10]) |
| _test_forward_expm1([2, 5, 2, 5]) |
| |
| |
| def test_forward_softsign(): |
| """test operator softsign""" |
| |
| def _test_forward_softsign(shape): |
| tf.disable_eager_execution() |
| np_data = np.random.uniform(1, 100, size=shape).astype(np.float32) |
| tf.reset_default_graph() |
| in_data = tf.placeholder(tf.float32, shape, name="in_data") |
| tf.nn.softsign(in_data, name="softsign") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "softsign:0") |
| |
| _test_forward_softsign([1, 100]) |
| _test_forward_softsign([1, 10, 10]) |
| _test_forward_softsign([2, 5, 2, 5]) |
| |
| |
| def test_forward_rint(): |
| """test operator rint""" |
| |
| def _test_forward_rint(shape): |
| tf.disable_eager_execution() |
| np_data = np.random.uniform(-100, 100, size=shape).astype(np.float32) |
| tf.reset_default_graph() |
| in_data = tf.placeholder(tf.float32, shape, name="in_data") |
| tf.math.rint(in_data, name="rint") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "rint:0") |
| |
| _test_forward_rint([100]) |
| _test_forward_rint([1, 100]) |
| _test_forward_rint([1, 10, 10]) |
| _test_forward_rint([2, 5, 2, 5]) |
| |
| |
| def test_forward_negative(): |
| """test tf operator Neg""" |
| np_data = np.random.uniform(-100, 255, size=(224, 224, 3)).astype(np.float32) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(tf.float32, (224, 224, 3), name="in_data") |
| tf.negative(in_data, name="negative") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "negative:0") |
| |
| |
| def test_forward_log_softmax(): |
| """test operator LogSoftmax""" |
| np_data = np.random.uniform(1, 100, size=(9, 11)).astype(np.float32) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(tf.float32, (9, 11), name="in_data") |
| tf.math.log_softmax(in_data, name="LogSoftmax") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "LogSoftmax:0") |
| |
| |
| def test_forward_softplus(): |
| """test operator Softplus""" |
| np_data = np.random.uniform(1, 10, size=(2, 3, 5)).astype(np.float32) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(tf.float32, (2, 3, 5), name="in_data") |
| tf.nn.softplus(in_data, name="softplus") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "softplus:0") |
| |
| |
| def test_forward_rsqrt(): |
| """test Rsqrt""" |
| np_data = np.random.uniform(1, 100, size=(5, 7, 11)).astype(np.float32) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(tf.float32, (5, 7, 11), name="in_data") |
| tf.rsqrt(in_data, name="rsqrt") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "rsqrt:0") |
| |
| |
| def test_forward_sqrt(): |
| """test Sqrt""" |
| np_data = np.random.uniform(1, 100, size=(5, 7, 11)).astype(np.float32) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(tf.float32, (5, 7, 11), name="in_data") |
| tf.sqrt(in_data, name="sqrt") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "sqrt:0") |
| |
| |
| def _test_forward_right_shift(in_shape, dtype): |
| """test operator RightShift""" |
| lh_data = np.random.randint(1, 3, size=in_shape).astype(dtype) |
| rh_data = np.random.randint(1, 8, size=in_shape).astype(dtype) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| lft_data = tf.placeholder(dtype, in_shape, name="lft_data") |
| rgt_data = tf.placeholder(dtype, in_shape, name="rgt_data") |
| tf.bitwise.right_shift(lft_data, rgt_data, name="RightShift") |
| compare_tf_with_tvm([lh_data, rh_data], ["lft_data:0", "rgt_data:0"], "RightShift:0") |
| |
| |
| def test_forward_right_shift(): |
| _test_forward_right_shift((7,), "int32") |
| _test_forward_right_shift((3, 11), "int16") |
| |
| |
| def _test_forward_left_shift(in_shape, dtype): |
| """test operator LeftShift""" |
| lh_data = np.random.randint(100, 1000000, size=in_shape).astype(dtype) |
| rh_data = np.random.randint(1, 3, size=in_shape).astype(dtype) |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| lft_data = tf.placeholder(dtype, in_shape, name="lft_data") |
| rgt_data = tf.placeholder(dtype, in_shape, name="rgt_data") |
| tf.bitwise.left_shift(lft_data, rgt_data, name="LeftShift") |
| compare_tf_with_tvm([lh_data, rh_data], ["lft_data:0", "rgt_data:0"], "LeftShift:0") |
| |
| |
| def test_forward_left_shift(): |
| _test_forward_left_shift((10,), "int32") |
| _test_forward_left_shift((224, 224, 3), "int16") |
| |
| |
| ####################################################################### |
| # Mean |
| # ---- |
| |
| |
| def test_forward_mean(): |
| """Mean""" |
| |
| def check_mean(ishape, **kwargs): |
| inp_array = np.random.uniform(size=ishape).astype(np.float32) |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) |
| tf.keras.backend.mean(in1, **kwargs) |
| compare_tf_with_tvm(inp_array, "Placeholder:0", "Mean:0", no_gpu=True) |
| |
| check_mean((10, 8, 16, 32)) |
| check_mean((10, 8, 16, 32), axis=(2, 3)) |
| check_mean((10, 8, 16, 32), axis=(1, 2), keepdims=True) |
| |
| |
| ####################################################################### |
| # Size |
| # ---- |
| |
| |
| def test_forward_size(): |
| """Size""" |
| |
| def check_size(ishape): |
| np_input = np.random.uniform(size=ishape).astype(np.float32) |
| |
        # If all dimensions are constant, TF folds the Size operator into a
        # constant, so leave the first dimension dynamic.
| tf_input_shape = list(np_input.shape) |
| tf_input_shape[0] = None |
| |
| with tf.Graph().as_default(): |
| tf_input = tf.placeholder(shape=tf_input_shape, dtype=np_input.dtype, name="input") |
| tf.size(tf_input, name="size") |
| compare_tf_with_tvm([np_input], ["input:0"], "size:0") |
| |
| check_size((10, 8, 16, 32)) |
| check_size((10,)) |
| |
| |
| ####################################################################### |
| # All, Any, Max, Min, Prod, variance, std, logsumexp, euclidean_norm |
| # ------------------------------------------------------------------ |
| |
| |
| def test_forward_reduce(): |
| """Reduce""" |
| |
| def _check_op(tf_op, ishape, axis, keepdims, dtype="float32"): |
| tf.reset_default_graph() |
| if dtype == "bool": |
| np_data = np.random.choice([True, False], size=ishape) |
| else: |
| np_data = np.random.uniform(size=ishape).astype(dtype) |
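        # For reduce_prod, flatten to one row and reduce along it so the
        # product is taken over a single, well-defined axis.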
| if tf_op == tf.math.reduce_prod: |
| axis = 1 |
| np_data = np_data.reshape(1, -1) |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(dtype, name="in_data") |
| reduce_op = tf_op(in_data, axis=axis, keepdims=keepdims, name="reduce_std") |
| compare_tf_with_tvm([np_data], ["in_data:0"], reduce_op.name) |
| |
| def _test_math_op(op, d_types=None): |
| d_types = d_types or ["int32", "float32"] |
| for dtype in d_types: |
| _check_op(op, (3, 10), axis=(-1), keepdims=False, dtype=dtype) |
| _check_op(op, (8, 16, 32), axis=(-1), keepdims=False, dtype=dtype) |
| _check_op(op, (1, 8, 8, 3), axis=(2, 3), keepdims=True, dtype=dtype) |
| _check_op(op, (2, 3, 10, 10), axis=(1, 2), keepdims=True, dtype=dtype) |
| |
| _test_math_op(tf.math.reduce_all, d_types=["bool"]) |
| _test_math_op(tf.math.reduce_any, d_types=["bool"]) |
| _test_math_op(tf.math.reduce_max) |
| _test_math_op(tf.math.reduce_min) |
| _test_math_op(tf.math.reduce_prod) |
| _test_math_op(tf.math.reduce_variance, d_types=["float32"]) |
| _test_math_op(tf.math.reduce_std, d_types=["float32"]) |
| _test_math_op(tf.math.reduce_logsumexp, d_types=["float32"]) |
| if package_version.parse(tf.VERSION) >= package_version.parse("1.15.0"): |
| _test_math_op(tf.math.reduce_euclidean_norm) |
| |
| |
| ####################################################################### |
| # All, Max, Min |
| # ------------------------------------------------------------------ |
| |
| |
| def test_forward_raw_reduce(): |
| """Raw reduce""" |
| |
| def _check_op(tf_op, ishape, axis, keepdims, range_axis=False, dtype="float32"): |
| tf.reset_default_graph() |
| if dtype == "bool": |
| np_data = np.random.choice([True, False], size=ishape) |
| else: |
| np_data = np.random.uniform(size=ishape).astype(dtype) |
| if tf_op == tf.math.reduce_prod: |
| axis = 1 |
| np_data = np_data.reshape(1, -1) |
| with tf.Graph().as_default(): |
| if range_axis: |
| axis = tf.range(axis[0], axis[1], axis[2], name="range", dtype="int32") |
| in_data = tf.placeholder(dtype, name="in_data") |
| reduce_op = tf_op(input=in_data, axis=axis, keep_dims=keepdims, name="reduce_std") |
| compare_tf_with_tvm([np_data], ["in_data:0"], reduce_op.name) |
| |
| def _test_raw_reduce_op(op, d_types=None): |
| d_types = d_types or ["int32", "float32"] |
| for dtype in d_types: |
| _check_op(op, (3, 10), axis=(-1), keepdims=False, dtype=dtype) |
| _check_op(op, (8, 16, 32), axis=(-1), keepdims=False, dtype=dtype) |
| _check_op(op, (1, 8, 8, 3), axis=(2, 3), keepdims=True, dtype=dtype) |
| _check_op(op, (2, 3, 10, 10), axis=(1, 2), keepdims=True, dtype=dtype) |
| _check_op(op, (1, 8, 8, 3), axis=(2, 4, 1), keepdims=True, range_axis=True, dtype=dtype) |
| _check_op( |
| op, (2, 3, 10, 10), axis=(1, 3, 1), keepdims=True, range_axis=True, dtype=dtype |
| ) |
| |
| if package_version.parse(tf.VERSION) >= package_version.parse("2.4.1"): |
| _test_raw_reduce_op(tf.raw_ops.All, d_types=["bool"]) |
| _test_raw_reduce_op(tf.raw_ops.Max) |
| _test_raw_reduce_op(tf.raw_ops.Min) |
| |
| |
| ####################################################################### |
| # Relational operators |
| # -------------------- |
| |
| |
| def _test_forward_rel_op(data, func): |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(shape=data[0].shape, dtype=data[0].dtype, name="in1") |
| in2 = tf.placeholder(shape=data[1].shape, dtype=data[1].dtype, name="in2") |
| op = func(in1, in2, name="op") |
| _ = tf.cast(op, tf.int32, name="out1") |
| compare_tf_with_tvm([data[0], data[1]], ["in1:0", "in2:0"], "out1:0") |
| |
| |
| def test_forward_rel_ops(): |
| t1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) |
| t2 = np.array([[9, 8, 7], [6, 5, 4], [3, 2, 1]]) |
| _test_forward_rel_op([t1, t2], math_ops.less) |
| _test_forward_rel_op([t1, t2], math_ops.greater) |
| _test_forward_rel_op([t1, t2], math_ops.less_equal) |
| _test_forward_rel_op([t1, t2], math_ops.greater_equal) |
| _test_forward_rel_op([t1, t2], math_ops.equal) |
| _test_forward_rel_op([t1, t2], math_ops.not_equal) |
| |
| |
| ####################################################################### |
| # ExpandDims |
| # ---------- |
| |
| |
| def _test_forward_expand_dims(data, axis): |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(shape=data.shape, dtype=data.dtype, name="in1") |
| out = tf.expand_dims(in1, axis) |
| compare_tf_with_tvm([data], [in1.name], out.name) |
| |
| |
| def test_forward_expand_dims(): |
| _test_forward_expand_dims(np.int32(1), 0) |
| _test_forward_expand_dims(np.array([1]), 0) |
| _test_forward_expand_dims(np.array([1]), -1) |
| _test_forward_expand_dims(np.array([[1], [2]]), 0) |
| _test_forward_expand_dims(np.array([[1], [2]]), 1) |
| _test_forward_expand_dims(np.array([[1], [2]]), -1) |
| |
| |
| ####################################################################### |
| # Maximum, Minimum |
| # ---------------- |
| def test_forward_maximum(): |
| """test Op Maximum""" |
| |
| def check_maximum(lh_shape, rh_shape, dtype): |
| tf.reset_default_graph() |
| lh_data = np.random.uniform(size=lh_shape).astype(dtype) |
| rh_data = np.random.uniform(size=rh_shape).astype(dtype) |
| with tf.Graph().as_default(): |
| lft_data = tf.placeholder(dtype, name="lft_data") |
| rgt_data = tf.placeholder(dtype, name="rgt_data") |
| tf.math.maximum(lft_data, rgt_data, name="maximum") |
| compare_tf_with_tvm([lh_data, rh_data], ["lft_data:0", "rgt_data:0"], "maximum:0") |
| |
| check_maximum((10, 8, 16, 32), (1,), dtype="int32") |
| check_maximum((10, 8, 16, 32), (10, 8, 16, 32), dtype="float32") |
| |
| |
| def test_forward_minimum(): |
| """test Op Minimum""" |
| |
| def check_minimum(lh_shape, rh_shape, dtype): |
| tf.reset_default_graph() |
| lh_data = np.random.uniform(size=lh_shape).astype(dtype) |
| rh_data = np.random.uniform(size=rh_shape).astype(dtype) |
| with tf.Graph().as_default(): |
| lft_data = tf.placeholder(dtype, name="lft_data") |
| rgt_data = tf.placeholder(dtype, name="rgt_data") |
| tf.math.minimum(lft_data, rgt_data, name="minimum") |
| compare_tf_with_tvm([lh_data, rh_data], ["lft_data:0", "rgt_data:0"], "minimum:0") |
| |
| check_minimum((10, 8, 16, 32), (1,), dtype="int32") |
| check_minimum((10, 8, 16, 32), (10, 8, 16, 32), dtype="float32") |
| |
| |
| ####################################################################### |
| # PlaceholderWithDefault |
| # ---------------------- |
| def test_placeholder(): |
| """Placeholder""" |
| with tf.Graph().as_default(): |
| in_data1 = np.random.uniform(-5, 5, size=(3, 4, 5)).astype(np.float32) |
| var1 = tf.Variable(in_data1, name="in1") |
| var2 = array_ops.placeholder_with_default(var1, None, name="place1") |
| |
| in_data2 = np.random.uniform(-5, 5, size=(3, 4, 5)).astype(np.float32) |
| place1 = array_ops.placeholder(shape=in_data1.shape, dtype=in_data1.dtype, name="in2") |
| |
| out1 = tf.math.add(var1, var2, name="out1") |
| _ = tf.math.add(out1, place1, name="out2") |
| |
| compare_tf_with_tvm( |
| [in_data1, in_data2], ["place1:0", "in2:0"], "out2:0", init_global_variables=True |
| ) |
| |
| |
| ####################################################################### |
| # OneHot |
| # ---------------------- |
| |
| |
| def _test_forward_one_hot(indices_shape, depth, on_value, off_value, axis, out_dtype): |
| inp_array1 = np.random.randint(0, 5, size=indices_shape) |
| with tf.Graph().as_default(): |
| in1 = tf.placeholder(shape=inp_array1.shape, dtype=inp_array1.dtype) |
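        # Reference: with on_value=1, off_value=0 and axis=-1 this matches the
        # NumPy trick np.eye(depth)[indices].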
| out = tf.one_hot(in1, depth, on_value, off_value, axis, dtype=out_dtype) |
| compare_tf_with_tvm(inp_array1, in1.name, out.name) |
| |
| |
| def test_forward_one_hot(): |
| _test_forward_one_hot((3,), 3, 1, 0, -1, "int32") |
| _test_forward_one_hot((3,), 3, 1.0, 0.0, -1, "float32") |
| _test_forward_one_hot((2, 2), 5, 2, -2, 0, "int32") |
| _test_forward_one_hot((2, 2), 5, 0.5, -0.5, 1, "float32") |
| _test_forward_one_hot((3, 2, 4, 5), 6, 1, 0, 1, "int32") |
| _test_forward_one_hot((3, 2, 4, 5), 6, 1.0, 0.0, 0, "float32") |
| |
| |
| ####################################################################### |
| # AddN |
| # ---------------------- |
| |
| |
| def _test_forward_add_n(inputs): |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| temp = [] |
| for each in inputs: |
| temp.append(tf.placeholder(shape=each.shape, dtype=each.dtype)) |
| output = tf.add_n(temp) |
| compare_tf_with_tvm(list(inputs), [each.name for each in temp], output.name) |
| |
| |
| def test_forward_add_n(): |
| """Add n""" |
| x = np.random.randint(1, 100, size=(3, 3, 3), dtype=np.int32) |
| y = np.random.randint(1, 100, size=(3, 3, 3), dtype=np.int32) |
| z = np.random.randint(1, 100, size=(3, 3, 3), dtype=np.int32) |
| m, n, o = x.astype(np.float32), y.astype(np.float32), z.astype(np.float32) |
| in0 = x |
| in1 = [x, y] |
| in2 = (x, y, z) |
| in3 = m |
| in4 = [m, n] |
| in5 = (m, n, o) |
| _test_forward_add_n(in0) |
| _test_forward_add_n(in1) |
| _test_forward_add_n(in2) |
| _test_forward_add_n(in3) |
| _test_forward_add_n(in4) |
| _test_forward_add_n(in5) |
| |
| |
| ####################################################################### |
| # Sharing params case |
| # ---------------------- |
| |
| |
| def test_sharing_node(): |
| """Test the sharing params case.""" |
| np_data = np.random.uniform(size=(2, 2, 2)).astype("float32") |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(tf.float32, shape=(2, 2, 2), name="in_data") |
| axis = tf.constant([-1], dtype=tf.int32, name="axis") |
| mean0 = tf.reduce_mean(in_data, axis=axis, keepdims=False, name="mean0") |
| mean1 = tf.reduce_mean(in_data, axis=axis, keepdims=False, name="mean1") |
| _ = tf.add(mean0, mean1, name="out") |
| compare_tf_with_tvm([np_data], ["in_data:0"], "out:0") |
| |
| |
| ####################################################################### |
| # Unravel Index |
| # ---------------------- |
| def _test_forward_unravel_index(inputs): |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| temp = [] |
| for each in inputs: |
| temp.append(tf.placeholder(shape=each.shape, dtype=each.dtype)) |
| output = tf.unravel_index(temp[0], temp[1]) |
| compare_tf_with_tvm(list(inputs), [each.name for each in temp], output.name) |
| |
| |
| def _test_forward_unravel_index_scalar(x, y, dtype="int32"): |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| indices_1 = constant_op.constant(x, dtype=dtype) |
| dims_1 = constant_op.constant(y, dtype=dtype) |
| out_1 = array_ops.unravel_index(indices_1, dims_1) |
| compare_tf_with_tvm([], [], out_1.name) |
| |
| |
| def test_forward_unravel_index(): |
| """Unravel index""" |
| x = np.array([0, 1, 2, 3]) |
| y = np.array([2, 2]) |
| _test_forward_unravel_index([x, y]) |
| |
| x = np.array([0, 1, 2, 5]) |
| y = np.array([2, 3]) |
| _test_forward_unravel_index([x, y]) |
| |
| x = np.array([0, 1, 2, 5]) |
| y = np.array([6]) |
| _test_forward_unravel_index([x, y]) |
| |
| x = np.array([102, 300, 16]) |
| y = np.array([10, 10, 9, 6]) |
| _test_forward_unravel_index([x, y]) |
| |
| x = np.array([100]) |
| y = np.array([10, 10, 9, 6]) |
| _test_forward_unravel_index([x, y]) |
| |
| # Test scalar input |
| _test_forward_unravel_index_scalar(13, [1, 4, 5, 2]) |
| |
| |
| ####################################################################### |
| # Dilation2d |
| # ---------------------- |
| def _test_dilation2d(tensor_in_sizes, filter_in_sizes, strides, dilations, padding): |
| """One iteration of dilation2d with given shapes and attributes""" |
| |
| total_size_1 = np.prod(tensor_in_sizes) |
| total_size_2 = np.prod(filter_in_sizes) |
    # Initialize the input tensor with an array of numbers incrementing
    # from 1.
| data_array = [f * 1.0 for f in range(1, total_size_1 + 1)] |
| filter_array = [f * 1.0 for f in range(1, total_size_2 + 1)] |
| |
| with tf.Graph().as_default(): |
| in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype="float32") |
| in_filter = constant_op.constant(filter_array, shape=filter_in_sizes, dtype="float32") |
| |
| nn_ops.dilation2d(in_data, in_filter, strides=strides, rates=dilations, padding=padding) |
| |
| compare_tf_with_tvm( |
| np.reshape(data_array, tensor_in_sizes).astype("float32"), |
| "Placeholder:0", |
| "Dilation2D:0", |
| no_gpu=True, |
| ) |
| |
| |
| def test_forward_dilation(): |
| """Dilation2d""" |
| _test_dilation2d([1, 18, 18, 32], [4, 4, 32], [1, 1, 1, 1], [1, 2, 1, 1], "VALID") |
| _test_dilation2d([1, 15, 15, 32], [4, 4, 32], [1, 1, 1, 1], [1, 2, 1, 1], "SAME") |
| _test_dilation2d([1, 5, 5, 1], [2, 2, 1], [1, 1, 1, 1], [1, 1, 1, 1], "VALID") |
| _test_dilation2d([1, 5, 5, 1], [3, 3, 1], [1, 1, 1, 1], [1, 2, 2, 1], "VALID") |
| _test_dilation2d([1, 5, 5, 3], [3, 3, 3], [1, 1, 1, 1], [1, 1, 1, 1], "SAME") |
| _test_dilation2d([1, 28, 28, 3], [5, 5, 3], [1, 2, 2, 1], [1, 1, 1, 1], "VALID") |
| _test_dilation2d([1, 224, 224, 10], [8, 8, 10], [1, 1, 1, 1], [1, 1, 1, 1], "VALID") |
| _test_dilation2d([1, 18, 18, 32], [4, 4, 32], [1, 1, 1, 1], [1, 2, 1, 1], "SAME") |
| _test_dilation2d([1, 15, 15, 32], [4, 4, 32], [1, 1, 1, 1], [1, 2, 1, 1], "VALID") |
| _test_dilation2d([1, 5, 5, 1], [7, 2, 1], [1, 3, 1, 1], [1, 1, 1, 1], "SAME") |
| _test_dilation2d([1, 5, 5, 1], [3, 4, 1], [1, 2, 1, 1], [1, 2, 2, 1], "SAME") |
| _test_dilation2d([1, 5, 5, 3], [3, 3, 3], [1, 1, 4, 1], [1, 1, 1, 1], "VALID") |
| _test_dilation2d([1, 28, 28, 3], [5, 6, 3], [1, 1, 2, 1], [1, 1, 1, 1], "SAME") |
| _test_dilation2d([1, 224, 224, 10], [8, 8, 10], [1, 3, 1, 1], [1, 1, 1, 1], "SAME") |
| _test_dilation2d([1, 3, 3, 1], [2, 2, 1], [1, 1, 1, 1], [1, 2, 2, 1], "SAME") |
| _test_dilation2d([1, 3, 3, 1], [2, 2, 1], [1, 1, 1, 1], [1, 1, 2, 1], "VALID") |
| |
| |
| def _test_identityn(data_np_list): |
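    """One iteration of identity_n with a list of input arrays."""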
| with tf.Graph().as_default(): |
| data_tensors = [] |
| data_tensors_name = [] |
| for index, data_np in enumerate(data_np_list): |
| tensor_name = f"data_{index}" |
| data_tensors_name.append(tensor_name + ":0") |
| data_tensors.append( |
| tf.placeholder(shape=data_np.shape, dtype=str(data_np.dtype), name=tensor_name) |
| ) |
| |
| output = tf.identity_n(data_tensors) |
| output_names = [out.name for out in output] |
| compare_tf_with_tvm( |
| data_np_list, |
| data_tensors_name, |
| output_names, |
| ) |
| |
| |
| @pytest.mark.parametrize( |
| "data_np_list", |
| [ |
| ( |
| [ |
| np.array([[1, 1], [0, 3], [0, 1], [2, 0], [3, 1]], dtype=np.int64), |
| np.array([1, 2, 3, 4, 5], dtype=np.int64), |
| np.array([5, 6], dtype=np.int64), |
| ] |
| ), |
| ( |
| [ |
| np.array([[1, 1], [0, 3], [2, 0], [3, 1]], dtype=np.int64), |
| np.array([1, 2, 3, 4], dtype=np.int64), |
| np.array([5, 6], dtype=np.int64), |
| np.array([True, False, True]), |
| ] |
| ), |
| ( |
| [ |
| np.array([]), |
| np.array([[]]), |
| ] |
| ), |
| ], |
| ) |
| def test_forward_identityn(data_np_list): |
| """Identityn""" |
| _test_identityn(data_np_list) |
| |
| |
| ####################################################################### |
| # infinity ops |
| # ------------ |
| def _verify_infiniteness_ops(tf_op, name): |
| """test operator infinity ops""" |
| |
| # Only float types are allowed in Tensorflow for isfinite and isinf |
| # float16 is failing on cuda |
| tf_dtypes = ["float32", "float64"] # pylint: disable=redefined-outer-name |
| for tf_dtype in tf_dtypes: |
| shape = (8, 8) |
| data = np.random.uniform(size=shape).astype(tf_dtype) |
        data.ravel()[np.random.choice(data.size, int(data.size * 0.5), replace=False)] = np.inf
        data.ravel()[np.random.choice(data.size, int(data.size * 0.5), replace=False)] = np.nan
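        # The inf and nan index sets are drawn independently (each covering half
        # the elements), so they overlap; the data ends up mixing finite,
        # infinite, and NaN values.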
| |
| tf.reset_default_graph() |
| in_data = tf.placeholder(tf_dtype, shape, name="in_data") |
| tf_op(in_data, name=name) |
| compare_tf_with_tvm([data], ["in_data:0"], f"{name}:0") |
| |
| |
| def test_forward_isinf(): |
| _verify_infiniteness_ops(tf.is_inf, "isinf") |
| |
| |
| def test_forward_isfinite(): |
| _verify_infiniteness_ops(tf.is_finite, "isfinite") |
| |
| |
| def test_forward_isnan(): |
| _verify_infiniteness_ops(tf.is_nan, "isnan") |
| |
| |
| def _test_spop_placeholder_without_shape_info(): |
| with tf.Graph().as_default(): |
| |
| @function.Defun(*[tf.int32] * 2) |
| def Forward(x, y): |
| print(x.name) |
| print(y.name) |
| b = tf.add(x, y) |
| return b |
| |
| pl1 = tf.placeholder(tf.int32, name="pl1") |
| pl2 = tf.placeholder(tf.int32, name="pl2") |
| pl3 = tf.placeholder(tf.int32, name="pl3") |
| data = np.array([[-1, 1], [2, -2]], dtype=np.int32) |
| data2 = np.array([[-2, 3], [4, -6]], dtype=np.int32) |
| data3 = np.array([[-2, 3], [4, -6]], dtype=np.int32) |
| z1 = gen_functional_ops.StatefulPartitionedCall(args=[pl1, pl2], Tout=[tf.int32], f=Forward) |
| z2 = z1 + pl3 |
| compare_tf_with_tvm( |
| [data, data2, data3], |
| ["pl1:0", "pl2:0", "pl3:0"], |
| ["StatefulPartitionedCall:0", z2.name], |
| mode="vm", |
| init_global_variables=True, |
| ) |
| |
| |
| def _test_spop_placeholder_with_shape_and_default_value(): |
| with tf.Graph().as_default(): |
| data = np.ones([1], dtype=int).astype(np.int32) |
| dataVar = tf.Variable(data, shape=data.shape) |
| pl1 = array_ops.placeholder_with_default(dataVar, shape=data.shape, name="pl1") |
| tpl = tf.convert_to_tensor(pl1, dtype=tf.int32) |
| |
| @function.Defun(*[tf.int32]) |
| def pl_with_default(pl): |
| return tf.expand_dims(tf.multiply(pl, pl), 0) |
| |
| _ = gen_functional_ops.StatefulPartitionedCall( |
| args=[tpl], Tout=[tf.int32], f=pl_with_default |
| ) |
| compare_tf_with_tvm( |
| data, ["pl1:0"], "StatefulPartitionedCall:0", mode="vm", init_global_variables=True |
| ) |
| |
| |
| def _test_spop_placeholder_numpy_arange_feed(): |
| with tf.Graph().as_default(): |
| t1 = tf.placeholder(tf.int32, (3, 3, 3), "t1") |
| t1_data = np.arange(27, dtype=np.int32).reshape((3, 3, 3)) |
| t2 = tf.placeholder(tf.int32, (3, 3, 3), "t2") |
| t2_data = np.arange(27, dtype=np.int32).reshape((3, 3, 3)) |
| |
| @tf.function |
| def add(x, y): |
| return tf.add(x, y, "add_t1_t2") |
| |
| t3 = add(t1, t2) |
| compare_tf_with_tvm( |
| [t1_data, t2_data], ["t1:0", "t2:0"], [t3.name], mode="vm", init_global_variables=True |
| ) |
| |
| |
| def _test_spop_placeholder_numpy_array_feed(): |
| with tf.Graph().as_default(): |
| t1_data = np.array([[-1, 1, 3], [2, -2, 4], [2, -3, 14]], dtype=np.int32) |
| t2_data = np.array([[-2, 1, 2], [12, -2, 14], [12, -3, 4]], dtype=np.int32) |
| t1 = tf.placeholder(tf.int32, name="t1") |
| t2 = tf.placeholder(tf.int32, name="t2") |
| |
| @tf.function |
| def add(x, y): |
| return tf.add(x, y, "add_t1_t2") |
| |
| t3 = add(t1, t2) |
| compare_tf_with_tvm( |
| [t1_data, t2_data], ["t1:0", "t2:0"], [t3.name], mode="vm", init_global_variables=True |
| ) |
| |
| |
| def _test_spop_function_invocation_basic(): |
| with tf.Graph().as_default(): |
| |
| def fun1(a): |
| return tf.multiply(a, a) |
| |
| def fun2(b): |
| return tf.multiply(b, 10) |
| |
| @tf.function |
| def fun3(x, y): |
| x = fun2(x) |
| y = fun1(y) |
| z = tf.add(x, y) |
| return z |
| |
| t3 = fun3(tf.constant(10.5), tf.constant(20.4)) |
| |
| compare_tf_with_tvm([], [], [t3.name], mode="vm", init_global_variables=True) |
| |
| |
| def _test_spop_function_invocation_nested(): |
| with tf.Graph().as_default(): |
| t1 = tf.placeholder(tf.int32, (3, 3, 3), name="t1") |
| t1_data = np.arange(27, dtype=np.int32).reshape((3, 3, 3)) |
| t2 = tf.placeholder(tf.int32, name="t2") |
| t2_data = np.arange(27, dtype=np.int32).reshape((3, 3, 3)) |
| |
| @tf.function |
| def myfunc(x, y): |
| return tf.add(x, y, "myfunc") |
| |
| @tf.function |
| def myfunc2(x, y): |
| z = myfunc(x, y) |
| l = myfunc(z, y) |
| m = myfunc(l, z) |
| return tf.add(l, m, "myfunc2") |
| |
| res1 = myfunc(t1, t2) |
| res2 = myfunc2(res1, t1) |
| |
| compare_tf_with_tvm( |
| [t1_data, t2_data], ["t1:0", "t2:0"], [res2.name], mode="vm", init_global_variables=True |
| ) |
| |
| |
| def _test_spop_function_invocation_no_autograph(): |
| with tf.Graph().as_default(): |
| |
| @tf.function(autograph=False) |
| def fun1(a): |
| return tf.multiply(a, a) |
| |
| @tf.function(autograph=False) |
| def fun2(b): |
| return tf.multiply(b, 10) |
| |
| @tf.function |
| def fun3(x, y): |
| x = fun2(x) |
| y = fun1(y) |
| z = tf.add(x, y) |
| return z |
| |
| t3 = fun3(tf.constant(10.5), tf.constant(20.4)) |
| |
| compare_tf_with_tvm([], [], [t3.name], mode="vm", init_global_variables=True) |
| |
| |
| def _test_spop_function_invocation_defun(): |
| with tf.Graph().as_default(): |
| |
| def fun1(a): |
| return tf.multiply(a, a) |
| |
| def fun2(b): |
| return tf.multiply(b, b) |
| |
| @function.Defun(dtypes.float32, dtypes.float32, func_name="Fun3") |
| def fun3(x, y): |
| x = fun2(x) |
| y = fun1(y) |
| z = tf.add(x, y) |
| return z |
| |
| _ = gen_functional_ops.StatefulPartitionedCall( |
| args=[tf.constant(10.5), tf.constant(20.4)], |
| Tout=[dtypes.float32], |
| f=fun3, |
| name="SpopFnInvocation", |
| ) |
| compare_tf_with_tvm([], [], "SpopFnInvocation:0", mode="vm", init_global_variables=True) |
| |
| |
| def _test_spop_arithmetic(): |
| with tf.Graph().as_default(): |
| |
| @function.Defun(*[dtypes.int32] * 3) |
| def arithmetic(m, x, c): |
| z = tf.add(tf.multiply(m, x), c) |
| return z |
| |
| m = tf.constant(10) |
| x = tf.constant(20) |
| c = tf.constant(2) |
| _ = gen_functional_ops.StatefulPartitionedCall( |
| args=[m, x, c], Tout=[tf.int32], f=arithmetic |
| ) |
| |
| compare_tf_with_tvm( |
| [], [], "StatefulPartitionedCall:0", mode="vm", init_global_variables=True |
| ) |
| |
| |
| def _test_spop_control_flow(): |
| with tf.Graph().as_default(): |
| |
| @function.Defun(*[dtypes.float32] * 2) |
| def Body1(x, y): |
| with ops.device("/job:localhost/replica:0/task:0/device:CPU:0"): |
| z = math_ops.multiply(x, y) |
| i = 0 |
| while i < 10: |
| i += 1 |
| if i == 5: |
| continue |
| z = math_ops.multiply(x, y * i) |
| return z |
| |
| _ = gen_functional_ops.StatefulPartitionedCall( |
| args=[constant_op.constant(32.0), constant_op.constant(100.0)], |
| Tout=[dtypes.float32], |
| f=Body1, |
| ) |
| compare_tf_with_tvm( |
| [], [], "StatefulPartitionedCall:0", mode="vm", init_global_variables=True |
| ) |
| |
| |
| def _test_spop_variables(): |
| with tf.Graph().as_default(): |
| const1 = tf.constant(10) |
| const2 = tf.constant(20) |
| var1 = tf.Variable(const1, dtype=tf.int32) |
| var2 = tf.Variable(const2, dtype=tf.int32) |
| |
| @function.Defun(tf.int32, tf.int32) |
| def Forward(x, y): |
| return tf.multiply(x, y) |
| |
| _ = gen_functional_ops.StatefulPartitionedCall( |
| args=[var1, var2], Tout=[tf.int32], f=Forward |
| ) |
| compare_tf_with_tvm( |
| [], [], "StatefulPartitionedCall:0", init_global_variables=True, mode="vm" |
| ) |
| |
| |
| def _test_spop_constants(): |
| with tf.Graph().as_default(): |
| |
| @function.Defun(*[dtypes.int32] * 2) |
| def constantsFn(x, y): |
| vv = tf.constant([2, 3, 4], name="vv") |
| z = tf.add(vv + x, y) |
| return z |
| |
| a = tf.constant(20000, name="a") |
| b = tf.constant(40000, name="b") |
| _ = gen_functional_ops.StatefulPartitionedCall(args=[a, b], Tout=[tf.int32], f=constantsFn) |
| |
| compare_tf_with_tvm( |
| [], [], "StatefulPartitionedCall:0", mode="vm", init_global_variables=True |
| ) |
| |
| |
| def _test_spop_stateful(): |
    # This test case checks that TVM rejects any TF stateful operation
    # (including Resource Variables) except StatefulPartitionedCall/PartitionedCall
    # (these two operators can still be used as container graphs to execute
    # "stateless" operations internally).
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| |
| @tf.function |
| def FunctionWithStatefulOp_One(i): |
| b = tf.random.uniform(shape=[2, 4], maxval=10, dtype=tf.float32, seed=10) |
| y = tf.multiply(b, i) |
| return y |
| |
| @tf.function |
| def FunctionWithStatefulOp(m, n): |
| a = tf.random.uniform(shape=[2, 4], maxval=10, dtype=tf.float32, seed=10) |
| x = tf.multiply(a, m) |
| y = FunctionWithStatefulOp_One(n) |
| z = tf.multiply(x, y) |
| return z |
| |
| op = FunctionWithStatefulOp(constant_op.constant(1.0), constant_op.constant(2.0)) |
| with pytest.raises(Exception) as execinfo: |
| compare_tf_with_tvm([], [], [op.name], init_global_variables=True, mode="vm") |
| assert execinfo.value.args[0].startswith("The following operators are not implemented") |
| |
| |
| def _test_spop_device_assignment(): |
    # This test case checks that TVM rejects inconsistent device assignment
    # when using StatefulPartitionedCall/PartitionedCall operators, which TVM
    # treats as container graphs that internally execute "stateless" operations.
| |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| |
| def fun1(a): |
| with ops.device("/GPU:0"): |
| return tf.multiply(a, a) |
| |
| def fun2(b): |
| with ops.device("/job:localhost/replica:0/task:0/device:CPU:1"): |
| return tf.multiply(b, b) |
| |
| @function.Defun(dtypes.float32, dtypes.float32, func_name="Fun3") |
| def fun3(x, y): |
| with ops.device("/CPU:0"): |
| x = fun2(x) |
| with ops.device("/job:localhost/replica:0/task:0/device:CPU:2"): |
| y = fun1(y) |
| with ops.device("/job:localhost/replica:0/task:0/device:CPU:3"): |
| z = tf.add(x, y) |
| return z |
| |
| _ = gen_functional_ops.StatefulPartitionedCall( |
| args=[tf.constant(10.5), tf.constant(20.4)], Tout=[dtypes.float32], f=fun3 |
| ) |
| with pytest.raises(Exception) as execinfo: |
| compare_tf_with_tvm( |
| [], [], "StatefulPartitionedCall:0", mode="vm", init_global_variables=True |
| ) |
| assert execinfo.value.args[0].startswith("Found inconsistent Device assignment") |
| |
| |
| def _test_spop_resource_variables(): |
    # This test case checks that TVM rejects any graph containing
    # resource variables used through StatefulPartitionedCall/PartitionedCall.
| |
| tf.reset_default_graph() |
| with tf.Graph().as_default(): |
| |
| const1 = tf.constant(10) |
| const2 = tf.constant(20) |
| var1 = tf.Variable(const1, dtype=tf.int32, use_resource=True) |
| var2 = tf.Variable(const2, dtype=tf.int32, use_resource=True) |
| |
| @tf.function |
| def resourceVariablesTest(x, y): |
| return tf.multiply(x, y) |
| |
| _ = resourceVariablesTest(var1, var2) |
| with pytest.raises(Exception) as execinfo: |
| compare_tf_with_tvm( |
| [], [], "StatefulPartitionedCall:0", mode="vm", init_global_variables=True |
| ) |
| # pylint: disable=implicit-str-concat |
| assert execinfo.value.args[0].startswith("Graph is not frozen." " Provide a frozen graph") |
| |
| |
| def test_forward_spop(): |
| """Spop""" |
| _test_spop_stateful() |
| _test_spop_device_assignment() |
| # tensorflow version upgrade support |
| # This test is expected to fail in TF version >= 2.6 |
| # as the generated graph will be considered frozen, hence |
| # not passing the criteria for the test below. |
    if LooseVersion(tf.__version__) < LooseVersion("2.6.1"):
| _test_spop_resource_variables() |
| |
| # Placeholder test cases |
| _test_spop_placeholder_without_shape_info() |
| _test_spop_placeholder_with_shape_and_default_value() |
| _test_spop_placeholder_numpy_arange_feed() |
| _test_spop_placeholder_numpy_array_feed() |
| |
| # Function Invocation test cases |
| _test_spop_function_invocation_basic() |
| _test_spop_function_invocation_nested() |
| _test_spop_function_invocation_no_autograph() |
| _test_spop_function_invocation_defun() |
| |
| # Test cases for various other TF constructs |
| _test_spop_arithmetic() |
| _test_spop_control_flow() |
| _test_spop_variables() |
| _test_spop_constants() |
| |
| |
| ####################################################################### |
| # Dynamic input shape |
| # ------------------- |
| def test_forward_dynamic_input_shape(): |
| """Dynamic input shape""" |
| tf.reset_default_graph() |
| |
| with tf.Graph().as_default(): |
| data = tf.placeholder(tf.float32, name="data", shape=(None,)) |
| _ = data + 1 |
| np_data = np.random.uniform(size=(2,)).astype("float32") |
| out_name = "add" |
| |
| with tf.Session() as sess: |
| graph_def = tf_testing.AddShapesToGraphDef(sess, out_name) |
| tf_output = run_tf_graph(sess, np_data, "data:0", [f"{out_name}:0"]) |
| # TODO(kevinthesun): enable gpu test when VM heterogeneous execution is ready. |
| for device in ["llvm"]: |
| _ = tvm.device(device, 0) |
| if not tvm.testing.device_enabled(device): |
| print(f"Skip because {device} is not enabled") |
| continue |
| tvm_output = run_tvm_graph( |
| graph_def, |
| np_data, |
| ["data"], |
| 1, |
| target=device, |
| layout="NCHW", |
| out_names=[out_name], |
| mode="vm", |
| ignore_in_shape=True, |
| ) |
| tvm.testing.assert_allclose(tvm_output[0], tf_output[0], rtol=1e-5, atol=1e-5) |
| |
| |
def test_forward_dynamic_rnn_lstmblockcell():
    """Dynamic RNN with LSTMBlockCell"""
| if package_version.parse(tf.VERSION) >= package_version.parse("2.0.0"): |
| return |
| |
| total_series_length = 50000 |
| truncated_backprop_length = 15 |
| state_size = 4 |
| echo_step = 3 |
| batch_size = 5 |
| num_layers = 5 |
| |
| def generateData(): |
| x = np.array(np.random.choice(2, total_series_length, p=[0.5, 0.5])) |
| y = np.roll(x, echo_step) |
| y[0:echo_step] = 0 |
| |
| x = x.reshape((batch_size, -1)) # The first index changing slowest, subseries as rows |
| y = y.reshape((batch_size, -1)) |
| |
| return (x, y) |
| |
| batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length]) |
| |
| init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size]) |
| |
| state_per_layer_list = tf.unstack(init_state, axis=0) |
| rnn_tuple_state = tuple( |
| list( |
| tf.nn.rnn_cell.LSTMStateTuple( |
| state_per_layer_list[idx][0], state_per_layer_list[idx][1] |
| ) |
| for idx in range(num_layers) |
| ) |
| ) |
| |
| # Forward passes |
| def lstm_cell(): |
        return tf.contrib.rnn.LSTMBlockCell(state_size)
| |
| cell = tf.nn.rnn_cell.MultiRNNCell( |
| [lstm_cell() for _ in range(num_layers)], state_is_tuple=True |
| ) |
| states_series, current_state = tf.nn.dynamic_rnn( |
| cell, tf.expand_dims(batchX_placeholder, -1), initial_state=rnn_tuple_state |
| ) |
| |
| with tf.Session() as sess: |
| sess.run(tf.global_variables_initializer()) |
| x, _ = generateData() |
| _current_state = np.zeros((num_layers, 2, batch_size, state_size)) |
| |
| start_idx = 0 |
| end_idx = start_idx + truncated_backprop_length |
| |
| batchX = x[:, start_idx:end_idx] |
| |
| # Save current state for TVM |
| current_state_tvm = _current_state |
| |
| _current_state, _states_series = sess.run( |
| [current_state, states_series], |
| feed_dict={batchX_placeholder: batchX, init_state: _current_state}, |
| ) |
| |
| # Organize results and corresponding names |
| tf_output = [_states_series] |
| |
| for c in _current_state: |
| tf_output.append(c.c) |
| tf_output.append(c.h) |
| |
| name = [states_series.name.split(":")[0]] |
| |
| for t in current_state: |
| name.append(t.c.name.split(":")[0]) |
| name.append(t.h.name.split(":")[0]) |
| |
| graph_def = sess.graph.as_graph_def(add_shapes=True) |
| |
| final_graph_def = graph_util.convert_variables_to_constants(sess, graph_def, name) |
| |
        tvm_output = run_tvm_graph(
            final_graph_def,
            [batchX.astype("float32"), current_state_tvm.astype("float32")],
            ["Placeholder", "Placeholder_1"],
            out_names=name,
            num_output=len(name),
            mode="vm",
            disabled_pass=["FoldScaleAxis"],
        )

        # Compare TVM results against TF results
        for i, tf_out in enumerate(tf_output):
            tvm.testing.assert_allclose(tf_out, tvm_output[i], atol=1e-5, rtol=1e-5)
| |
| |
| ####################################################################### |
| # Unique |
| # ------------ |
| |
| |
| def _test_unique(n, dtype, is_dyn): |
| tf.reset_default_graph() |
| np_data = np.random.randint(100, size=n).astype(dtype) |
| with tf.Graph().as_default(): |
| if is_dyn: |
| in_data = tf.placeholder(dtype, [n], name="in_data") |
| else: |
| in_data = tf.constant(np_data, dtype, name="in_data") |
| tf.unique(in_data) |
| if is_dyn: |
| compare_tf_with_tvm(np_data, "in_data:0", ["Unique:0", "Unique:1"], mode="vm") |
| else: |
| compare_tf_with_tvm(np_data, "", ["Unique:0", "Unique:1"], mode="vm") |
| |
| |
| def test_forward_unique(): |
| """test Unique""" |
| |
| for dtype in ["int32", "int64"]: |
| for is_dyn in [False, True]: |
| _test_unique(50, dtype, is_dyn) |
| _test_unique(100, dtype, is_dyn) |
| |
| |
| ####################################################################### |
| # Unique with counts |
| # ------------ |
| |
| |
| def _test_unique_with_counts(n, dtype, is_dyn): |
| tf.reset_default_graph() |
| np_data = np.random.randint(100, size=n).astype(dtype) |
| with tf.Graph().as_default(): |
| if is_dyn: |
| in_data = tf.placeholder(dtype, [n], name="in_data") |
| else: |
| in_data = tf.constant(np_data, dtype, name="in_data") |
| tf.unique_with_counts(in_data) |
| if is_dyn: |
| compare_tf_with_tvm( |
| np_data, |
| "in_data:0", |
| ["UniqueWithCounts:0", "UniqueWithCounts:1", "UniqueWithCounts:2"], |
| mode="vm", |
| ) |
| else: |
| compare_tf_with_tvm( |
| np_data, |
| "", |
| ["UniqueWithCounts:0", "UniqueWithCounts:1", "UniqueWithCounts:2"], |
| mode="vm", |
| ) |
| |
| |
| def test_forward_unique_with_counts(): |
| """test UniqueWithCounts""" |
| |
| for dtype in ["int32", "int64"]: |
| for is_dyn in [False, True]: |
| _test_unique_with_counts(10, dtype, is_dyn) |
| _test_unique_with_counts(20, dtype, is_dyn) |
| |
| |
| ####################################################################### |
| # check graph ir for nn.moments |
| # ------------ |
| |
| |
| def test_moments(): |
| """NN.moments""" |
| g = tf.Graph() |
| shape = [4, 176, 8, 8] |
| dtype = "float32" |
| with g.as_default(): |
| A = tf.placeholder(shape=shape, dtype=dtype, name="A") |
| _ = tf.placeholder(shape=shape, dtype=dtype, name="B") |
| mean, variance = tf.nn.moments(A, [1], keep_dims=True) |
| _ = (A - mean) / tf.sqrt(variance + 0.0005) |
| |
| with tvm.testing.disable_span_filling(): |
| mod, _ = from_tensorflow(g.as_graph_def(add_shapes=True)) |
| with tvm.testing.enable_span_filling(): |
| mod_with_span, _ = from_tensorflow(g.as_graph_def(add_shapes=True)) |
| assert tvm.ir.structural_equal(mod["main"], mod_with_span["main"], map_free_vars=True) |
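
    # The golden program below is the expected Relay lowering of tf.nn.moments:
    # mean, squared difference, variance, then the final normalization.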
| |
| program = """ |
| def @main(%A: Tensor[(4, 176, 8, 8), float32]) { |
| %527 = mean(%A, axis=[1], keepdims=True) /* moments/mean */; |
| %528 = subtract(%A, %527) /* sub */; |
| %529 = subtract(%A, %527); |
| %530 = multiply(%529, %529) /* moments/SquaredDifference */; |
| %531 = mean(%530, axis=[1], keepdims=True) /* moments/variance */; |
| %532 = add(%531, 0.0005f) /* add */; |
| %533 = sqrt(%532) /* Sqrt */; |
| divide(%528, %533) /* truediv */ |
| } |
| """ |
| mod_golden = tvm.relay.parse('#[version = "0.0.5"]\n' + program) |
| tvm.ir.assert_structural_equal(mod["main"].body, mod_golden["main"].body, map_free_vars=True) |
| |
| |
| ####################################################################### |
| # invert_permutation |
| # -------------------- |
| |
| |
| def test_invert_permutation(): |
| """test InvertPermutation""" |
| tf.reset_default_graph() |
| |
| input_shape = [6] |
| x = np.array([3, 4, 0, 2, 1, 5]).astype("int32") |
| with tf.Graph().as_default(): |
| in_data = tf.placeholder(shape=input_shape, dtype="int32") |
| tf.invert_permutation(in_data) |
| out_name = "InvertPermutation:0" |
| compare_tf_with_tvm(x, "Placeholder:0", out_name, no_gpu=False) |
| |
| |
| ####################################################################### |
| # Bincount |
| # ---- |
| |
| |
| def _test_bincount(in_shape, size, weights): |
| with tf.Graph().as_default(): |
| inputs = [] |
| data = [] |
| inputs.append(tf.placeholder(shape=in_shape, dtype="int32", name="input0")) |
| data.append(np.random.uniform(0, size, size=in_shape).astype("int32")) |
| inputs.append(tf.placeholder(shape=(), dtype="int32", name="size")) |
| data.append(np.array(size, "int32")) |
| if weights: |
| inputs.append(tf.placeholder(shape=in_shape, dtype="float32", name="weights")) |
| data.append(np.reshape(weights, in_shape).astype("float32")) |
| else: |
| inputs.append(tf.placeholder(shape=(0,), dtype="float32", name="weights")) |
| data.append(np.array([], "float32")) |
        result = tf.raw_ops.Bincount(arr=inputs[0], size=inputs[1], weights=inputs[2])
| compare_tf_with_tvm(data, [a.name for a in inputs], result.name, mode="vm") |
| |
| |
| def test_forward_bincount(): |
| """Test Bincount Op""" |
| # 2D input |
| _test_bincount((3, 10), 20, [1.0] * 30) |
| _test_bincount((3, 10), 20, [1.5] * 30) |
| _test_bincount((3, 10), 20, None) |
| # 1D input |
| _test_bincount((10,), 20, [1.0] * 10) |
| _test_bincount((10,), 20, [1.5] * 10) |
| _test_bincount((10,), 20, None) |
| |
| |
| ####################################################################### |
| # DenseBincount |
| # ---- |
| |
| |
| def _test_dense_bincount(in_shape, size, weights, binary_output): |
| with tf.Graph().as_default(): |
| inputs = [] |
| data = [] |
| inputs.append(tf.placeholder(shape=in_shape, dtype="int32", name="input0")) |
| data.append(np.random.uniform(0, size, size=in_shape).astype("int32")) |
| inputs.append(tf.placeholder(shape=(), dtype="int32", name="size")) |
| data.append(np.array(size, "int32")) |
| if weights: |
| inputs.append(tf.placeholder(shape=in_shape, dtype="float32", name="weights")) |
| data.append(np.reshape(weights, in_shape).astype("float32")) |
| else: |
| inputs.append(tf.placeholder(shape=(0,), dtype="float32", name="weights")) |
| data.append(np.array([], "float32")) |
        result = tf.raw_ops.DenseBincount(
            input=inputs[0],
            size=inputs[1],
            weights=inputs[2],
            binary_output=binary_output,
        )
| compare_tf_with_tvm(data, [a.name for a in inputs], result.name, mode="vm") |
| |
| |
| def test_forward_dense_bincount(): |
| """Test DenseBincount Op""" |
| for binary_output in [False, True]: |
| # 2D input |
| _test_dense_bincount((3, 10), 20, [1.0] * 30, binary_output) |
| _test_dense_bincount((3, 10), 20, [1.5] * 30, binary_output) |
| _test_dense_bincount((3, 10), 20, None, binary_output) |
| # 1D input |
| _test_dense_bincount((10,), 20, [1.0] * 10, binary_output) |
| _test_dense_bincount((10,), 20, [1.5] * 10, binary_output) |
| _test_dense_bincount((10,), 20, None, binary_output) |
| |
| |
| ####################################################################### |
| # Test structural_equal and span of a model |
| # -------------------------------------- |
| class TestSetSpan: |
| """Test Structure and span of frequently-used models""" |
| |
| def _verify(self, res_fptr, golden_fptr): |
| with tvm.testing.enable_span_filling(): |
| with_span = res_fptr() |
| with tvm.testing.disable_span_filling(): |
| without_span = res_fptr() |
| assert tvm.ir.structural_equal(with_span, without_span) |
| _verify_structural_equal_with_span(with_span, golden_fptr()) |
| |
| def test_conv2d_bias_add_span(self): |
| """Test Structure and span of conv2d and bias add model match to the expected result""" |
| |
| def _res(): |
| in_shape = (1, 5, 5, 1) |
            kernel_shape = (2, 2, 1, 2)
            kernel_in = np.ones(kernel_shape)
            bias_val_shape = (2,)
            bias_val_in = np.ones(bias_val_shape)
| |
| with tf.Graph().as_default() as g: |
| x = array_ops.placeholder(shape=in_shape, dtype="float32", name="input") |
| kernel = tf.constant(kernel_in, dtype=tf.float32, name="filter_weight") |
| bias_val_tensor = tf.constant(bias_val_in, dtype=tf.float32, name="conv2d_bias") |
| conv2d = tf.nn.conv2d( |
| x, kernel, strides=[1, 1, 1, 1], padding="VALID", name="conv2d" |
| ) |
| _ = tf.nn.bias_add(conv2d, bias_val_tensor, name="bias_add") |
| |
| mod, _ = relay.frontend.from_tensorflow( |
| g.as_graph_def(), shape={"input": in_shape}, outputs=["bias_add"] |
| ) |
| return mod["main"] |
| |
| def _golden(): |
| model_in = relay.var( |
| "input", relay.TensorType([1, 5, 5, 1]), span=_create_span("input") |
| ) |
| weight = relay.var( |
| "filter_weight", relay.TensorType([2, 2, 1, 2]), span=_create_span("filter_weight") |
| ) |
| bias = relay.var("conv2d_bias", relay.TensorType([2]), span=_create_span("conv2d_bias")) |
| conv2d = _set_span( |
| relay.nn.conv2d( |
| model_in, |
| weight, |
| channels=2, |
| kernel_size=[2, 2], |
| data_layout="NHWC", |
| kernel_layout="HWIO", |
| ), |
| "conv2d", |
| ) |
| add = _set_span(relay.op.add(conv2d, bias), "bias_add") |
| mod = ir.IRModule.from_expr(add) |
| return mod["main"] |
| |
| self._verify(_res, _golden) |
| |
| def test_fully_connected_bias_add_span(self): |
| """Test Structure and span of fully connected model match to the expected result""" |
| |
| def _res(): |
| in_shape = (1, 10) |
            kernel_shape = (10, 10)
            kernel_in = np.ones(kernel_shape)
            bias_val_shape = (10,)
            bias_val_in = np.ones(bias_val_shape)
| |
| with tf.Graph().as_default() as g: |
| x = array_ops.placeholder(shape=in_shape, dtype="float32", name="input") |
| in_filter = tf.constant(kernel_in, dtype=tf.float32, name="filter_weight") |
| bias_val_tensor = tf.constant(bias_val_in, dtype=tf.float32, name="dense_bias") |
| mat_mul = math_ops.mat_mul(x, in_filter, name="dense") |
| _ = tf.nn.bias_add(mat_mul, bias_val_tensor, name="bias_add") |
| |
| mod, _ = relay.frontend.from_tensorflow( |
| g.as_graph_def(), |
| shape={"input": in_shape}, |
| outputs=["bias_add"], |
| convert_config={"use_dense": True}, |
| ) |
| return mod["main"] |
| |
| def _golden(): |
| model_in = relay.var("input", relay.TensorType([1, 10]), span=_create_span("input")) |
| weight = relay.var( |
| "filter_weight", relay.TensorType([10, 10]), span=_create_span("filter_weight") |
| ) |
| bias = relay.var("dense_bias", relay.TensorType([10]), span=_create_span("dense_bias")) |
| transpose = _set_span(relay.transpose(weight, [1, 0]), "dense") |
| dense = _set_span(relay.nn.dense(model_in, transpose, units=10), "dense") |
| add = _set_span(relay.op.add(dense, bias), "bias_add") |
| mod = ir.IRModule.from_expr(add) |
| return mod["main"] |
| |
| self._verify(_res, _golden) |
| |
| def test_reshape_span(self): |
| """Test Structure and span of reshape model match to the expected result""" |
| |
| def _res(): |
| in_shape = (1, 10) |
| output_shape = (2, 5) |
| |
| with tf.Graph().as_default() as g: |
| x = array_ops.placeholder(shape=in_shape, dtype="float32", name="input") |
| _ = array_ops.reshape(x, output_shape, "reshape") |
| |
| mod, _ = relay.frontend.from_tensorflow( |
| g.as_graph_def(), shape={"input": in_shape}, outputs=["reshape"] |
| ) |
| return mod["main"] |
| |
| def _golden(): |
| model_in = relay.var("input", relay.TensorType([1, 10]), span=_create_span("input")) |
| reshape = _set_span(relay.reshape(model_in, [2, 5]), "reshape") |
| mod = ir.IRModule.from_expr(reshape) |
| return mod["main"] |
| |
| self._verify(_res, _golden) |
| |
| def test_batch_norm_span(self): |
| """Test Structure and span of batchnorm model match to the expected result""" |
| |
| def _res(): |
| in_shape = (1, 12, 12, 32) |
| with tf.Graph().as_default() as g: |
| input_tensor = tf.placeholder(tf.float32, shape=in_shape, name="input") |
| alpha = tf.constant( |
| np.ones( |
| in_shape[-1], |
| ), |
| dtype=tf.float32, |
| name="alpha", |
| ) |
| beta = tf.constant( |
| np.ones( |
| in_shape[-1], |
| ), |
| dtype=tf.float32, |
| name="beta", |
| ) |
| _ = tf.nn.fused_batch_norm(x=input_tensor, offset=beta, scale=alpha, name="bn") |
| mod, _ = relay.frontend.from_tensorflow( |
| g.as_graph_def(), shape={"input": in_shape}, outputs=["bn"] |
| ) |
| return mod["main"] |
| |
| def _golden(): |
| model_in = relay.var( |
| "input", relay.TensorType([1, 12, 12, 32]), span=_create_span("input") |
| ) |
| alpha = relay.var("alpha", relay.TensorType([32]), span=_create_span("alpha")) |
| beta = relay.var("beta", relay.TensorType([32]), span=_create_span("beta")) |
| mean = _set_span(relay.op.mean(model_in, axis=[3], exclude=True), "bn") |
| variance_mean = _set_span( |
| relay.op.mean(model_in, axis=[3], keepdims=True, exclude=True), "bn" |
| ) |
| variance = _set_span( |
| relay.op._make._variance(model_in, variance_mean, [3], False, True, False), "bn" |
| ) |
| bn = _set_span( |
| relay.nn.batch_norm(model_in, alpha, beta, mean, variance, axis=3, epsilon=0.001), |
| "bn", |
| ) |
| mod = ir.IRModule.from_expr(bn[0]) |
| return mod["main"] |
| |
| self._verify(_res, _golden) |
| |
| |
| if __name__ == "__main__": |
| tvm.testing.main() |