# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import collections
import ctypes
import json
import os
import re
from contextlib import redirect_stderr
from io import StringIO
import numpy as np
import tvm
import tvm.contrib.graph_executor
import tvm.relay
import tvm.testing
from tvm import meta_schedule as ms
from tvm import relay
from tvm.contrib import utils
from tvm.relay.backend import Executor, Runtime
INPUT_SHAPE = (1, 3, 16, 16)
KERNEL_SHAPE = (3, 3, 3, 3)
# The data types that are linkable. tvm.testing.parameter registers linkable_dtype
# as a pytest fixture, so each test below runs once per listed dtype.
linkable_dtype = tvm.testing.parameter(
*(
[f"uint{b}" for b in (8, 16, 32, 64)]
+ [f"int{b}" for b in (8, 16, 32, 64)]
+ ["float32", "float64"]
)
)
def dtype_info(dtype):
"""Lookup numpy type info for the given string dtype (of linkable_dtype params above)."""
if "int" in dtype:
return np.iinfo(getattr(np, dtype))
else:
return np.finfo(getattr(np, dtype))
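# For example, dtype_info("int8").max == 127 and dtype_info("float32").bits == 32.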
# Note: for debugging, set this to an integer (e.g. 1). Then all "random" tensors become
# predictable, counting up from this value.
RANDOM_TENSOR_START = None
def _make_random_tensor(dtype, shape):
"""Create a random test tensor with given shape and dtype."""
    global RANDOM_TENSOR_START
if RANDOM_TENSOR_START is not None:
to_return = np.arange(
RANDOM_TENSOR_START, RANDOM_TENSOR_START + np.prod(shape), dtype=dtype
).reshape(shape)
        RANDOM_TENSOR_START += np.prod(shape)
return to_return
dinfo = dtype_info(dtype)
if "int" in dtype:
return np.random.randint(dinfo.min, dinfo.max, shape, dtype=dtype)
else:
to_return = np.random.uniform(0, dinfo.max, shape).astype(dtype)
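        # Flip the sign of every other element so both signs are exercised.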
np.reshape(to_return, np.prod(shape))[::2] *= -1
return to_return
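# For example, _make_random_tensor("int8", (2, 2)) returns a 2x2 int8 array drawn
# from the full int8 range; float tensors additionally get alternating signs.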
def _lookup_sid(graph, name):
"""Lookup the storage id of a named parameter.
Arguments
---------
graph : dict
Parsed JSON graph.
name : str
Name of the tensor parameter to lookup.
Returns
-------
int :
The storage_id of the parameter.
"""
num_outputs_seen = 0
for i, n in enumerate(graph["nodes"]):
if n["name"] == name:
print("sid", name, graph["attrs"]["storage_id"][1], num_outputs_seen)
return graph["attrs"]["storage_id"][1][num_outputs_seen]
else:
if "attrs" in n and "num_outputs" in n["attrs"]:
num_outputs_seen += int(n["attrs"]["num_outputs"])
else:
num_outputs_seen += 1
raise KeyError(f"no such param: {name}")
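# For reference, the graph JSON consulted above looks roughly like this
# (abbreviated, hypothetical values):
#   {
#     "nodes": [{"name": "rand_input", "attrs": {"num_outputs": "1"}}, ...],
#     "attrs": {"storage_id": ["list_int", [0, 1, 2, 1, ...]]},
#   }
# storage_id[1] holds one id per node output, in cumulative output order.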
def _get_ctypes_dtype(dt):
"""Return a ctypes c_* datatype given a string data type."""
if "int" in dt:
return getattr(ctypes, f"c_{dt}")
elif dt == "float32":
return ctypes.c_float
elif dt == "float64":
return ctypes.c_double
else:
assert False, f"unknown dtype: {dt}"
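# For example, _get_ctypes_dtype("uint16") is ctypes.c_uint16.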
def _verify_linked_param(dtype, lib, mod, graph, name):
"""Directly read memory from the linked library to verify the linked parameter is correct."""
sid = _lookup_sid(graph, name)
# NOTE: query_imports=True because when loading a module from disk (i.e. for C backend),
# a GraphExecutorFactory module is created instead of the module itself.
param_ptr = mod.get_function("_lookup_linked_param", True)(sid)
gen_param = lib.params[name]
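    # Build a zero-copy numpy view over the linked parameter: a ctypes array typed
    # from the raw address returned by _lookup_linked_param, then an ndarray that
    # uses it as its backing buffer.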
arr_data = (_get_ctypes_dtype(dtype) * np.prod(gen_param.shape)).from_address(param_ptr.value)
arr = np.ndarray(shape=gen_param.shape, dtype=gen_param.dtype, buffer=arr_data, order="C")
if "int" in gen_param.dtype:
np.testing.assert_equal(gen_param.numpy(), arr)
else:
np.testing.assert_allclose(gen_param.numpy(), arr)
return dtype == gen_param.dtype
def _make_mod_and_params(dtype):
"""Create a Relay module and parameters to test the given datatype."""
param_decls = collections.OrderedDict()
param_init = {}
def _add_decl(name, dtype):
param_decls[name] = f"%{name} : Tensor[{KERNEL_SHAPE}, {dtype}]"
param_init[name] = _make_random_tensor(dtype, KERNEL_SHAPE)
    # Add two parameters so that a parameter's position in the graph can differ
    # from its storage_id (see the note on GraphPlanMemory below).
_add_decl(f"{dtype}_a", dtype)
_add_decl(f"{dtype}_b", dtype)
mod_lines = [
'#[version = "0.0.5"]',
f"def @main(%rand_input : Tensor[{INPUT_SHAPE}, {dtype}], { ', '.join(param_decls.values()) } ) {{",
# This program ensures that GraphPlanMemory alternates between the same two storage IDs for a
# while. In doing this, it ensures that param %{dtype}_b will be placed into the graph at an
# index unequal to its storage_id. This ensures that GraphExecutorCodegen encodes the storage_id
# and not the parameter index into the graph.
(
f' %0 = nn.conv2d(%rand_input, %{dtype}_a, data_layout="NCHW", kernel_layout="OIHW", '
f'kernel_size=[3, 3], out_dtype="{dtype}");'
),
(
f' %1 = nn.conv2d(%0, %{dtype}_a, data_layout="NCHW", kernel_layout="OIHW", '
f'kernel_size=[3, 3], out_dtype="{dtype}");'
),
(
f' %2 = nn.conv2d(%1, %{dtype}_a, data_layout="NCHW", kernel_layout="OIHW", '
f'kernel_size=[3, 3], out_dtype="{dtype}");'
),
(
f' %3 = nn.conv2d(%2, %{dtype}_b, data_layout="NCHW", kernel_layout="OIHW", '
f'kernel_size=[3, 3], out_dtype="{dtype}");'
),
" %3",
"}",
]
mod = tvm.relay.fromtext("\n".join(mod_lines))
return mod, param_init
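# For linkable_dtype == "float32", the text assembled above parses to roughly:
#   #[version = "0.0.5"]
#   def @main(%rand_input : Tensor[(1, 3, 16, 16), float32],
#             %float32_a : Tensor[(3, 3, 3, 3), float32],
#             %float32_b : Tensor[(3, 3, 3, 3), float32]) {
#     %0 = nn.conv2d(%rand_input, %float32_a, ...);
#     ...
#     %3 = nn.conv2d(%2, %float32_b, ...);
#     %3
#   }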
@tvm.testing.requires_llvm
def test_llvm_link_params(linkable_dtype):
ir_mod, param_init = _make_mod_and_params(linkable_dtype)
rand_input = _make_random_tensor(linkable_dtype, INPUT_SHAPE)
main_func = ir_mod["main"]
target = "llvm"
runtime = Runtime("crt", {"system-lib": True})
executor = Executor("graph", {"link-params": True})
with tvm.transform.PassContext(opt_level=3):
lib = tvm.relay.build(ir_mod, target, runtime=runtime, executor=executor, params=param_init)
# NOTE: Need to export_library() and load_library() to link all the Module(llvm, ...)
# against one another.
temp_dir = utils.TempDirectory()
export_file = temp_dir / "lib.so"
lib.lib.export_library(export_file)
mod = tvm.runtime.load_module(export_file)
assert len(lib.params.keys()) == 0 # NOTE: params became tir.constants
    assert mod.get_function("TVMSystemLibEntryPoint") is not None
graph = json.loads(lib.graph_json)
for p in lib.params:
        assert _verify_linked_param(linkable_dtype, lib, mod, graph, p)
# Wrap in function to explicitly deallocate the runtime.
def _run_linked(lib, mod):
graph_json, _, _ = lib
graph_rt = tvm.contrib.graph_executor.create(graph_json, mod, tvm.cpu(0))
graph_rt.set_input("rand_input", rand_input) # NOTE: params not required.
graph_rt.run()
return graph_rt.get_output(0)
linked_output = _run_linked(lib, mod)
runtime = Runtime("cpp", {"system-lib": True})
with tvm.transform.PassContext(opt_level=3):
lib = tvm.relay.build(ir_mod, "llvm", runtime=runtime, params=param_init)
def _run_unlinked(lib):
graph_json, mod, lowered_params = lib
graph_rt = tvm.contrib.graph_executor.create(graph_json, mod, tvm.cpu(0))
graph_rt.set_input("rand_input", rand_input, **lowered_params)
graph_rt.run()
return graph_rt.get_output(0)
unlinked_output = _run_unlinked(lib)
if "int" in linkable_dtype:
np.testing.assert_equal(unlinked_output.numpy(), linked_output.numpy())
else:
np.testing.assert_allclose(unlinked_output.numpy(), linked_output.numpy())
def _get_c_datatype(dtype):
"""Translate LINKABLE_DTYPES element to c datatype."""
if "int" in dtype:
return f"{dtype}_t"
elif dtype == "float32":
return "float"
elif dtype == "float64":
return "double"
else:
assert False, f"unknown dtype {dtype}"
HEX_NUM_RE = re.compile(r"[+\-]?(?:(?:0x[0-9A-Fa-f.p+-]+)|(?:INFINITY)|(?:NAN))")
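# HEX_NUM_RE matches one C literal as emitted into the generated parameter arrays,
# e.g. "0x1.8p+3", "-0x1p-2", "0xffff", "INFINITY", or "NAN".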
def test_c_link_params(linkable_dtype):
temp_dir = utils.tempdir()
mod, param_init = _make_mod_and_params(linkable_dtype)
rand_input = _make_random_tensor(linkable_dtype, INPUT_SHAPE)
main_func = mod["main"]
target = "c"
executor = Executor("graph", {"link-params": True})
with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
lib = tvm.relay.build(mod, target, executor=executor, params=param_init)
assert len(lib.params.keys()) == 0 # NOTE: params became tir.constants
src = lib.lib.get_source()
lib.lib.save(temp_dir.relpath("test.c"), "c")
c_dtype = _get_c_datatype(linkable_dtype)
src_lines = src.split("\n")
param = param_init[f"{linkable_dtype}_a"].reshape(np.prod(KERNEL_SHAPE))
param_def = rf"^static const {c_dtype} __attribute__\(\(section\(\".rodata.tvm\"\), aligned\(16\)\)\) [a-zA-Z_0-9]*constant_\d+\[{np.prod(param.shape)}\] = {{$"
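    # A line matching param_def looks roughly like this (hypothetical symbol name;
    # the actual name is assigned by the C codegen):
    #   static const float __attribute__((section(".rodata.tvm"), aligned(16))) constant_0[81] = {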
for i, line in enumerate(src_lines):
if re.match(param_def, line):
i += 1
break
else:
assert False, f'did not find parameter definition "{param_def}":\n{src}'
cursor = 0
width = dtype_info(linkable_dtype).bits // 4 + 2
if linkable_dtype.startswith("int"):
width += 1 # Account for sign
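    # NOTE: width (the expected character width of one printed literal) is computed
    # but not currently checked against the emitted source.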
while "};" not in src_lines[i]:
for match in HEX_NUM_RE.finditer(src_lines[i]):
cursor += 1
i += 1
assert cursor == np.prod(param.shape)
# Need a unique name per library to avoid dlopen caching the lib load.
lib_path = temp_dir.relpath(f"test-{linkable_dtype}-linked.so")
lib["remove_params"]().export_library(lib_path)
lib_mod = tvm.runtime.load_module(lib_path)
graph = json.loads(lib.graph_json)
for p in lib.params:
_verify_linked_param(linkable_dtype, lib, lib_mod, graph, p)
# Wrap in function to explicitly deallocate the runtime.
def _run_linked(lib_mod):
graph_rt = tvm.contrib.graph_executor.GraphModule(lib_mod["default"](tvm.cpu(0)))
graph_rt.set_input("rand_input", rand_input) # NOTE: params not required.
graph_rt.run()
return graph_rt.get_output(0)
linked_output = _run_linked(lib_mod)
linked_params = lib.params
with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
lib = tvm.relay.build(mod, "c", params=param_init)
_, _, params = lib
# Need a unique name per library to avoid dlopen caching the lib load.
lib_path = temp_dir.relpath(f"test-{linkable_dtype}-unlinked.so")
lib.export_library(lib_path)
lib_mod = tvm.runtime.load_module(lib_path)
def _run_unlinked(lib_mod):
graph_rt = tvm.contrib.graph_executor.GraphModule(lib_mod["default"](tvm.cpu(0)))
graph_rt.set_input("rand_input", rand_input, **params)
graph_rt.run()
return graph_rt.get_output(0)
unlinked_output = _run_unlinked(lib_mod)
if "int" in linkable_dtype:
np.testing.assert_equal(unlinked_output.numpy(), linked_output.numpy())
else:
np.testing.assert_allclose(unlinked_output.numpy(), linked_output.numpy())
@tvm.testing.requires_micro
def test_crt_link_params(linkable_dtype):
from tvm import micro
mod, param_init = _make_mod_and_params(linkable_dtype)
rand_input = _make_random_tensor(linkable_dtype, INPUT_SHAPE)
main_func = mod["main"]
target = "c"
runtime = Runtime("crt", {"system-lib": True})
executor = Executor("graph", {"link-params": True})
with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
factory = tvm.relay.build(
mod, target, runtime=runtime, executor=executor, params=param_init
)
assert len(factory.get_params().keys()) == 0 # NOTE: params became tir.constants
temp_dir = tvm.contrib.utils.tempdir()
template_project_dir = tvm.micro.get_microtvm_template_projects("crt")
project = tvm.micro.generate_project(
template_project_dir, factory, temp_dir / "project", {"verbose": 1}
)
project.build()
project.flash()
with tvm.micro.Session(project.transport()) as sess:
graph_rt = tvm.micro.session.create_local_graph_executor(
factory.get_graph_json(), sess.get_system_lib(), sess.device
)
assert len(factory.params.keys()) == 0 # NOTE: params became tir.constants
# NOTE: not setting params here.
graph_rt.set_input("rand_input", rand_input)
graph_rt.run()
linked_output = graph_rt.get_output(0).numpy()
runtime = Runtime("cpp", {"system-lib": True})
with tvm.transform.PassContext(opt_level=3):
lib = tvm.relay.build(mod, "llvm", runtime=runtime, params=param_init)
def _run_unlinked(lib):
graph_json, mod, lowered_params = lib
graph_rt = tvm.contrib.graph_executor.create(graph_json, mod, tvm.cpu(0))
graph_rt.set_input("rand_input", rand_input, **lowered_params)
graph_rt.run()
return graph_rt.get_output(0).numpy()
unlinked_output = _run_unlinked(lib)
if "int" in linkable_dtype:
np.testing.assert_equal(unlinked_output, linked_output)
else:
np.testing.assert_allclose(unlinked_output, linked_output)
def test_tir_link_params():
def get_dense(data_shape, weight_shape):
data = relay.var("data", shape=data_shape, dtype="float32")
weight = relay.var("weight", shape=weight_shape, dtype="float32")
dense = relay.nn.dense(data, weight)
return relay.Function([data, weight], dense)
def get_ref_dense(data_np, weight_np):
return np.dot(data_np, np.transpose(weight_np))
def schedule_dense(sch):
dense = sch.get_block("T_matmul_NT")
_y, _x, _k = sch.get_loops(dense)
M, N, K = 128, 128, 128
data_shape = (M, K)
weight_shape = (N, K)
relay_mod = tvm.IRModule.from_expr(get_dense(data_shape, weight_shape))
relay_mod = relay.transform.InferType()(relay_mod)
data_np = np.random.randn(*data_shape).astype("float32")
weight_np = np.random.randn(*weight_shape).astype("float32")
target = "llvm"
params = {"weight": weight_np}
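    # schedule_fn is consulted by ScheduleFnDatabase once per extracted task: it
    # returns True if it applied a schedule to sch, and False to fall back.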
def schedule_fn(sch):
if "nn_dense" in sch.mod.attrs["task_name"]:
schedule_dense(sch)
return True
return False
with StringIO() as stderr_buf, redirect_stderr(stderr_buf):
with ms.database.ScheduleFnDatabase(schedule_fn), tvm.transform.PassContext(
opt_level=3,
config={"relay.backend.use_meta_schedule": True},
):
executor = Executor("graph", {"link-params": True})
lib = relay.build(relay_mod, target=target, executor=executor)
        # Workload lookup should succeed. Note that this check does not work when
        # the test is invoked from pytest.
        assert "Cannot find workload" not in stderr_buf.getvalue()
dev = tvm.device(target, 0)
runtime = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))
runtime.set_input(**params)
runtime.set_input("data", data_np)
runtime.run()
out = runtime.get_output(0).numpy()
ref = get_ref_dense(data_np, weight_np)
tvm.testing.assert_allclose(out, ref, atol=1e-4, rtol=1e-4)
if __name__ == "__main__":
tvm.testing.main()