tests/python/runtime/test_runtime_module_based_interface.py - tvm - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 import os
 import platform

 import numpy as np
 import pytest

 from tvm import relay, runtime
 from tvm.relay import testing
 import tvm
 from tvm.contrib import graph_executor
 from tvm.contrib.debugger import debug_executor
 from tvm.contrib.cuda_graph import cuda_graph_executor
 import tvm.testing
 import pytest


 def input_shape(mod):
     return [int(x) for x in mod["main"].checked_type.arg_types[0].shape]


 def verify(data):
     if not tvm.runtime.enabled("llvm"):
         print("Skip because llvm is not enabled")
         return
     mod, params = relay.testing.synthetic.get_workload()
     with relay.build_config(opt_level=3):
         graph, lib, graph_params = relay.build_module.build(mod, "llvm", params=params)

     dev = tvm.cpu()
     module = graph_executor.create(graph, lib, dev)
     module.set_input("data", data)
     module.set_input(**graph_params)
     module.run()
     out = module.get_output(0).numpy()

     return out


 @tvm.testing.requires_llvm
 @pytest.mark.parametrize("target", ["llvm", "llvm -jit=orcjit"])
 def test_legacy_compatibility(target):
     mod, params = relay.testing.synthetic.get_workload()
     with relay.build_config(opt_level=3):
         graph, lib, graph_params = relay.build_module.build(mod, target, params=params)
     data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
     dev = tvm.cpu()
     module = graph_executor.create(graph, lib, dev)
     module.set_input("data", data)
     module.set_input(**graph_params)
     module.run()
     out = module.get_output(0).numpy()
     tvm.testing.assert_allclose(out, verify(data), atol=1e-5)


 @tvm.testing.requires_llvm
 @pytest.mark.parametrize("target", ["llvm", "llvm -jit=orcjit"])
 def test_cpu(target):
     mod, params = relay.testing.synthetic.get_workload()
     with relay.build_config(opt_level=3):
         complied_graph_lib = relay.build_module.build(mod, target, params=params)
     data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
     # raw api
     dev = tvm.cpu()
     gmod = complied_graph_lib["default"](dev)
     set_input = gmod["set_input"]
     run = gmod["run"]
     get_output = gmod["get_output"]
     set_input("data", tvm.nd.array(data))
     run()
     out = get_output(0).numpy()
     tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

     # graph executor wrapper
     gmod = graph_executor.GraphModule(complied_graph_lib["default"](dev))
     gmod.set_input("data", data)
     gmod.run()
     out = gmod.get_output(0).numpy()
     tvm.testing.assert_allclose(out, verify(data), atol=1e-5)


 @tvm.testing.requires_llvm
 def test_cpu_get_graph_json():
     mod, params = relay.testing.synthetic.get_workload()
     with relay.build_config(opt_level=3):
         complied_graph_lib = relay.build_module.build(mod, "llvm", params=params)
     from tvm.contrib import utils

     temp = utils.tempdir()
     file_name = "deploy_lib.so"
     path_lib = temp.relpath(file_name)
     complied_graph_lib.export_library(path_lib)
     loaded_lib = tvm.runtime.load_module(path_lib)
     json = loaded_lib["get_graph_json"]()
     assert isinstance(json, str) == True
     assert json.find("tvmgen_default_fused_nn_softmax_add") > -1


 @tvm.testing.requires_llvm
 @pytest.mark.parametrize("target", ["llvm", "llvm -jit=orcjit"])
 def test_cpu_get_graph_params_run(target):
     mod, params = relay.testing.synthetic.get_workload()
     with tvm.transform.PassContext(opt_level=3):
         complied_graph_lib = relay.build_module.build(mod, target, params=params)
     data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
     dev = tvm.cpu()
     from tvm.contrib import utils

     temp = utils.tempdir()
     file_name = "deploy_lib.so"
     path_lib = temp.relpath(file_name)
     complied_graph_lib.export_library(path_lib)

     loaded_lib = tvm.runtime.load_module(path_lib)
     loaded_params = loaded_lib["get_graph_params"]()

     gmod = graph_executor.GraphModule(loaded_lib["default"](dev))
     gmod.set_input(key="data", value=data, **loaded_params)
     gmod.run()
     out = gmod.get_output(0).numpy()
     tvm.testing.assert_allclose(out, verify(data), atol=1e-5)


 @tvm.testing.requires_llvm
 def test_cpu_get_graph_params_compare():
     # Create sample net
     from tvm.relay.testing.init import create_workload, Constant

     inp_shape = (1, 3, 24, 12)
     dtype = "float32"
     data = relay.var("data", shape=inp_shape, dtype=dtype)
     conv_shape = [inp_shape[1], inp_shape[1], 3, 3]
     conv = relay.nn.conv2d(
         data,
         relay.var("conv_weight", shape=conv_shape, dtype=dtype),
         padding=1,
         kernel_size=3,
     )
     args = relay.analysis.free_vars(conv)
     func = relay.Function(args, conv)

     mod, params = create_workload(func, initializer=Constant())

     with tvm.transform.PassContext(opt_level=3):
         complied_graph_lib = relay.build_module.build(mod, "llvm", params=params)
     from tvm.contrib import utils

     temp = utils.tempdir()
     file_name = "deploy_lib.so"
     path_lib = temp.relpath(file_name)
     complied_graph_lib.export_library(path_lib)

     loaded_lib = tvm.runtime.load_module(path_lib)
     loaded_params = loaded_lib["get_graph_params"]()

     p0_squeezed = np.squeeze(loaded_params["p0"].numpy())
     tvm.testing.assert_allclose(params["conv_weight"].numpy(), p0_squeezed, atol=1e-5)


 @tvm.testing.requires_cuda
 @tvm.testing.requires_gpu
 def test_gpu():
     mod, params = relay.testing.synthetic.get_workload()
     with relay.build_config(opt_level=3):
         complied_graph_lib = relay.build_module.build(mod, "cuda", params=params)
     data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
     dev = tvm.cuda()

     # raw api
     gmod = complied_graph_lib["default"](dev)
     set_input = gmod["set_input"]
     run = gmod["run"]
     get_output = gmod["get_output"]
     set_input("data", tvm.nd.array(data))
     run()
     out = get_output(0).numpy()
     tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

     # graph executor wrapper
     gmod = graph_executor.GraphModule(complied_graph_lib["default"](dev))
     gmod.set_input("data", data)
     gmod.run()
     out = gmod.get_output(0).numpy()
     tvm.testing.assert_allclose(out, verify(data), atol=1e-5)


 @tvm.testing.uses_gpu
 def test_mod_export():
     def verify_cpu_export(obj_format):
         mod, params = relay.testing.synthetic.get_workload()
         with relay.build_config(opt_level=3):
             complied_graph_lib = relay.build_module.build(mod, "llvm", params=params)

         from tvm.contrib import utils

         temp = utils.tempdir()
         if obj_format == ".so":
             file_name = "deploy_lib.so"
         else:
             assert obj_format == ".tar"
             file_name = "deploy_lib.tar"
         path_lib = temp.relpath(file_name)
         complied_graph_lib.export_library(path_lib)

         # run the setup in a separate function, so the load_lib
         # can get destructed right away
         # test the robustness wrt to parent module destruction
         def setup_gmod():
             loaded_lib = tvm.runtime.load_module(path_lib)
             dev = tvm.cpu(0)
             return loaded_lib["default"](dev)

         gmod = setup_gmod()
         # raw api
         set_input = gmod["set_input"]
         run = gmod["run"]
         get_output = gmod["get_output"]
         data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
         set_input("data", tvm.nd.array(data))
         run()
         out = get_output(0).numpy()
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

         # graph executor wrapper
         gmod = graph_executor.GraphModule(setup_gmod())
         gmod.set_input("data", data)
         gmod.run()
         out = gmod.get_output(0).numpy()
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

     def verify_gpu_export(obj_format):
         if not tvm.testing.device_enabled("cuda"):
             print("Skip because cuda is not enabled")
             return
         mod, params = relay.testing.synthetic.get_workload()
         with relay.build_config(opt_level=3):
             complied_graph_lib = relay.build_module.build(mod, "cuda", params=params)

         from tvm.contrib import utils

         temp = utils.tempdir()
         if obj_format == ".so":
             file_name = "deploy_lib.so"
         else:
             assert obj_format == ".tar"
             file_name = "deploy_lib.tar"
         path_lib = temp.relpath(file_name)
         complied_graph_lib.export_library(path_lib)

         data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")

         # run the setup in a separate function, so the load_lib
         # can get destructed right away
         # test the robustness wrt to parent module destruction
         def setup_gmod():
             loaded_lib = tvm.runtime.load_module(path_lib)
             dev = tvm.cuda()
             return loaded_lib["default"](dev)

         gmod = setup_gmod()
         # raw api
         set_input = gmod["set_input"]
         run = gmod["run"]
         get_output = gmod["get_output"]
         set_input("data", tvm.nd.array(data))
         run()
         out = get_output(0).numpy()
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

         # graph executor wrapper
         gmod = graph_executor.GraphModule(setup_gmod())
         gmod.set_input("data", data)
         gmod.run()
         out = gmod.get_output(0).numpy()
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

     @tvm.testing.requires_llvm
     def verify_rpc_cpu_export(obj_format):
         mod, params = relay.testing.synthetic.get_workload()
         with relay.build_config(opt_level=3):
             complied_graph_lib = relay.build_module.build(mod, "llvm", params=params)

         from tvm.contrib import utils

         temp = utils.tempdir()
         if obj_format == ".so":
             file_name = "deploy_lib.so"
         else:
             assert obj_format == ".tar"
             file_name = "deploy_lib.tar"
         path_lib = temp.relpath(file_name)
         complied_graph_lib.export_library(path_lib)

         from tvm import rpc

         remote = rpc.LocalSession()
         remote.upload(path_lib)
         loaded_lib = remote.load_module(path_lib)
         data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
         dev = remote.cpu()

         # raw api
         gmod = loaded_lib["default"](dev)
         set_input = gmod["set_input"]
         run = gmod["run"]
         get_output = gmod["get_output"]
         set_input("data", tvm.nd.array(data, device=dev))
         run()
         out = get_output(0).numpy()
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

         # graph executor wrapper
         gmod = graph_executor.GraphModule(loaded_lib["default"](dev))
         gmod.set_input("data", data)
         gmod.run()
         out = gmod.get_output(0).numpy()
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

     def verify_rpc_gpu_export(obj_format):
         if not tvm.testing.device_enabled("cuda"):
             print("Skip because cuda is not enabled")
             return
         mod, params = relay.testing.synthetic.get_workload()
         with relay.build_config(opt_level=3):
             complied_graph_lib = relay.build_module.build(mod, "cuda", params=params)

         from tvm.contrib import utils

         temp = utils.tempdir()
         if obj_format == ".so":
             file_name = "deploy_lib.so"
         else:
             assert obj_format == ".tar"
             file_name = "deploy_lib.tar"
         path_lib = temp.relpath(file_name)
         complied_graph_lib.export_library(path_lib)

         from tvm import rpc

         def check_remote(server):
             remote = rpc.connect(server.host, server.port)
             remote.upload(path_lib)
             loaded_lib = remote.load_module(path_lib)
             data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
             dev = remote.cuda()

             # raw api
             gmod = loaded_lib["default"](dev)
             set_input = gmod["set_input"]
             run = gmod["run"]
             get_output = gmod["get_output"]
             set_input("data", tvm.nd.array(data, device=dev))
             run()
             out = get_output(0).numpy()
             tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

             # graph executor wrapper
             gmod = graph_executor.GraphModule(loaded_lib["default"](dev))
             gmod.set_input("data", data)
             gmod.run()
             out = gmod.get_output(0).numpy()
             tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

         check_remote(rpc.Server("127.0.0.1"))

     for obj_format in [".so", ".tar"]:
         verify_cpu_export(obj_format)
         verify_gpu_export(obj_format)
         verify_rpc_cpu_export(obj_format)
         verify_rpc_gpu_export(obj_format)


 @tvm.testing.requires_llvm
 @tvm.testing.uses_gpu
 def test_remove_package_params():
     def verify_cpu_remove_package_params(obj_format):
         mod, params = relay.testing.synthetic.get_workload()
         with relay.build_config(opt_level=3):
             complied_graph_lib = relay.build_module.build(mod, "llvm", params=params)

         from tvm.contrib import utils

         temp = utils.tempdir()
         if obj_format == ".so":
             file_name = "deploy_lib.so"
         else:
             assert obj_format == ".tar"
             file_name = "deploy_lib.tar"
         path_lib = temp.relpath(file_name)
         complied_graph_lib_no_params = complied_graph_lib["remove_params"]()
         complied_graph_lib_no_params.export_library(path_lib)
         with open(temp.relpath("deploy_param.params"), "wb") as fo:
             fo.write(runtime.save_param_dict(complied_graph_lib.get_params()))
         loaded_lib = tvm.runtime.load_module(path_lib)
         data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
         dev = tvm.cpu(0)

         # raw api
         gmod = loaded_lib["default"](dev)
         set_input = gmod["set_input"]
         run = gmod["run"]
         get_output = gmod["get_output"]
         load_params = gmod["load_params"]
         loaded_params = bytearray(open(temp.relpath("deploy_param.params"), "rb").read())
         set_input("data", tvm.nd.array(data))
         load_params(loaded_params)
         run()
         out = get_output(0).numpy()
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

         # graph executor wrapper
         gmod = graph_executor.GraphModule(loaded_lib["default"](dev))
         loaded_params = bytearray(open(temp.relpath("deploy_param.params"), "rb").read())
         gmod.set_input("data", data)
         gmod.load_params(loaded_params)
         gmod.run()
         out = gmod.get_output(0).numpy()
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

     def verify_gpu_remove_package_params(obj_format):
         if not tvm.testing.device_enabled("cuda"):
             print("Skip because cuda is not enabled")
             return
         mod, params = relay.testing.synthetic.get_workload()
         with relay.build_config(opt_level=3):
             complied_graph_lib = relay.build_module.build(mod, "cuda", params=params)

         from tvm.contrib import utils

         temp = utils.tempdir()
         if obj_format == ".so":
             file_name = "deploy_lib.so"
         else:
             assert obj_format == ".tar"
             file_name = "deploy_lib.tar"
         path_lib = temp.relpath(file_name)
         complied_graph_lib_no_params = complied_graph_lib["remove_params"]()
         complied_graph_lib_no_params.export_library(path_lib)
         with open(temp.relpath("deploy_param.params"), "wb") as fo:
             fo.write(runtime.save_param_dict(complied_graph_lib.get_params()))
         loaded_lib = tvm.runtime.load_module(path_lib)
         data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
         dev = tvm.cuda(0)

         # raw api
         gmod = loaded_lib["default"](dev)
         set_input = gmod["set_input"]
         run = gmod["run"]
         get_output = gmod["get_output"]
         load_params = gmod["load_params"]
         loaded_params = bytearray(open(temp.relpath("deploy_param.params"), "rb").read())
         set_input("data", tvm.nd.array(data))
         load_params(loaded_params)
         run()
         out = get_output(0).numpy()
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

         # graph executor wrapper
         gmod = graph_executor.GraphModule(loaded_lib["default"](dev))
         loaded_params = bytearray(open(temp.relpath("deploy_param.params"), "rb").read())
         gmod.set_input("data", data)
         gmod.load_params(loaded_params)
         gmod.run()
         out = gmod.get_output(0).numpy()
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

     @tvm.testing.requires_llvm
     def verify_rpc_cpu_remove_package_params(obj_format):
         mod, params = relay.testing.synthetic.get_workload()
         with relay.build_config(opt_level=3):
             complied_graph_lib = relay.build_module.build(mod, "llvm", params=params)

         from tvm.contrib import utils

         temp = utils.tempdir()
         if obj_format == ".so":
             file_name = "deploy_lib.so"
         else:
             assert obj_format == ".tar"
             file_name = "deploy_lib.tar"
         path_lib = temp.relpath(file_name)
         complied_graph_lib_no_params = complied_graph_lib["remove_params"]()
         complied_graph_lib_no_params.export_library(path_lib)
         path_params = temp.relpath("deploy_param.params")
         with open(path_params, "wb") as fo:
             fo.write(runtime.save_param_dict(complied_graph_lib.get_params()))

         from tvm import rpc

         remote = rpc.LocalSession()
         remote.upload(path_lib)
         loaded_lib = remote.load_module(path_lib)
         data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
         dev = remote.cpu()

         # raw api
         gmod = loaded_lib["default"](dev)
         set_input = gmod["set_input"]
         run = gmod["run"]
         get_output = gmod["get_output"]
         load_params = gmod["load_params"]
         loaded_params = bytearray(open(path_params, "rb").read())
         set_input("data", tvm.nd.array(data, device=dev))
         load_params(loaded_params)
         run()
         out = get_output(0).numpy()
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

         # graph executor wrapper
         gmod = graph_executor.GraphModule(loaded_lib["default"](dev))
         loaded_params = bytearray(open(path_params, "rb").read())
         gmod.set_input("data", data)
         gmod.load_params(loaded_params)
         gmod.run()
         out = gmod.get_output(0).numpy()
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

     def verify_rpc_gpu_remove_package_params(obj_format):
         if not tvm.testing.device_enabled("cuda"):
             print("Skip because cuda is not enabled")
             return
         mod, params = relay.testing.synthetic.get_workload()
         with relay.build_config(opt_level=3):
             complied_graph_lib = relay.build_module.build(mod, "cuda", params=params)

         from tvm.contrib import utils

         temp = utils.tempdir()
         if obj_format == ".so":
             file_name = "deploy_lib.so"
         else:
             assert obj_format == ".tar"
             file_name = "deploy_lib.tar"
         path_lib = temp.relpath(file_name)
         complied_graph_lib_no_params = complied_graph_lib["remove_params"]()
         complied_graph_lib_no_params.export_library(path_lib)
         path_params = temp.relpath("deploy_param.params")
         with open(path_params, "wb") as fo:
             fo.write(runtime.save_param_dict(complied_graph_lib.get_params()))

         from tvm import rpc

         remote = rpc.LocalSession()
         remote.upload(path_lib)
         loaded_lib = remote.load_module(path_lib)
         data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
         dev = remote.cuda()

         # raw api
         gmod = loaded_lib["default"](dev)
         set_input = gmod["set_input"]
         run = gmod["run"]
         get_output = gmod["get_output"]
         load_params = gmod["load_params"]
         loaded_params = bytearray(open(path_params, "rb").read())
         set_input("data", tvm.nd.array(data, device=dev))
         load_params(loaded_params)
         run()
         out = get_output(0).numpy()
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

         # graph executor wrapper
         gmod = graph_executor.GraphModule(loaded_lib["default"](dev))
         loaded_params = bytearray(open(path_params, "rb").read())
         gmod.set_input("data", data)
         gmod.load_params(loaded_params)
         gmod.run()
         out = gmod.get_output(0).numpy()
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

     for obj_format in [".so", ".tar"]:
         verify_cpu_remove_package_params(obj_format)
         verify_gpu_remove_package_params(obj_format)
         verify_rpc_cpu_remove_package_params(obj_format)
         verify_rpc_gpu_remove_package_params(obj_format)


 @tvm.testing.requires_llvm
 @pytest.mark.parametrize("target", ["llvm", "llvm -jit=orcjit"])
 def test_debug_graph_executor(target):
     mod, params = relay.testing.synthetic.get_workload()
     with relay.build_config(opt_level=3):
         complied_graph_lib = relay.build_module.build(mod, target, params=params)
     data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")

     # raw api
     dev = tvm.cpu()
     try:
         gmod = complied_graph_lib["debug_create"]("default", dev)
     except:
         print("Skip because debug graph_executor not enabled")
         return
     set_input = gmod["set_input"]
     run = gmod["run"]
     get_output = gmod["get_output"]
     set_input("data", tvm.nd.array(data))
     run()
     out = get_output(0).numpy()
     tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

     # debug graph executor wrapper
     debug_g_mod = debug_executor.GraphModuleDebug(
         complied_graph_lib["debug_create"]("default", dev),
         [dev],
         complied_graph_lib.get_graph_json(),
         None,
     )
     debug_g_mod.set_input("data", data)
     debug_g_mod.run()
     out = debug_g_mod.get_output(0).numpy()
     tvm.testing.assert_allclose(out, verify(data), atol=1e-5)


 @tvm.testing.requires_cudagraph
 def test_cuda_graph_executor():
     mod, params = relay.testing.synthetic.get_workload()
     with tvm.transform.PassContext(opt_level=3):
         complied_graph_lib = relay.build_module.build(mod, "cuda", params=params)
     data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")

     dev = tvm.cuda()
     try:
         gmod = complied_graph_lib["cuda_graph_create"](dev)
     except:
         print("Skip because cuda_graph not enabled")
         return
     set_input = gmod["set_input"]
     run = gmod["run"]
     get_output = gmod["get_output"]
     set_input("data", tvm.nd.array(data))
     run()
     out = get_output(0).numpy()
     tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

     # cuda graph executor wrapper
     cu_gmod = cuda_graph_executor.GraphModuleCudaGraph(gmod)
     cu_gmod.set_input("data", data)
     cu_gmod.run()
     out = cu_gmod.get_output(0).numpy()
     tvm.testing.assert_allclose(out, verify(data), atol=1e-5)


 def test_multiple_imported_modules():
     def make_func(symbol):
         n = tvm.te.size_var("n")
         Ab = tvm.tir.decl_buffer((n,), dtype="float32")
         i = tvm.te.var("i")
         stmt = tvm.tir.For(
             i,
             0,
             n - 1,
             tvm.tir.ForKind.SERIAL,
             tvm.tir.BufferStore(Ab, tvm.tir.BufferLoad(Ab, [i]) + 1, [i + 1]),
         )
         return tvm.tir.PrimFunc([Ab], stmt).with_attr("global_symbol", symbol)

     def make_module(mod):
         mod = tvm.IRModule(mod)
         mod = tvm.driver.build(mod, target="llvm")
         return mod

     module_main = make_module({"main": make_func("main")})
     module_a = make_module({"func_a": make_func("func_a")})
     module_b = make_module({"func_b": make_func("func_b")})
     module_main.import_module(module_a)
     module_main.import_module(module_b)
     module_main.get_function("func_a", query_imports=True)
     module_main.get_function("func_b", query_imports=True)


 def test_num_threads():
     reported = tvm.runtime.num_threads()
     env_threads = os.getenv("TVM_NUM_THREADS")
     omp_env_threads = os.getenv("OMP_NUM_THREADS")
     if env_threads is not None:
         assert reported == int(env_threads)
     elif omp_env_threads is not None:
         assert reported == int(omp_env_threads)
     else:
         hardware_threads = os.cpu_count()
         assert reported == hardware_threads or reported == hardware_threads // 2


 @tvm.testing.requires_llvm
 @tvm.testing.requires_package("torch")
 def test_graph_module_zero_copy():
     mod = tvm.IRModule()
     params = {}
     dev = tvm.cpu()
     x = relay.var("x", shape=(1, 10))
     y = relay.var("y", shape=(1, 10))
     z = relay.add(x, y)
     mod["main"] = relay.Function([x, y], z)

     # need torch to do the from_dlpack trick
     import torch

     compiled_graph_lib = relay.build(mod, target="llvm", params=params)
     gm = graph_executor.GraphModule(compiled_graph_lib["default"](dev))
     x_data = torch.rand((1, 10))
     y_data = torch.rand((1, 10))
     z_data = torch.rand((1, 10))
     z_torch = x_data + y_data

     # zero copy run
     assert not np.allclose(z_data.numpy(), z_torch.numpy())
     gm.set_input_zero_copy("x", tvm.nd.from_dlpack(x_data))
     gm.set_input_zero_copy("y", tvm.nd.from_dlpack(y_data))
     gm.set_output_zero_copy(0, tvm.nd.from_dlpack(z_data))
     gm.run()

     tvm.testing.assert_allclose(z_data.numpy(), z_torch.numpy())

     # zero input copy with params
     gm = graph_executor.GraphModule(compiled_graph_lib["default"](dev))
     gm.set_input_zero_copy(x=tvm.nd.from_dlpack(x_data), y=tvm.nd.from_dlpack(y_data))
     gm.run()

     tvm.testing.assert_allclose(gm.get_output(0).numpy(), z_torch.numpy())


 @tvm.testing.requires_llvm
 def test_reshape_zero_copy():
     shape0 = (56, 224)
     shape1 = (112, 112)
     in_name0 = "infeats0"
     in_name1 = "infeats1"
     x0 = relay.var(in_name0, shape=shape0, dtype="float32")
     x0 = relay.reshape(x0, shape1)

     x1 = relay.var(in_name1, shape=shape1, dtype="float32")
     mat = relay.nn.matmul(x0, x1)
     _y = relay.reshape(mat, (-1))
     func = relay.Function(relay.analysis.free_vars(_y), _y)
     mod = tvm.IRModule.from_expr(func)

     with tvm.transform.PassContext(opt_level=3):
         lib = relay.build(mod, target="llvm")
     m = graph_executor.GraphModule(lib["default"](tvm.cpu(0)))

     data_ndarray0 = tvm.nd.array(
         np.random.random(shape0).astype(np.float32), device=tvm.device("llvm", 0)
     )
     data_ndarray1 = tvm.nd.array(
         np.random.random(shape1).astype(np.float32), device=tvm.device("llvm", 0)
     )

     def expected():
         m.set_input(in_name0, data_ndarray0)
         m.set_input(in_name1, data_ndarray1)
         m.run()
         return m.get_output(0).numpy()

     def zero_copy():
         from tvm.relay.frontend.common import infer_shape

         outshape = infer_shape(_y)
         output_view = tvm.nd.empty(outshape, device=tvm.device("llvm", 0))
         m.set_input_zero_copy(in_name0, data_ndarray0)
         m.set_input_zero_copy(in_name1, data_ndarray1)
         m.set_output_zero_copy(0, output_view)
         m.run()
         return output_view.numpy()

     golden_out = expected()
     out = zero_copy()
     tvm.testing.assert_allclose(golden_out, out)


 if __name__ == "__main__":
     test_legacy_compatibility()
     test_cpu()
     test_gpu()
     test_mod_export()
     test_remove_package_params()
     test_debug_graph_executor()
     test_multiple_imported_modules()
     test_cpu_get_graph_json()
     test_cpu_get_graph_params_run()
     test_cpu_get_graph_params_compare()
     test_graph_module_zero_copy()
     test_reshape_zero_copy()