# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import collections
import ctypes
import json
import os
import re
from contextlib import redirect_stderr
from io import StringIO
import numpy as np
import tvm
import tvm.contrib.graph_executor
import tvm.relay
import tvm.testing
from tvm import meta_schedule as ms
from tvm import relay
from tvm.contrib import utils
from tvm.relay.backend import Executor, Runtime
INPUT_SHAPE = (1, 3, 16, 16)
KERNEL_SHAPE = (3, 3, 3, 3)
# The data types that are linkable. tvm.testing.parameter registers linkable_dtype
# as a pytest fixture, so each test below runs once per listed dtype.
linkable_dtype = tvm.testing.parameter(
*(
[f"uint{b}" for b in (8, 16, 32, 64)]
+ [f"int{b}" for b in (8, 16, 32, 64)]
+ ["float32", "float64"]
)
)
def dtype_info(dtype):
"""Lookup numpy type info for the given string dtype (of linkable_dtype params above)."""
if "int" in dtype:
return np.iinfo(getattr(np, dtype))
else:
return np.finfo(getattr(np, dtype))
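# For example, dtype_info("int8").max == 127 and dtype_info("float32").bits == 32.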
# Note: for debugging, set this to an integer (e.g. 1). Then all "random" tensors become
# predictable, counting up from this value.
RANDOM_TENSOR_START = None
def _make_random_tensor(dtype, shape):
"""Create a random test tensor with given shape and dtype."""
    global RANDOM_TENSOR_START
if RANDOM_TENSOR_START is not None:
to_return = np.arange(
RANDOM_TENSOR_START, RANDOM_TENSOR_START + np.prod(shape), dtype=dtype
).reshape(shape)
        RANDOM_TENSOR_START += np.prod(shape)
return to_return
dinfo = dtype_info(dtype)
if "int" in dtype:
return np.random.randint(dinfo.min, dinfo.max, shape, dtype=dtype)
else:
to_return = np.random.uniform(0, dinfo.max, shape).astype(dtype)
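        # Flip the sign of every other element so both signs are exercised.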
np.reshape(to_return, np.prod(shape))[::2] *= -1
return to_return
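# For example, _make_random_tensor("int8", (2, 2)) returns a 2x2 int8 array drawn
# from the full int8 range; float tensors additionally get alternating signs.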
def _lookup_sid(graph, name):
"""Lookup the storage id of a named parameter.
Arguments
---------
graph : dict
Parsed JSON graph.
name : str
Name of the tensor parameter to lookup.
Returns
-------
int :
The storage_id of the parameter.
"""
num_outputs_seen = 0
for i, n in enumerate(graph["nodes"]):
if n["name"] == name:
print("sid", name, graph["attrs"]["storage_id"][1], num_outputs_seen)
return graph["attrs"]["storage_id"][1][num_outputs_seen]
else:
if "attrs" in n and "num_outputs" in n["attrs"]:
num_outputs_seen += int(n["attrs"]["num_outputs"])
else:
num_outputs_seen += 1
raise KeyError(f"no such param: {name}")
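# For reference, the graph JSON consulted above looks roughly like this
# (abbreviated, hypothetical values):
#   {
#     "nodes": [{"name": "rand_input", "attrs": {"num_outputs": "1"}}, ...],
#     "attrs": {"storage_id": ["list_int", [0, 1, 2, 1, ...]]},
#   }
# storage_id[1] holds one id per node output, in cumulative output order.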
def _get_ctypes_dtype(dt):
"""Return a ctypes c_* datatype given a string data type."""
if "int" in dt:
return getattr(ctypes, f"c_{dt}")
elif dt == "float32":
return ctypes.c_float
elif dt == "float64":
return ctypes.c_double
else:
assert False, f"unknown dtype: {dt}"
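# For example, _get_ctypes_dtype("uint16") is ctypes.c_uint16.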
def _verify_linked_param(dtype, lib, mod, graph, name):
"""Directly read memory from the linked library to verify the linked parameter is correct."""
sid = _lookup_sid(graph, name)
# NOTE: query_imports=True because when loading a module from disk (i.e. for C backend),
# a GraphExecutorFactory module is created instead of the module itself.
param_ptr = mod.get_function("_lookup_linked_param", True)(sid)
gen_param = lib.params[name]
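    # Build a zero-copy numpy view over the linked parameter: a ctypes array typed
    # from the raw address returned by _lookup_linked_param, then an ndarray that
    # uses it as its backing buffer.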
arr_data = (_get_ctypes_dtype(dtype) * np.prod(gen_param.shape)).from_address(param_ptr.value)
arr = np.ndarray(shape=gen_param.shape, dtype=gen_param.dtype, buffer=arr_data, order="C")
if "int" in gen_param.dtype:
np.testing.assert_equal(gen_param.numpy(), arr)
else:
np.testing.assert_allclose(gen_param.numpy(), arr)
return dtype == gen_param.dtype
def _make_mod_and_params(dtype):
"""Create a Relay module and parameters to test the given datatype."""
param_decls = collections.OrderedDict()
param_init = {}
def _add_decl(name, dtype):
param_decls[name] = f"%{name} : Tensor[{KERNEL_SHAPE}, {dtype}]"
param_init[name] = _make_random_tensor(dtype, KERNEL_SHAPE)
    # Add two parameters so that a parameter's position in the graph can differ
    # from its storage_id (see the note on GraphPlanMemory below).
_add_decl(f"{dtype}_a", dtype)
_add_decl(f"{dtype}_b", dtype)
mod_lines = [
'#[version = "0.0.5"]',
f"def @main(%rand_input : Tensor[{INPUT_SHAPE}, {dtype}], { ', '.join(param_decls.values()) } ) {{",
# This program ensures that GraphPlanMemory alternates between the same two storage IDs for a
# while. In doing this, it ensures that param %{dtype}_b will be placed into the graph at an
# index unequal to its storage_id. This ensures that GraphExecutorCodegen encodes the storage_id
# and not the parameter index into the graph.
(
f' %0 = nn.conv2d(%rand_input, %{dtype}_a, data_layout="NCHW", kernel_layout="OIHW", '
f'kernel_size=[3, 3], out_dtype="{dtype}");'
),
(
f' %1 = nn.conv2d(%0, %{dtype}_a, data_layout="NCHW", kernel_layout="OIHW", '
f'kernel_size=[3, 3], out_dtype="{dtype}");'
),
(
f' %2 = nn.conv2d(%1, %{dtype}_a, data_layout="NCHW", kernel_layout="OIHW", '
f'kernel_size=[3, 3], out_dtype="{dtype}");'
),
(
f' %3 = nn.conv2d(%2, %{dtype}_b, data_layout="NCHW", kernel_layout="OIHW", '
f'kernel_size=[3, 3], out_dtype="{dtype}");'
),
" %3",
"}",
]
mod = tvm.relay.fromtext("\n".join(mod_lines))
return mod, param_init
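# For linkable_dtype == "float32", the text assembled above parses to roughly:
#   #[version = "0.0.5"]
#   def @main(%rand_input : Tensor[(1, 3, 16, 16), float32],
#             %float32_a : Tensor[(3, 3, 3, 3), float32],
#             %float32_b : Tensor[(3, 3, 3, 3), float32]) {
#     %0 = nn.conv2d(%rand_input, %float32_a, ...);
#     ...
#     %3 = nn.conv2d(%2, %float32_b, ...);
#     %3
#   }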
@tvm.testing.requires_llvm
def test_llvm_link_params(linkable_dtype):
ir_mod, param_init = _make_mod_and_params(linkable_dtype)
rand_input = _make_random_tensor(linkable_dtype, INPUT_SHAPE)
main_func = ir_mod["main"]
target = "llvm"
runtime = Runtime("crt", {"system-lib": True})
executor = Executor("graph", {"link-params": True})
with tvm.transform.PassContext(opt_level=3):
lib = tvm.relay.build(ir_mod, target, runtime=runtime, executor=executor, params=param_init)
# NOTE: Need to export_library() and load_library() to link all the Module(llvm, ...)
# against one another.
temp_dir = utils.TempDirectory()
export_file = temp_dir / "lib.so"
lib.lib.export_library(export_file)
mod = tvm.runtime.load_module(export_file)
assert len(lib.params.keys()) == 0 # NOTE: params became tir.constants
    assert mod.get_function("TVMSystemLibEntryPoint") is not None
graph = json.loads(lib.graph_json)
for p in lib.params:
        assert _verify_linked_param(linkable_dtype, lib, mod, graph, p)
# Wrap in function to explicitly deallocate the runtime.
def _run_linked(lib, mod):
graph_json, _, _ = lib
graph_rt = tvm.contrib.graph_executor.create(graph_json, mod, tvm.cpu(0))
graph_rt.set_input("rand_input", rand_input) # NOTE: params not required.
graph_rt.run()
return graph_rt.get_output(0)
linked_output = _run_linked(lib, mod)
runtime = Runtime("cpp", {"system-lib": True})
with tvm.transform.PassContext(opt_level=3):
lib = tvm.relay.build(ir_mod, "llvm", runtime=runtime, params=param_init)
def _run_unlinked(lib):
graph_json, mod, lowered_params = lib
graph_rt = tvm.contrib.graph_executor.create(graph_json, mod, tvm.cpu(0))
graph_rt.set_input("rand_input", rand_input, **lowered_params)
graph_rt.run()
return graph_rt.get_output(0)
unlinked_output = _run_unlinked(lib)
if "int" in linkable_dtype:
np.testing.assert_equal(unlinked_output.numpy(), linked_output.numpy())
else:
np.testing.assert_allclose(unlinked_output.numpy(), linked_output.numpy())
def _get_c_datatype(dtype):
"""Translate LINKABLE_DTYPES element to c datatype."""
if "int" in dtype:
return f"{dtype}_t"
elif dtype == "float32":
return "float"
elif dtype == "float64":
return "double"
else:
assert False, f"unknown dtype {dtype}"
HEX_NUM_RE = re.compile(r"[+\-]?(?:(?:0x[0-9A-Fa-f.p+-]+)|(?:INFINITY)|(?:NAN))")
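# HEX_NUM_RE matches one C literal as emitted into the generated parameter arrays,
# e.g. "0x1.8p+3", "-0x1p-2", "0xffff", "INFINITY", or "NAN".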
def test_c_link_params(linkable_dtype):
temp_dir = utils.tempdir()
mod, param_init = _make_mod_and_params(linkable_dtype)
rand_input = _make_random_tensor(linkable_dtype, INPUT_SHAPE)
main_func = mod["main"]
target = "c"
executor = Executor("graph", {"link-params": True})
with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
lib = tvm.relay.build(mod, target, executor=executor, params=param_init)
assert len(lib.params.keys()) == 0 # NOTE: params became tir.constants
src = lib.lib.get_source()
lib.lib.save(temp_dir.relpath("test.c"), "c")
c_dtype = _get_c_datatype(linkable_dtype)
src_lines = src.split("\n")
param = param_init[f"{linkable_dtype}_a"].reshape(np.prod(KERNEL_SHAPE))
param_def = rf"^static const {c_dtype} __attribute__\(\(section\(\".rodata.tvm\"\), aligned\(16\)\)\) [a-zA-Z_0-9]*constant_\d+\[{np.prod(param.shape)}\] = {{$"
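    # A line matching param_def looks roughly like this (hypothetical symbol name;
    # the actual name is assigned by the C codegen):
    #   static const float __attribute__((section(".rodata.tvm"), aligned(16))) constant_0[81] = {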
for i, line in enumerate(src_lines):
if re.match(param_def, line):
i += 1
break
else:
assert False, f'did not find parameter definition "{param_def}":\n{src}'
cursor = 0
width = dtype_info(linkable_dtype).bits // 4 + 2
if linkable_dtype.startswith("int"):
width += 1 # Account for sign
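    # NOTE: width (the expected character width of one printed literal) is computed
    # but not currently checked against the emitted source.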
while "};" not in src_lines[i]:
for match in HEX_NUM_RE.finditer(src_lines[i]):
cursor += 1
i += 1
assert cursor == np.prod(param.shape)
# Need a unique name per library to avoid dlopen caching the lib load.
lib_path = temp_dir.relpath(f"test-{linkable_dtype}-linked.so")
lib["remove_params"]().export_library(lib_path)
lib_mod = tvm.runtime.load_module(lib_path)
graph = json.loads(lib.graph_json)
for p in lib.params:
_verify_linked_param(linkable_dtype, lib, lib_mod, graph, p)
# Wrap in function to explicitly deallocate the runtime.
def _run_linked(lib_mod):
graph_rt = tvm.contrib.graph_executor.GraphModule(lib_mod["default"](tvm.cpu(0)))
graph_rt.set_input("rand_input", rand_input) # NOTE: params not required.
graph_rt.run()
return graph_rt.get_output(0)
linked_output = _run_linked(lib_mod)
linked_params = lib.params
with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
lib = tvm.relay.build(mod, "c", params=param_init)
_, _, params = lib
# Need a unique name per library to avoid dlopen caching the lib load.
lib_path = temp_dir.relpath(f"test-{linkable_dtype}-unlinked.so")
lib.export_library(lib_path)
lib_mod = tvm.runtime.load_module(lib_path)
def _run_unlinked(lib_mod):
graph_rt = tvm.contrib.graph_executor.GraphModule(lib_mod["default"](tvm.cpu(0)))
graph_rt.set_input("rand_input", rand_input, **params)
graph_rt.run()
return graph_rt.get_output(0)
unlinked_output = _run_unlinked(lib_mod)
if "int" in linkable_dtype:
np.testing.assert_equal(unlinked_output.numpy(), linked_output.numpy())
else:
np.testing.assert_allclose(unlinked_output.numpy(), linked_output.numpy())
@tvm.testing.requires_micro
def test_crt_link_params(linkable_dtype):
from tvm import micro
mod, param_init = _make_mod_and_params(linkable_dtype)
rand_input = _make_random_tensor(linkable_dtype, INPUT_SHAPE)
main_func = mod["main"]
target = "c"
runtime = Runtime("crt", {"system-lib": True})
executor = Executor("graph", {"link-params": True})
with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
factory = tvm.relay.build(
mod, target, runtime=runtime, executor=executor, params=param_init
)
assert len(factory.get_params().keys()) == 0 # NOTE: params became tir.constants
temp_dir = tvm.contrib.utils.tempdir()
template_project_dir = tvm.micro.get_microtvm_template_projects("crt")
project = tvm.micro.generate_project(
template_project_dir, factory, temp_dir / "project", {"verbose": 1}
)
project.build()
project.flash()
with tvm.micro.Session(project.transport()) as sess:
graph_rt = tvm.micro.session.create_local_graph_executor(
factory.get_graph_json(), sess.get_system_lib(), sess.device
)
assert len(factory.params.keys()) == 0 # NOTE: params became tir.constants
# NOTE: not setting params here.
graph_rt.set_input("rand_input", rand_input)
graph_rt.run()
linked_output = graph_rt.get_output(0).numpy()
runtime = Runtime("cpp", {"system-lib": True})
with tvm.transform.PassContext(opt_level=3):
lib = tvm.relay.build(mod, "llvm", runtime=runtime, params=param_init)
def _run_unlinked(lib):
graph_json, mod, lowered_params = lib
graph_rt = tvm.contrib.graph_executor.create(graph_json, mod, tvm.cpu(0))
graph_rt.set_input("rand_input", rand_input, **lowered_params)
graph_rt.run()
return graph_rt.get_output(0).numpy()
unlinked_output = _run_unlinked(lib)
if "int" in linkable_dtype:
np.testing.assert_equal(unlinked_output, linked_output)
else:
np.testing.assert_allclose(unlinked_output, linked_output)
def test_tir_link_params():
def get_dense(data_shape, weight_shape):
data = relay.var("data", shape=data_shape, dtype="float32")
weight = relay.var("weight", shape=weight_shape, dtype="float32")
dense = relay.nn.dense(data, weight)
return relay.Function([data, weight], dense)
def get_ref_dense(data_np, weight_np):
return np.dot(data_np, np.transpose(weight_np))
def schedule_dense(sch):
dense = sch.get_block("T_matmul_NT")
_y, _x, _k = sch.get_loops(dense)
M, N, K = 128, 128, 128
data_shape = (M, K)
weight_shape = (N, K)
relay_mod = tvm.IRModule.from_expr(get_dense(data_shape, weight_shape))
relay_mod = relay.transform.InferType()(relay_mod)
data_np = np.random.randn(*data_shape).astype("float32")
weight_np = np.random.randn(*weight_shape).astype("float32")
target = "llvm"
params = {"weight": weight_np}
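    # schedule_fn is consulted by ScheduleFnDatabase once per extracted task: it
    # returns True if it applied a schedule to sch, and False to fall back.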
def schedule_fn(sch):
if "nn_dense" in sch.mod.attrs["task_name"]:
schedule_dense(sch)
return True
return False
with StringIO() as stderr_buf, redirect_stderr(stderr_buf):
with ms.database.ScheduleFnDatabase(schedule_fn), tvm.transform.PassContext(
opt_level=3,
config={"relay.backend.use_meta_schedule": True},
):
executor = Executor("graph", {"link-params": True})
lib = relay.build(relay_mod, target=target, executor=executor)
        # Workload lookup should succeed. Note that this check does not work when
        # the test is invoked from pytest.
        assert "Cannot find workload" not in stderr_buf.getvalue()
dev = tvm.device(target, 0)
runtime = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))
runtime.set_input(**params)
runtime.set_input("data", data_np)
runtime.run()
out = runtime.get_output(0).numpy()
ref = get_ref_dense(data_np, weight_np)
tvm.testing.assert_allclose(out, ref, atol=1e-4, rtol=1e-4)
if __name__ == "__main__":
tvm.testing.main()