# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
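"""Tests for Relax memory-scope annotation on Adreno GPU targets.

Each test lowers a small Relax module through the Adreno texture pipeline
(layout conversion to NCHW4c, legalization, custom memory-scope annotation,
and fusion) and validates the memory scope assigned to every call_tir
argument and return value against an expected mapping.
"""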
import tvm
from tvm import relax
import tvm.testing
from tvm.script.parser import ir as I, relax as R, tir as T
from tvm.relax.transform.legalize_ops import adreno as legalize_adreno
from tvm.ir.module import IRModule
from tvm.relax.expr_functor import PyExprVisitor, visitor


@visitor
class ValidateScope(PyExprVisitor): # pylint: disable=abstract-method
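    """Validates that every call_tir argument and return value carries the expected memory scope."""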
def __init__(self, scope_info: dict) -> None:
self.scope_info = scope_info
self.matched = True

    def visit(self, mod: IRModule) -> bool:
        """Entry point: visit every Relax function and return whether all scopes matched."""
for _, func in mod.functions_items():
if isinstance(func, relax.Function):
self.visit_expr(func)
return self.matched

    def visit_call_(self, call: relax.Call) -> None:  # pylint: disable=arguments-renamed
        if call.op.name == "relax.call_tir":
            for idx, arg in enumerate(call.args[1]):
                arg_sinfo = arg.struct_info
                assert isinstance(
                    arg_sinfo, relax.TensorStructInfo
                ), f"Expected TensorStructInfo but got {type(arg_sinfo)}"
call_mem_scope = (
"global" if not arg_sinfo.vdevice else arg_sinfo.vdevice.memory_scope
)
assert (
call_mem_scope == self.scope_info[call.args[0].name_hint][0][idx]
), f"Scope mismatched for argument {idx} in {call.args[0].name_hint}"
if isinstance(call.sinfo_args[0], relax.TensorStructInfo):
call_mem_scope = (
"global"
if not call.sinfo_args[0].vdevice
else call.sinfo_args[0].vdevice.memory_scope
)
assert (
call_mem_scope == self.scope_info[call.args[0].name_hint][1][0]
), f"Scope mismatched for return scope: {call.args[0].name_hint}"
else:
                assert isinstance(
                    call.sinfo_args[0], relax.TupleStructInfo
                ), f"Expected TupleStructInfo but got {type(call.sinfo_args[0])}"
for idx, sinfo in enumerate(call.sinfo_args[0].fields):
call_mem_scope = "global" if not sinfo.vdevice else sinfo.vdevice.memory_scope
assert (
call_mem_scope == self.scope_info[call.args[0].name_hint][1][idx]
), f"Scope mismatched for return scope for {idx} in {call.args[0].name_hint}"


def verify(mod, expected):
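    """Lower `mod` through the Adreno texture pipeline and validate memory scopes.

    `expected` maps each PrimFunc name to a (argument scopes, return scopes) tuple
    that the corresponding call_tir in the lowered module must match.
    """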
tgt = tvm.target.Target({"kind": "opencl", "device": "adreno"}, host="llvm")
skip_ops = [
"relax.nn.conv2d",
"relax.nn.max_pool2d",
"relax.nn.adaptive_avg_pool2d",
# "relax.nn.layer_norm",
]
with tgt:
mod = tvm.tir.transform.BindTarget(tvm.target.Target.current(allow_none=False))(mod)
mod = tvm.relax.transform.DecomposeOpsForInference()(mod)
mod = tvm.relax.transform.FoldConstant()(mod)
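        # Convert conv2d to the vectorized NCHW4c/OIHW4o layouts used for Adreno textures.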
desired_layouts = {"relax.nn.conv2d": ["NCHW4c", "OIHW4o", "NCHW4c"]}
mod = tvm.relax.transform.ConvertLayout(desired_layouts)(mod)
mod = tvm.relax.transform.Normalize()(mod)
mod = tvm.relax.transform.FoldConstant()(mod)
mod = tvm.relax.transform.LegalizeOps(skip_ops=skip_ops)(mod)
mod = tvm.relax.transform.AnnotateTIROpPattern()(mod)
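        # Annotate tensors with texture memory scopes based on the Adreno target.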
mod = tvm.relax.backend.adreno.transform.AnnotateCustomMemoryScope(tgt)(mod)
        # Some of the ops skipped above may not end up using 5D layouts; legalize them now.
mod = tvm.relax.transform.LegalizeOps()(mod)
mod = tvm.relax.transform.LegalizeOps(
{"relax.nn.conv2d": legalize_adreno.conv2d_NCHWc_OIHWo},
)(mod)
        # Recompute op-pattern info for the newly legalized ops.
mod = tvm.relax.transform.AnnotateTIROpPattern()(mod)
mod = tvm.relax.transform.FoldConstant()(mod)
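        # Fuse ops by pattern kind and merge each fused group into a single TIR PrimFunc.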
mod = tvm.relax.transform.FuseOps()(mod)
mod = tvm.relax.transform.FuseTIR()(mod)
mod = tvm.relax.transform.DeadCodeElimination()(mod)
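        # Fold away redundant vdevice scope-change copies.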
mod = tvm.relax.backend.adreno.transform.FoldVDeviceScopeChange()(mod)
mod = tvm.relax.transform.DeadCodeElimination()(mod)
mod = tvm.relax.transform.SpecializePrimFuncBasedOnCallSite()(mod)
mod = tvm.relax.transform.Normalize()(mod)
    assert ValidateScope(expected).visit(mod)


def test_conv2d():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 64, 56, 56), "float32"), w: R.Tensor((32, 64, 3, 3), "float32")
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv: R.Tensor((2, 32, 54, 54), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
R.output(gv)
return gv
Expected = {
"te_layout_transform": (["global"], ["global.texture-nhwc"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (["global.texture-nhwc", "global"], ["global"]),
"te_layout_transform2": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_NCHW_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "float32")
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(
x,
w,
data_layout="NCHW",
kernel_layout="OIHW",
out_dtype="float32",
)
R.output(gv)
return gv
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform2": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_NHWC_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 28, 28, 16), "float32"), w: R.Tensor((4, 3, 3, 16), "float32")
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv: R.Tensor((2, 26, 26, 4), "float32") = R.nn.conv2d(
x,
w,
data_layout="NHWC",
kernel_layout="OHWI",
out_dtype="float32",
)
R.output(gv)
return gv
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform2": (["global"], ["global"]),
}
verify(Input, Expected)


def _test_conv2d_symbolic_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor("float32", ndim=4), w: R.Tensor("float32", ndim=4)
) -> R.Tensor("float32", ndim=4):
with R.dataflow():
N, C, H, W = T.int64(), T.int64(16), T.int64(), T.int64()
Nw, Cw, Hw, Ww = T.int64(4), T.int64(16), T.int64(), T.int64()
lv0 = R.match_cast(x, R.Tensor((N, C, H, W), "float32"))
lv1 = R.match_cast(w, R.Tensor((Nw, Cw, Hw, Ww), "float32"))
gv: R.Tensor(
(N, T.int64(4), H + T.int64(1) - Hw, W + T.int64(1) - Ww), "float32"
) = R.nn.conv2d(lv0, lv1, out_dtype="float32")
R.output(gv)
return gv
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform2": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_relu_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "float32")
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2: R.Tensor((2, 4, 26, 26), "float32") = R.nn.relu(gv)
R.output(gv2)
return gv2
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"fused_conv2d_NCHWc_OIHWo_opencl_relu": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform2": (["global"], ["global"]),
}
verify(Input, Expected)


def test_relu_conv2d_relu_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "float32")
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
x0: R.Tensor((2, 16, 28, 28), "float32") = R.nn.relu(x)
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x0, w, out_dtype="float32")
gv2: R.Tensor((2, 4, 26, 26), "float32") = R.nn.relu(gv)
R.output(gv2)
return gv2
Expected = {
"relu": (["global"], ["global"]),
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"fused_conv2d_NCHWc_OIHWo_opencl_relu1": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform2": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_relu_tanh_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "float32")
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2: R.Tensor((2, 4, 26, 26), "float32") = R.nn.relu(gv)
gv3: R.Tensor((2, 4, 26, 26), "float32") = R.tanh(gv2)
R.output(gv3)
return gv3
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"fused_conv2d_NCHWc_OIHWo_opencl_relu_tir_tanh": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform2": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_add_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 16, 28, 28), "float32"),
w: R.Tensor((4, 16, 3, 3), "float32"),
bias: R.Tensor((2, 4, 26, 26), "float32"),
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2: R.Tensor((2, 4, 26, 26), "float32") = R.add(gv, bias)
R.output(gv2)
return gv2
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"te_layout_transform2": (["global"], ["global.texture-weight"]),
"fused_conv2d_NCHWc_OIHWo_opencl_add": (
["global.texture-weight", "global", "global.texture-weight"],
["global"],
),
"te_layout_transform3": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_fma_relu_conv2d_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 4, 28, 28), "float32"),
w: R.Tensor((4, 4, 3, 3), "float32"),
scale: R.Tensor((2, 4, 26, 26), dtype="float32"),
bias: R.Tensor((2, 4, 26, 26), "float32"),
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2: R.Tensor((2, 4, 26, 26), "float32") = R.ewise_fma(gv, scale, bias)
gv3: R.Tensor((2, 4, 26, 26), "float32") = R.nn.relu(gv2)
gv4: R.Tensor((2, 4, 24, 24), "float32") = R.nn.conv2d(gv3, w, out_dtype="float32")
R.output(gv4)
return gv4
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform2": (["global"], ["global"]),
"relu": (["global"], ["global"]),
"te_layout_transform3": (["global"], ["global.texture-weight"]),
"conv2d_NCHWc_OIHWo1_opencl": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform4": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_sum_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "float32")
) -> R.Tensor(None, "float32", ndim=2):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2: R.Tensor((2, 4), "float32") = R.sum(gv, axis=[2, 3])
R.output(gv2)
return gv2
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global"],
),
"sum": (["global"], ["global"]),
"te_layout_transform2": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_sum_keepdims_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "float32")
) -> R.Tensor(None, "float32", ndim=2):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2: R.Tensor((2, 4, 1, 1), "float32") = R.sum(gv, axis=[2, 3], keepdims=True)
R.output(gv2)
return gv2
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global"],
),
"sum": (["global"], ["global"]),
"te_layout_transform2": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_sum_reduce_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "float32")
) -> R.Tensor(None, "float32", ndim=2):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2: R.Tensor((2, 26), "float32") = R.sum(gv, axis=[1, 2])
R.output(gv2)
return gv2
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global"],
),
"sum": (["global"], ["global"]),
"te_layout_transform2": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_transpose_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "float32")
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2: R.Tensor((26, 26, 4, 2), "float32") = R.permute_dims(gv, axes=[3, 2, 1, 0])
R.output(gv2)
return gv2
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform2": (["global"], ["global"]),
"transpose": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_expand_dims_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "float32")
) -> R.Tensor(None, "float32", ndim=6):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2: R.Tensor((2, 1, 4, 1, 26, 26), "float32") = R.expand_dims(gv, axis=(-3, 1))
R.output(gv2)
return gv2
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform2": (["global"], ["global"]),
"expand_dims": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_squeeze_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((1, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "float32")
) -> R.Tensor(None, "float32", ndim=3):
with R.dataflow():
gv: R.Tensor((1, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2: R.Tensor((4, 26, 26), "float32") = R.squeeze(gv, axis=[0])
R.output(gv2)
return gv2
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform2": (["global"], ["global"]),
"squeeze": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_strided_slice_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "float32")
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2: R.Tensor((2, 2, 9, 7), dtype="float32") = R.strided_slice(
gv, begin=[0, 0, 0], end=[4, 26, 26], strides=[2, 3, 4], axes=[1, 2, 3]
)
R.output(gv2)
return gv2
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform2": (["global"], ["global"]),
"strided_slice": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_relu_concat_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "float32")
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2: R.Tensor((2, 4, 26, 26), "float32") = R.nn.relu(gv)
gv3: R.Tensor((2, 8, 26, 26), "float32") = R.concat((gv, gv2), axis=1)
R.output(gv3)
return gv3
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global.texture-weight"],
),
"fused_relu_concatenate": (["global.texture-weight"], ["global"]),
"te_layout_transform2": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_relu_concat_split_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "float32")):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2: R.Tensor((2, 4, 26, 26), "float32") = R.nn.relu(gv)
gv3: R.Tensor((2, 8, 26, 26), "float32") = R.concat((gv, gv2), axis=1)
gv4 = R.split(gv3, indices_or_sections=2, axis=1)
R.output(gv4)
return gv4
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global.texture-weight"],
),
"fused_relu_concatenate_split": (["global.texture-weight"], ["global", "global"]),
"te_layout_transform2": (["global"], ["global"]),
"te_layout_transform3": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_relu_concat_split_transpose_concat_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "float32")):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2: R.Tensor((2, 4, 26, 26), "float32") = R.nn.relu(gv)
gv3: R.Tensor((2, 8, 26, 26), "float32") = R.concat((gv, gv2), axis=1)
gv4 = R.split(gv3, indices_or_sections=2, axis=1)
gv5: R.Tensor((26, 26, 4, 2), "float32") = R.permute_dims(gv4[0], axes=[3, 2, 1, 0])
gv6: R.Tensor((26, 26, 4, 2), "float32") = R.permute_dims(gv4[1], axes=[3, 2, 1, 0])
gv7: R.Tensor((26, 26, 8, 2), "float32") = R.concat((gv5, gv6), axis=2)
R.output(gv7)
return gv7
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global.texture-weight"],
),
"fused_relu_concatenate_split": (["global.texture-weight"], ["global", "global"]),
"te_layout_transform2": (["global"], ["global"]),
"te_layout_transform3": (["global"], ["global"]),
"fused_transpose_transpose_concatenate1": (["global", "global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_maxpool2d_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "float32")
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2 = R.nn.max_pool2d(
gv,
pool_size=[2, 2],
strides=[2, 2],
padding=[0, 0],
layout="NCHW",
out_layout="NCHW",
)
R.output(gv2)
return gv2
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global.texture-weight"],
),
"max_pool2d_opencl": (["global.texture-weight"], ["global"]),
"te_layout_transform2": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_avgpool2d_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "float32")
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2 = R.nn.adaptive_avg_pool2d(gv, output_size=[13, 13], layout="NCHW")
R.output(gv2)
return gv2
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global.texture-weight"],
),
"adaptive_avg_pool2d_opencl": (["global.texture-weight"], ["global"]),
"te_layout_transform2": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_softmax_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "float32")
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2 = R.nn.softmax(gv, axis=1)
R.output(gv2)
return gv2
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform2": (["global"], ["global"]),
"softmax": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_layernorm_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 16, 28, 28), "float32"),
w: R.Tensor((4, 16, 3, 3), "float32"),
gamma: R.Tensor((26, 26), dtype="float32"),
beta: R.Tensor((26, 26), dtype="float32"),
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2: R.Tensor((2, 4, 26, 26), dtype="float32") = R.nn.layer_norm(
gv, gamma, beta, axes=[-2, -1]
)
R.output(gv2)
return gv2
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global"],
),
"layer_norm": (["global", "global", "global"], ["global"]),
"te_layout_transform2": (["global"], ["global"]),
}
verify(Input, Expected)


def test_binary_broadcast_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 16, 28, 28), "float32"),
w: R.Tensor((4, 16, 3, 3), "float32"),
bias: R.Tensor((26, 26), "float32"),
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2: R.Tensor((2, 4, 26, 26), "float32") = R.add(gv, bias)
R.output(gv2)
return gv2
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform2": (["global"], ["global"]),
"add": (["global", "global"], ["global"]),
}
verify(Input, Expected)


def test_binary_ewise_scalar_sub_indexed():
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "float32")
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv: R.Tensor((2, 4, 26, 26), "float32") = R.nn.conv2d(x, w, out_dtype="float32")
gv2: R.Tensor((2, 4, 26, 26), "float32") = R.add(gv, R.const(1, "float32"))
R.output(gv2)
return gv2
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"fused_conv2d_NCHWc_OIHWo_opencl_add": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform2": (["global"], ["global"]),
}
verify(Input, Expected)


def test_residual_block():
r"""
    - A residual block followed by a convolution, so the residual block's output is a texture.
    - Verifies that scalar operands are mapped to the global memory scope.
layout_transform (NCHW->NCHW4c)
| <- buffer
conv2d (1) <- to get textures as output
/ \
conv2d (2) |
\ /
add <- add should be fused into conv2d (2)
multiply to scalar <- buffer to the input of multiply scalar value
relu
| <- texture in intermediate tensor
conv2d (3)
relu
| <- buffer
layout_transform (NCHW4c->NCHW)
"""
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 32, 40, 40), "float32"),
w1: R.Tensor((32, 32, 2, 2), "float32"),
w2: R.Tensor((32, 32, 1, 1), "float32"),
w3: R.Tensor((32, 32, 2, 2), "float32"),
bias: R.Tensor((1, 32, 1, 1), "float32"),
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv = R.nn.conv2d(x, w1, strides=[2, 2], out_dtype="float32")
gv1 = R.add(gv, bias)
gv2 = R.nn.relu(gv1)
gv3 = R.nn.conv2d(gv2, w2, strides=[1, 1], out_dtype="float32")
bias_1 = R.multiply(bias, R.const(0.15, "float32"))
gv4 = R.add(gv3, bias_1)
gv5 = R.nn.relu(gv4)
gv6 = R.nn.conv2d(gv5, w3, strides=[2, 2], out_dtype="float32")
gv7 = R.nn.relu(gv6)
R.output(gv7)
return gv7
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"te_layout_transform2": (["global"], ["global.texture-weight"]),
"fused_conv2d_NCHWc_OIHWo_opencl_add_relu": (
["global.texture-weight", "global", "global.texture-weight"],
["global.texture-weight"],
),
"te_layout_transform3": (["global"], ["global"]),
"multiply": (["global"], ["global"]),
"fused_conv2d_NCHWc_OIHWo1_opencl_add_relu": (
["global.texture-weight", "global", "global.texture-weight"],
["global.texture-weight"],
),
"fused_conv2d_NCHWc_OIHWo2_opencl_relu1": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform4": (["global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_conv2d_fallback_to_buffer_conv2d():
r"""
layout_transform (NCHW->NCHW4c)
| <- texture
conv2d (1) <- textures as output
/ \
conv2d (2) conv2d (3) <- conv2d (2) emits texture, conv2d (3) emits buffer
\ / <- concat shouldn't support textures here
concatenation
| <- buffer
layout_transform (NCHW4c->NCHW)
"""
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 32, 40, 40), "float32"),
w1: R.Tensor((96, 32, 2, 2), "float32"),
w2: R.Tensor((32, 96, 2, 2), "float32"),
w3: R.Tensor((5, 96, 2, 2), "float32"),
bias1: R.Tensor((1, 96, 1, 1), "float32"),
bias2: R.Tensor((1, 32, 1, 1), "float32"),
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv = R.nn.conv2d(x, w1, strides=[2, 2], out_dtype="float32")
gv1 = R.add(gv, bias1)
gv2 = R.nn.relu(gv1)
gv3 = R.nn.conv2d(gv2, w2, strides=[2, 2], out_dtype="float32")
gv4 = R.add(gv3, bias2)
gv5 = R.nn.relu(gv4)
gv6 = R.nn.conv2d(gv2, w3, strides=[2, 2], out_dtype="float32")
gv7 = R.concat((gv3, gv6), axis=1)
R.output(gv7)
return gv7
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"te_layout_transform2": (["global"], ["global.texture-weight"]),
"fused_conv2d_NCHWc_OIHWo_opencl_add_relu": (
["global.texture-weight", "global", "global.texture-weight"],
["global"],
),
"te_layout_transform3": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo1_opencl": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform4": (["global"], ["global"]),
"conv2d": (["global", "global"], ["global"]),
"te_layout_transform5": (["global"], ["global"]),
"concatenate": (["global", "global"], ["global"]),
}
verify(Input, Expected)


def test_conv2d_conv2d_conv2d_concat():
r"""
layout_transform (NCHW->NCHW4c)
| <- texture
conv2d (1) <- textures as output
/ \
conv2d (2) conv2d (3)
\ / <- concat does support textures here
concatenation
| <- buffer
layout_transform (NCHW4c->NCHW)
"""
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 32, 40, 40), "float32"),
w1: R.Tensor((96, 32, 2, 2), "float32"),
w2: R.Tensor((32, 96, 2, 2), "float32"),
w3: R.Tensor((8, 96, 2, 2), "float32"),
bias1: R.Tensor((1, 96, 1, 1), "float32"),
bias2: R.Tensor((1, 32, 1, 1), "float32"),
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv = R.nn.conv2d(x, w1, strides=[2, 2], out_dtype="float32")
gv1 = R.add(gv, bias1)
gv2 = R.nn.relu(gv1)
gv3 = R.nn.conv2d(gv2, w2, strides=[2, 2], out_dtype="float32")
gv4 = R.add(gv3, bias2)
gv5 = R.nn.relu(gv4)
gv6 = R.nn.conv2d(gv2, w3, strides=[2, 2], out_dtype="float32")
gv7 = R.concat((gv3, gv6), axis=1)
R.output(gv7)
return gv7
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"te_layout_transform2": (["global"], ["global.texture-weight"]),
"fused_conv2d_NCHWc_OIHWo_opencl_add_relu": (
["global.texture-weight", "global", "global.texture-weight"],
["global.texture-weight"],
),
"te_layout_transform3": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo1_opencl": (
["global.texture-weight", "global"],
["global.texture-weight"],
),
"te_layout_transform4": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo2_opencl": (
["global.texture-weight", "global"],
["global.texture-weight"],
),
"concatenate": (["global.texture-weight", "global.texture-weight"], ["global"]),
"te_layout_transform5": (["global"], ["global"]),
}
verify(Input, Expected)


def test_pooling_branching_texture_params():
r"""
    Verifies pooling and multiple branches that operate on textures.
layout_transform (NCHW->NCHW4c)
| <- texture
conv2d (0) <- to get textures
| <- textures
pooling
/ \ \ <- textures
conv2d (1) conv2d (2) conv2d (3)
\ / |
add | <- to have the only one output, will be fused
\ /
add <- to have the only one output, will be fused
| <- buffer
layout_transform (NCHW4c->NCHW)
"""
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((2, 32, 40, 40), "float32"),
w1: R.Tensor((32, 32, 1, 1), "float32"),
w2: R.Tensor((32, 32, 2, 2), "float32"),
w3: R.Tensor((32, 32, 1, 1), "float32"),
w4: R.Tensor((32, 32, 2, 2), "float32"),
bias1: R.Tensor((1, 32, 1, 1), "float32"),
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
gv = R.nn.conv2d(x, w1, strides=[1, 1], out_dtype="float32")
gv1 = R.nn.max_pool2d(gv, pool_size=[2, 2], strides=[2, 2])
gv2 = R.nn.conv2d(
gv1, w2, padding=[0, 0, 1, 1], strides=[1, 1], out_dtype="float32"
)
gv3 = R.add(gv2, bias1)
gv4 = R.nn.relu(gv3)
gv5 = R.nn.conv2d(
gv1, w3, padding=[0, 0, 0, 0], strides=[1, 1], out_dtype="float32"
)
gv6 = R.nn.conv2d(
gv1, w4, padding=[0, 1, 1, 0], strides=[1, 1], out_dtype="float32"
)
gv7 = R.nn.relu(gv6)
gv8 = R.add(gv2, gv5)
gv9 = R.add(gv8, gv6)
R.output(gv9)
return gv9
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global.texture-weight"],
),
"max_pool2d_opencl": (["global.texture-weight"], ["global.texture-weight"]),
"te_layout_transform2": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo2_opencl": (
["global.texture-weight", "global"],
["global.texture-weight"],
),
"fused_conv2d_NCHWc_OIHWo1_opencl_add": (
["global.texture-weight", "global", "global.texture-weight"],
["global.texture-weight"],
),
"fused_conv2d_NCHWc_OIHWo3_opencl_add": (
["global.texture-weight", "global", "global.texture-weight"],
["global"],
),
"te_layout_transform3": (["global"], ["global"]),
}
verify(Input, Expected)


def test_injective_inputs1():
r"""
Input
/ \
/ |
| /
conv2d (1) /
| /
conv2d (2) mean /
/ \ /
| | \ /
| | (3) add
| | |
| \ /
\ mul
\ /
add
"""
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((1, 4, 40, 40), "float32"),
w1: R.Tensor((4, 4, 3, 3), "float32"),
w2: R.Tensor((4, 4, 3, 3), "float32"),
w3: R.Tensor((4, 4, 3, 3), "float32"),
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
mean = R.mean(x, axis=1, keepdims=True)
conv1 = R.nn.conv2d(
x, w1, padding=[1, 1, 1, 1], strides=[1, 1], out_dtype="float32"
)
conv2 = R.nn.conv2d(
conv1, w2, padding=[1, 1, 1, 1], strides=[1, 1], out_dtype="float32"
)
ad3 = R.add(conv1, conv2)
ad1 = R.add(mean, conv1)
ad2 = R.multiply(ad1, conv1)
gv = R.add(ad3, ad2)
R.output(gv)
return gv
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform2": (["global"], ["global"]),
"fused_mean_add1": (["global", "global"], ["global"]),
"fused_conv2d_NCHWc_OIHWo_opencl_add_multiply_add": (
[
"global.texture-weight",
"global",
"global.texture-weight",
"global.texture-weight",
"global.texture-weight",
],
["global"],
),
}
verify(Input, Expected)


def test_injective_two_inputs2():
r"""
Input
/ \
| \
conv2d \
| /
conv2d mean /
/ \ /
add | \ |
| | \ |
| | \ /
| | (3) add
| | |
| \ /
| \ /
\ mul
\ /
add
"""
@I.ir_module
class Input:
@R.function
def main(
x: R.Tensor((1, 4, 40, 40), "float32"),
w1: R.Tensor((4, 4, 3, 3), "float32"),
w2: R.Tensor((4, 4, 3, 3), "float32"),
w3: R.Tensor((4, 4, 3, 3), "float32"),
) -> R.Tensor(None, "float32", ndim=4):
with R.dataflow():
mean = R.mean(x, axis=1, keepdims=True)
conv1 = R.nn.conv2d(
x, w1, padding=[1, 1, 1, 1], strides=[1, 1], out_dtype="float32"
)
conv2 = R.nn.conv2d(
conv1, w2, padding=[1, 1, 1, 1], strides=[1, 1], out_dtype="float32"
)
ad3 = R.add(conv1, conv2)
ad1 = R.add(mean, conv1)
ad2 = R.multiply(ad1, conv2)
gv = R.add(ad2, ad3)
R.output(gv)
return gv
Expected = {
"te_layout_transform": (["global"], ["global.texture-weight"]),
"te_layout_transform1": (["global"], ["global"]),
"conv2d_NCHWc_OIHWo_opencl": (
["global.texture-weight", "global"],
["global"],
),
"te_layout_transform2": (["global"], ["global"]),
"fused_mean_add1": (["global", "global"], ["global"]),
"fused_conv2d_NCHWc_OIHWo_opencl_add_multiply_add": (
[
"global.texture-weight",
"global",
"global.texture-weight",
"global.texture-weight",
],
["global"],
),
}
verify(Input, Expected)


if __name__ == "__main__":
tvm.testing.main()