python/vta/intrin.py - tvm-vta - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 """VTA related intrinsics"""
 from __future__ import absolute_import as _abs

 import tvm
 from tvm import te

 def gemm(env, mock=False):
     """Matrix-matrix multiply intrinsic

     Parameters
     ----------
     env : Environment
         The Environment

     mock : bool
         Whether create a mock version.
     """
     wgt_lanes = env.WGT_ELEM_BITS // env.WGT_WIDTH
     assert wgt_lanes == env.BLOCK_OUT * env.BLOCK_IN
     wgt_shape = (env.BLOCK_OUT, env.BLOCK_IN)
     assert wgt_shape[0] * wgt_shape[1] == wgt_lanes

     inp_lanes = env.INP_ELEM_BITS // env.INP_WIDTH
     assert inp_lanes == env.BATCH * env.BLOCK_IN
     inp_shape = (env.BATCH, env.BLOCK_IN)
     assert inp_shape[0] * inp_shape[1] == inp_lanes

     out_lanes = env.ACC_ELEM_BITS // env.ACC_WIDTH
     assert out_lanes == env.BATCH * env.BLOCK_OUT
     out_shape = (env.BATCH, env.BLOCK_OUT)
     assert out_shape[0] * out_shape[1] == out_lanes

     wgt = te.placeholder((wgt_shape[0], wgt_shape[1]),
                          dtype="int%d" % env.WGT_WIDTH,
                          name=env.wgt_scope)
     inp = te.placeholder((inp_shape[0], inp_shape[1]),
                          dtype="int%d" % env.INP_WIDTH,
                          name=env.inp_scope)
     k = te.reduce_axis((0, wgt_shape[1]), name="k")
     out_dtype = "int%d" % env.ACC_WIDTH
     out = te.compute((out_shape[0], out_shape[1]),
                      lambda i, j: te.sum(inp[i, k].astype(out_dtype) *
                                          wgt[j, k].astype(out_dtype),
                                          axis=[k]),
                      name="out")
     wgt_layout = tvm.tir.decl_buffer(
         wgt.shape, wgt.dtype, env.wgt_scope,
         scope=env.wgt_scope, offset_factor=wgt_lanes, data_alignment=wgt_lanes)
     inp_layout = tvm.tir.decl_buffer(
         inp.shape, inp.dtype, env.inp_scope,
         scope=env.inp_scope, offset_factor=inp_lanes, data_alignment=inp_lanes)
     out_layout = tvm.tir.decl_buffer(
         out.shape, out.dtype, env.acc_scope,
         scope=env.acc_scope, offset_factor=out_lanes, data_alignment=out_lanes)

     def intrin_func(ins, outs):
         """Matrix-matrix multiply intrinsic function"""
         dinp, dwgt = ins
         dout = outs[0]
         def instr(index):
             """Generate matrix-matrix multiply VTA instruction"""
             irb = tvm.tir.ir_builder.create()
             dev = env.dev
             irb.scope_attr(dev.vta_axis, "coproc_scope",
                            dev.get_task_qid(dev.QID_COMPUTE))
             irb.scope_attr(dev.vta_axis, "coproc_uop_scope",
                            dev.vta_push_uop)
             if index in (0, 2):
                 irb.emit(tvm.tir.call_extern(
                     "int32", "VTAUopPush",
                     0, 0,
                     dout.access_ptr("rw", "int32"),
                     dinp.access_ptr("r", "int32"),
                     dwgt.access_ptr("r", "int32"),
                     0, 0, 0))
             else:
                 irb.emit(tvm.tir.call_extern(
                     "int32", "VTAUopPush",
                     0, 1,
                     dout.access_ptr("rw", "int32"),
                     0,
                     0,
                     0, 0, 0))
             return irb.get()
         # return a triple of normal-set, reset, update
         nop = tvm.tir.Evaluate(0)
         if mock:
             return (nop, nop, nop)
         return (instr(0), instr(1), instr(2))

     return te.decl_tensor_intrin(out.op, intrin_func,
                                  name="GEMM",
                                  binds={inp: inp_layout,
                                         wgt: wgt_layout,
                                         out: out_layout})
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	"""VTA related intrinsics"""
	from __future__ import absolute_import as _abs

	import tvm
	from tvm import te

	def gemm(env, mock=False):
	"""Matrix-matrix multiply intrinsic

	Parameters
	----------
	env : Environment
	The Environment

	mock : bool
	Whether create a mock version.
	"""
	wgt_lanes = env.WGT_ELEM_BITS // env.WGT_WIDTH
	assert wgt_lanes == env.BLOCK_OUT * env.BLOCK_IN
	wgt_shape = (env.BLOCK_OUT, env.BLOCK_IN)
	assert wgt_shape[0] * wgt_shape[1] == wgt_lanes

	inp_lanes = env.INP_ELEM_BITS // env.INP_WIDTH
	assert inp_lanes == env.BATCH * env.BLOCK_IN
	inp_shape = (env.BATCH, env.BLOCK_IN)
	assert inp_shape[0] * inp_shape[1] == inp_lanes

	out_lanes = env.ACC_ELEM_BITS // env.ACC_WIDTH
	assert out_lanes == env.BATCH * env.BLOCK_OUT
	out_shape = (env.BATCH, env.BLOCK_OUT)
	assert out_shape[0] * out_shape[1] == out_lanes

	wgt = te.placeholder((wgt_shape[0], wgt_shape[1]),
	dtype="int%d" % env.WGT_WIDTH,
	name=env.wgt_scope)
	inp = te.placeholder((inp_shape[0], inp_shape[1]),
	dtype="int%d" % env.INP_WIDTH,
	name=env.inp_scope)
	k = te.reduce_axis((0, wgt_shape[1]), name="k")
	out_dtype = "int%d" % env.ACC_WIDTH
	out = te.compute((out_shape[0], out_shape[1]),
	lambda i, j: te.sum(inp[i, k].astype(out_dtype) *
	wgt[j, k].astype(out_dtype),
	axis=[k]),
	name="out")
	wgt_layout = tvm.tir.decl_buffer(
	wgt.shape, wgt.dtype, env.wgt_scope,
	scope=env.wgt_scope, offset_factor=wgt_lanes, data_alignment=wgt_lanes)
	inp_layout = tvm.tir.decl_buffer(
	inp.shape, inp.dtype, env.inp_scope,
	scope=env.inp_scope, offset_factor=inp_lanes, data_alignment=inp_lanes)
	out_layout = tvm.tir.decl_buffer(
	out.shape, out.dtype, env.acc_scope,
	scope=env.acc_scope, offset_factor=out_lanes, data_alignment=out_lanes)

	def intrin_func(ins, outs):
	"""Matrix-matrix multiply intrinsic function"""
	dinp, dwgt = ins
	dout = outs[0]
	def instr(index):
	"""Generate matrix-matrix multiply VTA instruction"""
	irb = tvm.tir.ir_builder.create()
	dev = env.dev
	irb.scope_attr(dev.vta_axis, "coproc_scope",
	dev.get_task_qid(dev.QID_COMPUTE))
	irb.scope_attr(dev.vta_axis, "coproc_uop_scope",
	dev.vta_push_uop)
	if index in (0, 2):
	irb.emit(tvm.tir.call_extern(
	"int32", "VTAUopPush",
	0, 0,
	dout.access_ptr("rw", "int32"),
	dinp.access_ptr("r", "int32"),
	dwgt.access_ptr("r", "int32"),
	0, 0, 0))
	else:
	irb.emit(tvm.tir.call_extern(
	"int32", "VTAUopPush",
	0, 1,
	dout.access_ptr("rw", "int32"),
	0,
	0,
	0, 0, 0))
	return irb.get()
	# return a triple of normal-set, reset, update
	nop = tvm.tir.Evaluate(0)
	if mock:
	return (nop, nop, nop)
	return (instr(0), instr(1), instr(2))

	return te.decl_tensor_intrin(out.op, intrin_func,
	name="GEMM",
	binds={inp: inp_layout,
	wgt: wgt_layout,
	out: out_layout})