src/backend/opencl/codegen/intrin_rule_opencl.cc - tvm - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 /*!
  * \file intrin_rule_opencl.cc
  * \brief OpenCL intrinsic rules.
  */
 #include <tvm/arith/analyzer.h>
 #include <tvm/tirx/op_attr_types.h>

 #include "../../../target/intrin_rule.h"

 namespace tvm {
 namespace codegen {
 namespace intrin {
 using tirx::FLowerIntrinsic;

 // There is no warp shuffle instruction in standard OpenCL.  When shuffle is used, assume Intel's
 // shuffle extension.
 static PrimExpr DispatchIntelShuffle(const PrimExpr& e) {
   const CallNode* call = e.as<CallNode>();
   TVM_FFI_ICHECK(call != nullptr);
   TVM_FFI_ICHECK_EQ(call->args.size(), 5);  // mask, value, warp_id, width, warp_size
   arith::Analyzer analyzer;
   TVM_FFI_ICHECK(analyzer->CanProve(call->args[3] == call->args[4]))
       << "Intel warp shuffle dose not support width != warp_size";
   ffi::Array<PrimExpr> opencl_args{
       {StringImm("intel_sub_group_shuffle"), call->args[1], call->args[2]}};
   return Call(call->dtype, builtin::call_pure_extern(), opencl_args);
 }

 void RegisterOpenCLIntrinRules() {
   static bool registered = false;
   if (registered) return;
   registered = true;

   // clang-format off
 TVM_REGISTER_OP("tirx.clz")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.floor")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.ceil")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.trunc")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.fabs")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.round")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr {
       // OpenCL's rint() uses ties-to-even, matching constant-folding semantics.
       const tirx::CallNode* call = e.as<tirx::CallNode>();
       TVM_FFI_ICHECK(call != nullptr);
       ffi::Array<PrimExpr> new_args = {tirx::StringImm("rint")};
       for (auto arg : call->args) {
         new_args.push_back(arg);
       }
       return tirx::Call(call->dtype, tirx::builtin::call_pure_extern(), new_args);
     });

 TVM_REGISTER_OP("tirx.nearbyint")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.exp")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.erf")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.exp2")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.exp10")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.log")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.log2")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.log10")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.tanh")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.sqrt")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.pow")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.popcount")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.fmod")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.sin")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.sinh")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.cos")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.cosh")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

 TVM_REGISTER_OP("tirx.tvm_warp_shuffle")
     .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchIntelShuffle);
   // clang-format on
 }

 }  // namespace intrin
 }  // namespace codegen
 }  // namespace tvm
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	/*!
	* \file intrin_rule_opencl.cc
	* \brief OpenCL intrinsic rules.
	*/
	#include <tvm/arith/analyzer.h>
	#include <tvm/tirx/op_attr_types.h>

	#include "../../../target/intrin_rule.h"

	namespace tvm {
	namespace codegen {
	namespace intrin {
	using tirx::FLowerIntrinsic;

	// There is no warp shuffle instruction in standard OpenCL. When shuffle is used, assume Intel's
	// shuffle extension.
	static PrimExpr DispatchIntelShuffle(const PrimExpr& e) {
	const CallNode* call = e.as<CallNode>();
	TVM_FFI_ICHECK(call != nullptr);
	TVM_FFI_ICHECK_EQ(call->args.size(), 5); // mask, value, warp_id, width, warp_size
	arith::Analyzer analyzer;
	TVM_FFI_ICHECK(analyzer->CanProve(call->args[3] == call->args[4]))
	<< "Intel warp shuffle dose not support width != warp_size";
	ffi::Array<PrimExpr> opencl_args{
	{StringImm("intel_sub_group_shuffle"), call->args[1], call->args[2]}};
	return Call(call->dtype, builtin::call_pure_extern(), opencl_args);
	}

	void RegisterOpenCLIntrinRules() {
	static bool registered = false;
	if (registered) return;
	registered = true;

	// clang-format off
	TVM_REGISTER_OP("tirx.clz")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.floor")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.ceil")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.trunc")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.fabs")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.round")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr {
	// OpenCL's rint() uses ties-to-even, matching constant-folding semantics.
	const tirx::CallNode* call = e.as<tirx::CallNode>();
	TVM_FFI_ICHECK(call != nullptr);
	ffi::Array<PrimExpr> new_args = {tirx::StringImm("rint")};
	for (auto arg : call->args) {
	new_args.push_back(arg);
	}
	return tirx::Call(call->dtype, tirx::builtin::call_pure_extern(), new_args);
	});

	TVM_REGISTER_OP("tirx.nearbyint")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.exp")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.erf")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.exp2")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.exp10")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.log")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.log2")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.log10")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.tanh")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.sqrt")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.pow")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.popcount")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.fmod")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.sin")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.sinh")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.cos")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.cosh")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchPureExtern<Direct>);

	TVM_REGISTER_OP("tirx.tvm_warp_shuffle")
	.set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic", DispatchIntelShuffle);
	// clang-format on
	}

	} // namespace intrin
	} // namespace codegen
	} // namespace tvm