/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file dnnl_batch_dot-inl.h
 * \author Bartosz Kuncer, bartosz.kuncer@intel.com
 */
#ifndef MXNET_OPERATOR_NN_DNNL_DNNL_BATCH_DOT_INL_H_
#define MXNET_OPERATOR_NN_DNNL_DNNL_BATCH_DOT_INL_H_

#if MXNET_USE_ONEDNN == 1

#include <numeric>
#include <utility>
#include <vector>

#include "operator/tensor/dot-inl.h"

#include "dnnl_base-inl.h"
#include "dnnl_ops-inl.h"

namespace mxnet {
namespace op {
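
// Parameters of the oneDNN batch_dot operator. In addition to the standard
// transpose flags, they carry the quantization settings used when batch_dot
// runs inside a quantized (subgraph) model.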
struct DNNLDotParam : public dmlc::Parameter<DNNLDotParam> {
  bool transpose_a;
  bool transpose_b;
  bool quantized;

  dmlc::optional<float> min_calib_range;  // min float value calculated from calibration dataset
  dmlc::optional<float> max_calib_range;  // max float value calculated from calibration dataset
  bool enable_float_output;

  DMLC_DECLARE_PARAMETER(DNNLDotParam) {
    DMLC_DECLARE_FIELD(transpose_a)
        .describe("If true then transpose the first input before dot.")
        .set_default(false);
    DMLC_DECLARE_FIELD(transpose_b)
        .describe("If true then transpose the second input before dot.")
        .set_default(false);
    DMLC_DECLARE_FIELD(quantized).set_default(false).describe("Enable quantization.");
    DMLC_DECLARE_FIELD(min_calib_range)
        .set_default(dmlc::optional<float>())
        .describe(
            "The minimum scalar value in the form of float32 obtained "
            "through calibration. If present, it will be used by the "
            "quantized batch_dot operator to calculate the primitive scale.");
    DMLC_DECLARE_FIELD(max_calib_range)
        .set_default(dmlc::optional<float>())
        .describe(
            "The maximum scalar value in the form of float32 obtained "
            "through calibration. If present, it will be used by the "
            "quantized batch_dot operator to calculate the primitive scale.");
    DMLC_DECLARE_FIELD(enable_float_output)
        .set_default(false)
        .describe("Whether to enable float32 output.");
  }

  bool operator==(const DNNLDotParam& other) const {
    return this->transpose_a == other.transpose_a && this->transpose_b == other.transpose_b &&
           this->quantized == other.quantized && this->min_calib_range == other.min_calib_range &&
           this->max_calib_range == other.max_calib_range &&
           this->enable_float_output == other.enable_float_output;
  }
};

using batch_dot_fwd_t    = dnnl::matmul;
using batch_dot_fwd_pd_t = dnnl::matmul::primitive_desc;

using BatchDotSignature = ParamOpSign<DNNLDotParam>;
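
// Computes batch_dot with the oneDNN matmul primitive. GetCached returns a
// forward object cached by a BatchDotSignature key, so the primitive is
// created only once per unique combination of parameters and tensors.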
class DNNLBatchDotFwd {
 public:
  static DNNLBatchDotFwd& GetCached(const DNNLDotParam& param,
                                    const std::vector<NDArray>& inputs,
                                    const std::vector<NDArray>& outputs);

  DNNLBatchDotFwd(const DNNLDotParam& param,
                  const std::vector<NDArray>& inputs,
                  const std::vector<NDArray>& outputs);

  void Execute(const OpContext& ctx,
               const DNNLDotParam& param,
               const std::vector<NDArray>& inputs,
               const std::vector<OpReqType>& req,
               const std::vector<NDArray>& outputs);

 private:
  // oneDNN matmul primitive and its primitive descriptor, created once in the
  // constructor and reused on every Execute call.
  std::shared_ptr<batch_dot_fwd_t> fwd;
  std::shared_ptr<batch_dot_fwd_pd_t> fwd_pd;
};
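
// Forward entry point for batch_dot. The `subgraph` template parameter selects
// the source of the parameters: fused subgraph nodes carry a DNNLDotParam,
// while a standalone batch_dot node carries the standard DotParam.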
template <bool subgraph = true>
void DNNLBatchDotForward(const nnvm::NodeAttrs& attrs,
                         const OpContext& ctx,
                         const std::vector<NDArray>& inputs,
                         const std::vector<OpReqType>& req,
                         const std::vector<NDArray>& outputs) {
  DNNLDotParam dnnl_param;
  if (!subgraph) {
    // Standalone batch_dot: translate the standard DotParam into DNNLDotParam.
    const DotParam& param  = nnvm::get<DotParam>(attrs.parsed);
    dnnl_param.transpose_a = param.transpose_a;
    dnnl_param.transpose_b = param.transpose_b;
    dnnl_param.quantized   = false;
  } else {
    // Subgraph path: the fused node already carries a DNNLDotParam.
    dnnl_param = nnvm::get<DNNLDotParam>(attrs.parsed);
  }

  DNNLBatchDotFwd& fwd = DNNLBatchDotFwd::GetCached(dnnl_param, inputs, outputs);
  fwd.Execute(ctx, dnnl_param, inputs, req, outputs);
}
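
// A minimal registration sketch (for illustration only; the actual
// registration lives in the operator's .cc files). The standalone path would
// be wired up along the lines of:
//   NNVM_REGISTER_OP(batch_dot)
//       .set_attr<FComputeEx>("FComputeEx<cpu>", DNNLBatchDotForward<false>);
// while fused subgraph operators instantiate DNNLBatchDotForward<true>.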

}  // namespace op
}  // namespace mxnet

#endif  // MXNET_USE_ONEDNN == 1
#endif  // MXNET_OPERATOR_NN_DNNL_DNNL_BATCH_DOT_INL_H_