/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
 * \file mkldnn_requantize-inl.h
 * \brief Implementation of the requantize operator using MKLDNN
 * \author Jin Huang, Xinyu Chen
 */
#ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
#define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
#if MXNET_USE_MKLDNN == 1
#include <string>
#include <algorithm>
#include <vector>
#include "../requantize-inl.h"
#include "../../nn/mkldnn/mkldnn_base-inl.h"
namespace mxnet {
namespace op {
static void MKLDNNRequantizeForwardKer(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<NDArray>& inputs,
const std::vector<OpReqType>& req,
const std::vector<NDArray>& outputs,
const float real_range) {
using namespace mshadow;
using namespace mxnet_op;
using red::limits::MaxValue;
using red::limits::MinValue;
typedef int32_t SrcDType;
typedef int8_t DstDType;
// check shapes
size_t i_dim = inputs[0].shape().ndim();
size_t o_dim = outputs[0].shape().ndim();
CHECK_EQ(i_dim, o_dim);
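  // Requantization is a two-stage rescale: first_scale maps the int32 input
  // back to real values, and second_scale maps those real values into the
  // int8 output range. The combined scale = first_scale * second_scale can
  // then be applied in a single pass.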
float first_quantized_range = MinAbs(MinValue<SrcDType>(),
MaxValue<SrcDType>());
float first_real_range = MaxAbs(*inputs[1].data().dptr<float>(),
*inputs[2].data().dptr<float>());
float first_scale = first_real_range / first_quantized_range;
float second_real_range = real_range;
float second_quantized_range = MinAbs(MaxValue<DstDType>(),
MinValue<DstDType>());
float second_scale = second_quantized_range / second_real_range;
float scale = first_scale * second_scale;
*outputs[1].data().dptr<float>() = -second_real_range;
*outputs[2].data().dptr<float>() = second_real_range;
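  // A single MKLDNN reorder primitive with a common output scale (mask = 0)
  // performs the int32 -> int8 conversion; round-to-nearest is requested
  // explicitly for the integer output.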
primitive_attr attr;
const int mask = 0;
std::vector<float> scales = {scale};
attr.set_output_scales(mask, scales);
attr.set_int_output_round_mode(round_nearest);
mkldnn::engine cpu_engine = mxnet::CpuEngine::Get()->get_engine();
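  // A view over MKLDNN-layout data cannot be fed to the reorder directly,
  // so such inputs are first converted to the default layout.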
NDArray in_buffer = inputs[0];
if (inputs[0].IsView() && inputs[0].IsMKLDNNData())
in_buffer = inputs[0].Reorder2Default();
auto i_mem = in_buffer.GetMKLDNNData();
auto i_mpd = i_mem->get_primitive_desc();
auto i_desc = i_mpd.desc();
mkldnn::memory::format i_fmt = static_cast<mkldnn::memory::format>(i_desc.data.format);
mkldnn::memory::dims i_dims = mkldnn::memory::dims(i_dim);
for (size_t i = 0; i < i_dim; i++) {
i_dims[i] = static_cast<int>(in_buffer.shape()[i]);
}
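  // The output descriptor reuses the input's dims and memory format but
  // switches the data type to the int8 destination type.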
auto o_desc = mkldnn::memory::desc(i_dims,
(mkldnn::memory::data_type)data_type_enum<DstDType>::type,
i_fmt);
auto o_mpd = memory::primitive_desc(o_desc, cpu_engine);
auto reorder_pd = reorder::primitive_desc(i_mpd, o_mpd, attr);
auto o_mem = CreateMKLDNNMem(outputs[0], o_mpd, req[0]);
MKLDNNStream::Get()->RegisterPrim(mkldnn::reorder(reorder_pd, *i_mem, *o_mem.second));
CommitOutput(outputs[0], o_mem);
MKLDNNStream::Get()->Submit();
}
static void MKLDNNRequantizeForward(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<NDArray>& inputs,
const std::vector<OpReqType>& req,
const std::vector<NDArray>& outputs) {
using namespace mshadow;
using namespace mxnet_op;
typedef int32_t SrcDType;
typedef int8_t DstDType;
Stream<cpu> *s = ctx.get_stream<cpu>();
const RequantizeParam& param = nnvm::get<RequantizeParam>(attrs.parsed);
float real_range;
// Model is calibrated
if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {
real_range =
MaxAbs(param.min_calib_range.value(), param.max_calib_range.value());
MKLDNNRequantizeForwardKer(attrs, ctx, inputs, req, outputs, real_range);
  } else {
    // Model is not calibrated
mxnet::TShape src_shape, dst_shape;
const size_t actual_float_size = sizeof(float);
const size_t actual_quantized_size = sizeof(SrcDType);
const size_t temp_reduce_size = ConfigReduce<cpu, SrcDType>(s,
inputs[0].shape(), mxnet::TShape({1}), &src_shape, &dst_shape);
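    // The single requested scratch buffer packs, back to back: two floats
    // (de-quantized min/max), two int32 values (quantized min/max), and the
    // workspace needed by the reduction.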
Tensor<cpu, 1, char> temp_space =
ctx.requested[0].get_space_typed<cpu, 1, char>(
Shape1(2*actual_float_size+2*actual_quantized_size+temp_reduce_size), s);
Tensor<cpu, 1, float> actual_min_float(
reinterpret_cast<float*>(temp_space.dptr_), Shape1(1), s);
Tensor<cpu, 1, float> actual_max_float(
reinterpret_cast<float*>(temp_space.dptr_) + 1, Shape1(1), s);
const int dev_id = ctx.run_ctx.ctx.dev_id;
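    // The +8 byte offset skips the two float slots (2 * actual_float_size)
    // reserved at the front of the scratch buffer.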
TBlob actual_min_quantized(reinterpret_cast<SrcDType*>(
temp_space.dptr_ + 8), Shape1(1), cpu::kDevMask, dev_id);
TBlob actual_max_quantized(reinterpret_cast<SrcDType*>(
temp_space.dptr_ + 8) + 1, Shape1(1), cpu::kDevMask, dev_id);
Tensor<cpu, 1, char> workspace(
temp_space.dptr_+2*actual_float_size+2*actual_quantized_size,
Shape1(temp_reduce_size), s);
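    // Reduce the int32 input to its runtime min and max, convert both back to
    // real values using the input's stored range, and take the larger
    // magnitude as real_range.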
broadcast::Reduce<red::minimum, 2, SrcDType, mshadow::op::identity>(
s, actual_min_quantized.reshape(dst_shape), kWriteTo,
workspace, inputs[0].Reorder2Default().data().reshape(src_shape));
Kernel<QuantizedToFloatStruct, cpu>::Launch(s, 1,
actual_min_float.dptr_, actual_min_quantized.dptr<SrcDType>(),
inputs[1].Reorder2Default().data().dptr<float>(),
inputs[2].Reorder2Default().data().dptr<float>());
broadcast::Reduce<red::maximum, 2, SrcDType, mshadow::op::identity>(
s, actual_max_quantized.reshape(dst_shape), kWriteTo,
workspace, inputs[0].Reorder2Default().data().reshape(src_shape));
Kernel<QuantizedToFloatStruct, cpu>::Launch(s, 1,
actual_max_float.dptr_, actual_max_quantized.dptr<SrcDType>(),
inputs[1].Reorder2Default().data().dptr<float>(),
inputs[2].Reorder2Default().data().dptr<float>());
real_range = MaxAbs(*actual_min_float.dptr_, *actual_max_float.dptr_);
MKLDNNRequantizeForwardKer(attrs, ctx, inputs, req, outputs, real_range);
}
}
} // namespace op
} // namespace mxnet
#endif // MXNET_USE_MKLDNN == 1
#endif // MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_