/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
 * \file mkldnn_requantize-inl.h
 * \brief Implementation of the requantize operator using MKLDNN
 * \author Jin Huang, Xinyu Chen
 */
#ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
#define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
#if MXNET_USE_MKLDNN == 1
#include <string>
#include <algorithm>
#include <vector>
#include "../requantize-inl.h"
#include "../../nn/mkldnn/mkldnn_base-inl.h"
namespace mxnet {
namespace op {
static void MKLDNNRequantizeForwardKer(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<NDArray>& inputs,
const std::vector<OpReqType>& req,
const std::vector<NDArray>& outputs,
const float real_range) {
using namespace mshadow;
using namespace mxnet_op;
using red::limits::MaxValue;
using red::limits::MinValue;
typedef int32_t SrcDType;
typedef int8_t DstDType;
// check shapes
size_t i_dim = inputs[0].shape().ndim();
size_t o_dim = outputs[0].shape().ndim();
CHECK_EQ(i_dim, o_dim);
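  // Requantization is a two-stage rescale: first_scale maps the int32 input
  // back to real values, and second_scale maps those real values into the
  // int8 output range. The combined scale = first_scale * second_scale can
  // then be applied in a single pass.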
float first_quantized_range = MinAbs(MinValue<SrcDType>(),
MaxValue<SrcDType>());
float first_real_range = MaxAbs(*inputs[1].data().dptr<float>(),
*inputs[2].data().dptr<float>());
float first_scale = first_real_range / first_quantized_range;
float second_real_range = real_range;
float second_quantized_range = MinAbs(MaxValue<DstDType>(),
MinValue<DstDType>());
float second_scale = second_quantized_range / second_real_range;
float scale = first_scale * second_scale;
*outputs[1].data().dptr<float>() = -second_real_range;
*outputs[2].data().dptr<float>() = second_real_range;
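  // A single MKLDNN reorder primitive with a common output scale (mask = 0)
  // performs the int32 -> int8 conversion; round-to-nearest is requested
  // explicitly for the integer output.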
primitive_attr attr;
const int mask = 0;
std::vector<float> scales = {scale};
attr.set_output_scales(mask, scales);
attr.set_int_output_round_mode(round_nearest);
mkldnn::engine cpu_engine = mxnet::CpuEngine::Get()->get_engine();
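  // A view over MKLDNN-layout data cannot be fed to the reorder directly,
  // so such inputs are first converted to the default layout.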
NDArray in_buffer = inputs[0];
if (inputs[0].IsView() && inputs[0].IsMKLDNNData())
in_buffer = inputs[0].Reorder2Default();
auto i_mem = in_buffer.GetMKLDNNData();
auto i_mpd = i_mem->get_primitive_desc();
auto i_desc = i_mpd.desc();
mkldnn::memory::format i_fmt = static_cast<mkldnn::memory::format>(i_desc.data.format);
mkldnn::memory::dims i_dims = mkldnn::memory::dims(i_dim);
for (size_t i = 0; i < i_dim; i++) {
i_dims[i] = static_cast<int>(in_buffer.shape()[i]);
}
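  // The output descriptor reuses the input's dims and memory format but
  // switches the data type to the int8 destination type.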
auto o_desc = mkldnn::memory::desc(i_dims,
(mkldnn::memory::data_type)data_type_enum<DstDType>::type,
i_fmt);
auto o_mpd = memory::primitive_desc(o_desc, cpu_engine);
auto reorder_pd = reorder::primitive_desc(i_mpd, o_mpd, attr);
auto o_mem = CreateMKLDNNMem(outputs[0], o_mpd, req[0]);
MKLDNNStream::Get()->RegisterPrim(mkldnn::reorder(reorder_pd, *i_mem, *o_mem.second));
CommitOutput(outputs[0], o_mem);
MKLDNNStream::Get()->Submit();
}
static void MKLDNNRequantizeForward(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<NDArray>& inputs,
const std::vector<OpReqType>& req,
const std::vector<NDArray>& outputs) {
using namespace mshadow;
using namespace mxnet_op;
typedef int32_t SrcDType;
typedef int8_t DstDType;
Stream<cpu> *s = ctx.get_stream<cpu>();
const RequantizeParam& param = nnvm::get<RequantizeParam>(attrs.parsed);
float real_range;
// Model is calibrated
if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {
real_range =
MaxAbs(param.min_calib_range.value(), param.max_calib_range.value());
MKLDNNRequantizeForwardKer(attrs, ctx, inputs, req, outputs, real_range);
  } else {
    // Model is not calibrated
mxnet::TShape src_shape, dst_shape;
const size_t actual_float_size = sizeof(float);
const size_t actual_quantized_size = sizeof(SrcDType);
const size_t temp_reduce_size = ConfigReduce<cpu, SrcDType>(s,
inputs[0].shape(), mxnet::TShape({1}), &src_shape, &dst_shape);
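    // The single requested scratch buffer packs, back to back: two floats
    // (de-quantized min/max), two int32 values (quantized min/max), and the
    // workspace needed by the reduction.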
Tensor<cpu, 1, char> temp_space =
ctx.requested[0].get_space_typed<cpu, 1, char>(
Shape1(2*actual_float_size+2*actual_quantized_size+temp_reduce_size), s);
Tensor<cpu, 1, float> actual_min_float(
reinterpret_cast<float*>(temp_space.dptr_), Shape1(1), s);
Tensor<cpu, 1, float> actual_max_float(
reinterpret_cast<float*>(temp_space.dptr_) + 1, Shape1(1), s);
const int dev_id = ctx.run_ctx.ctx.dev_id;
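    // The +8 byte offset skips the two float slots (2 * actual_float_size)
    // reserved at the front of the scratch buffer.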
TBlob actual_min_quantized(reinterpret_cast<SrcDType*>(
temp_space.dptr_ + 8), Shape1(1), cpu::kDevMask, dev_id);
TBlob actual_max_quantized(reinterpret_cast<SrcDType*>(
temp_space.dptr_ + 8) + 1, Shape1(1), cpu::kDevMask, dev_id);
Tensor<cpu, 1, char> workspace(
temp_space.dptr_+2*actual_float_size+2*actual_quantized_size,
Shape1(temp_reduce_size), s);
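    // Reduce the int32 input to its runtime min and max, convert both back to
    // real values using the input's stored range, and take the larger
    // magnitude as real_range.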
broadcast::Reduce<red::minimum, 2, SrcDType, mshadow::op::identity>(
s, actual_min_quantized.reshape(dst_shape), kWriteTo,
workspace, inputs[0].Reorder2Default().data().reshape(src_shape));
Kernel<QuantizedToFloatStruct, cpu>::Launch(s, 1,
actual_min_float.dptr_, actual_min_quantized.dptr<SrcDType>(),
inputs[1].Reorder2Default().data().dptr<float>(),
inputs[2].Reorder2Default().data().dptr<float>());
broadcast::Reduce<red::maximum, 2, SrcDType, mshadow::op::identity>(
s, actual_max_quantized.reshape(dst_shape), kWriteTo,
workspace, inputs[0].Reorder2Default().data().reshape(src_shape));
Kernel<QuantizedToFloatStruct, cpu>::Launch(s, 1,
actual_max_float.dptr_, actual_max_quantized.dptr<SrcDType>(),
inputs[1].Reorder2Default().data().dptr<float>(),
inputs[2].Reorder2Default().data().dptr<float>());
real_range = MaxAbs(*actual_min_float.dptr_, *actual_max_float.dptr_);
MKLDNNRequantizeForwardKer(attrs, ctx, inputs, req, outputs, real_range);
}
}
} // namespace op
} // namespace mxnet
#endif // MXNET_USE_MKLDNN == 1
#endif // MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_