// src/operator/quantization/quantize-inl.h - mxnet - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 /*!
  *  Copyright (c) 2017 by Contributors
  * \file quantize-inl.h
  * \brief implementation of quantize operation
  */
 #ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZE_INL_H_
 #define MXNET_OPERATOR_QUANTIZATION_QUANTIZE_INL_H_

 #include <mxnet/operator_util.h>
 #include <vector>
 #include <limits>
 #include "../elemwise_op_common.h"
 #include "../mshadow_op.h"
 #include "../mxnet_op.h"
 #include "./quantization_utils.h"

 namespace mxnet {
 namespace op {

 /*!
  * \brief Parameter set of the quantize operator.
  */
 struct QuantizeParam : public dmlc::Parameter<QuantizeParam> {
   /*! \brief Type flag of the quantized output; one of the enum values registered below. */
   int   out_type;
   DMLC_DECLARE_PARAMETER(QuantizeParam) {
     // Accepts the strings "int8" and "uint8", mapped to the mshadow type flags;
     // uint8 is used when the caller does not specify the field.
     DMLC_DECLARE_FIELD(out_type)
     .add_enum("int8", mshadow::kInt8)
     .add_enum("uint8", mshadow::kUint8)
     .set_default(mshadow::kUint8)
     .describe("Output data type.");
   }
 };

 /*!
  * \brief Element-wise kernel that affinely maps values from the real range
  *        [*imin_range, *imax_range] onto [0, max_limit - min_limit].
  *
  * Rounding is done by adding 0.5 and letting the cast truncate. Values that
  * fall outside the supplied real range saturate at the nearest end of the
  * quantized range, because converting an out-of-range floating-point value to
  * an integer type is undefined. A zero-width real range uses a scale of 0
  * instead of dividing by zero.
  */
 struct quantize_unsigned {
   /*!
    * \param i          index of the element to quantize
    * \param out        quantized output buffer; out[i] is written
    * \param omin_range receives a copy of *imin_range
    * \param omax_range receives a copy of *imax_range
    * \param in         input buffer; in[i] is read
    * \param imin_range lower end of the real-valued range
    * \param imax_range upper end of the real-valued range
    * \param min_limit  lower limit of the quantized type
    * \param max_limit  upper limit of the quantized type
    */
   template<typename DstDType, typename SrcDType>
   MSHADOW_XINLINE static void Map(int i, DstDType *out, float *omin_range,
                                   float *omax_range, const SrcDType *in,
                                   const float *imin_range, const float *imax_range,
                                   const double min_limit, const double max_limit) {
     const double span = max_limit - min_limit;
     const float real_range = *imax_range - *imin_range;
     // Guard the division: a degenerate range gets scale 0 rather than inf/NaN.
     const float scale =
         real_range != 0.0f ? static_cast<float>(span / real_range) : 0.0f;
     double q = (in[i] - *imin_range) * scale + 0.5;
     // Saturate before the cast. The negated comparison also routes NaN to 0.
     if (!(q > 0.0)) {
       q = 0.0;
     } else if (q > span) {
       q = span;
     }
     out[i] = static_cast<DstDType>(q);
     *omin_range = *imin_range;
     *omax_range = *imax_range;
   }
 };


 /*!
  * \brief Element-wise kernel for symmetric quantization that keeps zero at zero.
  *
  * The real range is taken as [-r, r] with r = MaxAbs(*imin_range, *imax_range).
  * The magnitude of each input is scaled by quantized_range / r, rounded by adding
  * 0.5, capped at quantized_range, and then given the sign of the input.
  * The output range pointers receive -r and r.
  */
 struct quantize_zero_centered {
   template<typename DstDType, typename SrcDType>
   MSHADOW_XINLINE static void Map(int i, DstDType *out, float *omin_range,
                                   float *omax_range, const SrcDType *in,
                                   const float *imin_range, const float *imax_range,
                                   const float quantized_range) {
     const float abs_bound = MaxAbs(*imin_range, *imax_range);
     const float factor = quantized_range / abs_bound;
     SrcDType value = in[i];
     // Work on the magnitude first so the cap applies symmetrically to both signs.
     const auto magnitude = Min(Abs(value) * factor + 0.5f, quantized_range);
     out[i] = static_cast<DstDType>(Sign(value) * magnitude);
     *omin_range = -abs_bound;
     *omax_range = abs_bound;
   }
 };

 /*!
  * \brief Forward computation of the quantize operator.
  *
  * Launches quantize_unsigned when the requested output type is uint8 and
  * quantize_zero_centered when it is int8; any other type is a fatal error.
  * uint8 output on a gpu context is rejected with a fatal error.
  *
  * \param attrs   node attributes holding the parsed QuantizeParam
  * \param ctx     operator context that supplies the stream to launch on
  * \param inputs  [0] float data, [1] float range minimum, [2] float range maximum
  * \param req     write requests (not consulted here)
  * \param outputs [0] quantized data, [1] float output minimum, [2] float output maximum
  */
 template<typename xpu>
 void QuantizeCompute(const nnvm::NodeAttrs& attrs,
                      const OpContext& ctx,
                      const std::vector<TBlob>& inputs,
                      const std::vector<OpReqType>& req,
                      const std::vector<TBlob>& outputs) {
   using namespace mshadow;
   using namespace mxnet_op;
   using mshadow::red::limits::MinValue;
   using mshadow::red::limits::MaxValue;
   Stream<xpu> *stream = ctx.get_stream<xpu>();

   const QuantizeParam& param = nnvm::get<QuantizeParam>(attrs.parsed);
   switch (param.out_type) {
     case mshadow::kUint8: {
       if (std::is_same<xpu, gpu>::value) {
         LOG(FATAL) << "currently, uint8 quantization is only supported by CPU, "
                       "please switch to the context of CPU or int8 data type for GPU.";
       }
       // Affine mapping onto the full uint8 range.
       Kernel<quantize_unsigned, xpu>::Launch(
           stream, outputs[0].Size(),
           outputs[0].dptr<uint8_t>(), outputs[1].dptr<float>(), outputs[2].dptr<float>(),
           inputs[0].dptr<float>(), inputs[1].dptr<float>(), inputs[2].dptr<float>(),
           MinValue<uint8_t>(), MaxValue<uint8_t>());
       break;
     }
     case mshadow::kInt8: {
       // Zero-centered quantization; the quantized range is the smaller
       // magnitude of the two int8 limits.
       Kernel<quantize_zero_centered, xpu>::Launch(
           stream, outputs[0].Size(),
           outputs[0].dptr<int8_t>(), outputs[1].dptr<float>(), outputs[2].dptr<float>(),
           inputs[0].dptr<float>(), inputs[1].dptr<float>(), inputs[2].dptr<float>(),
           MinAbs(MaxValue<int8_t>(), MinValue<int8_t>()));
       break;
     }
     default:
       LOG(FATAL) << "quantize op only supports int8 and uint8 as output type";
       break;
   }
 }

 /*!
  * \brief Shape inference for the quantize operator.
  *
  * The two range inputs and the two range outputs are 1-D with a single element.
  * The quantized output shares the shape of the data input, and that relation is
  * applied in both directions so a known output shape can fill in an unknown or
  * partially known input shape.
  *
  * \param attrs     node attributes (unused)
  * \param in_attrs  shapes of [data, min_range, max_range]; updated in place
  * \param out_attrs shapes of [quantized data, min_output, max_output]; updated in place
  * \return result of shape_is_known on the quantized data output shape
  */
 inline bool QuantizeShape(const nnvm::NodeAttrs& attrs,
                           mxnet::ShapeVector *in_attrs,
                           mxnet::ShapeVector *out_attrs) {
   CHECK_EQ(in_attrs->size(), 3U);
   CHECK_EQ(out_attrs->size(), 3U);

   for (size_t i = 1; i < 3; ++i) {
     SHAPE_ASSIGN_CHECK(*in_attrs, i, mxnet::TShape(1, 1));
   }

   // Forward direction: output data takes the shape of input data.
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));
   SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape(1, 1));
   SHAPE_ASSIGN_CHECK(*out_attrs, 2, mxnet::TShape(1, 1));

   // Backward direction: propagate the whole output shape to the input.
   // Patching only dimension 0 of a copy of the input shape would index out of
   // range when the input rank is not known yet.
   SHAPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0));

   return shape_is_known(out_attrs->at(0));
 }

 /*!
  * \brief Type inference for the quantize operator.
  *
  * All three inputs and the two range outputs are float32. The quantized output
  * is uint8 or int8 according to QuantizeParam::out_type; any other value is a
  * fatal error.
  *
  * \param attrs     node attributes holding the parsed QuantizeParam
  * \param in_attrs  type flags of the three inputs; updated in place
  * \param out_attrs type flags of the three outputs; updated in place
  * \return whether the type flag of the data input differs from -1
  */
 inline bool QuantizeType(const nnvm::NodeAttrs& attrs,
                          std::vector<int> *in_attrs,
                          std::vector<int> *out_attrs) {
   CHECK_EQ(in_attrs->size(), 3U);
   CHECK_EQ(out_attrs->size(), 3U);
   const QuantizeParam& param = nnvm::get<QuantizeParam>(attrs.parsed);

   for (size_t idx = 0; idx < 3; ++idx) {
     TYPE_ASSIGN_CHECK(*in_attrs, idx, mshadow::kFloat32);
   }

   switch (param.out_type) {
     case mshadow::kUint8:
       TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kUint8);
       break;
     case mshadow::kInt8:
       TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kInt8);
       break;
     default:
       LOG(FATAL) << "quantize op only supports int8 and uint8 as output type";
       break;
   }

   for (size_t idx = 1; idx < 3; ++idx) {
     TYPE_ASSIGN_CHECK(*out_attrs, idx, mshadow::kFloat32);
   }
   return in_attrs->at(0) != -1;
 }

 }  // namespace op
 }  // namespace mxnet
 #endif  // MXNET_OPERATOR_QUANTIZATION_QUANTIZE_INL_H_
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	/*!
	* Copyright (c) 2017 by Contributors
	* \file quantize-inl.h
	* \brief implementation of quantize operation
	*/
	#ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZE_INL_H_
	#define MXNET_OPERATOR_QUANTIZATION_QUANTIZE_INL_H_

	#include <mxnet/operator_util.h>
	#include <vector>
	#include <limits>
	#include "../elemwise_op_common.h"
	#include "../mshadow_op.h"
	#include "../mxnet_op.h"
	#include "./quantization_utils.h"

	namespace mxnet {
	namespace op {

	/*!
	 * \brief Parameter set of the quantize operator.
	 */
	struct QuantizeParam : public dmlc::Parameter<QuantizeParam> {
	/*! \brief Type flag of the quantized output; one of the enum values registered below. */
	int out_type;
	DMLC_DECLARE_PARAMETER(QuantizeParam) {
	// Accepts the strings "int8" and "uint8", mapped to the mshadow type flags;
	// uint8 is used when the caller does not specify the field.
	DMLC_DECLARE_FIELD(out_type)
	.add_enum("int8", mshadow::kInt8)
	.add_enum("uint8", mshadow::kUint8)
	.set_default(mshadow::kUint8)
	.describe("Output data type.");
	}
	};

	/*!
	 * \brief Element-wise kernel that affinely maps values from the real range
	 *        [*imin_range, *imax_range] onto [0, max_limit - min_limit].
	 *
	 * Rounding is done by adding 0.5 and letting the cast truncate. Values that
	 * fall outside the supplied real range saturate at the nearest end of the
	 * quantized range, because converting an out-of-range floating-point value to
	 * an integer type is undefined. A zero-width real range uses a scale of 0
	 * instead of dividing by zero.
	 */
	struct quantize_unsigned {
	  /*!
	   * \param i          index of the element to quantize
	   * \param out        quantized output buffer; out[i] is written
	   * \param omin_range receives a copy of *imin_range
	   * \param omax_range receives a copy of *imax_range
	   * \param in         input buffer; in[i] is read
	   * \param imin_range lower end of the real-valued range
	   * \param imax_range upper end of the real-valued range
	   * \param min_limit  lower limit of the quantized type
	   * \param max_limit  upper limit of the quantized type
	   */
	  template<typename DstDType, typename SrcDType>
	  MSHADOW_XINLINE static void Map(int i, DstDType *out, float *omin_range,
	                                  float *omax_range, const SrcDType *in,
	                                  const float *imin_range, const float *imax_range,
	                                  const double min_limit, const double max_limit) {
	    const double span = max_limit - min_limit;
	    const float real_range = *imax_range - *imin_range;
	    // Guard the division: a degenerate range gets scale 0 rather than inf/NaN.
	    const float scale =
	        real_range != 0.0f ? static_cast<float>(span / real_range) : 0.0f;
	    double q = (in[i] - *imin_range) * scale + 0.5;
	    // Saturate before the cast. The negated comparison also routes NaN to 0.
	    if (!(q > 0.0)) {
	      q = 0.0;
	    } else if (q > span) {
	      q = span;
	    }
	    out[i] = static_cast<DstDType>(q);
	    *omin_range = *imin_range;
	    *omax_range = *imax_range;
	  }
	};


	/*!
	 * \brief Element-wise kernel for symmetric quantization that keeps zero at zero.
	 *
	 * The real range is taken as [-r, r] with r = MaxAbs(*imin_range, *imax_range).
	 * The magnitude of each input is scaled by quantized_range / r, rounded by adding
	 * 0.5, capped at quantized_range, and then given the sign of the input.
	 * The output range pointers receive -r and r.
	 */
	struct quantize_zero_centered {
	  template<typename DstDType, typename SrcDType>
	  MSHADOW_XINLINE static void Map(int i, DstDType *out, float *omin_range,
	                                  float *omax_range, const SrcDType *in,
	                                  const float *imin_range, const float *imax_range,
	                                  const float quantized_range) {
	    // The buffers and range arguments are pointers: they are indexed and
	    // dereferenced below, so they cannot be passed by value.
	    float real_range = MaxAbs(*imin_range, *imax_range);
	    float scale = quantized_range / real_range;
	    SrcDType x = in[i];
	    out[i] = static_cast<DstDType>(
	        Sign(x) * Min(Abs(x) * scale + 0.5f, quantized_range));
	    *omin_range = -real_range;
	    *omax_range = real_range;
	  }
	};

	/*!
	 * \brief Forward computation of the quantize operator.
	 *
	 * Launches quantize_unsigned when the requested output type is uint8 and
	 * quantize_zero_centered when it is int8; any other type is a fatal error.
	 * uint8 output on a gpu context is rejected with a fatal error.
	 *
	 * \param attrs   node attributes holding the parsed QuantizeParam
	 * \param ctx     operator context that supplies the stream to launch on
	 * \param inputs  [0] float data, [1] float range minimum, [2] float range maximum
	 * \param req     write requests (not consulted here)
	 * \param outputs [0] quantized data, [1] float output minimum, [2] float output maximum
	 */
	template<typename xpu>
	void QuantizeCompute(const nnvm::NodeAttrs& attrs,
	                     const OpContext& ctx,
	                     const std::vector<TBlob>& inputs,
	                     const std::vector<OpReqType>& req,
	                     const std::vector<TBlob>& outputs) {
	  using namespace mshadow;
	  using namespace mxnet_op;
	  using mshadow::red::limits::MinValue;
	  using mshadow::red::limits::MaxValue;
	  Stream<xpu> *stream = ctx.get_stream<xpu>();

	  const QuantizeParam& param = nnvm::get<QuantizeParam>(attrs.parsed);
	  switch (param.out_type) {
	    case mshadow::kUint8: {
	      if (std::is_same<xpu, gpu>::value) {
	        LOG(FATAL) << "currently, uint8 quantization is only supported by CPU, "
	                      "please switch to the context of CPU or int8 data type for GPU.";
	      }
	      // Affine mapping onto the full uint8 range.
	      Kernel<quantize_unsigned, xpu>::Launch(
	          stream, outputs[0].Size(),
	          outputs[0].dptr<uint8_t>(), outputs[1].dptr<float>(), outputs[2].dptr<float>(),
	          inputs[0].dptr<float>(), inputs[1].dptr<float>(), inputs[2].dptr<float>(),
	          MinValue<uint8_t>(), MaxValue<uint8_t>());
	      break;
	    }
	    case mshadow::kInt8: {
	      // Zero-centered quantization; the quantized range is the smaller
	      // magnitude of the two int8 limits.
	      Kernel<quantize_zero_centered, xpu>::Launch(
	          stream, outputs[0].Size(),
	          outputs[0].dptr<int8_t>(), outputs[1].dptr<float>(), outputs[2].dptr<float>(),
	          inputs[0].dptr<float>(), inputs[1].dptr<float>(), inputs[2].dptr<float>(),
	          MinAbs(MaxValue<int8_t>(), MinValue<int8_t>()));
	      break;
	    }
	    default:
	      LOG(FATAL) << "quantize op only supports int8 and uint8 as output type";
	      break;
	  }
	}

	/*!
	 * \brief Shape inference for the quantize operator.
	 *
	 * The two range inputs and the two range outputs are 1-D with a single element.
	 * The quantized output shares the shape of the data input, and that relation is
	 * applied in both directions so a known output shape can fill in an unknown or
	 * partially known input shape.
	 *
	 * \param attrs     node attributes (unused)
	 * \param in_attrs  shapes of [data, min_range, max_range]; updated in place
	 * \param out_attrs shapes of [quantized data, min_output, max_output]; updated in place
	 * \return result of shape_is_known on the quantized data output shape
	 */
	inline bool QuantizeShape(const nnvm::NodeAttrs& attrs,
	                          mxnet::ShapeVector *in_attrs,
	                          mxnet::ShapeVector *out_attrs) {
	  CHECK_EQ(in_attrs->size(), 3U);
	  CHECK_EQ(out_attrs->size(), 3U);

	  for (size_t i = 1; i < 3; ++i) {
	    SHAPE_ASSIGN_CHECK(*in_attrs, i, mxnet::TShape(1, 1));
	  }

	  // Forward direction: output data takes the shape of input data.
	  SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));
	  SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape(1, 1));
	  SHAPE_ASSIGN_CHECK(*out_attrs, 2, mxnet::TShape(1, 1));

	  // Backward direction: propagate the whole output shape to the input.
	  // Patching only dimension 0 of a copy of the input shape would index out of
	  // range when the input rank is not known yet.
	  SHAPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0));

	  return shape_is_known(out_attrs->at(0));
	}

	/*!
	 * \brief Type inference for the quantize operator.
	 *
	 * All three inputs and the two range outputs are float32. The quantized output
	 * is uint8 or int8 according to QuantizeParam::out_type; any other value is a
	 * fatal error.
	 *
	 * \param attrs     node attributes holding the parsed QuantizeParam
	 * \param in_attrs  type flags of the three inputs; updated in place
	 * \param out_attrs type flags of the three outputs; updated in place
	 * \return whether the type flag of the data input differs from -1
	 */
	inline bool QuantizeType(const nnvm::NodeAttrs& attrs,
	                         std::vector<int> *in_attrs,
	                         std::vector<int> *out_attrs) {
	  CHECK_EQ(in_attrs->size(), 3U);
	  CHECK_EQ(out_attrs->size(), 3U);
	  const QuantizeParam& param = nnvm::get<QuantizeParam>(attrs.parsed);

	  for (size_t idx = 0; idx < 3; ++idx) {
	    TYPE_ASSIGN_CHECK(*in_attrs, idx, mshadow::kFloat32);
	  }

	  switch (param.out_type) {
	    case mshadow::kUint8:
	      TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kUint8);
	      break;
	    case mshadow::kInt8:
	      TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kInt8);
	      break;
	    default:
	      LOG(FATAL) << "quantize op only supports int8 and uint8 as output type";
	      break;
	  }

	  for (size_t idx = 1; idx < 3; ++idx) {
	    TYPE_ASSIGN_CHECK(*out_attrs, idx, mshadow::kFloat32);
	  }
	  return in_attrs->at(0) != -1;
	}

	} // namespace op
	} // namespace mxnet
	#endif // MXNET_OPERATOR_QUANTIZATION_QUANTIZE_INL_H_