/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
/*!
 * \file resize-inl.h
 * \brief Image resize operator using OpenCV; the GPU version supports only bilinear resize.
 * \author Jake Lee
 */
#ifndef MXNET_OPERATOR_IMAGE_RESIZE_INL_H_
#define MXNET_OPERATOR_IMAGE_RESIZE_INL_H_
#include <mxnet/base.h>
#include <vector>
#include "../mxnet_op.h"
#include "../operator_common.h"
#include "image_utils.h"
#if MXNET_USE_OPENCV
#include <opencv2/opencv.hpp>
#endif // MXNET_USE_OPENCV
namespace mxnet {
namespace op {
namespace image {
using namespace mshadow;
#if MXNET_USE_CUDA
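// Forward declaration of the GPU resize kernel launcher, defined in the corresponding
// .cu source. The GPU path supports bilinear interpolation only; `Acctype` is
// presumably the accumulation type used for the interpolation arithmetic.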
template<typename DType, typename T, typename Acctype>
void ResizeImplCUDA(Stream<gpu> *s,
                    const T input,
                    const T output);
#endif // MXNET_USE_CUDA
struct ResizeParam : public dmlc::Parameter<ResizeParam> {
  mxnet::Tuple<int> size;
  bool keep_ratio;
  int interp;
  DMLC_DECLARE_PARAMETER(ResizeParam) {
    DMLC_DECLARE_FIELD(size)
        .set_default(mxnet::Tuple<int>())
        .describe("Size of new image. Could be (width, height) or (size)");
    DMLC_DECLARE_FIELD(keep_ratio)
        .describe("Whether to resize the short edge or both edges to `size`, "
                  "if size is given as an integer.")
        .set_default(false);
    DMLC_DECLARE_FIELD(interp)
        .set_default(1)
        .describe("Interpolation method for resizing. By default uses bilinear interpolation. "
                  "Options are INTER_NEAREST - a nearest-neighbor interpolation, "
                  "INTER_LINEAR - a bilinear interpolation, "
                  "INTER_AREA - resampling using pixel area relation, "
                  "INTER_CUBIC - a bicubic interpolation over a 4x4 pixel neighborhood, "
                  "INTER_LANCZOS4 - a Lanczos interpolation over an 8x8 pixel neighborhood. "
                  "Note that the GPU version only supports bilinear interpolation (1).");
  }
};
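// Note: on CPU, `interp` is forwarded directly to cv::resize (see ResizeImpl below),
// so the values follow OpenCV's interpolation flags: 0 = INTER_NEAREST,
// 1 = INTER_LINEAR (bilinear, the default), 2 = INTER_CUBIC, 3 = INTER_AREA,
// 4 = INTER_LANCZOS4.
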
// handle the keep ratio param
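// For example (illustrative numbers): a (768, 1024) (h, w) input with size=(300) and
// keep_ratio=true resizes the short edge only, giving (300, 400), while
// keep_ratio=false would give (300, 300).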
inline SizeParam GetHeightAndWidth(int data_h,
                                   int data_w,
                                   const ResizeParam& param) {
  CHECK((param.size.ndim() == 1) || (param.size.ndim() == 2))
      << "Input size dimension must be 1 or 2, but got "
      << param.size.ndim();
  int resized_h;
  int resized_w;
  if (param.size.ndim() == 1) {
    CHECK_GT(param.size[0], 0)
        << "Input size should be greater than 0, but got "
        << param.size[0];
    if (!param.keep_ratio) {
      resized_h = param.size[0];
      resized_w = param.size[0];
    } else {
      if (data_h > data_w) {
        resized_w = param.size[0];
        resized_h = static_cast<int>(data_h * resized_w / data_w);
      } else {
        resized_h = param.size[0];
        resized_w = static_cast<int>(data_w * resized_h / data_h);
      }
    }
  } else {
    CHECK_GT(param.size[0], 0)
        << "Input width should be greater than 0, but got "
        << param.size[0];
    CHECK_GT(param.size[1], 0)
        << "Input height should be greater than 0, but got "
        << param.size[1];
    resized_h = param.size[1];
    resized_w = param.size[0];
  }
  return SizeParam(resized_h, resized_w);
}
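// Shape inference for the resize output. For example (illustrative numbers), an
// input of shape (256, 256, 3) with size=(128, 64) gives an output shape of
// (64, 128, 3), since `size` is interpreted as (width, height).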
inline bool ResizeShapeImpl(const ResizeParam& param,
                            mxnet::ShapeVector *in_attrs,
                            mxnet::ShapeVector *out_attrs) {
  // input attrs should only be (h, w, c) or (n, h, w, c)
  CHECK((in_attrs->at(0).ndim() == 3U) || (in_attrs->at(0).ndim() == 4U))
      << "Input image dimension should be 3 or 4 but got "
      << in_attrs->at(0).ndim();
  const auto& ishape = (*in_attrs)[0];
  SizeParam size;
  if (ishape.ndim() == 3) {
    size = GetHeightAndWidth(ishape[H], ishape[W], param);
    SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape({size.height, size.width, ishape[C]}));
  } else {
    size = GetHeightAndWidth(ishape[kH], ishape[kW], param);
    SHAPE_ASSIGN_CHECK(*out_attrs, 0,
                       mxnet::TShape({ishape[N], size.height, size.width, ishape[kC]}));
  }
  return true;
}
inline bool ResizeShape(const nnvm::NodeAttrs& attrs,
                        mxnet::ShapeVector *in_attrs,
                        mxnet::ShapeVector *out_attrs) {
  const ResizeParam& param = nnvm::get<ResizeParam>(attrs.parsed);
  return ResizeShapeImpl(param, in_attrs, out_attrs);
}
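// CPU resize via OpenCV's cv::resize. For 4-D (batched) input, `input_index` and
// `output_index` are element offsets into the data pointers that select a single
// image inside the batch, so each cv::Mat header views that image without copying.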
inline void ResizeImpl(const std::vector<TBlob> &inputs,
                       const std::vector<TBlob> &outputs,
                       const int height,
                       const int width,
                       const int interp,
                       const int input_index = 0,
                       const int output_index = 0) {
#if MXNET_USE_OPENCV
  CHECK_NE(inputs[0].type_flag_, mshadow::kFloat16) << "opencv image mat doesn't support fp16";
  CHECK((inputs[0].type_flag_ != mshadow::kInt32) && (inputs[0].type_flag_ != mshadow::kInt64))
      << "opencv resize doesn't support int32, int64";
  // map the mshadow type_flag_ to the opencv matrix element type
  // (combined with the channel count via CV_MAKETYPE below)
  const int DTYPE[] = {CV_32F, CV_64F, -1, CV_8U, CV_32S};
  if (inputs[0].ndim() == 3) {
    const int cv_type = CV_MAKETYPE(DTYPE[inputs[0].type_flag_], inputs[0].shape_[C]);
    cv::Mat buf(inputs[0].shape_[H], inputs[0].shape_[W], cv_type, inputs[0].dptr_);
    cv::Mat dst(outputs[0].shape_[H], outputs[0].shape_[W], cv_type, outputs[0].dptr_);
    cv::resize(buf, dst, cv::Size(width, height), 0, 0, interp);
    CHECK(!dst.empty());
    CHECK_EQ(static_cast<void*>(dst.ptr()), outputs[0].dptr_);
  } else {
    const int cv_type = CV_MAKETYPE(DTYPE[inputs[0].type_flag_], inputs[0].shape_[kC]);
    MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
      cv::Mat buf(inputs[0].shape_[kH], inputs[0].shape_[kW], cv_type,
                  inputs[0].dptr<DType>() + input_index);
      cv::Mat dst(outputs[0].shape_[kH], outputs[0].shape_[kW], cv_type,
                  outputs[0].dptr<DType>() + output_index);
      cv::resize(buf, dst, cv::Size(width, height), 0, 0, interp);
      CHECK(!dst.empty());
      CHECK_EQ(static_cast<void*>(dst.ptr()), outputs[0].dptr<DType>() + output_index);
    });
  }
#else
  LOG(FATAL) << "Build with USE_OPENCV=1 for image resize operator.";
#endif // MXNET_USE_OPENCV
}
template <typename xpu>
inline void ResizeImplWrapper(const ResizeParam& param,
                              const OpContext &ctx,
                              const std::vector<TBlob> &inputs,
                              const std::vector<TBlob> &outputs) {
  SizeParam size;
  if (std::is_same<xpu, gpu>::value) {
#if MXNET_USE_CUDA
    CHECK(param.interp == 1) << "Resize on GPU only supports bilinear interpolation (interp=1).";
    mshadow::Stream<gpu> *s = ctx.get_stream<gpu>();
    MSHADOW_TYPE_SWITCH(inputs[0].type_flag_, DType, {
      if (inputs[0].ndim() == 3) {
        Tensor<gpu, 3, DType> input = inputs[0].get<gpu, 3, DType>(s);
        Tensor<gpu, 3, DType> output = outputs[0].get<gpu, 3, DType>(s);
        ResizeImplCUDA<DType, Tensor<gpu, 3, DType>, float>(s, input, output);
      } else {
        Tensor<gpu, 4, DType> input = inputs[0].get<gpu, 4, DType>(s);
        Tensor<gpu, 4, DType> output = outputs[0].get<gpu, 4, DType>(s);
        ResizeImplCUDA<DType, Tensor<gpu, 4, DType>, float>(s, input, output);
      }
    });
#endif // MXNET_USE_CUDA
  } else if (inputs[0].ndim() == 3) {
    size = GetHeightAndWidth(inputs[0].shape_[H], inputs[0].shape_[W], param);
    ResizeImpl(inputs, outputs, size.height, size.width, param.interp);
  } else {
    size = GetHeightAndWidth(inputs[0].shape_[kH], inputs[0].shape_[kW], param);
    const auto batch_size = inputs[0].shape_[N];
    const auto input_step = inputs[0].shape_[kH] * inputs[0].shape_[kW] * inputs[0].shape_[kC];
    const auto output_step = size.height * size.width * inputs[0].shape_[kC];
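    // Illustrative example of the per-image element offsets computed above: for a
    // (2, 4, 6, 3) batch resized to height=2, width=3, input_step = 4*6*3 = 72 and
    // output_step = 2*3*3 = 18, so image i is read at an offset of i*72 elements and
    // written at an offset of i*18 elements by ResizeImpl below.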
    #pragma omp parallel for
    for (auto i = 0; i < batch_size; ++i) {
      ResizeImpl(inputs, outputs, size.height, size.width,
                 param.interp, i * input_step, i * output_step);
    }
  }
}
template <typename xpu>
inline void Resize(const nnvm::NodeAttrs &attrs,
                   const OpContext &ctx,
                   const std::vector<TBlob> &inputs,
                   const std::vector<OpReqType> &req,
                   const std::vector<TBlob> &outputs) {
  CHECK_EQ(outputs.size(), 1U);
  const ResizeParam& param = nnvm::get<ResizeParam>(attrs.parsed);
  ResizeImplWrapper<xpu>(param, ctx, inputs, outputs);
}
} // namespace image
} // namespace op
} // namespace mxnet
#endif // MXNET_OPERATOR_IMAGE_RESIZE_INL_H_