/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
/*!
 * \file resize-inl.h
 * \brief Image resize operator using OpenCV; the GPU version supports only bilinear resize.
 * \author Jake Lee
 */
#ifndef MXNET_OPERATOR_IMAGE_RESIZE_INL_H_
#define MXNET_OPERATOR_IMAGE_RESIZE_INL_H_
#include <mxnet/base.h>
#include <vector>
#include "../mxnet_op.h"
#include "../operator_common.h"
#include "image_utils.h"
#if MXNET_USE_OPENCV
#include <opencv2/opencv.hpp>
#endif // MXNET_USE_OPENCV
namespace mxnet {
namespace op {
namespace image {
using namespace mshadow;
#if MXNET_USE_CUDA
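// Forward declaration of the GPU resize kernel launcher, defined in the corresponding
// .cu source. The GPU path supports bilinear interpolation only; `Acctype` is
// presumably the accumulation type used for the interpolation arithmetic.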
template<typename DType, typename T, typename Acctype>
void ResizeImplCUDA(Stream<gpu> *s,
                    const T input,
                    const T output);
#endif // MXNET_USE_CUDA
struct ResizeParam : public dmlc::Parameter<ResizeParam> {
  mxnet::Tuple<int> size;
  bool keep_ratio;
  int interp;
  DMLC_DECLARE_PARAMETER(ResizeParam) {
    DMLC_DECLARE_FIELD(size)
        .set_default(mxnet::Tuple<int>())
        .describe("Size of new image. Could be (width, height) or (size)");
    DMLC_DECLARE_FIELD(keep_ratio)
        .describe("Whether to resize the short edge or both edges to `size`, "
                  "if size is given as an integer.")
        .set_default(false);
    DMLC_DECLARE_FIELD(interp)
        .set_default(1)
        .describe("Interpolation method for resizing. By default uses bilinear interpolation. "
                  "Options are INTER_NEAREST - a nearest-neighbor interpolation, "
                  "INTER_LINEAR - a bilinear interpolation, "
                  "INTER_AREA - resampling using pixel area relation, "
                  "INTER_CUBIC - a bicubic interpolation over a 4x4 pixel neighborhood, "
                  "INTER_LANCZOS4 - a Lanczos interpolation over an 8x8 pixel neighborhood. "
                  "Note that the GPU version only supports bilinear interpolation (1).");
  }
};
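// Note: on CPU, `interp` is forwarded directly to cv::resize (see ResizeImpl below),
// so the values follow OpenCV's interpolation flags: 0 = INTER_NEAREST,
// 1 = INTER_LINEAR (bilinear, the default), 2 = INTER_CUBIC, 3 = INTER_AREA,
// 4 = INTER_LANCZOS4.
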
// handle the keep ratio param
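// For example (illustrative numbers): a (768, 1024) (h, w) input with size=(300) and
// keep_ratio=true resizes the short edge only, giving (300, 400), while
// keep_ratio=false would give (300, 300).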
inline SizeParam GetHeightAndWidth(int data_h,
                                   int data_w,
                                   const ResizeParam& param) {
  CHECK((param.size.ndim() == 1) || (param.size.ndim() == 2))
      << "Input size dimension must be 1 or 2, but got "
      << param.size.ndim();
  int resized_h;
  int resized_w;
  if (param.size.ndim() == 1) {
    CHECK_GT(param.size[0], 0)
        << "Input size should be greater than 0, but got "
        << param.size[0];
    if (!param.keep_ratio) {
      resized_h = param.size[0];
      resized_w = param.size[0];
    } else {
      if (data_h > data_w) {
        resized_w = param.size[0];
        resized_h = static_cast<int>(data_h * resized_w / data_w);
      } else {
        resized_h = param.size[0];
        resized_w = static_cast<int>(data_w * resized_h / data_h);
      }
    }
  } else {
    CHECK_GT(param.size[0], 0)
        << "Input width should be greater than 0, but got "
        << param.size[0];
    CHECK_GT(param.size[1], 0)
        << "Input height should be greater than 0, but got "
        << param.size[1];
    resized_h = param.size[1];
    resized_w = param.size[0];
  }
  return SizeParam(resized_h, resized_w);
}
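// Shape inference for the resize output. For example (illustrative numbers), an
// input of shape (256, 256, 3) with size=(128, 64) gives an output shape of
// (64, 128, 3), since `size` is interpreted as (width, height).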
inline bool ResizeShapeImpl(const ResizeParam& param,
                            mxnet::ShapeVector *in_attrs,
                            mxnet::ShapeVector *out_attrs) {
  // input attrs should only be (h, w, c) or (n, h, w, c)
  CHECK((in_attrs->at(0).ndim() == 3U) || (in_attrs->at(0).ndim() == 4U))
      << "Input image dimension should be 3 or 4 but got "
      << in_attrs->at(0).ndim();
  const auto& ishape = (*in_attrs)[0];
  SizeParam size;
  if (ishape.ndim() == 3) {
    size = GetHeightAndWidth(ishape[H], ishape[W], param);
    SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape({size.height, size.width, ishape[C]}));
  } else {
    size = GetHeightAndWidth(ishape[kH], ishape[kW], param);
    SHAPE_ASSIGN_CHECK(*out_attrs, 0,
                       mxnet::TShape({ishape[N], size.height, size.width, ishape[kC]}));
  }
  return true;
}
inline bool ResizeShape(const nnvm::NodeAttrs& attrs,
                        mxnet::ShapeVector *in_attrs,
                        mxnet::ShapeVector *out_attrs) {
  const ResizeParam& param = nnvm::get<ResizeParam>(attrs.parsed);
  return ResizeShapeImpl(param, in_attrs, out_attrs);
}
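// CPU resize via OpenCV's cv::resize. For 4-D (batched) input, `input_index` and
// `output_index` are element offsets into the data pointers that select a single
// image inside the batch, so each cv::Mat header views that image without copying.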
inline void ResizeImpl(const std::vector<TBlob> &inputs,
                       const std::vector<TBlob> &outputs,
                       const int height,
                       const int width,
                       const int interp,
                       const int input_index = 0,
                       const int output_index = 0) {
#if MXNET_USE_OPENCV
  CHECK_NE(inputs[0].type_flag_, mshadow::kFloat16) << "opencv image mat doesn't support fp16";
  CHECK((inputs[0].type_flag_ != mshadow::kInt32) && (inputs[0].type_flag_ != mshadow::kInt64))
      << "opencv resize doesn't support int32, int64";
  // map the mshadow type_flag_ to the opencv matrix element type
  // (combined with the channel count via CV_MAKETYPE below)
  const int DTYPE[] = {CV_32F, CV_64F, -1, CV_8U, CV_32S};
  if (inputs[0].ndim() == 3) {
    const int cv_type = CV_MAKETYPE(DTYPE[inputs[0].type_flag_], inputs[0].shape_[C]);
    cv::Mat buf(inputs[0].shape_[H], inputs[0].shape_[W], cv_type, inputs[0].dptr_);
    cv::Mat dst(outputs[0].shape_[H], outputs[0].shape_[W], cv_type, outputs[0].dptr_);
    cv::resize(buf, dst, cv::Size(width, height), 0, 0, interp);
    CHECK(!dst.empty());
    CHECK_EQ(static_cast<void*>(dst.ptr()), outputs[0].dptr_);
  } else {
    const int cv_type = CV_MAKETYPE(DTYPE[inputs[0].type_flag_], inputs[0].shape_[kC]);
    MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
      cv::Mat buf(inputs[0].shape_[kH], inputs[0].shape_[kW], cv_type,
                  inputs[0].dptr<DType>() + input_index);
      cv::Mat dst(outputs[0].shape_[kH], outputs[0].shape_[kW], cv_type,
                  outputs[0].dptr<DType>() + output_index);
      cv::resize(buf, dst, cv::Size(width, height), 0, 0, interp);
      CHECK(!dst.empty());
      CHECK_EQ(static_cast<void*>(dst.ptr()), outputs[0].dptr<DType>() + output_index);
    });
  }
#else
  LOG(FATAL) << "Build with USE_OPENCV=1 for image resize operator.";
#endif // MXNET_USE_OPENCV
}
template <typename xpu>
inline void ResizeImplWrapper(const ResizeParam& param,
                              const OpContext &ctx,
                              const std::vector<TBlob> &inputs,
                              const std::vector<TBlob> &outputs) {
  SizeParam size;
  if (std::is_same<xpu, gpu>::value) {
#if MXNET_USE_CUDA
    CHECK(param.interp == 1) << "Resize on GPU only supports bilinear interpolation (interp=1).";
    mshadow::Stream<gpu> *s = ctx.get_stream<gpu>();
    MSHADOW_TYPE_SWITCH(inputs[0].type_flag_, DType, {
      if (inputs[0].ndim() == 3) {
        Tensor<gpu, 3, DType> input = inputs[0].get<gpu, 3, DType>(s);
        Tensor<gpu, 3, DType> output = outputs[0].get<gpu, 3, DType>(s);
        ResizeImplCUDA<DType, Tensor<gpu, 3, DType>, float>(s, input, output);
      } else {
        Tensor<gpu, 4, DType> input = inputs[0].get<gpu, 4, DType>(s);
        Tensor<gpu, 4, DType> output = outputs[0].get<gpu, 4, DType>(s);
        ResizeImplCUDA<DType, Tensor<gpu, 4, DType>, float>(s, input, output);
      }
    });
#endif // MXNET_USE_CUDA
  } else if (inputs[0].ndim() == 3) {
    size = GetHeightAndWidth(inputs[0].shape_[H], inputs[0].shape_[W], param);
    ResizeImpl(inputs, outputs, size.height, size.width, param.interp);
  } else {
    size = GetHeightAndWidth(inputs[0].shape_[kH], inputs[0].shape_[kW], param);
    const auto batch_size = inputs[0].shape_[N];
    const auto input_step = inputs[0].shape_[kH] * inputs[0].shape_[kW] * inputs[0].shape_[kC];
    const auto output_step = size.height * size.width * inputs[0].shape_[kC];
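    // Illustrative example of the per-image element offsets computed above: for a
    // (2, 4, 6, 3) batch resized to height=2, width=3, input_step = 4*6*3 = 72 and
    // output_step = 2*3*3 = 18, so image i is read at an offset of i*72 elements and
    // written at an offset of i*18 elements by ResizeImpl below.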
    #pragma omp parallel for
    for (auto i = 0; i < batch_size; ++i) {
      ResizeImpl(inputs, outputs, size.height, size.width,
                 param.interp, i * input_step, i * output_step);
    }
  }
}
template <typename xpu>
inline void Resize(const nnvm::NodeAttrs &attrs,
                   const OpContext &ctx,
                   const std::vector<TBlob> &inputs,
                   const std::vector<OpReqType> &req,
                   const std::vector<TBlob> &outputs) {
  CHECK_EQ(outputs.size(), 1U);
  const ResizeParam& param = nnvm::get<ResizeParam>(attrs.parsed);
  ResizeImplWrapper<xpu>(param, ctx, inputs, outputs);
}
} // namespace image
} // namespace op
} // namespace mxnet
#endif // MXNET_OPERATOR_IMAGE_RESIZE_INL_H_