blob: 1ef1df1b74bd4a9b58f57161810c5882c4c9edbf [file] [log] [blame]
/*!
* Copyright (c) 2016 by Contributors
* \file optimizer_op-inl.h
* \brief Image I/O operators (imdecode, imresize, copyMakeBorder) backed by OpenCV
* \author Junyuan Xie
*/
#include <dmlc/parameter.h>
#include <dmlc/logging.h>
#include <mxnet/ndarray.h>
#include <mxnet/operator.h>
#include <mxnet/operator_util.h>
#include <mxnet/op_attr_types.h>
#include <mshadow/base.h>
#include <nnvm/op.h>
#include <nnvm/op_attr_types.h>
#include "../operator/elemwise_op_common.h"
#if MXNET_USE_OPENCV
#include <opencv2/opencv.hpp>
#endif // MXNET_USE_OPENCV
namespace mxnet {
namespace io {
// http://www.64lines.com/jpeg-width-height
// Gets the JPEG size from the array of data passed to the function,
// file reference: http://www.obrador.com/essentialjpeg/headerinfo.htm
//
// Only JFIF streams are recognised: the buffer must begin with the bytes
// FF D8 FF E0, followed by a two-byte segment length and the tag "JFIF\0".
// The segment chain is then walked until a start-of-frame marker is found.
// Every read is bounds-checked against data_size, so truncated or malformed
// buffers yield false instead of reading past the end.
//
// \param data      encoded image bytes; nullptr is rejected
// \param data_size number of readable bytes in data
// \param width     receives the frame width on success, untouched otherwise
// \param height    receives the frame height on success, untouched otherwise
// \return true when a start-of-frame segment was found and both outputs were
//         written; false when the buffer is not JFIF, is truncated, or the
//         segment chain is broken.
bool get_jpeg_size(const uint8_t* data, uint32_t data_size, int64_t *width, int64_t *height) {
  // Bytes inspected up front: SOI (2) + APP0 marker (2) + APP0 length (2) + "JFIF\0" (5).
  const uint32_t kMinHeaderBytes = 11;
  if (data == nullptr || data_size < kMinHeaderBytes) return false;

  // Check for valid JPEG image
  if (data[0] != 0xFF || data[1] != 0xD8 || data[2] != 0xFF || data[3] != 0xE0) {
    return false;  // Not a valid SOI header
  }
  // Check for valid JPEG header (null terminated JFIF)
  if (data[6] != 'J' || data[7] != 'F' || data[8] != 'I' || data[9] != 'F' || data[10] != 0x00) {
    return false;  // Not a valid JFIF string
  }

  // Position within the file. 64-bit so that adding a segment length to an
  // offset close to UINT32_MAX cannot wrap around.
  uint64_t i = 4;
  // Retrieve the block length of the first block since
  // the first block will not contain the size of file
  uint32_t block_length = data[i] * 256u + data[i + 1];
  while (i < data_size) {
    i += block_length;  // Increase the file index to get to the next block
    // Both marker bytes must be inside the buffer.
    if (i + 1 >= data_size) return false;
    if (data[i] != 0xFF) return false;  // Check that we are truly at the start of another block
    const uint8_t m = data[i + 1];
    // 0xC0..0xCF are start-of-frame markers, except 0xC4, 0xC8 and 0xCC.
    const bool is_sof = m >= 0xC0 && m <= 0xCF && m != 0xC4 && m != 0xC8 && m != 0xCC;
    if (is_sof) {
      // Layout read below:
      // [0xFF marker][ushort length][uchar precision][ushort height][ushort width]
      if (i + 8 >= data_size) return false;  // frame header is truncated
      *height = data[i + 5] * 256 + data[i + 6];
      *width = data[i + 7] * 256 + data[i + 8];
      return true;
    }
    i += 2;  // Skip the block marker
    // The two length bytes of this block must be readable as well.
    if (i + 1 >= data_size) return false;
    block_length = data[i] * 256u + data[i + 1];  // Go to the next block
  }
  return false;  // If this point is reached then no size was found
}
// Reads the image dimensions of a PNG from the fixed-position fields at the
// start of the stream.
//
// The buffer is accepted when its first four bytes are 0x89 'P' 'N' 'G'.
// Width and height are then taken as big-endian 32-bit integers at byte
// offsets 16 and 20 (presumably the IHDR chunk payload that follows the
// signature; the chunk type itself is not verified here).
//
// \param data      encoded image bytes; nullptr is rejected
// \param data_size number of readable bytes in data
// \param width     receives the width on success, untouched otherwise
// \param height    receives the height on success, untouched otherwise
// \return true when the magic matched and at least 24 bytes were available.
bool get_png_size(const uint8_t* data, uint32_t data_size, int64_t *width, int64_t *height) {
  // Bytes 0..23 are read below, so anything shorter cannot be parsed.
  const uint32_t kHeaderBytes = 24;
  if (data == nullptr || data_size < kHeaderBytes) return false;
  if (data[0] != 0x89 || data[1] != 0x50 || data[2] != 0x4E || data[3] != 0x47) {
    return false;
  }
  const uint8_t* p = data + 16;
  // Assemble in 64 bits: doing this in int overflows once the top byte is >= 0x80.
  *width = (static_cast<int64_t>(p[0]) << 24) | (static_cast<int64_t>(p[1]) << 16) |
           (static_cast<int64_t>(p[2]) << 8) | static_cast<int64_t>(p[3]);
  p += 4;
  *height = (static_cast<int64_t>(p[0]) << 24) | (static_cast<int64_t>(p[1]) << 16) |
            (static_cast<int64_t>(p[2]) << 8) | static_cast<int64_t>(p[3]);
  return true;
}
// Parameters parsed for the _cvimdecode operator (see Imdecode).
struct ImdecodeParam : public dmlc::Parameter<ImdecodeParam> {
// Flag forwarded to cv::imdecode. Imdecode produces a single-channel output
// when it is 0 and a three-channel output for any other value.
int flag;
// When true and flag != 0, Imdecode converts the decoded pixels with CV_BGR2RGB.
bool to_rgb;
DMLC_DECLARE_PARAMETER(ImdecodeParam) {
// flag: must be >= 0, defaults to 1.
DMLC_DECLARE_FIELD(flag)
.set_lower_bound(0)
.set_default(1)
.describe("Convert decoded image to grayscale (0) or color (1).");
// to_rgb: defaults to true.
DMLC_DECLARE_FIELD(to_rgb)
.set_default(true)
.describe("Whether to convert decoded image to mxnet's default RGB format "
"(instead of opencv's default BGR).");
}
};
DMLC_REGISTER_PARAMETER(ImdecodeParam);
/*!
 * \brief Decode an encoded image stored in a uint8 CPU NDArray with OpenCV.
 *
 * The output shape is (rows, cols, channels) with channels = 1 when
 * param.flag == 0 and 3 otherwise. When the dimensions can be read from a
 * JPEG or PNG header, the decode itself is pushed to the engine. Otherwise
 * the buffer is decoded right away to learn its size; if OpenCV cannot decode
 * it in that path, a default-constructed NDArray is stored as the output.
 *
 * \param attrs node attributes holding the parsed ImdecodeParam
 * \param inputs inputs[0] is the encoded byte buffer
 * \param outputs (*outputs)[0] receives the decoded image
 */
void Imdecode(const nnvm::NodeAttrs& attrs,
              const std::vector<NDArray>& inputs,
              std::vector<NDArray>* outputs) {
#if MXNET_USE_OPENCV
  const auto& param = nnvm::get<ImdecodeParam>(attrs.parsed);

  CHECK_EQ(inputs[0].ctx().dev_mask(), cpu::kDevMask) << "Only supports cpu input";
  CHECK_EQ(inputs[0].dtype(), mshadow::kUint8) << "Input needs to be uint8 buffer";

  const uint8_t* encoded = reinterpret_cast<uint8_t*>(inputs[0].data().dptr_);
  uint32_t encoded_len = inputs[0].shape().Size();

  // The header probes below read the bytes, so wait for pending writes first.
  NDArray src = inputs[0];
  src.WaitToRead();

  TShape oshape(3);
  if (param.flag == 0) {
    oshape[2] = 1;
  } else {
    oshape[2] = 3;
  }

  // Try the cheap header parsers first; PNG is only probed when JPEG fails.
  const bool size_known =
      get_jpeg_size(encoded, encoded_len, &oshape[1], &oshape[0]) ||
      get_png_size(encoded, encoded_len, &oshape[1], &oshape[0]);

  if (!size_known) {
    // Unknown header: decode immediately so the output shape can be derived.
    cv::Mat raw(1, src.shape().Size(), CV_8U, src.data().dptr_);
    cv::Mat decoded = cv::imdecode(raw, param.flag);
    if (decoded.empty()) {
      LOG(INFO) << "Invalid image file. Only supports png and jpg.";
      (*outputs)[0] = NDArray();
      return;
    }
    oshape[0] = decoded.rows;
    oshape[1] = decoded.cols;

    NDArray result(oshape, Context::CPU(), false, mshadow::kUint8);
    // Wrap the NDArray memory so OpenCV writes straight into it.
    cv::Mat wrapped(result.shape()[0], result.shape()[1],
                    param.flag == 0 ? CV_8U : CV_8UC3,
                    result.data().dptr_);
    decoded.copyTo(wrapped);
    if (param.flag != 0 && param.to_rgb) {
      cv::cvtColor(wrapped, wrapped, CV_BGR2RGB);
    }
    (*outputs)[0] = result;
    return;
  }

  // Size known up front: create the output and schedule the decode.
  NDArray result(oshape, Context::CPU(), true, mshadow::kUint8);
  Engine::Get()->PushSync([src, result, param](RunContext ctx){
      cv::Mat raw(1, src.shape().Size(), CV_8U, src.data().dptr_);
      cv::Mat wrapped(result.shape()[0], result.shape()[1],
                      param.flag == 0 ? CV_8U : CV_8UC3,
                      result.data().dptr_);
#if (CV_MAJOR_VERSION > 2 || (CV_MAJOR_VERSION == 2 && CV_MINOR_VERSION >=4))
      cv::imdecode(raw, param.flag, &wrapped);
#else
      cv::Mat decoded = cv::imdecode(raw, param.flag);
      CHECK(!decoded.empty());
      decoded.copyTo(wrapped);
#endif
      CHECK(!wrapped.empty());
      // OpenCV must not have swapped in a buffer of its own.
      CHECK_EQ(static_cast<void*>(wrapped.ptr()), result.data().dptr_);
      if (param.flag != 0 && param.to_rgb) {
        cv::cvtColor(wrapped, wrapped, CV_BGR2RGB);
      }
    }, result.ctx(), {src.var()}, {result.var()},
    FnProperty::kNormal, 0, PROFILER_MESSAGE("Imdecode"));
  (*outputs)[0] = result;
#else
  LOG(FATAL) << "Build with USE_OPENCV=1 for image io.";
#endif  // MXNET_USE_OPENCV
}
// Parameters parsed for the _cvimresize operator (see ResizeShape / Imresize).
struct ResizeParam : public dmlc::Parameter<ResizeParam> {
// Target width; becomes dimension 1 of the output shape.
int w;
// Target height; becomes dimension 0 of the output shape.
int h;
// Interpolation code passed unchanged as the last argument of cv::resize.
int interp;
DMLC_DECLARE_PARAMETER(ResizeParam) {
// w and h have no default and must be at least 1.
DMLC_DECLARE_FIELD(w)
.set_lower_bound(1)
.describe("Width of resized image.");
DMLC_DECLARE_FIELD(h)
.set_lower_bound(1)
.describe("Height of resized image.");
// interp: defaults to 1.
DMLC_DECLARE_FIELD(interp)
.set_default(1)
.describe("Interpolation method (default=cv2.INTER_LINEAR).");
}
};
DMLC_REGISTER_PARAMETER(ResizeParam);
/*!
 * \brief Shape inference for _cvimresize.
 *
 * Accepts exactly one rank-3 input and yields a single output shape of
 * (param.h, param.w, input dimension 2).
 *
 * \param attrs node attributes holding the parsed ResizeParam
 * \param ishape input shapes
 * \param oshape output shapes; cleared and refilled on success
 * \return false when the input count or rank is not as expected, else true
 */
inline bool ResizeShape(const nnvm::NodeAttrs& attrs,
                        std::vector<TShape> *ishape,
                        std::vector<TShape> *oshape) {
  const auto& param = nnvm::get<ResizeParam>(attrs.parsed);
  if (ishape->size() != 1) {
    return false;
  }
  if ((*ishape)[0].ndim() != 3) {
    return false;
  }
  // The last dimension is carried over from the input unchanged.
  const auto channels = (*ishape)[0][2];
  oshape->clear();
  oshape->push_back(mshadow::Shape3(param.h, param.w, channels));
  return true;
}
/*!
 * \brief Resize a (rows, cols, channels) image to param.h x param.w with cv::resize.
 *
 * Input and output blobs are wrapped in cv::Mat headers without copying, so
 * OpenCV writes directly into the output memory. fp16 input is rejected, and
 * so is any type flag that has no entry in the OpenCV depth table.
 *
 * \param attrs node attributes holding the parsed ResizeParam
 * \param ctx operator context (unused)
 * \param inputs inputs[0] is the source image
 * \param req write request types (unused)
 * \param outputs outputs[0] is the destination image
 */
inline void Imresize(const nnvm::NodeAttrs& attrs,
                     const OpContext &ctx,
                     const std::vector<TBlob> &inputs,
                     const std::vector<OpReqType> &req,
                     const std::vector<TBlob> &outputs) {
#if MXNET_USE_OPENCV
  CHECK_NE(inputs[0].type_flag_, mshadow::kFloat16) << "imresize doesn't support fp16";
  // OpenCV depth indexed by the blob's type flag. The -1 slot is never used:
  // it is presumably the kFloat16 position, which is rejected above.
  const int DTYPE[] = {CV_32F, CV_64F, -1, CV_8U, CV_32S};
  const int num_types = static_cast<int>(sizeof(DTYPE) / sizeof(DTYPE[0]));
  // Guard the table lookup: a type flag outside the table would read past it.
  CHECK_GE(inputs[0].type_flag_, 0)
    << "imresize got invalid data type " << inputs[0].type_flag_;
  CHECK_LT(inputs[0].type_flag_, num_types)
    << "imresize doesn't support data type " << inputs[0].type_flag_;
  int cv_type = CV_MAKETYPE(DTYPE[inputs[0].type_flag_], inputs[0].shape_[2]);
  const auto& param = nnvm::get<ResizeParam>(attrs.parsed);
  cv::Mat buf(inputs[0].shape_[0], inputs[0].shape_[1], cv_type, inputs[0].dptr_);
  cv::Mat dst(outputs[0].shape_[0], outputs[0].shape_[1], cv_type, outputs[0].dptr_);
  cv::resize(buf, dst, cv::Size(param.w, param.h), 0, 0, param.interp);
  CHECK(!dst.empty());
  // If OpenCV reallocated dst, the result did not land in the output blob.
  CHECK_EQ(static_cast<void*>(dst.ptr()), outputs[0].dptr_);
#else
  LOG(FATAL) << "Build with USE_OPENCV=1 for image io.";
#endif  // MXNET_USE_OPENCV
}
// Parameters parsed for the _cvcopyMakeBorder operator
// (see MakeBorderShape / copyMakeBorder).
struct MakeBorderParam : public dmlc::Parameter<MakeBorderParam> {
  // Number of rows/columns added on each side of the image. They are added to
  // the input height/width during shape inference and forwarded to
  // cv::copyMakeBorder.
  int top;
  int bot;
  int left;
  int right;
  // Border type code forwarded to cv::copyMakeBorder.
  int type;
  // Fill value, wrapped in a cv::Scalar and forwarded to cv::copyMakeBorder.
  double value;
  DMLC_DECLARE_PARAMETER(MakeBorderParam) {
    // Margins must be non-negative: rejecting negatives at parse time gives a
    // clear parameter error instead of a bad output shape followed by a
    // failure inside OpenCV.
    DMLC_DECLARE_FIELD(top)
    .set_lower_bound(0)
    .describe("Top margin.");
    DMLC_DECLARE_FIELD(bot)
    .set_lower_bound(0)
    .describe("Bottom margin.");
    DMLC_DECLARE_FIELD(left)
    .set_lower_bound(0)
    .describe("Left margin.");
    DMLC_DECLARE_FIELD(right)
    .set_lower_bound(0)
    .describe("Right margin.");
    DMLC_DECLARE_FIELD(type)
    .set_default(0)
    .describe("Filling type (default=cv2.BORDER_CONSTANT).");
    DMLC_DECLARE_FIELD(value)
    .set_default(0.0)
    .describe("Fill with value.");
  }
};
DMLC_REGISTER_PARAMETER(MakeBorderParam);
/*!
 * \brief Shape inference for _cvcopyMakeBorder.
 *
 * Accepts exactly one rank-3 input. The output grows by top + bot along
 * dimension 0 and by left + right along dimension 1; dimension 2 is kept.
 *
 * \param attrs node attributes holding the parsed MakeBorderParam
 * \param ishape input shapes
 * \param oshape output shapes; cleared and refilled on success
 * \return false when the input count or rank is not as expected, else true
 */
inline bool MakeBorderShape(const nnvm::NodeAttrs& attrs,
                            std::vector<TShape> *ishape,
                            std::vector<TShape> *oshape) {
  const auto& param = nnvm::get<MakeBorderParam>(attrs.parsed);
  if (ishape->size() != 1) {
    return false;
  }
  if ((*ishape)[0].ndim() != 3) {
    return false;
  }
  // Compute the padded extents before the output vector is reset.
  const auto padded_rows = (*ishape)[0][0]+param.top+param.bot;
  const auto padded_cols = (*ishape)[0][1]+param.left+param.right;
  const auto channels = (*ishape)[0][2];
  oshape->clear();
  oshape->push_back(mshadow::Shape3(padded_rows, padded_cols, channels));
  return true;
}
/*!
 * \brief Pad a (rows, cols, channels) image with cv::copyMakeBorder.
 *
 * Input and output blobs are wrapped in cv::Mat headers without copying, so
 * OpenCV writes directly into the output memory. fp16 input is rejected, and
 * so is any type flag that has no entry in the OpenCV depth table.
 *
 * \param attrs node attributes holding the parsed MakeBorderParam
 * \param ctx operator context (unused)
 * \param inputs inputs[0] is the source image
 * \param req write request types (unused)
 * \param outputs outputs[0] is the padded destination image
 */
inline void copyMakeBorder(const nnvm::NodeAttrs& attrs,
                           const OpContext &ctx,
                           const std::vector<TBlob> &inputs,
                           const std::vector<OpReqType> &req,
                           const std::vector<TBlob> &outputs) {
#if MXNET_USE_OPENCV
  // The message names this operator rather than imresize.
  CHECK_NE(inputs[0].type_flag_, mshadow::kFloat16) << "copyMakeBorder doesn't support fp16";
  // OpenCV depth indexed by the blob's type flag. The -1 slot is never used:
  // it is presumably the kFloat16 position, which is rejected above.
  const int DTYPE[] = {CV_32F, CV_64F, -1, CV_8U, CV_32S};
  const int num_types = static_cast<int>(sizeof(DTYPE) / sizeof(DTYPE[0]));
  // Guard the table lookup: a type flag outside the table would read past it.
  CHECK_GE(inputs[0].type_flag_, 0)
    << "copyMakeBorder got invalid data type " << inputs[0].type_flag_;
  CHECK_LT(inputs[0].type_flag_, num_types)
    << "copyMakeBorder doesn't support data type " << inputs[0].type_flag_;
  int cv_type = CV_MAKETYPE(DTYPE[inputs[0].type_flag_], inputs[0].shape_[2]);
  const auto& param = nnvm::get<MakeBorderParam>(attrs.parsed);
  cv::Mat buf(inputs[0].shape_[0], inputs[0].shape_[1], cv_type, inputs[0].dptr_);
  cv::Mat dst(outputs[0].shape_[0], outputs[0].shape_[1], cv_type, outputs[0].dptr_);
  cv::copyMakeBorder(buf, dst,
                     param.top, param.bot, param.left, param.right,
                     param.type, cv::Scalar(param.value));
  CHECK(!dst.empty());
  // If OpenCV reallocated dst, the result did not land in the output blob.
  CHECK_EQ(static_cast<void*>(dst.ptr()), outputs[0].dptr_);
#else
  LOG(FATAL) << "Build with USE_OPENCV=1 for image io.";
#endif  // MXNET_USE_OPENCV
}
// Registers the _cvimdecode operator: one input ("buf"), one output,
// attributes parsed into ImdecodeParam, executed through Imdecode as an
// FNDArrayFunction.
NNVM_REGISTER_OP(_cvimdecode)
.describe("Decode image with OpenCV. \n"
"Note: return image in RGB by default, "
"instead of OpenCV's default BGR.")
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr_parser(op::ParamParser<ImdecodeParam>)
.set_attr<FNDArrayFunction>("FNDArrayFunction", Imdecode)
.add_argument("buf", "NDArray", "Buffer containing binary encoded image")
.add_arguments(ImdecodeParam::__FIELDS__());
// Registers the _cvimresize operator: one input ("src"), one output,
// attributes parsed into ResizeParam, shapes inferred by ResizeShape, types
// by op::ElemwiseType<1, 1>, and Imresize as the CPU compute function.
NNVM_REGISTER_OP(_cvimresize)
.describe("Resize image with OpenCV. \n")
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr_parser(op::ParamParser<ResizeParam>)
.set_attr<nnvm::FInferShape>("FInferShape", ResizeShape)
.set_attr<nnvm::FInferType>("FInferType", op::ElemwiseType<1, 1>)
.set_attr<FCompute>("FCompute<cpu>", Imresize)
.add_argument("src", "NDArray", "source image")
.add_arguments(ResizeParam::__FIELDS__());
// Registers the _cvcopyMakeBorder operator: one input ("src"), one output,
// attributes parsed into MakeBorderParam, shapes inferred by MakeBorderShape,
// types by op::ElemwiseType<1, 1>, and copyMakeBorder as the CPU compute
// function.
NNVM_REGISTER_OP(_cvcopyMakeBorder)
.describe("Pad image border with OpenCV. \n")
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr_parser(op::ParamParser<MakeBorderParam>)
.set_attr<nnvm::FInferShape>("FInferShape", MakeBorderShape)
.set_attr<nnvm::FInferType>("FInferType", op::ElemwiseType<1, 1>)
.set_attr<FCompute>("FCompute<cpu>", copyMakeBorder)
.add_argument("src", "NDArray", "source image")
.add_arguments(MakeBorderParam::__FIELDS__());
} // namespace io
} // namespace mxnet