blob: 2cebaaa3a48eaa8ac074f8d5429b9282ef79d560 [file] [log] [blame]
/*!
* Copyright (c) 2015 by Contributors
* \file iter_normalize.h
* \brief Iterator that subtracts mean and do a few augmentations.
*/
#ifndef MXNET_IO_ITER_NORMALIZE_H_
#define MXNET_IO_ITER_NORMALIZE_H_
#include <mxnet/base.h>
#include <mxnet/io.h>
#include <mxnet/ndarray.h>
#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <dmlc/timer.h>
#include <mshadow/tensor.h>
#include <utility>
#include <string>
#include <vector>
#include "../common/utils.h"
#include "./image_iter_common.h"
namespace mxnet {
namespace io {
/*!
* \brief Iterator that normalize a image.
* It also applies a few augmention before normalization.
*/
class ImageNormalizeIter : public IIterator<DataInst> {
public:
explicit ImageNormalizeIter(IIterator<DataInst> *base)
: base_(base), meanfile_ready_(false) {
}
virtual void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) {
param_.InitAllowUnknown(kwargs);
base_->Init(kwargs);
rnd_.seed(kRandMagic + param_.seed);
outimg_.set_pad(false);
meanimg_.set_pad(false);
if (param_.mean_img.length() != 0) {
std::unique_ptr<dmlc::Stream> fi(
dmlc::Stream::Create(param_.mean_img.c_str(), "r", true));
if (fi.get() == nullptr) {
this->CreateMeanImg();
} else {
fi.reset(nullptr);
if (param_.verbose) {
LOG(INFO) << "Load mean image from " << param_.mean_img;
}
// use python compatible ndarray store format
std::vector<NDArray> data;
std::vector<std::string> keys;
{
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(param_.mean_img.c_str(), "r"));
NDArray::Load(fi.get(), &data, &keys);
}
CHECK_EQ(data.size(), 1U)
<< "Invalid mean image file format";
data[0].WaitToRead();
mshadow::Tensor<cpu, 3> src = data[0].data().get<cpu, 3, real_t>();
meanimg_.Resize(src.shape_);
mshadow::Copy(meanimg_, src);
meanfile_ready_ = true;
}
}
}
virtual void BeforeFirst(void) {
base_->BeforeFirst();
}
virtual const DataInst& Value(void) const {
return out_;
}
virtual bool Next(void) {
if (!this->Next_()) return false;
return true;
}
private:
/*! \brief base iterator */
std::unique_ptr<IIterator<DataInst> > base_;
// whether mean image is ready.
bool meanfile_ready_;
/*! \brief output data */
DataInst out_;
// normalize parameter.
ImageNormalizeParam param_;
/*! \brief mean image, if needed */
mshadow::TensorContainer<cpu, 3> meanimg_;
/*! \brief temp space for output image */
mshadow::TensorContainer<cpu, 3> outimg_;
/*! \brief random numeber engine */
common::RANDOM_ENGINE rnd_;
// random magic number of this iterator
static const int kRandMagic = 0;
/*! \brief internal next function, inlined for fater processing. */
inline bool Next_(void) {
if (!base_->Next()) return false;
const DataInst &src = base_->Value();
this->SetOutImg(src);
out_.data.resize(2);
out_.data[0] = outimg_;
out_.data[1] = src.data[1];
out_.index = src.index;
out_.extra_data = src.extra_data;
return true;
}
/*!
* \brief Set the output image, after augmentation and normalization.
* \param src The source image.
*/
inline void SetOutImg(const DataInst &src) {
using namespace mshadow::expr; // NOLINT(*)
std::uniform_real_distribution<float> rand_uniform(0, 1);
std::bernoulli_distribution coin_flip(0.5);
mshadow::Tensor<cpu, 3> data = src.data[0].get<cpu, 3, real_t>();
outimg_.Resize(data.shape_);
float contrast =
rand_uniform(rnd_) * param_.max_random_contrast * 2 - param_.max_random_contrast + 1;
float illumination =
rand_uniform(rnd_) * param_.max_random_illumination * 2 - param_.max_random_illumination;
if (param_.mean_r > 0.0f || param_.mean_g > 0.0f ||
param_.mean_b > 0.0f || param_.mean_a > 0.0f) {
// subtract mean per channel
data[0] -= param_.mean_r;
if (data.shape_[0] >= 3) {
data[1] -= param_.mean_g;
data[2] -= param_.mean_b;
}
if (data.shape_[0] == 4) {
data[3] -= param_.mean_a;
}
if ((param_.rand_mirror && coin_flip(rnd_)) || param_.mirror) {
outimg_ = mirror(data * contrast + illumination) * param_.scale;
} else {
outimg_ = (data * contrast + illumination) * param_.scale;
}
} else if (!meanfile_ready_ || param_.mean_img.length() == 0) {
// do not subtract anything
if ((param_.rand_mirror && coin_flip(rnd_)) || param_.mirror) {
outimg_ = mirror(data) * param_.scale;
} else {
outimg_ = F<mshadow::op::identity>(data) * param_.scale;
}
} else {
CHECK(meanfile_ready_);
if ((param_.rand_mirror && coin_flip(rnd_)) || param_.mirror) {
outimg_ = mirror((data - meanimg_) * contrast + illumination) * param_.scale;
} else {
outimg_ = ((data - meanimg_) * contrast + illumination) * param_.scale;
}
}
}
// creat mean image.
inline void CreateMeanImg(void) {
if (param_.verbose) {
LOG(INFO) << "Cannot find " << param_.mean_img
<< ": create mean image, this will take some time...";
}
double start = dmlc::GetTime();
size_t imcnt = 1; // NOLINT(*)
CHECK(this->Next_()) << "input iterator failed.";
meanimg_.Resize(outimg_.shape_);
mshadow::Copy(meanimg_, outimg_);
while (this->Next_()) {
meanimg_ += outimg_;
imcnt += 1;
double elapsed = dmlc::GetTime() - start;
if (imcnt % 10000L == 0 && param_.verbose) {
LOG(INFO) << imcnt << " images processed, " << elapsed << " sec elapsed";
}
}
meanimg_ *= (1.0f / imcnt);
// save as mxnet python compatible format.
TBlob tmp = meanimg_;
{
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(param_.mean_img.c_str(), "w"));
NDArray::Save(fo.get(),
{NDArray(tmp, 0)},
{"mean_img"});
}
if (param_.verbose) {
LOG(INFO) << "Save mean image to " << param_.mean_img << "..";
}
meanfile_ready_ = true;
this->BeforeFirst();
}
};
/*!
* \brief Iterator that normalize a image.
* It also applies a few augmention before normalization.
*/
class ImageDetNormalizeIter : public IIterator<DataInst> {
public:
explicit ImageDetNormalizeIter(IIterator<DataInst> *base)
: base_(base), meanfile_ready_(false) {
}
virtual void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) {
param_.InitAllowUnknown(kwargs);
base_->Init(kwargs);
rnd_.seed(kRandMagic + param_.seed);
outimg_.set_pad(false);
meanimg_.set_pad(false);
if (param_.mean_img.length() != 0) {
std::unique_ptr<dmlc::Stream> fi(
dmlc::Stream::Create(param_.mean_img.c_str(), "r", true));
if (fi.get() == nullptr) {
this->CreateMeanImg();
} else {
fi.reset(nullptr);
if (param_.verbose) {
LOG(INFO) << "Load mean image from " << param_.mean_img;
}
// use python compatible ndarray store format
std::vector<NDArray> data;
std::vector<std::string> keys;
{
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(param_.mean_img.c_str(), "r"));
NDArray::Load(fi.get(), &data, &keys);
}
CHECK_EQ(data.size(), 1)
<< "Invalid mean image file format";
data[0].WaitToRead();
mshadow::Tensor<cpu, 3> src = data[0].data().get<cpu, 3, real_t>();
meanimg_.Resize(src.shape_);
mshadow::Copy(meanimg_, src);
meanfile_ready_ = true;
}
}
}
virtual void BeforeFirst(void) {
base_->BeforeFirst();
}
virtual const DataInst& Value(void) const {
return out_;
}
virtual bool Next(void) {
if (!this->Next_()) return false;
return true;
}
private:
/*! \brief base iterator */
std::unique_ptr<IIterator<DataInst> > base_;
// whether mean image is ready.
bool meanfile_ready_;
/*! \brief output data */
DataInst out_;
// normalize parameter.
ImageDetNormalizeParam param_;
/*! \brief mean image, if needed */
mshadow::TensorContainer<cpu, 3> meanimg_;
/*! \brief temp space for output image */
mshadow::TensorContainer<cpu, 3> outimg_;
/*! \brief random numeber engine */
common::RANDOM_ENGINE rnd_;
// random magic number of this iterator
static const int kRandMagic = 0;
/*! \brief internal next function, inlined for fater processing. */
inline bool Next_(void) {
if (!base_->Next()) return false;
const DataInst &src = base_->Value();
this->SetOutImg(src);
out_.data.resize(2);
out_.data[0] = outimg_;
out_.data[1] = src.data[1];
out_.index = src.index;
out_.extra_data = src.extra_data;
return true;
}
/*!
* \brief Set the output image, after augmentation and normalization.
* \param src The source image.
*/
inline void SetOutImg(const DataInst &src) {
using namespace mshadow::expr; // NOLINT(*)
mshadow::Tensor<cpu, 3> data = src.data[0].get<cpu, 3, real_t>();
outimg_.Resize(data.shape_);
if (param_.mean_r > 0.0f || param_.mean_g > 0.0f ||
param_.mean_b > 0.0f || param_.mean_a > 0.0f) {
// subtract mean per channel
data[0] -= param_.mean_r;
if (data.shape_[0] >= 3) {
data[1] -= param_.mean_g;
data[2] -= param_.mean_b;
}
if (data.shape_[0] == 4) {
data[3] -= param_.mean_a;
}
} else if (!meanfile_ready_ || param_.mean_img.length() == 0) {
// do not subtract anything
} else {
CHECK(meanfile_ready_);
data -= meanimg_;
}
// std
if (param_.std_r > 0.0f) {
data[0] /= param_.std_r;
}
if (data.shape_[0] >= 3 && param_.std_g > 0.0f) {
data[1] /= param_.std_g;
}
if (data.shape_[0] >= 3 && param_.std_b > 0.0f) {
data[2] /= param_.std_b;
}
if (data.shape_[0] == 4 && param_.std_a > 0.0f) {
data[3] /= param_.std_a;
}
outimg_ = data * param_.scale;
}
// creat mean image.
inline void CreateMeanImg(void) {
if (param_.verbose) {
LOG(INFO) << "Cannot find " << param_.mean_img
<< ": create mean image, this will take some time...";
}
double start = dmlc::GetTime();
size_t imcnt = 1; // NOLINT(*)
CHECK(this->Next_()) << "input iterator failed.";
meanimg_.Resize(outimg_.shape_);
mshadow::Copy(meanimg_, outimg_);
while (this->Next_()) {
meanimg_ += outimg_;
imcnt += 1;
double elapsed = dmlc::GetTime() - start;
if (imcnt % 10000L == 0 && param_.verbose) {
LOG(INFO) << imcnt << " images processed, " << elapsed << " sec elapsed";
}
}
meanimg_ *= (1.0f / imcnt);
// save as mxnet python compatible format.
TBlob tmp = meanimg_;
{
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(param_.mean_img.c_str(), "w"));
NDArray::Save(fo.get(),
{NDArray(tmp, 0)},
{"mean_img"});
}
if (param_.verbose) {
LOG(INFO) << "Save mean image to " << param_.mean_img << "..";
}
meanfile_ready_ = true;
this->BeforeFirst();
}
};
} // namespace io
} // namespace mxnet
#endif // MXNET_IO_ITER_NORMALIZE_H_