/*!
 * Copyright (c) 2015 by Contributors
 * \file iter_normalize.h
 * \brief Iterators that subtract the mean and apply a few augmentations.
 */
| #ifndef MXNET_IO_ITER_NORMALIZE_H_ |
| #define MXNET_IO_ITER_NORMALIZE_H_ |
| |
| #include <mxnet/base.h> |
| #include <mxnet/io.h> |
| #include <mxnet/ndarray.h> |
| #include <dmlc/logging.h> |
| #include <dmlc/parameter.h> |
| #include <dmlc/timer.h> |
| #include <mshadow/tensor.h> |
| #include <utility> |
| #include <string> |
| #include <vector> |
| #include "../common/utils.h" |
| #include "./image_iter_common.h" |
| |
| namespace mxnet { |
| namespace io { |
| |
| /*! |
| * \brief Iterator that normalize a image. |
| * It also applies a few augmention before normalization. |
| */ |
| class ImageNormalizeIter : public IIterator<DataInst> { |
| public: |
| explicit ImageNormalizeIter(IIterator<DataInst> *base) |
| : base_(base), meanfile_ready_(false) { |
| } |
| |
| virtual void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) { |
| param_.InitAllowUnknown(kwargs); |
| base_->Init(kwargs); |
| rnd_.seed(kRandMagic + param_.seed); |
| outimg_.set_pad(false); |
| meanimg_.set_pad(false); |
| if (param_.mean_img.length() != 0) { |
| std::unique_ptr<dmlc::Stream> fi( |
| dmlc::Stream::Create(param_.mean_img.c_str(), "r", true)); |
| if (fi.get() == nullptr) { |
| this->CreateMeanImg(); |
| } else { |
| fi.reset(nullptr); |
| if (param_.verbose) { |
| LOG(INFO) << "Load mean image from " << param_.mean_img; |
| } |
| // use python compatible ndarray store format |
| std::vector<NDArray> data; |
| std::vector<std::string> keys; |
| { |
| std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(param_.mean_img.c_str(), "r")); |
| NDArray::Load(fi.get(), &data, &keys); |
| } |
| CHECK_EQ(data.size(), 1U) |
| << "Invalid mean image file format"; |
| data[0].WaitToRead(); |
| mshadow::Tensor<cpu, 3> src = data[0].data().get<cpu, 3, real_t>(); |
| meanimg_.Resize(src.shape_); |
| mshadow::Copy(meanimg_, src); |
| meanfile_ready_ = true; |
| } |
| } |
| } |
| |
| virtual void BeforeFirst(void) { |
| base_->BeforeFirst(); |
| } |
| |
| virtual const DataInst& Value(void) const { |
| return out_; |
| } |
| |
| virtual bool Next(void) { |
| if (!this->Next_()) return false; |
| return true; |
| } |
| |
| private: |
| /*! \brief base iterator */ |
| std::unique_ptr<IIterator<DataInst> > base_; |
| // whether mean image is ready. |
| bool meanfile_ready_; |
| /*! \brief output data */ |
| DataInst out_; |
| // normalize parameter. |
| ImageNormalizeParam param_; |
| /*! \brief mean image, if needed */ |
| mshadow::TensorContainer<cpu, 3> meanimg_; |
| /*! \brief temp space for output image */ |
| mshadow::TensorContainer<cpu, 3> outimg_; |
| /*! \brief random numeber engine */ |
| common::RANDOM_ENGINE rnd_; |
| // random magic number of this iterator |
| static const int kRandMagic = 0; |
| |
| /*! \brief internal next function, inlined for fater processing. */ |
| inline bool Next_(void) { |
| if (!base_->Next()) return false; |
| const DataInst &src = base_->Value(); |
| this->SetOutImg(src); |
| out_.data.resize(2); |
| out_.data[0] = outimg_; |
| out_.data[1] = src.data[1]; |
| out_.index = src.index; |
| out_.extra_data = src.extra_data; |
| return true; |
| } |
| /*! |
| * \brief Set the output image, after augmentation and normalization. |
| * \param src The source image. |
| */ |
| inline void SetOutImg(const DataInst &src) { |
| using namespace mshadow::expr; // NOLINT(*) |
| |
| std::uniform_real_distribution<float> rand_uniform(0, 1); |
| std::bernoulli_distribution coin_flip(0.5); |
| mshadow::Tensor<cpu, 3> data = src.data[0].get<cpu, 3, real_t>(); |
| |
| outimg_.Resize(data.shape_); |
| float contrast = |
| rand_uniform(rnd_) * param_.max_random_contrast * 2 - param_.max_random_contrast + 1; |
| float illumination = |
| rand_uniform(rnd_) * param_.max_random_illumination * 2 - param_.max_random_illumination; |
| |
| if (param_.mean_r > 0.0f || param_.mean_g > 0.0f || |
| param_.mean_b > 0.0f || param_.mean_a > 0.0f) { |
| // subtract mean per channel |
| data[0] -= param_.mean_r; |
| if (data.shape_[0] >= 3) { |
| data[1] -= param_.mean_g; |
| data[2] -= param_.mean_b; |
| } |
| if (data.shape_[0] == 4) { |
| data[3] -= param_.mean_a; |
| } |
| if ((param_.rand_mirror && coin_flip(rnd_)) || param_.mirror) { |
| outimg_ = mirror(data * contrast + illumination) * param_.scale; |
| } else { |
| outimg_ = (data * contrast + illumination) * param_.scale; |
| } |
| } else if (!meanfile_ready_ || param_.mean_img.length() == 0) { |
| // do not subtract anything |
| if ((param_.rand_mirror && coin_flip(rnd_)) || param_.mirror) { |
| outimg_ = mirror(data) * param_.scale; |
| } else { |
| outimg_ = F<mshadow::op::identity>(data) * param_.scale; |
| } |
| } else { |
| CHECK(meanfile_ready_); |
| if ((param_.rand_mirror && coin_flip(rnd_)) || param_.mirror) { |
| outimg_ = mirror((data - meanimg_) * contrast + illumination) * param_.scale; |
| } else { |
| outimg_ = ((data - meanimg_) * contrast + illumination) * param_.scale; |
| } |
| } |
| } |
| // creat mean image. |
| inline void CreateMeanImg(void) { |
| if (param_.verbose) { |
| LOG(INFO) << "Cannot find " << param_.mean_img |
| << ": create mean image, this will take some time..."; |
| } |
| double start = dmlc::GetTime(); |
| size_t imcnt = 1; // NOLINT(*) |
| CHECK(this->Next_()) << "input iterator failed."; |
| meanimg_.Resize(outimg_.shape_); |
| mshadow::Copy(meanimg_, outimg_); |
| while (this->Next_()) { |
| meanimg_ += outimg_; |
| imcnt += 1; |
| double elapsed = dmlc::GetTime() - start; |
| if (imcnt % 10000L == 0 && param_.verbose) { |
| LOG(INFO) << imcnt << " images processed, " << elapsed << " sec elapsed"; |
| } |
| } |
| meanimg_ *= (1.0f / imcnt); |
| // save as mxnet python compatible format. |
| TBlob tmp = meanimg_; |
| { |
| std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(param_.mean_img.c_str(), "w")); |
| NDArray::Save(fo.get(), |
| {NDArray(tmp, 0)}, |
| {"mean_img"}); |
| } |
| if (param_.verbose) { |
| LOG(INFO) << "Save mean image to " << param_.mean_img << ".."; |
| } |
| meanfile_ready_ = true; |
| this->BeforeFirst(); |
| } |
| }; |
| |
| /*! |
| * \brief Iterator that normalize a image. |
| * It also applies a few augmention before normalization. |
| */ |
| class ImageDetNormalizeIter : public IIterator<DataInst> { |
| public: |
| explicit ImageDetNormalizeIter(IIterator<DataInst> *base) |
| : base_(base), meanfile_ready_(false) { |
| } |
| |
| virtual void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) { |
| param_.InitAllowUnknown(kwargs); |
| base_->Init(kwargs); |
| rnd_.seed(kRandMagic + param_.seed); |
| outimg_.set_pad(false); |
| meanimg_.set_pad(false); |
| if (param_.mean_img.length() != 0) { |
| std::unique_ptr<dmlc::Stream> fi( |
| dmlc::Stream::Create(param_.mean_img.c_str(), "r", true)); |
| if (fi.get() == nullptr) { |
| this->CreateMeanImg(); |
| } else { |
| fi.reset(nullptr); |
| if (param_.verbose) { |
| LOG(INFO) << "Load mean image from " << param_.mean_img; |
| } |
| // use python compatible ndarray store format |
| std::vector<NDArray> data; |
| std::vector<std::string> keys; |
| { |
| std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(param_.mean_img.c_str(), "r")); |
| NDArray::Load(fi.get(), &data, &keys); |
| } |
| CHECK_EQ(data.size(), 1) |
| << "Invalid mean image file format"; |
| data[0].WaitToRead(); |
| mshadow::Tensor<cpu, 3> src = data[0].data().get<cpu, 3, real_t>(); |
| meanimg_.Resize(src.shape_); |
| mshadow::Copy(meanimg_, src); |
| meanfile_ready_ = true; |
| } |
| } |
| } |
| |
| virtual void BeforeFirst(void) { |
| base_->BeforeFirst(); |
| } |
| |
| virtual const DataInst& Value(void) const { |
| return out_; |
| } |
| |
| virtual bool Next(void) { |
| if (!this->Next_()) return false; |
| return true; |
| } |
| |
| private: |
| /*! \brief base iterator */ |
| std::unique_ptr<IIterator<DataInst> > base_; |
| // whether mean image is ready. |
| bool meanfile_ready_; |
| /*! \brief output data */ |
| DataInst out_; |
| // normalize parameter. |
| ImageDetNormalizeParam param_; |
| /*! \brief mean image, if needed */ |
| mshadow::TensorContainer<cpu, 3> meanimg_; |
| /*! \brief temp space for output image */ |
| mshadow::TensorContainer<cpu, 3> outimg_; |
| /*! \brief random numeber engine */ |
| common::RANDOM_ENGINE rnd_; |
| // random magic number of this iterator |
| static const int kRandMagic = 0; |
| |
| /*! \brief internal next function, inlined for fater processing. */ |
| inline bool Next_(void) { |
| if (!base_->Next()) return false; |
| const DataInst &src = base_->Value(); |
| this->SetOutImg(src); |
| out_.data.resize(2); |
| out_.data[0] = outimg_; |
| out_.data[1] = src.data[1]; |
| out_.index = src.index; |
| out_.extra_data = src.extra_data; |
| return true; |
| } |
| /*! |
| * \brief Set the output image, after augmentation and normalization. |
| * \param src The source image. |
| */ |
| inline void SetOutImg(const DataInst &src) { |
| using namespace mshadow::expr; // NOLINT(*) |
| mshadow::Tensor<cpu, 3> data = src.data[0].get<cpu, 3, real_t>(); |
| |
| outimg_.Resize(data.shape_); |
| |
| if (param_.mean_r > 0.0f || param_.mean_g > 0.0f || |
| param_.mean_b > 0.0f || param_.mean_a > 0.0f) { |
| // subtract mean per channel |
| data[0] -= param_.mean_r; |
| if (data.shape_[0] >= 3) { |
| data[1] -= param_.mean_g; |
| data[2] -= param_.mean_b; |
| } |
| if (data.shape_[0] == 4) { |
| data[3] -= param_.mean_a; |
| } |
| } else if (!meanfile_ready_ || param_.mean_img.length() == 0) { |
| // do not subtract anything |
| } else { |
| CHECK(meanfile_ready_); |
| data -= meanimg_; |
| } |
| |
| // std |
| if (param_.std_r > 0.0f) { |
| data[0] /= param_.std_r; |
| } |
| if (data.shape_[0] >= 3 && param_.std_g > 0.0f) { |
| data[1] /= param_.std_g; |
| } |
| if (data.shape_[0] >= 3 && param_.std_b > 0.0f) { |
| data[2] /= param_.std_b; |
| } |
| if (data.shape_[0] == 4 && param_.std_a > 0.0f) { |
| data[3] /= param_.std_a; |
| } |
| outimg_ = data * param_.scale; |
| } |
| |
| // creat mean image. |
| inline void CreateMeanImg(void) { |
| if (param_.verbose) { |
| LOG(INFO) << "Cannot find " << param_.mean_img |
| << ": create mean image, this will take some time..."; |
| } |
| double start = dmlc::GetTime(); |
| size_t imcnt = 1; // NOLINT(*) |
| CHECK(this->Next_()) << "input iterator failed."; |
| meanimg_.Resize(outimg_.shape_); |
| mshadow::Copy(meanimg_, outimg_); |
| while (this->Next_()) { |
| meanimg_ += outimg_; |
| imcnt += 1; |
| double elapsed = dmlc::GetTime() - start; |
| if (imcnt % 10000L == 0 && param_.verbose) { |
| LOG(INFO) << imcnt << " images processed, " << elapsed << " sec elapsed"; |
| } |
| } |
| meanimg_ *= (1.0f / imcnt); |
| // save as mxnet python compatible format. |
| TBlob tmp = meanimg_; |
| { |
| std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(param_.mean_img.c_str(), "w")); |
| NDArray::Save(fo.get(), |
| {NDArray(tmp, 0)}, |
| {"mean_img"}); |
| } |
| if (param_.verbose) { |
| LOG(INFO) << "Save mean image to " << param_.mean_img << ".."; |
| } |
| meanfile_ready_ = true; |
| this->BeforeFirst(); |
| } |
| }; |
| } // namespace io |
| } // namespace mxnet |
| #endif // MXNET_IO_ITER_NORMALIZE_H_ |