blob: 05b3451ea8e8d567c773f82d7f466e50563fb2f4 [file] [log] [blame]
/************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/
#include "singa/singa_config.h"
#ifdef USE_OPENCV
#ifndef SINGA_EXAMPLES_IMAGENET_ILSVRC12_H_
#define SINGA_EXAMPLES_IMAGENET_ILSVRC12_H_
#include <omp.h>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <string>
#include <thread>
#include <vector>
#include "singa/core/tensor.h"
#include "singa/io/decoder.h"
#include "singa/io/encoder.h"
#include "singa/io/reader.h"
#include "singa/io/transformer.h"
#include "singa/io/writer.h"
#include "singa/proto/io.pb.h"
#include "singa/utils/timer.h"
using std::string;
using namespace singa::io;
namespace singa {
/// size for resizing
const size_t kImageSize = 256;
const size_t kImageNBytes = 3 * kImageSize * kImageSize;
/// size for cropping
const size_t kCropSize = 227;
/// For reading ILSVRC2012 image data as tensors.
class ILSVRC {
public:
/// Setup encoder, decoder
ILSVRC();
~ILSVRC() {
if (encoder != nullptr) delete encoder;
if (decoder != nullptr) delete decoder;
if (transformer != nullptr) delete transformer;
if (reader != nullptr) {
reader->Close();
delete reader;
}
if (writer != nullptr) {
writer->Close();
delete writer;
}
}
/// Create binary files for training data
/// train_image_list: list file of training images
/// train_image_folder: folder where stores original training images
/// train_bin_folder: folder to store binary files
/// train_file_size: number of images that are contain in one binary file
void CreateTrainData(string train_image_list, string train_image_folder,
string train_bin_folder, size_t train_file_size);
/// Create binary files for test data
/// train_image_list: list file of test images
/// train_image_folder: folder where saves original test images
/// train_bin_folder: folder to save binary files
void CreateTestData(string test_image_list, string test_image_folder,
string test_bin_folder);
/// Load data from a binary file, return <images, labels> pair
/// suppose the data will be loaded file by file.
/// flag: kTrain or kTest
/// file: binary file which stores the images
/// read_size: number of images to be loaded
/// offset: offset in the file
/// n_read: number of images which are read
size_t LoadData(int flag, string file, size_t read_size, Tensor *x, Tensor *y,
size_t *n_read, int nthreads);
/// A wrapper method to spawn a thread to execute LoadData() method.
std::thread AsyncLoadData(int flag, string file, size_t read_size, Tensor *x,
Tensor *y, size_t *n_read, int nthreads);
void DecodeTransform(int flag, int thid, int nthreads,
vector<string *> images, Tensor *x, Tensor *y);
/// A wrapper method to spawn a thread to execute Decodetransform() method.
std::thread AsyncDecodeTransform(int flag, int thid, int nthreads,
vector<string *> images, Tensor *x,
Tensor *y);
/// Read mean from path
void ReadMean(string path);
protected:
/// Read one image at path, resize the image
Tensor ReadImage(string path);
/// Write buff to the file in kCreate/kAppend mode
void Write(string outfile, singa::io::Mode mode);
void WriteMean(Tensor &mean, string path);
private:
Tensor mean;
string last_read_file = "";
JPGEncoder *encoder = nullptr;
JPGDecoder *decoder = nullptr;
ImageTransformer *transformer = nullptr;
BinFileReader *reader = nullptr;
BinFileWriter *writer = nullptr;
};
ILSVRC::ILSVRC() {
EncoderConf en_conf;
en_conf.set_image_dim_order("CHW");
encoder = new JPGEncoder();
encoder->Setup(en_conf);
DecoderConf de_conf;
de_conf.set_image_dim_order("CHW");
decoder = new JPGDecoder();
decoder->Setup(de_conf);
TransformerConf trans_conf;
trans_conf.add_crop_shape(kCropSize);
trans_conf.add_crop_shape(kCropSize);
trans_conf.set_image_dim_order("CHW");
trans_conf.set_horizontal_mirror(true);
transformer = new ImageTransformer();
transformer->Setup(trans_conf);
}
Tensor ILSVRC::ReadImage(string path) {
cv::Mat mat = cv::imread(path, CV_LOAD_IMAGE_COLOR);
CHECK(mat.data != NULL) << "OpenCV load image fail: " << path;
cv::Size size(kImageSize, kImageSize);
cv::Mat resized;
cv::resize(mat, resized, size);
CHECK_EQ((size_t)resized.size().height, kImageSize);
CHECK_EQ((size_t)resized.size().width, kImageSize);
// dimension_order: CHW
Shape shape{(size_t)resized.channels(), (size_t)resized.rows,
(size_t)resized.cols};
Tensor image(shape, singa::kUChar);
unsigned char *data = new unsigned char[kImageNBytes];
for (int i = 0; i < resized.rows; i++)
for (int j = 0; j < resized.cols; j++)
for (int k = 0; k < resized.channels(); k++)
data[k * kImageSize * kImageSize + i * kImageSize + j] =
resized.at<cv::Vec3b>(i, j)[k];
image.CopyDataFromHostPtr<unsigned char>(data, kImageNBytes);
delete[] data;
return image;
}
void ILSVRC::WriteMean(Tensor &mean, string path) {
Tensor mean_lb(Shape{1}, kInt);
std::vector<Tensor> input;
input.push_back(mean);
input.push_back(mean_lb);
BinFileWriter bfwriter;
bfwriter.Open(path, kCreate);
bfwriter.Write(path, encoder->Encode(input));
bfwriter.Flush();
bfwriter.Close();
}
void ILSVRC::CreateTrainData(string image_list, string input_folder,
string output_folder, size_t file_size = 12800) {
std::vector<std::pair<string, int>> file_list;
size_t *sum = new size_t[kImageNBytes];
for (size_t i = 0; i < kImageNBytes; i++) sum[i] = 0u;
string image_file_name;
int label;
string outfile;
std::ifstream image_list_file(image_list.c_str(), std::ios::in);
while (image_list_file >> image_file_name >> label)
file_list.push_back(std::make_pair(image_file_name, label));
LOG(INFO) << "Data Shuffling";
std::shuffle(file_list.begin(), file_list.end(),
std::default_random_engine());
LOG(INFO) << "Total number of training images is " << file_list.size();
size_t num_train_images = file_list.size();
if (file_size == 0) file_size = num_train_images;
for (size_t imageid = 0; imageid < num_train_images; imageid++) {
string path = input_folder + "/" + file_list[imageid].first;
Tensor image = ReadImage(path);
auto image_data = image.data<unsigned char>();
for (size_t i = 0; i < kImageNBytes; i++)
sum[i] += static_cast<size_t>(image_data[i]);
label = file_list[imageid].second;
Tensor lb(Shape{1}, kInt);
lb.CopyDataFromHostPtr<int>(&label, 1);
std::vector<Tensor> input;
input.push_back(image);
input.push_back(lb);
string encoded_str = encoder->Encode(input);
if (writer == nullptr) {
writer = new BinFileWriter();
outfile = output_folder + "/train" +
std::to_string(imageid / file_size + 1) + ".bin";
writer->Open(outfile, kCreate);
}
writer->Write(path, encoded_str);
if ((imageid + 1) % file_size == 0) {
writer->Flush();
writer->Close();
LOG(INFO) << "Write " << file_size << " images into " << outfile;
delete writer;
writer = nullptr;
}
}
if (writer != nullptr) {
writer->Flush();
writer->Close();
LOG(INFO) << "Write " << num_train_images % file_size << " images into "
<< outfile;
delete writer;
writer = nullptr;
}
size_t num_file =
num_train_images / file_size + ((num_train_images % file_size) ? 1 : 0);
LOG(INFO) << "Write " << num_train_images << " images into " << num_file
<< " binary files";
Tensor mean = Tensor(Shape{3, kImageSize, kImageSize}, kUChar);
unsigned char *mean_data = new unsigned char[kImageNBytes];
for (size_t i = 0; i < kImageNBytes; i++)
mean_data[i] = static_cast<unsigned char>(sum[i] / num_train_images);
mean.CopyDataFromHostPtr<unsigned char>(mean_data, kImageNBytes);
string mean_path = output_folder + "/mean.bin";
WriteMean(mean, mean_path);
delete[] mean_data;
delete[] sum;
}
void ILSVRC::CreateTestData(string image_list, string input_folder,
string output_folder) {
std::vector<std::pair<string, int>> file_list;
string image_file_name;
string outfile = output_folder + "/test.bin";
int label;
std::ifstream image_list_file(image_list.c_str(), std::ios::in);
while (image_list_file >> image_file_name >> label)
file_list.push_back(std::make_pair(image_file_name, label));
LOG(INFO) << "Total number of test images is " << file_list.size();
size_t num_test_images = file_list.size();
for (size_t imageid = 0; imageid < num_test_images; imageid++) {
string path = input_folder + "/" + file_list[imageid].first;
Tensor image = ReadImage(path);
label = file_list[imageid].second;
Tensor lb(Shape{1}, singa::kInt);
lb.CopyDataFromHostPtr<int>(&label, 1);
std::vector<Tensor> input;
input.push_back(image);
input.push_back(lb);
string encoded_str = encoder->Encode(input);
if (writer == nullptr) {
writer = new BinFileWriter();
writer->Open(outfile, kCreate);
}
writer->Write(path, encoded_str);
}
if (writer != nullptr) {
writer->Flush();
writer->Close();
delete writer;
writer = nullptr;
}
LOG(INFO) << "Write " << num_test_images << " images into " << outfile;
}
void ILSVRC::ReadMean(string path) {
BinFileReader bfreader;
string key, value;
bfreader.Open(path);
bfreader.Read(&key, &value);
auto ret = decoder->Decode(value);
bfreader.Close();
mean = ret[0];
}
std::thread ILSVRC::AsyncLoadData(int flag, string file, size_t read_size,
Tensor *x, Tensor *y, size_t *n_read,
int nthreads) {
return std::thread(
[=]() { LoadData(flag, file, read_size, x, y, n_read, nthreads); });
}
size_t ILSVRC::LoadData(int flag, string file, size_t read_size, Tensor *x,
Tensor *y, size_t *n_read, int nthreads) {
if (file != last_read_file) {
if (reader != nullptr) {
reader->Close();
delete reader;
reader = nullptr;
}
reader = new BinFileReader();
reader->Open(file, 100 << 20);
last_read_file = file;
} else if (reader == nullptr) {
reader = new BinFileReader();
reader->Open(file, 100 << 20);
}
vector<string *> images;
for (size_t i = 0; i < read_size; i++) {
string image_path;
string *image = new string();
bool ret = reader->Read(&image_path, image);
if (ret == false) {
reader->Close();
delete reader;
reader = nullptr;
break;
}
images.push_back(image);
}
int nimg = images.size();
*n_read = nimg;
vector<std::thread> threads;
for (int i = 1; i < nthreads; i++) {
threads.push_back(AsyncDecodeTransform(flag, i, nthreads, images, x, y));
}
DecodeTransform(flag, 0, nthreads, images, x, y);
for (size_t i = 0; i < threads.size(); i++) threads[i].join();
for (int k = 0; k < nimg; k++) delete images.at(k);
return nimg;
}
std::thread ILSVRC::AsyncDecodeTransform(int flag, int thid, int nthreads,
vector<string *> images, Tensor *x,
Tensor *y) {
return std::thread(
[=]() { DecodeTransform(flag, thid, nthreads, images, x, y); });
}
void ILSVRC::DecodeTransform(int flag, int thid, int nthreads,
vector<string *> images, Tensor *x, Tensor *y) {
int nimg = images.size();
int start = nimg / nthreads * thid;
int end = start + nimg / nthreads;
for (int k = start; k < end; k++) {
std::vector<Tensor> pair = decoder->Decode(*images.at(k));
auto tmp_image = pair[0] - mean;
Tensor aug_image = transformer->Apply(flag, tmp_image);
CopyDataToFrom(x, aug_image, aug_image.Size(), k * aug_image.Size());
CopyDataToFrom(y, pair[1], 1, k);
}
if (thid == 0) {
for (int k = nimg / nthreads * nthreads; k < nimg; k++) {
std::vector<Tensor> pair = decoder->Decode(*images.at(k));
auto tmp_image = pair[0] - mean;
Tensor aug_image = transformer->Apply(flag, tmp_image);
CopyDataToFrom(x, aug_image, aug_image.Size(), k * aug_image.Size());
CopyDataToFrom(y, pair[1], 1, k);
}
}
}
} // namespace singa
#endif // SINGA_EXAMPLES_IMAGENET_ILSVRC12_H_
#endif // USE_OPENCV