| # Dogs vs. Cats classification with mxnet and R |
| |
| ## Packages and prerequisites |
| |
| In this tutorial, we mainly use the following three packages: |
| |
| * `mxnet`: model training |
| * `imager`: image processing |
| * `abind`: manipulations with arrays. |
| |
| It is an end-to-end R solution for the dogs vs cats Kaggle competition (https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/) |
| and it can be used as an example for fine-tuning. |
| All the code has been test on Ubuntu 16.04. |
| |
| ```{r, echo=FALSE} |
| knitr::opts_chunk$set(eval = FALSE) |
| ``` |
| |
| |
| ```{r} |
| library(imager) |
| library(mxnet) |
| library(abind) |
| ``` |
| |
| |
| ## Image processing |
| |
| ### Renaming train files |
| |
| ```{r} |
| files <- list.files("./train/") |
| old_names <- sapply(files, strsplit, split = ".", fixed = TRUE) |
| max_length <- max(sapply(old_names, function(x) nchar(x[[2]]))) |
| zeros <- max_length - sapply(old_names, function(x) nchar(x[[2]])) |
| zeros <- sapply(zeros, function(x) paste(rep(0, x), collapse = "")) |
| new_names <- Map(function(x, y) {paste0("./train/", x[1], "/", y, x[2], ".jpg")}, |
| x = old_names, y = zeros) |
| |
| # Full names |
| files <- paste0("./train/", files) |
| |
| dir.create("./train/cat") |
| dir.create("./train/dog") |
| |
| # New names will be in 00001.jpg format |
| Map(function(x, y) file.rename(from = x, to = y), files, new_names) |
| ``` |
| |
| ### Training images: 224x224, padded with empty space |
| |
| ```{r} |
| files <- list.files("./train/", recursive = TRUE) |
| new_names <- paste0("./train_pad_224x224/", files) |
| files <- paste0("./train/", files) |
| dir.create("./train_pad_224x224/") |
| dir.create("./train_pad_224x224/cat") |
| dir.create("./train_pad_224x224/dog") |
| |
| padImage <- function(x) { |
| long_side <- max(dim(x)[1:2]) |
| short_side <- min(dim(x)[1:2]) |
| pad_img <- pad(x, |
| nPix = long_side - short_side, |
| axes = ifelse(dim(x)[1] < dim(x)[2], "x", "y")) |
| return(pad_img) |
| } |
| |
| Map(function(x, y) { |
| pad_img <- padImage(load.image(x)) |
| res_img <- resize(pad_img, size_x = 224, size_y = 224) |
| imager::save.image(res_img, y) |
| }, x = files, y = new_names) |
| ``` |
| |
| ### Renaming test files |
| |
| ```{r} |
| files <- list.files("./test/") |
| max_length <- max(sapply(files, nchar)) |
| zeros <- max_length - sapply(files, nchar) |
| zeros <- sapply(zeros, function(x) paste(rep(0, x), collapse = "")) |
| newnames <- paste0("./test/", zeros, files) |
| |
| files <- paste0("./test/", files) |
| |
| Map(function(x, y) file.rename(from = x, to = y), files, newnames) |
| ``` |
| |
| |
| ### Test images: 224x224, padded with empty space |
| |
| ```{r} |
| files <- list.files("./test/") |
| new_names <- paste0("./test_pad_224x224/", files) |
| files <- paste0("./test/", files) |
| dir.create("./test_pad_224x224/") |
| |
| Map(function(x, y) { |
| pad_img <- padImage(load.image(x)) |
| res_img <- resize(pad_img, size_x = 224, size_y = 224) |
| imager::save.image(res_img, y) |
| }, x = files, y = new_names) |
| ``` |
| |
| ### Creating .rec files |
| |
| ```{r} |
| cat_files <- list.files("train_pad_224x224/cat/", recursive=TRUE) |
| cat_files <- paste0("cat/", cat_files) |
| |
| dog_files <- list.files("train_pad_224x224/dog/", recursive=TRUE) |
| dog_files <- paste0("dog/", dog_files) |
| |
| train_ind <- sample(length(cat_files), length(cat_files) * 0.8) |
| train_data <- c(1:(length(train_ind) * 2)) |
| train_data <- cbind(train_data, c(rep(0, length(train_ind)), rep(1, length(train_ind)))) |
| train_data <- cbind(train_data, c(cat_files[train_ind], dog_files[train_ind])) |
| train_data <- train_data[sample(nrow(train_data)),] |
| write.table(train_data, "cats_dogs_train.lst", quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE) |
| im2rec("cats_dogs_train.lst", "train_pad_224x224/", "cats_dogs_train.rec") |
| |
| val_ind <- c(1:length(cat_files))[!c(1:length(cat_files)) %in% train_ind] |
| val_data <- c(1:(length(val_ind) * 2)) |
| val_data <- cbind(val_data, c(rep(0, length(val_ind)), rep(1, length(val_ind)))) |
| val_data <- cbind(val_data, c(cat_files[val_ind], dog_files[val_ind])) |
| val_data <- val_data[sample(nrow(val_data)),] |
| write.table(val_data, "cats_dogs_val.lst", quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE) |
| im2rec("cats_dogs_val.lst", "train_pad_224x224/", "cats_dogs_val.rec") |
| ``` |
| |
| ## The data iterator |
| |
| ```{r} |
| get_iterator <- function(data_shape, train_data, val_data, batch_size = 128) { |
| train <- mx.io.ImageRecordIter(path.imgrec = train_data, |
| batch.size = batch_size, |
| data.shape = data_shape, |
| rand.crop = TRUE, |
| rand.mirror = TRUE) |
| |
| val <- mx.io.ImageRecordIter(path.imgrec = val_data, |
| batch.size = batch_size, |
| data.shape = data_shape, |
| rand.crop = FALSE, |
| rand.mirror = FALSE) |
| |
| return(list(train = train, val = val)) |
| } |
| ``` |
| |
| |
| ```{r} |
| data <- get_iterator(data_shape = c(224, 224, 3), |
| train_data = "cats_dogs_train.rec", |
| val_data = "cats_dogs_val.rec", |
| batch_size = 8) |
| train <- data$train |
| val <- data$val |
| ``` |
| |
| |
| ## Load pretrained model |
| |
| Here we use the pretrained model from http://data.dmlc.ml/models/imagenet/. |
| There are 1000 classes in imagenet, |
| and we need to replace the last fully connected layer with a new layer for 2 classes. |
| |
| |
| ```{r} |
| download.file('http://data.dmlc.ml/data/Inception.zip', destfile = 'Inception.zip') |
| unzip("Inception.zip") |
| inception_bn <- mx.model.load("./Inception-BN", iteration = 126) |
| |
| symbol <- inception_bn$symbol |
| # check symbol$arguments for layer names |
| internals <- symbol$get.internals() |
| outputs <- internals$outputs |
| |
| flatten <- internals$get.output(which(outputs == "flatten_output")) |
| |
| new_fc <- mx.symbol.FullyConnected(data = flatten, |
| num_hidden = 2, |
| name = "fc1") |
| # set name to original name in symbol$arguments |
| new_soft <- mx.symbol.SoftmaxOutput(data = new_fc, |
| name = "softmax") |
| # set name to original name in symbol$arguments |
| |
| arg_params_new <- mx.model.init.params(symbol = new_soft, |
| input.shape = list("data" = c(224, 224, 3, 8)), |
| output.shape = NULL, |
| initializer = mx.init.uniform(0.1), |
| ctx = mx.cpu())$arg.params |
| fc1_weights_new <- arg_params_new[["fc1_weight"]] |
| fc1_bias_new <- arg_params_new[["fc1_bias"]] |
| |
| arg_params_new <- inception_bn$arg.params |
| |
| arg_params_new[["fc1_weight"]] <- fc1_weights_new |
| arg_params_new[["fc1_bias"]] <- fc1_bias_new |
| ``` |
| |
| |
| ## Fine-tuning |
| |
| ```{r} |
| model <- mx.model.FeedForward.create( |
| symbol = new_soft, |
| X = train, |
| eval.data = val, |
| ctx = mx.gpu(0), |
| eval.metric = mx.metric.accuracy, |
| num.round = 2, |
| learning.rate = 0.05, |
| momentum = 0.9, |
| wd = 0.00001, |
| kvstore = "local", |
| array.batch.size = 128, |
| epoch.end.callback = mx.callback.save.checkpoint("inception_bn"), |
| batch.end.callback = mx.callback.log.train.metric(150), |
| initializer = mx.init.Xavier(factor_type = "in", magnitude = 2.34), |
| optimizer = "sgd", |
| arg.params = arg_params_new, |
| aux.params = inception_bn$aux.params |
| ) |
| ``` |
| ## Making predictions |
| |
| ```{r} |
| preprocImage<- function(src, # URL or file location |
| height = 224, |
| width = 224, |
| num_channels = 3, # 3 for RGB, 1 for grayscale |
| mult_by = 1, # set to 255 for normalized image |
| crop = FALSE) { # no crop by default |
| im <- load.image(src) |
| |
| if (crop) { |
| shape <- dim(im) |
| short_edge <- min(shape[1:2]) |
| xx <- floor((shape[1] - short_edge) / 2) |
| yy <- floor((shape[2] - short_edge) / 2) |
| im <- crop.borders(im, xx, yy) |
| } |
| |
| resized <- resize(im, size_x = width, size_y = height) |
| arr <- as.array(resized) * mult_by |
| dim(arr) <- c(width, height, num_channels, 1) |
| return(arr) |
| } |
| ``` |
| |
| ```{r} |
| files <- list.files("./test_pad_224x224/") |
| files <- paste0("./test_pad_224x224/", files) |
| |
| files <- split(files, rep(1:1250, each = 10)) |
| probs <- lapply(files, function(x) { |
| images <- lapply(x, preprocImage, mult_by = 255) |
| images <- do.call(abind, images) |
| probs <- predict(model, X = images, ctx = mx.gpu(0)) |
| }) |
| saveRDS(probs, "probs.rds") |
| probs <- t(do.call(cbind, probs)) |
| |
| preds <- data.frame(id = 1:12500, label = probs[, 2]) |
| write.csv(preds, "subm.csv", row.names = FALSE, quote = FALSE) |
| ``` |
| |
| |
| <!-- INSERT SOURCE DOWNLOAD BUTTONS --> |