# Dogs vs. Cats classification with mxnet and R
## Packages and prerequisites
In this tutorial, we mainly use the following three packages:
* `mxnet`: model training
* `imager`: image processing
* `abind`: array manipulation
It is an end-to-end R solution for the Dogs vs. Cats Kaggle competition (https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/)
and can be used as an example of fine-tuning a pretrained model.
All the code has been tested on Ubuntu 16.04.
```{r, echo=FALSE}
knitr::opts_chunk$set(eval = FALSE)
```
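If `imager` and `abind` are missing, they can be installed from CRAN; `mxnet` itself follows the installation instructions on the mxnet website. A minimal sketch, assuming a CRAN mirror is already configured:
```{r}
# image-processing helpers from CRAN; mxnet is installed separately,
# following the official mxnet R installation guide
install.packages(c("imager", "abind"))
```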
```{r}
library(imager)
library(mxnet)
library(abind)
```
## Image processing
### Renaming train files
```{r}
files <- list.files("./train/")
old_names <- sapply(files, strsplit, split = ".", fixed = TRUE)
max_length <- max(sapply(old_names, function(x) nchar(x[[2]])))
zeros <- max_length - sapply(old_names, function(x) nchar(x[[2]]))
zeros <- sapply(zeros, function(x) paste(rep(0, x), collapse = ""))
new_names <- Map(function(x, y) {paste0("./train/", x[1], "/", y, x[2], ".jpg")},
                 x = old_names, y = zeros)
# Full names
files <- paste0("./train/", files)
dir.create("./train/cat")
dir.create("./train/dog")
# New names will be in 00001.jpg format
Map(function(x, y) file.rename(from = x, to = y), files, new_names)
```
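After renaming, every training image should have been moved into one of the two class folders. A quick check (a sketch; the standard Kaggle training set has 12,500 images per class):
```{r}
# expect 12500 files in each class folder
length(list.files("./train/cat"))
length(list.files("./train/dog"))
```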
### Training images: 224x224, padded with empty space
```{r}
files <- list.files("./train/", recursive = TRUE)
new_names <- paste0("./train_pad_224x224/", files)
files <- paste0("./train/", files)
dir.create("./train_pad_224x224/")
dir.create("./train_pad_224x224/cat")
dir.create("./train_pad_224x224/dog")
padImage <- function(x) {
  # pad the shorter side with empty pixels so that the image becomes square
  long_side <- max(dim(x)[1:2])
  short_side <- min(dim(x)[1:2])
  pad_img <- pad(x,
                 nPix = long_side - short_side,
                 axes = ifelse(dim(x)[1] < dim(x)[2], "x", "y"))
  return(pad_img)
}
Map(function(x, y) {
  pad_img <- padImage(load.image(x))
  res_img <- resize(pad_img, size_x = 224, size_y = 224)
  imager::save.image(res_img, y)
}, x = files, y = new_names)
```
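As a quick sanity check (a minimal sketch using the `new_names` vector created above), the saved files should now be square 224x224 images:
```{r}
# imager stores images as (width, height, depth, colour); expect 224 224 1 3
dim(load.image(new_names[1]))
```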
### Renaming test files
```{r}
files <- list.files("./test/")
max_length <- max(sapply(files, nchar))
zeros <- max_length - sapply(files, nchar)
zeros <- sapply(zeros, function(x) paste(rep(0, x), collapse = ""))
new_names <- paste0("./test/", zeros, files)
files <- paste0("./test/", files)
Map(function(x, y) file.rename(from = x, to = y), files, new_names)
```
### Test images: 224x224, padded with empty space
```{r}
files <- list.files("./test/")
new_names <- paste0("./test_pad_224x224/", files)
files <- paste0("./test/", files)
dir.create("./test_pad_224x224/")
Map(function(x, y) {
  pad_img <- padImage(load.image(x))
  res_img <- resize(pad_img, size_x = 224, size_y = 224)
  imager::save.image(res_img, y)
}, x = files, y = new_names)
```
### Creating .rec files
```{r}
cat_files <- list.files("train_pad_224x224/cat/", recursive=TRUE)
cat_files <- paste0("cat/", cat_files)
dog_files <- list.files("train_pad_224x224/dog/", recursive=TRUE)
dog_files <- paste0("dog/", dog_files)
train_ind <- sample(length(cat_files), length(cat_files) * 0.8)
train_data <- c(1:(length(train_ind) * 2))
train_data <- cbind(train_data, c(rep(0, length(train_ind)), rep(1, length(train_ind))))
train_data <- cbind(train_data, c(cat_files[train_ind], dog_files[train_ind]))
train_data <- train_data[sample(nrow(train_data)),]
write.table(train_data, "cats_dogs_train.lst", quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)
im2rec("cats_dogs_train.lst", "train_pad_224x224/", "cats_dogs_train.rec")
val_ind <- c(1:length(cat_files))[!c(1:length(cat_files)) %in% train_ind]
val_data <- c(1:(length(val_ind) * 2))
val_data <- cbind(val_data, c(rep(0, length(val_ind)), rep(1, length(val_ind))))
val_data <- cbind(val_data, c(cat_files[val_ind], dog_files[val_ind]))
val_data <- val_data[sample(nrow(val_data)),]
write.table(val_data, "cats_dogs_val.lst", quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)
im2rec("cats_dogs_val.lst", "train_pad_224x224/", "cats_dogs_val.rec")
```
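Each line of the generated `.lst` files is tab-separated: an integer index, a numeric label (0 = cat, 1 = dog) and the image path relative to the root folder passed to `im2rec`. The first few entries can be inspected like this:
```{r}
# index, label and relative path, one image per line
head(read.delim("cats_dogs_train.lst", header = FALSE,
                col.names = c("index", "label", "path")))
```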
## The data iterator
```{r}
get_iterator <- function(data_shape, train_data, val_data, batch_size = 128) {
  train <- mx.io.ImageRecordIter(path.imgrec = train_data,
                                 batch.size = batch_size,
                                 data.shape = data_shape,
                                 rand.crop = TRUE,
                                 rand.mirror = TRUE)
  val <- mx.io.ImageRecordIter(path.imgrec = val_data,
                               batch.size = batch_size,
                               data.shape = data_shape,
                               rand.crop = FALSE,
                               rand.mirror = FALSE)
  return(list(train = train, val = val))
}
```
```{r}
data <- get_iterator(data_shape = c(224, 224, 3),
                     train_data = "cats_dogs_train.rec",
                     val_data = "cats_dogs_val.rec",
                     batch_size = 8)
train <- data$train
val <- data$val
```
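To verify that the iterator produces what the network expects, a single batch can be pulled manually. A minimal sketch, assuming the iterator methods exposed by mxnet's R `DataIter` class:
```{r}
# fetch one batch and check its dimensions: 224 x 224 x 3 x batch_size
train$reset()
train$iter.next()
batch <- train$value()
dim(batch$data)
batch$label
```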
## Load pretrained model
Here we use the pretrained Inception-BN model from http://data.mxnet.io/mxnet/data/.
The model was trained on the 1000 ImageNet classes,
so we need to replace the last fully connected layer with a new layer for 2 classes.
```{r}
download.file('http://data.mxnet.io/mxnet/data/Inception.zip', destfile = 'Inception.zip')
unzip("Inception.zip")
inception_bn <- mx.model.load("./Inception-BN", iteration = 126)
symbol <- inception_bn$symbol
# check symbol$arguments for layer names
internals <- symbol$get.internals()
outputs <- internals$outputs
flatten <- internals$get.output(which(outputs == "flatten_output"))
new_fc <- mx.symbol.FullyConnected(data = flatten,
                                   num_hidden = 2,
                                   name = "fc1")
# set name to the original name in symbol$arguments
new_soft <- mx.symbol.SoftmaxOutput(data = new_fc,
                                    name = "softmax")
# set name to the original name in symbol$arguments
arg_params_new <- mx.model.init.params(symbol = new_soft,
                                       input.shape = list("data" = c(224, 224, 3, 8)),
                                       output.shape = NULL,
                                       initializer = mx.init.uniform(0.1),
                                       ctx = mx.cpu())$arg.params
fc1_weights_new <- arg_params_new[["fc1_weight"]]
fc1_bias_new <- arg_params_new[["fc1_bias"]]
arg_params_new <- inception_bn$arg.params
arg_params_new[["fc1_weight"]] <- fc1_weights_new
arg_params_new[["fc1_bias"]] <- fc1_bias_new
```
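Before training, it is worth confirming that the modified symbol outputs two classes. A sketch using `mx.symbol.infer.shape` (assuming the same data shape as above):
```{r}
# the softmax output should have shape (2, batch_size)
shapes <- mx.symbol.infer.shape(new_soft, data = c(224, 224, 3, 8))
shapes$out.shapes
```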
## Fine-tuning
```{r}
model <- mx.model.FeedForward.create(
  symbol = new_soft,
  X = train,
  eval.data = val,
  ctx = mx.gpu(0),
  eval.metric = mx.metric.accuracy,
  num.round = 2,
  learning.rate = 0.05,
  momentum = 0.9,
  wd = 0.00001,
  kvstore = "local",
  array.batch.size = 128,
  epoch.end.callback = mx.callback.save.checkpoint("inception_bn"),
  batch.end.callback = mx.callback.log.train.metric(150),
  initializer = mx.init.Xavier(factor_type = "in", magnitude = 2.34),
  optimizer = "sgd",
  arg.params = arg_params_new,
  aux.params = inception_bn$aux.params
)
```
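The `mx.callback.save.checkpoint("inception_bn")` callback writes a checkpoint after every epoch, so the fine-tuned model can be reloaded later without retraining. A sketch, assuming the two rounds above completed:
```{r}
# load the checkpoint written after the second epoch
model <- mx.model.load("inception_bn", iteration = 2)
```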
## Making predictions
```{r}
preprocImage <- function(src,             # URL or file location
                         height = 224,
                         width = 224,
                         num_channels = 3, # 3 for RGB, 1 for grayscale
                         mult_by = 1,      # set to 255 because imager loads pixels in [0, 1]
                         crop = FALSE) {   # no crop by default
  im <- load.image(src)
  if (crop) {
    shape <- dim(im)
    short_edge <- min(shape[1:2])
    xx <- floor((shape[1] - short_edge) / 2)
    yy <- floor((shape[2] - short_edge) / 2)
    im <- crop.borders(im, xx, yy)
  }
  resized <- resize(im, size_x = width, size_y = height)
  arr <- as.array(resized) * mult_by
  dim(arr) <- c(width, height, num_channels, 1)
  return(arr)
}
```
```{r}
files <- list.files("./test_pad_224x224/")
files <- paste0("./test_pad_224x224/", files)
files <- split(files, rep(1:1250, each = 10))
probs <- lapply(files, function(x) {
  images <- lapply(x, preprocImage, mult_by = 255)
  images <- do.call(abind, images)
  predict(model, X = images, ctx = mx.gpu(0))
})
saveRDS(probs, "probs.rds")
probs <- t(do.call(cbind, probs))
preds <- data.frame(id = 1:12500, label = probs[, 2])
write.csv(preds, "subm.csv", row.names = FALSE, quote = FALSE)
```
<!-- INSERT SOURCE DOWNLOAD BUTTONS -->