R-package/vignettes/CatsDogsFinetune.Rmd - mxnet - Git at Google

 # Dogs vs. Cats classification with mxnet and R

 ## Packages and prerequisites

 In this tutorial, we mainly use the following three packages:

 * `mxnet`: model training
 * `imager`: image processing
 * `abind`: manipulations with arrays.

 It is an end-to-end R solution for the dogs vs cats Kaggle competition (https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/)
 and it can be used as an example for fine-tuning.
 All the code has been test on Ubuntu 16.04.

 ```{r, echo=FALSE}
 knitr::opts_chunk$set(eval = FALSE)
 ```


 ```{r}
 library(imager)
 library(mxnet)
 library(abind)
 ```


 ## Image processing

 ### Renaming train files

 ```{r}
 files <- list.files("./train/")
 old_names <- sapply(files, strsplit, split = ".", fixed = TRUE)
 max_length <- max(sapply(old_names, function(x) nchar(x[[2]])))
 zeros <- max_length - sapply(old_names, function(x) nchar(x[[2]]))
 zeros <- sapply(zeros, function(x) paste(rep(0, x), collapse = ""))
 new_names <- Map(function(x, y) {paste0("./train/", x[1], "/", y, x[2], ".jpg")},
                  x = old_names, y = zeros)

 # Full names
 files <- paste0("./train/", files)

 dir.create("./train/cat")
 dir.create("./train/dog")

 # New names will be in 00001.jpg format
 Map(function(x, y) file.rename(from = x, to = y), files, new_names)
 ```

 ### Training images: 224x224, padded with empty space

 ```{r}
 files <- list.files("./train/", recursive = TRUE)
 new_names <- paste0("./train_pad_224x224/", files)
 files <- paste0("./train/", files)
 dir.create("./train_pad_224x224/")
 dir.create("./train_pad_224x224/cat")
 dir.create("./train_pad_224x224/dog")

 padImage <- function(x) {
   long_side <- max(dim(x)[1:2])
   short_side <- min(dim(x)[1:2])
   pad_img <- pad(x,
   nPix = long_side - short_side,
   axes = ifelse(dim(x)[1] < dim(x)[2], "x", "y"))
   return(pad_img)
 }

 Map(function(x, y) {
   pad_img <- padImage(load.image(x))
   res_img <- resize(pad_img,  size_x = 224, size_y = 224)
   imager::save.image(res_img, y)
   }, x = files, y = new_names)
 ```

 ### Renaming test files

 ```{r}
 files <- list.files("./test/")
 max_length <- max(sapply(files, nchar))
 zeros <- max_length - sapply(files, nchar)
 zeros <- sapply(zeros, function(x) paste(rep(0, x), collapse = ""))
 newnames <- paste0("./test/", zeros, files)

 files <- paste0("./test/", files)

 Map(function(x, y) file.rename(from = x, to = y), files, newnames)
 ```


 ### Test images: 224x224, padded with empty space

 ```{r}
 files <- list.files("./test/")
 new_names <- paste0("./test_pad_224x224/", files)
 files <- paste0("./test/", files)
 dir.create("./test_pad_224x224/")

 Map(function(x, y) {
   pad_img <- padImage(load.image(x))
   res_img <- resize(pad_img,  size_x = 224, size_y = 224)
   imager::save.image(res_img, y)
 }, x = files, y = new_names)
 ```

 ### Creating .rec files

 ```{r}
 cat_files <- list.files("train_pad_224x224/cat/", recursive=TRUE)
 cat_files <- paste0("cat/", cat_files)

 dog_files <- list.files("train_pad_224x224/dog/", recursive=TRUE)
 dog_files <- paste0("dog/", dog_files)

 train_ind <- sample(length(cat_files), length(cat_files) * 0.8)
 train_data <- c(1:(length(train_ind) * 2))
 train_data <- cbind(train_data, c(rep(0, length(train_ind)), rep(1, length(train_ind))))
 train_data <- cbind(train_data, c(cat_files[train_ind], dog_files[train_ind]))
 train_data <- train_data[sample(nrow(train_data)),]
 write.table(train_data, "cats_dogs_train.lst", quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)
 im2rec("cats_dogs_train.lst", "train_pad_224x224/", "cats_dogs_train.rec")

 val_ind <- c(1:length(cat_files))[!c(1:length(cat_files)) %in% train_ind]
 val_data <- c(1:(length(val_ind) * 2))
 val_data <- cbind(val_data, c(rep(0, length(val_ind)), rep(1, length(val_ind))))
 val_data <- cbind(val_data, c(cat_files[val_ind], dog_files[val_ind]))
 val_data <- val_data[sample(nrow(val_data)),]
 write.table(val_data, "cats_dogs_val.lst", quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)
 im2rec("cats_dogs_val.lst", "train_pad_224x224/", "cats_dogs_val.rec")
 ```

 ## The data iterator

 ```{r}
 get_iterator <- function(data_shape, train_data, val_data, batch_size = 128) {
     train <- mx.io.ImageRecordIter(path.imgrec = train_data,
                                    batch.size  = batch_size,
                                    data.shape  = data_shape,
                                    rand.crop   = TRUE,
                                    rand.mirror = TRUE)

     val <- mx.io.ImageRecordIter(path.imgrec = val_data,
                                  batch.size  = batch_size,
                                  data.shape  = data_shape,
                                  rand.crop   = FALSE,
                                  rand.mirror = FALSE)

   return(list(train = train, val = val))
 }
 ```


 ```{r}
 data  <- get_iterator(data_shape = c(224, 224, 3),
                       train_data = "cats_dogs_train.rec",
                       val_data   = "cats_dogs_val.rec",
                       batch_size = 8)
 train <- data$train
 val   <- data$val
 ```


 ## Load pretrained model

 Here we use the pretrained model from http://data.dmlc.ml/models/imagenet/.
 There are 1000 classes in imagenet,
 and we need to replace the last fully connected layer with a new layer for 2 classes.


 ```{r}
 download.file('http://data.dmlc.ml/data/Inception.zip', destfile = 'Inception.zip')
 unzip("Inception.zip")
 inception_bn <- mx.model.load("./Inception-BN", iteration = 126)

 symbol <- inception_bn$symbol
 # check symbol$arguments for layer names
 internals <- symbol$get.internals()
 outputs <- internals$outputs

 flatten <- internals$get.output(which(outputs == "flatten_output"))

 new_fc <- mx.symbol.FullyConnected(data = flatten,
                                    num_hidden = 2,
                                    name = "fc1")
 # set name to original name in symbol$arguments
 new_soft <- mx.symbol.SoftmaxOutput(data = new_fc,
                                     name = "softmax")
 # set name to original name in symbol$arguments

 arg_params_new <- mx.model.init.params(symbol = new_soft,
                                        input.shape = list("data" = c(224, 224, 3, 8)),
                                        output.shape = NULL,
                                        initializer = mx.init.uniform(0.1),
                                        ctx = mx.cpu())$arg.params
 fc1_weights_new <- arg_params_new[["fc1_weight"]]
 fc1_bias_new <- arg_params_new[["fc1_bias"]]

 arg_params_new <- inception_bn$arg.params

 arg_params_new[["fc1_weight"]] <- fc1_weights_new
 arg_params_new[["fc1_bias"]] <- fc1_bias_new
 ```


 ## Fine-tuning

 ```{r}
 model <- mx.model.FeedForward.create(
   symbol             = new_soft,
   X                  = train,
   eval.data          = val,
   ctx                = mx.gpu(0),
   eval.metric        = mx.metric.accuracy,
   num.round          = 2,
   learning.rate      = 0.05,
   momentum           = 0.9,
   wd                 = 0.00001,
   kvstore            = "local",
   array.batch.size   = 128,
   epoch.end.callback = mx.callback.save.checkpoint("inception_bn"),
   batch.end.callback = mx.callback.log.train.metric(150),
   initializer        = mx.init.Xavier(factor_type = "in", magnitude = 2.34),
   optimizer          = "sgd",
   arg.params         = arg_params_new,
   aux.params         = inception_bn$aux.params
 )
 ```
 ## Making predictions

 ```{r}
 preprocImage<- function(src, # URL or file location
                         height = 224,
                         width = 224,
                         num_channels = 3, # 3 for RGB, 1 for grayscale
                         mult_by = 1,      # set to 255 for normalized image
                         crop = FALSE) {   # no crop by default
   im <- load.image(src)

   if (crop) {
     shape <- dim(im)
     short_edge <- min(shape[1:2])
     xx <- floor((shape[1] - short_edge) / 2)
     yy <- floor((shape[2] - short_edge) / 2)
     im <- crop.borders(im, xx, yy)
   }

   resized <- resize(im,  size_x = width, size_y = height)
   arr <- as.array(resized) * mult_by
   dim(arr) <- c(width, height, num_channels, 1)
   return(arr)
 }
 ```

 ```{r}
 files <- list.files("./test_pad_224x224/")
 files <- paste0("./test_pad_224x224/", files)

 files <- split(files, rep(1:1250, each = 10))
 probs <- lapply(files, function(x) {
   images <- lapply(x, preprocImage, mult_by = 255)
   images <- do.call(abind, images)
   probs <- predict(model, X = images, ctx = mx.gpu(0))
 })
 saveRDS(probs, "probs.rds")
 probs <- t(do.call(cbind, probs))

 preds <- data.frame(id = 1:12500, label = probs[, 2])
 write.csv(preds, "subm.csv", row.names = FALSE, quote = FALSE)
 ```


 <!-- INSERT SOURCE DOWNLOAD BUTTONS -->
	# Dogs vs. Cats classification with mxnet and R

	## Packages and prerequisites

	In this tutorial, we mainly use the following three packages:

	* `mxnet`: model training
	* `imager`: image processing
	* `abind`: manipulations with arrays.

	It is an end-to-end R solution for the dogs vs cats Kaggle competition (https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/)
	and it can be used as an example for fine-tuning.
	All the code has been test on Ubuntu 16.04.

	```{r, echo=FALSE}
	knitr::opts_chunk$set(eval = FALSE)
	```


	```{r}
	library(imager)
	library(mxnet)
	library(abind)
	```


	## Image processing

	### Renaming train files

	```{r}
	files <- list.files("./train/")
	old_names <- sapply(files, strsplit, split = ".", fixed = TRUE)
	max_length <- max(sapply(old_names, function(x) nchar(x[[2]])))
	zeros <- max_length - sapply(old_names, function(x) nchar(x[[2]]))
	zeros <- sapply(zeros, function(x) paste(rep(0, x), collapse = ""))
	new_names <- Map(function(x, y) {paste0("./train/", x[1], "/", y, x[2], ".jpg")},
	x = old_names, y = zeros)

	# Full names
	files <- paste0("./train/", files)

	dir.create("./train/cat")
	dir.create("./train/dog")

	# New names will be in 00001.jpg format
	Map(function(x, y) file.rename(from = x, to = y), files, new_names)
	```

	### Training images: 224x224, padded with empty space

	```{r}
	files <- list.files("./train/", recursive = TRUE)
	new_names <- paste0("./train_pad_224x224/", files)
	files <- paste0("./train/", files)
	dir.create("./train_pad_224x224/")
	dir.create("./train_pad_224x224/cat")
	dir.create("./train_pad_224x224/dog")

	padImage <- function(x) {
	long_side <- max(dim(x)[1:2])
	short_side <- min(dim(x)[1:2])
	pad_img <- pad(x,
	nPix = long_side - short_side,
	axes = ifelse(dim(x)[1] < dim(x)[2], "x", "y"))
	return(pad_img)
	}

	Map(function(x, y) {
	pad_img <- padImage(load.image(x))
	res_img <- resize(pad_img, size_x = 224, size_y = 224)
	imager::save.image(res_img, y)
	}, x = files, y = new_names)
	```

	### Renaming test files

	```{r}
	files <- list.files("./test/")
	max_length <- max(sapply(files, nchar))
	zeros <- max_length - sapply(files, nchar)
	zeros <- sapply(zeros, function(x) paste(rep(0, x), collapse = ""))
	newnames <- paste0("./test/", zeros, files)

	files <- paste0("./test/", files)

	Map(function(x, y) file.rename(from = x, to = y), files, newnames)
	```


	### Test images: 224x224, padded with empty space

	```{r}
	files <- list.files("./test/")
	new_names <- paste0("./test_pad_224x224/", files)
	files <- paste0("./test/", files)
	dir.create("./test_pad_224x224/")

	Map(function(x, y) {
	pad_img <- padImage(load.image(x))
	res_img <- resize(pad_img, size_x = 224, size_y = 224)
	imager::save.image(res_img, y)
	}, x = files, y = new_names)
	```

	### Creating .rec files

	```{r}
	cat_files <- list.files("train_pad_224x224/cat/", recursive=TRUE)
	cat_files <- paste0("cat/", cat_files)

	dog_files <- list.files("train_pad_224x224/dog/", recursive=TRUE)
	dog_files <- paste0("dog/", dog_files)

	train_ind <- sample(length(cat_files), length(cat_files) * 0.8)
	train_data <- c(1:(length(train_ind) * 2))
	train_data <- cbind(train_data, c(rep(0, length(train_ind)), rep(1, length(train_ind))))
	train_data <- cbind(train_data, c(cat_files[train_ind], dog_files[train_ind]))
	train_data <- train_data[sample(nrow(train_data)),]
	write.table(train_data, "cats_dogs_train.lst", quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)
	im2rec("cats_dogs_train.lst", "train_pad_224x224/", "cats_dogs_train.rec")

	val_ind <- c(1:length(cat_files))[!c(1:length(cat_files)) %in% train_ind]
	val_data <- c(1:(length(val_ind) * 2))
	val_data <- cbind(val_data, c(rep(0, length(val_ind)), rep(1, length(val_ind))))
	val_data <- cbind(val_data, c(cat_files[val_ind], dog_files[val_ind]))
	val_data <- val_data[sample(nrow(val_data)),]
	write.table(val_data, "cats_dogs_val.lst", quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)
	im2rec("cats_dogs_val.lst", "train_pad_224x224/", "cats_dogs_val.rec")
	```

	## The data iterator

	```{r}
	get_iterator <- function(data_shape, train_data, val_data, batch_size = 128) {
	train <- mx.io.ImageRecordIter(path.imgrec = train_data,
	batch.size = batch_size,
	data.shape = data_shape,
	rand.crop = TRUE,
	rand.mirror = TRUE)

	val <- mx.io.ImageRecordIter(path.imgrec = val_data,
	batch.size = batch_size,
	data.shape = data_shape,
	rand.crop = FALSE,
	rand.mirror = FALSE)

	return(list(train = train, val = val))
	}
	```


	```{r}
	data <- get_iterator(data_shape = c(224, 224, 3),
	train_data = "cats_dogs_train.rec",
	val_data = "cats_dogs_val.rec",
	batch_size = 8)
	train <- data$train
	val <- data$val
	```


	## Load pretrained model

	Here we use the pretrained model from http://data.dmlc.ml/models/imagenet/.
	There are 1000 classes in imagenet,
	and we need to replace the last fully connected layer with a new layer for 2 classes.


	```{r}
	download.file('http://data.dmlc.ml/data/Inception.zip', destfile = 'Inception.zip')
	unzip("Inception.zip")
	inception_bn <- mx.model.load("./Inception-BN", iteration = 126)

	symbol <- inception_bn$symbol
	# check symbol$arguments for layer names
	internals <- symbol$get.internals()
	outputs <- internals$outputs

	flatten <- internals$get.output(which(outputs == "flatten_output"))

	new_fc <- mx.symbol.FullyConnected(data = flatten,
	num_hidden = 2,
	name = "fc1")
	# set name to original name in symbol$arguments
	new_soft <- mx.symbol.SoftmaxOutput(data = new_fc,
	name = "softmax")
	# set name to original name in symbol$arguments

	arg_params_new <- mx.model.init.params(symbol = new_soft,
	input.shape = list("data" = c(224, 224, 3, 8)),
	output.shape = NULL,
	initializer = mx.init.uniform(0.1),
	ctx = mx.cpu())$arg.params
	fc1_weights_new <- arg_params_new[["fc1_weight"]]
	fc1_bias_new <- arg_params_new[["fc1_bias"]]

	arg_params_new <- inception_bn$arg.params

	arg_params_new[["fc1_weight"]] <- fc1_weights_new
	arg_params_new[["fc1_bias"]] <- fc1_bias_new
	```


	## Fine-tuning

	```{r}
	model <- mx.model.FeedForward.create(
	symbol = new_soft,
	X = train,
	eval.data = val,
	ctx = mx.gpu(0),
	eval.metric = mx.metric.accuracy,
	num.round = 2,
	learning.rate = 0.05,
	momentum = 0.9,
	wd = 0.00001,
	kvstore = "local",
	array.batch.size = 128,
	epoch.end.callback = mx.callback.save.checkpoint("inception_bn"),
	batch.end.callback = mx.callback.log.train.metric(150),
	initializer = mx.init.Xavier(factor_type = "in", magnitude = 2.34),
	optimizer = "sgd",
	arg.params = arg_params_new,
	aux.params = inception_bn$aux.params
	)
	```
	## Making predictions

	```{r}
	preprocImage<- function(src, # URL or file location
	height = 224,
	width = 224,
	num_channels = 3, # 3 for RGB, 1 for grayscale
	mult_by = 1, # set to 255 for normalized image
	crop = FALSE) { # no crop by default
	im <- load.image(src)

	if (crop) {
	shape <- dim(im)
	short_edge <- min(shape[1:2])
	xx <- floor((shape[1] - short_edge) / 2)
	yy <- floor((shape[2] - short_edge) / 2)
	im <- crop.borders(im, xx, yy)
	}

	resized <- resize(im, size_x = width, size_y = height)
	arr <- as.array(resized) * mult_by
	dim(arr) <- c(width, height, num_channels, 1)
	return(arr)
	}
	```

	```{r}
	files <- list.files("./test_pad_224x224/")
	files <- paste0("./test_pad_224x224/", files)

	files <- split(files, rep(1:1250, each = 10))
	probs <- lapply(files, function(x) {
	images <- lapply(x, preprocImage, mult_by = 255)
	images <- do.call(abind, images)
	probs <- predict(model, X = images, ctx = mx.gpu(0))
	})
	saveRDS(probs, "probs.rds")
	probs <- t(do.call(cbind, probs))

	preds <- data.frame(id = 1:12500, label = probs[, 2])
	write.csv(preds, "subm.csv", row.names = FALSE, quote = FALSE)
	```


	<!-- INSERT SOURCE DOWNLOAD BUTTONS -->