Char RNN Example
=============================================
This tutorial shows how to use an LSTM model to build a character-level language model and generate text from it. For demonstration purposes, we use a Shakespearean text. You can find the data on [GitHub](https://github.com/dmlc/web-data/tree/master/mxnet/tinyshakespeare).
Load the Data
---------
Load in the data and preprocess it:
```r
require(mxnet)
```
```
## Loading required package: mxnet
```
```
## Loading required package: methods
```
Set the basic network parameters:
```r
batch.size = 32       # number of sequences per mini-batch
seq.len = 32          # length of each training sequence
num.hidden = 16       # number of hidden units per LSTM layer
num.embed = 16        # dimension of the character embedding
num.lstm.layer = 1    # number of stacked LSTM layers
num.round = 1         # number of training epochs
learning.rate = 0.1   # SGD learning rate
wd = 0.00001          # weight decay
clip_gradient = 1     # gradient clipping threshold
update.period = 1     # update weights after every batch
```
Download the data:
```r
download.data <- function(data_dir) {
  dir.create(data_dir, showWarnings = FALSE)
  if (!file.exists(paste0(data_dir, 'input.txt'))) {
    download.file(url='https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/tinyshakespeare/input.txt',
                  destfile=paste0(data_dir, 'input.txt'), method='wget')
  }
}
```
Make a dictionary that maps each character of the text to an integer index:
```r
make.dict <- function(text, max.vocab=10000) {
  text <- strsplit(text, '')
  dic <- list()
  idx <- 1
  for (c in text[[1]]) {
    if (!(c %in% names(dic))) {
      dic[[c]] <- idx
      idx <- idx + 1
    }
  }
  # reserve the last slot for out-of-vocabulary characters
  if (length(dic) == max.vocab - 1)
    dic[["UNKNOWN"]] <- idx
  cat(paste0("Total unique char: ", length(dic), "\n"))
  return (dic)
}
```
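For example, a quick sanity check on a short toy string (hypothetical input; the real dictionary is built from the Shakespeare text below):
```r
dic.toy <- make.dict("abcabd")
## Total unique char: 4
unlist(dic.toy)
## a b c d
## 1 2 3 4
```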
Transform the text into arrays of character indices:
```r
make.data <- function(file.path, seq.len=32, max.vocab=10000, dic=NULL) {
  fi <- file(file.path, "r")
  text <- paste(readLines(fi), collapse="\n")
  close(fi)

  if (is.null(dic))
    dic <- make.dict(text, max.vocab)
  lookup.table <- list()
  for (c in names(dic)) {
    idx <- dic[[c]]
    lookup.table[[idx]] <- c
  }

  char.lst <- strsplit(text, '')[[1]]
  num.seq <- as.integer(length(char.lst) / seq.len)
  char.lst <- char.lst[1:(num.seq * seq.len)]
  data <- array(0, dim=c(seq.len, num.seq))
  idx <- 1
  for (i in 1:num.seq) {
    for (j in 1:seq.len) {
      if (char.lst[idx] %in% names(dic)) {
        data[j, i] <- dic[[ char.lst[idx] ]] - 1
      } else {
        data[j, i] <- dic[["UNKNOWN"]] - 1
      }
      idx <- idx + 1
    }
  }
  return (list(data=data, dic=dic, lookup.table=lookup.table))
}
```
Drop the tail of the data so that the number of sequences is a multiple of the batch size:
```r
drop.tail <- function(X, batch.size) {
  shape <- dim(X)
  nstep <- as.integer(shape[2] / batch.size)
  return (X[, 1:(nstep * batch.size)])
}
```
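For instance, with 70 sequences and a batch size of 32, only the first 64 columns are kept (a toy check, not part of the tutorial data):
```r
X.toy <- array(0, dim=c(32, 70))
dim(drop.tail(X.toy, batch.size=32))
## [1] 32 64
```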
Get the labels for X; each character's label is simply the next character in the text:
```r
get.label <- function(X) {
  label <- array(0, dim=dim(X))
  d <- dim(X)[1]
  w <- dim(X)[2]
  # shift every element to the next position in column-major
  # order, wrapping around at the very end
  for (i in 0:(w-1)) {
    for (j in 1:d) {
      label[i*d+j] <- X[(i*d+j) %% (w*d) + 1]
    }
  }
  return (label)
}
```
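A small worked example (a toy matrix, not the tutorial data) makes the shift visible; reading column by column, each label is the element that follows its input, wrapping around at the end:
```r
X.toy <- matrix(1:6, nrow=3)
get.label(X.toy)
##      [,1] [,2]
## [1,]    2    5
## [2,]    3    6
## [3,]    4    1
```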
Get the training data and evaluation data:
```r
download.data("./data/")
ret <- make.data("./data/input.txt", seq.len=seq.len)
```
```
## Total unique char: 65
```
```r
X <- ret$data
dic <- ret$dic
lookup.table <- ret$lookup.table
vocab <- length(dic)
shape <- dim(X)
train.val.fraction <- 0.9
size <- shape[2]
X.train.data <- X[, 1:as.integer(size * train.val.fraction)]
X.val.data <- X[, -(1:as.integer(size * train.val.fraction))]
X.train.data <- drop.tail(X.train.data, batch.size)
X.val.data <- drop.tail(X.val.data, batch.size)
X.train.label <- get.label(X.train.data)
X.val.label <- get.label(X.val.data)
X.train <- list(data=X.train.data, label=X.train.label)
X.val <- list(data=X.val.data, label=X.val.label)
```
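It can help to check the resulting shapes before training (an optional sanity check; the exact number of columns depends on the downloaded text and the fraction above):
```r
dim(X.train$data)  # seq.len x (number of training sequences)
dim(X.val$data)    # seq.len x (number of validation sequences)
```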
Train the Model
--------------
The `mxnet` package provides the `mx.lstm` function, which builds and trains a general LSTM model:
```r
model <- mx.lstm(X.train, X.val,
                 ctx=mx.cpu(),
                 num.round=num.round,
                 update.period=update.period,
                 num.lstm.layer=num.lstm.layer,
                 seq.len=seq.len,
                 num.hidden=num.hidden,
                 num.embed=num.embed,
                 num.label=vocab,
                 batch.size=batch.size,
                 input.size=vocab,
                 initializer=mx.init.uniform(0.1),
                 learning.rate=learning.rate,
                 wd=wd,
                 clip_gradient=clip_gradient)
```
```
## Epoch [31] Train: NLL=3.53787130224343, Perp=34.3936275728271
## Epoch [62] Train: NLL=3.43087958036949, Perp=30.903813186055
## Epoch [93] Train: NLL=3.39771238228587, Perp=29.8956319855751
## Epoch [124] Train: NLL=3.37581711716687, Perp=29.2481732041015
## Epoch [155] Train: NLL=3.34523331338447, Perp=28.3671933405139
## Epoch [186] Train: NLL=3.30756356274787, Perp=27.31848454823
## Epoch [217] Train: NLL=3.25642968403829, Perp=25.9566978956055
## Epoch [248] Train: NLL=3.19825967486207, Perp=24.4898727477925
## Epoch [279] Train: NLL=3.14013971549828, Perp=23.1070950525017
## Epoch [310] Train: NLL=3.08747601837462, Perp=21.9216781782189
## Epoch [341] Train: NLL=3.04015595674863, Perp=20.9085038031042
## Epoch [372] Train: NLL=2.99839339255659, Perp=20.0532932584534
## Epoch [403] Train: NLL=2.95940091012609, Perp=19.2864139984503
## Epoch [434] Train: NLL=2.92603311380224, Perp=18.6534872738302
## Epoch [465] Train: NLL=2.89482756896395, Perp=18.0803835531869
## Epoch [496] Train: NLL=2.86668230478397, Perp=17.5786009078994
## Epoch [527] Train: NLL=2.84089368534943, Perp=17.1310684830416
## Epoch [558] Train: NLL=2.81725862932279, Perp=16.7309220880514
## Epoch [589] Train: NLL=2.79518870141492, Perp=16.3657166956952
## Epoch [620] Train: NLL=2.77445683225304, Perp=16.0299176962855
## Epoch [651] Train: NLL=2.75490970113174, Perp=15.719621374694
## Epoch [682] Train: NLL=2.73697900634351, Perp=15.4402696117257
## Epoch [713] Train: NLL=2.72059739336781, Perp=15.1893935780915
## Epoch [744] Train: NLL=2.70462837571585, Perp=14.948760335793
## Epoch [775] Train: NLL=2.68909904683828, Perp=14.7184093476224
## Epoch [806] Train: NLL=2.67460054451836, Perp=14.5065539595711
## Epoch [837] Train: NLL=2.66078997776751, Perp=14.3075873113043
## Epoch [868] Train: NLL=2.6476781639279, Perp=14.1212134100373
## Epoch [899] Train: NLL=2.63529039846876, Perp=13.9473621677371
## Epoch [930] Train: NLL=2.62367693518974, Perp=13.7863219168709
## Epoch [961] Train: NLL=2.61238282674384, Perp=13.6314936713501
## Iter [1] Train: Time: 10301.6818172932 sec, NLL=2.60536539345356, Perp=13.5361704272949
## Iter [1] Val: NLL=2.26093848746227, Perp=9.59208699731232
```
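Note that the two reported numbers carry the same information: perplexity is just the exponential of the negative log-likelihood, `Perp = exp(NLL)`. For example, `exp(2.2609) ≈ 9.59`, which matches the validation line above.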
Build Inference from the Model
--------------------
Define helper functions for sampling from the predicted distribution:
```r
cdf <- function(weights) {
  total <- sum(weights)
  result <- c()
  cumsum <- 0
  for (w in weights) {
    cumsum <- cumsum + w
    result <- c(result, cumsum / total)
  }
  return (result)
}

search.val <- function(cdf, x) {
  # binary search for the first index whose CDF value is >= x
  l <- 1
  r <- length(cdf)
  while (l <= r) {
    m <- as.integer((l+r)/2)
    if (cdf[m] < x) {
      l <- m+1
    } else {
      r <- m-1
    }
  }
  return (l)
}

choice <- function(weights) {
  cdf.vals <- cdf(as.array(weights))
  x <- runif(1)
  idx <- search.val(cdf.vals, x)
  return (idx)
}
```
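Together, `cdf`, `search.val`, and `choice` draw one index with probability proportional to its weight. Base R can do the same in a single call; a hypothetical drop-in replacement (not used in the rest of this tutorial) would be:
```r
choice.base <- function(weights) {
  # sample one index, with probability proportional to the weights
  sample.int(length(weights), size=1, prob=as.numeric(weights))
}
```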
Pick the next character either by random sampling, or deterministically by taking the character with the greatest probability:
```r
make.output <- function(prob, sample=FALSE) {
  if (!sample) {
    idx <- which.max(as.array(prob))
  } else {
    idx <- choice(prob)
  }
  return (idx)
}
```
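For example, with a toy probability vector (hypothetical values):
```r
p <- c(0.1, 0.7, 0.2)
make.output(p)               # deterministic: always 2, the argmax
make.output(p, sample=TRUE)  # random: usually 2, sometimes 1 or 3
```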
The `mxnet` package also provides `mx.lstm.inference` for building an inference model from a trained LSTM, and `mx.lstm.forward` for getting one step of forward output from it.
Build an inference from the model:
```r
infer.model <- mx.lstm.inference(num.lstm.layer=num.lstm.layer,
                                 input.size=vocab,
                                 num.hidden=num.hidden,
                                 num.embed=num.embed,
                                 num.label=vocab,
                                 arg.params=model$arg.params,
                                 ctx=mx.cpu())
```
Generate a sequence of 75 characters using the `mx.lstm.forward` function:
```r
start <- 'a'
seq.len <- 75
random.sample <- TRUE

last.id <- dic[[start]]
out <- "a"
for (i in (1:(seq.len-1))) {
  input <- c(last.id-1)
  ret <- mx.lstm.forward(infer.model, input, FALSE)
  infer.model <- ret$model
  prob <- ret$prob
  last.id <- make.output(prob, random.sample)
  out <- paste0(out, lookup.table[[last.id]])
}
cat(paste0(out, "\n"))
```
The result:
```
ah not a drobl greens
Settled asing lately sistering sounted to their hight
```
Create Other RNN Models
----------------
In `mxnet`, other RNN variants, such as the vanilla RNN and the GRU, are also provided:
- For a vanilla RNN model, replace `mx.lstm` with `mx.rnn` to train the model. Likewise, replace `mx.lstm.inference` and `mx.lstm.forward` with `mx.rnn.inference` and `mx.rnn.forward` to build an inference model and get the forward result from it.
- For a GRU model, replace `mx.lstm` with `mx.gru` to train the model, and replace `mx.lstm.inference` and `mx.lstm.forward` with `mx.gru.inference` and `mx.gru.forward` in the same way; see the sketch after this list.
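For instance, training the same character model with a GRU could look like the sketch below. It assumes the same data and hyperparameters as above and that the argument names mirror the LSTM version (in particular, `num.gru.layer` in place of `num.lstm.layer` is an assumption; check `?mx.gru` in your installed version):
```r
# a sketch: same data and hyperparameters, GRU instead of LSTM;
# num.gru.layer is assumed to mirror num.lstm.layer
model <- mx.gru(X.train, X.val,
                ctx=mx.cpu(),
                num.round=num.round,
                update.period=update.period,
                num.gru.layer=num.lstm.layer,
                seq.len=seq.len,
                num.hidden=num.hidden,
                num.embed=num.embed,
                num.label=vocab,
                batch.size=batch.size,
                input.size=vocab,
                initializer=mx.init.uniform(0.1),
                learning.rate=learning.rate,
                wd=wd,
                clip_gradient=clip_gradient)
```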
Next Steps
----------------
* [MXNet tutorials index](http://mxnet.io/tutorials/index.html)