# scripts/nn/util.dml - systemds - Git at Google

 #-------------------------------------------------------------
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #-------------------------------------------------------------

 /*
  * Utility functions.
  */

 channel_sums = function(matrix[double] X, int C, int Hin, int Win)
     return (matrix[double] out) {
   /*
    * Sums a 4D input over every axis except the channel axis.
    *
    * Inputs:
    *  - X: Inputs, of shape (N, C*Hin*Win).
    *  - C: Number of input channels (dimensionality of input depth).
    *  - Hin: Input height.
    *  - Win: Input width.
    *
    * Outputs:
    *  - out: Per-channel sums, of shape (C, 1).
    */
   # Collapse the N examples into a single row of per-position totals.
   position_totals = colSums(X)  # shape (1, C*Hin*Win)

   # Lay the totals out with one channel per row ...
   totals_by_channel = matrix(position_totals, rows=C, cols=Hin*Win)  # shape (C, Hin*Win)

   # ... and add up the spatial positions of each channel.
   out = rowSums(totals_by_channel)  # shape (C, 1)
 }

 predict_class = function(matrix[double] Prob, int C, int H, int W) return (matrix[double] Prediction) {
   /*
    * Picks, for every example (and every spatial position), the label
    * with the largest probability.
    *
    * Inputs:
    *  - Prob: Input probabilities.  When H*W > 1, each row is
    *      reshaped to (C, H*W) before the maximum is taken.
    *  - C: Number of output labels.
    *  - H: Height of the spatial grid.
    *  - W: Width of the spatial grid.
    *
    * Outputs:
    *  - Prediction: Class labels (one-based).  One column when
    *      H == W == 1, otherwise H*W columns per example.
    */
   if (H != 1 | W != 1) {
     # Spatial case: resolve the winning label separately per position.
     num_examples = nrow(Prob)
     num_positions = H * W
     Prediction = matrix(0, rows=num_examples, cols=num_positions)
     parfor (i in 1:num_examples) {
       # One row per label, one column per position.
       scores = matrix(Prob[i,], rows=C, cols=num_positions)
       # Transpose so rowIndexMax scans the labels of each position.
       Prediction[i,] = t(rowIndexMax(t(scores)))  # assuming one-based label mapping
     }
   }
   else {
     Prediction = rowIndexMax(Prob)  # assuming one-based label mapping
   }
 }

 im2col = function(matrix[double] img, int Hin, int Win, int Hf, int Wf, int strideh, int stridew)
     return (matrix[double] img_cols) {
   /*
    * Rearrange local image regions (patches) into columns.
    *
    * Every (Hf x Wf) window of the image, taken at the given strides,
    * is flattened across all channels and stored as one column of the
    * result.
    *
    * Assumes image has already been padded as necessary.
    *
    * Inputs:
    *  - img: Input image, of shape (C, Hin*Win), where C is the number
    *      of input channels (depth).
    *  - Hin: Input height, including padding.
    *  - Win: Input width, including padding.
    *  - Hf: Filter height.
    *  - Wf: Filter width.
    *  - strideh: Stride over height.
    *  - stridew: Stride over width.
    *
    * Outputs:
    *  - img_cols: Local spatial regions (patches) of the image stretched
    *      out into columns, of shape (C*Hf*Wf, Hout*Wout).
    */
   C = nrow(img)
   # Number of window positions along each axis.
   Hout = as.integer(floor((Hin-Hf)/strideh + 1))
   Wout = as.integer(floor((Win-Wf)/stridew + 1))

   # Note: We start with `img_cols` transposed to allow for row-major
   # left-indexing inside the loop, which is more performant.
   img_cols = matrix(0, rows=Hout*Wout, cols=C*Hf*Wf)  # zeros
   # NOTE(review): `check=0` presumably turns off parfor's dependency
   # analysis; each iteration writes a distinct row of `img_cols` -- confirm
   # against the DML language reference.
   parfor (hout in 1:Hout, check=0) {  # all output rows
     hin = (hout-1)*strideh + 1  # top row of the window in the input
     parfor (wout in 1:Wout, check=0) {  # all output columns
       win = (wout-1)*stridew + 1  # left column of the window in the input
       # Extract a local patch of the input image corresponding spatially to the filter sizes.
       img_patch = matrix(0, rows=C, cols=Hf*Wf)  # zeros
       parfor (c in 1:C) {  # all channels
         img_slice = matrix(img[c,], rows=Hin, cols=Win)  # reshape
         # Cut the window out of this channel and flatten it into row c of the patch.
         img_patch[c,] = matrix(img_slice[hin:hin+Hf-1, win:win+Wf-1], rows=1, cols=Hf*Wf)
       }
       # Linear index of the window position (hout, wout); the whole patch
       # is flattened into that row.
       img_cols[(hout-1)*Wout + wout,] = t(matrix(img_patch, rows=C*Hf*Wf, cols=1))  # reshape
     }
   }
   # Undo the transposed layout so that patches end up as columns.
   img_cols = t(img_cols)
 }

 col2im = function(matrix[double] img_cols, int C, int Hin, int Win, int Hf, int Wf,
                   int strideh, int stridew, string reduction)
     return (matrix[double] img) {
   /*
    * Create an image from columns of local image regions (patches).
    *
    * The reduction strategy determines how to deal with overlapping
    * patches.  If it is set to "add", any overlapping patches will be
    * added together when creating the image.  This is useful when
    * computing gradients on the original image given gradients on the
    * patches.  Otherwise, if "none" is provided, any overlapping
    * patches will just override previous ones when creating the image.
    * This is useful when recreating an image from the output of
    * `im2col`.
    *
    * Note that the code only tests for "add"; any other string takes
    * the overriding ("none") path.
    *
    * Assumes original image was already padded as necessary.
    *
    * Inputs:
    *  - img_cols: Local spatial regions (patches) of the image stretched
    *      out into columns, of shape (C*Hf*Wf, Hout*Wout).
    *  - C: Number of input channels (dimensionality of input depth).
    *  - Hin: Input height, including padding.
    *  - Win: Input width, including padding.
    *  - Hf: Filter height.
    *  - Wf: Filter width.
    *  - strideh: Stride over height.
    *  - stridew: Stride over width.
    *  - reduction: The reduction strategy to use for overlapping
    *      patches.  Valid options are "add" and "none".
    *
    * Outputs:
    *  - img: Input image, of shape (C, Hin*Win).
    */
   # Number of window positions along each axis.
   Hout = as.integer(floor((Hin-Hf)/strideh + 1))
   Wout = as.integer(floor((Win-Wf)/stridew + 1))

   img = matrix(0, rows=C, cols=Hin*Win)  # zeros
   # The two outer loops are plain `for` loops: patches are applied one
   # after another, so later patches see the effect of earlier ones.
   for (hout in 1:Hout) {  # all output rows
     hin = (hout-1)*strideh + 1  # top row of the window in the image
     for (wout in 1:Wout) {  # all output columns
       win = (wout-1)*stridew + 1  # left column of the window in the image
       # Take the column belonging to window (hout, wout) and reshape it to one row per channel.
       img_patch = matrix(img_cols[,(hout-1)*Wout + wout], rows=C, cols=Hf*Wf)  # reshape
       # Each iteration reads and writes only row c of `img`.
       parfor (c in 1:C) {  # all channels
         img_patch_slice = matrix(img_patch[c,], rows=Hf, cols=Wf)  # reshape
         if (reduction == "add") {
           # Place the patch on an otherwise-zero canvas and accumulate it.
           img_slice = matrix(0, rows=Hin, cols=Win)
           img_slice[hin:hin+Hf-1, win:win+Wf-1] = img_patch_slice
           img[c,] = img[c,] + matrix(img_slice, rows=1, cols=Hin*Win)
         } else {
           # Overwrite the window region of the current channel image.
           img_slice = matrix(img[c,], rows=Hin, cols=Win)
           img_slice[hin:hin+Hf-1, win:win+Wf-1] = img_patch_slice
           img[c,] = matrix(img_slice, rows=1, cols=Hin*Win)
         }
       }
     }
   }
 }

 pad_image = function(matrix[double] img, int Hin, int Win, int padh, int padw, double pad_value)
     return (matrix[double] img_padded) {
   /*
    * Surrounds every channel of an image with a border of `pad_value`
    * along the height and width dimensions.
    *
    * Inputs:
    *  - img: Input image, of shape (C, Hin*Win), where C is the number
    *      of input channels (depth).
    *  - Hin: Input height.
    *  - Win: Input width.
    *  - padh: Padding for top and bottom sides.
    *  - padw: Padding for left and right sides.
    *  - pad_value: Value to use for the padding.
    *      A typical value is 0.
    *
    * Outputs:
    *  - img_padded: The input image padded along the height and width
    *      dimensions, of shape (C, (Hin+2*padh)*(Win+2*padw)).
    */
   C = nrow(img)

   # Padded dimensions and the window the original image occupies in them.
   Hpad = Hin + 2*padh
   Wpad = Win + 2*padw
   h_lo = padh + 1
   h_hi = padh + Hin
   w_lo = padw + 1
   w_hi = padw + Win

   img_padded = matrix(0, rows=C, cols=Hpad*Wpad)  # zeros
   parfor (c in 1:C) {
     # Start from a canvas filled with the padding value ...
     canvas = matrix(pad_value, rows=Hpad, cols=Wpad)
     # ... drop channel c into its centre ...
     canvas[h_lo:h_hi, w_lo:w_hi] = matrix(img[c,], rows=Hin, cols=Win)
     # ... and flatten it back into a single row.
     img_padded[c,] = matrix(canvas, rows=1, cols=Hpad*Wpad)
   }
 }

 unpad_image = function(matrix[double] img_padded, int Hin, int Win, int padh, int padw)
     return (matrix[double] img) {
   /*
    * Strips the padding border from every channel of an image.
    *
    * Inputs:
    *  - img_padded: The input image padded along the height and width
    *      dimensions, of shape (C, (Hin+2*padh)*(Win+2*padw)).
    *  - Hin: Input height of unpadded image.
    *  - Win: Input width of unpadded image.
    *  - padh: Padding for top and bottom sides.
    *  - padw: Padding for left and right sides.
    *
    * Outputs:
    *  - img: Input image, of shape (C, Hin*Win), where C is the number
    *      of input channels (depth).
    */
   C = nrow(img_padded)

   # Padded dimensions and the window that holds the unpadded image.
   Hpad = Hin + 2*padh
   Wpad = Win + 2*padw
   h_lo = padh + 1
   h_hi = padh + Hin
   w_lo = padw + 1
   w_hi = padw + Win

   img = matrix(0, rows=C, cols=Hin*Win)
   parfor (c in 1:C) {
     padded_channel = matrix(img_padded[c,], rows=Hpad, cols=Wpad)
     # Keep only the interior window and flatten it into row c.
     img[c,] = matrix(padded_channel[h_lo:h_hi, w_lo:w_hi], rows=1, cols=Hin*Win)
   }
 }

 threshold = function(matrix[double] X, double thresh)
     return (matrix[double] out) {
   /*
    * Builds an indicator matrix that marks the entries of X lying
    * strictly above the threshold.
    *
    * Inputs:
    *  - X: Inputs, of shape (any, any).
    *  - thresh: Input threshold.
    *
    * Outputs:
    *  - out: Outputs, of same shape as X, with values in {0, 1}.
    */
   # Entries equal to `thresh` are not marked (strict comparison).
   is_above = (X > thresh)
   out = is_above
 }

 transpose_NCHW_to_CNHW = function(matrix[double] X, int C)
     return (matrix[double] out) {
   /*
    * Reshape util for tensors in NCHW format.
    * Swaps the 1st (N) and 2nd (C) dimensions.
    *
    * Inputs:
    *  - X: Inputs, of shape (N, C*H*W).
    *  - C: Number of channels (dimensionality of depth).
    *
    * Outputs:
    *  - out: Outputs with the N and C axes transposed, of
    *      shape (C, N*H*W).
    */
   N = nrow(X)
   D = ncol(X) / C  # entries per channel (H*W)
   num_blocks = N*C  # one block per (example, channel) pair

   # View X as one row per (example, channel) block.  The blocks stay
   # intact, so the whole task reduces to re-ordering these rows and
   # reshaping the result to C rows.
   X_blocks = matrix(X, rows=num_blocks, cols=D)

   # Source row for every target row.  Bringing the ith channel of all
   # examples together needs the ordering
   # [1, 1+C, 1+2C, ..., 1+(N-1)C,
   #  2, 2+C, ..., 2+(N-1)C,
   #  .
   #  .
   #  .
   #  C, 2C, ..., NC]'
   # which is an outer sum of channel numbers and per-example offsets.
   example_offsets = C*t(seq(0,N-1))
   source_grid = outer(seq(1,C), example_offsets, "+")
   source_rows = matrix(source_grid, rows=num_blocks, cols=1)

   # Permutation matrix with a one at (target row, source row).
   selector = table(seq(1, num_blocks), source_rows, num_blocks, num_blocks)

   # Re-order the blocks and reshape to the output shape with C rows.
   reordered = selector %*% X_blocks
   out = matrix(reordered, rows=C, cols=N*D)
 }

 top_k_row = function(matrix[double] X, integer r, integer k)
     return (matrix[double] values, matrix[double] indices) {
   /*
    * Computes the top k values (i.e. probabilities) and associated
    * indices (i.e. classes) in the rth row of the input matrix X.
    *
    * Stops with an explanatory message if `r` is not a valid row of X
    * or `k` is not in [1, D].
    *
    * Inputs:
    *  - X: Inputs, of shape (N, D).
    *  - r: Input row number of X to look for, in [1, N].
    *  - k: Input number of top elements to look for, in [1, D].
    *
    * Outputs:
    *  - values: The top k values at the rth row, in decreasing
    *    order, of shape (1, k).
    *  - indices: The class indices, of shape (1, k).
    */
   # Validate up front so that callers get a clear message rather than
   # an index-out-of-bounds failure from the slicing below.
   if (r < 1 | r > nrow(X)) {
     stop("top_k_row: r = " + r + " is outside the valid row range [1, " + nrow(X) + "].")
   }
   if (k < 1 | k > ncol(X)) {
     stop("top_k_row: k = " + k + " is outside the valid range [1, " + ncol(X) + "].")
   }

   row = X[r, ]
   row_t = t(row)  # column vector, so it can be ordered by its first column
   indices = order(target=row_t, by=1, decreasing=TRUE, index.return=TRUE)
   indices = t(indices)
   indices = indices[1, 1:k]  # keep the positions of the k largest entries

   # Look up the value behind each selected position.
   values = matrix(0, rows=1, cols=k)
   for (i in 1:k) {
     values[1, i] = row[1, as.scalar(indices[1, i])]
   }
 }

 top_k = function(matrix[double] X, integer k)
      return (matrix[double] values, matrix[double] indices) {
   /*
    * Computes the top k values (i.e. probabilities) and associated
    * indices (i.e. classes) for every row of the input matrix X.
    *
    * Inputs:
    *  - X: Inputs, of shape (N, D).
    *  - k: Input number of top elements to look for.
    *
    * Outputs:
    *  - values: The top k values of each row, of shape (N, k).
    *  - indices: The indices of classes, of shape (N, k).
    */
   N = nrow(X)
   values = matrix(0, rows=N, cols=k)
   indices = matrix(0, rows=N, cols=k)

   # Rows are independent: iteration r only writes row r of each output.
   parfor (r in 1:N) {
     [value, index] = top_k_row(X, r, k)
     values[r, ] = value
     indices[r, ] = index
   }
 }

 top_k2d = function(matrix[double] X, int k, int C, int Hin, int Win)
      return (matrix[double] values, matrix[double] indices) {
   /*
    * Computes, for every example and spatial position, the top k
    * values (i.e. probabilities) across the channel axis together
    * with their indices (i.e. classes).
    *
    * Inputs:
    *  - X: Inputs, of shape (N, C*Hin*Win).
    *  - k: Input number of top elements to look for.
    *  - C: Number of input channels (dimensionality of input depth).
    *  - Hin: Input height.
    *  - Win: Input width.
    *
    * Outputs:
    *  - values: The top k values along the channel dimension, of shape
    *    (N, k*Hin*Win).
    *  - indices: The indices of classes, of shape (N, k*Hin*Win).
    */
   N = nrow(X)

   # (N, C*Hin*Win) -> (C, N*Hin*Win) -> (N*Hin*Win, C): one row per
   # (example, position) holding the values of all channels.
   X_chan_major = transpose_NCHW_to_CNHW(X, C)
   candidates = t(X_chan_major)

   # Row-wise top k over the channels.
   [top_vals, top_idx] = top_k(candidates, k)  # shape: (N*Hin*Win, k)

   # Back to one row per example: (k, N*Hin*Win) -> (N, k*Hin*Win).
   # The helper swaps the two leading axes, so here k plays the role of
   # its row axis and N the role of its "channel" argument.
   values = transpose_NCHW_to_CNHW(t(top_vals), N)
   indices = transpose_NCHW_to_CNHW(t(top_idx), N)
 }
	#-------------------------------------------------------------
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#
	#-------------------------------------------------------------

	/*
	* Utility functions.
	*/

	channel_sums = function(matrix[double] X, int C, int Hin, int Win)
	    return (matrix[double] out) {
	  /*
	   * Adds up all entries belonging to each channel of a 4D input.
	   *
	   * Inputs:
	   *  - X: Inputs, of shape (N, C*Hin*Win).
	   *  - C: Number of input channels (dimensionality of input depth).
	   *  - Hin: Input height.
	   *  - Win: Input width.
	   *
	   * Outputs:
	   *  - out: Outputs, of shape (C, 1).
	   */
	  # Sum over the examples first; this leaves one total per position.
	  summed_over_n = colSums(X)  # shape (1, C*Hin*Win)

	  # One row per channel, one column per spatial position.
	  channel_rows = matrix(summed_over_n, rows=C, cols=Hin*Win)  # shape (C, Hin*Win)

	  # Sum over the spatial positions of every channel.
	  out = rowSums(channel_rows)  # shape (C, 1)
	}

	predict_class = function(matrix[double] Prob, int C, int H, int W) return (matrix[double] Prediction) {
	  /*
	   * Computes the class labels from the probabilities.
	   *
	   * Inputs:
	   *  - Prob: Input probabilities.  When H*W > 1, each row is
	   *      reshaped to (C, H*W) before the maximum is taken.
	   *  - C: Number of output labels.
	   *  - H: Height of the spatial grid.
	   *  - W: Width of the spatial grid.
	   *
	   * Outputs:
	   *  - Prediction: Class labels (one-based).  One column when
	   *      H == W == 1, otherwise H*W columns per example.
	   */
	  if (H != 1 | W != 1) {
	    # Spatial case: choose the best label separately for each position.
	    rows_total = nrow(Prob)
	    grid_size = H * W
	    Prediction = matrix(0, rows=rows_total, cols=grid_size)
	    parfor (n in 1:rows_total) {
	      # One row per label, one column per position.
	      label_grid = matrix(Prob[n,], rows=C, cols=grid_size)
	      # Transpose so rowIndexMax scans the labels of each position.
	      Prediction[n,] = t(rowIndexMax(t(label_grid)))  # assuming one-based label mapping
	    }
	  }
	  else {
	    Prediction = rowIndexMax(Prob)  # assuming one-based label mapping
	  }
	}

	im2col = function(matrix[double] img, int Hin, int Win, int Hf, int Wf, int strideh, int stridew)
	    return (matrix[double] img_cols) {
	  /*
	   * Rearrange local image regions (patches) into columns.
	   *
	   * Assumes image has already been padded as necessary.
	   *
	   * Inputs:
	   *  - img: Input image, of shape (C, Hin*Win), where C is the number
	   *      of input channels (depth).
	   *  - Hin: Input height, including padding.
	   *  - Win: Input width, including padding.
	   *  - Hf: Filter height.
	   *  - Wf: Filter width.
	   *  - strideh: Stride over height.
	   *  - stridew: Stride over width.
	   *
	   * Outputs:
	   *  - img_cols: Local spatial regions (patches) of the image stretched
	   *      out into columns, of shape (C*Hf*Wf, Hout*Wout).
	   */
	  C = nrow(img)
	  # Number of window positions along each axis.
	  Hout = as.integer(floor((Hin-Hf)/strideh + 1))
	  Wout = as.integer(floor((Win-Wf)/stridew + 1))

	  # Note: We start with `img_cols` transposed to allow for row-major
	  # left-indexing inside the loop, which is more performant.
	  img_cols = matrix(0, rows=Hout*Wout, cols=C*Hf*Wf)  # zeros
	  parfor (hout in 1:Hout, check=0) {  # all output rows
	    hin = (hout-1)*strideh + 1  # top row of the window in the input
	    parfor (wout in 1:Wout, check=0) {  # all output columns
	      win = (wout-1)*stridew + 1  # left column of the window in the input
	      # Extract a local patch of the input image corresponding spatially to the filter sizes.
	      img_patch = matrix(0, rows=C, cols=Hf*Wf)  # zeros
	      parfor (c in 1:C) {  # all channels
	        img_slice = matrix(img[c,], rows=Hin, cols=Win)  # reshape
	        img_patch[c,] = matrix(img_slice[hin:hin+Hf-1, win:win+Wf-1], rows=1, cols=Hf*Wf)
	      }
	      # Row (hout-1)*Wout + wout is the linear index of this window position.
	      img_cols[(hout-1)*Wout + wout,] = t(matrix(img_patch, rows=C*Hf*Wf, cols=1))  # reshape
	    }
	  }
	  # Undo the transposed layout so that patches end up as columns.
	  img_cols = t(img_cols)
	}

	col2im = function(matrix[double] img_cols, int C, int Hin, int Win, int Hf, int Wf,
	                  int strideh, int stridew, string reduction)
	    return (matrix[double] img) {
	  /*
	   * Create an image from columns of local image regions (patches).
	   *
	   * The reduction strategy determines how to deal with overlapping
	   * patches.  If it is set to "add", any overlapping patches will be
	   * added together when creating the image.  This is useful when
	   * computing gradients on the original image given gradients on the
	   * patches.  Otherwise, if "none" is provided, any overlapping
	   * patches will just override previous ones when creating the image.
	   * This is useful when recreating an image from the output of
	   * `im2col`.
	   *
	   * Note that the code only tests for "add"; any other string takes
	   * the overriding ("none") path.
	   *
	   * Assumes original image was already padded as necessary.
	   *
	   * Inputs:
	   *  - img_cols: Local spatial regions (patches) of the image stretched
	   *      out into columns, of shape (C*Hf*Wf, Hout*Wout).
	   *  - C: Number of input channels (dimensionality of input depth).
	   *  - Hin: Input height, including padding.
	   *  - Win: Input width, including padding.
	   *  - Hf: Filter height.
	   *  - Wf: Filter width.
	   *  - strideh: Stride over height.
	   *  - stridew: Stride over width.
	   *  - reduction: The reduction strategy to use for overlapping
	   *      patches.  Valid options are "add" and "none".
	   *
	   * Outputs:
	   *  - img: Input image, of shape (C, Hin*Win).
	   */
	  # Number of window positions along each axis.
	  Hout = as.integer(floor((Hin-Hf)/strideh + 1))
	  Wout = as.integer(floor((Win-Wf)/stridew + 1))

	  img = matrix(0, rows=C, cols=Hin*Win)  # zeros
	  # Sequential loops: patches are applied one after another, so later
	  # patches see the effect of earlier ones.
	  for (hout in 1:Hout) {  # all output rows
	    hin = (hout-1)*strideh + 1  # top row of the window in the image
	    for (wout in 1:Wout) {  # all output columns
	      win = (wout-1)*stridew + 1  # left column of the window in the image
	      # Take the column of window (hout, wout) and reshape it to one row per channel.
	      img_patch = matrix(img_cols[,(hout-1)*Wout + wout], rows=C, cols=Hf*Wf)  # reshape
	      parfor (c in 1:C) {  # all channels
	        img_patch_slice = matrix(img_patch[c,], rows=Hf, cols=Wf)  # reshape
	        if (reduction == "add") {
	          # Place the patch on an otherwise-zero canvas and accumulate it.
	          img_slice = matrix(0, rows=Hin, cols=Win)
	          img_slice[hin:hin+Hf-1, win:win+Wf-1] = img_patch_slice
	          img[c,] = img[c,] + matrix(img_slice, rows=1, cols=Hin*Win)
	        } else {
	          # Overwrite the window region of the current channel image.
	          img_slice = matrix(img[c,], rows=Hin, cols=Win)
	          img_slice[hin:hin+Hf-1, win:win+Wf-1] = img_patch_slice
	          img[c,] = matrix(img_slice, rows=1, cols=Hin*Win)
	        }
	      }
	    }
	  }
	}

	pad_image = function(matrix[double] img, int Hin, int Win, int padh, int padw, double pad_value)
	    return (matrix[double] img_padded) {
	  /*
	   * Pads an image along the height and width dimensions with
	   * `pad_value`.
	   *
	   * Inputs:
	   *  - img: Input image, of shape (C, Hin*Win), where C is the number
	   *      of input channels (depth).
	   *  - Hin: Input height.
	   *  - Win: Input width.
	   *  - padh: Padding for top and bottom sides.
	   *  - padw: Padding for left and right sides.
	   *  - pad_value: Value to use for the padding.
	   *      A typical value is 0.
	   *
	   * Outputs:
	   *  - img_padded: The input image padded along the height and width
	   *      dimensions, of shape (C, (Hin+2*padh)*(Win+2*padw)).
	   */
	  C = nrow(img)
	  img_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))  # zeros
	  parfor (c in 1:C) {
	    img_slice = matrix(img[c,], rows=Hin, cols=Win)  # depth slice C reshaped
	    # Canvas filled with the padding value; the image goes into its centre.
	    img_padded_slice = matrix(pad_value, rows=Hin+2*padh, cols=Win+2*padw)
	    img_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = img_slice
	    img_padded[c,] = matrix(img_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw))  # reshape
	  }
	}

	unpad_image = function(matrix[double] img_padded, int Hin, int Win, int padh, int padw)
	    return (matrix[double] img) {
	  /*
	   * Unpads an image along the height and width dimensions.
	   *
	   * Inputs:
	   *  - img_padded: The input image padded along the height and width
	   *      dimensions, of shape (C, (Hin+2*padh)*(Win+2*padw)).
	   *  - Hin: Input height of unpadded image.
	   *  - Win: Input width of unpadded image.
	   *  - padh: Padding for top and bottom sides.
	   *  - padw: Padding for left and right sides.
	   *
	   * Outputs:
	   *  - img: Input image, of shape (C, Hin*Win), where C is the number
	   *      of input channels (depth).
	   */
	  C = nrow(img_padded)
	  img = matrix(0, rows=C, cols=Hin*Win)
	  parfor (c in 1:C) {
	    img_padded_slice = matrix(img_padded[c,], rows=(Hin+2*padh), cols=(Win+2*padw))
	    # Keep only the interior window that holds the original image.
	    img_slice = img_padded_slice[padh+1:padh+Hin, padw+1:padw+Win]
	    img[c,] = matrix(img_slice, rows=1, cols=Hin*Win)
	  }
	}

	threshold = function(matrix[double] X, double thresh)
	    return (matrix[double] out) {
	  /*
	   * Marks with a 1 every entry of X that is strictly greater than
	   * the threshold, and with a 0 every other entry.
	   *
	   * Inputs:
	   *  - X: Inputs, of shape (any, any).
	   *  - thresh: Input threshold.
	   *
	   * Outputs:
	   *  - out: Outputs, of same shape as X.
	   */
	  indicator = (X > thresh)  # strict: entries equal to thresh give 0
	  out = indicator
	}

	transpose_NCHW_to_CNHW = function(matrix[double] X, int C)
	    return (matrix[double] out) {
	  /*
	   * Reshape util for tensors in NCHW format.
	   * Transposes the 1st and 2nd dimensions.
	   *
	   * Inputs:
	   *  - X: Inputs, of shape (N, C*H*W).
	   *  - C: Number of channels (dimensionality of depth).
	   *
	   * Outputs:
	   *  - out: Outputs with the N and C axes transposed, of
	   *      shape (C, N*H*W).
	   */
	  N = nrow(X)
	  D = ncol(X) / C  # entries per channel (H*W)

	  # This is an easy reshape because the channels remain intact. By
	  # reshaping X to a matrix with N*C rows, we can reduce our task to
	  # re-ordering rows (followed by the obvious reshape to achieve the
	  # required output shape with C rows).
	  #
	  # The difficult part is to obtain the permutation matrix required
	  # for re-ordering the rows. In this case, since we want to bring the
	  # ith channels from all rows together, we will need a column vector
	  # of the following form:
	  # [1, 1+C, 1+2C, ..., 1+(N-1)C,
	  #  2, 2+C, ..., 2+(N-1)C,
	  #  3, 3+C, ..., 3+(N-1)C,
	  #  .
	  #  .
	  #  .
	  #  C, 2C, ..., NC]'
	  # This vector can be produced via an outer call.
	  col_idx = outer(seq(1,C), C*t(seq(0,N-1)), "+")

	  # Generate the permutation matrix by:
	  # - reshaping the result of outer into a col
	  # - invoking table
	  permut = table(seq(1, N*C), matrix(col_idx, rows=N*C, cols=1), N*C, N*C)

	  # Generate the output by:
	  # - pre-multiplying the (reshaped) X with the permutation matrix
	  #   (a matrix product, `%*%`, not the modulus operator `%%`)
	  # - reshape to get the output shape with C rows
	  out = matrix(permut %*% matrix(X, rows=N*C, cols=D), rows=C, cols=N*D)
	}

	top_k_row = function(matrix[double] X, integer r, integer k)
	    return (matrix[double] values, matrix[double] indices) {
	  /*
	   * Computes the top k values (i.e. probabilities) and associated
	   * indices (i.e. classes) in the rth row of the input matrix X.
	   *
	   * Stops with an explanatory message if `r` is not a valid row of X
	   * or `k` is not in [1, D].
	   *
	   * Inputs:
	   *  - X: Inputs, of shape (N, D).
	   *  - r: Input row number of X to look for, in [1, N].
	   *  - k: Input number of top elements to look for, in [1, D].
	   *
	   * Outputs:
	   *  - values: The top k values at the rth row, in decreasing
	   *    order, of shape (1, k).
	   *  - indices: The class indices, of shape (1, k).
	   */
	  # Validate up front so that callers get a clear message rather than
	  # an index-out-of-bounds failure from the slicing below.
	  if (r < 1 | r > nrow(X)) {
	    stop("top_k_row: r = " + r + " is outside the valid row range [1, " + nrow(X) + "].")
	  }
	  if (k < 1 | k > ncol(X)) {
	    stop("top_k_row: k = " + k + " is outside the valid range [1, " + ncol(X) + "].")
	  }

	  row = X[r, ]
	  row_t = t(row)  # column vector, so it can be ordered by its first column
	  indices = order(target=row_t, by=1, decreasing=TRUE, index.return=TRUE)
	  indices = t(indices)
	  indices = indices[1, 1:k]  # keep the positions of the k largest entries

	  # Look up the value behind each selected position.
	  values = matrix(0, rows=1, cols=k)
	  for (i in 1:k) {
	    values[1, i] = row[1, as.scalar(indices[1, i])]
	  }
	}

	top_k = function(matrix[double] X, integer k)
	    return (matrix[double] values, matrix[double] indices) {
	  /*
	   * Computes the top k values (i.e. probabilities) and associated
	   * indices (i.e. classes) for every row of the input matrix X.
	   *
	   * Inputs:
	   *  - X: Inputs, of shape (N, D).
	   *  - k: Input number of top elements to look for.
	   *
	   * Outputs:
	   *  - values: The top k values of each row, of shape (N, k).
	   *  - indices: The indices of classes, of shape (N, k).
	   */
	  N = nrow(X)
	  values = matrix(0, rows=N, cols=k)
	  indices = matrix(0, rows=N, cols=k)

	  # Rows are independent: iteration r only writes row r of each output.
	  parfor (r in 1:N) {
	    [value, index] = top_k_row(X, r, k)
	    values[r, ] = value
	    indices[r, ] = index
	  }
	}

	top_k2d = function(matrix[double] X, int k, int C, int Hin, int Win)
	    return (matrix[double] values, matrix[double] indices) {
	  /*
	   * Computes the top k values (i.e. probabilities) and associated
	   * indices (i.e. classes) across the channel axis of the input
	   * matrix X, for every example and spatial position.
	   *
	   * Inputs:
	   *  - X: Inputs, of shape (N, C*Hin*Win).
	   *  - k: Input number of top elements to look for.
	   *  - C: Number of input channels (dimensionality of input depth).
	   *  - Hin: Input height.
	   *  - Win: Input width.
	   *
	   * Outputs:
	   *  - values: The top k values along the channel dimension, of shape
	   *    (N, k*Hin*Win).
	   *  - indices: The indices of classes, of shape (N, k*Hin*Win).
	   */
	  N = nrow(X)

	  # Reshape the input matrix (N, C*Hin*Win) to (N*Hin*Win, C), i.e.
	  # one row per (example, position) holding the values of all channels.
	  channels_first = transpose_NCHW_to_CNHW(X, C)
	  per_position = t(channels_first)

	  # Compute the top k for the reshaped matrix.
	  [best_values, best_indices] = top_k(per_position, k)  # shape: (N*Hin*Win, k)

	  # Back to one row per example: (k, N*Hin*Win) -> (N, k*Hin*Win).
	  values = transpose_NCHW_to_CNHW(t(best_values), N)
	  indices = transpose_NCHW_to_CNHW(t(best_indices), N)
	}