blob: 807d7baad923fd5403efad3f94aeb0d2433ad78a [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
/*
* Utility functions.
*/
channel_sums = function(matrix[double] X, int C, int Hin, int Win)
return (matrix[double] out) {
/*
* Computes a channel-wise summation over a 4D input.
*
* Inputs:
* - X: Inputs, of shape (N, C*Hin*Win).
* - C: Number of input channels (dimensionality of input depth).
* - Hin: Input height.
* - Win: Input width.
*
* Outputs:
* - out: Outputs, of shape (C, 1).
*/
# Here we sum each column, reshape to (C, Hin*Win), and sum each row to result in the summation
# for each channel.
out = rowSums(matrix(colSums(X), rows=C, cols=Hin*Win)) # shape (C, 1)
}
predict_class = function(matrix[double] Prob, int C, int H, int W) return (matrix[double] Prediction) {
/*
* Computes the class labels from the probabilities.
*
* Inputs:
* - Prob: Input Probability
* - C: Number of output labels
* - Hin: Input height.
* - Win: Input width.
*
* Outputs:
* - Prediction: Class Labels.
*/
if(H == 1 & W == 1) {
Prediction = rowIndexMax(Prob); # assuming one-based label mapping
}
else {
N = nrow(Prob);
Prediction = matrix(0, rows=N, cols=H*W);
parfor(n in 1:N) {
Prob1 = matrix(Prob[n,], rows=C, cols=H*W);
Prediction[n,] = t(rowIndexMax(t(Prob1))); # assuming one-based label mapping
}
}
}
im2col = function(matrix[double] img, int Hin, int Win, int Hf, int Wf, int strideh, int stridew)
return (matrix[double] img_cols) {
/*
* Rearrange local image regions (patches) into columns.
*
* Assumes image has already been padded as necessary.
*
* Inputs:
* - img: Input image, of shape (C, Hin*Win), where C is the number
* of input channels (depth).
* - Hin: Input height, including padding.
* - Win: Input width, including padding.
* - Hf: Filter height.
* - Wf: Filter width.
* - strideh: Stride over height.
* - stridew: Stride over width.
*
* Outputs:
* - img_cols: Local spatial regions (patches) of the image stretched
* out into columns, of shape (C*Hf*Wf, Hout*Wout).
*/
C = nrow(img)
Hout = as.integer(floor((Hin-Hf)/strideh + 1))
Wout = as.integer(floor((Win-Wf)/stridew + 1))
# Note: We start with `img_cols` transposed to allow for row-major
# left-indexing inside the loop, which is more performant.
img_cols = matrix(0, rows=Hout*Wout, cols=C*Hf*Wf) # zeros
parfor (hout in 1:Hout, check=0) { # all output rows
hin = (hout-1)*strideh + 1
parfor (wout in 1:Wout, check=0) { # all output columns
win = (wout-1)*stridew + 1
# Extract a local patch of the input image corresponding spatially to the filter sizes.
img_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros
parfor (c in 1:C) { # all channels
img_slice = matrix(img[c,], rows=Hin, cols=Win) # reshape
img_patch[c,] = matrix(img_slice[hin:hin+Hf-1, win:win+Wf-1], rows=1, cols=Hf*Wf)
}
img_cols[(hout-1)*Wout + wout,] = t(matrix(img_patch, rows=C*Hf*Wf, cols=1)) # reshape
}
}
img_cols = t(img_cols)
}
col2im = function(matrix[double] img_cols, int C, int Hin, int Win, int Hf, int Wf,
int strideh, int stridew, string reduction)
return (matrix[double] img) {
/*
* Create an image from columns of local image regions (patches).
*
* The reduction strategy determines how to deal with overlapping
* patches. If it is set to "add", any overlapping patches will be
* added together when creating the image. This is useful when
* computing gradients on the original image given gradients on the
* patches. Otherwise, if "none" is provided, any overlapping
* patches will just override previous ones when creating the image.
* This is useful when recreating an image from the output of
* `im2col`.
*
* Assumes original image was already padded as necessary.
*
* Inputs:
* - img_cols: Local spatial regions (patches) of the image stretched
* out into columns, of shape (C*Hf*Wf, Hout*Wout).
* - C: Number of input channels (dimensionality of input depth).
* - Hin: Input height, including padding.
* - Win: Input width, including padding.
* - Hf: Filter height.
* - Wf: Filter width.
* - strideh: Stride over height.
* - stridew: Stride over width.
* - reduction: The reduction strategy to use for overlapping
* patches. Valid options are "add" and "none".
*
* Outputs:
* - img: Input image, of shape (C, Hin*Win).
*/
Hout = as.integer(floor((Hin-Hf)/strideh + 1))
Wout = as.integer(floor((Win-Wf)/stridew + 1))
img = matrix(0, rows=C, cols=Hin*Win) # zeros
for (hout in 1:Hout) { # all output rows
hin = (hout-1)*strideh + 1
for (wout in 1:Wout) { # all output columns
win = (wout-1)*stridew + 1
# Extract a local patch of the input image corresponding spatially to the filter sizes.
img_patch = matrix(img_cols[,(hout-1)*Wout + wout], rows=C, cols=Hf*Wf) # zeros
parfor (c in 1:C) { # all channels
img_patch_slice = matrix(img_patch[c,], rows=Hf, cols=Wf) # reshape
if (reduction == "add") {
img_slice = matrix(0, rows=Hin, cols=Win)
img_slice[hin:hin+Hf-1, win:win+Wf-1] = img_patch_slice
img[c,] = img[c,] + matrix(img_slice, rows=1, cols=Hin*Win)
} else {
img_slice = matrix(img[c,], rows=Hin, cols=Win)
img_slice[hin:hin+Hf-1, win:win+Wf-1] = img_patch_slice
img[c,] = matrix(img_slice, rows=1, cols=Hin*Win)
}
}
}
}
}
pad_image = function(matrix[double] img, int Hin, int Win, int padh, int padw, double pad_value)
return (matrix[double] img_padded) {
/*
* Pads an image along the height and width dimensions with zeros.
*
* Inputs:
* - img: Input image, of shape (C, Hin*Win), where C is the number
* of input channels (depth).
* - Hin: Input height.
* - Win: Input width.
* - padh: Padding for top and bottom sides.
* - padw: Padding for left and right sides.
* - pad_value: Value to use for the padding.
* A typical value is 0.
*
* Outputs:
* - img_padded: The input image padded along the height and width
* dimensions, of shape (C, (Hin+2*padh)*(Win+2*padw)).
*/
C = nrow(img)
img_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros
parfor (c in 1:C) {
img_slice = matrix(img[c,], rows=Hin, cols=Win) # depth slice C reshaped
img_padded_slice = matrix(pad_value, rows=Hin+2*padh, cols=Win+2*padw)
img_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = img_slice
img_padded[c,] = matrix(img_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
}
}
unpad_image = function(matrix[double] img_padded, int Hin, int Win, int padh, int padw)
return (matrix[double] img) {
/*
* Unpads an image along the height and width dimensions.
*
* Inputs:
* - img_padded: The input image padded along the height and width
* dimensions, of shape (C, (Hin+2*padh)*(Win+2*padw)).
* - Hin: Input height of unpadded image.
* - Win: Input width of unpadded image.
* - padh: Padding for top and bottom sides.
* - padw: Padding for left and right sides.
*
* Outputs:
* - img: Input image, of shape (C, Hin*Win), where C is the number
* of input channels (depth).
*/
C = nrow(img_padded)
img = matrix(0, rows=C, cols=Hin*Win)
parfor (c in 1:C) {
img_padded_slice = matrix(img_padded[c,], rows=(Hin+2*padh), cols=(Win+2*padw))
img_slice = img_padded_slice[padh+1:padh+Hin, padw+1:padw+Win]
img[c,] = matrix(img_slice, rows=1, cols=Hin*Win)
}
}
threshold = function(matrix[double] X, double thresh)
return (matrix[double] out) {
/*
* Computes an indicator matrix with values in {0, 1} depending on
* whether or not the values in X are above the input threshold.
*
* Inputs:
* - X: Inputs, of shape (any, any).
* - thresh: Input threshold.
*
* Outputs:
* - out: Outputs, of same shape as X.
*/
out = X > thresh
}
transpose_NCHW_to_CNHW = function(matrix[double] X, int C)
return (matrix[double] out) {
/*
* Reshape util for tensors in NCHW format.
* Transposes the 1st and 2nd dimensions.
*
* Inputs:
* - X: Inputs, of shape (N, C*H*W).
* - C: Number of channels (dimensionality of depth).
*
* Outputs:
* - out: Outputs with the N and C axes transposed, of
* shape (C, N*H*W).
*/
N = nrow(X)
D = ncol(X) / C
# This is an easy reshape because the channels remain intact. By
# reshaping X to a matrix with N*C rows, we can reduce our task to
# re-ordering rows (followed by the obvious reshape to achieve the
# required output shape with C rows).
#
# The difficult part is to obtain the permutation matrix required
# for re-ordering the rows. In this case, since we want to bring the
# ith channels from all rows together, we will need a column vector
# of the following form:
# [1, 1+C, 1+2C, ..., 1+(N-1)C,
# 2, 2+C, ..., 2+(N-1)C,
# 3, 3+C, ..., 3+(N-1)C,
# .
# .
# .
# C, 2C, ..., NC]'
# This vector can be produced via an outer call.
col_idx = outer(seq(1,C), C*t(seq(0,N-1)), "+")
# Generate the permutation matrix by:
# - reshaping the result of outer into a col
# - invoking table
permut = table(seq(1, N*C), matrix(col_idx, rows=N*C, cols=1), N*C, N*C)
# Generate the output by:
# - pre-multiplying the (reshaped) X with the permutation matrix
# - reshape to get the output shape with C rows
out = matrix(permut %*% matrix(X, rows=N*C, cols=D), rows=C, cols=N*D)
}
top_k_row = function(matrix[double] X, integer r, integer k)
return (matrix[double] values, matrix[double] indices) {
/*
* Computes the top k values (i.e. probabilities) and associated
* indices (i.e. classes) in the rth row of the input matrix X.
*
* Inputs:
* - X: Inputs, of shape (N, D).
* - r: Input row number of X to look for.
* - k: Input number of top elements to look for.
*
* Outputs:
* - values: The top k values at the rth row, of shape
* (1, k).
* - indices: The class indices, of shape (1, k).
*/
#TODO: do r & k need to be checked in the valid range
row = X[r, ]
row_t = t(row)
indices = order(target=row_t, by=1, decreasing=TRUE, index.return=TRUE)
indices = t(indices)
indices = indices[1, 1:k]
values = matrix(0, rows=1, cols=k)
for (i in 1:k) {
values[1, i] = row[1, as.scalar(indices[1, i])]
}
}
top_k = function(matrix[double] X, integer k)
return (matrix[double] values, matrix[double] indices) {
/*
* Computes the top k values (i.e. probabilities) and associated
* indices (i.e. classes) for the input matrix X.
*
* Inputs:
* - X: Inputs, of shape (N, D).
* - k: Input number of top elements to look for.
*
* Outputs:
* - values: The top k values along a certain dimension, of shape
* (N, k).
* - indices: The indices of classes, of shape (N, K).
*/
N = nrow(X)
D = ncol(X)
values = matrix(0, rows=N, cols=k)
indices = matrix(0, rows=N, cols=k)
parfor (r in 1:N) {
[value, index] = top_k_row(X, r, k)
values[r, ] = value
indices[r, ] = index
}
}
top_k2d = function(matrix[double] X, int k, int C, int Hin, int Win)
return (matrix[double] values, matrix[double] indices) {
/*
* Computes the top k values (i.e. probabilities) and associated
* indices (i.e. classes) for the input matrix X.
*
* Inputs:
* - X: Inputs, of shape (N, C*Hin*Win).
* - k: Input number of top elements to look for.
* - C: Number of input channels (dimensionality of input depth).
* - Hin: Input height.
* - Win: Input width.
*
* Outputs:
* - values: The top k values along a certain dimension, of shape
* (N, k*Hin*Win).
* - indices: The indices of classes, of shape (N, k*Hin*Win).
*/
N = nrow(X)
# Reshape the input matrix (N, C*Hin*Win) to (N*Hin*Win, C)
X_C_NHW = transpose_NCHW_to_CNHW(X, C)
X_NHW_C = t(X_C_NHW)
# Compute the top k for the reshape matrix.
[values_NHW_K, indices_NHW_K] = top_k(X_NHW_C, k) # shape: (N*Hin*Win, k)
values_K_NHW = t(values_NHW_K)
indices_K_NHW = t(indices_NHW_K)
values = transpose_NCHW_to_CNHW(values_K_NHW, N)
indices = transpose_NCHW_to_CNHW(indices_K_NHW, N)
}