#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
/*
 * 2D Cross-Entropy loss function.
 */
source("scripts/nn/util.dml") as util
source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
forward = function(matrix[double] pred, matrix[double] y, int C)
    return (double loss) {
  /*
   * Computes the forward pass for a 2D cross-entropy loss function. The
   * inputs consist of N examples, each of shape (C, Hin, Win), where
   * each pixel has C dimensions corresponding to normalized
   * probabilities of C classes. The loss is applied to each pixel
   * location, and then averaged over all pixels and all examples.
   *
   * ```
   * L_ijk = -y_ijk^T * log(pred_ijk)
   * L = (1/(N*H*W)) * sum(L_ijk) for i=1 to N, j=1 to H, k=1 to W.
   * ```
   *
   * In these equations, `L` is the total loss, `L_ijk` is the loss for
   * the pixel at location `(j, k)` in example `i`, `y_ijk` is the
   * C-dimensional vector of target class probabilities, `pred_ijk` is
   * the C-dimensional vector of predicted class probabilities, `N` is
   * the number of examples, and `H` and `W` are the spatial height
   * (Hin) and width (Win).
   *
   * For each pixel location, this can be interpreted as the negative
   * log-likelihood assuming a Bernoulli distribution generalized to C
   * dimensions, or a Multinomial with one observation.
   *
   * Inputs:
   *  - pred: Predictions, of shape (N, C*Hin*Win).
   *  - y: Targets, of shape (N, C*Hin*Win).
   *  - C: Number of input channels (dimensionality of input depth).
   *
   * Outputs:
   *  - loss: Average loss.
   */
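  # Shape walkthrough (hypothetical sizes, for illustration only): with N=2
  # examples, C=3 classes, and Hin=Win=4, `pred` and `y` arrive as (2, 48)
  # matrices; the reshaping below yields (32, 3) matrices in which each row
  # holds one pixel's class probabilities, so the 1D cross-entropy loss can
  # average over all N*Hin*Win = 32 pixel locations.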
  N = nrow(y)
  # Reshape pred from (N, C*H*W) to (N*H*W, C): first to (C, N*H*W), then transpose
  pred_C_NHW = util::transpose_NCHW_to_CNHW(pred, C)
  pred_NHW_C = t(pred_C_NHW)
  # Reshape y from (N, C*H*W) to (N*H*W, C) in the same way
  y_C_NHW = util::transpose_NCHW_to_CNHW(y, C)
  y_NHW_C = t(y_C_NHW)
  loss = cross_entropy_loss::forward(pred_NHW_C, y_NHW_C)
}
backward = function(matrix[double] pred, matrix[double] y, int C)
    return (matrix[double] dpred) {
  /*
   * Computes the backward pass for a 2D cross-entropy loss function. The
   * inputs consist of N examples, each of shape (C, Hin, Win), where
   * each pixel has C dimensions corresponding to normalized
   * probabilities of C classes.
   *
   * Inputs:
   *  - pred: Predictions, of shape (N, C*Hin*Win).
   *  - y: Targets, of shape (N, C*Hin*Win).
   *  - C: Number of input channels (dimensionality of input depth).
   *
   * Outputs:
   *  - dpred: Gradient wrt `pred`, of shape (N, C*Hin*Win).
   */
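  # Gradient note (derived from the forward definition above): since the loss
  # averages -y_ijk^T * log(pred_ijk) over all pixels and examples, the gradient
  # wrt each prediction is -(y / pred) / (N*H*W) elementwise. The 1D backward
  # call below produces exactly this on the reshaped (N*H*W, C) matrices, because
  # its "number of examples" equals the number of pixel locations here.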
  N = nrow(y)
  # Reshape pred from (N, C*H*W) to (N*H*W, C): first to (C, N*H*W), then transpose
  pred_C_NHW = util::transpose_NCHW_to_CNHW(pred, C)
  pred_NHW_C = t(pred_C_NHW)
  # Reshape y from (N, C*H*W) to (N*H*W, C) in the same way
  y_C_NHW = util::transpose_NCHW_to_CNHW(y, C)
  y_NHW_C = t(y_C_NHW)
  dpred_NHW_C = cross_entropy_loss::backward(pred_NHW_C, y_NHW_C)
  # Reshape the gradient from (N*H*W, C) back to (N, C*H*W); applying the
  # NCHW->CNHW reshape to the (C, N*H*W) matrix with N as the "channel" count
  # inverts the earlier reshape.
  dpred_C_NHW = t(dpred_NHW_C)
  dpred = util::transpose_NCHW_to_CNHW(dpred_C_NHW, N)
}
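
/*
 * Usage sketch (not part of the original layer): a minimal, hypothetical driver,
 * assuming this file is saved as `scripts/nn/layers/cross_entropy_loss2d.dml`
 * and is sourced from the project root like the imports above. The sizes N, C,
 * Hin, and Win are arbitrary illustration values.
 *
 *   source("scripts/nn/layers/cross_entropy_loss2d.dml") as cross_entropy_loss2d
 *   source("scripts/nn/util.dml") as util
 *   N = 2
 *   C = 3
 *   Hin = 4
 *   Win = 4
 *   # Build normalized per-pixel probabilities in (N*Hin*Win, C) layout,
 *   # then reshape to the (N, C*Hin*Win) layout the layer expects.
 *   pred_flat = rand(rows=N*Hin*Win, cols=C, min=0.01, max=1.0)
 *   pred = util::transpose_NCHW_to_CNHW(t(pred_flat / rowSums(pred_flat)), N)
 *   y_flat = rand(rows=N*Hin*Win, cols=C, min=0.01, max=1.0)
 *   y = util::transpose_NCHW_to_CNHW(t(y_flat / rowSums(y_flat)), N)
 *   loss = cross_entropy_loss2d::forward(pred, y, C)
 *   dpred = cross_entropy_loss2d::backward(pred, y, C)
 *   print("loss = " + loss)
 */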