# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# =============================================================================
'''
Loss module includes a set of training loss implementations. Some are converted
from the C++ implementation, and the rest are implemented directly using the
Python Tensor module.

Example usage::

    import numpy as np
    from singa import tensor
    from singa import loss

    x = tensor.Tensor((3, 5))
    x.uniform(0, 1)  # randomly generate the prediction activation
    y = tensor.from_numpy(np.array([0, 1, 3], dtype=np.int32))  # set the truth

    f = loss.SoftmaxCrossEntropy()
    l = f.forward(True, x, y)  # l is a tensor with 3 loss values
    g = f.backward()  # g is a tensor with the gradients of the loss w.r.t. x
'''
from __future__ import division
from __future__ import absolute_import
from builtins import object
from . import singa_wrap as singa
from . import tensor
from .proto import model_pb2
class Loss(object):
'''Base loss class.
    Subclasses that wrap the C++ loss classes can use the inherited forward,
    backward, and evaluate functions of this base class. Other subclasses need
    to override these functions.
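
    A minimal sketch of a Python-implemented subclass (a hypothetical L1 loss,
    shown for illustration only; it overrides forward and backward and assumes
    the element-wise tensor.abs and tensor.sign helpers)::

        class L1Loss(Loss):
            def forward(self, flag, x, y):
                self.err = x - y
                return tensor.abs(self.err)

            def backward(self):
                return tensor.sign(self.err)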
'''
def __init__(self):
self.swig_loss = None
def forward(self, flag, x, y):
'''Compute the loss values.
Args:
flag: kTrain/kEval or bool. If it is kTrain/True, then the backward
function must be called before calling forward again.
x (Tensor): the prediction Tensor
            y (Tensor): the ground truth Tensor; x.shape[0] must equal y.shape[0]
Returns:
a tensor of floats for the loss values, one per sample
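
        Example usage (a sketch; the two calls below are equivalent, assuming
        model_pb2 is available as singa.proto.model_pb2)::

            l = f.forward(True, x, y)
            l = f.forward(model_pb2.kTrain, x, y)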
'''
if type(flag) is bool:
if flag:
flag = model_pb2.kTrain
else:
flag = model_pb2.kEval
return tensor.from_raw_tensor(
self.swig_loss.Forward(flag, x.data, y.data))
def backward(self):
'''
Returns:
            the gradient of the loss w.r.t. x
'''
return tensor.from_raw_tensor(self.swig_loss.Backward())
def evaluate(self, flag, x, y): # TODO(wangwei) remove flag
'''
Args:
flag (int): must be kEval, to be removed
x (Tensor): the prediction Tensor
            y (Tensor): the ground truth Tensor
Returns:
the averaged loss for all samples in x.
'''
if type(flag) is bool:
if flag:
flag = model_pb2.kTrain
else:
flag = model_pb2.kEval
return self.swig_loss.Evaluate(flag, x.data, y.data)
class SoftmaxCrossEntropy(Loss):
'''This loss function is a combination of SoftMax and Cross-Entropy loss.
It converts the inputs via SoftMax function and then
computes the cross-entropy loss against the ground truth values.
    For each sample, the ground truth could be an integer as the label index,
    or a binary array indicating the label distribution. The ground truth
    tensor thus could be a 1d or 2d tensor.
    The data/feature tensor could be 1d (for a single sample) or 2d for a
    batch of samples.
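
    A sketch of usage with a 2d ground truth tensor (one binary label
    distribution per sample); the shapes and values are illustrative only::

        import numpy as np
        from singa import tensor
        from singa import loss

        x = tensor.Tensor((2, 4))
        x.uniform(0, 1)  # predictions for 2 samples over 4 classes
        y = tensor.from_numpy(np.array([[1, 0, 0, 0],
                                        [0, 0, 1, 0]], dtype=np.float32))

        f = loss.SoftmaxCrossEntropy()
        l = f.forward(True, x, y)  # one loss value per sample
        g = f.backward()           # gradient of the loss w.r.t. x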
'''
def __init__(self):
super(SoftmaxCrossEntropy, self).__init__()
self.swig_loss = singa.SoftmaxCrossEntropy()
class SigmoidCrossEntropy(Loss):
'''This loss evaluates the cross-entropy loss between the prediction and the
truth values with the prediction probability generated from Sigmoid.
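
    A sketch of typical usage; the shapes and values are illustrative only::

        import numpy as np
        from singa import tensor
        from singa import loss

        x = tensor.Tensor((3, 2))
        x.uniform(-1, 1)  # raw (pre-sigmoid) predictions
        y = tensor.from_numpy(np.array([[1, 0], [0, 1], [1, 1]],
                                       dtype=np.float32))

        f = loss.SigmoidCrossEntropy()
        l = f.forward(True, x, y)  # one averaged loss value per sample
        g = f.backward()           # sigmoid(x) - y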
'''
def __init__(self, epsilon=1e-8):
super(SigmoidCrossEntropy, self).__init__()
self.truth = None
self.prob = None
self.epsilon = epsilon # to avoid log(x) with x being too small
def forward(self, flag, x, y):
        '''The loss is -yi * log(pi) - (1 - yi) * log(1 - pi), where
        pi = sigmoid(xi).
Args:
flag (bool): true for training; false for evaluation
x (Tensor): the prediction Tensor
y (Tensor): the truth Tensor, a binary array value per sample
Returns:
a Tensor with one error value per sample
'''
p = tensor.sigmoid(x)
if flag:
self.truth = y
self.prob = p
        # clip both p and 1-p away from 0 to avoid taking log of a tiny value
        one_minus_p = 1 - p
        p += (p < self.epsilon) * self.epsilon
        one_minus_p += (one_minus_p < self.epsilon) * self.epsilon
        l = (y - 1) * tensor.log(one_minus_p) - y * tensor.log(p)
# TODO(wangwei): add unary operation -Tensor
return tensor.average(l, axis=1)
def backward(self):
        '''Compute the gradient of the loss w.r.t. x.
Returns:
dx = pi - yi.
'''
        assert self.truth is not None, 'forward must be called before backward'
dx = self.prob - self.truth
self.truth = None
return dx
def evaluate(self, flag, x, y):
        '''Compute the averaged error.
Returns:
a float value as the averaged error
'''
l = self.forward(False, x, y)
return l.l1()
class SquaredError(Loss):
'''This loss evaluates the squared error between the prediction and the
truth values.
It is implemented using Python Tensor operations.
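
    A sketch of typical usage; the values are illustrative only::

        import numpy as np
        from singa import tensor
        from singa import loss

        x = tensor.from_numpy(np.array([[0.5, 0.2]], dtype=np.float32))
        y = tensor.from_numpy(np.array([[1.0, 0.0]], dtype=np.float32))

        f = loss.SquaredError()
        l = f.forward(True, x, y)    # 0.5 * (x - y)^2, element-wise
        g = f.backward()             # x - y
        e = f.evaluate(False, x, y)  # averaged squared error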
'''
def __init__(self):
super(SquaredError, self).__init__()
self.err = None
def forward(self, flag, x, y):
'''Compute the error as 0.5 * ||x-y||^2.
Args:
flag (int): kTrain or kEval; if kTrain, then the backward must be
called before calling forward again.
x (Tensor): the prediction Tensor
            y (Tensor): the ground truth Tensor, of the same shape as x
Returns:
a Tensor with one error value per sample
'''
self.err = x - y
return tensor.square(self.err) * 0.5
def backward(self):
        '''Compute the gradient of the error w.r.t. x.
Returns:
x - y
'''
return self.err
def evaluate(self, flag, x, y):
        '''Compute the averaged error.
Returns:
a float value as the averaged error
'''
return tensor.sum(tensor.square(x - y)) * 0.5 / x.size()