| # coding: utf-8 |
| # pylint: disable=no-member, too-many-lines |
| |
| """Online evaluation metric module.""" |
| from __future__ import absolute_import |
| import math |
| from collections import OrderedDict |
| |
| import numpy |
| |
| from .base import numeric_types, string_types |
| from . import ndarray |
| from . import registry |
| |
| |
| def check_label_shapes(labels, preds, shape=0): |
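    """Checks that the shapes of labels and predictions match.

    Parameters
    ----------
    labels : list of `NDArray` or `NDArray`
        The labels of the data.
    preds : list of `NDArray` or `NDArray`
        Predicted values.
    shape : int, optional
        If 0 (default), only ``len(labels)`` and ``len(preds)`` are compared;
        otherwise ``labels.shape`` and ``preds.shape`` are compared.
    """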
| if shape == 0: |
| label_shape, pred_shape = len(labels), len(preds) |
| else: |
| label_shape, pred_shape = labels.shape, preds.shape |
| |
| if label_shape != pred_shape: |
| raise ValueError("Shape of labels {} does not match shape of " |
| "predictions {}".format(label_shape, pred_shape)) |
| |
| |
| class EvalMetric(object): |
| """Base class for all evaluation metrics. |
| |
| .. note:: |
| |
| This is a base class that provides common metric interfaces. |
| One should not use this class directly, but instead create new metric |
| classes that extend it. |
| |
| Parameters |
| ---------- |
| name : str |
| Name of this metric instance for display. |
| output_names : list of str, or None |
| Name of predictions that should be used when updating with update_dict. |
| By default include all predictions. |
| label_names : list of str, or None |
| Name of labels that should be used when updating with update_dict. |
| By default include all labels. |
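
    Examples
    --------
    A minimal sketch of subclassing (``MeanPred`` below is an illustrative
    name, not a metric shipped with the library):

    >>> class MeanPred(mx.metric.EvalMetric):
    ...     def __init__(self):
    ...         super(MeanPred, self).__init__('mean_pred')
    ...     def update(self, labels, preds):
    ...         for pred in preds:
    ...             self.sum_metric += pred.asnumpy().sum()
    ...             self.num_inst += pred.size
    >>> metric = MeanPred()
    >>> metric.update([], [mx.nd.array([0.2, 0.8])])
    >>> metric.get()
    ('mean_pred', 0.5)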
| """ |
| def __init__(self, name, output_names=None, |
| label_names=None, **kwargs): |
| self.name = str(name) |
| self.output_names = output_names |
| self.label_names = label_names |
| self._kwargs = kwargs |
| self.reset() |
| |
| def __str__(self): |
| return "EvalMetric: {}".format(dict(self.get_name_value())) |
| |
| def get_config(self): |
| """Save configurations of metric. Can be recreated |
| from configs with metric.create(**config) |
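
        A sketch of the round trip (assuming the metric class is registered,
        as all built-in metrics are):

        >>> config = mx.metric.Accuracy().get_config()
        >>> metric = mx.metric.create(**config)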
| """ |
| config = self._kwargs.copy() |
| config.update({ |
| 'metric': self.__class__.__name__, |
| 'name': self.name, |
| 'output_names': self.output_names, |
| 'label_names': self.label_names}) |
| return config |
| |
| def update_dict(self, label, pred): |
| """Update the internal evaluation with named label and pred |
| |
| Parameters |
| ---------- |
        label : OrderedDict of str -> NDArray
            name to array mapping for labels.

        pred : OrderedDict of str -> NDArray
            name to array mapping of predicted outputs.
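
        Examples
        --------
        A small sketch (the names ``'out'`` and ``'lbl'`` are illustrative):

        >>> metric = mx.metric.Accuracy(output_names=['out'], label_names=['lbl'])
        >>> metric.update_dict({'lbl': mx.nd.array([0, 1, 1])},
        ...                    {'out': mx.nd.array([[0.3, 0.7], [0., 1.], [0.4, 0.6]])})
        >>> metric.get()
        ('accuracy', 0.6666666666666666)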
| """ |
| if self.output_names is not None: |
| pred = [pred[name] for name in self.output_names] |
| else: |
| pred = list(pred.values()) |
| |
| if self.label_names is not None: |
| label = [label[name] for name in self.label_names] |
| else: |
| label = list(label.values()) |
| |
| self.update(label, pred) |
| |
| def update(self, labels, preds): |
| """Updates the internal evaluation result. |
| |
| Parameters |
| ---------- |
| labels : list of `NDArray` |
| The labels of the data. |
| |
| preds : list of `NDArray` |
| Predicted values. |
| """ |
| raise NotImplementedError() |
| |
| def reset(self): |
| """Resets the internal evaluation result to initial state.""" |
| self.num_inst = 0 |
| self.sum_metric = 0.0 |
| |
| def get(self): |
| """Gets the current evaluation result. |
| |
| Returns |
| ------- |
        names : str
            Name of the metric.
        values : float
            Value of the evaluation.
| """ |
| if self.num_inst == 0: |
| return (self.name, float('nan')) |
| else: |
| return (self.name, self.sum_metric / self.num_inst) |
| |
| def get_name_value(self): |
| """Returns zipped name and value pairs. |
| |
| Returns |
| ------- |
| list of tuples |
| A (name, value) tuple list. |
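
        Examples
        --------
        A small sketch (values are illustrative):

        >>> f1 = mx.metric.F1()
        >>> f1.update([mx.nd.array([0, 1, 1])],
        ...           [mx.nd.array([[0.3, 0.7], [0., 1.], [0.4, 0.6]])])
        >>> f1.get_name_value()
        [('f1', 0.8)]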
| """ |
| name, value = self.get() |
| if not isinstance(name, list): |
| name = [name] |
| if not isinstance(value, list): |
| value = [value] |
| return list(zip(name, value)) |
| |
| # pylint: disable=invalid-name |
| register = registry.get_register_func(EvalMetric, 'metric') |
| alias = registry.get_alias_func(EvalMetric, 'metric') |
| _create = registry.get_create_func(EvalMetric, 'metric') |
| # pylint: enable=invalid-name |
| |
| |
| def create(metric, *args, **kwargs): |
| """Creates evaluation metric from metric names or instances of EvalMetric |
| or a custom metric function. |
| |
| Parameters |
| ---------- |
| metric : str or callable |
| Specifies the metric to create. |
| This argument must be one of the below: |
| |
| - Name of a metric. |
| - An instance of `EvalMetric`. |
| - A list, each element of which is a metric or a metric name. |
| - An evaluation function that computes custom metric for a given batch of |
| labels and predictions. |
| *args : list |
| Additional arguments to metric constructor. |
| Only used when metric is str. |
| **kwargs : dict |
| Additional arguments to metric constructor. |
        Only used when metric is str.
| |
| Examples |
| -------- |
| >>> def custom_metric(label, pred): |
| ... return np.mean(np.abs(label - pred)) |
| ... |
| >>> metric1 = mx.metric.create('acc') |
| >>> metric2 = mx.metric.create(custom_metric) |
| >>> metric3 = mx.metric.create([metric1, metric2, 'rmse']) |
| """ |
| if callable(metric): |
| return CustomMetric(metric, *args, **kwargs) |
| elif isinstance(metric, list): |
| composite_metric = CompositeEvalMetric() |
| for child_metric in metric: |
| composite_metric.add(create(child_metric, *args, **kwargs)) |
| return composite_metric |
| |
| return _create(metric, *args, **kwargs) |
| |
| |
| @register |
| @alias('composite') |
| class CompositeEvalMetric(EvalMetric): |
| """Manages multiple evaluation metrics. |
| |
| Parameters |
| ---------- |
| metrics : list of EvalMetric |
| List of child metrics. |
| name : str |
| Name of this metric instance for display. |
| output_names : list of str, or None |
| Name of predictions that should be used when updating with update_dict. |
| By default include all predictions. |
| label_names : list of str, or None |
| Name of labels that should be used when updating with update_dict. |
| By default include all labels. |
| |
| Examples |
| -------- |
| >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] |
| >>> labels = [mx.nd.array([0, 1, 1])] |
| >>> eval_metrics_1 = mx.metric.Accuracy() |
| >>> eval_metrics_2 = mx.metric.F1() |
| >>> eval_metrics = mx.metric.CompositeEvalMetric() |
    >>> for child_metric in [eval_metrics_1, eval_metrics_2]:
    ...     eval_metrics.add(child_metric)
    >>> eval_metrics.update(labels = labels, preds = predicts)
    >>> print(eval_metrics.get())
| (['accuracy', 'f1'], [0.6666666666666666, 0.8]) |
| """ |
| |
| def __init__(self, metrics=None, name='composite', |
| output_names=None, label_names=None): |
| super(CompositeEvalMetric, self).__init__( |
| 'composite', output_names=output_names, label_names=label_names) |
| if metrics is None: |
| metrics = [] |
| self.metrics = [create(i) for i in metrics] |
| |
| def add(self, metric): |
| """Adds a child metric. |
| |
| Parameters |
| ---------- |
| metric |
| A metric instance. |
| """ |
| self.metrics.append(create(metric)) |
| |
| def get_metric(self, index): |
| """Returns a child metric. |
| |
| Parameters |
| ---------- |
| index : int |
| Index of child metric in the list of metrics. |
| """ |
| try: |
| return self.metrics[index] |
| except IndexError: |
            raise ValueError("Metric index {} is out of range [0, {})".format(
                index, len(self.metrics)))
| |
| def update_dict(self, labels, preds): # pylint: disable=arguments-differ |
| if self.label_names is not None: |
| labels = OrderedDict([i for i in labels.items() |
| if i[0] in self.label_names]) |
| if self.output_names is not None: |
| preds = OrderedDict([i for i in preds.items() |
| if i[0] in self.output_names]) |
| |
| for metric in self.metrics: |
| metric.update_dict(labels, preds) |
| |
| def update(self, labels, preds): |
| """Updates the internal evaluation result. |
| |
| Parameters |
| ---------- |
| labels : list of `NDArray` |
| The labels of the data. |
| |
| preds : list of `NDArray` |
| Predicted values. |
| """ |
| for metric in self.metrics: |
| metric.update(labels, preds) |
| |
| def reset(self): |
| """Resets the internal evaluation result to initial state.""" |
| try: |
| for metric in self.metrics: |
| metric.reset() |
| except AttributeError: |
| pass |
| |
| def get(self): |
| """Returns the current evaluation result. |
| |
| Returns |
| ------- |
| names : list of str |
| Name of the metrics. |
| values : list of float |
| Value of the evaluations. |
| """ |
| names = [] |
| values = [] |
| for metric in self.metrics: |
| name, value = metric.get() |
| if isinstance(name, string_types): |
| name = [name] |
| if isinstance(value, numeric_types): |
| value = [value] |
| names.extend(name) |
| values.extend(value) |
| return (names, values) |
| |
| def get_config(self): |
| config = super(CompositeEvalMetric, self).get_config() |
| config.update({'metrics': [i.get_config() for i in self.metrics]}) |
| return config |
| |
| |
| ######################## |
| # CLASSIFICATION METRICS |
| ######################## |
| |
| |
| @register |
| @alias('acc') |
| class Accuracy(EvalMetric): |
| """Computes accuracy classification score. |
| |
| The accuracy score is defined as |
| |
| .. math:: |
| \\text{accuracy}(y, \\hat{y}) = \\frac{1}{n} \\sum_{i=0}^{n-1} |
| \\text{1}(\\hat{y_i} == y_i) |
| |
| Parameters |
| ---------- |
| axis : int, default=1 |
| The axis that represents classes |
| name : str |
| Name of this metric instance for display. |
| output_names : list of str, or None |
| Name of predictions that should be used when updating with update_dict. |
| By default include all predictions. |
| label_names : list of str, or None |
| Name of labels that should be used when updating with update_dict. |
| By default include all labels. |
| |
| Examples |
| -------- |
| >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] |
| >>> labels = [mx.nd.array([0, 1, 1])] |
| >>> acc = mx.metric.Accuracy() |
| >>> acc.update(preds = predicts, labels = labels) |
    >>> print(acc.get())
| ('accuracy', 0.6666666666666666) |
| """ |
| def __init__(self, axis=1, name='accuracy', |
| output_names=None, label_names=None): |
| super(Accuracy, self).__init__( |
| name, axis=axis, |
| output_names=output_names, label_names=label_names) |
| self.axis = axis |
| |
| def update(self, labels, preds): |
| """Updates the internal evaluation result. |
| |
| Parameters |
| ---------- |
| labels : list of `NDArray` |
| The labels of the data. |
| |
| preds : list of `NDArray` |
| Predicted values. |
| """ |
| check_label_shapes(labels, preds) |
| |
| for label, pred_label in zip(labels, preds): |
| if pred_label.shape != label.shape: |
| pred_label = ndarray.argmax(pred_label, axis=self.axis) |
| pred_label = pred_label.asnumpy().astype('int32') |
| label = label.asnumpy().astype('int32') |
| |
| check_label_shapes(label, pred_label) |
| |
| self.sum_metric += (pred_label.flat == label.flat).sum() |
| self.num_inst += len(pred_label.flat) |
| |
| |
| @register |
| @alias('top_k_accuracy', 'top_k_acc') |
| class TopKAccuracy(EvalMetric): |
| """Computes top k predictions accuracy. |
| |
    `TopKAccuracy` differs from Accuracy in that it considers the prediction
    to be ``True`` as long as the ground truth label is in the top K
    predicted labels.
| |
| If `top_k` = ``1``, then `TopKAccuracy` is identical to `Accuracy`. |
| |
| Parameters |
| ---------- |
    top_k : int
        The number of top predictions to consider; a prediction is counted as
        correct if the target label is among its top `top_k` predictions.
| name : str |
| Name of this metric instance for display. |
| output_names : list of str, or None |
| Name of predictions that should be used when updating with update_dict. |
| By default include all predictions. |
| label_names : list of str, or None |
| Name of labels that should be used when updating with update_dict. |
| By default include all labels. |
| |
| Examples |
| -------- |
| >>> np.random.seed(999) |
| >>> top_k = 3 |
| >>> labels = [mx.nd.array([2, 6, 9, 2, 3, 4, 7, 8, 9, 6])] |
| >>> predicts = [mx.nd.array(np.random.rand(10, 10))] |
| >>> acc = mx.metric.TopKAccuracy(top_k=top_k) |
| >>> acc.update(labels, predicts) |
    >>> print(acc.get())
| ('top_k_accuracy', 0.3) |
| """ |
| |
| def __init__(self, top_k=1, name='top_k_accuracy', |
| output_names=None, label_names=None): |
| super(TopKAccuracy, self).__init__( |
| name, top_k=top_k, |
| output_names=output_names, label_names=label_names) |
| self.top_k = top_k |
| assert(self.top_k > 1), 'Please use Accuracy if top_k is no more than 1' |
| self.name += '_%d' % self.top_k |
| |
| def update(self, labels, preds): |
| """Updates the internal evaluation result. |
| |
| Parameters |
| ---------- |
| labels : list of `NDArray` |
| The labels of the data. |
| |
| preds : list of `NDArray` |
| Predicted values. |
| """ |
| check_label_shapes(labels, preds) |
| |
| for label, pred_label in zip(labels, preds): |
| assert(len(pred_label.shape) <= 2), 'Predictions should be no more than 2 dims' |
| pred_label = numpy.argsort(pred_label.asnumpy().astype('float32'), axis=1) |
| label = label.asnumpy().astype('int32') |
| check_label_shapes(label, pred_label) |
| num_samples = pred_label.shape[0] |
| num_dims = len(pred_label.shape) |
| if num_dims == 1: |
| self.sum_metric += (pred_label.flat == label.flat).sum() |
| elif num_dims == 2: |
| num_classes = pred_label.shape[1] |
| top_k = min(num_classes, self.top_k) |
| for j in range(top_k): |
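                    # argsort sorts scores in ascending order, so the j-th best
                    # prediction for each sample is in column (num_classes - 1 - j)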
| self.sum_metric += (pred_label[:, num_classes - 1 - j].flat == label.flat).sum() |
| self.num_inst += num_samples |
| |
| |
| @register |
| class F1(EvalMetric): |
| """Computes the F1 score of a binary classification problem. |
| |
    The F1 score is the harmonic mean of the precision and recall,
    where the best value is 1.0 and the worst value is 0.0. The formula for F1 score is::
| |
| F1 = 2 * (precision * recall) / (precision + recall) |
| |
| The formula for precision and recall is:: |
| |
| precision = true_positives / (true_positives + false_positives) |
| recall = true_positives / (true_positives + false_negatives) |
| |
| .. note:: |
| |
| This F1 score only supports binary classification. |
| |
| Parameters |
| ---------- |
| name : str |
| Name of this metric instance for display. |
| output_names : list of str, or None |
| Name of predictions that should be used when updating with update_dict. |
| By default include all predictions. |
| label_names : list of str, or None |
| Name of labels that should be used when updating with update_dict. |
| By default include all labels. |
| |
| Examples |
| -------- |
| >>> predicts = [mx.nd.array([[0.3, 0.7], [0., 1.], [0.4, 0.6]])] |
| >>> labels = [mx.nd.array([0., 1., 1.])] |
| >>> acc = mx.metric.F1() |
| >>> acc.update(preds = predicts, labels = labels) |
    >>> print(acc.get())
| ('f1', 0.8) |
| """ |
| |
| def __init__(self, name='f1', |
| output_names=None, label_names=None): |
| super(F1, self).__init__( |
| name, output_names=output_names, label_names=label_names) |
| |
| def update(self, labels, preds): |
| """Updates the internal evaluation result. |
| |
| Parameters |
| ---------- |
| labels : list of `NDArray` |
| The labels of the data. |
| |
| preds : list of `NDArray` |
| Predicted values. |
| """ |
| check_label_shapes(labels, preds) |
| |
| for label, pred in zip(labels, preds): |
| pred = pred.asnumpy() |
| label = label.asnumpy().astype('int32') |
| pred_label = numpy.argmax(pred, axis=1) |
| |
| check_label_shapes(label, pred) |
| if len(numpy.unique(label)) > 2: |
| raise ValueError("F1 currently only supports binary classification.") |
| |
| true_positives, false_positives, false_negatives = 0., 0., 0. |
| |
| for y_pred, y_true in zip(pred_label, label): |
| if y_pred == 1 and y_true == 1: |
| true_positives += 1. |
| elif y_pred == 1 and y_true == 0: |
| false_positives += 1. |
| elif y_pred == 0 and y_true == 1: |
| false_negatives += 1. |
| |
| if true_positives + false_positives > 0: |
| precision = true_positives / (true_positives + false_positives) |
| else: |
| precision = 0. |
| |
| if true_positives + false_negatives > 0: |
| recall = true_positives / (true_positives + false_negatives) |
| else: |
| recall = 0. |
| |
| if precision + recall > 0: |
| f1_score = 2 * precision * recall / (precision + recall) |
| else: |
| f1_score = 0. |
| |
| self.sum_metric += f1_score |
| self.num_inst += 1 |
| |
| |
| @register |
| class Perplexity(EvalMetric): |
| """Computes perplexity. |
| |
| Perplexity is a measurement of how well a probability distribution |
| or model predicts a sample. A low perplexity indicates the model |
| is good at predicting the sample. |
| |
| The perplexity of a model q is defined as |
| |
| .. math:: |
| b^{\\big(-\\frac{1}{N} \\sum_{i=1}^N \\log_b q(x_i) \\big)} |
| = \\exp \\big(-\\frac{1}{N} \\sum_{i=1}^N \\log q(x_i)\\big) |
| |
| where we let `b = e`. |
| |
    :math:`q(x_i)` is the model's predicted probability of the ground truth
    label of sample :math:`x_i`.
| |
| For example, we have three samples :math:`x_1, x_2, x_3` and their labels |
| are :math:`[0, 1, 1]`. |
| Suppose our model predicts :math:`q(x_1) = p(y_1 = 0 | x_1) = 0.3` |
| and :math:`q(x_2) = 1.0`, |
| :math:`q(x_3) = 0.6`. The perplexity of model q is |
| :math:`exp\\big(-(\\log 0.3 + \\log 1.0 + \\log 0.6) / 3\\big) = 1.77109762852`. |
| |
| Parameters |
| ---------- |
    ignore_label : int or None
        Index of an invalid label to ignore when
        counting. Usually set to -1.
        If set to `None`, all entries are counted.
| axis : int (default -1) |
| The axis from prediction that was used to |
| compute softmax. By default use the last |
| axis. |
| name : str |
| Name of this metric instance for display. |
| output_names : list of str, or None |
| Name of predictions that should be used when updating with update_dict. |
| By default include all predictions. |
| label_names : list of str, or None |
| Name of labels that should be used when updating with update_dict. |
| By default include all labels. |
| |
| Examples |
| -------- |
| >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] |
| >>> labels = [mx.nd.array([0, 1, 1])] |
| >>> perp = mx.metric.Perplexity(ignore_label=None) |
| >>> perp.update(labels, predicts) |
    >>> print(perp.get())
| ('Perplexity', 1.7710976285155853) |
| """ |
| def __init__(self, ignore_label, axis=-1, name='perplexity', |
| output_names=None, label_names=None): |
| super(Perplexity, self).__init__( |
| name, ignore_label=ignore_label, |
| output_names=output_names, label_names=label_names) |
| self.ignore_label = ignore_label |
| self.axis = axis |
| |
| def update(self, labels, preds): |
| """Updates the internal evaluation result. |
| |
| Parameters |
| ---------- |
| labels : list of `NDArray` |
| The labels of the data. |
| |
| preds : list of `NDArray` |
| Predicted values. |
| """ |
| assert len(labels) == len(preds) |
| loss = 0. |
| num = 0 |
| for label, pred in zip(labels, preds): |
            assert label.size == pred.size//pred.shape[-1], \
| "shape mismatch: %s vs. %s"%(label.shape, pred.shape) |
| label = label.as_in_context(pred.context).reshape((label.size,)) |
| pred = ndarray.pick(pred, label.astype(dtype='int32'), axis=self.axis) |
| if self.ignore_label is not None: |
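                # mask out ignored labels: remove them from the token count and
                # set their predicted probability to 1 so they contribute log(1) = 0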
| ignore = label == self.ignore_label |
| num -= ndarray.sum(ignore).asscalar() |
| pred = pred*(1-ignore) + ignore |
| loss -= ndarray.sum(ndarray.log(ndarray.maximum(1e-10, pred))).asscalar() |
| num += pred.size |
| self.sum_metric += loss |
| self.num_inst += num |
| |
| def get(self): |
| """Returns the current evaluation result. |
| |
| Returns |
| ------- |
| Tuple of (str, float) |
| Representing name of the metric and evaluation result. |
| """ |
| return (self.name, math.exp(self.sum_metric/self.num_inst)) |
| |
| #################### |
| # REGRESSION METRICS |
| #################### |
| |
| |
| @register |
| class MAE(EvalMetric): |
| """Computes Mean Absolute Error (MAE) loss. |
| |
| The mean absolute error is given by |
| |
| .. math:: |
| \\frac{\\sum_i^n |y_i - \\hat{y}_i|}{n} |
| |
| Parameters |
| ---------- |
| name : str |
| Name of this metric instance for display. |
| output_names : list of str, or None |
| Name of predictions that should be used when updating with update_dict. |
| By default include all predictions. |
| label_names : list of str, or None |
| Name of labels that should be used when updating with update_dict. |
| By default include all labels. |
| |
| Examples |
| -------- |
| >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] |
| >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] |
| >>> mean_absolute_error = mx.metric.MAE() |
| >>> mean_absolute_error.update(labels = labels, preds = predicts) |
    >>> print(mean_absolute_error.get())
| ('mae', 0.5) |
| """ |
| |
| def __init__(self, name='mae', |
| output_names=None, label_names=None): |
| super(MAE, self).__init__( |
| name, output_names=output_names, label_names=label_names) |
| |
| def update(self, labels, preds): |
| """Updates the internal evaluation result. |
| |
| Parameters |
| ---------- |
| labels : list of `NDArray` |
| The labels of the data. |
| |
| preds : list of `NDArray` |
| Predicted values. |
| """ |
| check_label_shapes(labels, preds) |
| |
| for label, pred in zip(labels, preds): |
| label = label.asnumpy() |
| pred = pred.asnumpy() |
| |
| if len(label.shape) == 1: |
| label = label.reshape(label.shape[0], 1) |
| |
| self.sum_metric += numpy.abs(label - pred).mean() |
| self.num_inst += 1 # numpy.prod(label.shape) |
| |
| |
| @register |
| class MSE(EvalMetric): |
| """Computes Mean Squared Error (MSE) loss. |
| |
| The mean squared error is given by |
| |
| .. math:: |
| \\frac{\\sum_i^n (y_i - \\hat{y}_i)^2}{n} |
| |
| Parameters |
| ---------- |
| name : str |
| Name of this metric instance for display. |
| output_names : list of str, or None |
| Name of predictions that should be used when updating with update_dict. |
| By default include all predictions. |
| label_names : list of str, or None |
| Name of labels that should be used when updating with update_dict. |
| By default include all labels. |
| |
| Examples |
| -------- |
| >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] |
| >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] |
| >>> mean_squared_error = mx.metric.MSE() |
| >>> mean_squared_error.update(labels = labels, preds = predicts) |
    >>> print(mean_squared_error.get())
| ('mse', 0.375) |
| """ |
| def __init__(self, name='mse', |
| output_names=None, label_names=None): |
| super(MSE, self).__init__( |
| name, output_names=output_names, label_names=label_names) |
| |
| def update(self, labels, preds): |
| """Updates the internal evaluation result. |
| |
| Parameters |
| ---------- |
| labels : list of `NDArray` |
| The labels of the data. |
| |
| preds : list of `NDArray` |
| Predicted values. |
| """ |
| check_label_shapes(labels, preds) |
| |
| for label, pred in zip(labels, preds): |
| label = label.asnumpy() |
| pred = pred.asnumpy() |
| |
| if len(label.shape) == 1: |
| label = label.reshape(label.shape[0], 1) |
| |
| self.sum_metric += ((label - pred)**2.0).mean() |
| self.num_inst += 1 # numpy.prod(label.shape) |
| |
| |
| @register |
| class RMSE(EvalMetric): |
| """Computes Root Mean Squred Error (RMSE) loss. |
| |
| The root mean squared error is given by |
| |
| .. math:: |
| \\sqrt{\\frac{\\sum_i^n (y_i - \\hat{y}_i)^2}{n}} |
| |
| Parameters |
| ---------- |
| name : str |
| Name of this metric instance for display. |
| output_names : list of str, or None |
| Name of predictions that should be used when updating with update_dict. |
| By default include all predictions. |
| label_names : list of str, or None |
| Name of labels that should be used when updating with update_dict. |
| By default include all labels. |
| |
| Examples |
| -------- |
| >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] |
| >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] |
| >>> root_mean_squared_error = mx.metric.RMSE() |
| >>> root_mean_squared_error.update(labels = labels, preds = predicts) |
    >>> print(root_mean_squared_error.get())
| ('rmse', 0.612372457981) |
| """ |
| def __init__(self, name='rmse', |
| output_names=None, label_names=None): |
| super(RMSE, self).__init__( |
| name, output_names=output_names, label_names=label_names) |
| |
| def update(self, labels, preds): |
| """Updates the internal evaluation result. |
| |
| Parameters |
| ---------- |
| labels : list of `NDArray` |
| The labels of the data. |
| |
| preds : list of `NDArray` |
| Predicted values. |
| """ |
| check_label_shapes(labels, preds) |
| |
| for label, pred in zip(labels, preds): |
| label = label.asnumpy() |
| pred = pred.asnumpy() |
| |
| if len(label.shape) == 1: |
| label = label.reshape(label.shape[0], 1) |
| |
| self.sum_metric += numpy.sqrt(((label - pred)**2.0).mean()) |
| self.num_inst += 1 |
| |
| |
| @register |
| @alias('ce') |
| class CrossEntropy(EvalMetric): |
| """Computes Cross Entropy loss. |
| |
| The cross entropy is given by |
| |
| .. math:: |
        -y\\log \\hat{y} - (1-y)\\log (1-\\hat{y})
| |
| Parameters |
| ---------- |
    eps : float
        Cross Entropy loss is undefined when the predicted value is 0 or 1,
        so this small constant is added to the predicted values before taking the log.
| name : str |
| Name of this metric instance for display. |
| output_names : list of str, or None |
| Name of predictions that should be used when updating with update_dict. |
| By default include all predictions. |
| label_names : list of str, or None |
| Name of labels that should be used when updating with update_dict. |
| By default include all labels. |
| |
| Examples |
| -------- |
| >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] |
| >>> labels = [mx.nd.array([0, 1, 1])] |
| >>> ce = mx.metric.CrossEntropy() |
| >>> ce.update(labels, predicts) |
    >>> print(ce.get())
| ('cross-entropy', 0.57159948348999023) |
| """ |
| def __init__(self, eps=1e-8, name='cross-entropy', |
| output_names=None, label_names=None): |
| super(CrossEntropy, self).__init__( |
| name, eps=eps, |
| output_names=output_names, label_names=label_names) |
| self.eps = eps |
| |
| def update(self, labels, preds): |
| """Updates the internal evaluation result. |
| |
| Parameters |
| ---------- |
| labels : list of `NDArray` |
| The labels of the data. |
| |
| preds : list of `NDArray` |
| Predicted values. |
| """ |
| check_label_shapes(labels, preds) |
| |
| for label, pred in zip(labels, preds): |
| label = label.asnumpy() |
| pred = pred.asnumpy() |
| |
| label = label.ravel() |
| assert label.shape[0] == pred.shape[0] |
| |
| prob = pred[numpy.arange(label.shape[0]), numpy.int64(label)] |
| self.sum_metric += (-numpy.log(prob + self.eps)).sum() |
| self.num_inst += label.shape[0] |
| |
| |
| @register |
| class Loss(EvalMetric): |
| """Dummy metric for directly printing loss. |
| |
| Parameters |
| ---------- |
| name : str |
| Name of this metric instance for display. |
| output_names : list of str, or None |
| Name of predictions that should be used when updating with update_dict. |
| By default include all predictions. |
| label_names : list of str, or None |
| Name of labels that should be used when updating with update_dict. |
| By default include all labels. |
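
    Examples
    --------
    A small sketch (the loss values below are illustrative):

    >>> losses = [mx.nd.array([0.5, 1.5])]
    >>> loss_metric = mx.metric.Loss()
    >>> loss_metric.update(None, losses)
    >>> loss_metric.get()
    ('loss', 1.0)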
| """ |
| def __init__(self, name='loss', |
| output_names=None, label_names=None): |
| super(Loss, self).__init__( |
| name, output_names=output_names, label_names=label_names) |
| |
| def update(self, _, preds): |
| for pred in preds: |
| self.sum_metric += ndarray.sum(pred).asscalar() |
| self.num_inst += pred.size |
| |
| |
| @register |
| class Torch(Loss): |
| """Dummy metric for torch criterions.""" |
| def __init__(self, name='torch', |
| output_names=None, label_names=None): |
| super(Torch, self).__init__( |
| name, output_names=output_names, label_names=label_names) |
| |
| |
| @register |
| class Caffe(Loss): |
| """Dummy metric for caffe criterions.""" |
| def __init__(self, name='caffe', |
| output_names=None, label_names=None): |
| super(Caffe, self).__init__( |
| name, output_names=output_names, label_names=label_names) |
| |
| |
| @register |
| class CustomMetric(EvalMetric): |
| """Computes a customized evaluation metric. |
| |
    The `feval` function can return a `tuple` of (sum_metric, num_inst) or return
    a single scalar sum_metric, in which case num_inst is incremented by 1.
| |
| Parameters |
| ---------- |
| feval : callable(label, pred) |
| Customized evaluation function. |
    name : str, optional
        Name of this metric instance for display. Defaults to the name of `feval`.
| allow_extra_outputs : bool, optional |
| If true, the prediction outputs can have extra outputs. |
| This is useful in RNN, where the states are also produced |
| in outputs for forwarding. (the default is False). |
| output_names : list of str, or None |
| Name of predictions that should be used when updating with update_dict. |
| By default include all predictions. |
| label_names : list of str, or None |
| Name of labels that should be used when updating with update_dict. |
| By default include all labels. |
| |
| Examples |
| -------- |
| >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] |
| >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] |
| >>> feval = lambda x, y : (x + y).mean() |
| >>> eval_metrics = mx.metric.CustomMetric(feval=feval) |
| >>> eval_metrics.update(labels, predicts) |
    >>> print(eval_metrics.get())
| ('custom(<lambda>)', 6.0) |
| """ |
| def __init__(self, feval, name=None, allow_extra_outputs=False, |
| output_names=None, label_names=None): |
| if name is None: |
| name = feval.__name__ |
| if name.find('<') != -1: |
| name = 'custom(%s)' % name |
| super(CustomMetric, self).__init__( |
| name, feval=feval, |
| allow_extra_outputs=allow_extra_outputs, |
| output_names=output_names, label_names=label_names) |
| self._feval = feval |
| self._allow_extra_outputs = allow_extra_outputs |
| |
| def update(self, labels, preds): |
| """Updates the internal evaluation result. |
| |
| Parameters |
| ---------- |
| labels : list of `NDArray` |
| The labels of the data. |
| |
| preds : list of `NDArray` |
| Predicted values. |
| """ |
| if not self._allow_extra_outputs: |
| check_label_shapes(labels, preds) |
| |
| for pred, label in zip(preds, labels): |
| label = label.asnumpy() |
| pred = pred.asnumpy() |
| |
| reval = self._feval(label, pred) |
| if isinstance(reval, tuple): |
| (sum_metric, num_inst) = reval |
| self.sum_metric += sum_metric |
| self.num_inst += num_inst |
| else: |
| self.sum_metric += reval |
| self.num_inst += 1 |
| |
| def get_config(self): |
| raise NotImplementedError("CustomMetric cannot be serialized") |
| |
| |
| # pylint: disable=invalid-name |
| def np(numpy_feval, name=None, allow_extra_outputs=False): |
| """Creates a custom evaluation metric that receives its inputs as numpy arrays. |
| |
| Parameters |
| ---------- |
| numpy_feval : callable(label, pred) |
| Custom evaluation function that receives labels and predictions for a minibatch |
| as numpy arrays and returns the corresponding custom metric as a floating point number. |
| name : str, optional |
| Name of the custom metric. |
| allow_extra_outputs : bool, optional |
| Whether prediction output is allowed to have extra outputs. This is useful in cases |
| like RNN where states are also part of output which can then be fed back to the RNN |
| in the next step. By default, extra outputs are not allowed. |
| |
| Returns |
| ------- |
    CustomMetric
        A custom metric object that applies `numpy_feval` to minibatches of
        labels and predictions.
| |
| Example |
| ------- |
| >>> def custom_metric(label, pred): |
| ... return np.mean(np.abs(label-pred)) |
| ... |
| >>> metric = mx.metric.np(custom_metric) |
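
    A usage sketch with illustrative data:

    >>> labels = [mx.nd.array([2.5, 0.0, 2., 8.])]
    >>> preds = [mx.nd.array([3., -0.5, 2., 7.])]
    >>> metric.update(labels, preds)
    >>> metric.get()
    ('custom_metric', 0.5)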
| """ |
| def feval(label, pred): |
| """Internal eval function.""" |
| return numpy_feval(label, pred) |
| feval.__name__ = numpy_feval.__name__ |
| return CustomMetric(feval, name, allow_extra_outputs) |
| # pylint: enable=invalid-name |