# coding: utf-8
# pylint: disable=no-member
"""Online evaluation metric module."""
from __future__ import absolute_import
from collections import OrderedDict
import pickle
import numpy
from . import base
from .base import numeric_types, string_types
from . import ndarray
from . import registry
def check_label_shapes(labels, preds, shape=0):
"""Check to see if the two arrays are the same size."""
if shape == 0:
label_shape, pred_shape = len(labels), len(preds)
else:
label_shape, pred_shape = labels.shape, preds.shape
if label_shape != pred_shape:
raise ValueError("Shape of labels {} does not match shape of "
"predictions {}".format(label_shape, pred_shape))
class EvalMetric(object):
"""Base class of all evaluation metrics."""
def __init__(self, name, num=None, output_names=None,
label_names=None, **kwargs):
self.name = name
self.num = num
self.output_names = output_names
self.label_names = label_names
self._kwargs = kwargs
self.reset()
def __str__(self):
return "EvalMetric: {}".format(dict(self.get_name_value()))
def get_config(self):
"""Save configurations of metric. Can be recreated
from configs with metric.create(**config)
"""
config = self._kwargs.copy()
config.update({
'metric': self.__class__.__name__,
'name': self.name,
'num': self.num,
'output_names': self.output_names,
'label_names': self.label_names,
'__type__': 'metric',
'__version__': base.__version__})
return config
def update_dict(self, label, pred):
"""Update the internal evaluation with named label and pred
Parameters
----------
labels : OrderedDict of str -> NDArray
name to array mapping for labels.
preds : list of NDArray
name to array mapping of predicted outputs.
"""
if self.output_names is not None:
pred = [pred[name] for name in self.output_names]
else:
pred = pred.values()
if self.label_names is not None:
label = [label[name] for name in self.label_names]
else:
label = label.values()
self.update(label, pred)
def update(self, labels, preds):
"""Update the internal evaluation.
Parameters
----------
labels : list of NDArray
The labels of the data.
preds : list of NDArray
Predicted values.
"""
raise NotImplementedError()
def reset(self):
"""Clear the internal statistics to initial state."""
if self.num is None:
self.num_inst = 0
self.sum_metric = 0.0
else:
self.num_inst = [0] * self.num
self.sum_metric = [0.0] * self.num
def get(self):
"""Get the current evaluation result.
Returns
-------
name : str or list of str
Name of the metric, or a list of names when ``num`` is set.
value : float or list of float
Value of the evaluation, or a list of values when ``num`` is set.
"""
if self.num is None:
if self.num_inst == 0:
return (self.name, float('nan'))
else:
return (self.name, self.sum_metric / self.num_inst)
else:
names = ['%s_%d'%(self.name, i) for i in range(self.num)]
values = [x / y if y != 0 else float('nan') \
for x, y in list(zip(self.sum_metric, self.num_inst))]
return (names, values)
def get_name_value(self):
"""Get zipped name and value pairs"""
name, value = self.get()
if not isinstance(name, list):
name = [name]
if not isinstance(value, list):
value = [value]
return list(zip(name, value))
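# Illustrative sketch (not part of the upstream module): a minimal, hypothetical
# EvalMetric subclass showing the update()/get() contract. It is deliberately
# left unregistered since the registry helpers are defined just below.
class _ExampleExactMatch(EvalMetric):
    """Toy metric: fraction of elements where prediction equals label exactly."""
    def __init__(self, name='example_exact_match', **kwargs):
        super(_ExampleExactMatch, self).__init__(name, **kwargs)

    def update(self, labels, preds):
        check_label_shapes(labels, preds)
        for label, pred in zip(labels, preds):
            label = label.asnumpy().ravel()
            pred = pred.asnumpy().ravel()
            # Accumulate element-wise matches and the element count.
            self.sum_metric += (label == pred).sum()
            self.num_inst += label.size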
# pylint: disable=invalid-name
register = registry.get_register_func(EvalMetric, 'metric')
alias = registry.get_alias_func(EvalMetric, 'metric')
_create = registry.get_create_func(EvalMetric, 'metric')
# pylint: enable=invalid-name
def create(metric, *args, **kwargs):
"""Create an evaluation metric.
Parameters
----------
metric : str or callable
The name of the metric, or a function
providing statistics given pred, label NDArray.
*args : list
additional arguments to metric constructor
**kwargs : dict
additional arguments to metric constructor
Returns
-------
created metric
"""
if callable(metric):
return CustomMetric(metric, *args, **kwargs)
elif isinstance(metric, list):
composite_metric = CompositeEvalMetric()
for child_metric in metric:
composite_metric.add(create(child_metric, *args, **kwargs))
return composite_metric
return _create(metric, *args, **kwargs)
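# Illustrative sketch (not part of the upstream module): the three ways create()
# dispatches. 'acc' and 'mse' are names/aliases registered further down in this
# file; the callable should be a picklable module-level function (not a lambda)
# because CustomMetric pickles it. All names below are hypothetical.
def _example_squared_error(label, pred):
    """Hypothetical feval: mean squared error between label and flattened pred."""
    return ((label - pred.ravel()) ** 2).mean()

def _example_create():
    """Hypothetical helper showing how create() resolves its argument."""
    by_name = create('acc')                   # registered name or alias -> Accuracy
    by_list = create(['acc', 'mse'])          # list -> CompositeEvalMetric
    by_func = create(_example_squared_error)  # callable -> CustomMetric
    return by_name, by_list, by_func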
@register
@alias('composite')
class CompositeEvalMetric(EvalMetric):
"""Manage multiple evaluation metrics."""
def __init__(self, metrics=None, **kwargs):
super(CompositeEvalMetric, self).__init__('composite', **kwargs)
if metrics is None:
metrics = []
self.metrics = metrics
def add(self, metric):
"""Add a child metric."""
self.metrics.append(metric)
def get_metric(self, index):
"""Get a child metric."""
try:
return self.metrics[index]
except IndexError:
return ValueError("Metric index {} is out of range 0 and {}".format(
index, len(self.metrics)))
def update_dict(self, labels, preds):
if self.label_names is not None:
labels = OrderedDict([i for i in labels.items()
if i[0] in self.label_names])
if self.output_names is not None:
preds = OrderedDict([i for i in preds.items()
if i[0] in self.output_names])
for metric in self.metrics:
metric.update_dict(labels, preds)
def update(self, labels, preds):
for metric in self.metrics:
metric.update(labels, preds)
def reset(self):
try:
for metric in self.metrics:
metric.reset()
except AttributeError:
pass
def get(self):
names = []
values = []
for metric in self.metrics:
name, value = metric.get()
if isinstance(name, str):
name = [name]
if isinstance(value, numeric_types):
value = [value]
names.extend(name)
values.extend(value)
return (names, values)
def get_config(self):
config = super(CompositeEvalMetric, self).get_config()
config.update({'metrics': [i.get_config() for i in self.metrics]})
return config
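# Illustrative sketch (not part of the upstream module): combining two child
# metrics in a CompositeEvalMetric. Accuracy and TopKAccuracy are defined below;
# they are only looked up when this hypothetical helper is called.
def _example_composite():
    """Hypothetical helper showing CompositeEvalMetric usage."""
    metric = CompositeEvalMetric()
    metric.add(Accuracy())
    metric.add(TopKAccuracy(top_k=2))
    preds = [ndarray.array([[0.7, 0.2, 0.1], [0.2, 0.3, 0.5]])]
    labels = [ndarray.array([0, 2])]
    metric.update(labels, preds)
    return metric.get()  # (['accuracy', 'top_k_accuracy_2'], [1.0, 1.0])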
########################
# CLASSIFICATION METRICS
########################
@register
@alias('acc')
class Accuracy(EvalMetric):
"""Calculate accuracy
Parameters
----------
axis : int, default=1
The axis that represents classes
"""
def __init__(self, axis=1, name='accuracy', **kwargs):
super(Accuracy, self).__init__(name, axis=axis, **kwargs)
self.axis = axis
def update(self, labels, preds):
check_label_shapes(labels, preds)
for label, pred_label in zip(labels, preds):
if pred_label.shape != label.shape:
pred_label = ndarray.argmax(pred_label, axis=self.axis)
pred_label = pred_label.asnumpy().astype('int32')
label = label.asnumpy().astype('int32')
check_label_shapes(label, pred_label)
self.sum_metric += (pred_label.flat == label.flat).sum()
self.num_inst += len(pred_label.flat)
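# Illustrative sketch (not part of the upstream module): Accuracy accepts either
# class scores (argmax is taken along `axis`) or already-decoded class indices.
def _example_accuracy():
    """Hypothetical helper showing Accuracy usage."""
    acc = Accuracy()
    preds = [ndarray.array([[0.1, 0.1, 0.8], [0.2, 0.7, 0.1]])]  # 2 samples, 3 classes
    labels = [ndarray.array([2, 1])]
    acc.update(labels, preds)
    return acc.get()  # ('accuracy', 1.0)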
@register
@alias('top_k_accuracy', 'top_k_acc')
class TopKAccuracy(EvalMetric):
"""Calculate top k predictions accuracy"""
def __init__(self, top_k=1, name='top_k_accuracy', **kwargs):
super(TopKAccuracy, self).__init__(name, top_k=top_k, **kwargs)
self.top_k = top_k
assert(self.top_k > 1), 'Please use Accuracy if top_k is no more than 1'
self.name += '_%d' % self.top_k
def update(self, labels, preds):
check_label_shapes(labels, preds)
for label, pred_label in zip(labels, preds):
assert(len(pred_label.shape) <= 2), 'Predictions should be no more than 2 dims'
pred_label = numpy.argsort(pred_label.asnumpy().astype('float32'), axis=1)
label = label.asnumpy().astype('int32')
check_label_shapes(label, pred_label)
num_samples = pred_label.shape[0]
num_dims = len(pred_label.shape)
if num_dims == 1:
self.sum_metric += (pred_label.flat == label.flat).sum()
elif num_dims == 2:
num_classes = pred_label.shape[1]
top_k = min(num_classes, self.top_k)
for j in range(top_k):
self.sum_metric += (pred_label[:, num_classes - 1 - j].flat == label.flat).sum()
self.num_inst += num_samples
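# Illustrative sketch (not part of the upstream module): a sample counts as
# correct if the true class is among the top_k highest-scored classes.
def _example_top_k_accuracy():
    """Hypothetical helper showing TopKAccuracy usage."""
    top2 = TopKAccuracy(top_k=2)  # metric name becomes 'top_k_accuracy_2'
    preds = [ndarray.array([[0.40, 0.35, 0.25], [0.1, 0.3, 0.6]])]
    labels = [ndarray.array([0, 1])]  # second label is only the 2nd-best guess
    top2.update(labels, preds)
    return top2.get()  # ('top_k_accuracy_2', 1.0)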
@register
class F1(EvalMetric):
"""Calculate the F1 score of a binary classification problem."""
def __init__(self, name='f1', **kwargs):
super(F1, self).__init__(name, **kwargs)
def update(self, labels, preds):
check_label_shapes(labels, preds)
for label, pred in zip(labels, preds):
pred = pred.asnumpy()
label = label.asnumpy().astype('int32')
pred_label = numpy.argmax(pred, axis=1)
check_label_shapes(label, pred)
if len(numpy.unique(label)) > 2:
raise ValueError("F1 currently only supports binary classification.")
true_positives, false_positives, false_negatives = 0., 0., 0.
for y_pred, y_true in zip(pred_label, label):
if y_pred == 1 and y_true == 1:
true_positives += 1.
elif y_pred == 1 and y_true == 0:
false_positives += 1.
elif y_pred == 0 and y_true == 1:
false_negatives += 1.
if true_positives + false_positives > 0:
precision = true_positives / (true_positives + false_positives)
else:
precision = 0.
if true_positives + false_negatives > 0:
recall = true_positives / (true_positives + false_negatives)
else:
recall = 0.
if precision + recall > 0:
f1_score = 2 * precision * recall / (precision + recall)
else:
f1_score = 0.
self.sum_metric += f1_score
self.num_inst += 1
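# Illustrative sketch (not part of the upstream module): F1 expects binary labels
# and two-column scores, with the positive class in column 1.
def _example_f1():
    """Hypothetical helper showing F1 usage."""
    f1 = F1()
    preds = [ndarray.array([[0.8, 0.2], [0.3, 0.7], [0.6, 0.4]])]
    labels = [ndarray.array([0, 1, 1])]
    f1.update(labels, preds)
    return f1.get()  # ('f1', ~0.67): precision 1.0, recall 0.5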
@register
class Perplexity(EvalMetric):
"""Calculate perplexity
Parameters
----------
ignore_label : int or None
Index of the invalid label to ignore when counting
(usually -1). If None, all entries are included.
"""
def __init__(self, ignore_label, name='perplexity', **kwargs):
super(Perplexity, self).__init__(name, ignore_label=ignore_label, **kwargs)
self.ignore_label = ignore_label
def update(self, labels, preds):
assert len(labels) == len(preds)
loss = 0.
num = 0
probs = []
for label, pred in zip(labels, preds):
assert label.size == pred.size/pred.shape[-1], \
"shape mismatch: %s vs. %s"%(label.shape, pred.shape)
label = label.as_in_context(pred.context).astype(dtype='int32').reshape((label.size,))
pred = ndarray.batch_take(pred, label)
probs.append(pred)
for label, prob in zip(labels, probs):
prob = prob.asnumpy()
if self.ignore_label is not None:
ignore = label.asnumpy().flatten() == self.ignore_label
prob = prob*(1-ignore) + ignore
num += prob.size - ignore.sum()
else:
num += prob.size
loss += -numpy.log(numpy.maximum(1e-10, prob)).sum()
self.sum_metric += numpy.exp(loss / num)
self.num_inst += 1
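# Illustrative sketch (not part of the upstream module): perplexity over softmax
# outputs, with no label index ignored.
def _example_perplexity():
    """Hypothetical helper showing Perplexity usage."""
    ppl = Perplexity(ignore_label=None)
    preds = [ndarray.array([[0.9, 0.1], [0.2, 0.8]])]  # per-class probabilities
    labels = [ndarray.array([0, 1])]
    ppl.update(labels, preds)
    return ppl.get()  # ('perplexity', exp(-(log 0.9 + log 0.8) / 2) ~ 1.18)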
####################
# REGRESSION METRICS
####################
@register
class MAE(EvalMetric):
"""Calculate Mean Absolute Error loss"""
def __init__(self, name='mae', **kwargs):
super(MAE, self).__init__(name, **kwargs)
def update(self, labels, preds):
check_label_shapes(labels, preds)
for label, pred in zip(labels, preds):
label = label.asnumpy()
pred = pred.asnumpy()
if len(label.shape) == 1:
label = label.reshape(label.shape[0], 1)
self.sum_metric += numpy.abs(label - pred).mean()
self.num_inst += 1 # numpy.prod(label.shape)
@register
class MSE(EvalMetric):
"""Calculate Mean Squared Error loss"""
def __init__(self, name='mse', **kwargs):
super(MSE, self).__init__(name, **kwargs)
def update(self, labels, preds):
check_label_shapes(labels, preds)
for label, pred in zip(labels, preds):
label = label.asnumpy()
pred = pred.asnumpy()
if len(label.shape) == 1:
label = label.reshape(label.shape[0], 1)
self.sum_metric += ((label - pred)**2.0).mean()
self.num_inst += 1 # numpy.prod(label.shape)
@register
class RMSE(EvalMetric):
"""Calculate Root Mean Squred Error loss"""
def __init__(self, name='rmse', **kwargs):
super(RMSE, self).__init__(name, **kwargs)
def update(self, labels, preds):
check_label_shapes(labels, preds)
for label, pred in zip(labels, preds):
label = label.asnumpy()
pred = pred.asnumpy()
if len(label.shape) == 1:
label = label.reshape(label.shape[0], 1)
self.sum_metric += numpy.sqrt(((label - pred)**2.0).mean())
self.num_inst += 1
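# Illustrative sketch (not part of the upstream module): the regression metrics
# share one calling convention; 1-D labels are reshaped to a column internally.
def _example_regression_metrics():
    """Hypothetical helper showing MAE/MSE/RMSE usage."""
    labels = [ndarray.array([2.0, 3.0])]
    preds = [ndarray.array([[2.5], [2.0]])]
    results = []
    for metric in (MAE(), MSE(), RMSE()):
        metric.update(labels, preds)
        results.append(metric.get())
    return results  # [('mae', 0.75), ('mse', 0.625), ('rmse', ~0.79)]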
@register
@alias('ce')
class CrossEntropy(EvalMetric):
"""Calculate Cross Entropy loss"""
def __init__(self, eps=1e-8, name='cross-entropy', **kwargs):
super(CrossEntropy, self).__init__(name, eps=eps, **kwargs)
self.eps = eps
def update(self, labels, preds):
check_label_shapes(labels, preds)
for label, pred in zip(labels, preds):
label = label.asnumpy()
pred = pred.asnumpy()
label = label.ravel()
assert label.shape[0] == pred.shape[0]
prob = pred[numpy.arange(label.shape[0]), numpy.int64(label)]
self.sum_metric += (-numpy.log(prob + self.eps)).sum()
self.num_inst += label.shape[0]
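# Illustrative sketch (not part of the upstream module): cross-entropy takes
# per-class probabilities and integer labels indexing the target column.
def _example_cross_entropy():
    """Hypothetical helper showing CrossEntropy usage."""
    ce = CrossEntropy()
    preds = [ndarray.array([[0.8, 0.2], [0.3, 0.7]])]
    labels = [ndarray.array([0, 1])]
    ce.update(labels, preds)
    return ce.get()  # ('cross-entropy', ~0.29)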
@register
class Loss(EvalMetric):
"""Dummy metric for directly printing loss"""
def __init__(self, name='loss', **kwargs):
super(Loss, self).__init__(name, **kwargs)
def update(self, _, preds):
for pred in preds:
self.sum_metric += ndarray.sum(pred).asscalar()
self.num_inst += pred.size
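# Illustrative sketch (not part of the upstream module): Loss simply averages
# whatever loss values the network outputs, ignoring the labels.
def _example_loss():
    """Hypothetical helper showing the Loss pass-through metric."""
    loss = Loss()
    loss.update(None, [ndarray.array([0.2, 0.4])])
    return loss.get()  # ('loss', ~0.3)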
@register
class Torch(Loss):
"""Dummy metric for torch criterions"""
def __init__(self, name='torch', **kwargs):
super(Torch, self).__init__(name, **kwargs)
@register
class Caffe(Loss):
"""Dummy metric for caffe criterions"""
def __init__(self, name='caffe', **kwargs):
super(Caffe, self).__init__(name, **kwargs)
@register
class CustomMetric(EvalMetric):
"""Custom evaluation metric that takes a NDArray function.
Parameters
----------
feval : callable(label, pred)
Customized evaluation function.
name : str, optional
The name of the metric
allow_extra_outputs : bool
If true, the prediction is allowed to contain extra outputs that
are ignored by the metric. This is useful for RNNs, where hidden
states are also returned as outputs for forwarding.
"""
def __init__(self, feval, name=None, allow_extra_outputs=False, **kwargs):
if isinstance(feval, string_types):
feval = pickle.loads(feval)
if name is None:
name = feval.__name__
if name.find('<') != -1:
name = 'custom(%s)' % name
super(CustomMetric, self).__init__(
name, feval=pickle.dumps(feval),
allow_extra_outputs=allow_extra_outputs,
**kwargs)
self._feval = feval
self._allow_extra_outputs = allow_extra_outputs
def update(self, labels, preds):
if not self._allow_extra_outputs:
check_label_shapes(labels, preds)
for pred, label in zip(preds, labels):
label = label.asnumpy()
pred = pred.asnumpy()
reval = self._feval(label, pred)
if isinstance(reval, tuple):
(sum_metric, num_inst) = reval
self.sum_metric += sum_metric
self.num_inst += num_inst
else:
self.sum_metric += reval
self.num_inst += 1
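# Illustrative sketch (not part of the upstream module): because CustomMetric
# pickles feval, the callable should be a picklable module-level function rather
# than a lambda or nested function. All names below are hypothetical.
def _example_error_rate(label, pred):
    """Hypothetical feval: fraction of samples whose argmax prediction is wrong."""
    return (pred.argmax(axis=1) != label.ravel()).mean()

def _example_custom_metric():
    """Hypothetical helper showing CustomMetric usage."""
    metric = CustomMetric(_example_error_rate, name='error_rate')
    preds = [ndarray.array([[0.8, 0.2], [0.3, 0.7]])]
    labels = [ndarray.array([0, 0])]
    metric.update(labels, preds)
    return metric.get()  # ('error_rate', 0.5)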
# pylint: disable=invalid-name
def np(numpy_feval, name=None, allow_extra_outputs=False):
"""Create a customized metric from numpy function.
Parameters
----------
numpy_feval : callable(label, pred)
Customized evaluation function.
This will get called with the labels and predictions
for a minibatch, each as numpy arrays. This function
should return a single float.
name : str, optional
The name of the metric.
allow_extra_outputs : bool
If true, the prediction is allowed to contain extra outputs that
are ignored by the metric. This is useful for RNNs, where hidden
states are also returned as outputs for forwarding.
"""
def feval(label, pred):
"""Internal eval function."""
return numpy_feval(label, pred)
feval.__name__ = numpy_feval.__name__
return CustomMetric(feval, name, allow_extra_outputs)
# pylint: enable=invalid-name