| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| ''' |
| Classes: |
| Evaluation - Container for running an evaluation |
| ''' |
| |
| import logging |
| from ocw.metrics import Metric, UnaryMetric, BinaryMetric |
| from ocw.dataset import Dataset, Bounds |
| import ocw.dataset_processor as DSP |
| |
| import numpy.ma as ma |
| |
| logger = logging.getLogger(__name__) |
| |
| |
| class Evaluation(object): |
| '''Container for running an evaluation |
| |
    An *Evaluation* is the running of one or more metrics on one or more
    target datasets and, depending on the metrics, a reference dataset.
    An Evaluation can handle two types of metrics, ``unary`` and ``binary``.
    Whether an Evaluation is valid depends on the number and type of metrics
    as well as the number of datasets.
| |
| A ``unary`` metric is a metric that runs over a single dataset. If you add |
| a ``unary`` metric to the Evaluation you are only required to add a |
| reference dataset or a target dataset. If there are multiple datasets |
| in the evaluation then the ``unary`` metric is run over all of them. |
| |
| A ``binary`` metric is a metric that runs over a reference dataset and |
| target dataset. If you add a ``binary`` metric you are required to add a |
| reference dataset and at least one target dataset. The ``binary`` metrics |
| are run over every (reference dataset, target dataset) pair in the |
| Evaluation. |
| |
| An Evaluation must have at least one metric to be valid. |
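
    A minimal usage sketch (this assumes a binary ``Bias`` metric is
    available in :mod:`metrics` and that ``ref`` and ``target`` are
    previously loaded :class:`dataset.Dataset` objects)::

        from ocw import metrics

        bias_evaluation = Evaluation(ref, [target], [metrics.Bias()])
        bias_evaluation.run()
        # One masked array per metric; the leading dimension indexes the
        # target datasets.
        bias_results = bias_evaluation.results[0]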
| ''' |
| |
| def __init__(self, reference, targets, metrics, subregions=None): |
| '''Default Evaluation constructor. |
| |
| :param reference: The reference Dataset for the evaluation. |
| :type reference: :class:`dataset.Dataset` |
| |
| :param targets: A list of one or more target datasets for the |
| evaluation. |
| :type targets: :class:`list` of :class:`dataset.Dataset` |
| |
| :param metrics: A list of one or more Metric instances to run |
| in the evaluation. |
| :type metrics: :class:`list` of :mod:`metrics` |
| |
| :param subregions: (Optional) Subregion information to use in the |
| evaluation. A subregion is specified with a Bounds object. |
| :type subregions: :class:`list` of :class:`dataset.Bounds` |
| |
        :raises: TypeError
| ''' |
| #: The reference dataset. |
| self._ref_dataset = reference |
| #: The target dataset(s) which should each be compared with |
| #: the reference dataset when the evaluation is run. |
| self.target_datasets = [] |
| self.add_datasets(targets) |
| |
| #: The list of "binary" metrics (A metric which takes two Datasets) |
| #: that the Evaluation should use. |
| self.metrics = [] |
| #: The list of "unary" metrics (A metric which takes one Dataset) that |
| #: the Evaluation should use. |
| self.unary_metrics = [] |
| |
| # Metrics need to be added to specific lists depending on whether they |
| # are "binary" or "unary" metrics. |
| self.add_metrics(metrics) |
| |
| #: An optional list of subregion bounds to use when running the |
| #: evaluation. |
| self._subregions = subregions |
| |
        #: A list containing the results of running the regular metric
        #: evaluations. Without subregion information this is a list indexed
        #: by metric, where each entry is a masked array whose leading
        #: dimension indexes the target datasets. With subregion information
        #: the list is indexed by subregion first, then by metric.
| self.results = [] |
        #: A list containing the results of running the unary metric
        #: evaluations. Without subregion information this is a list indexed
        #: by metric, where each entry is a masked array whose leading
        #: dimension covers the reference dataset (when one is provided)
        #: followed by the target datasets. With subregion information the
        #: list is indexed by subregion first, then by metric.
| self.unary_results = [] |
| |
| @property |
| def ref_dataset(self): |
| return self._ref_dataset |
| |
| @ref_dataset.setter |
| def ref_dataset(self, value): |
| if not isinstance(value, Dataset): |
| error = ( |
| "Cannot add a dataset that isn't an instance of Dataset. " |
| "Please consult the documentation for additional help." |
| ) |
| raise TypeError(error) |
| self._ref_dataset = value |
| |
| @property |
| def subregions(self): |
| return self._subregions |
| |
| @subregions.setter |
| def subregions(self, value): |
| # If the value is None, we don't need to check that it's well formed! |
| if value: |
| # All of the values passed in the iterable better be Bounds! |
| if not all([isinstance(bound, Bounds) for bound in value]): |
| error = ( |
| "Found invalid subregion information. Expected " |
| "value to be an instance of Bounds." |
| ) |
| raise TypeError(error) |
| self._subregions = value |
| |
| def add_dataset(self, target_dataset): |
| '''Add a Dataset to the Evaluation. |
| |
| A target Dataset is compared against the reference dataset when the |
| Evaluation is run with one or more metrics. |
| |
| :param target_dataset: The target Dataset to add to the Evaluation. |
| :type target_dataset: :class:`dataset.Dataset` |
| |
        :raises TypeError: If the dataset to add isn't an instance of Dataset.
| ''' |
| if not isinstance(target_dataset, Dataset): |
| error = ( |
| "Cannot add a dataset that isn't an instance of Dataset. " |
| "Please consult the documentation for additional help." |
| ) |
| logger.error(error) |
| raise TypeError(error) |
| |
| self.target_datasets.append(target_dataset) |
| |
| def add_datasets(self, target_datasets): |
| '''Add multiple Datasets to the Evaluation. |
| |
| :param target_datasets: The list of datasets that should be added to |
| the Evaluation. |
| :type target_datasets: :class:`list` of :class:`dataset.Dataset` |
| |
        :raises TypeError: If a dataset to add isn't an instance of Dataset.
| ''' |
| for target in target_datasets: |
| self.add_dataset(target) |
| |
| def add_metric(self, metric): |
| '''Add a metric to the Evaluation. |
| |
| A metric is an instance of a class which inherits from metrics.Metric. |
| |
| :param metric: The metric instance to add to the Evaluation. |
| :type metric: :mod:`metrics` |
| |
        :raises TypeError: If the metric to add isn't an instance of a class
            that inherits from metrics.Metric.
| ''' |
| if not isinstance(metric, Metric): |
| error = ( |
| "Cannot add a metric that doesn't inherit from Metric. " |
| "Please consult the documentation for additional help." |
| ) |
| logger.error(error) |
| raise TypeError(error) |
| |
| if isinstance(metric, UnaryMetric): |
| self.unary_metrics.append(metric) |
| else: |
| self.metrics.append(metric) |
| |
| def add_metrics(self, metrics): |
| '''Add multiple metrics to the Evaluation. |
| |
| A metric is an instance of a class which inherits from metrics.Metric. |
| |
| :param metrics: The list of metric instances to add to the Evaluation. |
| :type metrics: :class:`list` of :mod:`metrics` |
| |
        :raises TypeError: If a metric to add isn't an instance of a class
            that inherits from metrics.Metric.
| ''' |
| for metric in metrics: |
| self.add_metric(metric) |
| |
| def run(self): |
| '''Run the evaluation. |
| |
| There are two phases to a run of the Evaluation. First, if there are |
| any "binary" metrics they are run through the evaluation. Binary |
| metrics are only run if there is a reference dataset and at least one |
| target dataset. |
| |
| If there is subregion information provided then each dataset is subset |
| before being run through the binary metrics. |
| |
        .. note:: Only the binary metrics are subset with subregion
            information.
| |
| Next, if there are any "unary" metrics they are run. Unary metrics are |
| only run if there is at least one target dataset or a reference dataset. |
| ''' |
| if not self._evaluation_is_valid(): |
| error = "The evaluation is invalid. Check the docs for help." |
| logger.warning(error) |
| return |
| |
| if self._should_run_regular_metrics(): |
| if self.subregions: |
| self.results = self._run_subregion_evaluation() |
| else: |
| self.results = self._run_no_subregion_evaluation() |
| |
| if self._should_run_unary_metrics(): |
| if self.subregions: |
| self.unary_results = self._run_subregion_unary_evaluation() |
| else: |
| self.unary_results = self._run_unary_metric_evaluation() |
| |
| def _evaluation_is_valid(self): |
| '''Check if the evaluation is well-formed. |
| |
| * If there are no metrics or no datasets it's invalid. |
| * If there is a unary metric there must be a reference dataset or at |
| least one target dataset. |
| * If there is a regular metric there must be a reference dataset and |
| at least one target dataset. |
| ''' |
| run_reg = self._should_run_regular_metrics() |
| run_unary = self._should_run_unary_metrics() |
        reg_valid = (self.ref_dataset is not None and
                     len(self.target_datasets) > 0)
        unary_valid = (self.ref_dataset is not None or
                       len(self.target_datasets) > 0)
| |
| if run_reg and run_unary: |
| return reg_valid and unary_valid |
| elif run_reg: |
| return reg_valid |
| elif run_unary: |
| return unary_valid |
| else: |
| return False |
| |
| def _should_run_regular_metrics(self): |
| return len(self.metrics) > 0 |
| |
| def _should_run_unary_metrics(self): |
| return len(self.unary_metrics) > 0 |
| |
| def _run_subregion_evaluation(self): |
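        # Intermediate results are accumulated as
        # results[target][metric][subregion] and then reshaped by
        # convert_evaluation_result().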
| results = [] |
| new_refs = [DSP.subset(self.ref_dataset, s) for s in self.subregions] |
| |
| for target in self.target_datasets: |
| results.append([]) |
| new_targets = [DSP.subset(target, s) for s in self.subregions] |
| |
| for metric in self.metrics: |
| results[-1].append([]) |
| |
| for i in range(len(self.subregions)): |
| new_ref = new_refs[i] |
| new_tar = new_targets[i] |
| |
| run_result = metric.run(new_ref, new_tar) |
| results[-1][-1].append(run_result) |
| return convert_evaluation_result(results, subregion=True) |
| |
| def _run_no_subregion_evaluation(self): |
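        # Intermediate results are accumulated as results[target][metric]
        # and then reshaped by convert_evaluation_result().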
| results = [] |
| for target in self.target_datasets: |
| results.append([]) |
| for metric in self.metrics: |
| run_result = metric.run(self.ref_dataset, target) |
| results[-1].append(run_result) |
| return convert_evaluation_result(results) |
| |
| def _run_unary_metric_evaluation(self): |
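        # Intermediate results are accumulated as
        # unary_results[metric][dataset], with the reference dataset first
        # when one is present, and then reshaped by
        # convert_unary_evaluation_result().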
| unary_results = [] |
| for metric in self.unary_metrics: |
| unary_results.append([]) |
| # Unary metrics should be run over the reference Dataset also |
| if self.ref_dataset: |
| unary_results[-1].append(metric.run(self.ref_dataset)) |
| |
| for target in self.target_datasets: |
| unary_results[-1].append(metric.run(target)) |
| return convert_unary_evaluation_result(unary_results) |
| |
| def _run_subregion_unary_evaluation(self): |
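        # Intermediate results are accumulated as
        # unary_results[metric][subregion][dataset], with the reference
        # dataset first when one is present, and then reshaped by
        # convert_unary_evaluation_result().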
| unary_results = [] |
| if self.ref_dataset: |
| new_refs = [DSP.subset(self.ref_dataset, s) |
| for s in self.subregions] |
| |
| new_targets = [ |
| [DSP.subset(t, s) for s in self.subregions] |
| for t in self.target_datasets |
| ] |
| |
| for metric in self.unary_metrics: |
| unary_results.append([]) |
| |
| for i in range(len(self.subregions)): |
| unary_results[-1].append([]) |
| |
| if self.ref_dataset: |
| unary_results[-1][-1].append(metric.run(new_refs[i])) |
| |
| for t in range(len(self.target_datasets)): |
| unary_results[-1][-1].append(metric.run(new_targets[t][i])) |
| |
| return convert_unary_evaluation_result(unary_results, subregion=True) |
| |
| def __str__(self): |
| formatted_repr = ( |
| "<Evaluation - ref_dataset: {}, " |
| "target_dataset(s): {}, " |
| "binary_metric(s): {}, " |
| "unary_metric(s): {}, " |
| "subregion(s): {}>" |
| ) |
| |
| return formatted_repr.format( |
| str(self._ref_dataset), |
| [str(ds) for ds in self.target_datasets], |
| [str(m) for m in self.metrics], |
| [str(m) for m in self.unary_metrics], |
| str(self.subregions) |
| ) |
| |
| |
| def convert_evaluation_result(evaluation_result, subregion=False): |
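    '''Reshape binary metric results into per-metric masked arrays.

    The input is expected to be indexed as
    ``evaluation_result[target][metric]``, with a trailing ``[subregion]``
    index when ``subregion`` is ``True``. The output is a list indexed by
    metric (or by subregion and then metric) whose entries are masked
    arrays whose leading dimension indexes the target datasets.
    '''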
| if not subregion: |
| nmodel = len(evaluation_result) |
| nmetric = len(evaluation_result[0]) |
| results = [] |
| for imetric in range(nmetric): |
| if evaluation_result[0][imetric].ndim != 0: |
| result_shape = list(evaluation_result[0][imetric].shape) |
| result_shape.insert(0, nmodel) |
| result = ma.zeros(result_shape) |
| for imodel in range(nmodel): |
| result[imodel, :] = evaluation_result[imodel][imetric] |
| else: |
| result = ma.zeros(nmodel) |
| for imodel in range(nmodel): |
| result[imodel] = evaluation_result[imodel][imetric] |
| results.append(result) |
| return results |
| else: |
| nmodel = len(evaluation_result) |
| nmetric = len(evaluation_result[0]) |
| nsubregion = len(evaluation_result[0][0]) |
| |
| results = [] |
| for isubregion in range(nsubregion): |
| subregion_results = [] |
| for imetric in range(nmetric): |
| if evaluation_result[0][imetric][isubregion].ndim != 0: |
| result_shape = list(evaluation_result[0][ |
| imetric][isubregion].shape) |
| result_shape.insert(0, nmodel) |
| result = ma.zeros(result_shape) |
| for imodel in range(nmodel): |
| result[imodel, :] = evaluation_result[ |
| imodel][imetric][isubregion] |
| else: |
| result = ma.zeros(nmodel) |
| for imodel in range(nmodel): |
| result[imodel] = evaluation_result[ |
| imodel][imetric][isubregion] |
| subregion_results.append(result) |
| results.append(subregion_results) |
| return results |
| |
| |
| def convert_unary_evaluation_result(evaluation_result, subregion=False): |
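    '''Reshape unary metric results into per-metric masked arrays.

    The input is expected to be indexed as
    ``evaluation_result[metric][dataset]``, with a ``[subregion]`` index
    between the two when ``subregion`` is ``True``. The output is a list
    indexed by metric (or by subregion and then metric) whose entries are
    masked arrays whose leading dimension indexes the datasets.
    '''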
| if not subregion: |
| nmetric = len(evaluation_result) |
| nmodel = len(evaluation_result[0]) |
| results = [] |
| for imetric in range(nmetric): |
| if evaluation_result[imetric][0].ndim != 0: |
| result_shape = list(evaluation_result[imetric][0].shape) |
| result_shape.insert(0, nmodel) |
| result = ma.zeros(result_shape) |
| for imodel in range(nmodel): |
| result[imodel, :] = evaluation_result[imetric][imodel] |
| else: |
| result = ma.zeros(nmodel) |
| for imodel in range(nmodel): |
| result[imodel] = evaluation_result[imetric][imodel] |
| results.append(result) |
| return results |
| else: |
| nmetric = len(evaluation_result) |
| nsubregion = len(evaluation_result[0]) |
| nmodel = len(evaluation_result[0][0]) |
| |
| results = [] |
| for isubregion in range(nsubregion): |
| subregion_results = [] |
| for imetric in range(nmetric): |
| if evaluation_result[imetric][isubregion][0].ndim != 0: |
| result_shape = list(evaluation_result[imetric][ |
| isubregion][0].shape) |
| result_shape.insert(0, nmodel) |
| result = ma.zeros(result_shape) |
| for imodel in range(nmodel): |
| result[imodel, :] = evaluation_result[ |
| imetric][isubregion][imodel] |
| else: |
| result = ma.zeros(nmodel) |
| for imodel in range(nmodel): |
| result[imodel] = evaluation_result[ |
| imetric][isubregion][imodel] |
| subregion_results.append(result) |
| results.append(subregion_results) |
| return results |