CLIMATE-658 - Restructure evaluation results
- The evaluation results are now returned as masked arrays, grouped per metric (and per subregion when subregions are used), with the datasets indexed along axis 0.
- Update _run_subregion_evaluation, _run_no_subregion_evaluation, _run_unary_metric_evaluation, and _run_subregion_unary_evaluation in evaluation.py to convert their nested-list results.
- Update the result-shape tests in test_evaluation.py accordingly.
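For illustration, a minimal sketch of the new access pattern (hypothetical names; `bias_eval` stands for any Evaluation configured with Bias metrics, as in the updated tests below):

    # After run(), results holds one masked array per metric;
    # axis 0 of each array indexes the target datasets.
    bias_eval.run()
    per_metric = bias_eval.results[0]  # masked array for the first metric
    first_target = per_metric[0]       # that metric's result for target 0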
diff --git a/examples/knmi_to_cru31_full_bias.py b/examples/knmi_to_cru31_full_bias.py
index c6dac47..a241442 100644
--- a/examples/knmi_to_cru31_full_bias.py
+++ b/examples/knmi_to_cru31_full_bias.py
@@ -28,7 +28,6 @@
import ocw.evaluation as evaluation
import ocw.metrics as metrics
import ocw.plotter as plotter
-
# File URL leader
FILE_LEADER = "http://zipper.jpl.nasa.gov/dist/"
# This way we can easily adjust the time span of the retrievals
diff --git a/ocw/evaluation.py b/ocw/evaluation.py
index 89e34b4..ff9ad33 100644
--- a/ocw/evaluation.py
+++ b/ocw/evaluation.py
@@ -25,6 +25,8 @@
from dataset import Dataset, Bounds
import ocw.dataset_processor as DSP
+import numpy.ma as ma
+
logger = logging.getLogger(__name__)
class Evaluation(object):
@@ -285,7 +287,7 @@
run_result = metric.run(new_ref, new_tar)
results[-1][-1].append(run_result)
- return results
+ return convert_evaluation_result(results, subregion=True)
def _run_no_subregion_evaluation(self):
results = []
@@ -294,7 +296,7 @@
for metric in self.metrics:
run_result = metric.run(self.ref_dataset, target)
results[-1].append(run_result)
- return results
+ return convert_evaluation_result(results)
def _run_unary_metric_evaluation(self):
unary_results = []
@@ -306,7 +308,7 @@
for target in self.target_datasets:
unary_results[-1].append(metric.run(target))
- return unary_results
+ return convert_unary_evaluation_result(unary_results)
def _run_subregion_unary_evaluation(self):
unary_results = []
@@ -330,7 +332,7 @@
for t in range(len(self.target_datasets)):
unary_results[-1][-1].append(metric.run(new_targets[t][i]))
- return unary_results
+ return convert_unary_evaluation_result(unary_results, subregion=True)
def __str__(self):
formatted_repr = (
@@ -349,3 +351,65 @@
str(self.subregions)
)
+def convert_evaluation_result(evaluation_result, subregion=False):
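+ ''' Flatten nested evaluation results into masked arrays.
+
+ Without subregions the input is results[target][metric] and the output is
+ one masked array per metric with the target datasets along axis 0. With
+ subregions the input is results[target][metric][subregion] and the output
+ is indexed as results[subregion][metric], targets along axis 0.
+ '''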
+ if not subregion:
+ nmodel = len(evaluation_result)
+ nmetric = len(evaluation_result[0])
+ results = []
+ for imetric in range(nmetric):
+ result_shape = list(evaluation_result[0][imetric].shape)
+ result_shape.insert(0, nmodel)
+ result = ma.zeros(result_shape)
+ for imodel in range(nmodel):
+ result[imodel,:] = evaluation_result[imodel][imetric]
+ results.append(result)
+ return results
+ else:
+ nmodel = len(evaluation_result)
+ nmetric = len(evaluation_result[0])
+ nsubregion = len(evaluation_result[0][0])
+
+ results = []
+ for isubregion in range(nsubregion):
+ subregion_results = []
+ for imetric in range(nmetric):
+ result_shape = list(evaluation_result[0][imetric][isubregion].shape)
+ result_shape.insert(0, nmodel)
+ result = ma.zeros(result_shape)
+ for imodel in range(nmodel):
+ result[imodel,:] = evaluation_result[imodel][imetric][isubregion]
+ subregion_results.append(result)
+ results.append(subregion_results)
+ return results
+
+def convert_unary_evaluation_result(evaluation_result, subregion=False):
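+ ''' Flatten nested unary results into masked arrays.
+
+ Without subregions the input is results[metric][dataset] and the output is
+ one masked array per metric with the datasets along axis 0. With subregions
+ the input is results[metric][subregion][dataset] and the output is indexed
+ as results[subregion][metric], datasets along axis 0.
+ '''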
+ if not subregion:
+ nmetric = len(evaluation_result)
+ nmodel = len(evaluation_result[0])
+ results = []
+ for imetric in range(nmetric):
+ result_shape = list(evaluation_result[imetric][0].shape)
+ result_shape.insert(0, nmodel)
+ result = ma.zeros(result_shape)
+ for imodel in range(nmodel):
+ result[imodel,:] = evaluation_result[imetric][imodel]
+ results.append(result)
+ return results
+ else:
+ nmetric = len(evaluation_result)
+ nsubregion = len(evaluation_result[0])
+ nmodel = len(evaluation_result[0][0])
+
+ results = []
+ for isubregion in range(nsubregion):
+ subregion_results = []
+ for imetric in range(nmetric):
+ result_shape = list(evaluation_result[imetric][isubregion][0].shape)
+ result_shape.insert(0, nmodel)
+ result = ma.zeros(result_shape)
+ for imodel in range(nmodel):
+ result[imodel,:] = evaluation_result[imetric][isubregion][imodel]
+ subregion_results.append(result)
+ results.append(subregion_results)
+ return results
+
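As a quick sanity check, a hypothetical snippet (not part of the patch) showing what convert_evaluation_result does to the nested lists, with toy shapes chosen arbitrarily:

    import numpy.ma as ma
    from ocw.evaluation import convert_evaluation_result

    # Nested input is results[target][metric]: two targets, one metric,
    # each metric result a (2, 2) masked array.
    nested = [[ma.ones((2, 2))], [ma.zeros((2, 2))]]
    converted = convert_evaluation_result(nested)
    # converted is a list holding a single (2, 2, 2) masked array;
    # converted[0][imodel] is the metric result for target imodel.
    assert converted[0].shape == (2, 2, 2)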
diff --git a/ocw/tests/test_evaluation.py b/ocw/tests/test_evaluation.py
index 4a02714..2d48e72 100644
--- a/ocw/tests/test_evaluation.py
+++ b/ocw/tests/test_evaluation.py
@@ -128,40 +128,29 @@
def test_result_shape(self):
bias_eval = Evaluation(
self.test_dataset,
- [self.another_test_dataset, self.another_test_dataset],
- [Bias()]
+ [self.another_test_dataset, self.another_test_dataset, self.another_test_dataset],
+ [Bias(), Bias()]
)
bias_eval.run()
# Expected result shape is
- # [
- # [
- # bias.run(reference, target1)
- # ],
- # [
- # bias.run(reference, target2)
- # ]
- # ]
+ # [bias, bias] where bias.shape[0] = number of target datasets
self.assertTrue(len(bias_eval.results) == 2)
- self.assertTrue(len(bias_eval.results[0]) == 1)
- self.assertTrue(len(bias_eval.results[1]) == 1)
+ self.assertTrue(bias_eval.results[0].shape[0] == 3)
def test_unary_result_shape(self):
new_eval = Evaluation(
self.test_dataset,
- [self.another_test_dataset, self.another_test_dataset],
+ [self.another_test_dataset, self.another_test_dataset, self.another_test_dataset, self.another_test_dataset],
[TemporalStdDev()]
)
new_eval.run()
# Expected result shape is
- # [
- # temporalstddev.run(reference),
- # temporalstddev.run(target1),
- # temporalstddev.run(target2)
- # ]
+ # [stddev] where stddev.shape[0] = number of datasets (reference + targets)
+
self.assertTrue(len(new_eval.unary_results) == 1)
- self.assertTrue(len(new_eval.unary_results[0]) == 3)
+ self.assertTrue(new_eval.unary_results[0].shape[0] == 5)
def test_subregion_result_shape(self):
bound = Bounds(
@@ -179,26 +168,14 @@
# Expected result shape is
# [
- # [
# [ # Subregions cause this extra layer
- # bias.run(reference, target1)
+ # [number of targets, bias.run(reference, target1).shape]
# ]
- # ],
+ # ]
- # [
- # [
- # bias.run(reference, target2)
- # ]
- # ]
- # ]
- self.assertTrue(len(bias_eval.results) == 2)
-
+ self.assertTrue(len(bias_eval.results) == 1) # number of subregions
self.assertTrue(len(bias_eval.results[0]) == 1)
- self.assertTrue(type(bias_eval.results[0]) == type([]))
- self.assertTrue(len(bias_eval.results[1]) == 1)
- self.assertTrue(type(bias_eval.results[1]) == type([]))
-
- self.assertTrue(len(bias_eval.results[0][0]) == 1)
- self.assertTrue(len(bias_eval.results[1][0]) == 1)
+ self.assertTrue(bias_eval.results[0][0].shape[0] == 2) # number of target datasets
+ self.assertTrue(type(bias_eval.results) == type([]))
def test_subregion_unary_result_shape(self):
bound = Bounds(
@@ -209,27 +186,21 @@
new_eval = Evaluation(
self.test_dataset,
[self.another_test_dataset, self.another_test_dataset],
- [TemporalStdDev()],
- [bound]
+ [TemporalStdDev(), TemporalStdDev()],
+ [bound, bound, bound, bound, bound]
)
new_eval.run()
# Expected result shape is
# [
- # [
# [ # Subregions cause this extra layer
- # temporalstddev.run(reference),
- # temporalstddev.run(target1),
- # temporalstddev.run(target2)
+ # [3, temporalstddev.run(reference).shape],
# ]
- # ]
# ]
- self.assertTrue(len(new_eval.unary_results) == 1)
+ self.assertTrue(len(new_eval.unary_results) == 5) # number of subregions
+ self.assertTrue(len(new_eval.unary_results[0]) == 2) # number of metrics
self.assertTrue(type(new_eval.unary_results) == type([]))
-
- self.assertTrue(len(new_eval.unary_results[0]) == 1)
-
- self.assertTrue(len(new_eval.unary_results[0][0]) == 3)
+ self.assertTrue(new_eval.unary_results[0][0].shape[0] == 3) # number of datasets (reference + targets)
if __name__ == '__main__':
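The updated shape assertions can be exercised directly; assuming the module closes with the customary unittest.main() call, run:

    python ocw/tests/test_evaluation.py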