blob: 5bbbb083ee46f82832f53ae6602d3529c72e57f5 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import datetime as dt
import logging
import yaml
logger = logging.getLogger(__name__)
def export_evaluation_to_config(evaluation, file_path='./exported_eval.yaml'):
''' Export an evaluation to a config file
:param evaluation: The evaluation object to export.
:type evaluation: :class:`evaluation.Evaluation`
:param file_path: Optional file path where the config file should be saved.
:type file_path: :mod:`string`
config = {}
config['evaluation'] = generate_evaluation_information(evaluation)
config['datasets'] = generate_dataset_information(evaluation)
config['metrics'] = generate_metric_information(evaluation)
config['subregions'] = generate_subregion_information(evaluation)
yaml.dump(config, file(file_path, 'w'))
def generate_dataset_information(evaluation):
''' Generate dataset config file output for a given Evaluation object.
:param evaluation: The evaluation object from which to extract metrics.
:type evaluation: :class:`evaluation.Evaluation`
:returns: A :func:`dict` of dataset configuration information for export
to a configuration file.
:rtype: :func:`dict`
datasets = {}
if evaluation.ref_dataset:
datasets['reference'] = generate_dataset_config(evaluation.ref_dataset)
if len(evaluation.target_datasets) > 0:
datasets['targets'] = [
for target in evaluation.target_datasets
return datasets
def generate_dataset_config(dataset):
''' Generate dataset config file output for a given Dataset object.
:param dataset: The dataset from which to extract configuration
:type dataset: :class:`dataset.Dataset`
:returns: :func:`dict` containing necessary information for
dataset to be saved into a configuration object.
:raises AttributeError: If dataset does not contain expected source data.
dataset_source = dataset.origin['source']
if dataset_source == 'local':
info = _extract_local_dataset_info(dataset)
elif dataset_source == 'rcmed':
info = _extract_rcmed_dataset_info(dataset)
elif dataset_source == 'esgf':
info = _extract_esgf_dataset_info(dataset)
elif dataset_source == 'dap':
info = _extract_dap_dataset_info(dataset)
err = (
"Unexpected source in dataset origin information."
"Found {}."
raise AttributeError(err)
info['optional_args']['name'] =
info['optional_args']['units'] = dataset.units
return info
def generate_metric_information(evaluation):
''' Generate metric config file output from a given Evaluation object.
:param evaluation: The evaluation object from which to extract metrics.
:type evaluation: :class:`evaluation.Evaluation`
:returns: A :func:`list` of :mod:`metrics` object names for output into
a configuration file.
:rtype: :func:`list` of :mod:`metrics`
unary_metrics = [x.__class__.__name__ for x in evaluation.unary_metrics]
binary_metrics = [x.__class__.__name__ for x in evaluation.metrics]
return unary_metrics + binary_metrics
def generate_evaluation_information(evaluation):
''' Generate evaluation-related config file output.
Attempts to parse out temporal and spatial rebinning/regridding information
from the supplied evaluation object. If no datasets can be found, values
are defaulted to sane defaults or (potentially) excluded entirely.
It's important to note that this function does its best to extrapolate the
configuration information. It's possible that you will encounter a scenario
where the guessed values are not what you want/expect. Please double
check the output before blinding trusting what this generates.
:param evaluation: The evaluation object from which to extract metrics.
:type evaluation: :class:`evaluation.Evaluation`
:returns: A dictionary of valid `evaluation` section settings for export
to a configuration file.
:rtype: :func:`dict`
eval_config = {
'temporal_time_delta': 999,
'spatial_regrid_lats': (-90, 90, 1),
'spatial_regrid_lons': (-180, 180, 1),
'subset': [-90, 90, -180, 180, "1500-01-01", "2500-01-01"],
datasets = []
if evaluation.ref_dataset:
if evaluation.target_datasets:
datasets += evaluation.target_datasets
if len(datasets) > 0:
eval_config['temporal_time_delta'] = _calc_temporal_bin_size(datasets)
lats, lons = _calc_spatial_lat_lon_grid(datasets)
eval_config['spatial_regrid_lats'] = lats
eval_config['spatial_regrid_lons'] = lons
eval_config['subset'] = _calc_subset_config(datasets)
return eval_config
def generate_subregion_information(evaluation):
''' Generate subregion config file output from a given Evaluation object.
:param evaluation: The evaluation object from which to extract metrics.
:type evaluation: :class:`evaluation.Evaluation`
:returns: A :func:`list` of :func:`list` objects containing bounding
box info for export into a configuration file
:rtype: :func:`list` of :func:`list`
subregions = []
for s in evaluation.subregions:
subregions.append([s.lat_min, s.lat_max, s.lon_min, s.lon_max])
return subregions
def _extract_local_dataset_info(dataset):
dataset_info = {'optional_args': {}}
dataset_info['data_source'] = 'local'
dataset_info['file_count'] = 1
dataset_info['path'] = dataset.origin['path']
dataset_info['variable'] = dataset.variable
dataset_info['optional_args']['lat_name'] = dataset.origin['lat_name']
dataset_info['optional_args']['lon_name'] = dataset.origin['lon_name']
dataset_info['optional_args']['time_name'] = dataset.origin['time_name']
if 'elevation_index' in dataset.origin:
elev = dataset.origin['elevation_index']
dataset_info['optional_args']['elevation_index'] = elev
return dataset_info
def _extract_rcmed_dataset_info(dataset):
dataset_info = {'optional_args': {}}
min_lat, max_lat, min_lon, max_lon = dataset.spatial_boundaries()
start_time, end_time = dataset.temporal_boundaries()
dataset_info['data_source'] = 'rcmed'
dataset_info['dataset_id'] = dataset.origin['dataset_id']
dataset_info['parameter_id'] = dataset.origin['parameter_id']
dataset_info['min_lat'] = min_lat
dataset_info['max_lat'] = max_lat
dataset_info['min_lon'] = min_lon
dataset_info['max_lon'] = max_lon
dataset_info['start_time'] = str(start_time)
dataset_info['end_time'] = str(end_time)
return dataset_info
def _extract_esgf_dataset_info(dataset):
dataset_info = {'optional_args': {}}
dataset_info['data_source'] = 'esgf'
dataset_info['dataset_id'] = dataset.origin['dataset_id']
dataset_info['variable'] = dataset.origin['variable']
dataset_info['esgf_username'] = 'Put your ESGF Username here'
dataset_info['esgf_password'] = 'Put your ESGF Password here'
return dataset_info
def _extract_dap_dataset_info(dataset):
dataset_info = {'optional_args': {}}
dataset_info['data_source'] = 'dap'
dataset_info['url'] = dataset.origin['url']
dataset_info['variable'] = dataset.variable
return dataset_info
def _calc_temporal_bin_size(datasets):
times = datasets[0].times
time_delta = times[1] - times[0]
if time_delta.days == 0:
return 1
elif time_delta.days <= 31:
return 31
elif time_delta.days <= 366:
return 366
return 999
def _calc_spatial_lat_lon_grid(datasets):
lat_min, lat_max, lon_min, lon_max = datasets[0].spatial_boundaries()
lats = datasets[0].lats
lons = datasets[0].lons
# These explicit float casts are needed to ensure that the type of the
# lat/lon steps are not numpy values. PyYAML will choke on export if it
# encounters a Numpy value.
lat_step = float(abs(lats[1] - lats[0]))
lon_step = float(abs(lons[1] - lons[0]))
# We need to add an extra step value onto the end so when we generate a
# range with these values we don't lose one that we're expecting.
if lat_max != 90: lat_max += lat_step
if lon_max != 180: lon_max += lon_step
return ((lat_min, lat_max, lat_step), (lon_min, lon_max, lon_step))
def _calc_subset_config(datasets):
lat_min = 90
lat_max = -90
lon_min = 180
lon_max = -180
start = dt.datetime(2500, 1, 1)
end = dt.datetime(1500, 1, 1)
for ds in datasets:
ds_lat_min, ds_lat_max, ds_lon_min, ds_lon_max = ds.spatial_boundaries()
ds_start, ds_end = ds.temporal_boundaries()
if ds_lat_min < lat_min:
lat_min = ds_lat_min
if ds_lat_max > lat_max:
lat_max = ds_lat_max
if ds_lon_min < lon_min:
lon_min = ds_lon_min
if ds_lon_max > lon_max:
lon_max = ds_lon_max
if ds_start < start:
start = ds_start
if ds_end > end:
end = ds_end
return [lat_min, lat_max, lon_min, lon_max, str(start), str(end)]