| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| import dateutil.parser |
| from datetime import timedelta |
| import logging |
| |
| from ocw.dataset import Bounds |
| from ocw.evaluation import Evaluation |
| import ocw.dataset_processor as dsp |
| import ocw.data_source.local as local |
| import ocw.data_source.rcmed as rcmed |
| import ocw.data_source.esgf as esgf |
| import ocw.data_source.dap as dap |
| import ocw.metrics as metrics |
| |
| import numpy as np |
| |
# Make sure log records emitted while building an evaluation have somewhere
# to go: basicConfig() attaches a default handler to the root logger and does
# nothing if the root logger already has handlers configured.
logging.basicConfig()

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
| |
def generate_evaluation_from_config(config_data):
    """ Generate an Evaluation object from configuration data.

    Datasets and metrics that the loaders refuse to load (they log an error
    and return None) are skipped instead of being passed on, so a single bad
    entry does not abort the whole evaluation with an unrelated error.

    :param config_data: Dictionary of the data parsed from the supplied YAML
        configuration file. The ``datasets``, ``metrics`` and ``subregions``
        sections are all optional.
    :type config_data: :func:`dict`

    :returns: An Evaluation object containing the data specified in the
        supplied configuration data.
    """
    # Load datasets. A missing or empty 'datasets' section simply means
    # there is nothing to load.
    dataset_config = config_data.get('datasets') or {}

    reference = None
    if 'reference' in dataset_config:
        reference = _load_dataset(dataset_config['reference'])

    targets = []
    if 'targets' in dataset_config:
        loaded_targets = [_load_dataset(t)
                          for t in dataset_config['targets'] or []]
        # _load_dataset returns None for datasets it declines to load.
        targets = [t for t in loaded_targets if t is not None]

    reference, targets = _prepare_datasets_for_evaluation(reference,
                                                          targets,
                                                          config_data)

    # Load metrics. _load_metric returns None (after logging) for metrics it
    # cannot resolve; instantiating None would raise a TypeError.
    metric_classes = [_load_metric(m) for m in config_data.get('metrics') or []]
    eval_metrics = [metric_class()
                    for metric_class in metric_classes
                    if metric_class is not None]

    # Load Subregions (if present)
    subregions = None
    subregion_config = config_data.get('subregions')
    if subregion_config is not None:
        subregions = [_load_subregion(s) for s in subregion_config]

    return Evaluation(reference, targets, eval_metrics, subregions=subregions)
| |
def _load_dataset(dataset_config_data):
    """ Load a single dataset as described by its configuration entry.

    :param dataset_config_data: Configuration for one dataset. The
        ``data_source`` key selects the loader (``local``, ``rcmed``,
        ``esgf`` or ``dap``); the other required keys depend on the loader.
        An ``optional_args`` mapping, when present, is forwarded to the
        loader as keyword arguments.
    :type dataset_config_data: :func:`dict`

    :returns: Whatever the selected loader returns, or None (after logging
        an error) for multi-file local datasets and unknown data sources.
    """
    data_source = dataset_config_data['data_source']
    # Tolerate both a missing key and an explicitly empty value.
    optional_args = dataset_config_data.get('optional_args') or {}

    if data_source == 'local':
        if dataset_config_data['file_count'] > 1:
            logger.error(
                'Multi-file datasets are currently not supported. Cancelling load '
                'of the following dataset: {}'.format(dataset_config_data)
            )
            return None

        return local.load_file(dataset_config_data['path'],
                               dataset_config_data['variable'],
                               **optional_args)

    if data_source == 'rcmed':
        # The sixth positional argument is the upper longitude bound; the
        # previous code passed 'min_lon' twice.
        return rcmed.parameter_dataset(dataset_config_data['dataset_id'],
                                       dataset_config_data['parameter_id'],
                                       dataset_config_data['min_lat'],
                                       dataset_config_data['max_lat'],
                                       dataset_config_data['min_lon'],
                                       dataset_config_data['max_lon'],
                                       dataset_config_data['start_time'],
                                       dataset_config_data['end_time'],
                                       **optional_args)

    if data_source == 'esgf':
        return esgf.load_dataset(dataset_config_data['dataset_id'],
                                 dataset_config_data['variable'],
                                 dataset_config_data['esgf_username'],
                                 dataset_config_data['esgf_password'],
                                 **optional_args)

    if data_source == 'dap':
        return dap.load(dataset_config_data['url'],
                        dataset_config_data['variable'],
                        **optional_args)

    # Previously an unrecognised source fell through and returned None
    # without any indication of what went wrong.
    logger.error(
        'Unknown data source. Cancelling load of the following '
        'dataset: {}'.format(dataset_config_data)
    )
    return None
| |
def _prepare_datasets_for_evaluation(reference, targets, config_data):
    """ Apply the configured pre-processing steps to the loaded datasets.

    The steps, each applied only to the datasets that are present, are:
    datetime normalization, subsetting, temporal rebinning and spatial
    regridding. The last three run only when the matching option is set in
    the ``evaluation`` section of the configuration.

    :param reference: The reference dataset, or None if there is none.
    :param targets: List of target datasets (may be empty).
    :param config_data: Dictionary of the parsed configuration. Its optional
        ``evaluation`` section may define ``subset``,
        ``temporal_time_delta``, ``spatial_regrid_lats`` and
        ``spatial_regrid_lons``.
    :type config_data: :func:`dict`

    :returns: A ``(reference, targets)`` tuple holding the processed datasets.
    """
    # A missing or empty 'evaluation' section means "no optional processing".
    eval_config = config_data.get('evaluation') or {}
    subset = eval_config.get('subset')
    temporal_time_delta = eval_config.get('temporal_time_delta')
    spatial_regrid_lats = eval_config.get('spatial_regrid_lats')
    spatial_regrid_lons = eval_config.get('spatial_regrid_lons')

    # If we have a temporal time delta and it's daily (i.e., 1) we will
    # normalize the data as daily data (which means we adjust the start times
    # for each bucket of data to be consistent). By default we will normalize
    # the data as monthly. Note that this will not break yearly data so it's
    # safer to do this no matter what. This keeps us from ending up with 1-off
    # errors in the resulting dataset shape post-temporal/spatial adjustments
    # that break evaluations.
    string_time_delta = 'daily' if temporal_time_delta == 1 else 'monthly'

    # Guarded like every other step below: a configuration without a
    # reference dataset must not fail here.
    if reference:
        reference = dsp.normalize_dataset_datetimes(reference, string_time_delta)

    if targets:
        targets = [dsp.normalize_dataset_datetimes(t, string_time_delta)
                   for t in targets]

    if subset:
        # The first four entries are passed to Bounds as-is; entries 4 and 5
        # are date strings parsed into the start and end of the subset.
        start = dateutil.parser.parse(subset[4])
        end = dateutil.parser.parse(subset[5])
        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)

        if reference:
            reference = dsp.safe_subset(reference, bounds)

        if targets:
            targets = [dsp.safe_subset(t, bounds) for t in targets]

    if temporal_time_delta:
        # timedelta's first positional argument is a number of days.
        resolution = timedelta(temporal_time_delta)

        if reference:
            reference = dsp.temporal_rebin(reference, resolution)

        if targets:
            targets = [dsp.temporal_rebin(t, resolution) for t in targets]

    if spatial_regrid_lats and spatial_regrid_lons:
        # Each option is a (start, stop, step) triple; np.arange excludes
        # the stop value.
        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1], spatial_regrid_lats[2])
        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1], spatial_regrid_lons[2])

        if reference:
            reference = dsp.spatial_regrid(reference, lats, lons)

        if targets:
            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]

    return reference, targets
| |
def _load_metric(metric_config_data):
    """ Look up a metric by name in the ocw.metrics module.

    :param metric_config_data: Name of the metric as given in the
        configuration. A name containing a ``.`` is treated as a metric
        defined outside of ocw.metrics, which is not supported.
    :type metric_config_data: :mod:`string`

    :returns: The attribute of ocw.metrics with the given name, or None
        (after logging an error) when the name contains a ``.``.
    """
    # Plain names are resolved directly against ocw.metrics.
    if '.' not in metric_config_data:
        return getattr(metrics, metric_config_data)

    # Dotted names refer to user-defined metrics living in some other
    # module; loading those is not handled yet.
    logger.error(
        'User-defined metrics outside of the ocw.metrics module '
        'cannot currently be loaded. If you just wanted a metric '
        'found in ocw.metrics then do not specify the full '
        'package and module names. See the documentation for examples.'
    )
    return None
| |
def _load_subregion(subregion_config_data):
    """ Build a Bounds object from one subregion configuration entry.

    :param subregion_config_data: Indexable holding at least four values.
        The values at positions 0 through 3 are converted to float and
        passed to Bounds in that order.

    :returns: The resulting Bounds object.
    """
    edges = [float(subregion_config_data[position]) for position in range(4)]
    return Bounds(*edges)