| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| import logging |
| import re |
| import sys |
| |
| import ocw.metrics as metrics |
| |
| import yaml |
| |
# Configure the root logger with logging's default handler and format
# (logging.basicConfig() does nothing if the root logger already has handlers).
logging.basicConfig()
# Module-level logger used by the validation functions in this file.
logger = logging.getLogger(__name__)
| |
def is_config_valid(config_data):
    """ Decide whether parsed configuration data can drive an evaluation.

    The checks run in order and stop at the first failure: first that the
    minimum required sections are present, then that the sections are well
    formed. An error is logged for the check that fails.

    :param config_data: Dictionary of the data parsed from the supplied YAML
        configuration file.
    :type config_data: :func:`dict`

    :returns: True if the configuration data is sufficient for an evaluation
        and seems to be well formed, False otherwise.
    """
    # Each entry pairs a check with the message logged when that check fails.
    checks = (
        (_valid_minimal_config,
         'Insufficient configuration file data for an evaluation'),
        (_config_is_well_formed,
         'Configuration data is not well formed'),
    )

    for check, failure_message in checks:
        if not check(config_data):
            logger.error(failure_message)
            return False

    return True
| |
def _valid_minimal_config(config_data):
    """ Check that the config holds the minimum data needed for an evaluation.

    The configuration must be a dictionary with non-null ``datasets`` and
    ``metrics`` entries. If a built-in unary metric is requested, at least
    one of the ``reference`` / ``targets`` datasets must be present; if a
    built-in binary metric is requested, both must be present.

    Empty or malformed input (``None`` from an empty YAML document, a key
    with no value, a ``datasets`` entry that is not a dictionary) is reported
    as invalid rather than raising.

    :param config_data: Data parsed from the YAML configuration file.
    :type config_data: :func:`dict`

    :returns: True if the minimum required data is present, False otherwise.
        An error is logged for the first problem found.
    """
    if not isinstance(config_data, dict):
        logger.error('Configuration data is empty or is not a mapping.')
        return False

    # .get() treats a missing key and a key with an empty (None) value alike.
    if config_data.get('datasets') is None:
        logger.error('No datasets specified in configuration data.')
        return False

    if config_data.get('metrics') is None:
        logger.error('No metrics specified in configuration data.')
        return False

    datasets = config_data['datasets']
    metric_names = config_data['metrics']

    # A datasets section that is not a dictionary cannot name a reference or
    # a list of targets, so it is treated as providing neither.
    datasets_is_dict = isinstance(datasets, dict)
    has_reference = datasets_is_dict and 'reference' in datasets
    has_targets = datasets_is_dict and 'targets' in datasets

    if _contains_unary_metrics(metric_names):
        if not has_reference and not has_targets:
            err = (
                'Unary metric in configuration data requires either a reference '
                'or target dataset to be present for evaluation. Please ensure '
                'that your config is well formed.'
            )
            logger.error(err)
            return False

    if _contains_binary_metrics(metric_names):
        if not has_reference or not has_targets:
            logger.error(
                'Binary metric in configuration requires both a reference '
                'and target dataset to be present for evaluation. Please ensure '
                'that your config is well formed.'
            )
            return False

    return True
| |
def _config_is_well_formed(config_data):
    """ Check that each section of the configuration is well formed.

    Every section is checked even after a failure so that all problems are
    logged in one pass. The ``datasets`` and ``metrics`` keys are read
    directly, so they must already be present.

    :param config_data: Data parsed from the YAML configuration file.
    :type config_data: :func:`dict`

    :returns: True if the reference/target datasets, metrics, subregions and
        plots all pass their checks, False otherwise.
    """
    is_well_formed = True
    datasets = config_data['datasets']

    if 'reference' in datasets:
        if not _valid_dataset_config_data(datasets['reference']):
            is_well_formed = False

    if 'targets' in datasets:
        targets = datasets['targets']
        if not isinstance(targets, list):
            err = (
                'Expected to find list of target datasets but instead found '
                'object of type {}'
            ).format(type(targets))
            logger.error(err)
            is_well_formed = False
        else:
            for target in targets:
                if not _valid_dataset_config_data(target):
                    is_well_formed = False

    available_metrics = _fetch_built_in_metrics()
    for metric in config_data['metrics']:
        if metric not in available_metrics:
            warning = (
                'Unable to locate metric name {} in built-in metrics. If this '
                'is not a user defined metric then please check for potential '
                'misspellings.'
            ).format(metric)
            # Logger.warn is a deprecated alias; warning() is the supported
            # spelling. Note an unknown metric is only logged at warning
            # level but still marks the config as not well formed.
            logger.warning(warning)
            is_well_formed = False

    # A key that is present but empty in YAML parses to None; treat that the
    # same as an empty list instead of failing while iterating.
    subregions = config_data.get('subregions') or []
    for subregion in subregions:
        if not _valid_subregion_config_data(subregion):
            is_well_formed = False

    for plot in config_data.get('plots') or []:
        if not _valid_plot_config_data(plot):
            is_well_formed = False
        # Ensure that if we're trying to make a plot that requires
        # subregion info that the config has this present.
        elif plot['type'] in ('subregion', 'portrait') and not subregions:
            logger.error(
                'Plot config that requires subregion information is present '
                'in a config file without adequate subregion information '
                'provided. Please ensure that you have properly supplied 1 or '
                'more subregion config values.'
            )
            is_well_formed = False

    return is_well_formed
| |
def _contains_unary_metrics(config_metric_data):
    """ Report whether any configured metric is a built-in unary metric.

    A metric counts as unary when its name equals the class name of one of
    the classes returned by ``metrics.UnaryMetric.__subclasses__()``.

    :param config_metric_data: Iterable of metric names from the config.

    :returns: True if at least one name matches, False otherwise.
    """
    unary_names = [
        klass.__name__ for klass in metrics.UnaryMetric.__subclasses__()
    ]
    for name in config_metric_data:
        if name in unary_names:
            return True
    return False
| |
def _contains_binary_metrics(config_metric_data):
    """ Report whether any configured metric is a built-in binary metric.

    A metric counts as binary when its name equals the class name of one of
    the classes returned by ``metrics.BinaryMetric.__subclasses__()``.

    :param config_metric_data: Iterable of metric names from the config.

    :returns: True if at least one name matches, False otherwise.
    """
    binary_names = [
        klass.__name__ for klass in metrics.BinaryMetric.__subclasses__()
    ]
    for name in config_metric_data:
        if name in binary_names:
            return True
    return False
| |
def _fetch_built_in_metrics():
    """ List the class names of the built-in metrics.

    :returns: List of the class names returned by ``__subclasses__()`` for
        ``metrics.UnaryMetric`` followed by those for
        ``metrics.BinaryMetric``.
    """
    names = []
    for base in (metrics.UnaryMetric, metrics.BinaryMetric):
        for klass in base.__subclasses__():
            names.append(klass.__name__)
    return names
| |
def _valid_dataset_config_data(dataset_config_data):
    """ Check that a single dataset entry has the keys its source requires.

    The ``data_source`` attribute selects the set of required keys. A
    ``local`` dataset whose ``file_count`` is greater than 1 must also
    provide ``file_glob_pattern``.

    :param dataset_config_data: One dataset entry from the configuration.
    :type dataset_config_data: :func:`dict`

    :returns: True if the entry is a dictionary with a recognised
        ``data_source`` and all required keys, False otherwise. An error is
        logged describing the first problem found.
    """
    # Entries such as a bare string or an empty (None) item cannot hold
    # attributes; report them as invalid rather than failing on the lookup.
    if not isinstance(dataset_config_data, dict):
        logger.error('Dataset configuration is not a mapping of attributes.')
        return False

    try:
        data_source = dataset_config_data['data_source']
    except KeyError:
        logger.error('Dataset does not contain a data_source attribute.')
        return False

    required_keys_by_source = {
        'local': {'data_source', 'file_count', 'path', 'variable'},
        'rcmed': {
            'dataset_id',
            'parameter_id',
            'min_lat',
            'max_lat',
            'min_lon',
            'max_lon',
            'start_time',
            'end_time',
        },
        'esgf': {
            'data_source',
            'dataset_id',
            'variable',
            'esgf_username',
            'esgf_password',
        },
        'dap': {'url', 'variable'},
    }

    try:
        required_keys = required_keys_by_source[data_source]
    except (KeyError, TypeError):
        # TypeError covers an unhashable data_source value (e.g. a list).
        logger.error('Dataset does not contain a valid data_source location.')
        return False

    missing_keys = required_keys - set(dataset_config_data)
    if missing_keys:
        logger.error(
            'Dataset does not contain required keys. '
            'The following keys are missing: {}'.format(
                ', '.join(sorted(missing_keys))
            )
        )
        return False

    # If the dataset is a multi-file dataset then we need to make sure
    # that the file glob pattern is included.
    is_multi_file = (
        data_source == 'local' and dataset_config_data['file_count'] > 1
    )
    if is_multi_file and 'file_glob_pattern' not in dataset_config_data:
        logger.error(
            'Multi-file local dataset is missing key: file_glob_pattern'
        )
        return False

    return True
| |
def _valid_plot_config_data(plot_config_data):
    """ Check that a single plot entry has the keys its plot type requires.

    The ``type`` attribute selects the set of required keys.

    :param plot_config_data: One plot entry from the configuration.
    :type plot_config_data: :func:`dict`

    :returns: True if the entry is a dictionary with a recognised ``type``
        and all keys required for that type, False otherwise. An error is
        logged describing the first problem found.
    """
    # Entries such as a bare string or an empty (None) item cannot hold
    # attributes; report them as invalid rather than failing on the lookup.
    if not isinstance(plot_config_data, dict):
        logger.error('Plot configuration is not a mapping of attributes.')
        return False

    try:
        plot_type = plot_config_data['type']
    except KeyError:
        logger.error('Plot config does not include a type attribute.')
        return False

    required_keys_by_type = {
        'contour': {'results_indices', 'lats', 'lons', 'output_name'},
        'taylor': {
            'stddev_results_indices',
            'pattern_corr_results_indices',
            'output_name',
        },
        'subregion': {'lats', 'lons', 'output_name'},
        'time_series': {'temporal_boundaries'},
        'portrait': {'metric_index', 'output_name'},
    }

    try:
        required_keys = required_keys_by_type[plot_type]
    except (KeyError, TypeError):
        # TypeError covers an unhashable type value (e.g. a list).
        logger.error('Invalid plot type specified.')
        return False

    missing_keys = required_keys - set(plot_config_data)
    if missing_keys:
        logger.error(
            'Plot config does not contain required keys. '
            'The following keys are missing: {}'.format(
                ', '.join(sorted(missing_keys))
            )
        )
        return False

    return True
| |
def _valid_subregion_config_data(subregion_config_data):
    """ Check that a subregion entry is a list of exactly four items.

    Only the container type and length are checked; the individual values
    are not inspected.

    :param subregion_config_data: One subregion entry from the
        configuration, expected in the form
        ``[lat_min, lat_max, lon_min, lon_max]``.
    :type subregion_config_data: :func:`list`

    :returns: True if the entry is a list with four elements, False
        otherwise. An error is logged on failure.
    """
    # The length is only checked once the value is known to be a list.
    is_bounding_box = (
        isinstance(subregion_config_data, list)
        and len(subregion_config_data) == 4
    )
    if not is_bounding_box:
        logger.error(
            'Subregions should be passed as a list of lists where '
            'each sub-list contains a bounding box of the form: '
            '[lat_min, lat_max, lon_min, lon_max].'
        )
        return False

    return True