ocw_config_runner/configuration_writer.py - climate - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 import datetime as dt
 import logging

 import yaml

 # Apply the default logging configuration, then create the logger used by
 # this module (named after the module itself).
 logging.basicConfig()
 logger = logging.getLogger(__name__)

 def export_evaluation_to_config(evaluation, file_path='./exported_eval.yaml'):
     ''' Export an evaluation to a YAML config file.

     Collects the ``evaluation``, ``datasets``, ``metrics`` and
     ``subregions`` sections from the supplied evaluation and dumps them
     with PyYAML.

     :param evaluation: The evaluation object to export.
     :type evaluation: :class:`evaluation.Evaluation`

     :param file_path: Optional file path where the config file should be saved.
     :type file_path: :mod:`string`
     '''
     # The whole config is built before the file is opened, so a failure
     # while generating a section never leaves an empty file behind.
     config = {
         'evaluation': generate_evaluation_information(evaluation),
         'datasets': generate_dataset_information(evaluation),
         'metrics': generate_metric_information(evaluation),
         'subregions': generate_subregion_information(evaluation),
     }

     # ``open`` replaces the Python 2-only ``file`` builtin, and the context
     # manager guarantees the handle is flushed and closed after the dump.
     with open(file_path, 'w') as config_file:
         yaml.dump(config, config_file)

 def generate_dataset_information(evaluation):
     ''' Build the ``datasets`` section of a config file for an Evaluation.

     :param evaluation: The evaluation object whose datasets are exported.
     :type evaluation: :class:`evaluation.Evaluation`

     :returns: A :func:`dict` with a ``reference`` entry when the evaluation
         has a reference dataset and a ``targets`` list when it has at least
         one target dataset.
     :rtype: :func:`dict`
     '''
     dataset_section = {}

     reference = evaluation.ref_dataset
     if reference:
         dataset_section['reference'] = generate_dataset_config(reference)

     targets = evaluation.target_datasets
     if len(targets) != 0:
         target_configs = []
         for target in targets:
             target_configs.append(generate_dataset_config(target))
         dataset_section['targets'] = target_configs

     return dataset_section

 def generate_dataset_config(dataset):
     ''' Generate dataset config file output for a given Dataset object.

     The ``source`` entry of ``dataset.origin`` selects which extraction
     helper builds the base configuration; the dataset name and units are
     then added to its ``optional_args``.

     :param dataset: The dataset from which to extract configuration
         information.
     :type dataset: :class:`dataset.Dataset`

     :returns: :func:`dict` containing necessary information for
         dataset to be saved into a configuration object.

     :raises AttributeError: If the dataset origin has no ``source`` entry
         or names a source other than local, rcmed, esgf or dap.
     '''
     # Use .get() so a missing 'source' key takes the documented
     # AttributeError path below instead of surfacing as a KeyError.
     dataset_source = dataset.origin.get('source')

     if dataset_source == 'local':
         info = _extract_local_dataset_info(dataset)
     elif dataset_source == 'rcmed':
         info = _extract_rcmed_dataset_info(dataset)
     elif dataset_source == 'esgf':
         info = _extract_esgf_dataset_info(dataset)
     elif dataset_source == 'dap':
         info = _extract_dap_dataset_info(dataset)
     else:
         # The trailing space on the first literal keeps the two sentences
         # from running together in the emitted message.
         err = (
             "Unexpected source in dataset origin information. "
             "Found {}."
         ).format(dataset_source)
         logger.error(err)
         raise AttributeError(err)

     optional_args = info['optional_args']
     optional_args['name'] = dataset.name
     optional_args['units'] = dataset.units

     return info

 def generate_metric_information(evaluation):
     ''' List the class names of every metric attached to an Evaluation.

     :param evaluation: The evaluation object from which to extract metrics.
     :type evaluation: :class:`evaluation.Evaluation`

     :returns: A :func:`list` of metric class names, unary metrics first
         followed by the entries of ``evaluation.metrics``, for output into
         a configuration file.
     :rtype: :func:`list` of :mod:`string`
     '''
     metric_names = []

     # Unary metrics are listed ahead of the entries in evaluation.metrics.
     for metric_group in (evaluation.unary_metrics, evaluation.metrics):
         for metric in metric_group:
             metric_names.append(metric.__class__.__name__)

     return metric_names

 def generate_evaluation_information(evaluation):
     ''' Generate the ``evaluation`` section of a config file.

     Temporal rebinning, spatial regridding and subset settings are guessed
     from the reference and target datasets of the supplied evaluation. When
     the evaluation has no datasets at all, the built-in defaults are
     returned unchanged.

     The values are a best-effort extrapolation and may not be what you
     want or expect, so double check the output before blindly trusting it.

     :param evaluation: The evaluation object from which to extract settings.
     :type evaluation: :class:`evaluation.Evaluation`

     :returns: A dictionary of valid `evaluation` section settings for export
         to a configuration file.
     :rtype: :func:`dict`
     '''
     settings = {
         'temporal_time_delta': 999,
         'spatial_regrid_lats': (-90, 90, 1),
         'spatial_regrid_lons': (-180, 180, 1),
         'subset': [-90, 90, -180, 180, "1500-01-01", "2500-01-01"],
     }

     # Gather the reference dataset (if any) followed by the targets.
     reference = evaluation.ref_dataset
     found = [reference] if reference else []

     targets = evaluation.target_datasets
     if targets:
         found.extend(targets)

     # Nothing to inspect: keep the defaults.
     if not found:
         return settings

     settings['temporal_time_delta'] = _calc_temporal_bin_size(found)

     lat_grid, lon_grid = _calc_spatial_lat_lon_grid(found)
     settings['spatial_regrid_lats'] = lat_grid
     settings['spatial_regrid_lons'] = lon_grid

     settings['subset'] = _calc_subset_config(found)

     return settings

 def generate_subregion_information(evaluation):
     ''' Generate the ``subregions`` section of a config file.

     :param evaluation: The evaluation object whose subregions are exported.
     :type evaluation: :class:`evaluation.Evaluation`

     :returns: A :func:`list` with one ``[lat_min, lat_max, lon_min,
         lon_max]`` :func:`list` per subregion of the evaluation.
     :rtype: :func:`list` of :func:`list`
     '''
     return [
         [region.lat_min, region.lat_max, region.lon_min, region.lon_max]
         for region in evaluation.subregions
     ]

 def _extract_local_dataset_info(dataset):
     ''' Build the config entry for a dataset with a ``local`` origin.

     :param dataset: Dataset whose ``origin`` holds ``path``, ``lat_name``,
         ``lon_name`` and ``time_name`` entries and, optionally,
         ``elevation_index``.

     :returns: :func:`dict` describing a ``local`` data source.
     '''
     origin = dataset.origin

     local_info = {
         'optional_args': {},
         'data_source': 'local',
         'file_count': 1,
         'path': origin['path'],
         'variable': dataset.variable,
     }

     optional_args = local_info['optional_args']
     for axis_key in ('lat_name', 'lon_name', 'time_name'):
         optional_args[axis_key] = origin[axis_key]

     # The elevation index is only exported when the origin recorded one.
     if 'elevation_index' in origin:
         optional_args['elevation_index'] = origin['elevation_index']

     return local_info

 def _extract_rcmed_dataset_info(dataset):
     ''' Build the config entry for a dataset with an ``rcmed`` origin.

     The spatial and temporal bounds are read from the dataset itself; the
     dataset and parameter ids come from ``dataset.origin``.

     :param dataset: Dataset providing ``spatial_boundaries()``,
         ``temporal_boundaries()`` and an ``origin`` with ``dataset_id`` and
         ``parameter_id`` entries.

     :returns: :func:`dict` describing an ``rcmed`` data source, with the
         start and end times converted to strings.
     '''
     lat_lo, lat_hi, lon_lo, lon_hi = dataset.spatial_boundaries()
     first_time, last_time = dataset.temporal_boundaries()
     origin = dataset.origin

     return {
         'optional_args': {},
         'data_source': 'rcmed',
         'dataset_id': origin['dataset_id'],
         'parameter_id': origin['parameter_id'],
         'min_lat': lat_lo,
         'max_lat': lat_hi,
         'min_lon': lon_lo,
         'max_lon': lon_hi,
         'start_time': str(first_time),
         'end_time': str(last_time),
     }

 def _extract_esgf_dataset_info(dataset):
     ''' Build the config entry for a dataset with an ``esgf`` origin.

     The username and password fields are filled with placeholder text for
     the user to replace; no credentials are read from the dataset.

     :param dataset: Dataset whose ``origin`` holds ``dataset_id`` and
         ``variable`` entries.

     :returns: :func:`dict` describing an ``esgf`` data source.
     '''
     origin = dataset.origin

     return {
         'optional_args': {},
         'data_source': 'esgf',
         'dataset_id': origin['dataset_id'],
         'variable': origin['variable'],
         'esgf_username': 'Put your ESGF Username here',
         'esgf_password': 'Put your ESGF Password here',
     }

 def _extract_dap_dataset_info(dataset):
     ''' Build the config entry for a dataset with a ``dap`` origin.

     :param dataset: Dataset with a ``variable`` attribute and an ``origin``
         holding a ``url`` entry.

     :returns: :func:`dict` describing a ``dap`` data source.
     '''
     return {
         'optional_args': {},
         'data_source': 'dap',
         'url': dataset.origin['url'],
         'variable': dataset.variable,
     }

 def _calc_temporal_bin_size(datasets):
     ''' Guess a temporal bin size from the first dataset's time axis.

     Only the spacing between the first two time values of ``datasets[0]``
     is inspected.

     :param datasets: Non-empty sequence of datasets; only the first one's
         ``times`` attribute is read.

     :returns: 1 when the step is shorter than one day, 31 when it is at
         most 31 days, 366 when it is at most 366 days and 999 otherwise.
         999 is also returned when there are fewer than two time values.
     :rtype: :func:`int`
     '''
     times = datasets[0].times

     # A step cannot be measured from fewer than two time values; fall back
     # to 999, the same default generate_evaluation_information uses when it
     # has no datasets to inspect, instead of raising IndexError.
     if len(times) < 2:
         return 999

     step_days = (times[1] - times[0]).days

     if step_days == 0:
         return 1
     if step_days <= 31:
         return 31
     if step_days <= 366:
         return 366
     return 999

 def _calc_spatial_lat_lon_grid(datasets):
     ''' Derive regridding ranges from the first dataset's lat/lon axes.

     :param datasets: Non-empty sequence of datasets; only the first one is
         inspected.

     :returns: ``((lat_min, lat_max, lat_step), (lon_min, lon_max,
         lon_step))``. Each step is the absolute spacing between the first
         two axis values; each max is extended by one step unless it already
         equals 90 (latitude) or 180 (longitude).
     '''
     first = datasets[0]
     south, north, west, east = first.spatial_boundaries()

     lat_axis = first.lats
     lon_axis = first.lons

     # Cast the steps to builtin floats so they are not numpy values;
     # PyYAML will choke on export if it encounters a Numpy value.
     lat_step = float(abs(lat_axis[1] - lat_axis[0]))
     lon_step = float(abs(lon_axis[1] - lon_axis[0]))

     # Pad the upper bounds by one step so a range generated from these
     # values still includes the last point we expect.
     if north != 90:
         north += lat_step
     if east != 180:
         east += lon_step

     return ((south, north, lat_step), (west, east, lon_step))

 def _calc_subset_config(datasets):
     ''' Compute the subset bounds covering the supplied datasets.

     Starts from inverted extremes (lat 90/-90, lon 180/-180, years
     2500/1500) and widens them with each dataset's spatial and temporal
     boundaries.

     :param datasets: Iterable of datasets providing
         ``spatial_boundaries()`` and ``temporal_boundaries()``.

     :returns: ``[lat_min, lat_max, lon_min, lon_max, start, end]`` with the
         start and end times converted to strings.
     :rtype: :func:`list`
     '''
     south, north = 90, -90
     west, east = 180, -180
     earliest = dt.datetime(2500, 1, 1)
     latest = dt.datetime(1500, 1, 1)

     for ds in datasets:
         ds_south, ds_north, ds_west, ds_east = ds.spatial_boundaries()
         ds_first, ds_last = ds.temporal_boundaries()

         # min()/max() keep the running value unless the dataset's bound is
         # strictly smaller/larger, same as a compare-then-assign.
         south = min(south, ds_south)
         north = max(north, ds_north)
         west = min(west, ds_west)
         east = max(east, ds_east)
         earliest = min(earliest, ds_first)
         latest = max(latest, ds_last)

     return [south, north, west, east, str(earliest), str(latest)]
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	import datetime as dt
	import logging

	import yaml

	# Apply the default logging configuration, then create the logger used by
	# this module (named after the module itself).
	logging.basicConfig()
	logger = logging.getLogger(__name__)

	def export_evaluation_to_config(evaluation, file_path='./exported_eval.yaml'):
	    ''' Export an evaluation to a YAML config file.

	    Collects the ``evaluation``, ``datasets``, ``metrics`` and
	    ``subregions`` sections from the supplied evaluation and dumps them
	    with PyYAML.

	    :param evaluation: The evaluation object to export.
	    :type evaluation: :class:`evaluation.Evaluation`

	    :param file_path: Optional file path where the config file should be saved.
	    :type file_path: :mod:`string`
	    '''
	    # The whole config is built before the file is opened, so a failure
	    # while generating a section never leaves an empty file behind.
	    config = {
	        'evaluation': generate_evaluation_information(evaluation),
	        'datasets': generate_dataset_information(evaluation),
	        'metrics': generate_metric_information(evaluation),
	        'subregions': generate_subregion_information(evaluation),
	    }

	    # ``open`` replaces the Python 2-only ``file`` builtin, and the context
	    # manager guarantees the handle is flushed and closed after the dump.
	    with open(file_path, 'w') as config_file:
	        yaml.dump(config, config_file)

	def generate_dataset_information(evaluation):
	    ''' Build the ``datasets`` section of a config file for an Evaluation.

	    :param evaluation: The evaluation object whose datasets are exported.
	    :type evaluation: :class:`evaluation.Evaluation`

	    :returns: A :func:`dict` with a ``reference`` entry when the evaluation
	        has a reference dataset and a ``targets`` list when it has at least
	        one target dataset.
	    :rtype: :func:`dict`
	    '''
	    datasets = {}

	    if evaluation.ref_dataset:
	        datasets['reference'] = generate_dataset_config(evaluation.ref_dataset)

	    if len(evaluation.target_datasets) > 0:
	        datasets['targets'] = [
	            generate_dataset_config(target)
	            for target in evaluation.target_datasets
	        ]

	    return datasets

	def generate_dataset_config(dataset):
	    ''' Generate dataset config file output for a given Dataset object.

	    The ``source`` entry of ``dataset.origin`` selects which extraction
	    helper builds the base configuration; the dataset name and units are
	    then added to its ``optional_args``.

	    :param dataset: The dataset from which to extract configuration
	        information.
	    :type dataset: :class:`dataset.Dataset`

	    :returns: :func:`dict` containing necessary information for
	        dataset to be saved into a configuration object.

	    :raises AttributeError: If the dataset origin has no ``source`` entry
	        or names a source other than local, rcmed, esgf or dap.
	    '''
	    # Use .get() so a missing 'source' key takes the documented
	    # AttributeError path below instead of surfacing as a KeyError.
	    dataset_source = dataset.origin.get('source')

	    if dataset_source == 'local':
	        info = _extract_local_dataset_info(dataset)
	    elif dataset_source == 'rcmed':
	        info = _extract_rcmed_dataset_info(dataset)
	    elif dataset_source == 'esgf':
	        info = _extract_esgf_dataset_info(dataset)
	    elif dataset_source == 'dap':
	        info = _extract_dap_dataset_info(dataset)
	    else:
	        # The trailing space on the first literal keeps the two sentences
	        # from running together in the emitted message.
	        err = (
	            "Unexpected source in dataset origin information. "
	            "Found {}."
	        ).format(dataset_source)
	        logger.error(err)
	        raise AttributeError(err)

	    info['optional_args']['name'] = dataset.name
	    info['optional_args']['units'] = dataset.units

	    return info

	def generate_metric_information(evaluation):
	    ''' List the class names of every metric attached to an Evaluation.

	    :param evaluation: The evaluation object from which to extract metrics.
	    :type evaluation: :class:`evaluation.Evaluation`

	    :returns: A :func:`list` of metric class names, unary metrics first
	        followed by the entries of ``evaluation.metrics``, for output into
	        a configuration file.
	    :rtype: :func:`list` of :mod:`string`
	    '''
	    unary_metrics = [x.__class__.__name__ for x in evaluation.unary_metrics]
	    binary_metrics = [x.__class__.__name__ for x in evaluation.metrics]

	    return unary_metrics + binary_metrics

	def generate_evaluation_information(evaluation):
	    ''' Generate the ``evaluation`` section of a config file.

	    Temporal rebinning, spatial regridding and subset settings are guessed
	    from the reference and target datasets of the supplied evaluation. When
	    the evaluation has no datasets at all, the built-in defaults are
	    returned unchanged.

	    The values are a best-effort extrapolation and may not be what you
	    want or expect, so double check the output before blindly trusting it.

	    :param evaluation: The evaluation object from which to extract settings.
	    :type evaluation: :class:`evaluation.Evaluation`

	    :returns: A dictionary of valid `evaluation` section settings for export
	        to a configuration file.
	    :rtype: :func:`dict`
	    '''
	    eval_config = {
	        'temporal_time_delta': 999,
	        'spatial_regrid_lats': (-90, 90, 1),
	        'spatial_regrid_lons': (-180, 180, 1),
	        'subset': [-90, 90, -180, 180, "1500-01-01", "2500-01-01"],
	    }

	    # Gather the reference dataset (if any) followed by the targets.
	    datasets = []

	    if evaluation.ref_dataset:
	        datasets.append(evaluation.ref_dataset)

	    if evaluation.target_datasets:
	        datasets += evaluation.target_datasets

	    # Only replace the defaults when there is a dataset to inspect.
	    if len(datasets) > 0:
	        eval_config['temporal_time_delta'] = _calc_temporal_bin_size(datasets)

	        lats, lons = _calc_spatial_lat_lon_grid(datasets)
	        eval_config['spatial_regrid_lats'] = lats
	        eval_config['spatial_regrid_lons'] = lons

	        eval_config['subset'] = _calc_subset_config(datasets)

	    return eval_config

	def generate_subregion_information(evaluation):
	    ''' Generate the ``subregions`` section of a config file.

	    :param evaluation: The evaluation object whose subregions are exported.
	    :type evaluation: :class:`evaluation.Evaluation`

	    :returns: A :func:`list` with one ``[lat_min, lat_max, lon_min,
	        lon_max]`` :func:`list` per subregion of the evaluation.
	    :rtype: :func:`list` of :func:`list`
	    '''
	    subregions = []
	    for s in evaluation.subregions:
	        subregions.append([s.lat_min, s.lat_max, s.lon_min, s.lon_max])

	    return subregions

	def _extract_local_dataset_info(dataset):
	    ''' Build the config entry for a dataset with a ``local`` origin.

	    :param dataset: Dataset whose ``origin`` holds ``path``, ``lat_name``,
	        ``lon_name`` and ``time_name`` entries and, optionally,
	        ``elevation_index``.

	    :returns: :func:`dict` describing a ``local`` data source.
	    '''
	    dataset_info = {'optional_args': {}}

	    dataset_info['data_source'] = 'local'
	    dataset_info['file_count'] = 1
	    dataset_info['path'] = dataset.origin['path']
	    dataset_info['variable'] = dataset.variable

	    dataset_info['optional_args']['lat_name'] = dataset.origin['lat_name']
	    dataset_info['optional_args']['lon_name'] = dataset.origin['lon_name']
	    dataset_info['optional_args']['time_name'] = dataset.origin['time_name']

	    # The elevation index is only exported when the origin recorded one.
	    if 'elevation_index' in dataset.origin:
	        elev = dataset.origin['elevation_index']
	        dataset_info['optional_args']['elevation_index'] = elev

	    return dataset_info

	def _extract_rcmed_dataset_info(dataset):
	    ''' Build the config entry for a dataset with an ``rcmed`` origin.

	    The spatial and temporal bounds are read from the dataset itself; the
	    dataset and parameter ids come from ``dataset.origin``.

	    :param dataset: Dataset providing ``spatial_boundaries()``,
	        ``temporal_boundaries()`` and an ``origin`` with ``dataset_id`` and
	        ``parameter_id`` entries.

	    :returns: :func:`dict` describing an ``rcmed`` data source, with the
	        start and end times converted to strings.
	    '''
	    dataset_info = {'optional_args': {}}

	    min_lat, max_lat, min_lon, max_lon = dataset.spatial_boundaries()
	    start_time, end_time = dataset.temporal_boundaries()

	    dataset_info['data_source'] = 'rcmed'
	    dataset_info['dataset_id'] = dataset.origin['dataset_id']
	    dataset_info['parameter_id'] = dataset.origin['parameter_id']
	    dataset_info['min_lat'] = min_lat
	    dataset_info['max_lat'] = max_lat
	    dataset_info['min_lon'] = min_lon
	    dataset_info['max_lon'] = max_lon
	    dataset_info['start_time'] = str(start_time)
	    dataset_info['end_time'] = str(end_time)

	    return dataset_info

	def _extract_esgf_dataset_info(dataset):
	    ''' Build the config entry for a dataset with an ``esgf`` origin.

	    The username and password fields are filled with placeholder text for
	    the user to replace; no credentials are read from the dataset.

	    :param dataset: Dataset whose ``origin`` holds ``dataset_id`` and
	        ``variable`` entries.

	    :returns: :func:`dict` describing an ``esgf`` data source.
	    '''
	    dataset_info = {'optional_args': {}}

	    dataset_info['data_source'] = 'esgf'
	    dataset_info['dataset_id'] = dataset.origin['dataset_id']
	    dataset_info['variable'] = dataset.origin['variable']
	    dataset_info['esgf_username'] = 'Put your ESGF Username here'
	    dataset_info['esgf_password'] = 'Put your ESGF Password here'

	    return dataset_info

	def _extract_dap_dataset_info(dataset):
	    ''' Build the config entry for a dataset with a ``dap`` origin.

	    :param dataset: Dataset with a ``variable`` attribute and an ``origin``
	        holding a ``url`` entry.

	    :returns: :func:`dict` describing a ``dap`` data source.
	    '''
	    dataset_info = {'optional_args': {}}

	    dataset_info['data_source'] = 'dap'
	    dataset_info['url'] = dataset.origin['url']
	    dataset_info['variable'] = dataset.variable

	    return dataset_info

	def _calc_temporal_bin_size(datasets):
	    ''' Guess a temporal bin size from the first dataset's time axis.

	    Only the spacing between the first two time values of ``datasets[0]``
	    is inspected.

	    :param datasets: Non-empty sequence of datasets; only the first one's
	        ``times`` attribute is read.

	    :returns: 1 when the step is shorter than one day, 31 when it is at
	        most 31 days, 366 when it is at most 366 days and 999 otherwise.
	        999 is also returned when there are fewer than two time values.
	    :rtype: :func:`int`
	    '''
	    times = datasets[0].times

	    # A step cannot be measured from fewer than two time values; fall back
	    # to 999, the same default generate_evaluation_information uses when it
	    # has no datasets to inspect, instead of raising IndexError.
	    if len(times) < 2:
	        return 999

	    time_delta = times[1] - times[0]

	    if time_delta.days == 0:
	        return 1
	    elif time_delta.days <= 31:
	        return 31
	    elif time_delta.days <= 366:
	        return 366
	    else:
	        return 999

	def _calc_spatial_lat_lon_grid(datasets):
	    ''' Derive regridding ranges from the first dataset's lat/lon axes.

	    :param datasets: Non-empty sequence of datasets; only the first one is
	        inspected.

	    :returns: ``((lat_min, lat_max, lat_step), (lon_min, lon_max,
	        lon_step))``. Each step is the absolute spacing between the first
	        two axis values; each max is extended by one step unless it already
	        equals 90 (latitude) or 180 (longitude).
	    '''
	    lat_min, lat_max, lon_min, lon_max = datasets[0].spatial_boundaries()

	    lats = datasets[0].lats
	    lons = datasets[0].lons
	    # These explicit float casts are needed to ensure that the type of the
	    # lat/lon steps are not numpy values. PyYAML will choke on export if it
	    # encounters a Numpy value.
	    lat_step = float(abs(lats[1] - lats[0]))
	    lon_step = float(abs(lons[1] - lons[0]))

	    # We need to add an extra step value onto the end so when we generate a
	    # range with these values we don't lose one that we're expecting.
	    if lat_max != 90:
	        lat_max += lat_step
	    if lon_max != 180:
	        lon_max += lon_step

	    return ((lat_min, lat_max, lat_step), (lon_min, lon_max, lon_step))

	def _calc_subset_config(datasets):
	    ''' Compute the subset bounds covering the supplied datasets.

	    Starts from inverted extremes (lat 90/-90, lon 180/-180, years
	    2500/1500) and widens them with each dataset's spatial and temporal
	    boundaries.

	    :param datasets: Iterable of datasets providing
	        ``spatial_boundaries()`` and ``temporal_boundaries()``.

	    :returns: ``[lat_min, lat_max, lon_min, lon_max, start, end]`` with the
	        start and end times converted to strings.
	    :rtype: :func:`list`
	    '''
	    lat_min = 90
	    lat_max = -90
	    lon_min = 180
	    lon_max = -180
	    start = dt.datetime(2500, 1, 1)
	    end = dt.datetime(1500, 1, 1)

	    for ds in datasets:
	        ds_lat_min, ds_lat_max, ds_lon_min, ds_lon_max = ds.spatial_boundaries()
	        ds_start, ds_end = ds.temporal_boundaries()

	        if ds_lat_min < lat_min:
	            lat_min = ds_lat_min

	        if ds_lat_max > lat_max:
	            lat_max = ds_lat_max

	        if ds_lon_min < lon_min:
	            lon_min = ds_lon_min

	        if ds_lon_max > lon_max:
	            lon_max = ds_lon_max

	        if ds_start < start:
	            start = ds_start

	        if ds_end > end:
	            end = ds_end

	    return [lat_min, lat_max, lon_min, lon_max, str(start), str(end)]