# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import sys, os
from future.standard_library import install_aliases
install_aliases()
import urllib.request
from urllib.parse import urlparse
from datetime import timedelta
import numpy as np
import ocw.data_source.local as local
import ocw.dataset_processor as dsp
import ocw.evaluation as evaluation
import ocw.metrics as metrics
import ocw.plotter as plotter
def loadDataset(urlAndVariable, dir=None):
'''Load a dataset (variable), returning a Dataset object.'''
shortName = None
if len(urlAndVariable) == 3:
url, variable, shortName = urlAndVariable
else:
url, variable = urlAndVariable
f = retrieveFile(url, dir)
ds = local.load_file(f, variable)
if shortName is None or shortName == '':
shortName = f + '?' + variable
ds.name = shortName
shape = ds.values.shape
    if len(shape) == 3:
        coords = '(time, lat, lon)'
    elif len(shape) == 2:
        coords = '(lat, lon)'
    else:
        coords = ''
    print('loadDataset: File %s has variable %s with dimensions %s and shape %s' % (f, variable, coords, shape), file=sys.stderr)
return ds
def loadDatasets(urlAndVariables, dir=None):
    '''Load multiple datasets (variables), returning a list of Dataset objects.'''
    return [loadDataset(uv, dir) for uv in urlAndVariables]
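
# Hedged usage sketch (hypothetical URLs and variable names): loadDatasets takes a list of
# (url, variable) or (url, variable, shortName) tuples; by convention the first entry is the
# reference dataset and the remaining entries are targets.
# urlsAndVars = [('http://example.org/ref_tasmax.nc', 'tasmax', 'REF'),
#                ('http://example.org/model_tasmax.nc', 'tasmax', 'MODEL')]
# datasets = loadDatasets(urlsAndVars, dir='./data')
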
def temporalRegrid(dataset, timeRes=timedelta(days=365)):
'''Temporally rebin a dataset variable to a specified time resolution (timedelta object).'''
dataset = dsp.temporal_rebin(dataset, timeRes)
name = dataset.name
if name is None: name = ''
    print('temporalRegrid: Dataset %s has new shape %s' % (name, str(dataset.values.shape)), file=sys.stderr)
return dataset
def temporalRegrids(datasets, timeRes=timedelta(days=365)):
'''Temporally rebin dataset variables to a specified time resolution (timedelta object).'''
return [temporalRegrid(d, timeRes) for d in datasets]
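
# Hedged usage sketch: rebin all loaded datasets to a coarser time resolution before
# comparison. timedelta(days=365) (the default above) requests annual bins; a value of
# roughly timedelta(days=30) is commonly used to request monthly bins.
# datasets = temporalRegrids(datasets, timeRes=timedelta(days=30))
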
def spatialBounds(dataset):
    '''Return the (minLat, maxLat, minLon, maxLon) spatial boundaries of a dataset.'''
    return dataset.spatial_boundaries()
def commonSpatialBounds(datasets):
'''Compute overlapping (intersection) spatial bounds of many datasets.'''
bounds = [spatialBounds(ds) for ds in datasets]
for i, b in enumerate(bounds):
name = datasets[i].name
if name is None or name == '': name = str(i)
print('commonSpatialBounds: Dataset %s has boundaries: lat (%s, %s), lon (%s, %s).' % \
(name, b[0], b[1], b[2], b[3]), file=sys.stderr)
minLat = max([b[0] for b in bounds])
maxLat = min([b[1] for b in bounds])
minLon = max([b[2] for b in bounds])
maxLon = min([b[3] for b in bounds])
print('commonSpatialBounds: Common boundaries are: lat (%s, %s), lon (%s, %s).' % \
(minLat, maxLat, minLon, maxLon), file=sys.stderr)
return (minLat, maxLat, minLon, maxLon)
def generateLatLonGrid(latGrid, lonGrid):
    '''Generate a uniform lat/lon grid at set resolutions, where latGrid is a tuple of (latMin, latMax, latRes) and lonGrid is (lonMin, lonMax, lonRes).'''
minLat, maxLat, latRes = list(map(float, latGrid))
minLon, maxLon, lonRes = list(map(float, lonGrid))
    lats = np.arange(minLat, maxLat, latRes)
    lons = np.arange(minLon, maxLon, lonRes)
return (lats, lons)
def commonLatLonGrid(datasets, latRes, lonRes):
'''Find common (intersect) lat/lon bounds and construct new grid with specified lat/lon resolutions.'''
minLat, maxLat, minLon, maxLon = commonSpatialBounds(datasets)
latGrid = (minLat, maxLat, latRes)
lonGrid = (minLon, maxLon, lonRes)
return generateLatLonGrid(latGrid, lonGrid)
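
# Hedged usage sketch: build a common 1-degree grid over the intersection of all dataset
# bounds. Note that np.arange excludes the upper bound, so maxLat/maxLon are only included
# when they fall exactly on a grid step.
# newLats, newLons = commonLatLonGrid(datasets, latRes=1.0, lonRes=1.0)
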
def spatialRegrid(dataset, lats, lons):
'''Spatially regrid dataset variable to a new grid with specified resolution, where lats & lons
are the new coordinate vectors.
'''
return dsp.spatial_regrid(dataset, lats, lons)
def spatialRegrids(datasets, lats, lons):
'''Spatially regrid dataset variables to a new grid with specified resolution, where lats & lons
are the new coordinate vectors.
'''
return [spatialRegrid(d, lats, lons) for d in datasets]
def lookupMetrics(metricNames,
availableMetrics={'Bias': metrics.Bias, 'StdDevRatio': metrics.StdDevRatio,
'PatternCorrelation': metrics.PatternCorrelation}):
'''Return a list of metric objects given a list of string names.'''
    metricObjs = []
    for name in metricNames:
        try:
            m = availableMetrics[name]
            metricObjs.append(m())
        except KeyError:
            print('lookupMetrics: Error, no metric named %s' % name, file=sys.stderr)
    return metricObjs
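
# Hedged usage sketch: metric names are looked up case-sensitively against the
# availableMetrics table above; unknown names are reported to stderr and skipped.
# ms = lookupMetrics(['Bias', 'PatternCorrelation'])   # e.g. [metrics.Bias(), metrics.PatternCorrelation()]
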
def computeMetrics(datasets, metricNames=['Bias'], subregions=None):
    '''Compute one or more metrics comparing multiple target datasets to a reference dataset.
    This routine assumes that the datasets have already been regridded so that their grid dimensions
    are identical.
    '''
    metricObjs = lookupMetrics(metricNames)
    if len(metricObjs) != len(metricNames):
        print('computeMetrics: Error, illegal or misspelled metric name.', file=sys.stderr)
    # Forward subregions so that subregion evaluations are not silently ignored.
    evalObj = evaluation.Evaluation(datasets[0], datasets[1:], metricObjs, subregions=subregions)
    print('computeMetrics: Evaluating metrics %s . . .' % str(metricNames), file=sys.stderr)
    evalObj.run()
    return evalObj.results
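
# Hedged usage sketch (assumes the datasets have already been regridded to a common grid):
# the first dataset is treated as the reference and the rest as targets.
# results = computeMetrics(datasets, metricNames=['Bias', 'StdDevRatio'])
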
def compareVariablesWithMetrics(datasetUrlsAndVarNames, # URLs pointing to datasets; the first is the reference, the rest are targets;
# each tuple can be (datasetUrl, variableName, shortName), shortName optional
metrics, # list of metrics to compute (by name)
outputName, # root name for outputs
timeRes=timedelta(days=365), # time resolution to regrid all variables to
latRes=1., lonRes=1., # lat/lon resolutions to regrid all variables to
subregions=None, # list of subregion boundaries
dir='./'): # directory for outputs, defaults to current working dir
    '''Compare multiple target variables to a reference variable, returning the computed metric(s)
    after temporally rebinning to a common time resolution and spatially regridding to a common
    (lat/lon) grid at the specified resolutions.
    '''
datasets = loadDatasets(datasetUrlsAndVarNames, dir)
datasets = temporalRegrids(datasets, timeRes)
newLats, newLons = commonLatLonGrid(datasets, latRes, lonRes)
datasets = spatialRegrids(datasets, newLats, newLons)
# datasets = maskMissingValues(datasets, missingValues)
    results = computeMetrics(datasets, metrics, subregions)
    return (newLats, newLons, results)
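
# Hedged end-to-end sketch (hypothetical URLs; mirrors the command-line example at the bottom
# of this file): compare a model run against a reference dataset using the Bias metric.
# lats, lons, results = compareVariablesWithMetrics(
#     [('http://example.org/ref_tasmax.nc', 'tasmax', 'REF'),
#      ('http://example.org/model_tasmax.nc', 'tasmax', 'MODEL')],
#     ['Bias'], 'model_bias_vs_ref', timeRes=timedelta(days=365), latRes=1., lonRes=1.)
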
def plotBias(metric, lats, lons, outputName, **config):
'''Plot the bias of the reference datasets compared to multiple targets.'''
plotFile = outputName + '.png'
    print('plotBias: Writing %s' % plotFile, file=sys.stderr)
plotter.draw_contour_map(metric, lats, lons, outputName, **config)
return plotFile
# Utilities follow.
def isLocalFile(url):
'''Check if URL is a local path.'''
u = urlparse(url)
if u.scheme == '' or u.scheme == 'file':
        if not os.path.exists(u.path):
print('isLocalFile: File at local path does not exist: %s' % u.path, file=sys.stderr)
return (True, u.path)
else:
return (False, u.path)
def retrieveFile(url, dir=None):
'''Retrieve a file from a URL, or if it is a local path then verify it exists.'''
if dir is None: dir = './'
ok, path = isLocalFile(url)
fn = os.path.split(path)[1]
outPath = os.path.join(dir, fn)
    if ok:
        # Local file: return the verified local path directly.
        return path
    if os.path.exists(outPath):
        print('retrieveFile: Using cached file: %s' % outPath, file=sys.stderr)
    else:
        try:
            print('retrieveFile: Retrieving (URL) %s to %s' % (url, outPath), file=sys.stderr)
            urllib.request.urlretrieve(url, outPath)
        except Exception:
            print('retrieveFile: Cannot retrieve file at URL: %s' % url, file=sys.stderr)
            return None
    return outPath
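
# Hedged usage sketch: remote files are cached in dir under their base file name, so a second
# call with the same URL and dir reuses the download; local paths are returned unchanged.
# The cache directory below is hypothetical and assumed to exist.
# ncFile = retrieveFile('http://example.org/ref_tasmax.nc', dir='./cache')
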
# Tests and main follow.
def test1(urlsAndVars, outputName, **config):
    '''Test the compareVariablesWithMetrics routine.'''
lats, lons, metrics = compareVariablesWithMetrics(urlsAndVars, ['Bias'], outputName, timedelta(days=365), 1, 1)
print(metrics)
config = {'gridshape': (4, 5),
'ptitle': 'TASMAX Bias of WRF Compared to KNMI (1989 - 2008)',
'subtitles': list(range(1989, 2009, 1))}
plotFile = plotBias(metrics[0][0], lats, lons, outputName, **config)
return plotFile
def main(args):
'''Main routine to provide command line capability.'''
nTest = int(args[0])
url1 = args[1]
var1 = args[2]
outputName = args[3]
url2 = args[4]
var2 = args[5]
urlsAndVars = [(url1, var1), (url2, var2)]
if nTest == 1:
return test1(urlsAndVars, outputName)
    elif nTest == 2:
        return test2(urlsAndVars, outputName)  # NOTE: test2 is not defined in this module
if __name__ == '__main__':
print(main(sys.argv[1:]))
# python functions.py 1 "http://zipper.jpl.nasa.gov/dist/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc" "tasmax" "wrf_bias_compared_to_knmi" "http://zipper.jpl.nasa.gov/dist/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc" "tasmax"