| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| '''''' |
| |
| import sys |
| import datetime as dt |
| import numpy as np |
| import numpy.ma as ma |
| |
| from mpl_toolkits.basemap import shiftgrid |
| from dateutil.relativedelta import relativedelta |
| from netCDF4 import num2date |
| |
| |
def decode_time_values(dataset, time_var_name):
    ''' Decode NetCDF time values into Python datetime objects.

    :param dataset: The dataset from which time values should be extracted.
    :type dataset: netCDF4.Dataset
    :param time_var_name: The name of the time variable in dataset.
    :type time_var_name: :mod:`string`

    :returns: The list of converted datetime values.

    :raises ValueError: If the time units value couldn't be parsed, if the
        base time value couldn't be parsed, or if the time_var_name could not
        be found in the dataset.
    '''
    time_data = dataset.variables[time_var_name]
    time_format = time_data.units

    # Strip a trailing 'Z' / 'UTC' timezone designator that the downstream
    # parsers cannot handle.
    if time_format[-1].lower() == 'z':
        time_format = time_format[:-1]
    if time_format[-3:].lower() == 'utc':
        time_format = time_format[:-3]

    time_units = parse_time_units(time_format)
    # Parse the base time eagerly so a malformed units string raises
    # ValueError no matter which conversion branch runs below.
    time_base = parse_time_base(time_format)

    times = []
    if time_units == 'months':
        # num2date has no 'months' interval, so build each date by adding
        # a calendar-aware month offset (relativedelta) to the base time.
        for time_val in time_data:
            times.append(time_base + relativedelta(months=int(time_val)))
    else:
        # The 'calendar' attribute is optional on NetCDF variables; fall
        # back to the CF default when it is absent. Catch only
        # AttributeError -- the original bare except also swallowed
        # KeyboardInterrupt/SystemExit.
        try:
            times_calendar = time_data.calendar
        except AttributeError:
            times_calendar = 'standard'

        times = num2date(
            time_data[:], units=time_format, calendar=times_calendar)
    return times
| |
| |
def parse_time_units(time_format):
    ''' Parse units value from time units string.

    The only units that are supported are: seconds, minutes, hours, days,
    months, or years.

    :param time_format: The time data units string from the dataset
        being processed. The string should be of the format
        '<units> since <base time date>'
    :type time_format: :mod:`string`

    :returns: The unit substring from the time units string

    :raises ValueError: If the units present in the time units string doesn't
        match one of the supported unit value.
    '''
    supported_units = ('seconds', 'minutes', 'hours', 'days',
                       'months', 'years')
    # Return the first supported unit mentioned anywhere in the string.
    for candidate in supported_units:
        if candidate in time_format:
            return candidate

    # Nothing matched -- report the failure with caller context.
    frame_code = sys._getframe().f_code
    message = "{}.{}: Unable to parse valid units from {}".format(
        frame_code.co_filename,
        frame_code.co_name,
        time_format
    )
    raise ValueError(message)
| |
| |
def parse_time_base(time_format):
    ''' Parse time base object from the time units string.

    :param time_format: The time data units string from the dataset
        being processed. The string should be of the format
        '<units> since <base time date>'
    :type time_format: :mod:`string`

    :returns: The base time as a datetime object.

    :raises ValueError: When the base time string couldn't be parsed from the
        units time_format string or if the date string didn't match any of the
        expected formats.
    '''
    base_time_string = parse_base_time_string(time_format)

    # Candidate strptime patterns, tried in order. (A duplicate
    # '%Y-%m-%d %H:%M:%S' entry was removed; the dead
    # `time_format.strip()` that preceded this list was dropped because
    # the loop below never read that value.)
    possible_time_formats = [
        '%Y:%m:%d %H:%M:%S', '%Y-%m-%d %H-%M-%S', '%Y/%m/%d %H/%M/%S',
        '%Y-%m-%d %H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y%m%d %H:%M:%S',
        '%Y%m%d%H%M%S', '%Y-%m-%d-%H-%M-%S', '%Y/%m/%d/%H/%M/%S',
        '%Y:%m:%d:%H:%M:%S', '%Y-%m-%d-%H:%M:%S',
        '%Y/%m/%d%H:%M:%S', '%Y-%m-%d %H:%M', '%Y/%m/%d %H:%M',
        '%Y:%m:%d %H:%M', '%Y%m%d %H:%M', '%Y-%m-%d', '%Y/%m/%d',
        '%Y:%m:%d', '%Y%m%d', '%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H',
        '%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%SZ'
    ]

    # Attempt to match the base time string with a possible format parsing
    # string. The loop variable no longer shadows the `time_format`
    # parameter, which it did in the original.
    for candidate_format in possible_time_formats:
        try:
            stripped_time = dt.datetime.strptime(
                base_time_string, candidate_format)
            break
        except ValueError:
            # This exception means that the time format attempted was
            # incorrect. No need to report or raise this,
            # simply try the next one!
            pass
    # If we got through the entire loop without a break, we couldn't parse the
    # date string with our known formats.
    else:
        cur_frame = sys._getframe().f_code
        err = "{}.{}: Unable to parse valid date from {}".format(
            cur_frame.co_filename,
            cur_frame.co_name,
            base_time_string
        )

        raise ValueError(err)

    return stripped_time
| |
| |
def parse_base_time_string(time_format):
    ''' Retrieve base time string from time data units information.

    :param time_format: The time data units string from the dataset
        being processed. The string should be of the format
        '<units> since <base time date>'
    :type time_format: :mod:`string`

    :returns: The base time string split out of the time units information.

    :raises ValueError: When the time_format parameter is malformed.
    '''
    # Guard clause: without a 'since' marker the string is malformed.
    if 'since' not in time_format:
        frame_code = sys._getframe().f_code
        message = "{}.{}: Invalid time_format value {} given".format(
            frame_code.co_filename,
            frame_code.co_name,
            time_format
        )
        raise ValueError(message)

    # Everything between the first and any second 'since' is the base date.
    base_portion = time_format.split('since')[1]
    return base_portion.strip()
| |
| |
def normalize_lat_lon_values(lats, lons, values):
    ''' Normalize lat/lon values

    Ensure that lat/lon values are within [-180, 180)/[-90, 90) as well
    as sorted. If the values are off the grid they are shifted into the
    expected range.

    :param lats: A 1D numpy array of sorted lat values.
    :type lats: :class:`numpy.ndarray`
    :param lons: A 1D numpy array of sorted lon values.
    :type lons: :class:`numpy.ndarray`
    :param values: A 3D array of data values.

    :returns: A :func:`tuple` of the form (adjusted_lats, adjusted_lons,
        adjusted_values)

    :raises ValueError: If the lat/lon values are not sorted.
    '''
    if lats.ndim == 1 and lons.ndim == 1:
        # Avoid unnecessary shifting if all lons are higher than 180
        # NOTE(review): this mutates the caller's `lons` array in place --
        # confirm callers do not rely on the input being untouched.
        if lons.min() > 180:
            lons -= 360

        # Make sure lats and lons are monotonically increasing.
        # These are boolean arrays flagging each decreasing step.
        lats_decreasing = np.diff(lats) < 0
        lons_decreasing = np.diff(lons) < 0

        # If all values are decreasing then they just need to be reversed
        lats_reversed, lons_reversed = (lats_decreasing.all(),
                                        lons_decreasing.all())

        # If the lat values are unsorted (neither fully ascending nor
        # fully descending) then raise an exception
        if not lats_reversed and lats_decreasing.any():
            raise ValueError('Latitudes must be sorted.')

        # Perform same checks now for lons
        if not lons_reversed and lons_decreasing.any():
            raise ValueError('Longitudes must be sorted.')

        # Also check if lons go from [0, 360), and convert to [-180, 180)
        # if necessary
        lons_shifted = lons.max() > 180
        # NOTE(review): for ndarrays, `[:]` slicing returns views rather
        # than copies; the reversals below rebind to new arrays, so the
        # inputs are not reversed in place.
        lats_out, lons_out, data_out = lats[:], lons[:], values[:]
        # Now correct data if latlon grid needs to be shifted.
        # Axis convention: values is (..., lat, lon).
        if lats_reversed:
            lats_out = lats_out[::-1]
            data_out = data_out[..., ::-1, :]

        if lons_reversed:
            lons_out = lons_out[::-1]
            data_out = data_out[..., :, ::-1]

        if lons_shifted:
            # Rotate the grid so longitudes run over [-180, 180).
            data_out, lons_out = shiftgrid(
                180, data_out, lons_out, start=False)

        return lats_out, lons_out, data_out
    else:
        # 2D (or higher) coordinate arrays: only wrap longitudes above 180
        # into [-180, 180). NOTE(review): this mutates `lons` in place.
        lons[lons > 180] = lons[lons > 180] - 360.

        return lats, lons, values
| |
| |
def reshape_monthly_to_annually(dataset):
    ''' Reshape monthly binned dataset to annual bins.

    Reshape a monthly binned dataset's 3D value array with shape
    (num_months, num_lats, num_lons) to a 4D array with shape
    (num_years, 12, num_lats, num_lons). This causes the data to be binned
    annually while retaining its original shape.

    It is assumed that the number of months in the dataset is evenly
    divisible by 12. If it is not you will receive error due to
    an invalid shape.

    Example change of a dataset's shape:
    (24, 90, 180) -> (2, 12, 90, 180)

    :param dataset: Dataset object with full-year format
    :type dataset: :class:`dataset.Dataset`

    :returns: Dataset values array with shape (num_year, 12, num_lat, num_lon)
    '''

    values = dataset.values[:]
    num_total_month = values.shape[0]
    # Floor division keeps the year count an int on both Python 2 and 3;
    # true division ('/') yields a float on Python 3, which is invalid in
    # a shape tuple and breaks the reshape below.
    num_year = num_total_month // 12
    num_month = 12
    # Make new shape (num_year, 12, num_lats, num_lons)
    new_shape = (num_year, num_month) + values.shape[1:]
    # In-place reshape via the shape attribute (no data copy; raises if a
    # copy would be required).
    values.shape = new_shape

    return values
| |
| |
def calc_temporal_mean(dataset):
    ''' Calculate temporal mean of dataset's values

    :param dataset: OCW Dataset whose first dimension is time
    :type dataset: :class:`dataset.Dataset`

    :returns: Mean values averaged for the first dimension (time)
    '''
    # Axis 0 is time, so this collapses the series into a single field
    # using a mask-aware mean.
    time_axis = 0
    return ma.mean(dataset.values, axis=time_axis)
| |
| |
def calc_climatology_year(dataset):
    ''' Calculate climatology of dataset's values for each year

    :param dataset: Monthly binned Dataset object with an evenly divisible
        number of months.
    :type dataset: :class:`dataset.Dataset`

    :returns: Mean values for each year (annual_mean) and mean values for all
        years (total_mean)

    :raise ValueError: If the number of monthly bins is not evenly divisible
        by 12.
    '''
    # Guard clause: bail out early unless we have whole years of data.
    num_times = dataset.values.shape[0]
    if num_times % 12 != 0:
        raise ValueError('The dataset should be in full-time format.')

    # View of the monthly data binned as (num_year, 12, num_lats, num_lons).
    binned_values = reshape_monthly_to_annually(dataset)
    # Per-year means: (num_year, num_lats, num_lons).
    annually_mean = binned_values.mean(axis=1)
    # Grand mean across all years: (num_lats, num_lons).
    total_mean = annually_mean.mean(axis=0)

    return annually_mean, total_mean
| |
| |
def calc_climatology_monthly(dataset):
    ''' Calculate monthly mean values for a dataset.
    Follow COARDS climo stats calculation, the year can be given as 0
    but the min year allowed in Python is 1
    http://www.cgd.ucar.edu/cms/eaton/netcdf/CF-20010629.htm#climatology

    :param dataset: Monthly binned Dataset object with the number of months
        divisible by 12
    :type dataset: :class:`dataset.Dataset`

    :returns: Mean values for each month of the year of shape
        (12, num_lats, num_lons) and times array of datetime objects
        of length 12

    :raise ValueError: If the number of monthly bins is not divisible by 12
    '''
    # Guard clause: whole years of monthly data only.
    if dataset.values.shape[0] % 12 != 0:
        raise ValueError(
            "The length of the time axis in the values array should be "
            "divisible by 12."
        )

    # Average the same calendar month across all years.
    values = reshape_monthly_to_annually(dataset).mean(axis=0)

    # A year can commence from any month, so anchor the 12 output dates at
    # the dataset's first month (in year 1, Python's minimum year).
    first_month = dataset.times[0].month
    anchor = dt.datetime(1, first_month, 1)
    times = np.array([anchor + relativedelta(months=offset)
                      for offset in range(12)])
    return values, times
| |
| |
def calc_time_series(dataset):
    ''' Calculate time series mean values for a dataset

    :param dataset: Dataset object
    :type dataset: :class:`dataset.Dataset`

    :returns: time series for the dataset of shape (nT)
    '''

    t_series = []
    # `range` (not the Python-2-only `xrange`) keeps this working on both
    # Python 2 and Python 3; iteration behavior is identical.
    for t in range(dataset.values.shape[0]):
        # Spatial mean of one time slice.
        t_series.append(dataset.values[t, :, :].mean())

    return t_series
| |
| |
def get_temporal_overlap(dataset_array):
    ''' Find the maximum temporal overlap across the observation and model datasets

    :param dataset_array: an array of OCW datasets

    :returns: a (start, end) pair where start is the latest start time and
        end is the earliest end time across all datasets
    '''
    start_time = []
    end_time = []
    for dataset in dataset_array:
        # Query the boundaries once per dataset instead of twice.
        bounds = dataset.temporal_boundaries()
        start_time.append(bounds[0])
        end_time.append(bounds[1])

    # The common overlap runs from the latest start to the earliest end.
    return np.max(start_time), np.min(end_time)
| |
| |
def calc_subregion_area_mean_and_std(dataset_array, subregions):
    ''' Calculate area mean and standard deviation values for a given
    subregions using datasets on common grid points

    :param dataset_array: An array of OCW Dataset Objects
    :type dataset_array: :class:`list`
    :param subregions: list of subregions; each entry's second element is
        a (lat_min, lat_max, lon_min, lon_max) bounding box
    :type subregions: :class:`numpy.ma.array`
    :returns: area averaged time series for the dataset of shape
        (ntime, nsubregion), the per-subregion spatial standard
        deviations of the same shape, and a masked map of subregion
        membership per grid point
    '''

    ndata = len(dataset_array)
    # All datasets are assumed to share the first dataset's grid.
    dataset0 = dataset_array[0]
    if dataset0.lons.ndim == 1:
        # Expand 1D coordinate axes into full 2D (lat, lon) grids.
        lons, lats = np.meshgrid(dataset0.lons, dataset0.lats)
    else:
        lons = dataset0.lons
        lats = dataset0.lats
    # Membership map: each grid point gets its subregion number (1-based);
    # 0 means the point belongs to no subregion.
    subregion_array = np.zeros(lons.shape)
    # NOTE(review): assumes values is a masked array (.mask would not
    # exist on a plain ndarray) -- confirm upstream guarantees this.
    mask_array = dataset_array[0].values[0, :].mask
    # dataset0.values.shape[0]: length of the time dimension
    # spatial average
    t_series = ma.zeros([ndata, dataset0.values.shape[0], len(subregions)])
    # spatial standard deviation
    spatial_std = ma.zeros([ndata, dataset0.values.shape[0], len(subregions)])

    for iregion, subregion in enumerate(subregions):
        lat_min, lat_max, lon_min, lon_max = subregion[1]
        # Indices of every grid point inside this bounding box.
        y_index, x_index = np.where((lats >= lat_min) & (
            lats <= lat_max) & (lons >= lon_min) & (lons <= lon_max))
        subregion_array[y_index, x_index] = iregion + 1
        for idata in np.arange(ndata):
            # Mean/std over the selected grid points (axis 1 after fancy
            # indexing) for every time step at once.
            t_series[idata, :, iregion] = ma.mean(dataset_array[idata].values[
                :, y_index, x_index], axis=1)
            spatial_std[idata, :, iregion] = ma.std(
                dataset_array[idata].values[:, y_index, x_index], axis=1)
    subregion_array = ma.array(subregion_array, mask=mask_array)
    return t_series, spatial_std, subregion_array
| |
| |
def calc_area_weighted_spatial_average(dataset, area_weight=False):
    '''Calculate area weighted average of the values in OCW dataset

    :param dataset: Dataset object with a values array of shape
        (nT, nLat, nLon)
    :type dataset: :class:`dataset.Dataset`
    :param area_weight: When True, weight each grid cell by the cosine of
        its latitude to approximate its relative area.
    :type area_weight: :class:`bool`

    :returns: time series for the dataset of shape (nT)
    '''

    if dataset.lats.ndim == 1:
        # Expand 1D lats to the 2D (nLat, nLon) grid; the longitude
        # component of the meshgrid is not needed for weighting (the
        # original bound it to an unused local).
        lats = np.meshgrid(dataset.lons, dataset.lats)[1]
    else:
        lats = dataset.lats
    # cos(latitude) approximates the relative area of each grid cell.
    weights = np.cos(lats * np.pi / 180.)

    nt = dataset.values.shape[0]
    spatial_average = ma.zeros(nt)
    for it in np.arange(nt):
        if area_weight:
            spatial_average[it] = ma.average(
                dataset.values[it, :], weights=weights)
        else:
            spatial_average[it] = ma.average(dataset.values[it, :])

    return spatial_average