src/main/python/rcmes/toolkit/metrics.py - climate - Git at Google

 #
 #  Licensed to the Apache Software Foundation (ASF) under one or more
 #  contributor license agreements.  See the NOTICE file distributed with
 #  this work for additional information regarding copyright ownership.
 #  The ASF licenses this file to You under the Apache License, Version 2.0
 #  (the "License"); you may not use this file except in compliance with
 #  the License.  You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 #  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #

 '''
 Module storing functions to calculate statistical metrics from numpy arrays
 '''

 import datetime
 import subprocess
 import sys
 import os
 import numpy as np
 import numpy.ma as ma
 from math import floor, log
 from toolkit import process
 from utils import misc
 from storage import files
 from pylab import *
 import scipy.stats.mstats as mstats
 import matplotlib as mpl
 import matplotlib.dates
 import matplotlib.pyplot as plt
 from mpl_toolkits.axes_grid1 import ImageGrid
 from matplotlib.font_manager import FontProperties
 from mpl_toolkits.basemap import Basemap
 from utils.Taylor import TaylorDiagram
 # 6/10/2012 JK: any Ngl dependence will be removed in later versions
 #import Ngl

 def calc_ann_mean(t2, time):
     '''
     Calculate annual cycle in terms of monthly means at every grid point.
     '''
     # Calculate annual cycle in terms of monthly means at every grid point including single point case (ndim=1)
     # note: this routine is identical to 'calc_annual_cycle_means': must be converted to calculate the annual mean
     # Extract months from time variable
     months = np.empty(len(time))
     for t in np.arange(len(time)):
         months[t] = time[t].month
     if t2.ndim == 3:
         means = ma.empty((12, t2.shape[1], t2.shape[2])) # empty array to store means
         # Calculate means month by month
         for i in np.arange(12)+1:
             means[i - 1, :, :] = t2[months == i, :, :].mean(0)
     if t2.ndim == 1:
         means = np.empty((12)) # empty array to store means
         # Calculate means month by month
         for i in np.arange(12)+1:
             means[i - 1] = t2[months == i].mean(0)
     return means


 def calc_clim_month(t2, time):
     '''
     Calculate monthly means at every grid point.
     '''
     # Calculate monthly means at every grid point including single point case (ndim=1)
     # Extract months from time variable
     months = np.empty(len(time))
     for t in np.arange(len(time)):
         months[t] = time[t].month
     if t2.ndim == 3:
         means = ma.empty((12, t2.shape[1], t2.shape[2])) # empty array to store means
         # Calculate means month by month
         for i in np.arange(12) + 1:
             means[i - 1, :, :] = t2[months == i, :, :].mean(0)
     if t2.ndim == 1:
         means = np.empty((12)) # empty array to store means
         # Calculate means month by month
         for i in np.arange(12) + 1:
             means[i - 1] = t2[months == i].mean(0)
     return means


 def calc_clim_year(nYR, nT, ngrdY, ngrdX, t2, time):
     '''
     Calculate annual mean timeseries and climatology for both 2-D and point time series.
     '''
     # Extract months from time variable
     yy = np.empty(nT)
     mm = np.empty(nT)
     for t in np.arange(nT):
         yy[t] = time[t].year
         mm[t] = time[t].month
     if t2.ndim == 3:
         tSeries = ma.zeros((nYR, ngrdY, ngrdX))
         i = 0
         for myunit in np.unique(yy):
             wh = (yy == myunit)
             data = t2[wh, :, :]
             tSeries[i, :, :] = ma.average(data, axis = 0)
             #print 'data.shape= ',data.shape,'  i= ',i,'  yy= ',yy
             i += 1
         means = ma.zeros((ngrdY, ngrdX))
         means = ma.average(tSeries, axis = 0)
     elif t2.ndim == 1:
         tSeries = ma.zeros((nYR))
         i = 0
         for myunit in np.unique(yy):
             wh = (yy == myunit)
             data = t2[wh]
             tSeries[i] = ma.average(data, axis = 0)
             #print 'data.shape= ',data.shape,'  i= ',i,'  yy= ',yy
             i += 1
         means = ma.zeros((ngrdY, ngrdX))
         means = ma.average(tSeries, axis = 0)
     return tSeries, means


 def calc_clim_season(nYR, nT, mB, mE, ngrdY, ngrdX, t2, time):
     '''
     Calculate seasonal mean timeseries and climatology for both 2-D and point time series.
     '''
     #-------------------------------------------------------------------------------------
     # Calculate seasonal mean timeseries and climatology for both 2-d and point time series
     # The season to be calculated is defined by moB and moE; moE>=moB always
     #-------------------------------------------------------------------------------------
     # Extract months from time variable
     yy = np.empty(nT)
     mm = np.empty(nT)
     for t in np.arange(nT):
         yy[t] = time[t].year
         mm[t] = time[t].month
     if t2.ndim == 3:
         tSeries = ma.zeros((nYR, ngrdY, ngrdX))
         i = 0
         for myunit in np.unique(yy):
             wh = (yy == myunit) & (mm >= mB) & (mm <= mE)
             data = t2[wh, :, :]
             tSeries[i, :, :] = ma.average(data, axis = 0)
             #print 'data.shape= ',data.shape,'  i= ',i,'  yy= ',yy
             i += 1
         means = ma.zeros((ngrdY, ngrdX))
         means = ma.average(tSeries, axis = 0)
     elif t2.ndim == 1:
         tSeries = ma.zeros((nYR))
         i = 0
         for myunit in np.unique(yy):
             wh = (yy == myunit) & (mm >= mB) & (mm <= mE)
             data = t2[wh]
             tSeries[i] = ma.average(data, axis = 0)
             #print 'data.shape= ',data.shape,'  i= ',i,'  yy= ',yy
             i += 1
         means = ma.zeros((ngrdY, ngrdX))
         means = ma.average(tSeries, axis = 0)
     return tSeries, means


 def calc_clim_mo(nYR, nT, ngrdY, ngrdX, t2, time):
     '''
     Calculate monthly means at every grid points and the annual time series of single model including single point case (ndim=1).
     JK20: This is modified from 'calc_clim_month'  with additional arguments & output, the annual time series of single model output (mData)
     6/8/2013: bug fix: mm = months[t] --> mm = months[t] - 1, otherwise array overflow occurs
     '''
     # Extract months and monthly time series from the time and raw variable, respectively
     months = np.empty(nT)
     for t in np.arange(nT):
         months[t] = time[t].month
         if t == 0:
             yy0 = time[t].year
         # for a 2-D time series data
     if t2.ndim == 3:
         mData = ma.empty((nYR, 12, ngrdY, ngrdX))
         for t in np.arange(nT):
             yy = time[t].year
             mm = months[t] - 1
             yr = yy - yy0
             mData[yr, mm, :, :] = t2[t, :, :]
         # Calculate means month by month. means is an empty array to store means
         means = ma.empty((12, ngrdY, ngrdX))
         for i in np.arange(12) + 1:
             means[i - 1, :, :] = t2[months == i, :, :].mean(0)
         # for a point time series data
     if t2.ndim == 1:
         mData = ma.empty((nYR, 12))
         for t in np.arange(nT):
             yy = time[t].year
             mm = months[t]
             yr = yy - yy0
             mData[yr, mm] = t2[t]
         means = np.empty((12))
         # Calculate means month by month. means is an empty array to store means
         for i in np.arange(12) + 1:
             means[i - 1] = t2[months == i].mean(0)
     return mData, means


 def calc_clim_One_month(moID, nYR, nT, t2, time):
     '''
     Calculate the montly mean at every grid point for a specified month.
     '''
     #-------------------------------------------------------------------------------------
     # Calculate monthly means at every grid point for a specified month
     #-------------------------------------------------------------------------------------
     # Extract months and the corresponding time series from time variable
     months = np.empty(nT)
     for t in np.arange(nT):
         months[t] = time[t].month
     if t2.ndim == 3:
         mData = ma.empty((nYR, t2.shape[1], t2.shape[2])) # empty array to store time series
         n = 0
         if months[t] == moID:
             mData[n, :, :] = t2[t, :, :]
             n += 1
         means = ma.empty((t2.shape[1], t2.shape[2])) # empty array to store means
         # Calculate means for the month specified by moID
         means[:, :] = t2[months == moID, :, :].mean(0)
     return mData, means


 def calc_annual_cycle_means(data, time):
     '''
      Calculate monthly means for every grid point

      Inputs::
      	data - masked 3d array of the model data (time, lon, lat)
      	time - an array of python datetime objects
     '''
     # Extract months from time variable
     months = np.empty(len(time))

     for t in np.arange(len(time)):
         months[t] = time[t].month

     #if there is data varying in t and space
     if data.ndim == 3:
         # empty array to store means
         means = ma.empty((12, data.shape[1], data.shape[2]))

         # Calculate means month by month
         for i in np.arange(12) + 1:
             means[i - 1, :, :] = data[months == i, :, :].mean(0)

     #if the data is a timeseries over area-averaged values
     if data.ndim == 1:
         # TODO - Investigate using ma per KDW
         means = np.empty((12)) # empty array to store means??WHY NOT ma?

         # Calculate means month by month
         for i in np.arange(12) + 1:
             means[i - 1] = data[months == i].mean(0)

     return means


 def calc_annual_cycle_std(data, time):
     '''
      Calculate monthly standard deviations for every grid point
     '''
     # Extract months from time variable
     months = np.empty(len(time))

     for t in np.arange(len(time)):
         months[t] = time[t].month

     # empty array to store means
     stds = np.empty((12, data.shape[1], data.shape[2]))

     # Calculate means month by month
     for i in np.arange(12) + 1:
         stds[i - 1, :, :] = data[months == i, :, :].std(axis = 0, ddof = 1)

     return stds


 def calc_annual_cycle_domain_means(data, time):
     '''
      Calculate domain means for each month of the year
     '''
     # Extract months from time variable
     months = np.empty(len(time))

     for t in np.arange(len(time)):
         months[t] = time[t].month

     means = np.empty(12) # empty array to store means

     # Calculate means month by month
     for i in np.arange(12) + 1:
         means[i - 1] = data[months == i, :, :].mean()

     return means


 def calc_annual_cycle_domain_std(data, time):
     '''
      Calculate domain standard deviations for each month of the year
     '''
     # Extract months from time variable
     months = np.empty(len(time))

     for t in np.arange(len(time)):
         months[t] = time[t].month

     stds = np.empty(12) # empty array to store means

     # Calculate means month by month
     for i in np.arange(12) + 1:
         stds[i - 1] = data[months == i, :, :].std(ddof = 1)

     return stds


 def calc_bias_annual(t1, t2, optn):        # Mean Bias
     '''
     Calculate the mean difference between two fields over time for each grid point.
     '''
     # Calculate mean difference between two fields over time for each grid point
     # Precrocessing of both obs and model data ensures the absence of missing values
     diff = t1-t2
     if(open == 'abs'):
         diff = abs(diff)
     bias = diff.mean(axis = 0)
     return bias


 def calc_bias(t1, t2):
     '''
     Calculate mean difference between two fields over time for each grid point

     Classify missing data resulting from multiple times (using threshold
     data requirement)

     i.e. if the working time unit is monthly data, and we are dealing with
     multiple months of data then when we show mean of several months, we need
     to decide what threshold of missing data we tolerate before classifying a
     data point as missing data.
     '''
     t1Mask = process.create_mask_using_threshold(t1, threshold = 0.75)
     t2Mask = process.create_mask_using_threshold(t2, threshold = 0.75)

     diff = t1 - t2
     bias = diff.mean(axis = 0)

     # Set mask for bias metric using missing data in obs or model data series
     #   i.e. if obs contains more than threshold (e.g.50%) missing data
     #        then classify time average bias as missing data for that location.
     bias = ma.masked_array(bias.data, np.logical_or(t1Mask, t2Mask))
     return bias


 def calc_bias_dom(t1, t2):
     '''
      Calculate domain mean difference between two fields over time
     '''
     diff = t1 - t2
     bias = diff.mean()
     return bias


 def calc_difference(t1, t2):
     '''
      Calculate mean difference between two fields over time for each grid point
     '''
     print 'Calculating difference'
     diff = t1 - t2
     return diff


 def calc_mae(t1, t2):
     '''
     Calculate mean difference between two fields over time for each grid point

     Classify missing data resulting from multiple times (using threshold
     data requirement)

     i.e. if the working time unit is monthly data, and we are dealing with
     multiple months of data then when we show mean of several months, we need
     to decide what threshold of missing data we tolerate before classifying
     a data point as missing data.
     '''
     t1Mask = process.create_mask_using_threshold(t1, threshold = 0.75)
     t2Mask = process.create_mask_using_threshold(t2, threshold = 0.75)

     diff = t1 - t2
     adiff = abs(diff)

     mae = adiff.mean(axis = 0)

     # Set mask for mae metric using missing data in obs or model data series
     #   i.e. if obs contains more than threshold (e.g.50%) missing data
     #        then classify time average mae as missing data for that location.
     mae = ma.masked_array(mae.data, np.logical_or(t1Mask, t2Mask))
     return mae


 def calc_mae_dom(t1, t2):
     '''
      Calculate domain mean difference between two fields over time
     '''
     diff = t1 - t2
     adiff = abs(diff)
     mae = adiff.mean()
     return mae


 def calc_rms(t1, t2):
     '''
      Calculate mean difference between two fields over time for each grid point
     '''
     diff = t1 - t2
     sqdiff = diff ** 2
     msd = sqdiff.mean(axis = 0)
     rms = np.sqrt(msd)
     return rms


 def calc_rms_dom(t1, t2):
     '''
      Calculate RMS differences between two fields
     '''
     diff = t1 - t2
     sqdiff = diff ** 2
     msd = sqdiff.mean()
     rms = np.sqrt(msd)
     return rms


 def calc_temporal_stdv(t1):
     '''
     Calculate the temporal standard deviation.

     Input:
         t1 - data array of any shape

     Output:
         A 2-D array of temporal standard deviation
     '''
     # TODO Make sure the first dimension of t1 is teh time axis.
     stdv = t1.std(axis = 0)
     return stdv


 def calc_temporal_anom_cor(mD, oD):
     '''
     Calculate the temporal anomaly correlation.

     Assumption(s);
         The first dimension of mD and oD is the time axis.

     Input:
         mD - model data array of any shape
         oD - observation data array of any shape

     Output:
         A 2-D array of time series pattern correlation coefficients at each grid point.

     REF: 277-281 in Stat methods in atmos sci by Wilks, 1995, Academic Press, 467pp.
     '''
     mo = oD.mean(axis = 0)
     nt = oD.shape[0]
     deno1 = ((mD - mo) * (mD - mo)).sum(axis = 0)
     deno2 = ((oD - mo) * (oD - mo)).sum(axis = 0)
     patcor = ((mD - mo) * (oD - mo)).sum(axis = 0) / sqrt(deno1 * deno2)
     return patcor


 def calc_spatial_anom_cor(mD, oD):
     '''
     Calculate anomaly correlation between two 2-D arrays.

     Input:
         mD - 2-D array of model data
         oD - 2-D array of observation data

     Output:
         The anomaly correlation between the two input arrays.
     '''
     mo = oD.mean()
     d1 = ((mD - mo)*(mD - mo)).sum()
     d2 = ((oD - mo)*(oD - mo)).sum()
     patcor = ((mD - mo) * (oD - mo)).sum() / sqrt(d1 * d2)
     return patcor


 def calc_temporal_pat_cor(t1, t2):
     '''
      Calculate the Temporal Pattern Correlation

       Input::
         t1 - 3d array of model data
         t2 - 3d array of obs data

       Output::
         2d array of time series pattern correlation coefficients at each grid point.
         **Note:** std_dev is standardized on 1 degree of freedom
     '''
     mt1 = t1.mean(axis = 0)
     mt2 = t2.mean(axis = 0)
     nt = t1.shape[0]
     sigma_t1 = t1.std(axis = 0, ddof = 1)
     sigma_t2 = t2.std(axis = 0, ddof = 1)
     patcor = ((((t1 - mt1) * (t2 - mt2)).sum(axis = 0)) / (nt)) / (sigma_t1 * sigma_t2)

     return patcor


 def calc_spatial_pat_cor(t1, t2):
     '''
     Calcualte pattern correlation between 2-D arrays.
     6/10/2013: JK: Enforce both t1 & t2 have the identical mask before calculating std and corr

     Input:
         t1 - 2-D array of model data
         t2 - 2-D array of observation data

     Output:
         Pattern correlation between two input arrays.
     '''
     import numpy as np
     msk1 = ma.getmaskarray(t1)
     msk2 = ma.getmaskarray(t2)
     t1 = ma.masked_array(t1.data, np.logical_or(msk1, msk2))
     t2 = ma.masked_array(t2.data, np.logical_or(msk1, msk2))
     np = ma.count(t1)
     mt1 = t1.mean()
     mt2 = t2.mean()
     st1 = t1.std()
     st2 = t2.std()
     patcor = ((t1 - mt1) * (t2 - mt2)).sum() / (np * st1 * st2)
     return patcor


 def calc_pat_cor2D(t1, t2, nT):
     '''
     Calculate the pattern correlation between 3-D input arrays.

     Input:
         t1 - 3-D array of model data
         t2 - 3-D array of observation data
         nT

     Output:
         1-D array (time series) of pattern correlation coefficients.
     '''
     # TODO - Update docstring. What is nT?
     nt = t1.shape[0]
     if(nt != nT):
         print 'input time levels do not match: Exit', nT, nt
         return -1
     # store results in list for convenience (then convert to numpy array at the end)
     patcor = []
     for t in xrange(nt):
         mt1 = t1[t, :, :].mean()
         mt2 = t2[t, :, :].mean()
         sigma_t1 = t1[t, :, :].std()
         sigma_t2 = t2[t, :, :].std()
         # TODO: make means and standard deviations weighted by grid box area.
         patcor.append((((((t1[t, :, :] - mt1) * (t2[t, :, :] - mt2)).sum()) /
                      (t1.shape[1] * t1.shape[2]) ) / (sigma_t1 * sigma_t2)))
         print t, mt1.shape, mt2.shape, sigma_t1.shape, sigma_t2.shape, patcor[t]
     # TODO: deal with missing data appropriately, i.e. mask out grid points with missing data above tolerence level
     # convert from list into numpy array
     patcor = numpy.array(patcor)
     #print patcor.shape
     return patcor

 def calc_pat_cor(dataset_1, dataset_2):
     '''
      Purpose: Calculate the Pattern Correlation Timeseries
      Assumption(s)::
      	Both dataset_1 and dataset_2 are the same shape.
         * lat, lon must match up
         * time steps must align (i.e. months vs. months)
      Input::
         dataset_1 - 3d (time, lat, lon) array of data
         dataset_2 - 3d (time, lat, lon) array of data
      Output:
         patcor - a 1d array (time series) of pattern correlation coefficients.
      **Note:** Standard deviation is using 1 degree of freedom.  Debugging print
      statements to show the difference the n-1 makes. http://docs.scipy.org/doc/numpy/reference/generated/numpy.std.html
      6/17/2013 JK: Add an option for a 1-d arrays
     '''

     # TODO:  Add in try block to ensure the shapes match

     nDim1 = dataset_1.ndim
     nDim2 = dataset_2.ndim
     if nDim1 != nDim2:
         print 'dimension mismatch in calc_pat_cor: exit', nDim1,nDim2
         sys.exit()

     if nDim1 == 1:
         mt1 = dataset_1.mean()
         mt2 = dataset_2.mean()
         nt = dataset_1.shape[0]
         sigma_t1 = dataset_1.std()
         sigma_t2 = dataset_2.std()
         patcor=((dataset_1 - mt1) * (dataset_2 - mt2)).sum() / (nt * sigma_t1 * sigma_t2)

     elif nDim1 == 2:
         # find mean and std_dev
         mt1 = dataset_1.mean()
         mt2 = dataset_2.mean()
         ny = dataset_1.shape[0]
         nx = dataset_1.shape[1]
         sigma_t1 = dataset_1.std()
         sigma_t2 = dataset_2.std()
         patcor=((dataset_1 - mt1) * (dataset_2 - mt2)).sum() / ((ny * nx) * (sigma_t1 * sigma_t2))

     elif nDim1 == 3:
         nt = dataset_1.shape[0]
         ny = dataset_1.shape[1]
         nx = dataset_1.shape[2]
         # store results in list for convenience (then convert to numpy array)
         patcor = []
         for t in xrange(nt):
             # find mean and std_dev
             mt1 = dataset_1[t, :, :].mean()
             mt2 = dataset_2[t, :, :].mean()
             sigma_t1 = dataset_1[t, :, :].std(ddof = 1)
             sigma_t2 = dataset_2[t, :, :].std(ddof=1)
             # TODO: make means and standard deviations weighted by grid box area.
             # Equation from Santer_et_al 1995
             #     patcor = (1/(N*M_std*O_std))*sum((M_i-M_bar)*(O_i-O_bar))
             patcor.append((((((dataset_1[t, :, :] - mt1) * (dataset_2[t, :, :] - mt2)).sum()) / (ny * nx)) / (sigma_t1 * sigma_t2)))
             print t, mt1.shape, mt2.shape, sigma_t1.shape, sigma_t2.shape, patcor[t]
             # TODO: deal with missing data appropriately, i.e. mask out grid points
             # with missing data above tolerance level
         # convert from list into numpy array
         patcor = np.array(patcor)

     #print patcor.shape, patcor
     return patcor


 def calc_anom_corn(dataset_1, dataset_2, climatology = None):
     '''
     Calculate the anomaly correlation.

     Input:
         dataset_1 - First input dataset
         dataset_2 - Second input dataset
         climatology - Optional climatology input array. Assumption is that it is for
             the same time period by default.

     Output:
         The anomaly correlation.
     '''
     # TODO: Update docstring with actual useful information

     # store results in list for convenience (then convert to numpy array)
     anomcor = []
     nt = dataset_1.shape[0]
     #prompt for the third file, i.e. climo file...
     #include making sure the lat, lon and times are ok for comparision
     # find the climo in here and then using for eg, if 100 yrs
     # is given for the climo file, but only looking at 10yrs
     # ask if want to input climo dataset for use....if no, call previous

     if climatology != None:
         climoFileOption = raw_input('Would you like to use the full observation dataset as \
                                      the climatology in this calculation? [y/n] \n>')
         if climoFileOption == 'y':
             base_dataset = climatology
         else:
             base_dataset = dataset_2
     for t in xrange(nt):
         mean_base = base_dataset[t, :, :].mean()
         anomcor.append((((dataset_1[t, :, :] - mean_base) * (dataset_2[t, :, :] - mean_base)).sum()) /
                        np.sqrt(((dataset_1[t, :, :] - mean_base) ** 2).sum() *
                                ((dataset_2[t, :, :] - mean_base) ** 2).sum()))
         print t, mean_base.shape, anomcor[t]

     # TODO: deal with missing data appropriately, i.e. mask out grid points
     # with missing data above tolerence level

     # convert from list into numpy array
     anomcor = np.array(anomcor)
     print anomcor.shape, anomcor.ndim, anomcor
     return anomcor


 def calc_anom_cor(t1, t2):
     '''
      Calculate the Anomaly Correlation (Deprecated)
     '''

     nt = t1.shape[0]

     # store results in list for convenience (then convert to numpy
     # array at the end)
     anomcor = []
     for t in xrange(nt):

         mt2 = t2[t, :, :].mean()

         sigma_t1 = t1[t, :, :].std(ddof = 1)
         sigma_t2 = t2[t, :, :].std(ddof = 1)

         # TODO: make means and standard deviations weighted by grid box area.

         anomcor.append(((((t1[t, :, :] - mt2) * (t2[t, :, :] - mt2)).sum()) /
                         (t1.shape[1] * t1.shape[2])) / (sigma_t1 * sigma_t2))

         print t, mt2.shape, sigma_t1.shape, sigma_t2.shape, anomcor[t]

         # TODO: deal with missing data appropriately, i.e. mask out grid points with
         #       missing data above tolerence level

     # convert from list into numpy array
     anomcor = np.array(anomcor)
     print anomcor.shape, anomcor.ndim, anomcor
     return anomcor


 def calc_nash_sutcliff(dataset_1, dataset_2):
     '''
     Routine to calculate the Nash-Sutcliff coefficient of efficiency (E)

     Assumption(s)::
     	Both dataset_1 and dataset_2 are the same shape.
         * lat, lon must match up
         * time steps must align (i.e. months vs. months)

     Input::
     	dataset_1 - 3d (time, lat, lon) array of data
         dataset_2 - 3d (time, lat, lon) array of data

     Output:
         nashcor - 1d array aligned along the time dimension of the input
         datasets. Time Series of Nash-Sutcliff Coefficient of efficiency

      '''

     nt = dataset_1.shape[0]
     nashcor = []
     for t in xrange(nt):
         mean_dataset_2 = dataset_2[t, :, :].mean()

         nashcor.append(1 - ((((dataset_2[t, :, :] - dataset_1[t, :, :]) ** 2).sum()) /
                             ((dataset_2[t, :, :] - mean_dataset_2) ** 2).sum()))

         print t, mean_dataset_2.shape, nashcor[t]

     nashcor = np.array(nashcor)
     print nashcor.shape, nashcor.ndim, nashcor
     return nashcor


 def calc_pdf(dataset_1, dataset_2):
     '''
     Routine to calculate a normalized Probability Distribution Function with
     bins set according to data range.
     Equation from Perkins et al. 2007

         PS=sum(min(Z_O_i, Z_M_i)) where Z is the distribution (histogram of the data for either set)
         called in do_rcmes_processing_sub.py

     Inputs::
         2 arrays of data
         t1 is the modelData and t2 is 3D obsdata - time,lat, lon NB, time here
         is the number of time values eg for time period 199001010000 - 199201010000

         if annual means-opt 1, was chosen, then t2.shape = (2,lat,lon)

         if monthly means - opt 2, was choosen, then t2.shape = (24,lat,lon)

     User inputs: number of bins to use and edges (min and max)
     Output:

         one float which represents the PDF for the year

     TODO:  Clean up this docstring so we have a single purpose statement

     Routine to calculate a normalised PDF with bins set according to data range.

     Input::
         2 data  arrays, modelData and obsData

     Output::
         PDF for the year

     '''
     #list to store PDFs of modelData and obsData
     pdf_mod = []
     pdf_obs = []
     # float to store the final PDF similarity score
     similarity_score = 0.0
     d1_max = dataset_1.amax()
     d1_min = dataset_1.amin()

     print 'min modelData', dataset_1[:, :, :].min()
     print 'max modelData', dataset_1[:, :, :].max()
     print 'min obsData', dataset_2[:, :, :].min()
     print 'max obsData', dataset_2[:, :, :].max()
     # find a distribution for the entire dataset
     #prompt the user to enter the min, max and number of bin values.
     # The max, min info above is to help guide the user with these choises
     print '****PDF input values from user required **** \n'
     nbins = int (raw_input('Please enter the number of bins to use. \n'))
     minEdge = float(raw_input('Please enter the minimum value to use for the edge. \n'))
     maxEdge = float(raw_input('Please enter the maximum value to use for the edge. \n'))

     mybins = np.linspace(minEdge, maxEdge, nbins)
     print 'nbins is', nbins, 'mybins are', mybins


     # TODO:  there is no 'new' kwargs for numpy.histogram
     # per: http://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html
     # PLAN: Replace new with density param.
     pdf_mod, edges = np.histogram(dataset_1, bins = mybins, normed = True, new = True)
     print 'dataset_1 distribution and edges', pdf_mod, edges
     pdf_obs, edges = np.histogram(dataset_2, bins = mybins, normed = True, new = True)
     print 'dataset_2 distribution and edges', pdf_obs, edges

     # TODO: drop this
     """
     considering using pdf function from statistics package. It is not
      installed. Have to test on Mac.
      http://bonsai.hgc.jp/~mdehoon/software/python/Statistics/manual/index.xhtml#TOC31
     pdf_mod, edges = stats.pdf(dataset_1, bins=mybins)
     print 'dataset_1 distribution and edges', pdf_mod, edges
     pdf_obs,edges=stats.pdf(dataset_2,bins=mybins)
     print 'dataset_2 distribution and edges', pdf_obs, edges
     """

     #find minimum at each bin between lists
     i = 0
     for model_value in pdf_mod :
         print 'model_value is', model_value, 'pdf_obs[', i, '] is', pdf_obs[i]
         if model_value < pdf_obs[i]:
             similarity_score += model_value
         else:
             similarity_score += pdf_obs[i]
         i += 1
     print 'similarity_score is', similarity_score
     return similarity_score


 def calc_stdev(t1):
     '''
     Calculate the standard deviation for a given dataset.

     Input:
         t1 - Dataset to calculate the standard deviation on.

     Output:
         Array of the standard deviations for each month in the provided dataset.
     '''
     nt = t1.shape[0]
     sigma_t1 = []
     for t in xrange(nt):
         sigma_t1.append(t1[t, :, :].std(ddof = 1))
     sigma_t1 = np.array(sigma_t1)
     print sigma_t1, sigma_t1.shape
     return sigma_t1

 # 6/10/2013 JK: plotting routines are added below

 def pow_round(x):
     '''
      Function to round x to the nearest power of 10
     '''
     return 10 ** (floor(log(x, 10) - log(0.5, 10)))

 def calcNiceIntervals(data, nLevs):
     '''
     Purpose::
         Calculates nice intervals between each color level for colorbars
         and contour plots. The target minimum and maximum color levels are
         calculated by taking the minimum and maximum of the distribution
         after cutting off the tails to remove outliers.

     Input::
         data - an array of data to be plotted
         nLevs - an int giving the target number of intervals

     Output::
         cLevs - A list of floats for the resultant colorbar levels
     '''
     # Find the min and max levels by cutting off the tails of the distribution
     # This mitigates the influence of outliers
     data = data.ravel()
     mnLvl = mstats.scoreatpercentile(data, 5)
     mxLvl = mstats.scoreatpercentile(data, 95)
     locator = mpl.ticker.MaxNLocator(nLevs)
     cLevs = locator.tick_values(mnLvl, mxLvl)

     # Make sure the bounds of cLevs are reasonable since sometimes
     # MaxNLocator gives values outside the domain of the input data
     cLevs = cLevs[(cLevs >= mnLvl) & (cLevs <= mxLvl)]
     return cLevs

 def calcBestGridShape(nPlots, oldShape):
     '''
     Purpose::
         Calculate a better grid shape in case the user enters more columns
         and rows than needed to fit a given number of subplots.

     Input::
         nPlots - an int giving the number of plots that will be made
         oldShape - a tuple denoting the desired grid shape (nRows, nCols) for arranging
                     the subplots originally requested by the user.

     Output::
         newShape - the smallest possible subplot grid shape needed to fit nPlots
     '''
     nRows, nCols = oldShape
     size = nRows * nCols
     diff = size - nPlots
     if diff < 0:
         raise ValueError('gridShape=(%d, %d): Cannot fit enough subplots for data' %(nRows, nCols))
     else:
         # If the user enters an excessively large number of
         # rows and columns for gridShape, automatically
         # correct it so that it fits only as many plots
         # as needed
         while diff >= nCols:
             nRows -= 1
             size = nRows * nCols
             diff = size - nPlots

         # Don't forget to remove unnecessary columns too
         if nRows == 1:
             nCols = nPlots

         newShape = nRows, nCols
         return newShape

 def drawPortraitDiagramSingle(data, rowLabels, colLabels, cLevs, fName, fType = 'png',
                               xLabel = '', yLabel = '', cLabel = '', pTitle = '', cMap = None):
     '''
     Purpose::
         Makes a portrait diagram plot.

     Input::
         data - 2d array of the field to be plotted
         rowLabels - list of strings denoting labels for each row
         colLabels - list of strings denoting labels for each column
         cLevs - a list of integers or floats specifying the colorbar levels
         xLabel - a string specifying the x-axis title
         yLabel - a string specifying the y-axis title
         cLabel - a string specifying the colorbar title
         pTitle - a string specifying the plot title
         fName  - a string specifying the filename of the plot
         fType  - an optional string specifying the filetype, default is .png
         cMap - an optional matplotlib.LinearSegmentedColormap object denoting the colormap,
     '''
     # Set up the colormap if not specified
     if cMap is None:
         cMap = plt.cm.RdBu_r

     # Set up figure and axes
     fig = plt.figure()
     ax = fig.add_subplot(111)

     # Make the portrait diagram
     norm = mpl.colors.BoundaryNorm(cLevs, cMap.N)
     cs = ax.matshow(data, cmap = cMap, aspect = 'auto', origin = 'lower', norm = norm)

     # Add colorbar
     cbar = fig.colorbar(cs, norm = norm, boundaries = cLevs, drawedges = True,
                         pad = .05)
     cbar.set_label(cLabel)
     cbar.set_ticks(cLevs)
     cbar.ax.xaxis.set_ticks_position("none")
     cbar.ax.yaxis.set_ticks_position("none")

     # Add grid lines
     ax.xaxis.set_ticks(np.arange(data.shape[1] + 1))
     ax.yaxis.set_ticks(np.arange(data.shape[0] + 1))
     x = (ax.xaxis.get_majorticklocs() - .5)
     y = (ax.yaxis.get_majorticklocs() - .5)
     ax.vlines(x, y.min(), y.max())
     ax.hlines(y, x.min(), x.max())

     # Configure ticks
     ax.xaxis.tick_bottom()
     ax.xaxis.set_ticks_position('none')
     ax.yaxis.set_ticks_position('none')
     ax.set_xticklabels(rowLabels)
     ax.set_yticklabels(colLabels)

     # Add labels and title
     ax.set_xlabel(xLabel)
     ax.set_ylabel(yLabel)
     ax.set_title(pTitle)

     # Save the figure
     fig.savefig('%s.%s' %(fName, fType))
     plt.show()

 def drawPortraitDiagram(dataset, rowLabels, colLabels, fName, fType = 'png',
                               gridShape = (1, 1), xLabel = '', yLabel = '', cLabel = '',
                               pTitle = '', subTitles = None, cMap = None, cLevs = None,
                               nLevs = 10, extend = 'neither'):
     '''
     Purpose::
         Makes a portrait diagram plot.

     Input::
         dataset - 3d array of the field to be plotted (nT, nRows, nCols)
         rowLabels - a list of strings denoting labels for each row
         colLabels - a list of strings denoting labels for each column
         fName - a string specifying the filename of the plot
         fType - an optional string specifying the output filetype
         xLabel - an optional string specifying the x-axis title
         yLabel - an optional string specifying the y-axis title
         cLabel - an optional string specifying the colorbar title
         pTitle - a string specifying the plot title
         subTitles - an optional list of strings specifying the title for each subplot
         cMap - an optional matplotlib.LinearSegmentedColormap object denoting the colormap
         cLevs - an optional list of ints or floats specifying colorbar levels
         nLevs - an optional integer specifying the target number of contour levels if
                 cLevs is None
         extend - an optional string to toggle whether to place arrows at the colorbar
              boundaries. Default is 'neither', but can also be 'min', 'max', or
              'both'. Will be automatically set to 'both' if cLevs is None.

     '''
     # Handle the single plot case.
     if dataset.ndim == 2 or (dataset.ndim == 3 and dataset.shape[0] == 1):
         dataset = dataset.reshape(1, *dataset.shape)

     nPlots = dataset.shape[0]

     # Make sure gridShape is compatible with input data
     gridShape = calcBestGridShape(nPlots, gridShape)

     # Row and Column labels must be consistent with the shape of
     # the input data too
     nRows, nCols = dataset.shape[1:]
     if len(rowLabels) != nRows or len(colLabels) != nCols:
         raise ValueError('rowLabels and colLabels must have %d and %d elements respectively' %(nRows, nCols))

     # Set up the colormap if not specified
     if cMap is None:
         cMap = plt.cm.coolwarm

     # Set up the figure
     fig = plt.figure()
     fig.set_size_inches((8.5, 11.))
     fig.dpi = 300

     # Make the subplot grid
     grid = ImageGrid(fig, 111,
                 nrows_ncols = gridShape,
                 axes_pad = 0.4,
                 share_all = True,
                 aspect = False,
                 add_all = True,
                 ngrids = nPlots,
                 label_mode = "all",
                 cbar_mode = 'single',
                 cbar_location = 'bottom',
                 cbar_pad = '3%',
                 cbar_size = .15
                 )

     # Calculate colorbar levels if not given
     if cLevs is None:
         # Cut off the tails of the distribution
         # for more representative colorbar levels
         cLevs = calcNiceIntervals(dataset, nLevs)
         extend = 'both'

     norm = mpl.colors.BoundaryNorm(cLevs, cMap.N)

     # Do the plotting
     for i, ax in enumerate(grid):
         data = dataset[i]
         cs = ax.matshow(data, cmap = cMap, aspect = 'auto', origin = 'lower', norm = norm)

         # Add grid lines
         ax.xaxis.set_ticks(np.arange(data.shape[1] + 1))
         ax.yaxis.set_ticks(np.arange(data.shape[0] + 1))
         x = (ax.xaxis.get_majorticklocs() - .5)
         y = (ax.yaxis.get_majorticklocs() - .5)
         ax.vlines(x, y.min(), y.max())
         ax.hlines(y, x.min(), x.max())

         # Configure ticks
         ax.xaxis.tick_bottom()
         ax.xaxis.set_ticks_position('none')
         ax.yaxis.set_ticks_position('none')
         ax.set_xticklabels(colLabels, fontsize = 'xx-small')
         ax.set_yticklabels(rowLabels, fontsize = 'xx-small')

         # Add axes title
         if subTitles is not None:
             ax.text(0.5, 1.04, subTitles[i], va = 'center', ha = 'center',
                     transform = ax.transAxes, fontsize = 'small')

     # Add colorbar
     cbar = fig.colorbar(cs, cax = ax.cax, norm = norm, boundaries = cLevs, drawedges = True,
                         extend = extend, orientation = 'horizontal')
     cbar.set_label(cLabel)
     cbar.set_ticks(cLevs)
     cbar.ax.xaxis.set_ticks_position("none")
     cbar.ax.yaxis.set_ticks_position("none")

     # This is an ugly hack to make the title show up at the correct height.
     # Basically save the figure once to achieve tight layout and calculate
     # the adjusted heights of the axes, then draw the title slightly above
     # that height and save the figure again
     fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi)
     ymax = 0
     for ax in grid:
         bbox = ax.get_position()
         ymax = max(ymax, bbox.ymax)

     # Add figure title and axes labels
     fig.text(.51, .14, yLabel, va = 'center', ha = 'center', rotation = 'horizontal')
     fig.text(.08, .53, xLabel, va = 'center', ha = 'center', rotation = 'vertical')
     fig.suptitle(pTitle, y = ymax + .04, fontsize = 16)
     fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi)
     plt.show()
     fig.clf()

 def taylorDiagram(pltDat, pltTit, pltFileName, refName, legendPos, radMax):
     '''
     Draw a Taylor diagram
     '''
     stdref = 1.                     # Standard reference value
     rect = 111                      # Subplot setting and location
     markerSize = 6
     fig = plt.figure()
     fig.suptitle(pltTit)            # PLot title
     dia = TaylorDiagram (stdref, fig = fig, radMax = radMax, rect = rect, label = refName)
     for i,(stddev,corrcoef,name) in enumerate(pltDat):
         dia.add_sample (stddev, corrcoef, marker = '$%d$' % (i+1), ms = markerSize, label=name)
     # Add ploylines to mark a range specified by input data - 2 be implemented
     #circular_line = dia.add_stddev_contours(0.959, 1, 0.965)
     #circular_line = dia.add_stddev_contours(1.1, 1, 0.973)
     #straight_line = dia.add_contours(0.959, 1, 1.1, 1)
     #straight_line = dia.add_contours(0.959, 0.965, 1.1, 0.973)
     #l=fig.legend (dia.samplePoints, [p.get_label() for p in dia.samplePoints ], handlelength=0., prop={'size':10}, numpoints=1, loc=legendPos)
     # loc: 1='upper right', 2='upper left' or specified in the calling program via "legendPos"
     l = fig.legend (dia.samplePoints, [p.get_label() for p in dia.samplePoints ], handlelength=0., prop={'size':10}, numpoints=1, loc=legendPos)
     l.draw_frame(False)
     plt.savefig(pltFileName)
     plt.show()
     pltDat = 0.

 def drawTimeSeriesSingle(dataset, times, labels, fName, fType = 'png', xLabel = '', yLabel ='', pTitle ='',
                    legendPos = 'upper center', legendFrameOn = False, yearLabels = True, yscale = 'linear'):
     '''
     Purpose::
         Function to draw a time series plot

     Input::
         dataset - a list of arrays for each dataset as a time series
         times - a list of python datetime objects
         labels - a list of strings with the names of each dataset
         fName - a string specifying the filename of the plot
         fType - an optional string specifying the output filetype
         xLabel - a string specifying the x-axis title
         yLabel - a string specifying the y-axis title
         pTitle - a string specifying the plot title
         legendPos - an optional string or tuple of float for determining
                     the position of the legend
         legendFrameOn - optional bool to toggle drawing the frame around
                         the legend
         yearLabels - optional bool to toggle drawing year labels on the x-xaxis
         yscale - optional string for setting the y-axis scale, 'linear' for linear
                  and 'log' for log base 10.
     '''
     fig = plt.figure()
     ax = fig.add_subplot(111)

     if not yearLabels:
         xfmt = mpl.dates.DateFormatter('%b')
         ax.xaxis.set_major_formatter(xfmt)

     ax.set_xlabel(xLabel)
     ax.set_ylabel(yLabel)
     ax.set_title(pTitle)

     # Set up list of lines for legend
     lines = []
     ymin, ymax = 0, 0

     # Plot each dataset
     for data in dataset:
         line = ax.plot_date(times, data, '', linewidth = 2)
         lines.extend(line)
         cmin, cmax = data.min(), data.max()
         if ymin > cmin:
             ymin = cmin
         if ymax < cmax:
             ymax = cmax

     # Add a bit of padding so lines don't touch bottom and top of the plot
     ymin = ymin - ((ymax - ymin) * 0.1)
     ymax = ymax + ((ymax - ymin) * 0.1)
     ax.set_ylim((ymin, ymax))

     # Set the y-axis scale
     ax.set_yscale(yscale)

     # Create the legend
     ax.legend((lines), labels, loc = legendPos, ncol = 10, fontsize='x-small',
                        frameon=legendFrameOn)
     fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight')
     plt.show()
     fig.clf()


 def pltXY(x, y, lineLabel, lineTyp, pltTit, xLabel, yLabel, pltName, xmin, xmax, deltaX, ymin, ymax, deltaY, legendPos, xScale, yScale):
     """
     The default drawing order for axes is patches, lines, text.This order is determined by the zorder attribute. The following defaults are set:
     Artist                      Z-order
     Patch / PatchCollection      1
     Line2D / LineCollection      2
     Text                         3
     You can change the order for individual artists by setting the zorder.  Any individual plot() call can set a value
       for the zorder of that particular item.
     In the fist subplot below, the lines are drawn above the patch collection from the scatter, which is the default.
     In the subplot below, the order is reversed.
     The second figure shows how to control the zorder of individual lines.
     Arguments
       x (nX)   : np array: the number of points in the x axis
       y (nX,nY): np array: the number of y values to be plotted
       lineLabel: list(nY): labels for individual y data
       pltTit   : Text    : plot title
       xLabel   : Text    : x-axis label
       yLabel   : Text    : y-axis label
       pltName  : Text    : name of the plot file
     3/28/2013 Jinwon Kim: Modification of a routine in the matplotlib gallery
     """
     #lineColors = ['k', 'b', 'r', 'g', 'c', 'm', 'y', '0.5', '0.55', '0.6', '0.65', '0.7', '0.75', '0.8', '0.85', '0.9', '0.95', '1.0']
     lineColors = ['k','r','y','g','b','c','m','k','r','y','g','b','c','m','k','r','y','g','b','c','m','0.35','0.5','0.65','0.8','.95']
     nX = x.size
     nY = len(lineLabel)
     lineColors[nY - 1] = 'b'
     fig = plt.figure()
     ax = fig.add_subplot(1,1,1)
     for n in np.arange(nY):
         if n <= 6:
             opacity=1.
         elif n <= 13:
             opacity = .75
         elif n <= 20:
             opacity = .40
         plot(x,y[n, :], linewidth=1, alpha=opacity, color=lineColors[n], linestyle=lineTyp[n], label=lineLabel[n], zorder = 10)
     xlabel(xLabel,fontsize=10); ylabel(yLabel,fontsize=10)
     if xmax > xmin:
         plt.xlim(xmin,xmax)
         ticks = frange(xmin,xmax,npts=(xmax-xmin)/deltaX+1)
         ax.xaxis.set_ticks(ticks,minor=False)
     if ymax > ymin:
         plt.ylim(ymin,ymax)
         ticks = frange(ymin,ymax,npts=(ymax-ymin)/deltaY+1)
         ax.yaxis.set_ticks(ticks,minor=False)
     ax.set_title(pltTit)
     ax.xaxis.tick_bottom(); ax.yaxis.tick_left()    # put tick marks only on the left (for y) and bottom (for x) axis
     if xScale == 'log':
         ax.set_xscale('log')
     else:
         ax.set_xscale('linear')
     if yScale == 'log':
         ax.set_yscale('log')
     else:
         ax.set_yscale('linear')
     if(nY>1):
         l = legend(prop={'size':10},loc='best')
         l.set_zorder(20) # put the legend on top
     plt.savefig(pltName)
     show()
     # release work arrays
     x=0.; y=0.


 def pltSca1F(x, y, pltName, xLabel, yLabel, pmin, pmax, delP, pTit, pFname, xScale, yScale):
     #*************************************#
     # Plot a single-frame scatter diagram #
     #*************************************#
     fig = plt.figure(); ax = fig.add_subplot(1,1,1)
     lTyp='o'
     plot(x,y,lTyp,c='r')
     if pmax > pmin:
         ticks = frange(pmin,pmax,npts=(pmax-pmin)/delP+1)
         plt.xlim(pmin,pmax); ax.xaxis.set_ticks(ticks,minor=False)
         plt.ylim(pmin,pmax); ax.yaxis.set_ticks(ticks,minor=False)
     ax.set_title(pTit)
     if xScale == 'log':
         ax.set_xscale('log')
     else:
         ax.set_xscale('linear')
     if yScale == 'log':
         ax.set_yscale('log')
     else:
         ax.set_yscale('linear')
     xlabel(xLabel,fontsize=10); ylabel(yLabel,fontsize=10)
     l = legend(prop={'size':8},loc='best')
     l.set_zorder(20)
     plt.savefig(pFname)
     show()
     x=0.; y=0.


 def pltSca6F(x, dName, pmin, pmax, delP, pTitle, pFname, xScale, yScale):
     #****************************************************#
     # Plot up to 6 frames (6 rows X 2 columns) on a page #
     #****************************************************#
     nPlt = x.shape[0]
     if pmax > pmin:
         ticks = frange(pmin,pmax,npts=(pmax-pmin)/delP+1)
     if nPlt > 6:
         print 'frames exceed 12: return'
         return
     nrows = 3; ncols = 2; npmax = nrows*ncols; lTyp='o'; xlabcol='black'; ylabcol='green'
     fig = plt.figure()
     plt.subplots_adjust(hspace=0.3, wspace=0.2)
     for n in range(1,nPlt):
         pid = n % npmax
         if pid == 0:
             pid = npmax
         ax = fig.add_subplot(nrows,ncols,pid)
         #ax.set_title(pTitle[n-1],fontsize=6)
         if xScale == 'log':
             ax.set_xscale('log')
         else:
             ax.set_xscale('linear')
         if yScale == 'log':
             ax.set_yscale('log')
         else:
             ax.set_yscale('linear')
         plot(x[0,:],x[n,:],lTyp,label=pTitle[n-1],c='r')
         plot(x[0,:],x[n,:],lTyp,c='r')
         plot(frange(pmin,pmax),c='b')
         l = legend(prop={'size':8},loc='best')
         l.set_zorder(20)
         xlabel(dName[0],fontsize=10); ylabel(dName[n],fontsize=10)
         if pmax > pmin:
             plt.xlim(pmin,pmax); ax.xaxis.set_ticks(ticks,minor=False)
             plt.ylim(pmin,pmax); ax.yaxis.set_ticks(ticks,minor=False)
         for label in ax.xaxis.get_ticklabels():
             label.set_color(xlabcol)
             label.set_rotation(0)
             label.set_fontsize(8)
         for label in ax.yaxis.get_ticklabels():
             label.set_color(ylabcol)
             label.set_rotation(0)
             label.set_fontsize(8)
     plt.savefig(pFname)
     show()
     x=0.

 def drawContourMapSingle(data, lats, lons, cLevs, fName, fType = 'png',
                          cLabel = '', pTitle = '', cMap = None, nParallels = 5, nMeridians = 5):
     '''
     Purpose::
         Plots a filled contour map.
     Input::
         data - 2d array of the field to be plotted with shape (nLon, nLat)
         lats - array of latitudes
         lons - array of longitudes
         cLevs - A list of ints or floats specifying contour levels
         fName  - a string specifying the filename of the plot
         fType  - an optional string specifying the filetype, default is .png
         cLabel - an optional string specifying the colorbar title
         pTitle - an optional string specifying plot title
         cMap - an optional matplotlib.LinearSegmentedColormap object denoting the colormap
         nParallels - an optional int for the number of parallels to draw
         nMeridians - an optional int for the number of meridians to draw
     '''
     # Set up the colormap if not specified
     if cMap is None:
         cMap = plt.cm.RdBu_r

     # Set up the figure
     fig = plt.figure()
     ax = fig.add_subplot(111)

     # Determine the map boundaries and construct a Basemap object
     lonMin = lons.min()
     lonMax = lons.max()
     latMin = lats.min()
     latMax = lats.max()
     m = Basemap(projection = 'cyl', llcrnrlat = latMin, urcrnrlat = latMax,
                 llcrnrlon = lonMin, urcrnrlon = lonMax, resolution = 'l', ax = ax)

     # Draw the borders for coastlines and countries
     m.drawcoastlines(linewidth = 1)
     m.drawcountries(linewidth = .75)

     # Draw parallels / meridians.
     m.drawmeridians(np.linspace(lonMin, lonMax, nMeridians), labels = [0, 0, 0, 1])
     m.drawparallels(np.linspace(latMin, latMax, nMeridians), labels = [1, 0, 0, 1])

     # Convert lats and lons to projection coordinates
     if lats.ndim == 1 and lons.ndim == 1:
         lons, lats = np.meshgrid(lons, lats)
     x, y = m(lons, lats)

     # Plot data with filled contours
     cs = m.contourf(x, y, data, cmap = cMap, levels = cLevs)

     # Add colorbar
     cbar = m.colorbar(cs, drawedges = True, pad = '2%', size = '3%')
     cbar.set_label(cLabel)
     cbar.set_ticks(cLevs)
     cbar.ax.xaxis.set_ticks_position("none")
     cbar.ax.yaxis.set_ticks_position("none")

     # Add title and save the figure
     ax.set_title(pTitle)
     fig.savefig('%s.%s' %(fName, fType))
     show()

 def drawCntrMap(data,lon,lat,titles,ncols,pFname):
     '''
     This routine is based on PyNGL, i.e., NcarGraphics
     data - a masked numpy array of data to plot (nT,nY,nX)
     lon - longitude (nY,nX)
     lat - latitude  (nY,nX)
     titles - array of titles (nT)
     nrows - number of rows of the paneled plots
     ncols - number of columns of the paneled plots
     nT = nrows * ncols
     pFname - name of output postscript file
     '''
     wks_type = 'ps'
     if data.ndim == 2:
         nT = 1; nrows = 1; ncols = 1
     elif data.ndim == 3:
         nT=data.shape[0]
         if nT % ncols == 0:
             nrows = nT/ncols
         else:
             nrows = nT/ncols + 1
     # set workstation type (X11, ps, png)
     res = Ngl.Resources()
     wks = Ngl.open_wks(wks_type,pFname,res)
     # set plot resource paramters
     resources = Ngl.Resources()
     resources.mpLimitMode = "LatLon"    # Limit the map view.
     resources.mpMinLonF   = lon.min()
     resources.mpMaxLonF   = lon.max()
     resources.mpMinLatF   = lat.min()
     resources.mpMaxLatF   = lat.max()
     resources.cnFillOn      = True
     resources.cnLineLabelsOn        = False    # Turn off line labels.
     resources.cnInfoLabelOn         = False    # Turn off info label.
     resources.cnLinesOn             = False    # Turn off countour line (only filled colors)
     resources.sfXArray              = lon[:,:]
     resources.sfYArray              = lat[:,:]
     resources.pmTickMarkDisplayMode = "Never"  # Turn off map tickmarks.
     resources.mpOutlineBoundarySets = "GeophysicalAndUSStates"
     resources.mpGeophysicalLineColor = "red"
     resources.mpUSStateLineColor = "red"
     resources.mpGeophysicalLineThicknessF = 0.75
     resources.mpUSStateLineThicknessF = 0.75
     resources.mpPerimOn             = True     # Turn on/off map perimeter
     resources.mpGridAndLimbOn = True           # Turn off map grid.
     resources.nglFrame = False    # Don't advance the frame
     resources.nglDraw = False
     plot=[]
     for iT in np.arange(nT):
         resources.tiMainString = titles[iT]
         #resources.pmLabelBarDisplayMode = "Never" # Turn off individual label bars
         resources.pmLabelBarDisplayMode = "Always" # Turn on individual label bars
         if data.ndim == 3:
             plot.append(Ngl.contour_map(wks,data[iT,:,:],resources))
         elif data.ndim == 2:
             plot.append(Ngl.contour_map(wks,data[:,:],resources))
     panelres = Ngl.Resources()
     panelres.nglPanelTop=0.95
     panelres.nglPanelBottom=0.05
     panelres.nglPanelLabelBar                 = False   # Turn on panel labelbar
     panelres.nglPanelLabelBarLabelFontHeightF = 0.012  # Labelbar font height
     panelres.nglPanelLabelBarHeightF          = 0.03   # Height of labelbar
     panelres.nglPanelLabelBarWidthF           = 0.8    # Width of labelbar
     panelres.lbLabelFont                      = "helvetica-bold" # Labelbar font
     Ngl.panel(wks,plot[0:nT],[nrows,ncols],panelres)
     Ngl.destroy(wks)
     del plot
     del resources
     del wks
     return

 def drawContourMap(dataset, lats, lons, fName, fType = 'png', gridShape = (1, 1),
                       cLabel = '', pTitle = '', subTitles = None, cMap = None,
                       cLevs = None, nLevs = 10, parallels = None, meridians = None,
                       extend = 'neither'):
     '''
     Purpose::
         Create a multiple panel contour map plot.
     Input::
         dataset -  3d array of the field to be plotted with shape (nT, nLon, nLat)
         lats - array of latitudes
         lons - array of longitudes
         fName  - a string specifying the filename of the plot
         fType  - an optional string specifying the filetype, default is .png
         gridShape - optional tuple denoting the desired grid shape (nRows, nCols) for arranging
                     the subplots.
         cLabel - an optional string specifying the colorbar title
         pTitle - an optional string specifying plot title
         subTitles - an optional list of strings specifying the title for each subplot
         cMap - an optional matplotlib.LinearSegmentedColormap object denoting the colormap
         cLevs - an optional list of ints or floats specifying contour levels
         nLevs - an optional integer specifying the target number of contour levels if
                 cLevs is None
         parallels - an optional list of ints or floats for the parallels to be drawn
         meridians - an optional list of ints or floats for the meridians to be drawn
         extend - an optional string to toggle whether to place arrows at the colorbar
                  boundaries. Default is 'neither', but can also be 'min', 'max', or
                  'both'. Will be automatically set to 'both' if cLevs is None.
     '''
     # Handle the single plot case. Meridians and Parallels are not labeled for
     # multiple plots to save space.
     if dataset.ndim == 2 or (dataset.ndim == 3 and dataset.shape[0] == 1):
         if dataset.ndim == 2:
             dataset = dataset.reshape(1, *dataset.shape)
         nPlots = 1
         mLabels = [0, 0, 0, 1]
         pLabels = [1, 0, 0, 1]
     else:
         nPlots = dataset.shape[0]
         mLabels = [0, 0, 0, 0]
         pLabels = [0, 0, 0, 0]

     # Make sure gridShape is compatible with input data
     gridShape = calcBestGridShape(nPlots, gridShape)

     # Set up the colormap if not specified
     if cMap is None:
         cMap = plt.cm.coolwarm

     # Set up the figure
     fig = plt.figure()
     #fig.set_size_inches((8.5, 11.))
     #fig.dpi = 600

     # Make the subplot grid
     grid = ImageGrid(fig, 111,
                 nrows_ncols = gridShape,
                 axes_pad = 0.3,
                 share_all = True,
                 add_all = True,
                 ngrids = nPlots,
                 label_mode = "L",
                 cbar_mode = 'single',
                 cbar_location = 'bottom',
                 cbar_pad = '0%'
                 )

     # Determine the map boundaries and construct a Basemap object
     lonMin = lons.min()
     lonMax = lons.max()
     latMin = lats.min()
     latMax = lats.max()
     m = Basemap(projection = 'cyl', llcrnrlat = latMin, urcrnrlat = latMax,
                 llcrnrlon = lonMin, urcrnrlon = lonMax, resolution = 'l')

     # Convert lats and lons to projection coordinates
     if lats.ndim == 1 and lons.ndim == 1:
         lons, lats = np.meshgrid(lons, lats)

     # Calculate contour levels if not given
     if cLevs is None:
         # Cut off the tails of the distribution
         # for more representative contour levels
         cLevs = calcNiceIntervals(dataset, nLevs)
     extend = 'both'

     # Create default meridians and parallels
     if meridians is None:
         meridians = np.arange(-180, 181, 15)
     if parallels is None:
         parallels = np.arange(-90, 91, 15)

     x, y = m(lons, lats)
     for i, ax in enumerate(grid):
         # Load the data to be plotted
         data = dataset[i]
         m.ax = ax

         # Draw the borders for coastlines and countries
         m.drawcoastlines(linewidth = 1)
         m.drawcountries(linewidth = .75)

         # Draw parallels / meridians
         m.drawmeridians(meridians, labels = mLabels, linewidth = .75)
         m.drawparallels(parallels, labels = pLabels, linewidth = .75)

         # Draw filled contours
         cs = m.contourf(x, y, data, cmap = cMap, levels = cLevs, extend = extend)

         # Add title
         if subTitles is not None:
             ax.set_title(subTitles[i], fontsize = 'small')


     # Add colorbar
     cbar = fig.colorbar(cs, cax = ax.cax, drawedges = True, orientation = 'horizontal',
                         extendfrac = 'auto')
     cbar.set_label(cLabel)
     cbar.set_ticks(cLevs)
     cbar.ax.xaxis.set_ticks_position("none")
     cbar.ax.yaxis.set_ticks_position("none")

     # This is an ugly hack to make the title show up at the correct height.
     # Basically save the figure once to achieve tight layout and calculate
     # the adjusted heights of the axes, then draw the title slightly above
     # that height and save the figure again
     fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi)
     ymax = 0
     for ax in grid:
         bbox = ax.get_position()
         ymax = max(ymax, bbox.ymax)

     # Add figure title
     fig.suptitle(pTitle, y = ymax + .04, fontsize = 16)
     fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi)
     plt.show()
     fig.clf()

 def drawSubRegions(subRegions, lats, lons, fName, fType = 'png', pTitle = '',
                     parallels = None, meridians = None, subRegionMasks = None):
     '''
     Purpose::
         Function to draw subregion domain(s) on a map

     Input::
             subRegions - a list of subRegion objects
         lats - array of latitudes
         lons - array of longitudes
         fName  - a string specifying the filename of the plot
         fType  - an optional string specifying the filetype, default is .png
         pTitle - an optional string specifying plot title
         parallels - an optional list of ints or floats for the parallels to be drawn
         meridians - an optional list of ints or floats for the meridians to be drawn
         subRegionMasks - optional dictionary of boolean arrays for each subRegion
                          for giving finer control of the domain to be drawn, by default
                          the entire domain is drawn.
     '''
     # Set up the figure
     fig = plt.figure()
     fig.set_size_inches((8.5, 11.))
     fig.dpi = 300
     ax = fig.add_subplot(111)

     # Determine the map boundaries and construct a Basemap object
     lonMin = lons.min()
     lonMax = lons.max()
     latMin = lats.min()
     latMax = lats.max()
     m = Basemap(projection = 'cyl', llcrnrlat = latMin, urcrnrlat = latMax,
                 llcrnrlon = lonMin, urcrnrlon = lonMax, resolution = 'l', ax = ax)

     # Draw the borders for coastlines and countries
     m.drawcoastlines(linewidth = 1)
     m.drawcountries(linewidth = .75)
     m.drawstates()

     # Create default meridians and parallels
     if meridians is None:
         meridians = np.arange(-180, 181, 15)
     if parallels is None:
         parallels = np.arange(-90, 91, 15)

     # Draw parallels / meridians
     m.drawmeridians(meridians, labels = [0, 0, 0, 1], linewidth = .75)
     m.drawparallels(parallels, labels = [1, 0, 0, 1], linewidth = .75)

     # Set up the color scaling
     cMap = plt.cm.jet
     norm = mpl.colors.BoundaryNorm(np.arange(1, len(subRegions) + 3), cMap.N)

     # Process the subregions
     for i, reg in enumerate(subRegions):
         if subRegionMasks is not None and reg.name in subRegionMasks.keys():
             domain = (i + 1) * subRegionMasks[reg.name]
         else:
             domain = (i + 1) * np.ones((2, 2))

         nLats, nLons = domain.shape
         domain = ma.masked_equal(domain, 0)
         regLats = np.linspace(reg.latMin, reg.latMax, nLats)
         regLons = np.linspace(reg.lonMin, reg.lonMax, nLons)
         regLons, regLats = np.meshgrid(regLons, regLats)

         # Convert to to projection coordinates. Not really necessary
         # for cylindrical projections but keeping it here in case we need
         # support for other projections.
         x, y = m(regLons, regLats)

         # Draw the subregion domain
         m.pcolormesh(x, y, domain, cmap = cMap, norm = norm, alpha = .5)

         # Label the subregion
         xm, ym = x.mean(), y.mean()
         m.plot(xm, ym, marker = '$%s$' %(reg.name), markersize = 12, color = 'k')

     # Add the the title
     ax.set_title(pTitle)

     # Save the figure
     fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi)
     show()
     fig.clf()

 def metrics_plots(varName, numOBS, numMDL, nT, ngrdY, ngrdX, Times, lons, lats, allData, dataList, workdir, subRegions, timeStep, fileOutputOption):
     '''
     Calculate evaluation metrics and generate plots.
     '''
     ##################################################################################################################
     # Routine to compute evaluation metrics and generate plots
     #    (1)  metric calculation
     #    (2) plot production
     #    Input:
     #        numOBS           - the number of obs data. either 1 or >2 as obs ensemble is added for multi-obs cases
     #        numMDL           - the number of mdl data. either 1 or >2 as obs ensemble is added for multi-mdl cases
     #        nT               - the length of the data in the time dimension
     #        ngrdY            - the length of the data in Y dimension
     #        ngrdX            - the length of the data in the X dimension
     #        Times            - time stamps
     #        lons,lats        - longitude & latitude values of the data domain (same for model & obs)
     #        allData          - the sum of the observed and model data (combines obsData & mdlData in the old code)
     #        dataList         - the list of data names (combines obsList and mdlList in the old code)
     #        workdir        - string describing the directory path for storing results and plots
     #        subRegions        - list of SubRegion Objects or False
     #        fileOutputOption - option to write regridded data in a netCDF file or not
     #    Output: image files of plots + possibly data
     #******************************************************
     # JK2.0: Only the data interpolated temporally and spatially onto the analysis grid
     #        are transferred into this routine. The rest of processing (e.g., area-averaging, etc.)
     #        are to be performed in this routine. Do not overwrite obsData[numOBs,nt,ngrdY,ngrdX] &
     #        mdlData[numMDL,nt,ngrdY,ngrdX]. These are the raw, re-gridded data to be used repeatedly
     #        for multiple evaluation steps as desired by an evaluator
     # JK2.1: The observed and model data are unified in a single variable for ease of processing
     ##################################################################################################################

     print ''
     print 'Start metrics.py'
     print ''
     # JK2.1: define the variable to represent the total number of combined (obs + model) datasets
     numDatasets = numOBS + numMDL

     #####################################################################################################
     # JK2.0: Compute evaluation metrics and plots to visualize the results
     #####################################################################################################
     # (mp.001) Sub-regions for local timeseries analysis
     #--------------------------------
     # Enter the location of the subrgns via screen input of data;
     # modify this to add an option to read-in from data file(s)
     #----------------------------------------------------------------------------------------------------
     if subRegions:
         numSubRgn = len(subRegions)
         subRgnName = [ x.name   for x in subRegions ]
         subRgnLon0 = [ x.lonMin for x in subRegions ]
         subRgnLon1 = [ x.lonMax for x in subRegions ]
         subRgnLat0 = [ x.latMin for x in subRegions ]
         subRgnLat1 = [ x.latMax for x in subRegions ]
     else:
         print ''
         ans = raw_input('Calculate area-mean timeseries for subregions? y/n: [n] \n')
         print ''
         if ans == 'y':
             ans = raw_input('Input subregion info interactively? y/n: \n> ')
             if ans == 'y':
                 numSubRgn, subRgnName, subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1 = misc.assign_subRgns_interactively()
             else:
                 print 'Read subregion info from a pre-fabricated text file'
                 ans = raw_input('Read from a defaule file (workdir + "/sub_regions.txt")? y/n: \n> ')
                 if ans == 'y':
                     subRgnFileName = workdir + "/sub_regions.txt"
                 else:
                     subRgnFileName = raw_input('Enter the subRgnFileName to read from \n')
                 print 'subRgnFileName ', subRgnFileName
                 numSubRgn, subRgnName, subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1 = misc.assign_subRgns_from_a_text_file(subRgnFileName)
             print subRgnName, subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1
         else:
             numSubRgn = 0
     # compute the area-mean timeseries for all subregions if subregion(s) are defined.
     #   the number of subregions is usually small and memory usage is usually not a concern
     dataRgn = np.zeros((numDatasets, numSubRgn, nT))

     if subRegions:
         print 'Enter area-averaging: allData.shape ', allData.shape
         print 'Using Latitude/Longitude Mask for Area Averaging'
         for n in np.arange(numSubRgn):
             # Define mask using regular lat/lon box specified by users ('mask=True' defines the area to be excluded)
             maskLonMin = subRegions[n].lonMin
             if maskLonMin > 180.:
                 maskLonMin = maskLonMin - 360.
             maskLonMax = subRegions[n].lonMax
             if maskLonMax > 180.:
                 maskLonMax = maskLonMax - 360.
             maskLatMin = subRegions[n].latMin
             maskLatMax = subRegions[n].latMax
             mask = np.logical_or(np.logical_or(lats <= maskLatMin, lats >= maskLatMax),
                                  np.logical_or(lons <= maskLonMin, lons >= maskLonMax))

             # Calculate area-weighted averages within this region and store in a new list
             for k in np.arange(numDatasets):           #JK2.1: area-average all data
                 Store = []
                 for t in np.arange(nT):
                     Store.append(process.calc_area_mean(allData[k, t, :, :], lats, lons, mymask = mask))
                 dataRgn[k, n, :] = ma.array(Store[:])
             Store = []                               # release the memory allocated by temporary vars

     #-------------------------------------------------------------------------
     # (mp.002) fileOutputOption: The option to create a binary or netCDF file of processed
     #                      (re-gridded and regionally-averaged) data for user-specific processing.
     #                      This option is useful for advanced users who need more than
     #                      the metrics and vidualization provided in the basic package.
     #----------------------------------------------------------------------------------------------------
     print ''
     if not fileOutputOption:
         while fileOutputOption not in ['no', 'nc']:
             fileOutputOption = raw_input('Option for output files of obs/model data: Enter no/nc \
                                 for no, netCDF file \n> ').lower()
     print ''

     # write a netCDF file for post-processing if desired. JK21: binary output option has been completely eliminated
     if fileOutputOption == 'nc':
         fileName = '%s/%s_Tseries.nc' % (workdir, varName)
         tempName = fileName
         if(os.path.exists(tempName) == True):
             print "removing %s from the local filesystem, so it can be replaced..." % (tempName,)
             cmnd = 'rm -f ' + tempName
             subprocess.call(cmnd, shell=True)
         #files.writeNCfile1(fileName, numSubRgn, lons, lats, allData, dataRgn, dataList, subRegions)
         files.writeNCfile1(fileName, numDatasets, numOBS, numMDL, numSubRgn, lons, lats, allData, dataRgn, dataList, subRegions)
         print 'The regridded obs and model data are written in the netCDF file ', fileName

     #####################################################################################################
     ###################### Metrics calculation and plotting cycle starts from here ######################
     #####################################################################################################
     print ''
     print 'OBS and MDL data have been prepared for the evaluation step'
     print ''
     doMetricsOption = raw_input('Want to calculate metrics and plot them? [y/n]\n> ').lower()
     if doMetricsOption == 'y':
         # Assign the variable name and unit to be used in plots
         print 'The variable to be processed is ',timeStep,' ',varName
         pltVarName = raw_input('Enter the variable name to appear in the plot\n> ')
         pltVarUnit = raw_input('Enter the variable unit to appear in the plot\n> ')
     print ''

     ####################################################
     # Terminate job if no metrics are to be calculated #
     ####################################################

     neval = 0

     while doMetricsOption == 'y':
         neval += 1
         print ' '
         print 'neval= ', neval
         print ' '
         #--------------------------------
         # (mp.003) Preparation
         #----------------------------------------------------------------------------------------------------
         # Determine info on years (the years in the record [YR] and its number[numYR])
         yy = ma.zeros(nT, 'i')
         for n in np.arange(nT):
             yy[n] = Times[n].strftime("%Y")
         YR = np.unique(yy)
         yy = 0
         nYR = len(YR)
         print 'nYR, YR = ', nYR, YR

         # Select the eval domain: over the entire domain (spatial distrib) or regional time series
         anlDomain = 'n'
         anlRgn = 'n'
         print ' '
         analSelect = int(raw_input('Eval over domain (Enter 0) or time series of selected Sub Regions (Enter 1) \n> '))
         print ' '
         if analSelect == 0:
             anlDomain = 'y'
         elif analSelect == 1:
             anlRgn = 'y'
         else:
             print 'analSelect= ', analSelect, ' is Not a valid option: CRASH'

         #--------------------------------------------------------------------------------------------------------------------
         # (mp.004) Select the model and data to be used in the evaluation step
         # 6/7/2013: JK4 - unified handling of the ref & mdl datasets allows several diff types of evaluation (RCMES v2.1)
         #                 such as ref vs. one model or ref; ref vs. all models; ref vs. all model + non-ref refs
         #          refID: the ID of the reference data against which all data are to be evaluated
         #          mdlID: the list of data ID's to be evaluated. if mdlSelect == -99, the list also includes non-ref obs data
         #--------------------------------------------------------------------------------------------------------------------
         refID = int(misc.select_data_combined(numDatasets, Times, dataList, 'ref'))
         mdlSelect = int(misc.select_data_combined(numDatasets, Times, dataList, 'mdl'))
         mdlID=[]
         # Assign the data id to be evaluated. Note that a non-reference obs dataset is treated like a model dataset (mdlSelect == -99)
         if mdlSelect >= 0:
             mdlID.append(mdlSelect)
         elif mdlSelect == -1:
             for n in np.arange(numMDL):
                 mdlID.append(n+numOBS)
         elif mdlSelect == -2:
             for n in np.arange(refID):
                 mdlID.append(n)
             for n in range(refID + 1, numDatasets):
                 mdlID.append(n)
         elif mdlSelect == -3:
             if numOBS == 1:
                 print 'There exist only one reference data: EXIT'
                 sys.exit()
             for n in np.arange(refID):
                 mdlID.append(n)
             for n in range(refID + 1, numOBS):
                 mdlID.append(n)
         elif mdlSelect == -4:
             id4eval = 0         # any number != -9
             print 'Enter the data id to be evaluated: -9 to stop entering'
             while id4eval != -9:
                 id4eval = int(raw_input('Enter the data id for evaluation. -9 to stop entry\n> '))
                 if id4eval != -9:
                     mdlID.append(id4eval)
         refName = dataList[refID]
         mdlName = []
         numMdl = len(mdlID)
         for n in np.arange(numMdl):
             tname = dataList[mdlID[n]]
             m = min(len(tname), 8)
             for k in np.arange(m):
                 if tname[k] == ' ':
                     break
                 elif k == m-1:
                     k = m
             mdlName.append(tname[0:k])
         print 'selected reference and model data for evaluation= ', refName, mdlName

         #--------------------------------
         # (mp.005) Spatial distribution analysis/Evaluation (anlDomain='y')
         #          Obs/mdl climatology variables are 2-d/3d arrays (e.g., oClim = ma.zeros((ngrdY,ngrdX), mClim = ma.zeros((numMdl,ngrdY,ngrdX))
         #----------------------------------------------------------------------------------------------------
         if anlDomain == 'y':
             # first determine the temporal properties to be evaluated
             print ''
             timeOption = misc.select_timOpt()
             print ''
             if timeOption == 1:
                 timeScale = 'annual'
                 # compute the annual-mean time series and climatology.
                 oTser, oClim = calc_clim_year(nYR, nT, ngrdY, ngrdX, allData[refID, :, :, :], Times)
                 mTser = ma.zeros((numMdl, nYR, ngrdY, ngrdX))
                 mClim = ma.zeros((numMdl, ngrdY, ngrdX))
                 for n in np.arange(numMdl):
                     id = mdlID[n]
                     mTser[n, :, :, :], mClim[n, :, :] = calc_clim_year(nYR, nT, ngrdY, ngrdX, allData[id, :, :, :], Times)
             elif timeOption == 2:
                 timeScale = 'seasonal'
                 # select the timeseries and climatology for a season specifiec by a user
                 mTser = ma.zeros((numMdl, nYR, ngrdY, ngrdX))
                 mClim = ma.zeros((numMdl, ngrdY, ngrdX))
                 print ' '
                 moBgn = int(raw_input('Enter the beginning month for your season. 1-12: \n> '))
                 moEnd = int(raw_input('Enter the ending month for your season. 1-12: \n> '))
                 print ' '
                 if moEnd >= moBgn:
                     nMoPerSeason = moEnd - moBgn + 1
                     oTser, oClim = calc_clim_season(nYR, nT, moBgn, moEnd, ngrdY, ngrdX, allData[refID, :, :, :], Times)
                     for n in np.arange(numMdl):
                         id = mdlID[n]
                         mTser[n, :, :, :], mClim[n, :, :] = calc_clim_season(nYR, nT, moBgn, moEnd, ngrdY, ngrdX, allData[id, :, :, :], Times)
                 elif moEnd == moBgn:
                     # Eval for a single month. mTser, oTser are the annual time series
                     # for the specified month (moEnd), and  mClim, oClim are the corresponding climatology
                     oTser, oClim = calc_clim_One_month(moEnd, nYR, nT, allData[refID, :, :, :], Times)
                     for n in np.arange(numMdl):
                         id = mdlID[n]
                         mTser[n, :, :, :], mClim[n, :, :] = calc_clim_One_month(moEnd, nYR, nT, allData[id, :, :, :], Times)
                 elif moEnd < moBgn:        # have to lose the ending year. redefine nYR=nYR-1, and drop the YR[nYR]
                     nMoS1 = 12 - moBgn + 1
                     nMoS2 = moEnd
                     nMoPerSeason = nMoS1 + nMoS2
                     mTser = ma.zeros((numMdl, nYR - 1, ngrdY, ngrdX))
                     # calculate the seasonal timeseries and climatology for the model data
                     for n in np.arange(numMdl):
                         id = mdlID[n]
                         mTser1, mClim1 = calc_clim_season(nYR, nT, moBgn, 12, ngrdY, ngrdX, allData[id, :, :, :], Times)
                         mTser2, mClim2 = calc_clim_season(nYR, nT, 1, moEnd, ngrdY, ngrdX, allData[id, :, :, :], Times)
                         for i in np.arange(nYR - 1):
                             mTser[n, i, :, :] = (real(nMoS1) * mTser1[i, :, :] + real(nMoS2) * mTser2[i + 1, :, :]) / nMoPerSeason
                     mClim = ma.average(mTser, axis=1)
                     # repeat for the obs data
                     mTser1, mClim1 = calc_clim_season(nYR, nT, moBgn, 12, ngrdY, ngrdX, allData[refID, :, :, :], Times)
                     mTser2, mClim2 = calc_clim_season(nYR, nT, 1, moEnd, ngrdY, ngrdX, allData[refID, :, :, :], Times)
                     oTser = ma.zeros((nYR - 1, ngrdY, ngrdX))
                     for i in np.arange(nYR - 1):
                         oTser[i, :, :] = (real(nMoS1) * mTser1[i, :, :] + real(nMoS2) * mTser2[i + 1, :, :]) / nMoPerSeason
                     oClim = ma.zeros((ngrdY, ngrdX))
                     oClim = ma.average(oTser, axis=0)
                     nYR = nYR - 1
                     yy = ma.empty(nYR)
                     for i in np.arange(nYR):
                         yy[i] = YR[i]
                     mTser1 = 0.
                     mTser2 = 0.
                     mClim1 = 0.
                     mClim2 = 0.
             elif timeOption == 3:
                 timeScale = 'monthly'
                 # compute the monthly-mean time series and climatology
                 # Note that the shapes of the output vars are:
                 #   mTser = ma.zeros((nYR,12,ngrdY,ngrdX)) & mClim = ma.zeros((12,ngrdY,ngrdX))
                 # Also same for oTser = ma.zeros((nYR,12,ngrdY,ngrdX)) &,oClim = ma.zeros((12,ngrdY,ngrdX))
                 oTser, oClim = calc_clim_mo(nYR, nT, ngrdY, ngrdX, allData[refID, :, :, :], Times)
                 mTser = ma.zeros((numMdl, nYR, 12, ngrdY, ngrdX))
                 mClim = ma.zeros((numMdl, 12, ngrdY, ngrdX))
                 for n in np.arange(numMdl):
                     id = mdlID[n]
                     mTser[n, :, :, :, :], mClim[n, :, :, :] = calc_clim_mo(nYR, nT, ngrdY, ngrdX, allData[id, :, :, :], Times)
             else:
                 # undefined process options. exit
                 print 'The desired temporal scale is not available this time. END the job'
                 sys.exit()

             # compute the interannual variability
             if timeScale == 'monthly':
                 oTsig = ma.zeros((12, ngrdY, ngrdX))
                 mTsig = ma.zeros((numMdl, 12, ngrdY, ngrdX))
             else:
                 oTsig = ma.zeros((ngrdY, ngrdX))
                 mTsig = ma.zeros((numMdl, ngrdY, ngrdX))
             oTsig = oTser.std(axis = 0)
             mTsig = mTser.std(axis = 1)

             #--------------------------------
             # (mp.006) Select metric to be calculated
             # bias, mae, acct, accs, pcct, pccs, rmst, rmss, pdfSkillScore, taylor diagram
             #----------------------------------------------------------------------------------------------------
             print ' '
             metricOption = misc.select_metrics(mdlSelect)
             print ' '

             # metrics calculation: the shape of metricDat varies according to the metric type & timescale opetions

             if metricOption == 'BIAS':
                 iselect = int (raw_input('Enter to evaluate: 0 for the mean climatology; 1 for the interannual variability. \n'))
             else:
                 iselect = 0

             # metrics below yields a 2-d (annual or seasonal) or 3-d (monthly) array for each model
             if metricOption == 'BIAS':
                 if timeScale == 'monthly':
                     oStdv = np.zeros(12)
                     metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
                     for n in np.arange(numMdl):
                         for m in np.arange(12):
                             if iselect == 0:
                                 metricDat[n, m, :, :]  = calc_bias(mTser[n, :, m, :, :], oTser[m, :, :])
                             elif iselect == 1:
                                 metricDat[n, m, :, :]  = mTsig[n, m, :, :] - oTsig[m, :, :]
                             if n == 0:
                                 oStdv[m] = calc_temporal_stdv(oTser[m, :, :])
                 else:
                     oStdv = calc_temporal_stdv(oTser)
                     metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
                     for n in np.arange(numMdl):
                         if iselect == 0:
                             metricDat[n, :, :]  = calc_bias(mTser[n, :, :, :], oTser)
                         elif iselect == 1:
                             for n in np.arange(numMdl):
                                 metricDat[n, :, :]  = mTsig[n, :, :] - oTsig[:, :]

             elif metricOption == 'MAE':
                 if timeScale == 'monthly':
                     metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
                     for n in np.arange(numMdl):
                         for m in np.arange(12):
                             metricDat[n, m, :, :]  = calc_mae(mTser[n, :, m, :, :], oTser[m, :, :])
                 else:
                     metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
                     for n in np.arange(numMdl):
                         metricDat[n, :, :]  = calc_mae(mTser[n, :, :, :], oTser)

             elif metricOption == 'ACt':
                 if timeScale == 'monthly':
                     metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
                     for n in np.arange(numMdl):
                         for m in np.arange(12):
                             metricDat[n, m, :, :]  = calc_temporal_anom_cor(mTser[n, :, m, :, :], oTser[m, :, :])
                 else:
                     metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
                     for n in np.arange(numMdl):
                         metricDat[n, :, :]  = calc_temporal_anom_cor(mTser[n, :, :, :], oTser)

             elif metricOption == 'PCt':
                 if timeScale == 'monthly':
                     metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
                     for n in np.arange(numMdl):
                         for m in np.arange(12):
                             metricDat[n, m, :, :]  = calc_temporal_pat_cor(mTser[n, :, m, :, :], oTser[m, :, :])
                 else:
                     metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
                     for n in np.arange(numMdl):
                         metricDat[n, :, :]  = calc_temporal_pat_cor(mTser[n, :, :, :], oTser)

             elif metricOption == 'RMSt':
                 if timeScale == 'monthly':
                     metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
                     for n in np.arange(numMdl):
                         for m in np.arange(12):
                             metricDat[n, m, :, :]  = calc_rms(mTser[n, :, m, :, :], oTser[m, :, :])
                 else:
                     metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
                     for n in np.arange(numMdl):
                         metricDat[n, :, :]  = calc_rms(mTser[n, :, :, :], oTser)

             # metrics below yields a scalar value for each model
             elif metricOption == 'ACs':
                 if timeScale == 'monthly':
                     metricDat = ma.zeros((numMdl, 12))
                     for n in np.arange(numMdl):
                         for m in np.arange(12):
                             metricDat[n, m]  = calc_spatial_anom_cor(mClim[n, m, :, :], oClim[m, :, :])
                 else:
                     metricDat = ma.zeros(numMdl)
                     for n in np.arange(numMdl):
                         metricDat[n]  = calc_spatial_anom_cor(mClim[n, :, :], oClim)
                     print 'spatial anomaly correlation vs the reference data:', refName
                     for n in np.arange(numMdl):
                         print dataList[n+1][0:8],metricDat[n]

             elif metricOption == 'PCs':
                 if timeScale == 'monthly':
                     metricDat = ma.zeros((numMdl, 12))
                     for n in np.arange(numMdl):
                         for m in np.arange(12):
                             metricDat[n, m]  = calc_spatial_pat_cor(mClim[n, m, :, :], oClim[m, :, :])
                 else:
                     metricDat = ma.zeros(numMdl)
                     for n in np.arange(numMdl):
                         metricDat[n]  = calc_spatial_pat_cor(mClim[n, :, :], oClim)
                     print 'spatial pattern correlation vs the reference data:', refName
                     for n in np.arange(numMdl):
                         print dataList[n+1][0:8],metricDat[n]

             elif metricOption == 'RMSs':
                 if timeScale == 'monthly':
                     metricDat = ma.zeros((numMdl, 12))
                     for n in np.arange(numMdl):
                         for m in np.arange(12):
                             metricDat[n, m]  = calc_rms_dom(mClim[n, m, :, :], oClim[m, :, :])
                 else:
                     metricDat = ma.zeros(numMdl)
                     for n in np.arange(numMdl):
                         metricDat[n]  = calc_rms_dom(mClim[n, :, :], oClim)
                     print 'RMSE over the entire domain'
                     for n in np.arange(numMdl):
                         print dataList[n+1][0:8],metricDat[n]

             # metrics to plot taylor diagram
             elif metricOption == 'Taylor_space':
                 if timeScale == 'monthly':
                     oStdv = ma.zeros(12)
                     mStdv = ma.zeros((numMdl, 12))
                     mCorr = ma.zeros((numMdl, 12))
                     mTemp = ma.zeros((ngrdY, ngrdX))
                     for m in np.arange(12):
                         oStdv[m] = oClim[m, :, :].std()
                     for n in np.arange(numMdl):
                         for m in np.arange(12):
                             mTemp = mClim[n, m, :, :]
                             mStdv[n, m] = mTemp.std()
                             mCorr[n, m] = calc_spatial_pat_cor(mTemp, oClim)
                 else:
                     oStdv = oClim.std()
                     mStdv = ma.zeros(numMdl)
                     mCorr = ma.zeros(numMdl)
                     for n in np.arange(numMdl):
                         mStdv[n] = mClim[n, :, :].std()
                         mCorr[n] = calc_spatial_pat_cor(mClim[n, :, :], oClim)
                 mStdv = mStdv / oStdv                          # standardized deviation

             #--------------------------------
             # (mp.008 & 009) Coeff of Variation or Signal-to-noise ratio: only application to the case of
             #                                     multiple datasets of the same kind (e.g., obs or model)
             #----------------------------------------------------------------------------------------------------
             elif metricOption == 'CV' or metricOption == 'S2N':
                 if numMDL == 1:
                     print 'CV/S2N not available when only one dataset exists'
                     pass
                 else:
                     if timeScale == 'monthly':
                         metricDat = ma.zeros((12, ngrdY, ngrdX))
                         Stdv = ma.zeros((12, ngrdY, ngrdX))
                         Stdv = mClim.std(axis = 0, ddof = 1); print Stdv.shape
                     else:
                         metricDat = ma.zeros((ngrdY, ngrdX))
                         Stdv = ma.zeros((ngrdY, ngrdX))
                         Stdv = mClim.std(axis = 0, ddof = 1); print Stdv.shape
                         metricDat = oClim / Stdv           # Signal-2-noise ratio
                     if metricOption == 'CV':
                         metricDat = Stdv / oClim

             #--------------------------------
             # (mp.007) Plot the metrics. First, enter plot info
             #----------------------------------------------------------------------------------------------------
             # Taylor diagram
             if metricOption == 'Taylor_space':
                 taylorDat = []
                 for n in np.arange(numMdl):
                     ttmp = []
                     ttmp.append(mStdv[n])
                     ttmp.append(mCorr[n])
                     entryLegend = mdlName[n]
                     ttmp.append(entryLegend)
                     taylorDat.append(ttmp)
                 pltTit = 'Spatial variability evaluation'
                 pltFileName = workdir + '/taylor_space_' + timeScale + '-' + varName
                 legendPos = 'upper right'
                 if max(mStdv) > 2.5:
                     radMax = 5.
                 elif max(mStdv) > 2.:
                     radMax = 2.5
                 else:
                     radMax = 2
                 status = taylorDiagram(taylorDat, pltTit, pltFileName, refName, legendPos, radMax)

             # multiple 2-d contour plots
             elif metricDat.ndim == 3:
                 print 'Plot',metricOption,'in',timeScale,varName
                 # Plot metrics (2-d contour plots)
                 if iselect == 0:
                     plotFileName = workdir + '/' + timeScale + '_' + varName + '_' + metricOption  + '_tAVG'
                 elif iselect == 1:
                     plotFileName = workdir + '/' + timeScale + '_' + varName + '_' + metricOption  + '_INTRANN VAR'
                 if(os.path.exists(plotFileName)==True):
                     cmnd='rm -f ' + plotFileName
                     subprocess.call(cmnd,shell=True)
                 pltTit = []
                 if metricDat.shape[0] > 1:
                     ncols = 3
                     for n in np.arange(numMdl):
                         pltTit.append(mdlName[n])
                 elif metricDat.shape[0] == 1:
                     ncols = 1
                     pltTit.append(mdlName[0])
                 if metricDat.shape[0] // ncols == 0:
                     nrows = max( int(metricDat.shape[0] / ncols), 1)
                 else:
                     nrows = int(metricDat.shape[0]/ncols) + 1
                 status = drawContourMap(metricDat, lats, lons, plotFileName, fType = 'png', gridShape = (nrows, ncols),
                                         cLabel = '', pTitle = '', subTitles = pltTit, cMap = None,
                                         cLevs = None, nLevs = 10, parallels = None, meridians = None, extend = 'neither')
                 if metricOption == 'BIAS':
                     # obs climatology
                     plotDat = np.zeros((ngrdY, ngrdX))
                     print''
                     makePlot = raw_input('Plot observed climatology? [y/n]\n> ').lower()
                     if makePlot == 'y':
                         pltTit = []
                         if iselect == 0:
                             plotDat = oClim
                             plotFileName = workdir + '/' + timeScale + '_' + varName + '_OBS_tAVG'
                             pltTit.append('OBS (' + pltVarName + ': ' + pltVarUnit +'): MEAN CLIM')
                         elif iselect == 1:
                             plotDat = oTsig
                             plotFileName = workdir + '/' + timeScale + '_' + varName + '_OBS_tSIG'
                             pltTit.append('OBS (' + pltVarName + ': ' + pltVarUnit + '): INTRANN VAR')
                         if(os.path.exists(plotFileName)==True):
                             cmnd='rm -f ' + plotFileName
                             subprocess.call(cmnd,shell=True)
                         ncols = 1
                         nrows = 1
                         status = drawContourMap(plotDat, lats, lons, plotFileName, fType = 'png', gridShape = (ncols, nrows),
                                                 cLabel = '', pTitle = '', subTitles = pltTit, cMap = None,
                                                 cLevs = None, nLevs = 10, parallels = None, meridians = None, extend = 'neither')
                     # plot bias normalized by the mean climatology
                     if iselect == 0:
                         makePlot = raw_input('Plot bias in terms of % of OBS mean? [y/n]\n> ').lower()
                     else:
                         makePlot = 'n'
                     if makePlot != 'y':
                         pass
                     else:
                         plotDat = ma.empty((numMdl, ngrdY, ngrdX))
                         print plotDat.shape, metricDat.shape
                         for n in np.arange(numMdl):
                             plotDat[n,:,:] = 100.* metricDat[n,:,:] / oClim[:,:]
                         plotFileName = workdir + '/' + timeScale + '_' + varName + '_' + metricOption + '_MEAN'
                         if(os.path.exists(plotFileName)==True):
                             cmnd='rm -f ' + plotFileName
                             subprocess.call(cmnd,shell=True)
                         pltTit = []
                         if plotDat.shape[0] > 1:
                             ncols = 3
                             for n in np.arange(numMdl):
                                 pltTit.append(mdlName[n])
                         elif plotDat.shape[0] == 1:
                             ncols = 1
                             pltTit.append(mdlName[0])
                         if plotDat.shape[0]/ncols == 0:
                             nrows = max(int(plotDat.shape[0] / ncols), 0)
                         else:
                             nrows = int(metricDat.shape[0]/ncols) + 1
                         status = drawContourMap(plotDat, lats, lons, plotFileName, fType = 'png', gridShape = (ncols, nrows),
                                                 cLabel = '', pTitle = '', subTitles = pltTit, cMap = None,
                                                 cLevs = None, nLevs = 10, parallels = None, meridians = None, extend = 'neither')
                         plotDat = 0.
                     # plot bias normalized by the interannual variability
                     if iselect == 0:
                         makePlot = raw_input('Plot bias in terms of % of interann sigma? [y/n]\n> ').lower()  # inactive for this version
                     else:
                         makePlot = 'n'
                     if makePlot == 'y':
                         print 'plot normalized by sigma'
                         plotDat = ma.empty((numMdl, ngrdY, ngrdX))
                         print plotDat.shape, metricDat.shape
                         for n in np.arange(numMdl):
                             plotDat[n,:,:] = 100.* metricDat[n,:,:] / oStdv[:,:]
                         plotFileName = workdir + '/' + timeScale + '_' + varName + '_' + metricOption + '_SIG'
                         if(os.path.exists(plotFileName)==True):
                             cmnd='rm -f ' + plotFileName
                             subprocess.call(cmnd,shell=True)
                         pltTit = []
                         if metricDat.shape[0] > 1:
                             ncols = 3
                             for n in np.arange(numMdl):
                                 pltTit.append(mdlName[n])
                         elif metricDat.shape[0] == 1:
                             ncols = 1
                             pltTit.append(mdlName[0])
                         if metricDat.shape[0]/ncols == 0:
                             nrows = max(int(metricDat.shape[0] / ncols), 0)
                         else:
                             nrows = int(metricDat.shape[0]/ncols) + 1
                         status = drawContourMap(plotDat, lats, lons, plotFileName, fType = 'png', gridShape = (ncols, nrows),
                                                 cLabel = '', pTitle = '', subTitles = pltTit, cMap = None,
                                                 cLevs = None, nLevs = 10, parallels = None, meridians = None, extend = 'neither')
                         plotDat = 0.

             # a single 2-d array
             elif metricDat.ndim == 2:
                 # obs climatology
                 print''
                 plotFileName = workdir + '/' + metricOption + '_' + varName + '_OBS'
                 if(os.path.exists(plotFileName)==True):
                     cmnd='rm -f ' + plotFileName
                     subprocess.call(cmnd,shell=True)
                 pltTit = []
                 pltTit.append(metricOption)
                 ncols = 1
                 nrows = 1
                 cLevs = [1., 5., 10., 25., 50.]
                 status = drawContourMap(metricDat, lats, lons, plotFileName, fType = 'png', gridShape = (ncols, nrows),
                                         cLabel = '', pTitle = '', subTitles = pltTit, cMap = None,
                                         cLevs = cLevs, nLevs = 10, parallels = None, meridians = None, extend = 'neither')

             # Repeat for another metric. Also release memory used by temporary variables
             metricDat = 0.
             oTser = 0.
             oClim = 0.
             mTser = 0.
             mClim = 0.
             print ''
             print 'Evaluation completed'
             print ''
             doMetricsOption = raw_input('Do you want to perform another evaluation? [y/n]\n> ').lower()
             print ''

         # metrics and plots for regional time series
         elif anlRgn == 'y' and subRegions == False:
             print 'No SubRegions have been defined.  Regional Time Series Plots cannot be created'

         # metrics and plots for regional time series
         elif anlRgn == 'y':
             oData = ma.zeros((numSubRgn, nT))
             mData = ma.zeros((numMdl, numSubRgn, nT))
             oData[:, :] = dataRgn[refID, :, :]
             for n in np.arange(numMdl):
                 mData[n, :, :] = dataRgn[mdlID[n], :, :]
             # select the region(s) for evaluation. model and obs have been selected before entering this if block
             print 'There are %s subregions. Select the subregion(s) for evaluation' % numSubRgn
             rgnSelect = misc.selectSubRegion(subRegions)
             print 'selected region for evaluation= ', rgnSelect

             # compute the annual cycle (in terms of monthly means) regardless of its use
             if rgnSelect >= 0:
                 obsAnnCyc = calc_annual_cycle_means(oData[rgnSelect, :], Times)
                 mdlAnnCyc = ma.empty((numMdl, 12))
                 for n in np.arange(numMdl):
                     mdlAnnCyc[n, :] = calc_annual_cycle_means(mData[n, rgnSelect, :], Times)
                 print 'Annual cycles for  a single subregion are calculated'
             elif rgnSelect == -9:
                 obsAnnCyc = ma.empty((numSubRgn, 12))
                 mdlAnnCyc = ma.empty((numMdl, numSubRgn, 12))
                 for r in np.arange(numSubRgn):
                     obsAnnCyc[r, :] = calc_annual_cycle_means(oData[r, :], Times)
                     for n in np.arange(numMdl):
                         mdlAnnCyc[n, r, :] = calc_annual_cycle_means(mData[n, r, :], Times)
                 print 'Annual cycles for all subregions are calculated'

             # (rgn01) evaluate the annual cycle
             ans = raw_input('Do you want evaluate the annual cycle? [y/n]\n> ').lower()
             if ans == 'y':                     # analysis for a single region
                 if rgnSelect >= 0:
                     # plot obs and model within a single frame
                     pltYvar = ma.empty((numMdl+1, 12))
                     pltYvar[0, :] = obsAnnCyc[:]
                     for n in np.arange(numMdl):
                         pltYvar[n+1, :] = mdlAnnCyc[n, :]
                     # assign lineLabel: add data name, rmse and corr between the model and ref data for the model data
                     lineLabel = []
                     lineLabel.append(refName)
                     for n in np.arange(numMdl):
                         rms = calc_rms(obsAnnCyc, mdlAnnCyc[n, :])
                         cor = calc_pat_cor(obsAnnCyc, mdlAnnCyc[n, :])
                         lineLabel.append(mdlName[n])
                     lineTyp = []
                     for n in np.arange(numMdl + 1):
                         if (n == 0) or (n == numMdl):
                             lineTyp.append('-')
                         else:
                             lineTyp.append(':')
                     xLabel = 'Month'
                     yLabel = pltVarUnit
                     pltTit = 'Annual cycle of ' + pltVarName +' at Subregion ' + subRgnName[rgnSelect]
                     plotFileName = workdir + '/annCyc_' + varName + '_' + subRgnName[rgnSelect]
                     if(os.path.exists(plotFileName)==True):
                         cmnd='rm -f ' + plotFileName
                         subprocess.call(cmnd, shell=True)
                     pltXvar = frange(1, 12, npts=12)
                     xmin = 1
                     xmax = 12
                     deltaX = 1
                     mn1 = 1.e24
                     mx1 = -mn1
                     for n in np.arange(numMdl):
                         mn1 = min(min(mdlAnnCyc[n]), mn1)
                         mx1 = max(max(mdlAnnCyc[n]), mx1)
                     print 'Minimum y value= ', min(min(obsAnnCyc), mn1)
                     print 'Maximum y value= ', max(max(obsAnnCyc), mx1)
                     ymin = float(raw_input('Enter the minimum y to be plotted\n> '))
                     ymax = float(raw_input('Enter the maximum y to be plotted\n> '))
                     if(ymax>ymin):
                         deltaY = float(raw_input('Enter the delta_y (tick mark intervals)\n> '))
                     legendPos = 'best'
                     xScale = 'linear'
                     yScale = 'linear'
                     status = pltXY(pltXvar, pltYvar, lineLabel, lineTyp, pltTit, xLabel, yLabel, plotFileName, \
                                    xmin, xmax, deltaX, ymin, ymax, deltaY, legendPos, xScale, yScale)
                     pltXvar = 0
                     pltYvar = 0
                 else:                           # analysis for all subregions - draw a multi-frame plot
                     # compute rmse and corr between the ref and model annual cycle
                     rms = np.zeros((numMdl, numSubRgn))
                     crr = np.zeros((numMdl, numSubRgn))
                     for n in np.arange(numMdl):
                         for r in np.arange(numSubRgn):
                             rms[n, r] = calc_rms(obsAnnCyc[r, :], mdlAnnCyc[n, r, :])
                             crr[n, r] = calc_pat_cor(obsAnnCyc[r, :], mdlAnnCyc[n, r, :])

                     # Make a portrait diagram here
                     rgnName = []
                     for i in np.arange(numSubRgn):
                         j = i+1
                         if j < 10:
                             rgnName.append('R0' + str(j))
                         else:
                             rgnName.append('R' + str(j))
                     # plot correlation coefficients
                     plotFileName = workdir + '/pd_annCyc_corr_' + varName + '.subRgns'
                     pTitle = 'Annual Cycle CORR COEF'
                     data = crr
                     cLevs = [0., .1, .2, .3, .4, .5, .6, .7, .8, .9, 1.]
                     status = drawPortraitDiagramSingle(data, rowLabels = rgnName, colLabels = mdlName, cLevs = cLevs,
                                                        fName = plotFileName, fType = 'png', xLabel = 'Region', yLabel = 'Model',
                                                        cLabel = '', pTitle = pTitle, cMap = None)
                     # plot correlation coefficients
                     plotFileName = workdir + '/pd_annCyc_rmse_' + varName + '.subRgns'
                     if varName == 'pr' or varName == 'prec':
                         pTitle = 'Ann Cyc RMSE: % Mean'
                         for n in np.arange(numMdl):
                             for r in np.arange(numSubRgn):
                                 rgnSum = 0.
                                 for i in np.arange(12):
                                     rgnSum = rgnSum + obsAnnCyc[r, i]
                                 data[n, r] = rms[n, r] / (rgnSum / 12.)
                     cLevs = [0., 20., 50., 100., 150., 200., 250., 300., 400.]
                     data = data * 100.
                     status = drawPortraitDiagramSingle(data, rowLabels = rgnName, colLabels = mdlName, cLevs = cLevs,
                                                        fName = plotFileName, fType = 'png', xLabel = 'Region', yLabel = 'Model',
                                                        cLabel = '', pTitle = pTitle, cMap = None)
                     data = 0.
                     # Make x-y plots for all regions (replace with multi-plot page(s))
                     autoYscale = raw_input('Automatic scaling for the Y axis? y/n \n> ')
                     if autoYscale == 'y':
                         ymin = 0.
                         ymax = 0.
                         deltaY = 0.
                     pltXvar = frange(1,12,npts=12)
                     pltYvar = ma.empty((numMdl+1, 12))
                     xLabel = 'Month'
                     xmin = 1
                     xmax = 12
                     deltaX = 1
                     yLabel = pltVarUnit
                     xScale = 'linear'
                     yScale = 'linear'
                     legendPos = 'best'
                     lineLabel = []
                     lineLabel.append(refName)
                     for n in np.arange(numMdl):
                         lineLabel.append(mdlName[n])
                     lineTyp = []
                     for n in np.arange(numMdl + 1):
                         if (n==0) or (n==numMdl):
                             lineTyp.append('-')
                         else:
                             lineTyp.append(':')
                     for r in np.arange(numSubRgn):
                         plotFileName = workdir + '/annCyc_' + varName + '_' + subRgnName[r]
                         print 'Annual cycle plot for r= ', r, ' Rgn Name= ', subRgnName[r]
                         pltTit = ''
                         if autoYscale != 'y':
                             mn1 = mdlAnnCyc[:, r, :].min()
                             mx1 = mdlAnnCyc[:, r, :].max()
                             print 'Minimum y value= ', min(obsAnnCyc[r, :].min(), mn1)
                             print 'Maximum y value= ', max(obsAnnCyc[r, :].max(), mx1)
                             ymin = float(raw_input('Enter the minimum y to be plotted\n> '))
                             ymax = float(raw_input('Enter the maximum y to be plotted\n> '))
                             if(ymax>ymin):
                                 deltaY = float(raw_input('Enter the delta_y (tick mark intervals)\n> '))
                             else:
                                 deltaY = 0.
                         # assign plot values
                         pltYvar[0, :] = obsAnnCyc[r, :]
                         for n in np.arange(numMdl):
                             pltYvar[n+1, :] = mdlAnnCyc[n, r, :]
                         status = pltXY(pltXvar, pltYvar, lineLabel, lineTyp, pltTit, xLabel, yLabel, plotFileName, \
                                        xmin, xmax, deltaX, ymin, ymax, deltaY, legendPos, xScale, yScale)
                     pltXvar = 0
                     pltYvar = 0

             #--------------------------------
             # (mp.008) Other metrics options to be added here
             #----------------------------------------------------------------------------------------------------

             # Repeat for another metric
             doMetricsOption = raw_input('Do you want to perform another evaluation? [y/n]\n> ').lower()

     # Processing complete if a user enters 'n' for 'doMetricsOption'
     print 'RCMES processing completed.'