| # |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| |
| ''' |
| Module storing functions to calculate statistical metrics from numpy arrays |
| ''' |
| |
| import datetime |
| import subprocess |
| import sys |
| import os |
| import numpy as np |
| import numpy.ma as ma |
| from math import floor, log |
| from toolkit import process |
| from utils import misc |
| from storage import files |
| from pylab import * |
| import scipy.stats.mstats as mstats |
| import matplotlib as mpl |
| import matplotlib.dates |
| import matplotlib.pyplot as plt |
| from mpl_toolkits.axes_grid1 import ImageGrid |
| from matplotlib.font_manager import FontProperties |
| from mpl_toolkits.basemap import Basemap |
| from utils.Taylor import TaylorDiagram |
| # 6/10/2012 JK: any Ngl dependence will be removed in later versions |
| #import Ngl |
| |
| def calc_ann_mean(t2, time): |
| ''' |
| Calculate annual cycle in terms of monthly means at every grid point. |
| ''' |
| # Calculate annual cycle in terms of monthly means at every grid point including single point case (ndim=1) |
| # note: this routine is identical to 'calc_annual_cycle_means': must be converted to calculate the annual mean |
| # Extract months from time variable |
| months = np.empty(len(time)) |
| for t in np.arange(len(time)): |
| months[t] = time[t].month |
| if t2.ndim == 3: |
| means = ma.empty((12, t2.shape[1], t2.shape[2])) # empty array to store means |
| # Calculate means month by month |
| for i in np.arange(12)+1: |
| means[i - 1, :, :] = t2[months == i, :, :].mean(0) |
| if t2.ndim == 1: |
| means = np.empty((12)) # empty array to store means |
| # Calculate means month by month |
| for i in np.arange(12)+1: |
| means[i - 1] = t2[months == i].mean(0) |
| return means |
| |
| |
| def calc_clim_month(t2, time): |
| ''' |
| Calculate monthly means at every grid point. |
| ''' |
| # Calculate monthly means at every grid point including single point case (ndim=1) |
| # Extract months from time variable |
| months = np.empty(len(time)) |
| for t in np.arange(len(time)): |
| months[t] = time[t].month |
| if t2.ndim == 3: |
| means = ma.empty((12, t2.shape[1], t2.shape[2])) # empty array to store means |
| # Calculate means month by month |
| for i in np.arange(12) + 1: |
| means[i - 1, :, :] = t2[months == i, :, :].mean(0) |
| if t2.ndim == 1: |
| means = np.empty((12)) # empty array to store means |
| # Calculate means month by month |
| for i in np.arange(12) + 1: |
| means[i - 1] = t2[months == i].mean(0) |
| return means |
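
# Example (a hypothetical usage sketch; the variable names below are purely
# illustrative and not part of the original API). Given two years of monthly
# gridded data and matching datetime objects, the 12-month climatology at
# every grid point would be obtained as:
#
#   times = [datetime.datetime(2000 + m / 12, m % 12 + 1, 15) for m in range(24)]
#   t2 = ma.masked_array(np.random.rand(24, 4, 5))
#   clim = calc_clim_month(t2, times)   # clim.shape == (12, 4, 5)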
| |
| |
| def calc_clim_year(nYR, nT, ngrdY, ngrdX, t2, time): |
| ''' |
| Calculate annual mean timeseries and climatology for both 2-D and point time series. |
| ''' |
| # Extract months from time variable |
| yy = np.empty(nT) |
| mm = np.empty(nT) |
| for t in np.arange(nT): |
| yy[t] = time[t].year |
| mm[t] = time[t].month |
| if t2.ndim == 3: |
| tSeries = ma.zeros((nYR, ngrdY, ngrdX)) |
| i = 0 |
| for myunit in np.unique(yy): |
| wh = (yy == myunit) |
| data = t2[wh, :, :] |
| tSeries[i, :, :] = ma.average(data, axis = 0) |
| #print 'data.shape= ',data.shape,' i= ',i,' yy= ',yy |
| i += 1 |
        means = ma.average(tSeries, axis = 0)
| elif t2.ndim == 1: |
| tSeries = ma.zeros((nYR)) |
| i = 0 |
| for myunit in np.unique(yy): |
| wh = (yy == myunit) |
| data = t2[wh] |
| tSeries[i] = ma.average(data, axis = 0) |
| #print 'data.shape= ',data.shape,' i= ',i,' yy= ',yy |
| i += 1 |
        means = ma.average(tSeries, axis = 0)
| return tSeries, means |
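
# A minimal calling sketch (hypothetical values, reusing the illustrative
# t2/times arrays from the example above):
#
#   tSeries, means = calc_clim_year(2, 24, 4, 5, t2, times)
#   # tSeries.shape == (2, 4, 5)  -- one mean field per year
#   # means.shape   == (4, 5)     -- climatological mean over all years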
| |
| |
| def calc_clim_season(nYR, nT, mB, mE, ngrdY, ngrdX, t2, time): |
| ''' |
| Calculate seasonal mean timeseries and climatology for both 2-D and point time series. |
| ''' |
| #------------------------------------------------------------------------------------- |
| # Calculate seasonal mean timeseries and climatology for both 2-d and point time series |
| # The season to be calculated is defined by moB and moE; moE>=moB always |
| #------------------------------------------------------------------------------------- |
| # Extract months from time variable |
| yy = np.empty(nT) |
| mm = np.empty(nT) |
| for t in np.arange(nT): |
| yy[t] = time[t].year |
| mm[t] = time[t].month |
| if t2.ndim == 3: |
| tSeries = ma.zeros((nYR, ngrdY, ngrdX)) |
| i = 0 |
| for myunit in np.unique(yy): |
| wh = (yy == myunit) & (mm >= mB) & (mm <= mE) |
| data = t2[wh, :, :] |
| tSeries[i, :, :] = ma.average(data, axis = 0) |
| #print 'data.shape= ',data.shape,' i= ',i,' yy= ',yy |
| i += 1 |
        means = ma.average(tSeries, axis = 0)
| elif t2.ndim == 1: |
| tSeries = ma.zeros((nYR)) |
| i = 0 |
| for myunit in np.unique(yy): |
| wh = (yy == myunit) & (mm >= mB) & (mm <= mE) |
| data = t2[wh] |
| tSeries[i] = ma.average(data, axis = 0) |
| #print 'data.shape= ',data.shape,' i= ',i,' yy= ',yy |
| i += 1 |
        means = ma.average(tSeries, axis = 0)
| return tSeries, means |
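
# A minimal sketch for a JJA (June-August) seasonal mean, under the same
# hypothetical t2/times arrays as in the examples above (mB = 6, mE = 8):
#
#   tSeries, means = calc_clim_season(2, 24, 6, 8, 4, 5, t2, times)
#   # tSeries[i] is the JJA mean of year i; means is the JJA climatology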
| |
| |
| def calc_clim_mo(nYR, nT, ngrdY, ngrdX, t2, time): |
| ''' |
| Calculate monthly means at every grid points and the annual time series of single model including single point case (ndim=1). |
| JK20: This is modified from 'calc_clim_month' with additional arguments & output, the annual time series of single model output (mData) |
| 6/8/2013: bug fix: mm = months[t] --> mm = months[t] - 1, otherwise array overflow occurs |
| ''' |
| # Extract months and monthly time series from the time and raw variable, respectively |
| months = np.empty(nT) |
| for t in np.arange(nT): |
| months[t] = time[t].month |
| if t == 0: |
| yy0 = time[t].year |
| # for a 2-D time series data |
| if t2.ndim == 3: |
| mData = ma.empty((nYR, 12, ngrdY, ngrdX)) |
| for t in np.arange(nT): |
| yy = time[t].year |
| mm = months[t] - 1 |
| yr = yy - yy0 |
| mData[yr, mm, :, :] = t2[t, :, :] |
| # Calculate means month by month. means is an empty array to store means |
| means = ma.empty((12, ngrdY, ngrdX)) |
| for i in np.arange(12) + 1: |
| means[i - 1, :, :] = t2[months == i, :, :].mean(0) |
| # for a point time series data |
| if t2.ndim == 1: |
| mData = ma.empty((nYR, 12)) |
| for t in np.arange(nT): |
| yy = time[t].year |
            mm = months[t] - 1
| yr = yy - yy0 |
| mData[yr, mm] = t2[t] |
| means = np.empty((12)) |
| # Calculate means month by month. means is an empty array to store means |
| for i in np.arange(12) + 1: |
| means[i - 1] = t2[months == i].mean(0) |
| return mData, means |
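
# A minimal sketch of calc_clim_mo (hypothetical values as above); unlike
# calc_clim_month it also returns the monthly series rearranged by year:
#
#   mData, means = calc_clim_mo(2, 24, 4, 5, t2, times)
#   # mData.shape == (2, 12, 4, 5) -- (year, month, lat, lon)
#   # means.shape == (12, 4, 5)    -- monthly climatology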
| |
| |
| def calc_clim_One_month(moID, nYR, nT, t2, time): |
| ''' |
| Calculate the montly mean at every grid point for a specified month. |
| ''' |
| #------------------------------------------------------------------------------------- |
| # Calculate monthly means at every grid point for a specified month |
| #------------------------------------------------------------------------------------- |
| # Extract months and the corresponding time series from time variable |
| months = np.empty(nT) |
| for t in np.arange(nT): |
| months[t] = time[t].month |
| if t2.ndim == 3: |
| mData = ma.empty((nYR, t2.shape[1], t2.shape[2])) # empty array to store time series |
        n = 0
        for t in np.arange(nT):
            if months[t] == moID:
                mData[n, :, :] = t2[t, :, :]
                n += 1
| means = ma.empty((t2.shape[1], t2.shape[2])) # empty array to store means |
| # Calculate means for the month specified by moID |
| means[:, :] = t2[months == moID, :, :].mean(0) |
| return mData, means |
| |
| |
| def calc_annual_cycle_means(data, time): |
| ''' |
| Calculate monthly means for every grid point |
| |
| Inputs:: |
| data - masked 3d array of the model data (time, lon, lat) |
| time - an array of python datetime objects |
| ''' |
| # Extract months from time variable |
| months = np.empty(len(time)) |
| |
| for t in np.arange(len(time)): |
| months[t] = time[t].month |
| |
| #if there is data varying in t and space |
| if data.ndim == 3: |
| # empty array to store means |
| means = ma.empty((12, data.shape[1], data.shape[2])) |
| |
| # Calculate means month by month |
| for i in np.arange(12) + 1: |
| means[i - 1, :, :] = data[months == i, :, :].mean(0) |
| |
| #if the data is a timeseries over area-averaged values |
| if data.ndim == 1: |
        # TODO - Investigate using a masked array (ma) here instead, per KDW
        means = np.empty((12)) # empty array to store means
| |
| # Calculate means month by month |
| for i in np.arange(12) + 1: |
| means[i - 1] = data[months == i].mean(0) |
| |
| return means |
| |
| |
| def calc_annual_cycle_std(data, time): |
| ''' |
| Calculate monthly standard deviations for every grid point |
| ''' |
| # Extract months from time variable |
| months = np.empty(len(time)) |
| |
| for t in np.arange(len(time)): |
| months[t] = time[t].month |
| |
    # empty array to store standard deviations
    stds = np.empty((12, data.shape[1], data.shape[2]))

    # Calculate standard deviations month by month
    for i in np.arange(12) + 1:
| stds[i - 1, :, :] = data[months == i, :, :].std(axis = 0, ddof = 1) |
| |
| return stds |
| |
| |
| def calc_annual_cycle_domain_means(data, time): |
| ''' |
| Calculate domain means for each month of the year |
| ''' |
| # Extract months from time variable |
| months = np.empty(len(time)) |
| |
| for t in np.arange(len(time)): |
| months[t] = time[t].month |
| |
| means = np.empty(12) # empty array to store means |
| |
| # Calculate means month by month |
| for i in np.arange(12) + 1: |
| means[i - 1] = data[months == i, :, :].mean() |
| |
| return means |
| |
| |
| def calc_annual_cycle_domain_std(data, time): |
| ''' |
| Calculate domain standard deviations for each month of the year |
| ''' |
| # Extract months from time variable |
| months = np.empty(len(time)) |
| |
| for t in np.arange(len(time)): |
| months[t] = time[t].month |
| |
    stds = np.empty(12) # empty array to store standard deviations

    # Calculate standard deviations month by month
    for i in np.arange(12) + 1:
| stds[i - 1] = data[months == i, :, :].std(ddof = 1) |
| |
| return stds |
| |
| |
def calc_bias_annual(t1, t2, optn): # Mean Bias
    '''
    Calculate the mean difference between two fields over time for each grid point.
    If optn == 'abs', the mean absolute difference is calculated instead.
    '''
    # Preprocessing of both obs and model data ensures the absence of missing values
    diff = t1 - t2
    if optn == 'abs':
        diff = abs(diff)
| bias = diff.mean(axis = 0) |
| return bias |
| |
| |
| def calc_bias(t1, t2): |
| ''' |
| Calculate mean difference between two fields over time for each grid point |
| |
| Classify missing data resulting from multiple times (using threshold |
| data requirement) |
| |
| i.e. if the working time unit is monthly data, and we are dealing with |
| multiple months of data then when we show mean of several months, we need |
| to decide what threshold of missing data we tolerate before classifying a |
| data point as missing data. |
| ''' |
| t1Mask = process.create_mask_using_threshold(t1, threshold = 0.75) |
| t2Mask = process.create_mask_using_threshold(t2, threshold = 0.75) |
| |
| diff = t1 - t2 |
| bias = diff.mean(axis = 0) |
| |
| # Set mask for bias metric using missing data in obs or model data series |
| # i.e. if obs contains more than threshold (e.g.50%) missing data |
| # then classify time average bias as missing data for that location. |
| bias = ma.masked_array(bias.data, np.logical_or(t1Mask, t2Mask)) |
| return bias |
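
# A hedged usage sketch: modelData and obsData below are assumed to be masked
# arrays of identical shape (time, lat, lon) on a common grid:
#
#   bias = calc_bias(modelData, obsData)
#   # bias is a 2-D masked array (lat, lon); grid points failing the
#   # missing-data threshold test in either input are masked out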
| |
| |
| def calc_bias_dom(t1, t2): |
| ''' |
| Calculate domain mean difference between two fields over time |
| ''' |
| diff = t1 - t2 |
| bias = diff.mean() |
| return bias |
| |
| |
| def calc_difference(t1, t2): |
| ''' |
| Calculate mean difference between two fields over time for each grid point |
| ''' |
| print 'Calculating difference' |
| diff = t1 - t2 |
| return diff |
| |
| |
| def calc_mae(t1, t2): |
| ''' |
    Calculate the mean absolute difference (MAE) between two fields over time for each grid point
| |
| Classify missing data resulting from multiple times (using threshold |
| data requirement) |
| |
| i.e. if the working time unit is monthly data, and we are dealing with |
| multiple months of data then when we show mean of several months, we need |
| to decide what threshold of missing data we tolerate before classifying |
| a data point as missing data. |
| ''' |
| t1Mask = process.create_mask_using_threshold(t1, threshold = 0.75) |
| t2Mask = process.create_mask_using_threshold(t2, threshold = 0.75) |
| |
| diff = t1 - t2 |
| adiff = abs(diff) |
| |
| mae = adiff.mean(axis = 0) |
| |
| # Set mask for mae metric using missing data in obs or model data series |
| # i.e. if obs contains more than threshold (e.g.50%) missing data |
| # then classify time average mae as missing data for that location. |
| mae = ma.masked_array(mae.data, np.logical_or(t1Mask, t2Mask)) |
| return mae |
| |
| |
| def calc_mae_dom(t1, t2): |
| ''' |
| Calculate domain mean difference between two fields over time |
| ''' |
| diff = t1 - t2 |
| adiff = abs(diff) |
| mae = adiff.mean() |
| return mae |
| |
| |
| def calc_rms(t1, t2): |
| ''' |
    Calculate the root mean square (RMS) difference between two fields over time for each grid point
| ''' |
| diff = t1 - t2 |
| sqdiff = diff ** 2 |
| msd = sqdiff.mean(axis = 0) |
| rms = np.sqrt(msd) |
| return rms |
| |
| |
| def calc_rms_dom(t1, t2): |
| ''' |
| Calculate RMS differences between two fields |
| ''' |
| diff = t1 - t2 |
| sqdiff = diff ** 2 |
| msd = sqdiff.mean() |
| rms = np.sqrt(msd) |
| return rms |
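
# A hedged sketch contrasting the two RMS routines (modelData/obsData are the
# same hypothetical masked arrays as in the calc_bias example above):
#
#   rms_map = calc_rms(modelData, obsData)      # (lat, lon) map of RMS over time
#   rms_dom = calc_rms_dom(modelData, obsData)  # single RMS value for the whole domain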
| |
| |
| def calc_temporal_stdv(t1): |
| ''' |
| Calculate the temporal standard deviation. |
| |
| Input: |
| t1 - data array of any shape |
| |
| Output: |
| A 2-D array of temporal standard deviation |
| ''' |
    # TODO Make sure the first dimension of t1 is the time axis.
| stdv = t1.std(axis = 0) |
| return stdv |
| |
| |
| def calc_temporal_anom_cor(mD, oD): |
| ''' |
| Calculate the temporal anomaly correlation. |
| |
| Assumption(s); |
| The first dimension of mD and oD is the time axis. |
| |
| Input: |
| mD - model data array of any shape |
| oD - observation data array of any shape |
| |
| Output: |
| A 2-D array of time series pattern correlation coefficients at each grid point. |
| |
    Ref: pp. 277-281 in Wilks, D. S. (1995): Statistical Methods in the Atmospheric Sciences, Academic Press, 467 pp.
| ''' |
    mo = oD.mean(axis = 0)
    deno1 = ((mD - mo) * (mD - mo)).sum(axis = 0)
    deno2 = ((oD - mo) * (oD - mo)).sum(axis = 0)
    patcor = ((mD - mo) * (oD - mo)).sum(axis = 0) / np.sqrt(deno1 * deno2)
| return patcor |
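
# In the notation of the routine above, the quantity computed at each grid
# point (y, x) is
#
#   r(y, x) = sum_t[(mD - mo)(oD - mo)] / sqrt(sum_t(mD - mo)^2 * sum_t(oD - mo)^2)
#
# where mo is the observed time mean at that grid point, so both fields are
# measured as anomalies relative to the observed climatology.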
| |
| |
| def calc_spatial_anom_cor(mD, oD): |
| ''' |
| Calculate anomaly correlation between two 2-D arrays. |
| |
| Input: |
| mD - 2-D array of model data |
| oD - 2-D array of observation data |
| |
| Output: |
| The anomaly correlation between the two input arrays. |
| ''' |
| mo = oD.mean() |
| d1 = ((mD - mo)*(mD - mo)).sum() |
| d2 = ((oD - mo)*(oD - mo)).sum() |
    patcor = ((mD - mo) * (oD - mo)).sum() / np.sqrt(d1 * d2)
| return patcor |
| |
| |
| def calc_temporal_pat_cor(t1, t2): |
| ''' |
| Calculate the Temporal Pattern Correlation |
| |
| Input:: |
| t1 - 3d array of model data |
| t2 - 3d array of obs data |
| |
| Output:: |
| 2d array of time series pattern correlation coefficients at each grid point. |
| **Note:** std_dev is standardized on 1 degree of freedom |
| ''' |
| mt1 = t1.mean(axis = 0) |
| mt2 = t2.mean(axis = 0) |
| nt = t1.shape[0] |
    sigma_t1 = t1.std(axis = 0, ddof = 1)
    sigma_t2 = t2.std(axis = 0, ddof = 1)
    # Use (nt - 1) in the denominator to be consistent with the ddof = 1 standard deviations
    patcor = ((((t1 - mt1) * (t2 - mt2)).sum(axis = 0)) / (nt - 1)) / (sigma_t1 * sigma_t2)
| |
| return patcor |
| |
| |
def calc_spatial_pat_cor(t1, t2):
    '''
    Calculate the pattern correlation between 2-D arrays.
    6/10/2013 JK: Enforce that both t1 & t2 have identical masks before calculating std and corr.
| |
| Input: |
| t1 - 2-D array of model data |
| t2 - 2-D array of observation data |
| |
| Output: |
| Pattern correlation between two input arrays. |
| ''' |
    msk1 = ma.getmaskarray(t1)
    msk2 = ma.getmaskarray(t2)
    t1 = ma.masked_array(t1.data, np.logical_or(msk1, msk2))
    t2 = ma.masked_array(t2.data, np.logical_or(msk1, msk2))
    nPts = ma.count(t1)   # number of unmasked grid points
    mt1 = t1.mean()
    mt2 = t2.mean()
    st1 = t1.std()
    st2 = t2.std()
    patcor = ((t1 - mt1) * (t2 - mt2)).sum() / (nPts * st1 * st2)
| return patcor |
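
# A hedged usage sketch: modelMap and obsMap are assumed to be 2-D masked
# arrays on the same grid; the shared mask is enforced inside the routine:
#
#   r = calc_spatial_pat_cor(modelMap, obsMap)   # scalar in [-1, 1]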
| |
| |
| def calc_pat_cor2D(t1, t2, nT): |
| ''' |
| Calculate the pattern correlation between 3-D input arrays. |
| |
    Input:
        t1 - 3-D array of model data
        t2 - 3-D array of observation data
        nT - the expected number of time levels, used to validate the inputs

    Output:
        1-D array (time series) of pattern correlation coefficients.
    '''
    nt = t1.shape[0]
| if(nt != nT): |
| print 'input time levels do not match: Exit', nT, nt |
| return -1 |
| # store results in list for convenience (then convert to numpy array at the end) |
| patcor = [] |
| for t in xrange(nt): |
| mt1 = t1[t, :, :].mean() |
| mt2 = t2[t, :, :].mean() |
| sigma_t1 = t1[t, :, :].std() |
| sigma_t2 = t2[t, :, :].std() |
| # TODO: make means and standard deviations weighted by grid box area. |
| patcor.append((((((t1[t, :, :] - mt1) * (t2[t, :, :] - mt2)).sum()) / |
| (t1.shape[1] * t1.shape[2]) ) / (sigma_t1 * sigma_t2))) |
| print t, mt1.shape, mt2.shape, sigma_t1.shape, sigma_t2.shape, patcor[t] |
    # TODO: deal with missing data appropriately, i.e. mask out grid points with missing data above tolerance level
    # convert from list into numpy array
    patcor = np.array(patcor)
| #print patcor.shape |
| return patcor |
| |
| def calc_pat_cor(dataset_1, dataset_2): |
| ''' |
| Purpose: Calculate the Pattern Correlation Timeseries |
| Assumption(s):: |
| Both dataset_1 and dataset_2 are the same shape. |
| * lat, lon must match up |
| * time steps must align (i.e. months vs. months) |
| Input:: |
| dataset_1 - 3d (time, lat, lon) array of data |
| dataset_2 - 3d (time, lat, lon) array of data |
| Output: |
| patcor - a 1d array (time series) of pattern correlation coefficients. |
    **Note:** Standard deviation uses 1 degree of freedom; the debugging print
    statements show the difference the n-1 makes. See http://docs.scipy.org/doc/numpy/reference/generated/numpy.std.html
    6/17/2013 JK: Added an option for 1-d and 2-d arrays
| ''' |
| |
| # TODO: Add in try block to ensure the shapes match |
| |
| nDim1 = dataset_1.ndim |
| nDim2 = dataset_2.ndim |
| if nDim1 != nDim2: |
| print 'dimension mismatch in calc_pat_cor: exit', nDim1,nDim2 |
| sys.exit() |
| |
| if nDim1 == 1: |
| mt1 = dataset_1.mean() |
| mt2 = dataset_2.mean() |
| nt = dataset_1.shape[0] |
| sigma_t1 = dataset_1.std() |
| sigma_t2 = dataset_2.std() |
| patcor=((dataset_1 - mt1) * (dataset_2 - mt2)).sum() / (nt * sigma_t1 * sigma_t2) |
| |
| elif nDim1 == 2: |
| # find mean and std_dev |
| mt1 = dataset_1.mean() |
| mt2 = dataset_2.mean() |
| ny = dataset_1.shape[0] |
| nx = dataset_1.shape[1] |
| sigma_t1 = dataset_1.std() |
| sigma_t2 = dataset_2.std() |
| patcor=((dataset_1 - mt1) * (dataset_2 - mt2)).sum() / ((ny * nx) * (sigma_t1 * sigma_t2)) |
| |
| elif nDim1 == 3: |
| nt = dataset_1.shape[0] |
| ny = dataset_1.shape[1] |
| nx = dataset_1.shape[2] |
| # store results in list for convenience (then convert to numpy array) |
| patcor = [] |
| for t in xrange(nt): |
| # find mean and std_dev |
| mt1 = dataset_1[t, :, :].mean() |
| mt2 = dataset_2[t, :, :].mean() |
| sigma_t1 = dataset_1[t, :, :].std(ddof = 1) |
| sigma_t2 = dataset_2[t, :, :].std(ddof=1) |
| # TODO: make means and standard deviations weighted by grid box area. |
| # Equation from Santer_et_al 1995 |
| # patcor = (1/(N*M_std*O_std))*sum((M_i-M_bar)*(O_i-O_bar)) |
| patcor.append((((((dataset_1[t, :, :] - mt1) * (dataset_2[t, :, :] - mt2)).sum()) / (ny * nx)) / (sigma_t1 * sigma_t2))) |
| print t, mt1.shape, mt2.shape, sigma_t1.shape, sigma_t2.shape, patcor[t] |
| # TODO: deal with missing data appropriately, i.e. mask out grid points |
| # with missing data above tolerance level |
| # convert from list into numpy array |
| patcor = np.array(patcor) |
| |
| #print patcor.shape, patcor |
| return patcor |
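
# A hedged sketch of the three supported input ranks (arrays are illustrative):
#
#   calc_pat_cor(model1d, obs1d)   # 1-D inputs -> a single scalar
#   calc_pat_cor(model2d, obs2d)   # 2-D inputs -> a single scalar
#   calc_pat_cor(model3d, obs3d)   # 3-D inputs -> a 1-D array of length nT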
| |
| |
| def calc_anom_corn(dataset_1, dataset_2, climatology = None): |
| ''' |
| Calculate the anomaly correlation. |
| |
| Input: |
| dataset_1 - First input dataset |
| dataset_2 - Second input dataset |
| climatology - Optional climatology input array. Assumption is that it is for |
| the same time period by default. |
| |
| Output: |
| The anomaly correlation. |
| ''' |
| # TODO: Update docstring with actual useful information |
| |
| # store results in list for convenience (then convert to numpy array) |
| anomcor = [] |
| nt = dataset_1.shape[0] |
| #prompt for the third file, i.e. climo file... |
| #include making sure the lat, lon and times are ok for comparision |
| # find the climo in here and then using for eg, if 100 yrs |
| # is given for the climo file, but only looking at 10yrs |
| # ask if want to input climo dataset for use....if no, call previous |
| |
    if climatology is not None:
| climoFileOption = raw_input('Would you like to use the full observation dataset as \ |
| the climatology in this calculation? [y/n] \n>') |
| if climoFileOption == 'y': |
| base_dataset = climatology |
| else: |
| base_dataset = dataset_2 |
| for t in xrange(nt): |
| mean_base = base_dataset[t, :, :].mean() |
| anomcor.append((((dataset_1[t, :, :] - mean_base) * (dataset_2[t, :, :] - mean_base)).sum()) / |
| np.sqrt(((dataset_1[t, :, :] - mean_base) ** 2).sum() * |
| ((dataset_2[t, :, :] - mean_base) ** 2).sum())) |
| print t, mean_base.shape, anomcor[t] |
| |
| # TODO: deal with missing data appropriately, i.e. mask out grid points |
    # with missing data above tolerance level
| |
| # convert from list into numpy array |
| anomcor = np.array(anomcor) |
| print anomcor.shape, anomcor.ndim, anomcor |
| return anomcor |
| |
| |
| def calc_anom_cor(t1, t2): |
| ''' |
| Calculate the Anomaly Correlation (Deprecated) |
| ''' |
| |
| nt = t1.shape[0] |
| |
| # store results in list for convenience (then convert to numpy |
| # array at the end) |
| anomcor = [] |
| for t in xrange(nt): |
| |
| mt2 = t2[t, :, :].mean() |
| |
| sigma_t1 = t1[t, :, :].std(ddof = 1) |
| sigma_t2 = t2[t, :, :].std(ddof = 1) |
| |
| # TODO: make means and standard deviations weighted by grid box area. |
| |
| anomcor.append(((((t1[t, :, :] - mt2) * (t2[t, :, :] - mt2)).sum()) / |
| (t1.shape[1] * t1.shape[2])) / (sigma_t1 * sigma_t2)) |
| |
| print t, mt2.shape, sigma_t1.shape, sigma_t2.shape, anomcor[t] |
| |
    # TODO: deal with missing data appropriately, i.e. mask out grid points with
    # missing data above tolerance level
| |
| # convert from list into numpy array |
| anomcor = np.array(anomcor) |
| print anomcor.shape, anomcor.ndim, anomcor |
| return anomcor |
| |
| |
| def calc_nash_sutcliff(dataset_1, dataset_2): |
| ''' |
    Routine to calculate the Nash-Sutcliffe coefficient of efficiency (E)
| |
| Assumption(s):: |
| Both dataset_1 and dataset_2 are the same shape. |
| * lat, lon must match up |
| * time steps must align (i.e. months vs. months) |
| |
| Input:: |
| dataset_1 - 3d (time, lat, lon) array of data |
| dataset_2 - 3d (time, lat, lon) array of data |
| |
| Output: |
| nashcor - 1d array aligned along the time dimension of the input |
        datasets. Time series of the Nash-Sutcliffe coefficient of efficiency
| |
| ''' |
| |
| nt = dataset_1.shape[0] |
| nashcor = [] |
| for t in xrange(nt): |
| mean_dataset_2 = dataset_2[t, :, :].mean() |
| |
| nashcor.append(1 - ((((dataset_2[t, :, :] - dataset_1[t, :, :]) ** 2).sum()) / |
| ((dataset_2[t, :, :] - mean_dataset_2) ** 2).sum())) |
| |
| print t, mean_dataset_2.shape, nashcor[t] |
| |
| nashcor = np.array(nashcor) |
| print nashcor.shape, nashcor.ndim, nashcor |
| return nashcor |
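
# For each time level t the routine evaluates (in the notation above)
#
#   E_t = 1 - sum((obs_t - model_t)^2) / sum((obs_t - mean(obs_t))^2)
#
# so E_t = 1 indicates a perfect match, E_t = 0 means the model does no better
# than the observed spatial mean, and E_t < 0 means it does worse.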
| |
| |
| def calc_pdf(dataset_1, dataset_2): |
| ''' |
| Routine to calculate a normalized Probability Distribution Function with |
| bins set according to data range. |
| Equation from Perkins et al. 2007 |
| |
| PS=sum(min(Z_O_i, Z_M_i)) where Z is the distribution (histogram of the data for either set) |
| called in do_rcmes_processing_sub.py |
| |
| Inputs:: |
| 2 arrays of data |
| t1 is the modelData and t2 is 3D obsdata - time,lat, lon NB, time here |
| is the number of time values eg for time period 199001010000 - 199201010000 |
| |
| if annual means-opt 1, was chosen, then t2.shape = (2,lat,lon) |
| |
| if monthly means - opt 2, was choosen, then t2.shape = (24,lat,lon) |
| |
| User inputs: number of bins to use and edges (min and max) |
| Output: |
| |
| one float which represents the PDF for the year |
| |
| TODO: Clean up this docstring so we have a single purpose statement |
| |
| Routine to calculate a normalised PDF with bins set according to data range. |
| |
| Input:: |
| 2 data arrays, modelData and obsData |
| |
| Output:: |
| PDF for the year |
| |
| ''' |
| #list to store PDFs of modelData and obsData |
| pdf_mod = [] |
| pdf_obs = [] |
| # float to store the final PDF similarity score |
| similarity_score = 0.0 |
    d1_max = dataset_1.max()
    d1_min = dataset_1.min()
| |
| print 'min modelData', dataset_1[:, :, :].min() |
| print 'max modelData', dataset_1[:, :, :].max() |
| print 'min obsData', dataset_2[:, :, :].min() |
| print 'max obsData', dataset_2[:, :, :].max() |
| # find a distribution for the entire dataset |
    # prompt the user to enter the min, max and number of bin values.
    # The max, min info above is to help guide the user with these choices
| print '****PDF input values from user required **** \n' |
| nbins = int (raw_input('Please enter the number of bins to use. \n')) |
| minEdge = float(raw_input('Please enter the minimum value to use for the edge. \n')) |
| maxEdge = float(raw_input('Please enter the maximum value to use for the edge. \n')) |
| |
    # np.histogram treats mybins as bin edges, so nbins bins need nbins + 1 edges
    mybins = np.linspace(minEdge, maxEdge, nbins + 1)
    print 'nbins is', nbins, 'mybins are', mybins
| |
| |
    # numpy.histogram has no 'new' kwarg and 'normed' is deprecated; use the
    # 'density' parameter instead, per
    # http://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html
    pdf_mod, edges = np.histogram(dataset_1, bins = mybins, density = True)
    print 'dataset_1 distribution and edges', pdf_mod, edges
    pdf_obs, edges = np.histogram(dataset_2, bins = mybins, density = True)
    print 'dataset_2 distribution and edges', pdf_obs, edges
| |
| |
| #find minimum at each bin between lists |
| i = 0 |
| for model_value in pdf_mod : |
| print 'model_value is', model_value, 'pdf_obs[', i, '] is', pdf_obs[i] |
| if model_value < pdf_obs[i]: |
| similarity_score += model_value |
| else: |
| similarity_score += pdf_obs[i] |
| i += 1 |
| print 'similarity_score is', similarity_score |
| return similarity_score |
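
# A hedged usage sketch: calc_pdf is interactive (it prompts via raw_input for
# the number of bins and the bin edges), so it is meant for command-line use:
#
#   score = calc_pdf(modelData, obsData)
#   # a larger score indicates greater overlap between the two normalized histograms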
| |
| |
| def calc_stdev(t1): |
| ''' |
| Calculate the standard deviation for a given dataset. |
| |
| Input: |
| t1 - Dataset to calculate the standard deviation on. |
| |
| Output: |
| Array of the standard deviations for each month in the provided dataset. |
| ''' |
| nt = t1.shape[0] |
| sigma_t1 = [] |
| for t in xrange(nt): |
| sigma_t1.append(t1[t, :, :].std(ddof = 1)) |
| sigma_t1 = np.array(sigma_t1) |
| print sigma_t1, sigma_t1.shape |
| return sigma_t1 |
| |
| # 6/10/2013 JK: plotting routines are added below |
| |
| def pow_round(x): |
| ''' |
| Function to round x to the nearest power of 10 |
| ''' |
| return 10 ** (floor(log(x, 10) - log(0.5, 10))) |
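
# A few illustrative values (the cutover between adjacent powers of 10 falls
# at 5 * 10^n because of the log(0.5, 10) shift):
#
#   pow_round(7.3)    # -> 10.0
#   pow_round(4.9)    # -> 1.0
#   pow_round(0.04)   # -> 0.01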
| |
| def calcNiceIntervals(data, nLevs): |
| ''' |
| Purpose:: |
| Calculates nice intervals between each color level for colorbars |
| and contour plots. The target minimum and maximum color levels are |
| calculated by taking the minimum and maximum of the distribution |
| after cutting off the tails to remove outliers. |
| |
| Input:: |
| data - an array of data to be plotted |
| nLevs - an int giving the target number of intervals |
| |
| Output:: |
| cLevs - A list of floats for the resultant colorbar levels |
| ''' |
| # Find the min and max levels by cutting off the tails of the distribution |
| # This mitigates the influence of outliers |
| data = data.ravel() |
| mnLvl = mstats.scoreatpercentile(data, 5) |
| mxLvl = mstats.scoreatpercentile(data, 95) |
| locator = mpl.ticker.MaxNLocator(nLevs) |
| cLevs = locator.tick_values(mnLvl, mxLvl) |
| |
| # Make sure the bounds of cLevs are reasonable since sometimes |
| # MaxNLocator gives values outside the domain of the input data |
| cLevs = cLevs[(cLevs >= mnLvl) & (cLevs <= mxLvl)] |
| return cLevs |
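
# A hedged sketch: for an arbitrary data array, something like the following
# is returned (the exact values depend on MaxNLocator's choice of 'nice' steps):
#
#   cLevs = calcNiceIntervals(data, 10)
#   # ~10 evenly spaced levels spanning the 5th-95th percentile range of data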
| |
| def calcBestGridShape(nPlots, oldShape): |
| ''' |
| Purpose:: |
| Calculate a better grid shape in case the user enters more columns |
| and rows than needed to fit a given number of subplots. |
| |
| Input:: |
| nPlots - an int giving the number of plots that will be made |
| oldShape - a tuple denoting the desired grid shape (nRows, nCols) for arranging |
| the subplots originally requested by the user. |
| |
| Output:: |
| newShape - the smallest possible subplot grid shape needed to fit nPlots |
| ''' |
| nRows, nCols = oldShape |
| size = nRows * nCols |
| diff = size - nPlots |
| if diff < 0: |
| raise ValueError('gridShape=(%d, %d): Cannot fit enough subplots for data' %(nRows, nCols)) |
| else: |
| # If the user enters an excessively large number of |
| # rows and columns for gridShape, automatically |
| # correct it so that it fits only as many plots |
| # as needed |
| while diff >= nCols: |
| nRows -= 1 |
| size = nRows * nCols |
| diff = size - nPlots |
| |
| # Don't forget to remove unnecessary columns too |
| if nRows == 1: |
| nCols = nPlots |
| |
| newShape = nRows, nCols |
| return newShape |
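
# Illustrative calls (these follow directly from the shrinking loop above):
#
#   calcBestGridShape(5, (4, 3))   # -> (2, 3)
#   calcBestGridShape(3, (4, 4))   # -> (1, 3)
#   calcBestGridShape(8, (2, 3))   # raises ValueError: cannot fit 8 plots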
| |
| def drawPortraitDiagramSingle(data, rowLabels, colLabels, cLevs, fName, fType = 'png', |
| xLabel = '', yLabel = '', cLabel = '', pTitle = '', cMap = None): |
| ''' |
| Purpose:: |
| Makes a portrait diagram plot. |
| |
| Input:: |
| data - 2d array of the field to be plotted |
| rowLabels - list of strings denoting labels for each row |
| colLabels - list of strings denoting labels for each column |
| cLevs - a list of integers or floats specifying the colorbar levels |
| xLabel - a string specifying the x-axis title |
| yLabel - a string specifying the y-axis title |
| cLabel - a string specifying the colorbar title |
| pTitle - a string specifying the plot title |
| fName - a string specifying the filename of the plot |
| fType - an optional string specifying the filetype, default is .png |
        cMap - an optional matplotlib.LinearSegmentedColormap object denoting the colormap
| ''' |
| # Set up the colormap if not specified |
| if cMap is None: |
| cMap = plt.cm.RdBu_r |
| |
| # Set up figure and axes |
| fig = plt.figure() |
| ax = fig.add_subplot(111) |
| |
| # Make the portrait diagram |
| norm = mpl.colors.BoundaryNorm(cLevs, cMap.N) |
| cs = ax.matshow(data, cmap = cMap, aspect = 'auto', origin = 'lower', norm = norm) |
| |
| # Add colorbar |
| cbar = fig.colorbar(cs, norm = norm, boundaries = cLevs, drawedges = True, |
| pad = .05) |
| cbar.set_label(cLabel) |
| cbar.set_ticks(cLevs) |
| cbar.ax.xaxis.set_ticks_position("none") |
| cbar.ax.yaxis.set_ticks_position("none") |
| |
| # Add grid lines |
| ax.xaxis.set_ticks(np.arange(data.shape[1] + 1)) |
| ax.yaxis.set_ticks(np.arange(data.shape[0] + 1)) |
| x = (ax.xaxis.get_majorticklocs() - .5) |
| y = (ax.yaxis.get_majorticklocs() - .5) |
| ax.vlines(x, y.min(), y.max()) |
| ax.hlines(y, x.min(), x.max()) |
| |
| # Configure ticks |
| ax.xaxis.tick_bottom() |
| ax.xaxis.set_ticks_position('none') |
| ax.yaxis.set_ticks_position('none') |
    ax.set_xticklabels(colLabels)
    ax.set_yticklabels(rowLabels)
| |
| # Add labels and title |
| ax.set_xlabel(xLabel) |
| ax.set_ylabel(yLabel) |
| ax.set_title(pTitle) |
| |
| # Save the figure |
| fig.savefig('%s.%s' %(fName, fType)) |
| plt.show() |
| |
| def drawPortraitDiagram(dataset, rowLabels, colLabels, fName, fType = 'png', |
| gridShape = (1, 1), xLabel = '', yLabel = '', cLabel = '', |
| pTitle = '', subTitles = None, cMap = None, cLevs = None, |
| nLevs = 10, extend = 'neither'): |
| ''' |
| Purpose:: |
| Makes a portrait diagram plot. |
| |
| Input:: |
| dataset - 3d array of the field to be plotted (nT, nRows, nCols) |
| rowLabels - a list of strings denoting labels for each row |
| colLabels - a list of strings denoting labels for each column |
| fName - a string specifying the filename of the plot |
| fType - an optional string specifying the output filetype |
| xLabel - an optional string specifying the x-axis title |
| yLabel - an optional string specifying the y-axis title |
| cLabel - an optional string specifying the colorbar title |
| pTitle - a string specifying the plot title |
| subTitles - an optional list of strings specifying the title for each subplot |
| cMap - an optional matplotlib.LinearSegmentedColormap object denoting the colormap |
| cLevs - an optional list of ints or floats specifying colorbar levels |
| nLevs - an optional integer specifying the target number of contour levels if |
| cLevs is None |
| extend - an optional string to toggle whether to place arrows at the colorbar |
| boundaries. Default is 'neither', but can also be 'min', 'max', or |
| 'both'. Will be automatically set to 'both' if cLevs is None. |
| |
| ''' |
    # Handle the single plot case.
    if dataset.ndim == 2:
        dataset = dataset.reshape(1, *dataset.shape)
| |
| nPlots = dataset.shape[0] |
| |
| # Make sure gridShape is compatible with input data |
| gridShape = calcBestGridShape(nPlots, gridShape) |
| |
| # Row and Column labels must be consistent with the shape of |
| # the input data too |
| nRows, nCols = dataset.shape[1:] |
| if len(rowLabels) != nRows or len(colLabels) != nCols: |
| raise ValueError('rowLabels and colLabels must have %d and %d elements respectively' %(nRows, nCols)) |
| |
| # Set up the colormap if not specified |
| if cMap is None: |
| cMap = plt.cm.coolwarm |
| |
| # Set up the figure |
| fig = plt.figure() |
| fig.set_size_inches((8.5, 11.)) |
| fig.dpi = 300 |
| |
| # Make the subplot grid |
| grid = ImageGrid(fig, 111, |
| nrows_ncols = gridShape, |
| axes_pad = 0.4, |
| share_all = True, |
| aspect = False, |
| add_all = True, |
| ngrids = nPlots, |
| label_mode = "all", |
| cbar_mode = 'single', |
| cbar_location = 'bottom', |
| cbar_pad = '3%', |
| cbar_size = .15 |
| ) |
| |
| # Calculate colorbar levels if not given |
| if cLevs is None: |
| # Cut off the tails of the distribution |
| # for more representative colorbar levels |
| cLevs = calcNiceIntervals(dataset, nLevs) |
| extend = 'both' |
| |
| norm = mpl.colors.BoundaryNorm(cLevs, cMap.N) |
| |
| # Do the plotting |
| for i, ax in enumerate(grid): |
| data = dataset[i] |
| cs = ax.matshow(data, cmap = cMap, aspect = 'auto', origin = 'lower', norm = norm) |
| |
| # Add grid lines |
| ax.xaxis.set_ticks(np.arange(data.shape[1] + 1)) |
| ax.yaxis.set_ticks(np.arange(data.shape[0] + 1)) |
| x = (ax.xaxis.get_majorticklocs() - .5) |
| y = (ax.yaxis.get_majorticklocs() - .5) |
| ax.vlines(x, y.min(), y.max()) |
| ax.hlines(y, x.min(), x.max()) |
| |
| # Configure ticks |
| ax.xaxis.tick_bottom() |
| ax.xaxis.set_ticks_position('none') |
| ax.yaxis.set_ticks_position('none') |
| ax.set_xticklabels(colLabels, fontsize = 'xx-small') |
| ax.set_yticklabels(rowLabels, fontsize = 'xx-small') |
| |
| # Add axes title |
| if subTitles is not None: |
| ax.text(0.5, 1.04, subTitles[i], va = 'center', ha = 'center', |
| transform = ax.transAxes, fontsize = 'small') |
| |
| # Add colorbar |
| cbar = fig.colorbar(cs, cax = ax.cax, norm = norm, boundaries = cLevs, drawedges = True, |
| extend = extend, orientation = 'horizontal') |
| cbar.set_label(cLabel) |
| cbar.set_ticks(cLevs) |
| cbar.ax.xaxis.set_ticks_position("none") |
| cbar.ax.yaxis.set_ticks_position("none") |
| |
| # This is an ugly hack to make the title show up at the correct height. |
| # Basically save the figure once to achieve tight layout and calculate |
| # the adjusted heights of the axes, then draw the title slightly above |
| # that height and save the figure again |
| fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi) |
| ymax = 0 |
| for ax in grid: |
| bbox = ax.get_position() |
| ymax = max(ymax, bbox.ymax) |
| |
    # Add figure title and axes labels
    fig.text(.51, .14, xLabel, va = 'center', ha = 'center', rotation = 'horizontal')
    fig.text(.08, .53, yLabel, va = 'center', ha = 'center', rotation = 'vertical')
| fig.suptitle(pTitle, y = ymax + .04, fontsize = 16) |
| fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi) |
| plt.show() |
| fig.clf() |
| |
| def taylorDiagram(pltDat, pltTit, pltFileName, refName, legendPos, radMax): |
| ''' |
| Draw a Taylor diagram |
| ''' |
| stdref = 1. # Standard reference value |
| rect = 111 # Subplot setting and location |
| markerSize = 6 |
| fig = plt.figure() |
    fig.suptitle(pltTit) # Plot title
| dia = TaylorDiagram (stdref, fig = fig, radMax = radMax, rect = rect, label = refName) |
| for i,(stddev,corrcoef,name) in enumerate(pltDat): |
| dia.add_sample (stddev, corrcoef, marker = '$%d$' % (i+1), ms = markerSize, label=name) |
    # Add polylines to mark a range specified by input data - to be implemented
| #circular_line = dia.add_stddev_contours(0.959, 1, 0.965) |
| #circular_line = dia.add_stddev_contours(1.1, 1, 0.973) |
| #straight_line = dia.add_contours(0.959, 1, 1.1, 1) |
| #straight_line = dia.add_contours(0.959, 0.965, 1.1, 0.973) |
    # loc: 1='upper right', 2='upper left', or specified in the calling program via "legendPos"
    l = fig.legend (dia.samplePoints, [p.get_label() for p in dia.samplePoints ], handlelength=0., prop={'size':10}, numpoints=1, loc=legendPos)
| l.draw_frame(False) |
| plt.savefig(pltFileName) |
| plt.show() |
| pltDat = 0. |
| |
| def drawTimeSeriesSingle(dataset, times, labels, fName, fType = 'png', xLabel = '', yLabel ='', pTitle ='', |
| legendPos = 'upper center', legendFrameOn = False, yearLabels = True, yscale = 'linear'): |
| ''' |
| Purpose:: |
| Function to draw a time series plot |
| |
| Input:: |
| dataset - a list of arrays for each dataset as a time series |
| times - a list of python datetime objects |
| labels - a list of strings with the names of each dataset |
| fName - a string specifying the filename of the plot |
| fType - an optional string specifying the output filetype |
| xLabel - a string specifying the x-axis title |
| yLabel - a string specifying the y-axis title |
| pTitle - a string specifying the plot title |
| legendPos - an optional string or tuple of float for determining |
| the position of the legend |
| legendFrameOn - optional bool to toggle drawing the frame around |
| the legend |
        yearLabels - optional bool to toggle drawing year labels on the x-axis
| yscale - optional string for setting the y-axis scale, 'linear' for linear |
| and 'log' for log base 10. |
| ''' |
| fig = plt.figure() |
| ax = fig.add_subplot(111) |
| |
| if not yearLabels: |
| xfmt = mpl.dates.DateFormatter('%b') |
| ax.xaxis.set_major_formatter(xfmt) |
| |
| ax.set_xlabel(xLabel) |
| ax.set_ylabel(yLabel) |
| ax.set_title(pTitle) |
| |
| # Set up list of lines for legend |
| lines = [] |
| ymin, ymax = 0, 0 |
| |
| # Plot each dataset |
| for data in dataset: |
| line = ax.plot_date(times, data, '', linewidth = 2) |
| lines.extend(line) |
| cmin, cmax = data.min(), data.max() |
| if ymin > cmin: |
| ymin = cmin |
| if ymax < cmax: |
| ymax = cmax |
| |
| # Add a bit of padding so lines don't touch bottom and top of the plot |
| ymin = ymin - ((ymax - ymin) * 0.1) |
| ymax = ymax + ((ymax - ymin) * 0.1) |
| ax.set_ylim((ymin, ymax)) |
| |
| # Set the y-axis scale |
| ax.set_yscale(yscale) |
| |
| # Create the legend |
| ax.legend((lines), labels, loc = legendPos, ncol = 10, fontsize='x-small', |
| frameon=legendFrameOn) |
| fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight') |
| plt.show() |
| fig.clf() |
| |
| |
| def pltXY(x, y, lineLabel, lineTyp, pltTit, xLabel, yLabel, pltName, xmin, xmax, deltaX, ymin, ymax, deltaY, legendPos, xScale, yScale): |
| """ |
| The default drawing order for axes is patches, lines, text.This order is determined by the zorder attribute. The following defaults are set: |
| Artist Z-order |
| Patch / PatchCollection 1 |
| Line2D / LineCollection 2 |
| Text 3 |
| You can change the order for individual artists by setting the zorder. Any individual plot() call can set a value |
| for the zorder of that particular item. |
| In the fist subplot below, the lines are drawn above the patch collection from the scatter, which is the default. |
| In the subplot below, the order is reversed. |
| The second figure shows how to control the zorder of individual lines. |
| Arguments |
| x (nX) : np array: the number of points in the x axis |
| y (nX,nY): np array: the number of y values to be plotted |
| lineLabel: list(nY): labels for individual y data |
| pltTit : Text : plot title |
| xLabel : Text : x-axis label |
| yLabel : Text : y-axis label |
| pltName : Text : name of the plot file |
| 3/28/2013 Jinwon Kim: Modification of a routine in the matplotlib gallery |
| """ |
| #lineColors = ['k', 'b', 'r', 'g', 'c', 'm', 'y', '0.5', '0.55', '0.6', '0.65', '0.7', '0.75', '0.8', '0.85', '0.9', '0.95', '1.0'] |
| lineColors = ['k','r','y','g','b','c','m','k','r','y','g','b','c','m','k','r','y','g','b','c','m','0.35','0.5','0.65','0.8','.95'] |
| nX = x.size |
| nY = len(lineLabel) |
| lineColors[nY - 1] = 'b' |
| fig = plt.figure() |
| ax = fig.add_subplot(1,1,1) |
| for n in np.arange(nY): |
| if n <= 6: |
| opacity=1. |
| elif n <= 13: |
| opacity = .75 |
| elif n <= 20: |
| opacity = .40 |
| plot(x,y[n, :], linewidth=1, alpha=opacity, color=lineColors[n], linestyle=lineTyp[n], label=lineLabel[n], zorder = 10) |
| xlabel(xLabel,fontsize=10); ylabel(yLabel,fontsize=10) |
| if xmax > xmin: |
| plt.xlim(xmin,xmax) |
| ticks = frange(xmin,xmax,npts=(xmax-xmin)/deltaX+1) |
| ax.xaxis.set_ticks(ticks,minor=False) |
| if ymax > ymin: |
| plt.ylim(ymin,ymax) |
| ticks = frange(ymin,ymax,npts=(ymax-ymin)/deltaY+1) |
| ax.yaxis.set_ticks(ticks,minor=False) |
| ax.set_title(pltTit) |
| ax.xaxis.tick_bottom(); ax.yaxis.tick_left() # put tick marks only on the left (for y) and bottom (for x) axis |
| if xScale == 'log': |
| ax.set_xscale('log') |
| else: |
| ax.set_xscale('linear') |
| if yScale == 'log': |
| ax.set_yscale('log') |
| else: |
| ax.set_yscale('linear') |
| if(nY>1): |
| l = legend(prop={'size':10},loc='best') |
| l.set_zorder(20) # put the legend on top |
| plt.savefig(pltName) |
| show() |
| # release work arrays |
| x=0.; y=0. |
| |
| |
| def pltSca1F(x, y, pltName, xLabel, yLabel, pmin, pmax, delP, pTit, pFname, xScale, yScale): |
| #*************************************# |
| # Plot a single-frame scatter diagram # |
| #*************************************# |
| fig = plt.figure(); ax = fig.add_subplot(1,1,1) |
| lTyp='o' |
| plot(x,y,lTyp,c='r') |
| if pmax > pmin: |
| ticks = frange(pmin,pmax,npts=(pmax-pmin)/delP+1) |
| plt.xlim(pmin,pmax); ax.xaxis.set_ticks(ticks,minor=False) |
| plt.ylim(pmin,pmax); ax.yaxis.set_ticks(ticks,minor=False) |
| ax.set_title(pTit) |
| if xScale == 'log': |
| ax.set_xscale('log') |
| else: |
| ax.set_xscale('linear') |
| if yScale == 'log': |
| ax.set_yscale('log') |
| else: |
| ax.set_yscale('linear') |
| xlabel(xLabel,fontsize=10); ylabel(yLabel,fontsize=10) |
| l = legend(prop={'size':8},loc='best') |
| l.set_zorder(20) |
| plt.savefig(pFname) |
| show() |
| x=0.; y=0. |
| |
| |
| def pltSca6F(x, dName, pmin, pmax, delP, pTitle, pFname, xScale, yScale): |
| #****************************************************# |
| # Plot up to 6 frames (6 rows X 2 columns) on a page # |
| #****************************************************# |
| nPlt = x.shape[0] |
| if pmax > pmin: |
| ticks = frange(pmin,pmax,npts=(pmax-pmin)/delP+1) |
    if nPlt > 6:
        print 'frames exceed 6: return'
        return
| nrows = 3; ncols = 2; npmax = nrows*ncols; lTyp='o'; xlabcol='black'; ylabcol='green' |
| fig = plt.figure() |
| plt.subplots_adjust(hspace=0.3, wspace=0.2) |
| for n in range(1,nPlt): |
| pid = n % npmax |
| if pid == 0: |
| pid = npmax |
| ax = fig.add_subplot(nrows,ncols,pid) |
| #ax.set_title(pTitle[n-1],fontsize=6) |
| if xScale == 'log': |
| ax.set_xscale('log') |
| else: |
| ax.set_xscale('linear') |
| if yScale == 'log': |
| ax.set_yscale('log') |
| else: |
| ax.set_yscale('linear') |
        plot(x[0,:],x[n,:],lTyp,label=pTitle[n-1],c='r')
        plot(frange(pmin,pmax),c='b')
| l = legend(prop={'size':8},loc='best') |
| l.set_zorder(20) |
| xlabel(dName[0],fontsize=10); ylabel(dName[n],fontsize=10) |
| if pmax > pmin: |
| plt.xlim(pmin,pmax); ax.xaxis.set_ticks(ticks,minor=False) |
| plt.ylim(pmin,pmax); ax.yaxis.set_ticks(ticks,minor=False) |
| for label in ax.xaxis.get_ticklabels(): |
| label.set_color(xlabcol) |
| label.set_rotation(0) |
| label.set_fontsize(8) |
| for label in ax.yaxis.get_ticklabels(): |
| label.set_color(ylabcol) |
| label.set_rotation(0) |
| label.set_fontsize(8) |
| plt.savefig(pFname) |
| show() |
| x=0. |
| |
| def drawContourMapSingle(data, lats, lons, cLevs, fName, fType = 'png', |
| cLabel = '', pTitle = '', cMap = None, nParallels = 5, nMeridians = 5): |
| ''' |
| Purpose:: |
| Plots a filled contour map. |
| Input:: |
        data - 2d array of the field to be plotted with shape (nLat, nLon)
| lats - array of latitudes |
| lons - array of longitudes |
| cLevs - A list of ints or floats specifying contour levels |
| fName - a string specifying the filename of the plot |
| fType - an optional string specifying the filetype, default is .png |
| cLabel - an optional string specifying the colorbar title |
| pTitle - an optional string specifying plot title |
| cMap - an optional matplotlib.LinearSegmentedColormap object denoting the colormap |
| nParallels - an optional int for the number of parallels to draw |
| nMeridians - an optional int for the number of meridians to draw |
| ''' |
| # Set up the colormap if not specified |
| if cMap is None: |
| cMap = plt.cm.RdBu_r |
| |
| # Set up the figure |
| fig = plt.figure() |
| ax = fig.add_subplot(111) |
| |
| # Determine the map boundaries and construct a Basemap object |
| lonMin = lons.min() |
| lonMax = lons.max() |
| latMin = lats.min() |
| latMax = lats.max() |
| m = Basemap(projection = 'cyl', llcrnrlat = latMin, urcrnrlat = latMax, |
| llcrnrlon = lonMin, urcrnrlon = lonMax, resolution = 'l', ax = ax) |
| |
| # Draw the borders for coastlines and countries |
| m.drawcoastlines(linewidth = 1) |
| m.drawcountries(linewidth = .75) |
| |
    # Draw parallels / meridians.
    m.drawmeridians(np.linspace(lonMin, lonMax, nMeridians), labels = [0, 0, 0, 1])
    m.drawparallels(np.linspace(latMin, latMax, nParallels), labels = [1, 0, 0, 1])
| |
| # Convert lats and lons to projection coordinates |
| if lats.ndim == 1 and lons.ndim == 1: |
| lons, lats = np.meshgrid(lons, lats) |
| x, y = m(lons, lats) |
| |
| # Plot data with filled contours |
| cs = m.contourf(x, y, data, cmap = cMap, levels = cLevs) |
| |
| # Add colorbar |
| cbar = m.colorbar(cs, drawedges = True, pad = '2%', size = '3%') |
| cbar.set_label(cLabel) |
| cbar.set_ticks(cLevs) |
| cbar.ax.xaxis.set_ticks_position("none") |
| cbar.ax.yaxis.set_ticks_position("none") |
| |
| # Add title and save the figure |
| ax.set_title(pTitle) |
| fig.savefig('%s.%s' %(fName, fType)) |
| show() |
| |
| def drawCntrMap(data,lon,lat,titles,ncols,pFname): |
| ''' |
| This routine is based on PyNGL, i.e., NcarGraphics |
| data - a masked numpy array of data to plot (nT,nY,nX) |
| lon - longitude (nY,nX) |
| lat - latitude (nY,nX) |
| titles - array of titles (nT) |
| nrows - number of rows of the paneled plots |
| ncols - number of columns of the paneled plots |
| nT = nrows * ncols |
| pFname - name of output postscript file |
| ''' |
| wks_type = 'ps' |
| if data.ndim == 2: |
| nT = 1; nrows = 1; ncols = 1 |
| elif data.ndim == 3: |
| nT=data.shape[0] |
| if nT % ncols == 0: |
| nrows = nT/ncols |
| else: |
| nrows = nT/ncols + 1 |
| # set workstation type (X11, ps, png) |
| res = Ngl.Resources() |
| wks = Ngl.open_wks(wks_type,pFname,res) |
| # set plot resource paramters |
| resources = Ngl.Resources() |
| resources.mpLimitMode = "LatLon" # Limit the map view. |
| resources.mpMinLonF = lon.min() |
| resources.mpMaxLonF = lon.max() |
| resources.mpMinLatF = lat.min() |
| resources.mpMaxLatF = lat.max() |
| resources.cnFillOn = True |
| resources.cnLineLabelsOn = False # Turn off line labels. |
| resources.cnInfoLabelOn = False # Turn off info label. |
| resources.cnLinesOn = False # Turn off countour line (only filled colors) |
| resources.sfXArray = lon[:,:] |
| resources.sfYArray = lat[:,:] |
| resources.pmTickMarkDisplayMode = "Never" # Turn off map tickmarks. |
| resources.mpOutlineBoundarySets = "GeophysicalAndUSStates" |
| resources.mpGeophysicalLineColor = "red" |
| resources.mpUSStateLineColor = "red" |
| resources.mpGeophysicalLineThicknessF = 0.75 |
| resources.mpUSStateLineThicknessF = 0.75 |
| resources.mpPerimOn = True # Turn on/off map perimeter |
| resources.mpGridAndLimbOn = True # Turn off map grid. |
| resources.nglFrame = False # Don't advance the frame |
| resources.nglDraw = False |
| plot=[] |
| for iT in np.arange(nT): |
| resources.tiMainString = titles[iT] |
| #resources.pmLabelBarDisplayMode = "Never" # Turn off individual label bars |
| resources.pmLabelBarDisplayMode = "Always" # Turn on individual label bars |
| if data.ndim == 3: |
| plot.append(Ngl.contour_map(wks,data[iT,:,:],resources)) |
| elif data.ndim == 2: |
| plot.append(Ngl.contour_map(wks,data[:,:],resources)) |
| panelres = Ngl.Resources() |
| panelres.nglPanelTop=0.95 |
| panelres.nglPanelBottom=0.05 |
| panelres.nglPanelLabelBar = False # Turn on panel labelbar |
| panelres.nglPanelLabelBarLabelFontHeightF = 0.012 # Labelbar font height |
| panelres.nglPanelLabelBarHeightF = 0.03 # Height of labelbar |
| panelres.nglPanelLabelBarWidthF = 0.8 # Width of labelbar |
| panelres.lbLabelFont = "helvetica-bold" # Labelbar font |
| Ngl.panel(wks,plot[0:nT],[nrows,ncols],panelres) |
| Ngl.destroy(wks) |
| del plot |
| del resources |
| del wks |
| return |
| |
| def drawContourMap(dataset, lats, lons, fName, fType = 'png', gridShape = (1, 1), |
| cLabel = '', pTitle = '', subTitles = None, cMap = None, |
| cLevs = None, nLevs = 10, parallels = None, meridians = None, |
| extend = 'neither'): |
| ''' |
| Purpose:: |
| Create a multiple panel contour map plot. |
| Input:: |
        dataset - 3d array of the field to be plotted with shape (nT, nLat, nLon)
| lats - array of latitudes |
| lons - array of longitudes |
| fName - a string specifying the filename of the plot |
| fType - an optional string specifying the filetype, default is .png |
| gridShape - optional tuple denoting the desired grid shape (nRows, nCols) for arranging |
| the subplots. |
| cLabel - an optional string specifying the colorbar title |
| pTitle - an optional string specifying plot title |
| subTitles - an optional list of strings specifying the title for each subplot |
| cMap - an optional matplotlib.LinearSegmentedColormap object denoting the colormap |
| cLevs - an optional list of ints or floats specifying contour levels |
| nLevs - an optional integer specifying the target number of contour levels if |
| cLevs is None |
| parallels - an optional list of ints or floats for the parallels to be drawn |
| meridians - an optional list of ints or floats for the meridians to be drawn |
| extend - an optional string to toggle whether to place arrows at the colorbar |
| boundaries. Default is 'neither', but can also be 'min', 'max', or |
| 'both'. Will be automatically set to 'both' if cLevs is None. |
| ''' |
| # Handle the single plot case. Meridians and Parallels are not labeled for |
| # multiple plots to save space. |
| if dataset.ndim == 2 or (dataset.ndim == 3 and dataset.shape[0] == 1): |
| if dataset.ndim == 2: |
| dataset = dataset.reshape(1, *dataset.shape) |
| nPlots = 1 |
| mLabels = [0, 0, 0, 1] |
| pLabels = [1, 0, 0, 1] |
| else: |
| nPlots = dataset.shape[0] |
| mLabels = [0, 0, 0, 0] |
| pLabels = [0, 0, 0, 0] |
| |
| # Make sure gridShape is compatible with input data |
| gridShape = calcBestGridShape(nPlots, gridShape) |
| |
| # Set up the colormap if not specified |
| if cMap is None: |
| cMap = plt.cm.coolwarm |
| |
| # Set up the figure |
| fig = plt.figure() |
| #fig.set_size_inches((8.5, 11.)) |
| #fig.dpi = 600 |
| |
| # Make the subplot grid |
| grid = ImageGrid(fig, 111, |
| nrows_ncols = gridShape, |
| axes_pad = 0.3, |
| share_all = True, |
| add_all = True, |
| ngrids = nPlots, |
| label_mode = "L", |
| cbar_mode = 'single', |
| cbar_location = 'bottom', |
| cbar_pad = '0%' |
| ) |
| |
| # Determine the map boundaries and construct a Basemap object |
| lonMin = lons.min() |
| lonMax = lons.max() |
| latMin = lats.min() |
| latMax = lats.max() |
| m = Basemap(projection = 'cyl', llcrnrlat = latMin, urcrnrlat = latMax, |
| llcrnrlon = lonMin, urcrnrlon = lonMax, resolution = 'l') |
| |
| # Convert lats and lons to projection coordinates |
| if lats.ndim == 1 and lons.ndim == 1: |
| lons, lats = np.meshgrid(lons, lats) |
| |
| # Calculate contour levels if not given |
| if cLevs is None: |
| # Cut off the tails of the distribution |
| # for more representative contour levels |
| cLevs = calcNiceIntervals(dataset, nLevs) |
| extend = 'both' |
| |
| # Create default meridians and parallels |
| if meridians is None: |
| meridians = np.arange(-180, 181, 15) |
| if parallels is None: |
| parallels = np.arange(-90, 91, 15) |
| |
| x, y = m(lons, lats) |
| for i, ax in enumerate(grid): |
| # Load the data to be plotted |
| data = dataset[i] |
| m.ax = ax |
| |
| # Draw the borders for coastlines and countries |
| m.drawcoastlines(linewidth = 1) |
| m.drawcountries(linewidth = .75) |
| |
| # Draw parallels / meridians |
| m.drawmeridians(meridians, labels = mLabels, linewidth = .75) |
| m.drawparallels(parallels, labels = pLabels, linewidth = .75) |
| |
| # Draw filled contours |
| cs = m.contourf(x, y, data, cmap = cMap, levels = cLevs, extend = extend) |
| |
| # Add title |
| if subTitles is not None: |
| ax.set_title(subTitles[i], fontsize = 'small') |
| |
| |
| # Add colorbar |
| cbar = fig.colorbar(cs, cax = ax.cax, drawedges = True, orientation = 'horizontal', |
| extendfrac = 'auto') |
| cbar.set_label(cLabel) |
| cbar.set_ticks(cLevs) |
| cbar.ax.xaxis.set_ticks_position("none") |
| cbar.ax.yaxis.set_ticks_position("none") |
| |
| # This is an ugly hack to make the title show up at the correct height. |
| # Basically save the figure once to achieve tight layout and calculate |
| # the adjusted heights of the axes, then draw the title slightly above |
| # that height and save the figure again |
| fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi) |
| ymax = 0 |
| for ax in grid: |
| bbox = ax.get_position() |
| ymax = max(ymax, bbox.ymax) |
| |
| # Add figure title |
| fig.suptitle(pTitle, y = ymax + .04, fontsize = 16) |
| fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi) |
| plt.show() |
| fig.clf() |
| |
| def drawSubRegions(subRegions, lats, lons, fName, fType = 'png', pTitle = '', |
| parallels = None, meridians = None, subRegionMasks = None): |
| ''' |
| Purpose:: |
| Function to draw subregion domain(s) on a map |
| |
| Input:: |
| subRegions - a list of subRegion objects |
| lats - array of latitudes |
| lons - array of longitudes |
| fName - a string specifying the filename of the plot |
| fType - an optional string specifying the filetype, default is .png |
| pTitle - an optional string specifying plot title |
| parallels - an optional list of ints or floats for the parallels to be drawn |
| meridians - an optional list of ints or floats for the meridians to be drawn |
| subRegionMasks - optional dictionary of boolean arrays for each subRegion |
| for giving finer control of the domain to be drawn, by default |
| the entire domain is drawn. |
| ''' |
| # Set up the figure |
| fig = plt.figure() |
| fig.set_size_inches((8.5, 11.)) |
| fig.dpi = 300 |
| ax = fig.add_subplot(111) |
| |
| # Determine the map boundaries and construct a Basemap object |
| lonMin = lons.min() |
| lonMax = lons.max() |
| latMin = lats.min() |
| latMax = lats.max() |
| m = Basemap(projection = 'cyl', llcrnrlat = latMin, urcrnrlat = latMax, |
| llcrnrlon = lonMin, urcrnrlon = lonMax, resolution = 'l', ax = ax) |
| |
| # Draw the borders for coastlines and countries |
| m.drawcoastlines(linewidth = 1) |
| m.drawcountries(linewidth = .75) |
| m.drawstates() |
| |
| # Create default meridians and parallels |
| if meridians is None: |
| meridians = np.arange(-180, 181, 15) |
| if parallels is None: |
| parallels = np.arange(-90, 91, 15) |
| |
| # Draw parallels / meridians |
| m.drawmeridians(meridians, labels = [0, 0, 0, 1], linewidth = .75) |
| m.drawparallels(parallels, labels = [1, 0, 0, 1], linewidth = .75) |
| |
| # Set up the color scaling |
| cMap = plt.cm.jet |
| norm = mpl.colors.BoundaryNorm(np.arange(1, len(subRegions) + 3), cMap.N) |
| |
| # Process the subregions |
| for i, reg in enumerate(subRegions): |
| if subRegionMasks is not None and reg.name in subRegionMasks.keys(): |
| domain = (i + 1) * subRegionMasks[reg.name] |
| else: |
| domain = (i + 1) * np.ones((2, 2)) |
| |
| nLats, nLons = domain.shape |
| domain = ma.masked_equal(domain, 0) |
| regLats = np.linspace(reg.latMin, reg.latMax, nLats) |
| regLons = np.linspace(reg.lonMin, reg.lonMax, nLons) |
| regLons, regLats = np.meshgrid(regLons, regLats) |
| |
        # Convert to projection coordinates. Not really necessary
| # for cylindrical projections but keeping it here in case we need |
| # support for other projections. |
| x, y = m(regLons, regLats) |
| |
| # Draw the subregion domain |
| m.pcolormesh(x, y, domain, cmap = cMap, norm = norm, alpha = .5) |
| |
| # Label the subregion |
| xm, ym = x.mean(), y.mean() |
| m.plot(xm, ym, marker = '$%s$' %(reg.name), markersize = 12, color = 'k') |
| |
    # Add the title
| ax.set_title(pTitle) |
| |
| # Save the figure |
| fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi) |
    plt.show()
| fig.clf() |
| |
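# ------------------------------------------------------------------------------
# Hedged usage sketch for drawSubRegions. The real SubRegion class is defined
# elsewhere in RCMES; _ExampleSubRegion below is a stand-in that only mimics the
# attributes this module actually reads (name, lonMin, lonMax, latMin, latMax).
# ------------------------------------------------------------------------------
class _ExampleSubRegion(object):
    '''Illustrative stand-in for the RCMES SubRegion object.'''
    def __init__(self, name, lonMin, lonMax, latMin, latMax):
        self.name = name
        self.lonMin = lonMin
        self.lonMax = lonMax
        self.latMin = latMin
        self.latMax = latMax

def _example_drawSubRegions():
    '''
    Draw two illustrative subregion boxes on a small synthetic domain.
    Not called anywhere in this module.
    '''
    lats = np.arange(25., 50.5, 0.5)
    lons = np.arange(-15., 20.5, 0.5)
    regions = [_ExampleSubRegion('R01', -10., 0., 30., 40.),
               _ExampleSubRegion('R02', 5., 15., 35., 45.)]
    drawSubRegions(regions, lats, lons, 'example_subregions', fType = 'png',
                   pTitle = 'drawSubRegions example')
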
| def metrics_plots(varName, numOBS, numMDL, nT, ngrdY, ngrdX, Times, lons, lats, allData, dataList, workdir, subRegions, timeStep, fileOutputOption): |
| ''' |
| Calculate evaluation metrics and generate plots. |
| ''' |
| ################################################################################################################## |
| # Routine to compute evaluation metrics and generate plots |
| # (1) metric calculation |
| # (2) plot production |
| # Input: |
    # numOBS - the number of obs datasets: either 1, or >2 when an obs ensemble is added for multi-obs cases
    # numMDL - the number of model datasets: either 1, or >2 when a model ensemble is added for multi-model cases
    # nT - the length of the data in the time dimension
    # ngrdY - the length of the data in the Y dimension
    # ngrdX - the length of the data in the X dimension
    # Times - time stamps
    # lons,lats - longitude & latitude values of the data domain (same for model & obs)
    # allData - the combined observed and model data (merges obsData & mdlData in the old code)
    # dataList - the list of data names (combines obsList and mdlList in the old code)
    # workdir - string describing the directory path for storing results and plots
    # subRegions - list of SubRegion objects, or False
    # fileOutputOption - option to write the regridded data to a netCDF file or not
    # Output: image files of plots + possibly data
    #******************************************************
    # JK2.0: Only the data interpolated temporally and spatially onto the analysis grid
    #        are transferred into this routine. The rest of the processing (e.g., area-averaging)
    #        is performed in this routine. Do not overwrite obsData[numOBS,nT,ngrdY,ngrdX] &
    #        mdlData[numMDL,nT,ngrdY,ngrdX]: these are the raw, re-gridded data to be used repeatedly
    #        for multiple evaluation steps as desired by an evaluator.
    # JK2.1: The observed and model data are unified in a single variable for ease of processing
| ################################################################################################################## |
| |
| print '' |
| print 'Start metrics.py' |
| print '' |
| # JK2.1: define the variable to represent the total number of combined (obs + model) datasets |
| numDatasets = numOBS + numMDL |
| |
| ##################################################################################################### |
| # JK2.0: Compute evaluation metrics and plots to visualize the results |
| ##################################################################################################### |
| # (mp.001) Sub-regions for local timeseries analysis |
| #-------------------------------- |
    # Enter the locations of the subregions via screen input;
    # modify this to add an option to read them in from data file(s)
| #---------------------------------------------------------------------------------------------------- |
| if subRegions: |
| numSubRgn = len(subRegions) |
| subRgnName = [ x.name for x in subRegions ] |
| subRgnLon0 = [ x.lonMin for x in subRegions ] |
| subRgnLon1 = [ x.lonMax for x in subRegions ] |
| subRgnLat0 = [ x.latMin for x in subRegions ] |
| subRgnLat1 = [ x.latMax for x in subRegions ] |
| else: |
| print '' |
| ans = raw_input('Calculate area-mean timeseries for subregions? y/n: [n] \n') |
| print '' |
| if ans == 'y': |
| ans = raw_input('Input subregion info interactively? y/n: \n> ') |
| if ans == 'y': |
| numSubRgn, subRgnName, subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1 = misc.assign_subRgns_interactively() |
| else: |
| print 'Read subregion info from a pre-fabricated text file' |
                ans = raw_input('Read from the default file (workdir + "/sub_regions.txt")? y/n: \n> ')
| if ans == 'y': |
| subRgnFileName = workdir + "/sub_regions.txt" |
| else: |
| subRgnFileName = raw_input('Enter the subRgnFileName to read from \n') |
| print 'subRgnFileName ', subRgnFileName |
| numSubRgn, subRgnName, subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1 = misc.assign_subRgns_from_a_text_file(subRgnFileName) |
| print subRgnName, subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1 |
| else: |
| numSubRgn = 0 |
| # compute the area-mean timeseries for all subregions if subregion(s) are defined. |
| # the number of subregions is usually small and memory usage is usually not a concern |
| dataRgn = np.zeros((numDatasets, numSubRgn, nT)) |
| |
| if subRegions: |
| print 'Enter area-averaging: allData.shape ', allData.shape |
| print 'Using Latitude/Longitude Mask for Area Averaging' |
| for n in np.arange(numSubRgn): |
| # Define mask using regular lat/lon box specified by users ('mask=True' defines the area to be excluded) |
| maskLonMin = subRegions[n].lonMin |
| if maskLonMin > 180.: |
| maskLonMin = maskLonMin - 360. |
| maskLonMax = subRegions[n].lonMax |
| if maskLonMax > 180.: |
| maskLonMax = maskLonMax - 360. |
| maskLatMin = subRegions[n].latMin |
| maskLatMax = subRegions[n].latMax |
| mask = np.logical_or(np.logical_or(lats <= maskLatMin, lats >= maskLatMax), |
| np.logical_or(lons <= maskLonMin, lons >= maskLonMax)) |
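            # Example of the convention (for a box spanning 30N-45N, 10W-10E): a grid point at
            # (50N, 0E) satisfies lats >= maskLatMax, so mask = True there and the point is
            # excluded from the area mean; points inside the box keep mask = False and are kept.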
| |
| # Calculate area-weighted averages within this region and store in a new list |
| for k in np.arange(numDatasets): #JK2.1: area-average all data |
| Store = [] |
| for t in np.arange(nT): |
| Store.append(process.calc_area_mean(allData[k, t, :, :], lats, lons, mymask = mask)) |
| dataRgn[k, n, :] = ma.array(Store[:]) |
| Store = [] # release the memory allocated by temporary vars |
| |
| #------------------------------------------------------------------------- |
    # (mp.002) fileOutputOption: the option to create a netCDF file of processed
    #          (re-gridded and regionally-averaged) data for user-specific processing.
    #          This option is useful for advanced users who need more than
    #          the metrics and visualization provided in the basic package.
| #---------------------------------------------------------------------------------------------------- |
| print '' |
    if not fileOutputOption:
        while fileOutputOption not in ['no', 'nc']:
            fileOutputOption = raw_input('Option for output files of obs/model data: enter "no" for none or "nc" for a netCDF file\n> ').lower()
| print '' |
| |
| # write a netCDF file for post-processing if desired. JK21: binary output option has been completely eliminated |
| if fileOutputOption == 'nc': |
| fileName = '%s/%s_Tseries.nc' % (workdir, varName) |
        if os.path.exists(fileName):
            print "removing %s from the local filesystem, so it can be replaced..." % (fileName,)
            os.remove(fileName)
| #files.writeNCfile1(fileName, numSubRgn, lons, lats, allData, dataRgn, dataList, subRegions) |
| files.writeNCfile1(fileName, numDatasets, numOBS, numMDL, numSubRgn, lons, lats, allData, dataRgn, dataList, subRegions) |
| print 'The regridded obs and model data are written in the netCDF file ', fileName |
| |
| ##################################################################################################### |
| ###################### Metrics calculation and plotting cycle starts from here ###################### |
| ##################################################################################################### |
| print '' |
| print 'OBS and MDL data have been prepared for the evaluation step' |
| print '' |
| doMetricsOption = raw_input('Want to calculate metrics and plot them? [y/n]\n> ').lower() |
| if doMetricsOption == 'y': |
| # Assign the variable name and unit to be used in plots |
| print 'The variable to be processed is ',timeStep,' ',varName |
| pltVarName = raw_input('Enter the variable name to appear in the plot\n> ') |
| pltVarUnit = raw_input('Enter the variable unit to appear in the plot\n> ') |
| print '' |
| |
| #################################################### |
| # Terminate job if no metrics are to be calculated # |
| #################################################### |
| |
| neval = 0 |
| |
| while doMetricsOption == 'y': |
| neval += 1 |
| print ' ' |
| print 'neval= ', neval |
| print ' ' |
| #-------------------------------- |
| # (mp.003) Preparation |
| #---------------------------------------------------------------------------------------------------- |
| # Determine info on years (the years in the record [YR] and its number[numYR]) |
| yy = ma.zeros(nT, 'i') |
| for n in np.arange(nT): |
            yy[n] = int(Times[n].strftime("%Y"))
| YR = np.unique(yy) |
| yy = 0 |
| nYR = len(YR) |
| print 'nYR, YR = ', nYR, YR |
| |
| # Select the eval domain: over the entire domain (spatial distrib) or regional time series |
| anlDomain = 'n' |
| anlRgn = 'n' |
| print ' ' |
| analSelect = int(raw_input('Eval over domain (Enter 0) or time series of selected Sub Regions (Enter 1) \n> ')) |
| print ' ' |
| if analSelect == 0: |
| anlDomain = 'y' |
| elif analSelect == 1: |
| anlRgn = 'y' |
| else: |
            print 'analSelect= ', analSelect, ' is not a valid option; skipping this evaluation step'
| |
| #-------------------------------------------------------------------------------------------------------------------- |
| # (mp.004) Select the model and data to be used in the evaluation step |
        # 6/7/2013: JK4 - unified handling of the ref & mdl datasets allows several different types of evaluation (RCMES v2.1),
        #           e.g., ref vs. one model or another ref; ref vs. all models; ref vs. all models + non-ref obs
        # refID: the ID of the reference data against which all data are to be evaluated
        # mdlID: the list of data IDs to be evaluated; depending on mdlSelect, the list may also include non-ref obs data
| #-------------------------------------------------------------------------------------------------------------------- |
| refID = int(misc.select_data_combined(numDatasets, Times, dataList, 'ref')) |
| mdlSelect = int(misc.select_data_combined(numDatasets, Times, dataList, 'mdl')) |
        mdlID = []
        # Assign the data IDs to be evaluated. Note that a non-reference obs dataset is treated like a model dataset.
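        # mdlSelect codes, as implemented below:
        #   >= 0 : evaluate the single dataset with that ID
        #   -1   : all model datasets (IDs numOBS .. numDatasets-1)
        #   -2   : all datasets except the reference (models + non-ref obs)
        #   -3   : all non-reference obs datasets (requires numOBS > 1)
        #   -4   : a user-entered list of dataset IDs (enter -9 to stop)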
| if mdlSelect >= 0: |
| mdlID.append(mdlSelect) |
| elif mdlSelect == -1: |
| for n in np.arange(numMDL): |
| mdlID.append(n+numOBS) |
| elif mdlSelect == -2: |
| for n in np.arange(refID): |
| mdlID.append(n) |
| for n in range(refID + 1, numDatasets): |
| mdlID.append(n) |
| elif mdlSelect == -3: |
| if numOBS == 1: |
                print 'There exists only one reference dataset: EXIT'
| sys.exit() |
| for n in np.arange(refID): |
| mdlID.append(n) |
| for n in range(refID + 1, numOBS): |
| mdlID.append(n) |
| elif mdlSelect == -4: |
| id4eval = 0 # any number != -9 |
| print 'Enter the data id to be evaluated: -9 to stop entering' |
| while id4eval != -9: |
| id4eval = int(raw_input('Enter the data id for evaluation. -9 to stop entry\n> ')) |
| if id4eval != -9: |
| mdlID.append(id4eval) |
| refName = dataList[refID] |
| mdlName = [] |
| numMdl = len(mdlID) |
        for n in np.arange(numMdl):
            # use the first word of the dataset name, truncated to 8 characters, as its plot label
            tname = dataList[mdlID[n]]
            mdlName.append(tname[:8].split(' ')[0])
| print 'selected reference and model data for evaluation= ', refName, mdlName |
| |
| #-------------------------------- |
        # (mp.005) Spatial distribution analysis/evaluation (anlDomain == 'y')
        #          Obs/mdl climatology variables are 2-d/3-d arrays, e.g., oClim = ma.zeros((ngrdY, ngrdX)), mClim = ma.zeros((numMdl, ngrdY, ngrdX))
| #---------------------------------------------------------------------------------------------------- |
| if anlDomain == 'y': |
| # first determine the temporal properties to be evaluated |
| print '' |
| timeOption = misc.select_timOpt() |
| print '' |
| if timeOption == 1: |
| timeScale = 'annual' |
| # compute the annual-mean time series and climatology. |
| oTser, oClim = calc_clim_year(nYR, nT, ngrdY, ngrdX, allData[refID, :, :, :], Times) |
| mTser = ma.zeros((numMdl, nYR, ngrdY, ngrdX)) |
| mClim = ma.zeros((numMdl, ngrdY, ngrdX)) |
| for n in np.arange(numMdl): |
| id = mdlID[n] |
| mTser[n, :, :, :], mClim[n, :, :] = calc_clim_year(nYR, nT, ngrdY, ngrdX, allData[id, :, :, :], Times) |
| elif timeOption == 2: |
| timeScale = 'seasonal' |
                # select the timeseries and climatology for a season specified by the user
| mTser = ma.zeros((numMdl, nYR, ngrdY, ngrdX)) |
| mClim = ma.zeros((numMdl, ngrdY, ngrdX)) |
| print ' ' |
| moBgn = int(raw_input('Enter the beginning month for your season. 1-12: \n> ')) |
| moEnd = int(raw_input('Enter the ending month for your season. 1-12: \n> ')) |
| print ' ' |
| if moEnd >= moBgn: |
| nMoPerSeason = moEnd - moBgn + 1 |
| oTser, oClim = calc_clim_season(nYR, nT, moBgn, moEnd, ngrdY, ngrdX, allData[refID, :, :, :], Times) |
| for n in np.arange(numMdl): |
| id = mdlID[n] |
| mTser[n, :, :, :], mClim[n, :, :] = calc_clim_season(nYR, nT, moBgn, moEnd, ngrdY, ngrdX, allData[id, :, :, :], Times) |
| elif moEnd == moBgn: |
| # Eval for a single month. mTser, oTser are the annual time series |
| # for the specified month (moEnd), and mClim, oClim are the corresponding climatology |
| oTser, oClim = calc_clim_One_month(moEnd, nYR, nT, allData[refID, :, :, :], Times) |
| for n in np.arange(numMdl): |
| id = mdlID[n] |
| mTser[n, :, :, :], mClim[n, :, :] = calc_clim_One_month(moEnd, nYR, nT, allData[id, :, :, :], Times) |
                elif moEnd < moBgn: # the season spans the year boundary: drop the last year, i.e., redefine nYR = nYR - 1
| nMoS1 = 12 - moBgn + 1 |
| nMoS2 = moEnd |
| nMoPerSeason = nMoS1 + nMoS2 |
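                    # Worked example: for a DJF season, moBgn = 12 and moEnd = 2, so nMoS1 = 1 (Dec)
                    # and nMoS2 = 2 (Jan-Feb); the season mean for year i is then
                    # (1*Dec(i) + 2*mean(Jan,Feb)(i+1)) / 3, which is why the last year is dropped.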
| mTser = ma.zeros((numMdl, nYR - 1, ngrdY, ngrdX)) |
| # calculate the seasonal timeseries and climatology for the model data |
| for n in np.arange(numMdl): |
| id = mdlID[n] |
| mTser1, mClim1 = calc_clim_season(nYR, nT, moBgn, 12, ngrdY, ngrdX, allData[id, :, :, :], Times) |
| mTser2, mClim2 = calc_clim_season(nYR, nT, 1, moEnd, ngrdY, ngrdX, allData[id, :, :, :], Times) |
| for i in np.arange(nYR - 1): |
                            mTser[n, i, :, :] = (float(nMoS1) * mTser1[i, :, :] + float(nMoS2) * mTser2[i + 1, :, :]) / nMoPerSeason
| mClim = ma.average(mTser, axis=1) |
| # repeat for the obs data |
| mTser1, mClim1 = calc_clim_season(nYR, nT, moBgn, 12, ngrdY, ngrdX, allData[refID, :, :, :], Times) |
| mTser2, mClim2 = calc_clim_season(nYR, nT, 1, moEnd, ngrdY, ngrdX, allData[refID, :, :, :], Times) |
| oTser = ma.zeros((nYR - 1, ngrdY, ngrdX)) |
| for i in np.arange(nYR - 1): |
                        oTser[i, :, :] = (float(nMoS1) * mTser1[i, :, :] + float(nMoS2) * mTser2[i + 1, :, :]) / nMoPerSeason
| oClim = ma.zeros((ngrdY, ngrdX)) |
| oClim = ma.average(oTser, axis=0) |
| nYR = nYR - 1 |
| yy = ma.empty(nYR) |
| for i in np.arange(nYR): |
| yy[i] = YR[i] |
| mTser1 = 0. |
| mTser2 = 0. |
| mClim1 = 0. |
| mClim2 = 0. |
| elif timeOption == 3: |
| timeScale = 'monthly' |
| # compute the monthly-mean time series and climatology |
                # Note that the shapes of the output vars are:
                #   mTser = ma.zeros((nYR, 12, ngrdY, ngrdX)) & mClim = ma.zeros((12, ngrdY, ngrdX))
                #   and likewise oTser = ma.zeros((nYR, 12, ngrdY, ngrdX)) & oClim = ma.zeros((12, ngrdY, ngrdX))
| oTser, oClim = calc_clim_mo(nYR, nT, ngrdY, ngrdX, allData[refID, :, :, :], Times) |
| mTser = ma.zeros((numMdl, nYR, 12, ngrdY, ngrdX)) |
| mClim = ma.zeros((numMdl, 12, ngrdY, ngrdX)) |
| for n in np.arange(numMdl): |
| id = mdlID[n] |
| mTser[n, :, :, :, :], mClim[n, :, :, :] = calc_clim_mo(nYR, nT, ngrdY, ngrdX, allData[id, :, :, :], Times) |
| else: |
| # undefined process options. exit |
| print 'The desired temporal scale is not available this time. END the job' |
| sys.exit() |
| |
            # compute the interannual variability as the standard deviation over the year axis
            oTsig = oTser.std(axis = 0)
            mTsig = mTser.std(axis = 1)
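            # resulting shapes: monthly -> oTsig (12, ngrdY, ngrdX), mTsig (numMdl, 12, ngrdY, ngrdX);
            # annual/seasonal -> oTsig (ngrdY, ngrdX), mTsig (numMdl, ngrdY, ngrdX)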
| |
| #-------------------------------- |
| # (mp.006) Select metric to be calculated |
| # bias, mae, acct, accs, pcct, pccs, rmst, rmss, pdfSkillScore, taylor diagram |
| #---------------------------------------------------------------------------------------------------- |
| print ' ' |
| metricOption = misc.select_metrics(mdlSelect) |
| print ' ' |
| |
            # metrics calculation: the shape of metricDat varies according to the metric type & timescale options
| |
| if metricOption == 'BIAS': |
                iselect = int(raw_input('Enter 0 to evaluate the mean climatology or 1 for the interannual variability\n> '))
| else: |
| iselect = 0 |
| |
            # each of the metrics below yields a 2-d (annual or seasonal) or 3-d (monthly) array for each model
| if metricOption == 'BIAS': |
| if timeScale == 'monthly': |
                    oStdv = np.zeros((12, ngrdY, ngrdX))
| metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX)) |
| for n in np.arange(numMdl): |
| for m in np.arange(12): |
| if iselect == 0: |
                                metricDat[n, m, :, :] = calc_bias(mTser[n, :, m, :, :], oTser[:, m, :, :])
| elif iselect == 1: |
| metricDat[n, m, :, :] = mTsig[n, m, :, :] - oTsig[m, :, :] |
| if n == 0: |
                                oStdv[m] = calc_temporal_stdv(oTser[:, m, :, :])
| else: |
| oStdv = calc_temporal_stdv(oTser) |
| metricDat = ma.zeros((numMdl, ngrdY, ngrdX)) |
                    for n in np.arange(numMdl):
                        if iselect == 0:
                            metricDat[n, :, :] = calc_bias(mTser[n, :, :, :], oTser)
                        elif iselect == 1:
                            metricDat[n, :, :] = mTsig[n, :, :] - oTsig[:, :]
| |
| elif metricOption == 'MAE': |
| if timeScale == 'monthly': |
| metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX)) |
| for n in np.arange(numMdl): |
| for m in np.arange(12): |
                            metricDat[n, m, :, :] = calc_mae(mTser[n, :, m, :, :], oTser[:, m, :, :])
| else: |
| metricDat = ma.zeros((numMdl, ngrdY, ngrdX)) |
| for n in np.arange(numMdl): |
| metricDat[n, :, :] = calc_mae(mTser[n, :, :, :], oTser) |
| |
| elif metricOption == 'ACt': |
| if timeScale == 'monthly': |
| metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX)) |
| for n in np.arange(numMdl): |
| for m in np.arange(12): |
                            metricDat[n, m, :, :] = calc_temporal_anom_cor(mTser[n, :, m, :, :], oTser[:, m, :, :])
| else: |
| metricDat = ma.zeros((numMdl, ngrdY, ngrdX)) |
| for n in np.arange(numMdl): |
| metricDat[n, :, :] = calc_temporal_anom_cor(mTser[n, :, :, :], oTser) |
| |
| elif metricOption == 'PCt': |
| if timeScale == 'monthly': |
| metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX)) |
| for n in np.arange(numMdl): |
| for m in np.arange(12): |
                            metricDat[n, m, :, :] = calc_temporal_pat_cor(mTser[n, :, m, :, :], oTser[:, m, :, :])
| else: |
| metricDat = ma.zeros((numMdl, ngrdY, ngrdX)) |
| for n in np.arange(numMdl): |
| metricDat[n, :, :] = calc_temporal_pat_cor(mTser[n, :, :, :], oTser) |
| |
| elif metricOption == 'RMSt': |
| if timeScale == 'monthly': |
| metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX)) |
| for n in np.arange(numMdl): |
| for m in np.arange(12): |
                            metricDat[n, m, :, :] = calc_rms(mTser[n, :, m, :, :], oTser[:, m, :, :])
| else: |
| metricDat = ma.zeros((numMdl, ngrdY, ngrdX)) |
| for n in np.arange(numMdl): |
| metricDat[n, :, :] = calc_rms(mTser[n, :, :, :], oTser) |
| |
            # each of the metrics below yields a scalar (or, for monthly data, 12 values) per model
| elif metricOption == 'ACs': |
| if timeScale == 'monthly': |
| metricDat = ma.zeros((numMdl, 12)) |
| for n in np.arange(numMdl): |
| for m in np.arange(12): |
| metricDat[n, m] = calc_spatial_anom_cor(mClim[n, m, :, :], oClim[m, :, :]) |
| else: |
| metricDat = ma.zeros(numMdl) |
| for n in np.arange(numMdl): |
| metricDat[n] = calc_spatial_anom_cor(mClim[n, :, :], oClim) |
| print 'spatial anomaly correlation vs the reference data:', refName |
                    for n in np.arange(numMdl):
                        print mdlName[n], metricDat[n]
| |
| elif metricOption == 'PCs': |
| if timeScale == 'monthly': |
| metricDat = ma.zeros((numMdl, 12)) |
| for n in np.arange(numMdl): |
| for m in np.arange(12): |
| metricDat[n, m] = calc_spatial_pat_cor(mClim[n, m, :, :], oClim[m, :, :]) |
| else: |
| metricDat = ma.zeros(numMdl) |
| for n in np.arange(numMdl): |
| metricDat[n] = calc_spatial_pat_cor(mClim[n, :, :], oClim) |
| print 'spatial pattern correlation vs the reference data:', refName |
                    for n in np.arange(numMdl):
                        print mdlName[n], metricDat[n]
| |
| elif metricOption == 'RMSs': |
| if timeScale == 'monthly': |
| metricDat = ma.zeros((numMdl, 12)) |
| for n in np.arange(numMdl): |
| for m in np.arange(12): |
| metricDat[n, m] = calc_rms_dom(mClim[n, m, :, :], oClim[m, :, :]) |
| else: |
| metricDat = ma.zeros(numMdl) |
| for n in np.arange(numMdl): |
| metricDat[n] = calc_rms_dom(mClim[n, :, :], oClim) |
| print 'RMSE over the entire domain' |
                    for n in np.arange(numMdl):
                        print mdlName[n], metricDat[n]
| |
| # metrics to plot taylor diagram |
| elif metricOption == 'Taylor_space': |
| if timeScale == 'monthly': |
| oStdv = ma.zeros(12) |
| mStdv = ma.zeros((numMdl, 12)) |
| mCorr = ma.zeros((numMdl, 12)) |
| mTemp = ma.zeros((ngrdY, ngrdX)) |
| for m in np.arange(12): |
| oStdv[m] = oClim[m, :, :].std() |
| for n in np.arange(numMdl): |
| for m in np.arange(12): |
| mTemp = mClim[n, m, :, :] |
| mStdv[n, m] = mTemp.std() |
                        mCorr[n, m] = calc_spatial_pat_cor(mTemp, oClim[m, :, :])
| else: |
| oStdv = oClim.std() |
| mStdv = ma.zeros(numMdl) |
| mCorr = ma.zeros(numMdl) |
| for n in np.arange(numMdl): |
| mStdv[n] = mClim[n, :, :].std() |
| mCorr[n] = calc_spatial_pat_cor(mClim[n, :, :], oClim) |
| mStdv = mStdv / oStdv # standardized deviation |
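                # On the Taylor diagram each model is drawn at a radius given by mStdv (the model
                # spatial std normalized by the obs std) and at an azimuth set by mCorr, the
                # spatial pattern correlation against the reference climatology.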
| |
| #-------------------------------- |
            # (mp.008 & 009) Coefficient of Variation or Signal-to-Noise ratio: only applicable when
            #                multiple datasets of the same kind (e.g., obs or model) exist
| #---------------------------------------------------------------------------------------------------- |
| elif metricOption == 'CV' or metricOption == 'S2N': |
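                # Both metrics are built from the across-dataset spread (Stdv) of the climatologies:
                #   S2N = oClim / Stdv  (reference mean over ensemble spread)
                #   CV  = Stdv / oClim  (ensemble spread over reference mean)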
                if numMDL == 1:
                    print 'CV/S2N not available when only one model dataset exists'
| else: |
                    # the std over the dataset axis has shape (12, ngrdY, ngrdX) for monthly data
                    # and (ngrdY, ngrdX) otherwise, matching oClim
                    Stdv = mClim.std(axis = 0, ddof = 1)
                    if metricOption == 'CV':
                        metricDat = Stdv / oClim
                    else:
                        metricDat = oClim / Stdv  # signal-to-noise ratio
| |
| #-------------------------------- |
| # (mp.007) Plot the metrics. First, enter plot info |
| #---------------------------------------------------------------------------------------------------- |
| # Taylor diagram |
| if metricOption == 'Taylor_space': |
| taylorDat = [] |
| for n in np.arange(numMdl): |
| ttmp = [] |
| ttmp.append(mStdv[n]) |
| ttmp.append(mCorr[n]) |
| entryLegend = mdlName[n] |
| ttmp.append(entryLegend) |
| taylorDat.append(ttmp) |
| pltTit = 'Spatial variability evaluation' |
| pltFileName = workdir + '/taylor_space_' + timeScale + '-' + varName |
| legendPos = 'upper right' |
                if mStdv.max() > 2.5:
                    radMax = 5.
                elif mStdv.max() > 2.:
                    radMax = 2.5
                else:
                    radMax = 2.
| status = taylorDiagram(taylorDat, pltTit, pltFileName, refName, legendPos, radMax) |
| |
| # multiple 2-d contour plots |
| elif metricDat.ndim == 3: |
| print 'Plot',metricOption,'in',timeScale,varName |
| # Plot metrics (2-d contour plots) |
| if iselect == 0: |
| plotFileName = workdir + '/' + timeScale + '_' + varName + '_' + metricOption + '_tAVG' |
| elif iselect == 1: |
                    plotFileName = workdir + '/' + timeScale + '_' + varName + '_' + metricOption + '_INTRANN_VAR'
                if os.path.exists(plotFileName + '.png'):
                    os.remove(plotFileName + '.png')
| pltTit = [] |
| if metricDat.shape[0] > 1: |
| ncols = 3 |
| for n in np.arange(numMdl): |
| pltTit.append(mdlName[n]) |
| elif metricDat.shape[0] == 1: |
| ncols = 1 |
| pltTit.append(mdlName[0]) |
                if metricDat.shape[0] % ncols == 0:
                    nrows = max(metricDat.shape[0] // ncols, 1)
                else:
                    nrows = metricDat.shape[0] // ncols + 1
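                # e.g., 7 panels with ncols = 3 -> nrows = 3 (7 % 3 != 0, so 7//3 + 1); 6 panels -> nrows = 2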
| status = drawContourMap(metricDat, lats, lons, plotFileName, fType = 'png', gridShape = (nrows, ncols), |
| cLabel = '', pTitle = '', subTitles = pltTit, cMap = None, |
| cLevs = None, nLevs = 10, parallels = None, meridians = None, extend = 'neither') |
| if metricOption == 'BIAS': |
| # obs climatology |
| plotDat = np.zeros((ngrdY, ngrdX)) |
                    print ''
| makePlot = raw_input('Plot observed climatology? [y/n]\n> ').lower() |
| if makePlot == 'y': |
| pltTit = [] |
| if iselect == 0: |
| plotDat = oClim |
| plotFileName = workdir + '/' + timeScale + '_' + varName + '_OBS_tAVG' |
| pltTit.append('OBS (' + pltVarName + ': ' + pltVarUnit +'): MEAN CLIM') |
| elif iselect == 1: |
| plotDat = oTsig |
| plotFileName = workdir + '/' + timeScale + '_' + varName + '_OBS_tSIG' |
| pltTit.append('OBS (' + pltVarName + ': ' + pltVarUnit + '): INTRANN VAR') |
                        if os.path.exists(plotFileName + '.png'):
                            os.remove(plotFileName + '.png')
| ncols = 1 |
| nrows = 1 |
                        status = drawContourMap(plotDat, lats, lons, plotFileName, fType = 'png', gridShape = (nrows, ncols),
| cLabel = '', pTitle = '', subTitles = pltTit, cMap = None, |
| cLevs = None, nLevs = 10, parallels = None, meridians = None, extend = 'neither') |
| # plot bias normalized by the mean climatology |
| if iselect == 0: |
| makePlot = raw_input('Plot bias in terms of % of OBS mean? [y/n]\n> ').lower() |
| else: |
| makePlot = 'n' |
| if makePlot != 'y': |
| pass |
| else: |
| plotDat = ma.empty((numMdl, ngrdY, ngrdX)) |
| print plotDat.shape, metricDat.shape |
| for n in np.arange(numMdl): |
| plotDat[n,:,:] = 100.* metricDat[n,:,:] / oClim[:,:] |
| plotFileName = workdir + '/' + timeScale + '_' + varName + '_' + metricOption + '_MEAN' |
                        if os.path.exists(plotFileName + '.png'):
                            os.remove(plotFileName + '.png')
| pltTit = [] |
| if plotDat.shape[0] > 1: |
| ncols = 3 |
| for n in np.arange(numMdl): |
| pltTit.append(mdlName[n]) |
| elif plotDat.shape[0] == 1: |
| ncols = 1 |
| pltTit.append(mdlName[0]) |
                        if plotDat.shape[0] % ncols == 0:
                            nrows = max(plotDat.shape[0] // ncols, 1)
                        else:
                            nrows = plotDat.shape[0] // ncols + 1
                        status = drawContourMap(plotDat, lats, lons, plotFileName, fType = 'png', gridShape = (nrows, ncols),
| cLabel = '', pTitle = '', subTitles = pltTit, cMap = None, |
| cLevs = None, nLevs = 10, parallels = None, meridians = None, extend = 'neither') |
| plotDat = 0. |
| # plot bias normalized by the interannual variability |
| if iselect == 0: |
| makePlot = raw_input('Plot bias in terms of % of interann sigma? [y/n]\n> ').lower() # inactive for this version |
| else: |
| makePlot = 'n' |
| if makePlot == 'y': |
| print 'plot normalized by sigma' |
| plotDat = ma.empty((numMdl, ngrdY, ngrdX)) |
| print plotDat.shape, metricDat.shape |
| for n in np.arange(numMdl): |
| plotDat[n,:,:] = 100.* metricDat[n,:,:] / oStdv[:,:] |
| plotFileName = workdir + '/' + timeScale + '_' + varName + '_' + metricOption + '_SIG' |
                        if os.path.exists(plotFileName + '.png'):
                            os.remove(plotFileName + '.png')
| pltTit = [] |
| if metricDat.shape[0] > 1: |
| ncols = 3 |
| for n in np.arange(numMdl): |
| pltTit.append(mdlName[n]) |
| elif metricDat.shape[0] == 1: |
| ncols = 1 |
| pltTit.append(mdlName[0]) |
                        if metricDat.shape[0] % ncols == 0:
                            nrows = max(metricDat.shape[0] // ncols, 1)
                        else:
                            nrows = metricDat.shape[0] // ncols + 1
                        status = drawContourMap(plotDat, lats, lons, plotFileName, fType = 'png', gridShape = (nrows, ncols),
| cLabel = '', pTitle = '', subTitles = pltTit, cMap = None, |
| cLevs = None, nLevs = 10, parallels = None, meridians = None, extend = 'neither') |
| plotDat = 0. |
| |
| # a single 2-d array |
| elif metricDat.ndim == 2: |
| # obs climatology |
                print ''
                plotFileName = workdir + '/' + metricOption + '_' + varName + '_OBS'
                if os.path.exists(plotFileName + '.png'):
                    os.remove(plotFileName + '.png')
| pltTit = [] |
| pltTit.append(metricOption) |
| ncols = 1 |
| nrows = 1 |
| cLevs = [1., 5., 10., 25., 50.] |
                status = drawContourMap(metricDat, lats, lons, plotFileName, fType = 'png', gridShape = (nrows, ncols),
| cLabel = '', pTitle = '', subTitles = pltTit, cMap = None, |
| cLevs = cLevs, nLevs = 10, parallels = None, meridians = None, extend = 'neither') |
| |
| # Repeat for another metric. Also release memory used by temporary variables |
| metricDat = 0. |
| oTser = 0. |
| oClim = 0. |
| mTser = 0. |
| mClim = 0. |
| print '' |
| print 'Evaluation completed' |
| print '' |
| doMetricsOption = raw_input('Do you want to perform another evaluation? [y/n]\n> ').lower() |
| print '' |
| |
| # metrics and plots for regional time series |
        elif anlRgn == 'y' and not subRegions:
| print 'No SubRegions have been defined. Regional Time Series Plots cannot be created' |
| |
| # metrics and plots for regional time series |
| elif anlRgn == 'y': |
| oData = ma.zeros((numSubRgn, nT)) |
| mData = ma.zeros((numMdl, numSubRgn, nT)) |
| oData[:, :] = dataRgn[refID, :, :] |
| for n in np.arange(numMdl): |
| mData[n, :, :] = dataRgn[mdlID[n], :, :] |
| # select the region(s) for evaluation. model and obs have been selected before entering this if block |
| print 'There are %s subregions. Select the subregion(s) for evaluation' % numSubRgn |
| rgnSelect = misc.selectSubRegion(subRegions) |
| print 'selected region for evaluation= ', rgnSelect |
| |
| # compute the annual cycle (in terms of monthly means) regardless of its use |
| if rgnSelect >= 0: |
| obsAnnCyc = calc_annual_cycle_means(oData[rgnSelect, :], Times) |
| mdlAnnCyc = ma.empty((numMdl, 12)) |
| for n in np.arange(numMdl): |
| mdlAnnCyc[n, :] = calc_annual_cycle_means(mData[n, rgnSelect, :], Times) |
| print 'Annual cycles for a single subregion are calculated' |
| elif rgnSelect == -9: |
| obsAnnCyc = ma.empty((numSubRgn, 12)) |
| mdlAnnCyc = ma.empty((numMdl, numSubRgn, 12)) |
| for r in np.arange(numSubRgn): |
| obsAnnCyc[r, :] = calc_annual_cycle_means(oData[r, :], Times) |
| for n in np.arange(numMdl): |
| mdlAnnCyc[n, r, :] = calc_annual_cycle_means(mData[n, r, :], Times) |
| print 'Annual cycles for all subregions are calculated' |
| |
| # (rgn01) evaluate the annual cycle |
            ans = raw_input('Do you want to evaluate the annual cycle? [y/n]\n> ').lower()
| if ans == 'y': # analysis for a single region |
| if rgnSelect >= 0: |
| # plot obs and model within a single frame |
| pltYvar = ma.empty((numMdl+1, 12)) |
| pltYvar[0, :] = obsAnnCyc[:] |
| for n in np.arange(numMdl): |
| pltYvar[n+1, :] = mdlAnnCyc[n, :] |
                    # assign lineLabel: the data names; rmse and corr vs the reference annual cycle
                    # are computed here but are not currently added to the legend
                    lineLabel = []
                    lineLabel.append(refName)
                    for n in np.arange(numMdl):
                        rms = calc_rms(obsAnnCyc, mdlAnnCyc[n, :])
                        cor = calc_pat_cor(obsAnnCyc, mdlAnnCyc[n, :])
                        lineLabel.append(mdlName[n])
| lineTyp = [] |
| for n in np.arange(numMdl + 1): |
| if (n == 0) or (n == numMdl): |
| lineTyp.append('-') |
| else: |
| lineTyp.append(':') |
| xLabel = 'Month' |
| yLabel = pltVarUnit |
| pltTit = 'Annual cycle of ' + pltVarName +' at Subregion ' + subRgnName[rgnSelect] |
| plotFileName = workdir + '/annCyc_' + varName + '_' + subRgnName[rgnSelect] |
                    if os.path.exists(plotFileName):
                        os.remove(plotFileName)
| pltXvar = frange(1, 12, npts=12) |
| xmin = 1 |
| xmax = 12 |
| deltaX = 1 |
| mn1 = 1.e24 |
| mx1 = -mn1 |
| for n in np.arange(numMdl): |
| mn1 = min(min(mdlAnnCyc[n]), mn1) |
| mx1 = max(max(mdlAnnCyc[n]), mx1) |
| print 'Minimum y value= ', min(min(obsAnnCyc), mn1) |
| print 'Maximum y value= ', max(max(obsAnnCyc), mx1) |
| ymin = float(raw_input('Enter the minimum y to be plotted\n> ')) |
| ymax = float(raw_input('Enter the maximum y to be plotted\n> ')) |
                    if(ymax > ymin):
                        deltaY = float(raw_input('Enter the delta_y (tick mark intervals)\n> '))
                    else:
                        deltaY = 0.
| legendPos = 'best' |
| xScale = 'linear' |
| yScale = 'linear' |
| status = pltXY(pltXvar, pltYvar, lineLabel, lineTyp, pltTit, xLabel, yLabel, plotFileName, \ |
| xmin, xmax, deltaX, ymin, ymax, deltaY, legendPos, xScale, yScale) |
| pltXvar = 0 |
| pltYvar = 0 |
| else: # analysis for all subregions - draw a multi-frame plot |
| # compute rmse and corr between the ref and model annual cycle |
| rms = np.zeros((numMdl, numSubRgn)) |
| crr = np.zeros((numMdl, numSubRgn)) |
| for n in np.arange(numMdl): |
| for r in np.arange(numSubRgn): |
| rms[n, r] = calc_rms(obsAnnCyc[r, :], mdlAnnCyc[n, r, :]) |
| crr[n, r] = calc_pat_cor(obsAnnCyc[r, :], mdlAnnCyc[n, r, :]) |
| |
| # Make a portrait diagram here |
                    rgnName = []
                    for i in np.arange(numSubRgn):
                        rgnName.append('R%02d' % (i + 1))
| # plot correlation coefficients |
| plotFileName = workdir + '/pd_annCyc_corr_' + varName + '.subRgns' |
| pTitle = 'Annual Cycle CORR COEF' |
| data = crr |
| cLevs = [0., .1, .2, .3, .4, .5, .6, .7, .8, .9, 1.] |
| status = drawPortraitDiagramSingle(data, rowLabels = rgnName, colLabels = mdlName, cLevs = cLevs, |
| fName = plotFileName, fType = 'png', xLabel = 'Region', yLabel = 'Model', |
| cLabel = '', pTitle = pTitle, cMap = None) |
                    # plot RMSE, normalized by the observed annual-mean value (precipitation only)
                    plotFileName = workdir + '/pd_annCyc_rmse_' + varName + '.subRgns'
                    if varName == 'pr' or varName == 'prec':
                        pTitle = 'Ann Cyc RMSE: % Mean'
                        # use a fresh array so the correlation data (crr) are not overwritten
                        data = np.zeros((numMdl, numSubRgn))
                        for n in np.arange(numMdl):
                            for r in np.arange(numSubRgn):
                                rgnMean = obsAnnCyc[r, :].sum() / 12.
                                data[n, r] = rms[n, r] / rgnMean
                        data = data * 100.
                        cLevs = [0., 20., 50., 100., 150., 200., 250., 300., 400.]
                        status = drawPortraitDiagramSingle(data, rowLabels = rgnName, colLabels = mdlName, cLevs = cLevs,
                                                           fName = plotFileName, fType = 'png', xLabel = 'Region', yLabel = 'Model',
                                                           cLabel = '', pTitle = pTitle, cMap = None)
                    data = 0.
| # Make x-y plots for all regions (replace with multi-plot page(s)) |
                    autoYscale = raw_input('Automatic scaling for the Y axis? y/n \n> ').lower()
| if autoYscale == 'y': |
| ymin = 0. |
| ymax = 0. |
| deltaY = 0. |
| pltXvar = frange(1,12,npts=12) |
| pltYvar = ma.empty((numMdl+1, 12)) |
| xLabel = 'Month' |
| xmin = 1 |
| xmax = 12 |
| deltaX = 1 |
| yLabel = pltVarUnit |
| xScale = 'linear' |
| yScale = 'linear' |
| legendPos = 'best' |
| lineLabel = [] |
| lineLabel.append(refName) |
| for n in np.arange(numMdl): |
| lineLabel.append(mdlName[n]) |
| lineTyp = [] |
| for n in np.arange(numMdl + 1): |
| if (n==0) or (n==numMdl): |
| lineTyp.append('-') |
| else: |
| lineTyp.append(':') |
| for r in np.arange(numSubRgn): |
| plotFileName = workdir + '/annCyc_' + varName + '_' + subRgnName[r] |
| print 'Annual cycle plot for r= ', r, ' Rgn Name= ', subRgnName[r] |
| pltTit = '' |
| if autoYscale != 'y': |
| mn1 = mdlAnnCyc[:, r, :].min() |
| mx1 = mdlAnnCyc[:, r, :].max() |
| print 'Minimum y value= ', min(obsAnnCyc[r, :].min(), mn1) |
| print 'Maximum y value= ', max(obsAnnCyc[r, :].max(), mx1) |
| ymin = float(raw_input('Enter the minimum y to be plotted\n> ')) |
| ymax = float(raw_input('Enter the maximum y to be plotted\n> ')) |
| if(ymax>ymin): |
| deltaY = float(raw_input('Enter the delta_y (tick mark intervals)\n> ')) |
| else: |
| deltaY = 0. |
| # assign plot values |
| pltYvar[0, :] = obsAnnCyc[r, :] |
| for n in np.arange(numMdl): |
| pltYvar[n+1, :] = mdlAnnCyc[n, r, :] |
| status = pltXY(pltXvar, pltYvar, lineLabel, lineTyp, pltTit, xLabel, yLabel, plotFileName, \ |
| xmin, xmax, deltaX, ymin, ymax, deltaY, legendPos, xScale, yScale) |
| pltXvar = 0 |
| pltYvar = 0 |
| |
| #-------------------------------- |
        # (mp.010) Other metrics options to be added here
| #---------------------------------------------------------------------------------------------------- |
| |
| # Repeat for another metric |
| doMetricsOption = raw_input('Do you want to perform another evaluation? [y/n]\n> ').lower() |
| |
| # Processing complete if a user enters 'n' for 'doMetricsOption' |
| print 'RCMES processing completed.' |
| |
| |