# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
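# Run an RCMES evaluation driven by a YAML configuration file: load a
# reference (observation) dataset and one or more model datasets, subset,
# regrid and unit-convert them, optionally compute subregion statistics,
# write the processed datasets to a netCDF file, and draw the requested
# metric plots.
#
# Usage:
#   python <this script> <configuration file (.yaml)>
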
# Apache OCW lib imports
import ocw.dataset_processor as dsp
import ocw.data_source.local as local
import ocw.data_source.rcmed as rcmed
import ocw.data_source.esgf as esgf
import ocw.plotter as plotter
import ocw.utils as utils
from ocw.dataset import Bounds

import matplotlib.pyplot as plt
from matplotlib import rcParams
import numpy as np
import numpy.ma as ma
import yaml
from glob import glob
import operator
from dateutil import parser
from datetime import datetime
import os
import sys
from getpass import getpass

from metrics_and_plots import *
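# metrics_and_plots is expected to provide the plotting helpers used in Step 8
# (Map_plot_subregion, Map_plot_bias_of_multiyear_climatology,
# Taylor_diagram_spatial_pattern_of_multiyear_climatology,
# Time_series_subregion and Portrait_diagram_subregion).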

import ssl
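# Some remote data services may present SSL certificates that fail verification;
# fall back to an unverified HTTPS context so downloads still work.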
if hasattr(ssl, '_create_unverified_context'):
    ssl._create_default_https_context = ssl._create_unverified_context

config_file = str(sys.argv[1])

print 'Reading the configuration file ', config_file
config = yaml.safe_load(open(config_file))
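# The configuration file is expected to supply the keys read throughout this
# script. A rough, illustrative sketch (values are examples only, not defaults):
#
#   workdir: ./output/
#   output_netcdf_filename: evaluation.nc
#   time:
#       maximum_overlap_period: False
#       start_time: 1998-01-01
#       end_time: 2007-12-31
#       temporal_resolution: monthly
#       month_start: 1
#       month_end: 12
#       average_each_year: False
#   space:
#       min_lat: -45.0
#       max_lat: 42.0
#       min_lon: -24.0
#       max_lon: 60.0
#   regrid:
#       regrid_on_reference: False
#       regrid_dlat: 0.5
#       regrid_dlon: 0.5
#   datasets:
#       reference:
#           data_source: local          # 'local', 'rcmed' or 'ESGF'
#           data_name: OBS
#           path: ./data/obs_pr.nc
#           variable: pr
#       targets:
#           data_source: local
#           path: ./data/models/*.nc
#           variable: pr
#   use_subregions: False
#   number_of_metrics_and_plots: 1
#   metrics1: Map_plot_bias_of_multiyear_climatology
#   plots1:
#       file_name: bias_map
#       subplots_array: [2, 2]
#
# Optional keys handled below include latitude_name/longitude_name,
# multiplying_factor and GCM_data for the datasets, dataset_id/parameter_id
# for the rcmed and ESGF sources, map_projection for map plots, and a
# subregions block when use_subregions is True.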
time_info = config['time']
temporal_resolution = time_info['temporal_resolution']

start_time = datetime.strptime(time_info['start_time'].strftime('%Y%m%d'), '%Y%m%d')
end_time = datetime.strptime(time_info['end_time'].strftime('%Y%m%d'), '%Y%m%d')

space_info = config['space']
min_lat = space_info['min_lat']
max_lat = space_info['max_lat']
min_lon = space_info['min_lon']
max_lon = space_info['max_lon']

ref_data_info = config['datasets']['reference']
model_data_info = config['datasets']['targets']
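# ESGF downloads require an OpenID and password; prompt once and reuse the
# credentials for both the reference and the model datasets.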
if ref_data_info['data_source'] == 'ESGF' or model_data_info['data_source'] == 'ESGF':
    username = raw_input('Enter your ESGF OpenID:\n')
    password = getpass(prompt='Enter your ESGF password:\n')
""" Step 1: Load the reference data """ | |
ref_lat_name = None | |
ref_lon_name = None | |
if 'latitude_name' in ref_data_info.keys(): | |
ref_lat_name = ref_data_info['latitude_name'] | |
if 'longitude_name' in ref_data_info.keys(): | |
ref_lon_name = ref_data_info['longitude_name'] | |
print 'Loading observation dataset:\n',ref_data_info | |
ref_name = ref_data_info['data_name'] | |
if ref_data_info['data_source'] == 'local': | |
ref_dataset = local.load_file(ref_data_info['path'], | |
ref_data_info['variable'], name=ref_name, | |
lat_name=ref_lat_name, lon_name=ref_lon_name) | |
elif ref_data_info['data_source'] == 'rcmed': | |
ref_dataset = rcmed.parameter_dataset(ref_data_info['dataset_id'], | |
ref_data_info['parameter_id'], | |
min_lat, max_lat, min_lon, max_lon, | |
start_time, end_time) | |
elif ref_data_info['data_source'] == 'ESGF': | |
ds = esgf.load_dataset(dataset_id = ref_data_info['dataset_id'], | |
variable = ref_data_info['variable'], | |
esgf_username=username, | |
esgf_password=password) | |
ref_dataset = ds[0] | |
else: | |
print ' ' | |
if temporal_resolution == 'daily' or temporal_resolution == 'monthly': | |
ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, temporal_resolution) | |
if 'multiplying_factor' in ref_data_info.keys(): | |
ref_dataset.values = ref_dataset.values*ref_data_info['multiplying_factor'] | |
""" Step 2: Load model NetCDF Files into OCW Dataset Objects """ | |
model_lat_name = None | |
model_lon_name = None | |
if 'latitude_name' in model_data_info.keys(): | |
model_lat_name = model_data_info['latitude_name'] | |
if 'longitude_name' in model_data_info.keys(): | |
model_lon_name = model_data_info['longitude_name'] | |
boundary_check_model = True | |
if 'GCM_data' in model_data_info.keys(): | |
if model_data_info['GCM_data']: | |
boundary_check_model = False | |
print 'Loading model datasets:\n',model_data_info | |
if model_data_info['data_source'] == 'local': | |
model_datasets = local.load_multiple_files(file_path=model_data_info['path'], | |
variable_name =model_data_info['variable'], | |
lat_name=model_lat_name, lon_name=model_lon_name) | |
model_names = [dataset.name for dataset in model_datasets] | |
elif model_data_info['data_source'] == 'ESGF': | |
md = esgf.load_dataset(dataset_id=model_data_info['dataset_id'], | |
variable=model_data_info['variable'], | |
esgf_username=username, | |
esgf_password=password) | |
model_datasets = [] | |
model_names = [] | |
model_datasets.append(md[0]) | |
model_names.append(model_data_info['data_name']) | |
else: | |
print ' ' | |
# TO DO: support RCMED | |
if temporal_resolution == 'daily' or temporal_resolution == 'monthly': | |
for idata,dataset in enumerate(model_datasets): | |
model_datasets[idata] = dsp.normalize_dataset_datetimes(dataset, temporal_resolution) | |
""" Step 3: Subset the data for temporal and spatial domain """ | |
# Create a Bounds object to use for subsetting | |
if time_info['maximum_overlap_period']: | |
start_time, end_time = utils.get_temporal_overlap([ref_dataset]+model_datasets) | |
print 'Maximum overlap period' | |
print 'start_time:', start_time | |
print 'end_time:', end_time | |
if temporal_resolution == 'monthly' and end_time.day !=1: | |
end_time = end_time.replace(day=1) | |
if ref_data_info['data_source'] == 'rcmed': | |
min_lat = np.max([min_lat, ref_dataset.lats.min()]) | |
max_lat = np.min([max_lat, ref_dataset.lats.max()]) | |
min_lon = np.max([min_lon, ref_dataset.lons.min()]) | |
max_lon = np.min([max_lon, ref_dataset.lons.max()]) | |
bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time) | |
ref_dataset = dsp.subset(ref_dataset, bounds) | |
if ref_dataset.temporal_resolution() != temporal_resolution: | |
ref_dataset = dsp.temporal_rebin(ref_dataset, temporal_resolution) | |
for idata,dataset in enumerate(model_datasets): | |
model_datasets[idata] = dsp.subset(dataset, bounds) | |
if dataset.temporal_resolution() != temporal_resolution: | |
model_datasets[idata] = dsp.temporal_rebin(dataset, temporal_resolution) | |
# Temporaly subset both observation and model datasets for the user specified season | |
month_start = time_info['month_start'] | |
month_end = time_info['month_end'] | |
average_each_year = time_info['average_each_year'] | |
ref_dataset = dsp.temporal_subset(ref_dataset,month_start, month_end,average_each_year) | |
for idata,dataset in enumerate(model_datasets): | |
model_datasets[idata] = dsp.temporal_subset(dataset,month_start, month_end,average_each_year) | |
# generate grid points for regridding
if config['regrid']['regrid_on_reference']:
    new_lat = ref_dataset.lats
    new_lon = ref_dataset.lons
else:
    delta_lat = config['regrid']['regrid_dlat']
    delta_lon = config['regrid']['regrid_dlon']
    # number of grid points must be an integer for np.linspace
    nlat = int((max_lat - min_lat)/delta_lat)+1
    nlon = int((max_lon - min_lon)/delta_lon)+1
    new_lat = np.linspace(min_lat, max_lat, nlat)
    new_lon = np.linspace(min_lon, max_lon, nlon)

# number of models
nmodel = len(model_datasets)
print 'Dataset loading completed'
print 'Observation data:', ref_name
print 'Number of model datasets:', nmodel
for model_name in model_names:
    print model_name
""" Step 4: Spatial regriding of the reference datasets """ | |
print 'Regridding datasets: ', config['regrid'] | |
if not config['regrid']['regrid_on_reference']: | |
ref_dataset = dsp.spatial_regrid(ref_dataset, new_lat, new_lon) | |
print 'Reference dataset has been regridded' | |
for idata,dataset in enumerate(model_datasets): | |
model_datasets[idata] = dsp.spatial_regrid(dataset, new_lat, new_lon, boundary_check = boundary_check_model) | |
print model_names[idata]+' has been regridded' | |
print 'Propagating missing data information' | |
ref_dataset = dsp.mask_missing_data([ref_dataset]+model_datasets)[0] | |
model_datasets = dsp.mask_missing_data([ref_dataset]+model_datasets)[1:] | |
""" Step 5: Checking and converting variable units """ | |
print 'Checking and converting variable units' | |
ref_dataset = dsp.variable_unit_conversion(ref_dataset) | |
for idata,dataset in enumerate(model_datasets): | |
model_datasets[idata] = dsp.variable_unit_conversion(dataset) | |
print 'Generating multi-model ensemble' | |
if len(model_datasets) >= 2.: | |
model_datasets.append(dsp.ensemble(model_datasets)) | |
model_names.append('ENS') | |
""" Step 6: Generate subregion average and standard deviation """ | |
if config['use_subregions']: | |
# sort the subregion by region names and make a list | |
subregions= sorted(config['subregions'].items(),key=operator.itemgetter(0)) | |
# number of subregions | |
nsubregion = len(subregions) | |
print 'Calculating spatial averages and standard deviations of ',str(nsubregion),' subregions' | |
ref_subregion_mean, ref_subregion_std, subregion_array = utils.calc_subregion_area_mean_and_std([ref_dataset], subregions) | |
model_subregion_mean, model_subregion_std, subregion_array = utils.calc_subregion_area_mean_and_std(model_datasets, subregions) | |
""" Step 7: Write a netCDF file """ | |
workdir = config['workdir'] | |
if workdir[-1] != '/': | |
workdir = workdir+'/' | |
print 'Writing a netcdf file: ',workdir+config['output_netcdf_filename'] | |
if not os.path.exists(workdir): | |
os.system("mkdir -p "+workdir) | |
if config['use_subregions']: | |
dsp.write_netcdf_multiple_datasets_with_subregions(ref_dataset, ref_name, model_datasets, model_names, | |
path=workdir+config['output_netcdf_filename'], | |
subregions=subregions, subregion_array = subregion_array, | |
ref_subregion_mean=ref_subregion_mean, ref_subregion_std=ref_subregion_std, | |
model_subregion_mean=model_subregion_mean, model_subregion_std=model_subregion_std) | |
else: | |
dsp.write_netcdf_multiple_datasets_with_subregions(ref_dataset, ref_name, model_datasets, model_names, | |
path=workdir+config['output_netcdf_filename']) | |
""" Step 8: Calculate metrics and draw plots """ | |
nmetrics = config['number_of_metrics_and_plots'] | |
if config['use_subregions']: | |
Map_plot_subregion(subregions, ref_dataset, workdir) | |
if nmetrics > 0: | |
print 'Calculating metrics and generating plots' | |
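    # Metric/plot pairs are read from config entries named metrics1/plots1,
    # metrics2/plots2, ... up to number_of_metrics_and_plots.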
    for imetric in np.arange(nmetrics)+1:
        metrics_name = config['metrics%1d' % imetric]
        plot_info = config['plots%1d' % imetric]
        file_name = workdir+plot_info['file_name']

        print 'metrics '+str(imetric)+'/'+str(nmetrics)+': ', metrics_name
        if metrics_name == 'Map_plot_bias_of_multiyear_climatology':
            row, column = plot_info['subplots_array']
            if 'map_projection' in plot_info.keys():
                Map_plot_bias_of_multiyear_climatology(ref_dataset, ref_name, model_datasets, model_names,
                                                       file_name, row, column, map_projection=plot_info['map_projection'])
            else:
                Map_plot_bias_of_multiyear_climatology(ref_dataset, ref_name, model_datasets, model_names,
                                                       file_name, row, column)
        elif metrics_name == 'Taylor_diagram_spatial_pattern_of_multiyear_climatology':
            Taylor_diagram_spatial_pattern_of_multiyear_climatology(ref_dataset, ref_name, model_datasets, model_names,
                                                                    file_name)
        elif config['use_subregions']:
            if metrics_name == 'Timeseries_plot_subregion_interannual_variability' and average_each_year:
                row, column = plot_info['subplots_array']
                Time_series_subregion(ref_subregion_mean, ref_name, model_subregion_mean, model_names, False,
                                      file_name, row, column,
                                      x_tick=['Y'+str(i+1) for i in np.arange(model_subregion_mean.shape[1])])
            if metrics_name == 'Timeseries_plot_subregion_annual_cycle' and not average_each_year and month_start == 1 and month_end == 12:
                row, column = plot_info['subplots_array']
                Time_series_subregion(ref_subregion_mean, ref_name, model_subregion_mean, model_names, True,
                                      file_name, row, column,
                                      x_tick=['J', 'F', 'M', 'A', 'M', 'J', 'J', 'A', 'S', 'O', 'N', 'D'])
            if metrics_name == 'Portrait_diagram_subregion_interannual_variability' and average_each_year:
                Portrait_diagram_subregion(ref_subregion_mean, ref_name, model_subregion_mean, model_names, False,
                                           file_name)
            if metrics_name == 'Portrait_diagram_subregion_annual_cycle' and not average_each_year and month_start == 1 and month_end == 12:
                Portrait_diagram_subregion(ref_subregion_mean, ref_name, model_subregion_mean, model_names, True,
                                           file_name)
        else:
            print metrics_name+' is not supported; please check the currently supported metrics'