| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| import os |
| import sys |
| import ssl |
| import yaml |
| import operator |
| from datetime import datetime |
| from glob import glob |
| from getpass import getpass |
| import numpy as np |
| import ocw.utils as utils |
| import ocw.dataset_processor as dsp |
| from ocw.dataset import Bounds |
| from ocw.dataset_loader import DatasetLoader |
| from metrics_and_plots import * |
| |
| def load_datasets_from_config(extra_opts, *loader_opts): |
| ''' |
| Generic dataset loading function. |
| ''' |
| for opt in loader_opts: |
| loader_name = opt['loader_name'] |
| if loader_name == 'esgf': |
| if extra_opts['password'] is None: |
| extra_opts['username'] = raw_input('Enter your ESGF OpenID:\n') |
| extra_opts['password'] = getpass( |
| prompt='Enter your ESGF password:\n') |
| |
| opt['esgf_username'] = extra_opts['username'] |
| opt['esgf_password'] = extra_opts['password'] |
| elif loader_name == 'rcmed': |
| opt['min_lat'] = extra_opts['min_lat'] |
| opt['max_lat'] = extra_opts['max_lat'] |
| opt['min_lon'] = extra_opts['min_lon'] |
| opt['max_lon'] = extra_opts['max_lon'] |
| opt['start_time'] = extra_opts['start_time'] |
| opt['end_time'] = extra_opts['end_time'] |
| |
| loader = DatasetLoader(*loader_opts) |
| loader.load_datasets() |
| return loader.datasets |
| |
| if hasattr(ssl, '_create_unverified_context'): |
| ssl._create_default_https_context = ssl._create_unverified_context |
| |
| config_file = str(sys.argv[1]) |
| |
| print 'Reading the configuration file ', config_file |
| config = yaml.load(open(config_file)) |
| time_info = config['time'] |
| temporal_resolution = time_info['temporal_resolution'] |
| |
| # Read time info |
| start_time = datetime.strptime(time_info['start_time'].strftime('%Y%m%d'),'%Y%m%d') |
| end_time = datetime.strptime(time_info['end_time'].strftime('%Y%m%d'),'%Y%m%d') |
| |
| # Read space info |
| space_info = config['space'] |
| if not 'boundary_type' in space_info: |
| min_lat = space_info['min_lat'] |
| max_lat = space_info['max_lat'] |
| min_lon = space_info['min_lon'] |
| max_lon = space_info['max_lon'] |
| else: |
| min_lat, max_lat, min_lon, max_lon = utils.CORDEX_boundary(space_info['boundary_type'][6:].replace(" ","").lower()) |
| |
| # Additional arguments for the DatasetLoader |
| extra_opts = {'min_lat': min_lat, 'max_lat': max_lat, 'min_lon': min_lon, |
| 'max_lon': max_lon, 'start_time': start_time, |
| 'end_time': end_time, 'username': None, 'password': None} |
| |
| # Get the dataset loader options |
| obs_data_info = config['datasets']['reference'] |
| model_data_info = config['datasets']['targets'] |
| |
| # Extract info we don't want to put into the loader config |
| # Multiplying Factor to scale obs by |
| multiplying_factor = np.ones(len(obs_data_info)) |
| for i, info in enumerate(obs_data_info): |
| if 'multiplying_factor' in info: |
| multiplying_factor[i] = info.pop('multiplying_factor') |
| |
| # If models are GCMs we can skip boundary check. Probably need to find a more |
| # elegant way to express this in the config file API. |
| boundary_check = True |
| for i, info in enumerate(model_data_info): |
| if 'boundary_check' in info: |
| boundary_check = info.pop('boundary_check') |
| |
| """ Step 1: Load the observation data """ |
| print 'Loading observation datasets:\n',obs_data_info |
| obs_datasets = load_datasets_from_config(extra_opts, *obs_data_info) |
| obs_names = [dataset.name for dataset in obs_datasets] |
| for i, dataset in enumerate(obs_datasets): |
| if temporal_resolution == 'daily' or temporal_resolution == 'monthly': |
| obs_datasets[i] = dsp.normalize_dataset_datetimes(dataset, |
| temporal_resolution) |
| |
| if multiplying_factor[i] != 1: |
| obs_dataset.values *= multiplying_factor[i] |
| |
| """ Step 2: Load model NetCDF Files into OCW Dataset Objects """ |
| model_datasets = load_datasets_from_config(extra_opts, *model_data_info) |
| model_names = [dataset.name for dataset in model_datasets] |
| if temporal_resolution == 'daily' or temporal_resolution == 'monthly': |
| for i, dataset in enumerate(model_datasets): |
| model_datasets[i] = dsp.normalize_dataset_datetimes(dataset, |
| temporal_resolution) |
| |
| """ Step 3: Subset the data for temporal and spatial domain """ |
| # Create a Bounds object to use for subsetting |
| if time_info['maximum_overlap_period']: |
| start_time, end_time = utils.get_temporal_overlap(obs_datasets+model_datasets) |
| print 'Maximum overlap period' |
| print 'start_time:', start_time |
| print 'end_time:', end_time |
| |
| if temporal_resolution == 'monthly' and end_time.day !=1: |
| end_time = end_time.replace(day=1) |
| |
| for i, dataset in enumerate(obs_datasets): |
| min_lat = np.max([min_lat, dataset.lats.min()]) |
| max_lat = np.min([max_lat, dataset.lats.max()]) |
| min_lon = np.max([min_lon, dataset.lons.min()]) |
| max_lon = np.min([max_lon, dataset.lons.max()]) |
| |
| if not 'boundary_type' in space_info: |
| bounds = Bounds(lat_min=min_lat, |
| lat_max=max_lat, |
| lon_min=min_lon, |
| lon_max=max_lon, |
| start=start_time, |
| end=end_time) |
| else: |
| bounds = Bounds(boundary_type=space_info['boundary_type'], |
| start=start_time, |
| end=end_time) |
| |
| for i, dataset in enumerate(obs_datasets): |
| obs_datasets[i] = dsp.subset(dataset, bounds) |
| if dataset.temporal_resolution() != temporal_resolution: |
| obs_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution) |
| |
| for i, dataset in enumerate(model_datasets): |
| model_datasets[i] = dsp.subset(dataset, bounds) |
| if dataset.temporal_resolution() != temporal_resolution: |
| model_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution) |
| |
| # Temporally subset both observation and model datasets |
| # for the user specified season |
| month_start = time_info['month_start'] |
| month_end = time_info['month_end'] |
| average_each_year = time_info['average_each_year'] |
| |
| # TODO: Fully support multiple observation / reference datasets. |
| # For now we will only use the first reference dataset listed in the config file |
| obs_dataset = obs_datasets[0] |
| obs_name = obs_names[0] |
| obs_dataset = dsp.temporal_subset(obs_dataset,month_start, month_end,average_each_year) |
| for i, dataset in enumerate(model_datasets): |
| model_datasets[i] = dsp.temporal_subset(dataset, month_start, month_end, |
| average_each_year) |
| |
| # generate grid points for regridding |
| if config['regrid']['regrid_on_reference']: |
| new_lat = obs_dataset.lats |
| new_lon = obs_dataset.lons |
| else: |
| delta_lat = config['regrid']['regrid_dlat'] |
| delta_lon = config['regrid']['regrid_dlon'] |
| nlat = (max_lat - min_lat)/delta_lat+1 |
| nlon = (max_lon - min_lon)/delta_lon+1 |
| new_lat = np.linspace(min_lat, max_lat, nlat) |
| new_lon = np.linspace(min_lon, max_lon, nlon) |
| |
| # number of models |
| nmodel = len(model_datasets) |
| print 'Dataset loading completed' |
| print 'Observation data:', obs_name |
| print 'Number of model datasets:',nmodel |
| for model_name in model_names: |
| print model_name |
| |
| """ Step 4: Spatial regriding of the reference datasets """ |
| print 'Regridding datasets: ', config['regrid'] |
| if not config['regrid']['regrid_on_reference']: |
| obs_dataset = dsp.spatial_regrid(obs_dataset, new_lat, new_lon) |
| print 'Reference dataset has been regridded' |
| for i, dataset in enumerate(model_datasets): |
| model_datasets[i] = dsp.spatial_regrid(dataset, new_lat, new_lon, |
| boundary_check=boundary_check) |
| print model_names[i]+' has been regridded' |
| print 'Propagating missing data information' |
| obs_dataset = dsp.mask_missing_data([obs_dataset]+model_datasets)[0] |
| model_datasets = dsp.mask_missing_data([obs_dataset]+model_datasets)[1:] |
| |
| """ Step 5: Checking and converting variable units """ |
| print 'Checking and converting variable units' |
| obs_dataset = dsp.variable_unit_conversion(obs_dataset) |
| for idata,dataset in enumerate(model_datasets): |
| model_datasets[idata] = dsp.variable_unit_conversion(dataset) |
| |
| |
| print 'Generating multi-model ensemble' |
| if len(model_datasets) >= 2.: |
| model_datasets.append(dsp.ensemble(model_datasets)) |
| model_names.append('ENS') |
| |
| """ Step 6: Generate subregion average and standard deviation """ |
| if config['use_subregions']: |
| # sort the subregion by region names and make a list |
| subregions= sorted(config['subregions'].items(),key=operator.itemgetter(0)) |
| |
| # number of subregions |
| nsubregion = len(subregions) |
| |
| print ('Calculating spatial averages and standard deviations of ', |
| str(nsubregion),' subregions') |
| |
| obs_subregion_mean, obs_subregion_std, subregion_array = ( |
| utils.calc_subregion_area_mean_and_std([obs_dataset], subregions)) |
| model_subregion_mean, model_subregion_std, subregion_array = ( |
| utils.calc_subregion_area_mean_and_std(model_datasets, subregions)) |
| |
| """ Step 7: Write a netCDF file """ |
| workdir = config['workdir'] |
| if workdir[-1] != '/': |
| workdir = workdir+'/' |
| print 'Writing a netcdf file: ',workdir+config['output_netcdf_filename'] |
| if not os.path.exists(workdir): |
| os.system("mkdir -p "+workdir) |
| |
| if config['use_subregions']: |
| dsp.write_netcdf_multiple_datasets_with_subregions( |
| obs_dataset, obs_name, model_datasets, model_names, |
| path=workdir+config['output_netcdf_filename'], |
| subregions=subregions, subregion_array=subregion_array, |
| ref_subregion_mean=obs_subregion_mean, |
| ref_subregion_std=obs_subregion_std, |
| model_subregion_mean=model_subregion_mean, |
| model_subregion_std=model_subregion_std) |
| else: |
| dsp.write_netcdf_multiple_datasets_with_subregions( |
| obs_dataset, obs_name, model_datasets, |
| model_names, |
| path=workdir+config['output_netcdf_filename']) |
| |
| """ Step 8: Calculate metrics and draw plots """ |
| nmetrics = config['number_of_metrics_and_plots'] |
| if config['use_subregions']: |
| Map_plot_subregion(subregions, obs_dataset, workdir) |
| |
| if nmetrics > 0: |
| print 'Calculating metrics and generating plots' |
| for imetric in np.arange(nmetrics)+1: |
| metrics_name = config['metrics'+'%1d' %imetric] |
| plot_info = config['plots'+'%1d' %imetric] |
| file_name = workdir+plot_info['file_name'] |
| |
| print 'metrics '+str(imetric)+'/'+str(nmetrics)+': ', metrics_name |
| if metrics_name == 'Map_plot_bias_of_multiyear_climatology': |
| row, column = plot_info['subplots_array'] |
| if 'map_projection' in plot_info.keys(): |
| Map_plot_bias_of_multiyear_climatology( |
| obs_dataset, obs_name, model_datasets, model_names, |
| file_name, row, column, |
| map_projection=plot_info['map_projection']) |
| else: |
| Map_plot_bias_of_multiyear_climatology( |
| obs_dataset, obs_name, model_datasets, model_names, |
| file_name, row, column) |
| elif metrics_name == 'Taylor_diagram_spatial_pattern_of_multiyear_climatology': |
| Taylor_diagram_spatial_pattern_of_multiyear_climatology( |
| obs_dataset, obs_name, model_datasets, model_names, |
| file_name) |
| elif config['use_subregions']: |
| if (metrics_name == 'Timeseries_plot_subregion_interannual_variability' |
| and average_each_year): |
| row, column = plot_info['subplots_array'] |
| Time_series_subregion( |
| obs_subregion_mean, obs_name, model_subregion_mean, |
| model_names, False, file_name, row, column, |
| x_tick=['Y'+str(i+1) |
| for i in np.arange(model_subregion_mean.shape[1])]) |
| |
| if (metrics_name == 'Timeseries_plot_subregion_annual_cycle' |
| and not average_each_year and month_start==1 and month_end==12): |
| row, column = plot_info['subplots_array'] |
| Time_series_subregion( |
| obs_subregion_mean, obs_name, |
| model_subregion_mean, model_names, True, |
| file_name, row, column, |
| x_tick=['J','F','M','A','M','J','J','A','S','O','N','D']) |
| |
| if (metrics_name == 'Portrait_diagram_subregion_interannual_variability' |
| and average_each_year): |
| Portrait_diagram_subregion(obs_subregion_mean, obs_name, |
| model_subregion_mean, model_names, |
| False, file_name) |
| |
| if (metrics_name == 'Portrait_diagram_subregion_annual_cycle' |
| and not average_each_year and month_start==1 and month_end==12): |
| Portrait_diagram_subregion(obs_subregion_mean, obs_name, |
| model_subregion_mean, model_names, |
| True, file_name) |
| else: |
| print 'please check the currently supported metrics' |