| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| from __future__ import print_function |
| import os |
| import sys |
| import ssl |
| import yaml |
| import operator |
| from datetime import datetime |
| from glob import glob |
| from getpass import getpass |
| import numpy as np |
| |
| # Need these lines to run RCMES through SSH without X11 |
| import matplotlib |
| matplotlib.use('Agg') |
| |
| import ocw.utils as utils |
| import ocw.dataset_processor as dsp |
| from ocw.dataset import Bounds |
| from ocw.dataset_loader import DatasetLoader |
| from metrics_and_plots import * |
| |
| def load_datasets_from_config(extra_opts, *loader_opts): |
| ''' |
| Generic dataset loading function. |
| ''' |
| for opt in loader_opts: |
| loader_name = opt['loader_name'] |
| if loader_name == 'esgf': |
| if extra_opts['password'] is None: |
                # raw_input() exists only in Python 2; input() is the
                # Python 3 equivalent.
                extra_opts['username'] = input('Enter your ESGF OpenID:\n')
| extra_opts['password'] = getpass( |
| prompt='Enter your ESGF password:\n') |
| |
| opt['esgf_username'] = extra_opts['username'] |
| opt['esgf_password'] = extra_opts['password'] |
| elif loader_name == 'rcmed': |
| opt['min_lat'] = extra_opts['min_lat'] |
| opt['max_lat'] = extra_opts['max_lat'] |
| opt['min_lon'] = extra_opts['min_lon'] |
| opt['max_lon'] = extra_opts['max_lon'] |
| opt['start_time'] = extra_opts['start_time'] |
| opt['end_time'] = extra_opts['end_time'] |
| |
| loader = DatasetLoader(*loader_opts) |
| loader.load_datasets() |
| return loader.datasets |
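
# Illustrative sketch of one way the 'datasets' section of the configuration
# might look. Only 'loader_name' is required by the function above; the other
# keys shown here are assumptions that depend on the particular loader:
#
#   datasets:
#     - loader_name: local
#       name: WRF
#       file_path: ./data/WRF_tasmax_*.nc
#       variable_name: tasmax
#     - loader_name: rcmed
#       name: CRU
#       dataset_id: 10
#       parameter_id: 37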
| |
# Disable SSL certificate verification so that remote datasets can be
# downloaded from servers whose certificates cannot be verified.
if hasattr(ssl, '_create_unverified_context'):
    ssl._create_default_https_context = ssl._create_unverified_context
| |
config_file = sys.argv[1]
| |
| print('Reading the configuration file {}'.format(config_file)) |
with open(config_file) as f:
    config = yaml.safe_load(f)
| time_info = config['time'] |
| temporal_resolution = time_info['temporal_resolution'] |
| |
| # Read time info |
| maximum_overlap_period = time_info.get('maximum_overlap_period', False) |
| if not maximum_overlap_period: |
    # PyYAML parses unquoted dates into datetime.date objects; convert them
    # to datetime.datetime via a strftime/strptime round trip.
    start_time = datetime.strptime(
        time_info['start_time'].strftime('%Y%m%d'), '%Y%m%d')
    end_time = datetime.strptime(
        time_info['end_time'].strftime('%Y%m%d'), '%Y%m%d')
| else: |
| # These values will be determined after datasets are loaded |
| start_time, end_time = None, None |
| |
| # Read space info |
| space_info = config['space'] |
if 'boundary_type' not in space_info:
| min_lat = space_info['min_lat'] |
| max_lat = space_info['max_lat'] |
| min_lon = space_info['min_lon'] |
| max_lon = space_info['max_lon'] |
| else: |
| domain = space_info['boundary_type'] |
| if 'CORDEX' in domain: |
| domain = domain.replace('CORDEX', '').lower() |
| domain = domain.strip() |
| min_lat, max_lat, min_lon, max_lon = utils.CORDEX_boundary(domain) |
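    # Example: a boundary_type of 'CORDEX Africa' is reduced to the key
    # 'africa' before the lookup, so the bounding box would come from
    # utils.CORDEX_boundary('africa') (assuming 'africa' is among the domain
    # names that utils.CORDEX_boundary recognizes).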
| |
| |
| # Additional arguments for the DatasetLoader |
| extra_opts = {'min_lat': min_lat, 'max_lat': max_lat, 'min_lon': min_lon, |
| 'max_lon': max_lon, 'start_time': start_time, |
| 'end_time': end_time, 'username': None, 'password': None} |
| |
| # Get the dataset loader options |
| data_info = config['datasets'] |
| |
# Extract info we don't want to put into the loader config.
# Multiplying factor to scale the observations by. Currently only supported
# for the reference (first) dataset. We should instead make this a parameter
# of each loader and Dataset object.
| fact = data_info[0].pop('multiplying_factor', 1) |
| |
| """ Step 1: Load the datasets """ |
| print('Loading datasets:\n{}'.format(data_info)) |
| datasets = load_datasets_from_config(extra_opts, *data_info) |
| multiplying_factor = np.ones(len(datasets)) |
| multiplying_factor[0] = fact |
| names = [dataset.name for dataset in datasets] |
| for i, dataset in enumerate(datasets): |
| res = dataset.temporal_resolution() |
    if res in ('daily', 'monthly'):
| datasets[i] = dsp.normalize_dataset_datetimes(dataset, res) |
| if multiplying_factor[i] != 1: |
| datasets[i].values *= multiplying_factor[i] |
| |
| """ Step 2: Subset the data for temporal and spatial domain """ |
| # Create a Bounds object to use for subsetting |
if time_info.get('adjust_model_years_for_climatology_calculation', False):
    datasets = utils.adjust_model_years_for_climatology_calculation(datasets)
| if maximum_overlap_period: |
| start_time, end_time = utils.get_temporal_overlap(datasets) |
| print('Maximum overlap period') |
| print('start_time: {}'.format(start_time)) |
| print('end_time: {}'.format(end_time)) |
| |
if temporal_resolution == 'monthly' and end_time.day != 1:
| end_time = end_time.replace(day=1) |
| |
# Shrink the domain to the region covered by every dataset.
for dataset in datasets:
    min_lat = np.max([min_lat, dataset.lats.min()])
    max_lat = np.min([max_lat, dataset.lats.max()])
    min_lon = np.max([min_lon, dataset.lons.min()])
    max_lon = np.min([max_lon, dataset.lons.max()])
| |
if 'boundary_type' not in space_info:
| bounds = Bounds(lat_min=min_lat, |
| lat_max=max_lat, |
| lon_min=min_lon, |
| lon_max=max_lon, |
| start=start_time, |
| end=end_time) |
| else: |
| bounds = Bounds(boundary_type=space_info['boundary_type'], |
| start=start_time, |
| end=end_time) |
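
# The Bounds object carries both the spatial extent (explicit lat/lon limits
# or a named CORDEX boundary) and the temporal extent used for subsetting.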
| |
| for i, dataset in enumerate(datasets): |
| datasets[i] = dsp.subset(dataset, bounds) |
| if dataset.temporal_resolution() != temporal_resolution: |
| datasets[i] = dsp.temporal_rebin(datasets[i], temporal_resolution) |
| |
| # Temporally subset both observation and model datasets |
| # for the user specified season |
| month_start = time_info['month_start'] |
| month_end = time_info['month_end'] |
| average_each_year = time_info['average_each_year'] |
| |
| # For now we will treat the first listed dataset as the reference dataset for |
| # evaluation purposes. |
| for i, dataset in enumerate(datasets): |
| datasets[i] = dsp.temporal_subset(dataset, month_start, month_end, |
| average_each_year) |
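
# After this subset each dataset retains only the months in
# [month_start, month_end]; when average_each_year is set, dsp.temporal_subset
# is expected to collapse those months into a single value per year, yielding
# an interannual time series rather than a monthly annual cycle.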
| |
| reference_dataset = datasets[0] |
| target_datasets = datasets[1:] |
| reference_name = names[0] |
| target_names = names[1:] |
| |
| # generate grid points for regridding |
| if config['regrid']['regrid_on_reference']: |
| new_lat = reference_dataset.lats |
| new_lon = reference_dataset.lons |
| else: |
| delta_lat = config['regrid']['regrid_dlat'] |
| delta_lon = config['regrid']['regrid_dlon'] |
    # np.linspace requires an integer sample count, so truncate explicitly.
    nlat = int((max_lat - min_lat) / delta_lat) + 1
    nlon = int((max_lon - min_lon) / delta_lon) + 1
| new_lat = np.linspace(min_lat, max_lat, nlat) |
| new_lon = np.linspace(min_lon, max_lon, nlon) |
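    # Sanity check with hypothetical values: for a domain spanning 30 degrees
    # of latitude and regrid_dlat = 0.5, nlat = int(30 / 0.5) + 1 = 61, and
    # np.linspace then produces grid points spaced exactly 0.5 degrees apart.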
| |
| # Get flag for boundary checking for regridding. By default, this is set to True |
| # since the main intent of this program is to evaluate RCMs. However, it can be |
| # used for GCMs in which case it should be set to False to save time. |
| boundary_check = config['regrid'].get('boundary_check', True) |
| |
| # number of target datasets (usually models, but can also be obs / reanalysis) |
| ntarget = len(target_datasets) |
| print('Dataset loading completed') |
| print('Reference data: {}'.format(reference_name)) |
| print('Number of target datasets: {}'.format(ntarget)) |
| for target_name in target_names: |
| print(target_name) |
| |
| """ Step 3: Spatial regriding of the datasets """ |
| print('Regridding datasets: {}'.format(config['regrid'])) |
| if not config['regrid']['regrid_on_reference']: |
| reference_dataset = dsp.spatial_regrid(reference_dataset, new_lat, new_lon) |
| print('Reference dataset has been regridded') |
| for i, dataset in enumerate(target_datasets): |
| target_datasets[i] = dsp.spatial_regrid(dataset, new_lat, new_lon, |
| boundary_check=boundary_check) |
| print('{} has been regridded'.format(target_names[i])) |
| print('Propagating missing data information') |
datasets = dsp.mask_missing_data([reference_dataset] + target_datasets)
| reference_dataset = datasets[0] |
| target_datasets = datasets[1:] |
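
# After mask_missing_data, a grid point that is missing in any one dataset is
# expected to be masked in all of them, so every metric below compares the
# same set of valid points across the reference and the targets.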
| |
| """ Step 4: Checking and converting variable units """ |
| print('Checking and converting variable units') |
| reference_dataset = dsp.variable_unit_conversion(reference_dataset) |
| for i, dataset in enumerate(target_datasets): |
| target_datasets[i] = dsp.variable_unit_conversion(dataset) |
| |
| print('Generating multi-model ensemble') |
if len(target_datasets) >= 2:
| target_datasets.append(dsp.ensemble(target_datasets)) |
| target_names.append('ENS') |
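    # dsp.ensemble is expected to average the target datasets into a single
    # multi-model mean, which is then evaluated like any other target.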
| |
| """ Step 5: Generate subregion average and standard deviation """ |
| if config['use_subregions']: |
| # sort the subregion by region names and make a list |
    subregions = sorted(config['subregions'].items(),
                        key=operator.itemgetter(0))
| |
| # number of subregions |
| nsubregion = len(subregions) |
| |
| print('Calculating spatial averages and standard deviations of {} subregions' |
| .format(nsubregion)) |
| |
| reference_subregion_mean, reference_subregion_std, subregion_array = ( |
| utils.calc_subregion_area_mean_and_std([reference_dataset], subregions)) |
| target_subregion_mean, target_subregion_std, subregion_array = ( |
| utils.calc_subregion_area_mean_and_std(target_datasets, subregions)) |
| |
| """ Step 6: Write a netCDF file """ |
| workdir = config['workdir'] |
if workdir[-1] != '/':
    workdir = workdir + '/'
print('Writing a netcdf file: ', workdir + config['output_netcdf_filename'])
if not os.path.exists(workdir):
    # os.makedirs is portable and avoids shelling out to 'mkdir -p'.
    os.makedirs(workdir)
| |
| if config['use_subregions']: |
| dsp.write_netcdf_multiple_datasets_with_subregions( |
| reference_dataset, reference_name, target_datasets, target_names, |
| path=workdir+config['output_netcdf_filename'], |
| subregions=subregions, subregion_array=subregion_array, |
| ref_subregion_mean=reference_subregion_mean, |
| ref_subregion_std=reference_subregion_std, |
| model_subregion_mean=target_subregion_mean, |
| model_subregion_std=target_subregion_std) |
| else: |
| dsp.write_netcdf_multiple_datasets_with_subregions( |
| reference_dataset, reference_name, target_datasets, |
| target_names, |
| path=workdir+config['output_netcdf_filename']) |
| |
| """ Step 7: Calculate metrics and draw plots """ |
| nmetrics = config['number_of_metrics_and_plots'] |
| if config['use_subregions']: |
| Map_plot_subregion(subregions, reference_dataset, workdir) |
| |
| if nmetrics > 0: |
| print('Calculating metrics and generating plots') |
    for imetric in range(1, nmetrics + 1):
        metrics_name = config['metrics%1d' % imetric]
        plot_info = config['plots%1d' % imetric]
| file_name = workdir+plot_info['file_name'] |
| |
| print('metrics {0}/{1}: {2}'.format(imetric, nmetrics, metrics_name)) |
| default_shape = (int(np.ceil(np.sqrt(ntarget + 2))), |
| int(np.ceil(np.sqrt(ntarget + 2)))) |
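        # ntarget + 2 panels (the reference, the targets, and the ensemble)
        # must fit on the page; ceil(sqrt(...)) gives the smallest near-square
        # subplot grid that holds them, e.g. 4 targets -> ceil(sqrt(6)) = 3,
        # i.e. a 3 x 3 layout.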
| if metrics_name == 'Map_plot_bias_of_multiyear_climatology': |
| row, column = plot_info.get('subplots_array', default_shape) |
            if 'map_projection' in plot_info:
| Map_plot_bias_of_multiyear_climatology( |
| reference_dataset, reference_name, target_datasets, target_names, |
| file_name, row, column, |
| map_projection=plot_info['map_projection']) |
| else: |
| Map_plot_bias_of_multiyear_climatology( |
| reference_dataset, reference_name, target_datasets, target_names, |
| file_name, row, column) |
| elif metrics_name == 'Taylor_diagram_spatial_pattern_of_multiyear_climatology': |
| Taylor_diagram_spatial_pattern_of_multiyear_climatology( |
| reference_dataset, reference_name, target_datasets, target_names, |
| file_name) |
| elif config['use_subregions']: |
| if (metrics_name == 'Timeseries_plot_subregion_interannual_variability' |
| and average_each_year): |
| row, column = plot_info.get('subplots_array', default_shape) |
| Time_series_subregion( |
| reference_subregion_mean, reference_name, target_subregion_mean, |
| target_names, False, file_name, row, column, |
| x_tick=['Y'+str(i+1) |
| for i in np.arange(target_subregion_mean.shape[1])]) |
| |
| if (metrics_name == 'Timeseries_plot_subregion_annual_cycle' |
                and not average_each_year and month_start == 1 and month_end == 12):
| row, column = plot_info.get('subplots_array', (1, 1)) |
| Time_series_subregion( |
| reference_subregion_mean, reference_name, |
| target_subregion_mean, target_names, True, |
| file_name, row, column, |
| x_tick=['J','F','M','A','M','J','J','A','S','O','N','D']) |
| |
| if (metrics_name == 'Portrait_diagram_subregion_interannual_variability' |
| and average_each_year): |
| Portrait_diagram_subregion(reference_subregion_mean, reference_name, |
| target_subregion_mean, target_names, |
| False, file_name) |
| |
| if (metrics_name == 'Portrait_diagram_subregion_annual_cycle' |
                and not average_each_year and month_start == 1 and month_end == 12):
| Portrait_diagram_subregion(reference_subregion_mean, reference_name, |
| target_subregion_mean, target_names, |
| True, file_name) |
| else: |
            print('Metric {} is not supported; please check the list of '
                  'currently supported metrics.'.format(metrics_name))