| # |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| |
| import datetime |
| import logging |
| |
| import netCDF4 |
| import numpy as np |
| import numpy.ma as ma |
| import scipy.ndimage |
| from matplotlib.path import Path |
| from scipy.interpolate import griddata |
| from scipy.ndimage import map_coordinates |
| |
| import ocw.utils as utils |
| from ocw import dataset as ds |
| |
| logger = logging.getLogger(__name__) |
| |
| |
| def temporal_subset(target_dataset, month_start, month_end, |
| average_each_year=False): |
| """ Temporally subset data given month_index. |
| |
| :param month_start: An integer for beginning month (Jan=1) |
| :type month_start: :class:`int` |
| |
| :param month_end: An integer for ending month (Jan=1) |
| :type month_end: :class:`int` |
| |
| :param target_dataset: Dataset object that needs temporal subsetting |
| :type target_dataset: Open Climate Workbench Dataset Object |
| |
| :param average_each_year: If True, output dataset is averaged for each year |
    :type average_each_year: :class:`bool`
| |
    :returns: A temporally subset OCW Dataset
| :rtype: Open Climate Workbench Dataset Object |
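
    :Example: A minimal usage sketch, assuming ``my_dataset`` is an
        already-loaded OCW Dataset of monthly values::

            # Boreal summer (June-August), averaged into one map per year
            jja = temporal_subset(my_dataset, 6, 8, average_each_year=True)

            # A season spanning the year boundary (December-February)
            djf = temporal_subset(my_dataset, 12, 2)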
| """ |
| |
| if month_start > month_end: |
| month_index = list(range(month_start, 13)) |
| month_index.extend(list(range(1, month_end + 1))) |
| else: |
| month_index = range(month_start, month_end + 1) |
| |
| dates = target_dataset.times |
| months = np.array([d.month for d in dates]) |
| time_index = [] |
| for m_value in month_index: |
| time_index.extend(list(np.where(months == m_value)[0])) |
| time_index.sort() |
| |
| new_dataset = ds.Dataset(target_dataset.lats, |
| target_dataset.lons, |
| target_dataset.times[time_index], |
| target_dataset.values[time_index, :], |
| variable=target_dataset.variable, |
| units=target_dataset.units, |
| name=target_dataset.name) |
| |
| if average_each_year: |
| new_times = new_dataset.times |
| nmonth = len(month_index) |
| ntime = new_times.size |
| nyear = ntime // nmonth |
| if ntime % nmonth != 0: |
| logger.warning("Number of times in dataset ({}) does not " |
| "divide evenly into {} year(s). Trimming data..." |
| .format(ntime, nyear)) |
| slc = utils.trim_dataset(new_dataset) |
| new_dataset.values = new_dataset.values[slc] |
| new_times = new_times[slc] |
| nyear = new_times.size // nmonth |
| |
| averaged_time = [] |
| ny, nx = target_dataset.values.shape[1:] |
| averaged_values = ma.zeros([nyear, ny, nx]) |
| for iyear in np.arange(nyear): |
| # centered time index of the season between month_start and |
| # month_end in each year |
| center_index = int(nmonth / 2 + iyear * nmonth) |
| if nmonth == 1: |
| center_index = iyear |
| averaged_time.append(new_times[center_index]) |
| averaged_values[iyear, :] = ma.average(new_dataset.values[ |
| nmonth * iyear: nmonth * iyear + nmonth, :], axis=0) |
| new_dataset = ds.Dataset(target_dataset.lats, |
| target_dataset.lons, |
| np.array(averaged_time), |
| averaged_values, |
| variable=target_dataset.variable, |
| units=target_dataset.units, |
| name=target_dataset.name) |
| return new_dataset |
| |
| |
| def temporal_rebin(target_dataset, temporal_resolution): |
| """ Rebin a Dataset to a new temporal resolution |
| |
    :param target_dataset: Dataset object that needs to be temporally rebinned
| :type target_dataset: :class:`dataset.Dataset` |
| |
    :param temporal_resolution: The new temporal resolution. One of
        'full', 'annual', 'monthly', or 'daily'.
    :type temporal_resolution: :mod:`string`
| |
| :returns: A new temporally rebinned Dataset |
| :rtype: :class:`dataset.Dataset` |
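
    :Example: A minimal usage sketch, assuming ``daily_dataset`` is an
        already-loaded OCW Dataset of daily values::

            monthly_dataset = temporal_rebin(daily_dataset, 'monthly')
            annual_dataset = temporal_rebin(daily_dataset, 'annual')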
| """ |
    # Decode the temporal resolution into a string format that
    # _rcmes_calc_average_on_new_time_unit() can understand
| |
| binned_values, binned_dates = _rcmes_calc_average_on_new_time_unit( |
| target_dataset.values, target_dataset.times, temporal_resolution) |
| binned_dates = np.array(binned_dates) |
| new_dataset = ds.Dataset(target_dataset.lats, |
| target_dataset.lons, |
| binned_dates, |
| binned_values, |
| variable=target_dataset.variable, |
| units=target_dataset.units, |
| name=target_dataset.name, |
| origin=target_dataset.origin) |
| return new_dataset |
| |
| |
| def temporal_rebin_with_time_index(target_dataset, nt_average): |
| """ Rebin a Dataset to a new temporal resolution |
| |
    :param target_dataset: Dataset object that needs to be temporally rebinned
| :type target_dataset: :class:`dataset.Dataset` |
| |
    :param nt_average: Time resolution for the output datasets.
        It is the number of consecutive time indices to be averaged:
        (length of time dimension in the rebinned dataset) =
        (original time dimension length / nt_average)
    :type nt_average: integer
| |
| :returns: A new temporally rebinned Dataset |
| :rtype: :class:`dataset.Dataset` |
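
    :Example: A minimal usage sketch, assuming ``daily_dataset`` is an
        already-loaded OCW Dataset of daily values::

            # Average every five consecutive time steps (e.g. into pentads)
            pentads = temporal_rebin_with_time_index(daily_dataset, 5)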
| """ |
| nt = target_dataset.times.size |
| if nt % nt_average != 0: |
        logger.warning('Length of time dimension must be a multiple of '
                       'nt_average; extra time steps will be ignored.')
| # nt2 is the length of time dimension in the rebinned dataset |
| nt2 = nt // nt_average |
| binned_dates = target_dataset.times[np.arange(nt2) * nt_average] |
| binned_values = ma.zeros( |
| np.insert(target_dataset.values.shape[1:], 0, nt2)) |
| for it in np.arange(nt2): |
| binned_values[it, :] = ma.average( |
| target_dataset.values[nt_average * it: |
| nt_average * it + nt_average, |
| :], |
| axis=0) |
| new_dataset = ds.Dataset(target_dataset.lats, |
| target_dataset.lons, |
| binned_dates, |
| binned_values, |
| variable=target_dataset.variable, |
| units=target_dataset.units, |
| name=target_dataset.name, |
| origin=target_dataset.origin) |
| return new_dataset |
| |
| |
| def spatial_regrid(target_dataset, new_latitudes, new_longitudes, |
| boundary_check=True): |
| """ Regrid a Dataset using the new latitudes and longitudes |
| |
    :param target_dataset: Dataset object that needs to be spatially regridded
| :type target_dataset: :class:`dataset.Dataset` |
| |
| :param new_latitudes: Array of latitudes |
| :type new_latitudes: :class:`numpy.ndarray` |
| |
| :param new_longitudes: Array of longitudes |
| :type new_longitudes: :class:`numpy.ndarray` |
| |
    :param boundary_check: Check whether the regridding domain's boundaries
        fall outside target_dataset's domain.
    :type boundary_check: :class:`bool`
| |
| :returns: A new spatially regridded Dataset |
| :rtype: :class:`dataset.Dataset` |
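
    :Example: A minimal usage sketch, assuming ``my_dataset`` is an
        already-loaded OCW Dataset::

            import numpy as np
            # Hypothetical regular one-degree target grid
            new_lats = np.arange(-45., 46., 1.)
            new_lons = np.arange(-180., 181., 1.)
            regridded = spatial_regrid(my_dataset, new_lats, new_lons)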
| """ |
| |
| # Create grids of the given lats and lons for the underlying API |
| # NOTE: np.meshgrid() requires inputs (x, y) and returns data |
| # of shape(y|lat|rows, x|lon|columns). So we pass in lons, lats |
| # and get back data.shape(lats, lons) |
| if target_dataset.lons.ndim == 1 and target_dataset.lats.ndim == 1: |
| regular_grid = True |
| lons, lats = np.meshgrid(target_dataset.lons, target_dataset.lats) |
| else: |
| regular_grid = False |
| lons = target_dataset.lons |
| lats = target_dataset.lats |
| if new_longitudes.ndim == 1 and new_latitudes.ndim == 1: |
| new_lons, new_lats = np.meshgrid(new_longitudes, new_latitudes) |
| else: |
| new_lons = new_longitudes |
| new_lats = new_latitudes |
| |
| ny_old, nx_old = lats.shape |
| ny_new, nx_new = new_lats.shape |
| |
| for iy in np.arange(ny_old): |
| if not all(x < y for x, y in zip(lons[iy, :], lons[iy, 1:])): |
| lons[iy, :][lons[iy, :] < 0] = lons[iy, :][lons[iy, :] < 0] + 360. |
| |
    # Make masked array of shape (times, new_latitudes, new_longitudes)
    new_values = ma.zeros([len(target_dataset.times),
                           ny_new, nx_new]) + 1.e+20
| |
| # Boundary vertices of target_dataset |
| vertices = [] |
| |
| if regular_grid: |
| vertices.append([lons[0, 0], lats[0, 0]]) |
| vertices.append([lons[-1, 0], lats[-1, 0]]) |
| vertices.append([lons[-1, -1], lats[-1, -1]]) |
| vertices.append([lons[0, -1], lats[0, -1]]) |
| else: |
| # from south to north along the west boundary |
| for iy in np.arange(ny_old): |
| vertices.append([lons[iy, 0], lats[iy, 0]]) |
| # from west to east along the north boundary |
| for ix in np.arange(nx_old): |
| vertices.append([lons[-1, ix], lats[-1, ix]]) |
| # from north to south along the east boundary |
| for iy in np.arange(ny_old)[::-1]: |
| vertices.append([lons[iy, -1], lats[iy, -1]]) |
| # from east to west along the south boundary |
| for ix in np.arange(nx_old)[::-1]: |
| vertices.append([lons[0, ix], lats[0, ix]]) |
| path = Path(vertices) |
| new_xy_mask = np.ones(new_lats.shape) |
| |
| for iy in np.arange(ny_new): |
| for ix in np.arange(nx_new): |
| if path.contains_point([new_lons[iy, ix], |
| new_lats[iy, ix]]) or not boundary_check: |
| new_xy_mask[iy, ix] = 0. |
| |
| new_index = np.where(new_xy_mask == 0.) |
| # Regrid the data on each time slice |
| for i in range(len(target_dataset.times)): |
| if len(target_dataset.times) == 1 and target_dataset.values.ndim == 2: |
| values_original = ma.array(target_dataset.values) |
| else: |
| values_original = ma.array(target_dataset.values[i]) |
        new_mask = np.copy(values_original.mask)
        # Fill masked points from a neighbouring unmasked value so that
        # missing data does not propagate through the interpolation below.
        for shift in (-1, 1):
            for axis in (0, 1):
                q_shifted = np.roll(values_original, shift=shift, axis=axis)
                index1 = np.where(values_original.mask & ~q_shifted.mask)
                if index1[0].size != 0:
                    values_original.data[index1] = q_shifted[index1]
                    new_mask[index1] = False
        if new_mask.size != 1:
            mask_index = np.where(~new_mask)
        else:
            mask_index = np.where(~np.isnan(values_original))
| new_values_temp = griddata((lons[mask_index], lats[mask_index]), values_original[mask_index], |
| (new_lons[new_index], |
| new_lats[new_index]), |
| method='linear') |
        # Make a masking map using nearest neighbour interpolation - use
        # this to determine locations with MDI and mask these
        qmdi = np.zeros_like(values_original)
        values_true_indices = np.where(values_original.mask)
        values_false_indices = np.where(~values_original.mask)
        qmdi[values_true_indices] = 1.
        qmdi[values_false_indices] = 0.
| qmdi_r = griddata((lons.flatten(), lats.flatten()), qmdi.flatten(), |
| (new_lons[new_index], |
| new_lats[new_index]), |
| method='nearest') |
| new_values_temp = ma.masked_where(qmdi_r != 0.0, new_values_temp) |
        # Combine missing data mask with the outside-domain mask defined
        # above.
        new_values[i, new_index[0], new_index[1]] = new_values_temp[:]
        new_values[i, :] = ma.masked_equal(new_values[i, :], 1.e+20)
| |
| # TODO: |
| # This will call down to the _congrid() function and the lat and lon |
| # axis will be adjusted with the time axis being held constant |
| |
| # Create a new Dataset Object to return using new data |
| regridded_dataset = ds.Dataset(new_latitudes, |
| new_longitudes, |
| target_dataset.times, |
| new_values, |
| variable=target_dataset.variable, |
| units=target_dataset.units, |
| name=target_dataset.name, |
| origin=target_dataset.origin) |
| return regridded_dataset |
| |
| |
| def ensemble(datasets): |
| """ |
| Generate a single dataset which is the mean of the input datasets |
| |
    An ensemble dataset combines the input datasets, assuming they all
    have the same shape, dimensions, and units.
| |
    :param datasets: Datasets from which to compose the ensemble dataset.
        All Datasets must be the same shape.
| :type datasets: :class:`list` of :class:`dataset.Dataset` |
| |
| :returns: New Dataset with a name of 'Dataset Ensemble' |
| :rtype: :class:`dataset.Dataset` |
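
    :Example: A minimal usage sketch, assuming ``model_a`` and ``model_b``
        are already-loaded OCW Datasets of identical shape::

            mean_dataset = ensemble([model_a, model_b])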
| """ |
| _check_dataset_shapes(datasets) |
| dataset_values = [dataset.values for dataset in datasets] |
| ensemble_values = ma.mean(dataset_values, axis=0) |
| |
| # Build new dataset object from the input datasets and |
| # the ensemble values and return it |
| ensemble_dataset = ds.Dataset(datasets[0].lats, |
| datasets[0].lons, |
| datasets[0].times, |
| ensemble_values, |
| units=datasets[0].units, |
| name="Dataset Ensemble") |
| |
| return ensemble_dataset |
| |
| |
def subset(target_dataset, subregion, subregion_name=None, extract=True,
           user_mask_values=[1]):
| '''Subset given dataset(s) with subregion information |
| |
| :param subregion: The Bounds with which to subset the target Dataset. |
| :type subregion: :class:`dataset.Bounds` |
| |
| :param target_dataset: The Dataset object to subset. |
| :type target_dataset: :class:`dataset.Dataset` |
| |
| :param subregion_name: The subset-ed Dataset name |
| :type subregion_name: :mod:`string` |
| |
    :param extract: If False, the dataset inside the region will be masked.
    :type extract: :class:`bool`

    :param user_mask_values: Grid points where mask_variable equals one of
        these values will be extracted or masked.
    :type user_mask_values: :class:`list` of :class:`int`
| |
| :returns: The subset-ed Dataset object |
| :rtype: :class:`dataset.Dataset` |
| |
| :raises: ValueError |
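
    :Example: A minimal usage sketch, assuming ``my_dataset`` is an
        already-loaded OCW Dataset and that :class:`dataset.Bounds`
        accepts the keyword arguments shown::

            from ocw.dataset import Bounds
            bounds = Bounds(lat_min=-10., lat_max=10.,
                            lon_min=30., lon_max=50.)
            east_africa = subset(my_dataset, bounds,
                                 subregion_name='East Africa')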
| ''' |
| |
| if not subregion.start: |
| time_start = target_dataset.times[0] |
| time_end = target_dataset.times[-1] |
| else: |
| time_start = subregion.start |
| time_end = subregion.end |
| |
| if not subregion_name: |
| subregion_name = target_dataset.name |
| |
| if subregion.lat_min is not None: |
| _are_bounds_contained_by_dataset(target_dataset, subregion) |
| |
| if target_dataset.lats.ndim == 2 and target_dataset.lons.ndim == 2: |
| temporal_subset = temporal_slice( |
| target_dataset, time_start, time_end) |
| nt = temporal_subset.values.shape[0] |
| y_index, x_index = np.where( |
| (target_dataset.lats >= subregion.lat_max) | ( |
| target_dataset.lats <= subregion.lat_min) | |
| (target_dataset.lons >= subregion.lon_max) | ( |
| target_dataset.lons <= subregion.lon_min)) |
| for it in np.arange(nt): |
| temporal_subset.values[it, y_index, x_index] = 1.e+20 |
| new_values = ma.masked_equal( |
| temporal_subset.values, 1.e+20) |
| return ds.Dataset( |
| target_dataset.lats, |
| target_dataset.lons, |
| temporal_subset.times, |
| new_values, |
| variable=target_dataset.variable, |
| units=target_dataset.units, |
| name=subregion_name, |
| origin=target_dataset.origin) |
| |
| elif target_dataset.lats.ndim == 1 and target_dataset.lons.ndim == 1: |
| # Get subregion indices into subregion data |
| dataset_slices = _get_subregion_slice_indices(target_dataset, |
| subregion) |
| # Slice the values array with our calculated slice indices |
        if target_dataset.values.ndim == 2:
            subset_values = target_dataset.values[
                dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1,
                dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1]
| |
        elif target_dataset.values.ndim == 3:
            subset_values = target_dataset.values[
                dataset_slices["time_start"]:dataset_slices["time_end"] + 1,
                dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1,
                dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1]
| # Build new dataset with subset information |
| return ds.Dataset( |
| # Slice the lats array with our calculated slice indices |
| target_dataset.lats[dataset_slices["lat_start"]: |
| dataset_slices["lat_end"] + 1], |
| # Slice the lons array with our calculated slice indices |
| target_dataset.lons[dataset_slices["lon_start"]: |
| dataset_slices["lon_end"] + 1], |
| # Slice the times array with our calculated slice indices |
| target_dataset.times[dataset_slices["time_start"]: |
| dataset_slices["time_end"] + 1], |
| # Slice the values array with our calculated slice indices |
| subset_values, |
| variable=target_dataset.variable, |
| units=target_dataset.units, |
| name=subregion_name, |
| origin=target_dataset.origin |
| ) |
| |
    if subregion.boundary_type in ('us_states', 'countries'):
| temporal_subset = temporal_slice( |
| target_dataset, time_start, time_end) |
| spatial_mask = utils.mask_using_shapefile_info(target_dataset.lons, target_dataset.lats, |
| subregion.masked_regions, extract=extract) |
| subset_values = utils.propagate_spatial_mask_over_time( |
| temporal_subset.values, mask=spatial_mask) |
| return ds.Dataset( |
| target_dataset.lats, |
| target_dataset.lons, |
| temporal_subset.times, |
| subset_values, |
| variable=target_dataset.variable, |
| units=target_dataset.units, |
| name=subregion_name, |
| origin=target_dataset.origin) |
| |
| if subregion.boundary_type == 'user': |
| temporal_subset = temporal_slice( |
| target_dataset, time_start, time_end) |
| spatial_mask = utils.regrid_spatial_mask(target_dataset.lons, target_dataset.lats, |
| subregion.mask_longitude, subregion.mask_latitude, subregion.mask_variable, |
| user_mask_values, extract=extract) |
| subset_values = utils.propagate_spatial_mask_over_time( |
| temporal_subset.values, mask=spatial_mask) |
| return ds.Dataset( |
| target_dataset.lats, |
| target_dataset.lons, |
| temporal_subset.times, |
| subset_values, |
| variable=target_dataset.variable, |
| units=target_dataset.units, |
| name=subregion_name, |
| origin=target_dataset.origin) |
| |
| def temporal_slice(target_dataset, start_time, end_time): |
    '''Temporally slice given dataset(s) with subregion information. This
    does not spatially subset the target dataset.
| |
| :param start_time: start time |
    :type start_time: :class:`datetime.datetime`

    :param end_time: end time
    :type end_time: :class:`datetime.datetime`
| |
| :param target_dataset: The Dataset object to subset. |
| :type target_dataset: :class:`dataset.Dataset` |
| |
| :returns: The subset-ed Dataset object |
| :rtype: :class:`dataset.Dataset` |
| |
| :raises: ValueError |
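
    :Example: A minimal usage sketch, assuming ``my_dataset`` is an
        already-loaded OCW Dataset::

            import datetime
            sliced = temporal_slice(my_dataset,
                                    datetime.datetime(2000, 1, 1),
                                    datetime.datetime(2005, 12, 31))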
| ''' |
| |
    # https://issues.apache.org/jira/browse/CLIMATE-938
    # netCDF datetimes allow for a variety of calendars while Python has
    # only one. This would throw an error about a calendar mismatch when
    # comparing a Python datetime object to a netcdf datetime object.
    # Cast the date as best we can so the comparison will compare like
    # data types. This will still throw an exception if the start / end
    # date is not valid in the given calendar (February 29th in a
    # DatetimeNoLeap calendar, for example).
| slice_start_time = start_time |
| slice_end_time = end_time |
| |
| if type(target_dataset.times[0]) != datetime.datetime: |
| slice_start_time =\ |
| type(target_dataset.times.item(0))(start_time.year, start_time.month, start_time.day, |
| start_time.hour, start_time.minute, start_time.second) |
| |
| slice_end_time =\ |
| type(target_dataset.times.item(0))(end_time.year, end_time.month, end_time.day, |
| end_time.hour, end_time.minute, end_time.second) |
| |
| start_time_index = np.where( |
| target_dataset.times >= slice_start_time)[0][0] |
| |
| end_time_index = np.where( |
| target_dataset.times <= slice_end_time)[0][-1] |
| |
| new_times = target_dataset.times[start_time_index:end_time_index + 1] |
| new_values = target_dataset.values[start_time_index:end_time_index + 1, :] |
| |
| return ds.Dataset( |
| target_dataset.lats, |
| target_dataset.lons, |
| new_times, |
| new_values, |
| variable=target_dataset.variable, |
| units=target_dataset.units, |
| origin=target_dataset.origin) |
| |
| def safe_subset(target_dataset, subregion, subregion_name=None): |
    '''Safely subset given dataset with subregion information.

    A standard subset requires that the provided subregion be entirely
    contained within the dataset's bounds. `safe_subset` returns the
    overlap of the subregion and dataset without raising an error.
| |
| :param subregion: The Bounds with which to subset the target Dataset. |
| :type subregion: :class:`dataset.Bounds` |
| |
| :param target_dataset: The Dataset object to subset. |
| :type target_dataset: :class:`dataset.Dataset` |
| |
| :param subregion_name: The subset-ed Dataset name |
| :type subregion_name: :mod:`string` |
| |
| :returns: The subset-ed Dataset object |
| :rtype: :class:`dataset.Dataset` |
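
    :Example: A minimal usage sketch, assuming ``bounds`` extends beyond
        ``my_dataset``'s domain::

            # Returns the overlap rather than raising a ValueError
            overlap = safe_subset(my_dataset, bounds)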
| ''' |
| |
| lat_min, lat_max, lon_min, lon_max = target_dataset.spatial_boundaries() |
| start, end = target_dataset.temporal_boundaries() |
    if subregion.lat_min is not None:
| if subregion.lat_min < lat_min: |
| subregion.lat_min = lat_min |
| |
| if subregion.lat_max > lat_max: |
| subregion.lat_max = lat_max |
| |
| if subregion.lon_min < lon_min: |
| subregion.lon_min = lon_min |
| |
| if subregion.lon_max > lon_max: |
| subregion.lon_max = lon_max |
| |
| if subregion.start: |
| if subregion.start < start: |
| subregion.start = start |
| |
| if subregion.end: |
| if subregion.end > end: |
| subregion.end = end |
| |
| return subset(target_dataset, subregion, subregion_name) |
| |
| |
| def normalize_dataset_datetimes(dataset, timestep): |
| ''' Normalize Dataset datetime values. |
| |
    Force daily data to a time value of 00:00:00.
| Force monthly data to the first of the month at midnight. |
| |
| :param dataset: The Dataset which will have its time value normalized. |
| :type dataset: :class:`dataset.Dataset` |
| |
| :param timestep: The timestep of the Dataset's values. Either 'daily' or |
| 'monthly'. |
| :type timestep: :mod:`string` |
| |
| :returns: A new Dataset with normalized datetime values. |
| :rtype: :class:`dataset.Dataset` |
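
    :Example: A minimal usage sketch, assuming ``my_dataset`` holds
        monthly values stamped mid-month::

            # Times are shifted to the first of each month at midnight
            normalized = normalize_dataset_datetimes(my_dataset, 'monthly')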
| ''' |
| new_times = _rcmes_normalize_datetimes(dataset.times, timestep) |
| return ds.Dataset( |
| dataset.lats, |
| dataset.lons, |
| np.array(new_times), |
| dataset.values, |
| variable=dataset.variable, |
| units=dataset.units, |
| name=dataset.name, |
| origin=dataset.origin |
| ) |
| |
| |
| def write_netcdf(dataset, path, compress=True): |
| ''' Write a dataset to a NetCDF file. |
| |
| :param dataset: The dataset to write. |
| :type dataset: :class:`dataset.Dataset` |
| |
| :param path: The output file path. |
| :type path: :mod:`string` |
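
    :param compress: Whether to compress the NetCDF variables with zlib.
    :type compress: :class:`bool`

    :Example: A minimal usage sketch, assuming ``my_dataset`` is an
        already-loaded OCW Dataset::

            write_netcdf(my_dataset, '/tmp/my_dataset.nc')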
| ''' |
| out_file = netCDF4.Dataset(path, 'w', format='NETCDF4') |
| |
| # Set attribute lengths |
| if dataset.lats.ndim == 2: |
| lat_len = dataset.lats.shape[0] |
| lon_len = dataset.lons.shape[1] |
| lat_dim_info = ('y', 'x') |
| lon_dim_info = ('y', 'x') |
| |
| else: |
| lat_len = len(dataset.lats) |
| lon_len = len(dataset.lons) |
| lat_dim_info = ('y',) |
| lon_dim_info = ('x',) |
| |
| time_len = len(dataset.times) |
| |
| # Create attribute dimensions |
| out_file.createDimension('y', lat_len) |
| out_file.createDimension('x', lon_len) |
| out_file.createDimension('time', None) |
| |
| # Create variables |
| lats = out_file.createVariable('lat', 'f8', lat_dim_info, zlib=compress) |
| lons = out_file.createVariable('lon', 'f8', lon_dim_info, zlib=compress) |
    times = out_file.createVariable('time', 'f8', ('time',), zlib=compress)
| |
| var_name = dataset.variable if dataset.variable else 'var' |
| values = out_file.createVariable(var_name, |
| 'f8', |
| ('time', 'y', 'x'), |
| zlib=compress) |
| |
| # Set the time variable units |
| # We don't deal with hourly/minutely/anything-less-than-a-day data so |
| # we can safely stick with a 'days since' offset here. Note that the |
| # NetCDF4 helper date2num doesn't support 'months' or 'years' instead |
| # of days. |
| times.units = "days since %s" % dataset.times[0] |
| |
| # Store the dataset's values |
| lats[:] = dataset.lats |
| lons[:] = dataset.lons |
| times[:] = netCDF4.date2num(dataset.times, times.units) |
| values[:] = dataset.values |
| values.units = dataset.units |
| |
| out_file.close() |
| |
| |
| def write_netcdf_multiple_datasets_with_subregions(ref_dataset, ref_name, |
| model_dataset_array, |
| model_names, |
| path, |
| subregions=None, |
| subregion_array=None, |
| ref_subregion_mean=None, |
| ref_subregion_std=None, |
| model_subregion_mean=None, |
| model_subregion_std=None): |
    ''' Write multiple reference and model datasets and their subregional
    means and standard deviations to a NetCDF file.

    .. note:: This documentation is to be updated.
    '''
| out_file = netCDF4.Dataset(path, 'w', format='NETCDF4') |
| |
| dataset = ref_dataset |
    # Set attribute lengths
| nobs = 1 |
| nmodel = len(model_dataset_array) |
| lat_len, lon_len = dataset.values.shape[1:] |
| lat_ndim = dataset.lats.ndim |
| lon_ndim = dataset.lons.ndim |
| time_len = len(dataset.times) |
| |
| if subregions is not None: |
| nsubregion = len(subregions) |
| |
| # Create attribute dimensions |
| out_file.createDimension('y', lat_len) |
| out_file.createDimension('x', lon_len) |
| out_file.createDimension('time', time_len) |
| |
| # Create variables and store the values |
| if lat_ndim == 2: |
| lats = out_file.createVariable('lat', 'f8', ('y', 'x')) |
| else: |
| lats = out_file.createVariable('lat', 'f8', ('y')) |
| lats[:] = dataset.lats |
| if lon_ndim == 2: |
| lons = out_file.createVariable('lon', 'f8', ('y', 'x')) |
| else: |
| lons = out_file.createVariable('lon', 'f8', ('x')) |
| lons[:] = dataset.lons |
| times = out_file.createVariable('time', 'f8', ('time',)) |
| times.units = "days since %s" % dataset.times[0] |
| times[:] = netCDF4.date2num(dataset.times, times.units) |
| |
| # mask_array = np.zeros([time_len, lat_len, lon_len]) |
| # for iobs in np.arange(nobs): |
| # index = np.where(ref_dataset_array[iobs].values.mask[:] == True) |
| # mask_array[index] = 1 |
| out_file.createVariable(ref_name, 'f8', ('time', 'y', 'x')) |
| out_file.variables[ref_name][:] = ref_dataset.values |
| out_file.variables[ref_name].units = ref_dataset.units |
| for imodel in np.arange(nmodel): |
| out_file.createVariable(model_names[imodel], 'f8', ('time', 'y', 'x')) |
| # out_file.variables[model_names[imodel]][:] = ma.array( |
| # model_dataset_array[imodel].values, mask = mask_array) |
| out_file.variables[model_names[imodel]][:] = model_dataset_array[ |
| imodel].values |
| out_file.variables[model_names[imodel]].units = model_dataset_array[ |
| imodel].units |
| |
| if subregions is not None: |
| out_file.createVariable('subregion_array', 'i4', ('y', 'x')) |
| out_file.variables['subregion_array'][:] = subregion_array[:] |
| nsubregion = len(subregions) |
| out_file.createDimension('nsubregion', nsubregion) |
| out_file.createDimension('nobs', nobs) |
| out_file.createDimension('nmodel', nmodel) |
| out_file.createVariable('obs_subregion_mean', 'f8', ('nobs', |
| 'time', |
| 'nsubregion')) |
| out_file.variables['obs_subregion_mean'][:] = ref_subregion_mean[:] |
| out_file.createVariable('obs_subregion_std', 'f8', ('nobs', |
| 'time', |
| 'nsubregion')) |
| out_file.variables['obs_subregion_std'][:] = ref_subregion_std[:] |
| out_file.createVariable('model_subregion_mean', 'f8', ('nmodel', |
| 'time', |
| 'nsubregion')) |
| out_file.variables['model_subregion_mean'][:] = model_subregion_mean[:] |
| out_file.createVariable('model_subregion_std', 'f8', ('nmodel', |
| 'time', |
| 'nsubregion')) |
| out_file.variables['model_subregion_std'][:] = model_subregion_std[:] |
| |
| out_file.close() |
| |
| |
| def water_flux_unit_conversion(dataset): |
| ''' Convert water flux variables units as necessary |
| |
| Convert full SI units water flux units to more common units. |
| |
| :param dataset: The dataset to convert. |
| :type dataset: :class:`dataset.Dataset` |
| |
| :returns: A Dataset with values converted to new units. |
| :rtype: :class:`dataset.Dataset` |
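
    :Example: A minimal usage sketch, assuming ``pr_dataset`` has the
        variable 'pr' with units 'kg m-2 s-1'::

            pr_dataset = water_flux_unit_conversion(pr_dataset)
            # pr_dataset.units is now 'mm/day'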
| ''' |
| water_flux_variables = ['pr', 'prec', 'evspsbl', 'mrro', 'swe'] |
| variable = dataset.variable.lower() |
| |
| if any(sub_string in variable for sub_string in water_flux_variables): |
| dataset_units = dataset.units.lower() |
        if 'swe' in variable:
            if any(unit in dataset_units for unit in ['m', 'meter']):
                dataset.values = 1.e3 * dataset.values
                dataset.units = 'mm'
| else: |
| if any(unit in dataset_units |
| for unit in ['kg m-2 s-1', 'mm s-1', 'mm/sec']): |
| dataset.values = 86400. * dataset.values |
| dataset.units = 'mm/day' |
| |
| return dataset |
| |
| |
| def temperature_unit_conversion(dataset): |
    ''' Convert temperature units as necessary.

    Automatically convert Celsius to Kelvin in the given dataset.

    :param dataset: The dataset for which units should be updated.
    :type dataset: :class:`dataset.Dataset`

    :returns: The dataset with (potentially) updated units.
    :rtype: :class:`dataset.Dataset`
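
    :Example: A minimal usage sketch, assuming ``tas_dataset`` has the
        variable 'tas' with units 'C'::

            tas_dataset = temperature_unit_conversion(tas_dataset)
            # tas_dataset.units is now 'K'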
| |
| ''' |
| |
    temperature_variables = ['temp', 'tas', 'tasmax', 'tasmin', 'T', 'tg']
| variable = dataset.variable.lower() |
| |
| if any(sub_string in variable for sub_string in temperature_variables): |
| dataset_units = dataset.units.lower() |
| if dataset_units == 'c' or dataset_units == 'celsius': |
| dataset.values = 273.15 + dataset.values |
| dataset.units = 'K' |
| |
| return dataset |
| |
| |
| def variable_unit_conversion(dataset): |
| ''' Convert water flux or temperature variables units as necessary |
| |
| For water flux variables, convert full SI units water flux units |
| to more common units. |
    For temperature, convert Celsius to Kelvin.
| |
| :param dataset: The dataset to convert. |
| :type dataset: :class:`dataset.Dataset` |
| |
| :returns: A Dataset with values converted to new units. |
| :rtype: :class:`dataset.Dataset` |
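
    :Example: A minimal usage sketch::

            my_dataset = variable_unit_conversion(my_dataset)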
| ''' |
| |
| dataset = water_flux_unit_conversion(dataset) |
| dataset = temperature_unit_conversion(dataset) |
| |
| return dataset |
| |
| |
| def _rcmes_normalize_datetimes(datetimes, timestep): |
| """ Normalize Dataset datetime values. |
| |
| Force daily to an hour time value of 00:00:00. |
| Force monthly data to the first of the month at midnight. |
| |
| :param datetimes: The datetimes to normalize. |
| :type datetimes: List of `datetime` values. |
| |
| :param timestep: The flag for how to normalize the datetimes. |
| :type timestep: String |
| """ |
| |
| normalDatetimes = [] |
| if timestep.lower() == 'monthly': |
| for inputDatetime in datetimes: |
| if inputDatetime.day != 1: |
| # Clean the inputDatetime |
| inputDatetimeString = inputDatetime.strftime('%Y%m%d') |
| normalInputDatetimeString = inputDatetimeString[:6] + '01' |
| inputDatetime = datetime.datetime.strptime( |
| normalInputDatetimeString, '%Y%m%d') |
| |
| normalDatetimes.append(inputDatetime) |
| |
| elif timestep.lower() == 'daily': |
| for inputDatetime in datetimes: |
| if (inputDatetime.hour != 0 or inputDatetime.minute != 0 or |
| inputDatetime.second != 0): |
| datetimeString = inputDatetime.strftime('%Y%m%d%H%M%S') |
| normalDatetimeString = datetimeString[:8] + '000000' |
| inputDatetime = datetime.datetime.strptime( |
| normalDatetimeString, '%Y%m%d%H%M%S') |
| |
| normalDatetimes.append(inputDatetime) |
| |
| return normalDatetimes |
| |
| |
| def mask_missing_data(dataset_array): |
    ''' Check for missing values in observation and model datasets.

    If any dataset in dataset_array has a missing value at a grid point,
    the values at that grid point in all other datasets are masked.

    :param dataset_array: an array of OCW datasets
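
    :Example: A minimal usage sketch, assuming ``obs`` and ``model`` are
        already-loaded OCW Datasets on the same grid::

            obs, model = mask_missing_data([obs, model])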
| ''' |
| |
| mask_array = np.zeros(dataset_array[0].values.shape) |
| for dataset in dataset_array: |
| # CLIMATE-797 - Not every array passed in will be a masked array. |
| # For those that are, action based on the mask passed in. |
| # For those that are not, take no action (else AttributeError). |
| if hasattr(dataset.values, 'mask'): |
            index = np.where(dataset.values.mask)
| if index[0].size > 0: |
| mask_array[index] = 1 |
| masked_array = [] |
| for dataset in dataset_array: |
| dataset.values = ma.array(dataset.values, mask=mask_array) |
| masked_array.append(dataset) |
    return masked_array
| |
| |
| def deseasonalize_dataset(dataset): |
| '''Calculate daily climatology and subtract the climatology from |
| the input dataset |
| |
| :param dataset: The dataset to convert. |
| :type dataset: :class:`dataset.Dataset` |
| |
| :returns: A Dataset with values deseasonalized. |
| :rtype: :class:`dataset.Dataset` |
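
    :Example: A minimal usage sketch, assuming ``daily_dataset`` holds
        daily values::

            anomalies = deseasonalize_dataset(daily_dataset)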
| ''' |
| |
| days = [d.month * 100. + d.day for d in dataset.times] |
| days_sorted = np.unique(days) |
| ndays = days_sorted.size |
| nt, ny, nx = dataset.values.shape |
| values_clim = ma.zeros([ndays, ny, nx]) |
| for iday, day in enumerate(days_sorted): |
| t_index = np.where(days == day)[0] |
| values_clim[iday, :] = ma.mean(dataset.values[t_index, :], axis=0) |
| for iday, day in enumerate(days_sorted): |
| t_index = np.where(days == day)[0] |
| dataset.values[t_index, :] = dataset.values[ |
| t_index, :] - values_clim[iday, :] |
| return dataset |
| |
| |
| def _rcmes_spatial_regrid(spatial_values, lat, lon, lat2, lon2, order=1): |
| ''' |
| Spatial regrid from one set of lat,lon values onto a new set (lat2,lon2) |
| |
| :param spatial_values: Values in a spatial grid that need to be regridded |
| :type spatial_values: 2d masked numpy array. shape (latitude, longitude) |
| :param lat: Grid of latitude values which map to the spatial values |
| :type lat: 2d numpy array. shape(latitudes, longitudes) |
| :param lon: Grid of longitude values which map to the spatial values |
| :type lon: 2d numpy array. shape(latitudes, longitudes) |
| :param lat2: Grid of NEW latitude values to regrid the spatial_values onto |
| :type lat2: 2d numpy array. shape(latitudes, longitudes) |
| :param lon2: Grid of NEW longitude values to regrid the spatial_values onto |
| :type lon2: 2d numpy array. shape(latitudes, longitudes) |
| :param order: Interpolation order flag. 1=bi-linear, 3=cubic spline |
| :type order: [optional] Integer |
| |
    :returns: Regridded spatial values
    :rtype: 2d masked numpy array with shape (len(lat2), len(lon2))
| ''' |
| |
| nlat = spatial_values.shape[0] |
| nlon = spatial_values.shape[1] |
| |
| nlat2 = lat2.shape[0] |
| nlon2 = lon2.shape[1] |
| |
| # To make our lives easier down the road, let's |
| # turn these into arrays of x & y coords |
| loni = lon2.ravel() |
| lati = lat2.ravel() |
| |
| loni = loni.copy() # NB. it won't run unless you do this... |
| lati = lati.copy() |
| |
| # Now, we'll set points outside the boundaries to lie along an edge |
| loni_max_indices = np.where(loni > lon.max()) |
| loni_min_indices = np.where(loni < lon.min()) |
| loni[loni_max_indices] = lon.max() |
| loni[loni_min_indices] = lon.min() |
| |
| # To deal with the "hard" break, we'll have to treat y differently, |
| # so we're just setting the min here... |
| lati_max_indices = np.where(lati > lat.max()) |
| lati_min_indices = np.where(lati < lat.min()) |
| lati[lati_max_indices] = lat.max() |
| lati[lati_min_indices] = lat.min() |
| |
    # We need to convert these to (float) indices
| # (xi should range from 0 to (nx - 1), etc) |
| loni = (nlon - 1) * (loni - lon.min()) / (lon.max() - lon.min()) |
| |
| # Deal with the "hard" break in the y-direction |
| lati = (nlat - 1) * (lati - lat.min()) / (lat.max() - lat.min()) |
| |
| """ |
| TODO: Review this docstring and see if it still holds true. |
| NOTE: This function doesn't use MDI currently. These are legacy comments |
| |
| Notes on dealing with MDI when regridding data. |
| Method adopted here: |
| Use bilinear interpolation of data by default |
| (but user can specify other order using order=... in call) |
    Perform bilinear interpolation of the data and of the mask.
    To be conservative, a new grid point which contained some missing data
    on the old grid is set to missing data; this is achieved by looking
    for any non-zero interpolated mask values.
    To avoid issues with bilinear interpolation producing strong gradients
    leading into the MDI, set values at MDI points to the mean data value
    so that little gradient is visible - not ideal, but acceptable for now.

    Set values at MDI points to be similar to their surroundings so they
    don't produce large gradients when interpolating. Preserve the MDI
    mask by only changing the data part of the masked array object.
| """ |
| for shift in (-1, 1): |
| for axis in (0, 1): |
| q_shifted = np.roll(spatial_values, shift=shift, axis=axis) |
| idx = ~q_shifted.mask * spatial_values.mask |
| indices = np.where(idx) |
| spatial_values.data[indices] = q_shifted[indices] |
| |
| # Now we actually interpolate |
| # map_coordinates does cubic interpolation by default, |
| # use "order=1" to preform bilinear interpolation instead... |
| |
| regridded_values = map_coordinates(spatial_values, |
| [lati, loni], |
| order=order) |
| regridded_values = regridded_values.reshape([nlat2, nlon2]) |
| |
| # Set values to missing data outside of original domain |
| regridded_values = ma.masked_array(regridded_values, |
| mask=np.logical_or( |
| np.logical_or(lat2 >= lat.max(), |
| lat2 <= lat.min()), |
| np.logical_or(lon2 <= lon.min(), |
| lon2 >= lon.max()))) |
| |
| # Make second map using nearest neighbour interpolation -use this to |
| # determine locations with MDI and mask these |
| qmdi = np.zeros_like(spatial_values) |
| spatial_indices_true = np.where(spatial_values.mask) |
    spatial_indices_false = np.where(~spatial_values.mask)
| qmdi[spatial_indices_true] = 1. |
| qmdi[spatial_indices_false] = 0. |
| qmdi_r = map_coordinates(qmdi, [lati, loni], order=order) |
| qmdi_r = qmdi_r.reshape([nlat2, nlon2]) |
| mdimask = (qmdi_r != 0.0) |
| |
    # Combine missing data mask with the outside-domain mask defined above.
| regridded_values.mask = np.logical_or(mdimask, regridded_values.mask) |
| |
| return regridded_values |
| |
| |
| def _rcmes_create_mask_using_threshold(masked_array, threshold=0.5): |
    '''Mask an array where the proportion of missing values exceeds a
    threshold.

    For each value along an axis, if the proportion of steps that are
    missing data is above ``threshold`` then the value is marked as
    missing data.

    .. note:: The 0th axis is currently always used.
| |
| :param masked_array: Masked array of data |
| :type masked_array: Numpy Masked Array |
| :param threshold: (optional) Threshold proportion above which a value is |
| marked as missing data. |
| :type threshold: Float |
| |
| :returns: A Numpy array describing the mask for masked_array. |
| ''' |
| |
    # try/except is used because some model files don't have a full mask,
    # but a single bool. The except catches this situation and deals with
    # it appropriately.
| try: |
| nT = masked_array.mask.shape[0] |
| |
| # For each pixel, count how many times are masked. |
| nMasked = masked_array.mask[:, :, :].sum(axis=0) |
| |
| # Define new mask as when a pixel has over a defined threshold ratio of |
| # masked data |
| # e.g. if the threshold is 75%, and there are 10 times, |
| # then a pixel will be masked if more than 5 times are masked. |
| mymask = nMasked > (nT * threshold) |
| |
    except (AttributeError, IndexError):
| mymask = np.zeros_like(masked_array.data[0, :, :]) |
| |
| return mymask |
| |
| |
| def _rcmes_calc_average_on_new_time_unit(data, dates, unit): |
| """ Rebin 3d array and list of dates using the provided unit parameter |
| |
| :param data: Input data that needs to be averaged |
| :type data: 3D masked numpy array of shape (times, lats, lons) |
| :param dates: List of dates that correspond to the given data values |
| :type dates: Python datetime objects |
| :param unit: Time unit to average the data into |
| :type unit: String matching one of these values : |
| full | annual | monthly | daily |
| |
| :returns: meanstorem, newTimesList |
| :rtype: 3D numpy masked array the same shape as the input array, |
| list of python datetime objects |
| :raises: ValueError |
| """ |
| |
    # Check if the user-selected temporal grid is valid. If not, raise a
    # ValueError.
| acceptable = ((unit == 'full') | (unit == 'annual') | |
| (unit == 'monthly') | (unit == 'daily')) |
| if not acceptable: |
| raise ValueError('Error: unknown unit type selected ' |
| 'for time averaging: EXIT') |
| |
| nt, ny, nx = data.shape |
| if unit == 'full': |
| new_data = ma.mean(data, axis=0) |
| new_date = [dates[dates.size // 2]] |
| if unit == 'annual': |
| years = [d.year for d in dates] |
| years_sorted = np.unique(years) |
| new_data = ma.zeros([years_sorted.size, ny, nx]) |
| it = 0 |
| new_date = [] |
| for year in years_sorted: |
| index = np.where(years == year)[0] |
| new_data[it, :] = ma.mean(data[index, :], axis=0) |
| new_date.append(datetime.datetime(year=year, month=7, day=2)) |
| it = it + 1 |
| if unit == 'monthly': |
| years = [d.year for d in dates] |
| years_sorted = np.unique(years) |
| months = [d.month for d in dates] |
| months_sorted = np.unique(months) |
| |
| new_data = ma.zeros([years_sorted.size * months_sorted.size, ny, nx]) |
| it = 0 |
| new_date = [] |
| for year in years_sorted: |
| for month in months_sorted: |
| index = np.where((years == year) & (months == month))[0] |
| new_data[it, :] = ma.mean(data[index, :], axis=0) |
| new_date.append(datetime.datetime(year=year, |
| month=month, |
| day=15)) |
| it = it + 1 |
| if unit == 'daily': |
| days = [d.year * 10000. + d.month * 100. + d.day for d in dates] |
| days_sorted = np.unique(days) |
| |
| new_data = ma.zeros([days_sorted.size, ny, nx]) |
| it = 0 |
| new_date = [] |
| for day in days_sorted: |
| index = np.where(days == day)[0] |
| new_data[it, :] = ma.mean(data[index, :], axis=0) |
| y = int(day // 10000) |
| m = int(day % 10000) // 100 |
| d = int(day % 100) |
| new_date.append(datetime.datetime(year=y, month=m, day=d)) |
| it = it + 1 |
| |
| return new_data, np.array(new_date) |
| |
| |
| def _rcmes_calc_average_on_new_time_unit_K(data, dates, unit): |
| """ Rebin 3d array and list of dates using the provided unit parameter |
| |
| :param data: Input data that needs to be averaged |
| :type data: 3D masked numpy array of shape (times, lats, lons) |
| :param dates: List of dates that correspond to the given data values |
| :type dates: Python datetime objects |
| :param unit: Time unit to average the data into |
| :type unit: String matching one of these values : |
| full | annual | monthly | daily |
| |
| :returns: meanstorem, newTimesList |
| :rtype: 3D numpy masked array the same shape as the input array, |
| list of python datetime objects |
| :raises: ValueError |
| """ |
| |
    # Check if the user-selected temporal grid is valid. If not, raise a
    # ValueError.
| acceptable = ((unit == 'full') | (unit == 'annual') | |
| (unit == 'monthly') | (unit == 'daily')) |
| if not acceptable: |
| raise ValueError('Error: unknown unit type selected ' |
| 'for time averaging: EXIT') |
| |
| # Calculate arrays of: annual timeseries: year (2007,2007), |
| # monthly time series: year-month (200701,200702), |
| # daily timeseries: year-month-day (20070101,20070102) |
| # depending on user-selected averaging period. |
| |
| # Year list |
| if unit == 'annual': |
| timeunits = np.array([int(d.strftime("%Y")) for d in dates]) |
| unique_times = np.unique(timeunits) |
| |
| # YearMonth format list |
| if unit == 'monthly': |
| timeunits = np.array([int(d.strftime("%Y%m")) for d in dates]) |
| unique_times = np.unique(timeunits) |
| |
| # YearMonthDay format list |
| if unit == 'daily': |
| timeunits = np.array([int(d.strftime("%Y%m%d")) for d in dates]) |
| unique_times = np.unique(timeunits) |
| |
| # TODO: add pentad setting using Julian days? |
| |
| # Full list: a special case |
| if unit == 'full': |
| # Calculating means data over the entire time range: |
| # i.e., annual-mean climatology |
| timeunits = [] |
| for i in np.arange(len(dates)): |
| # i.e. we just want the same value for all times. |
| timeunits.append(999) |
| timeunits = np.array(timeunits, dtype=int) |
| unique_times = np.unique(timeunits) |
| |
| # empty list to store new times |
| newTimesList = [] |
| |
| # Decide whether or not you need to do any time averaging. |
| # i.e. if data are already on required time unit then just |
| # pass data through and calculate and return representative |
| # datetimes. |
| processing_required = True |
    if len(timeunits) == len(unique_times):
| processing_required = False |
| |
| # 1D data arrays, i.e. time series |
| if data.ndim == 1: |
| # Create array to store the resulting data |
| meanstore = np.zeros(len(unique_times)) |
| |
| # Calculate the means across each unique time unit |
| i = 0 |
| for myunit in unique_times: |
| if processing_required: |
| datam = ma.masked_array(data, timeunits != myunit) |
| meanstore[i] = ma.average(datam) |
| |
| # construct new times list |
| yyyy, mm, dd = _create_new_year_month_day(myunit, dates) |
| newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0)) |
| i = i + 1 |
| |
| # 3D data arrays |
| if data.ndim == 3: |
| # Create array to store the resulting data |
| meanstore = np.zeros([len(unique_times), data.shape[1], data.shape[2]]) |
| |
| # Calculate the means across each unique time unit |
| i = 0 |
| datamask_store = [] |
| for myunit in unique_times: |
| if processing_required: |
| mask = np.zeros_like(data) |
| mask[timeunits != myunit, :, :] = 1.0 |
| # Calculate missing data mask within each time unit... |
| datamask_at_this_timeunit = np.zeros_like(data) |
| datamask_at_this_timeunit[:] = _rcmes_create_mask_using_threshold( |
| data[timeunits == myunit, :, :], threshold=0.75) |
| # Store results for masking later |
| datamask_store.append(datamask_at_this_timeunit[0]) |
| # Calculate means for each pixel in this time unit, ignoring |
| # missing data (using masked array). |
| datam = ma.masked_array(data, np.logical_or(mask, |
| datamask_at_this_timeunit)) |
| meanstore[i, :, :] = ma.average(datam, axis=0) |
| # construct new times list |
| yyyy, mm, dd = _create_new_year_month_day(myunit, dates) |
| newTimesList.append(datetime.datetime(yyyy, mm, dd)) |
| i += 1 |
| |
| if not processing_required: |
| meanstorem = data |
| |
| if processing_required: |
| # Create masked array (using missing data mask defined above) |
| datamask_store = np.array(datamask_store) |
| meanstorem = ma.masked_array(meanstore, datamask_store) |
| |
| return meanstorem, newTimesList |
| |
| |
| def _create_new_year_month_day(time_unit, dates): |
| smyunit = str(time_unit) |
| if len(smyunit) == 4: # YYYY |
| yyyy = int(smyunit[0:4]) |
| mm = 1 |
| dd = 1 |
| elif len(smyunit) == 6: # YYYYMM |
| yyyy = int(smyunit[0:4]) |
| mm = int(smyunit[4:6]) |
| dd = 1 |
| elif len(smyunit) == 8: # YYYYMMDD |
| yyyy = int(smyunit[0:4]) |
| mm = int(smyunit[4:6]) |
| dd = int(smyunit[6:8]) |
| elif len(smyunit) == 3: # Full time range |
| # Need to set an appropriate time representing the mid-point of the |
| # entire time span |
| dt = dates[-1] - dates[0] |
| halfway = dates[0] + (dt / 2) |
| yyyy = int(halfway.year) |
| mm = int(halfway.month) |
| dd = int(halfway.day) |
| |
| return (yyyy, mm, dd) |
| |
| |
| def _congrid(a, newdims, method='linear', centre=False, minusone=False): |
| ''' |
| This function is from http://wiki.scipy.org/Cookbook/Rebinning - Example 3 |
| It has been refactored and changed a bit, but the original functionality |
| has been preserved. |
| Arbitrary resampling of source array to new dimension sizes. |
| Currently only supports maintaining the same number of dimensions. |
| To use 1-D arrays, first promote them to shape (x,1). |
| |
    Uses the same parameters and creates the same co-ordinate lookup points
    as IDL's congrid routine, which apparently originally came from a
    VAX/VMS routine of the same name.
| |
| method: |
| neighbour - closest value from original data |
| nearest and linear - uses n x 1-D interpolations using |
| scipy.interpolate.interp1d |
| (see Numerical Recipes for validity of use of n 1-D interpolations) |
| spline - uses ndimage.map_coordinates |
| |
| centre: |
| True - interpolation points are at the centres of the bins |
| False - points are at the front edge of the bin |
| |
| minusone: |
| For example- inarray.shape = (i,j) & new dimensions = (x,y) |
| False - inarray is resampled by factors of (i/x) * (j/y) |
| True - inarray is resampled by(i-1)/(x-1) * (j-1)/(y-1) |
| This prevents extrapolation one element beyond bounds of input array. |
| ''' |
    if a.dtype not in [np.float64, np.float32]:
        a = a.astype(float)

    # this will merely take the True/False input and convert it to
    # an int of 1 or 0
    m1 = int(minusone)
    # this casts the True/False input into a floating point number
    # of 0.5 or 0.0
    ofs = int(centre) * 0.5
| |
| old = np.array(a.shape) |
| ndims = len(a.shape) |
| if len(newdims) != ndims: |
| print("[congrid] dimensions error. " |
| "This routine currently only supports " |
| "rebinning to the same number of dimensions.") |
| return None |
| newdims = np.asarray(newdims, dtype=float) |
| dimlist = [] |
| |
| if method == 'neighbour': |
| newa = _congrid_neighbor(a, newdims, m1, ofs) |
| |
| elif method in ['nearest', 'linear']: |
| # calculate new dims |
| for i in range(ndims): |
| base = np.arange(newdims[i]) |
| dimlist.append((old[i] - m1) / (newdims[i] - m1) * |
| (base + ofs) - ofs) |
| # specify old dims |
        olddims = [np.arange(i, dtype=float) for i in list(a.shape)]
| |
| # first interpolation - for ndims = any |
| mint = scipy.interpolate.interp1d(olddims[-1], a, kind=method) |
| newa = mint(dimlist[-1]) |
| |
        trorder = [ndims - 1] + list(range(ndims - 1))
| for i in range(ndims - 2, -1, -1): |
| newa = newa.transpose(trorder) |
| |
| mint = scipy.interpolate.interp1d(olddims[i], newa, kind=method) |
| newa = mint(dimlist[i]) |
| |
| if ndims > 1: |
| # need one more transpose to return to original dimensions |
| newa = newa.transpose(trorder) |
| |
| return newa |
| elif method in ['spline']: |
| nslices = [slice(0, j) for j in list(newdims)] |
| newcoords = np.mgrid[nslices] |
| |
        newcoords_dims = list(range(newcoords.ndim))
        # make first index last
        newcoords_dims.append(newcoords_dims.pop(0))
| newcoords_tr = newcoords.transpose(newcoords_dims) |
| # makes a view that affects newcoords |
| |
| newcoords_tr += ofs |
| |
| deltas = (np.asarray(old) - m1) / (newdims - m1) |
| newcoords_tr *= deltas |
| |
| newcoords_tr -= ofs |
| |
| newa = scipy.ndimage.map_coordinates(a, newcoords) |
| return newa |
| else: |
| print("Congrid error: Unrecognized interpolation type.\n", |
| "Currently only \'neighbour\', \'nearest\',\'linear\',", |
| "and \'spline\' are supported.") |
| return None |
| |
| |
| def _check_dataset_shapes(datasets): |
| """ If the datasets are not the same shape throw a ValueError Exception |
| |
| :param datasets: OCW Datasets to check for a consistent shape |
| :type datasets: List of OCW Dataset Objects |
| |
| :raises: ValueError |
| """ |
| dataset_shape = None |
| for dataset in datasets: |
| if dataset_shape is None: |
| dataset_shape = dataset.values.shape |
| else: |
| if dataset.values.shape != dataset_shape: |
| msg = "%s != %s" % (dataset.values.shape, dataset_shape) |
| raise ValueError("Input datasets must be the same shape " |
| "for an ensemble :: ", msg) |
| else: |
| pass |
| |
| |
| def _congrid_neighbor(values, new_dims, minus_one, offset): |
| """ Use the nearest neighbor to create a new array |
| |
    :param values: Array of values that need to be resampled
    :type values: Numpy ndarray
    :param new_dims: The new dimension sizes
    :type new_dims: sequence of :class:`int`
    :param minus_one: The congrid ``minusone`` flag as an int (0 or 1)
    :type minus_one: :class:`int`
    :param offset: The congrid ``centre`` flag as a float (0.0 or 0.5)
    :type offset: :class:`float`

    :returns: Values resampled onto the new dimensions by nearest-neighbour
        lookup
    :rtype: Numpy ndarray
| """ |
| ndims = len(values.shape) |
| dimlist = [] |
| old_dims = np.array(values.shape) |
    for i in range(ndims):
        base = np.indices(np.asarray(new_dims, dtype=int))[i]
        dimlist.append((old_dims[i] - minus_one) / (new_dims[i] - minus_one) *
                       (base + offset) - offset)
    cd = np.array(dimlist).round().astype(int)
    new_values = values[tuple(cd)]
| return new_values |
| |
| |
| def _are_bounds_contained_by_dataset(dataset, bounds): |
| '''Check if a Dataset fully contains a bounds. |
| |
| :param bounds: The Bounds object to check. |
| :type bounds: Bounds |
    :param dataset: The Dataset that should fully contain the
| Bounds |
| :type dataset: Dataset |
| |
| :returns: True if the Bounds are contained by the Dataset, Raises |
| a ValueError otherwise |
| ''' |
| lat_min, lat_max, lon_min, lon_max = dataset.spatial_boundaries() |
| start, end = dataset.temporal_boundaries() |
| |
| errors = [] |
| |
| # TODO: THIS IS TERRIBLY inefficent and we need to use a geometry |
| # lib instead in the future |
| if (lat_min > bounds.lat_max): |
| error = ("bounds.lat_max: %s is not between lat_min: %s and" |
| " lat_max: %s" % (bounds.lat_max, lat_min, lat_max)) |
| errors.append(error) |
| |
| if (lon_min > bounds.lon_max): |
| error = ("bounds.lon_max: %s is not between lon_min: %s and" |
| " lon_max: %s" % (bounds.lon_max, lon_min, lon_max)) |
| errors.append(error) |
| if not (bounds.start is None): |
| if not start <= bounds.start <= end: |
| error = ("bounds.start: %s is not between start: %s and end: %s" % |
| (bounds.start, start, end)) |
| errors.append(error) |
| |
| if not (bounds.end is None): |
| if not start <= bounds.end <= end: |
| error = ("bounds.end: %s is not between start: %s and end: %s" % |
| (bounds.end, start, end)) |
| errors.append(error) |
| |
| if len(errors) == 0: |
| return True |
| else: |
| error_message = '\n'.join(errors) |
| raise ValueError(error_message) |
| |
| |
| def _get_subregion_slice_indices(target_dataset, subregion): |
| '''Get the indices for slicing Dataset values to generate the subregion. |
| |
| :param subregion: The Bounds that specify the subset of the Dataset |
| that should be extracted. |
| :type subregion: Bounds |
| :param target_dataset: The Dataset to subset. |
| :type target_dataset: Dataset |
| |
| :returns: The indices to slice the Datasets arrays as a Dictionary. |
| ''' |
| latStart = min(np.nonzero(target_dataset.lats >= subregion.lat_min)[0]) |
| latEnd = max(np.nonzero(target_dataset.lats <= subregion.lat_max)[0]) |
| |
| lonStart = min(np.nonzero(target_dataset.lons >= subregion.lon_min)[0]) |
| lonEnd = max(np.nonzero(target_dataset.lons <= subregion.lon_max)[0]) |
| |
| if not (subregion.start is None): |
| timeStart = min(np.nonzero(target_dataset.times >= subregion.start)[0]) |
| timeEnd = max(np.nonzero(target_dataset.times <= subregion.end)[0]) |
| else: |
| timeStart = 0 |
        timeEnd = len(target_dataset.times) - 1
| |
| return { |
| "lat_start": latStart, |
| "lat_end": latEnd, |
| "lon_start": lonStart, |
| "lon_end": lonEnd, |
| "time_start": timeStart, |
| "time_end": timeEnd |
| } |