blob: bb06443038bc7a9522ce0e1711fd3ba595d651c6 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
'''
Classes:
    Dataset - Container for a dataset's attributes and data.
    Bounds - Container for the spatial and temporal bounds information
             used by operations on a Dataset.
'''
import datetime as dt
import logging
import netCDF4
import numpy
import ocw.utils as utils
logger = logging.getLogger(__name__)
class Dataset:
    '''Container for a dataset's attributes and data.'''

    def __init__(self, lats, lons, times, values, variable=None, units=None,
                 origin=None, name=""):
        '''Default Dataset constructor

        :param lats: One dimensional numpy array of unique latitude values.
        :type lats: :class:`numpy.ndarray`

        :param lons: One dimensional numpy array of unique longitude values.
        :type lons: :class:`numpy.ndarray`

        :param times: One dimensional numpy array of unique python datetime
            objects.
        :type times: :class:`numpy.ndarray`

        :param values: Three dimensional numpy array of parameter values with
            shape [timesLength, latsLength, lonsLength].
        :type values: :class:`numpy.ndarray`

        :param variable: Name of the value variable.
        :type variable: :mod:`string`

        :param units: Name of the value units
        :type units: :mod:`string`

        :param origin: An optional object used to specify information on where
            this dataset was loaded from.
        :type origin: :class:`dict`

        :param name: An optional string name for the Dataset.
        :type name: :mod:`string`

        :raises: ValueError
        '''
        # Validate the raw inputs first so that malformed arrays are rejected
        # before they are handed to the normalization helper.
        self._validate_inputs(lats, lons, times, values)
        lats, lons, values = utils.normalize_lat_lon_values(lats, lons, values)

        self.lats = lats
        self.lons = lons
        self.times = times
        self.values = values
        self.variable = variable
        self.units = units
        self.name = name
        self.origin = origin

    def spatial_boundaries(self):
        '''Calculate the spatial boundaries.

        :returns: The Dataset's bounding latitude and longitude values as a
            tuple in the form (min_lat, max_lat, min_lon, max_lon)
        :rtype: :func:`tuple` of the form (:class:`float`, :class:`float`,
            :class:`float`, :class:`float`).
        '''
        return (float(numpy.min(self.lats)), float(numpy.max(self.lats)),
                float(numpy.min(self.lons)), float(numpy.max(self.lons)))

    def temporal_boundaries(self):
        '''Calculate the temporal range

        :returns: The start and end date of the Dataset's temporal range as
            a tuple in the form (start_time, end_time).
        :rtype: :func:`tuple` of the form (:class:`datetime.datetime`,
            :class:`datetime.datetime`)
        '''
        sorted_time = numpy.sort(self.times)
        return (sorted_time[0], sorted_time[-1])

    def spatial_resolution(self):
        '''Calculate the latitudinal and longitudinal spatial resolution.

        If self.lats and self.lons are from curvilinear coordinates,
        the output resolutions are approximate values.

        :returns: The Dataset's latitudinal and longitudinal spatial resolution
            as a tuple of the form (lat_resolution, lon_resolution).
        :rtype: (:class:`float`, :class:`float`)

        :raises ValueError: If self.lats and self.lons are not both one
            dimensional or both two dimensional.
        '''
        lat_ndim = self.lats.ndim
        lon_ndim = self.lons.ndim

        if lat_ndim == 1 and lon_ndim == 1:
            # Resolution is the gap between the two smallest distinct values.
            sorted_lats = numpy.sort(list(set(self.lats)))
            sorted_lons = numpy.sort(list(set(self.lons)))
            lat_resolution = sorted_lats[1] - sorted_lats[0]
            lon_resolution = sorted_lons[1] - sorted_lons[0]
        elif lat_ndim == 2 and lon_ndim == 2:
            # Curvilinear grid: approximate using the first diagonal step.
            lat_resolution = self.lats[1, 1] - self.lats[0, 0]
            lon_resolution = self.lons[1, 1] - self.lons[0, 0]
        else:
            # Previously this fell through and failed with an
            # UnboundLocalError on the return statement.
            error = ("Latitude and longitude arrays must both be 1 "
                     "dimensional or both be 2 dimensional. Found %s and %s "
                     "dimensions." % (lat_ndim, lon_ndim))
            logger.error(error)
            raise ValueError(error)

        return (lat_resolution, lon_resolution)

    def temporal_resolution(self):
        '''Calculate the temporal resolution.

        The resolution is derived from the difference between the two
        earliest time values, so at least two time values are required.

        :returns: The temporal resolution; one of 'minutely', 'hourly',
            'daily', 'monthly' or 'yearly'.
        :rtype: :mod:`string`
        '''
        sorted_times = numpy.sort(self.times)
        time_step = sorted_times[1] - sorted_times[0]
        num_days = time_step.days

        if num_days == 0:
            num_hours = time_step.seconds / 3600
            return 'hourly' if num_hours >= 1 else 'minutely'
        if num_days == 1:
            return 'daily'
        if num_days <= 31:
            return 'monthly'
        return 'yearly'

    def _validate_inputs(self, lats, lons, times, values):
        """Check that Dataset inputs are valid.

        :param lats: Latitude array (1 or 2 dimensional).
        :param lons: Longitude array (1 or 2 dimensional).
        :param times: Time array; must be 1 dimensional.
        :param values: Value array; must be at least 2 dimensional and agree
            in shape with the other arrays.

        :raises: ValueError
        """
        err_msg = None

        # Setup and Check parameter dimensionality is correct
        lat_dim = len(lats.shape)
        lon_dim = len(lons.shape)
        time_dim = len(times.shape)
        value_dim = len(values.shape)
        lat_count = lats.shape[0]
        lon_count = lons.shape[0]

        # On a 2-D (curvilinear) grid the longitude count is the second axis.
        if lat_dim == 2 and lon_dim == 2:
            lon_count = lons.shape[1]
        time_count = times.shape[0]

        if time_dim != 1:
            err_msg = ("Time Array should be 1 dimensional. %s dimensions"
                       " found." % time_dim)
        elif value_dim < 2:
            err_msg = ("Value Array should be at least 2 dimensional."
                       " %s dimensions found." % value_dim)

        # Finally check that the Values array conforms to the proper shape
        if value_dim == 2:
            # A 2-D value array is accepted either as (times, locations) or
            # as a single (lats, lons) field.
            if (values.shape[0] != time_count and
                    values.shape != (lat_count, lon_count)):
                err_msg = (
                    "Value Array must be of shape (lats, lons) or "
                    "(times, locations).\n"
                    "Expected shape (%s, %s) but received (%s, %s)" % (
                        lat_count,
                        lon_count,
                        values.shape[0],
                        values.shape[1]))
        if (value_dim == 3 and
                values.shape != (time_count, lat_count, lon_count)):
            err_msg = (
                "Value Array must be of shape (times, lats, lons).\n"
                "Expected shape (%s, %s, %s) but received (%s, %s, %s)" % (
                    time_count,
                    lat_count,
                    lon_count,
                    values.shape[0],
                    values.shape[1],
                    values.shape[2]))

        if err_msg:
            logger.error(err_msg)
            raise ValueError(err_msg)

    def __str__(self):
        '''Return a one-line summary of the Dataset's name, spatial and
        temporal extent, variable and units.'''
        lat_min, lat_max, lon_min, lon_max = self.spatial_boundaries()
        start, end = self.temporal_boundaries()

        # Each range is (min, max) of its own axis.
        lat_range = "({}, {})".format(lat_min, lat_max)
        lon_range = "({}, {})".format(lon_min, lon_max)
        temporal_boundaries = "({}, {})".format(start, end)

        formatted_repr = (
            "<Dataset - name: {}, "
            "lat-range: {}, "
            "lon-range: {}, "
            "temporal_boundaries: {}, "
            "var: {}, "
            "units: {}>"
        )

        return formatted_repr.format(
            self.name if self.name != "" else None,
            lat_range,
            lon_range,
            temporal_boundaries,
            self.variable,
            self.units
        )
class Bounds(object):
    """Container for holding spatial and temporal bounds information.

    Certain operations require valid bounding information to be present for
    correct functioning. Bounds guarantees that a function receives well
    formed information without the need to do the validation manually.

    boundary_type may be one of the following:
    * 'rectangular'
    * 'CORDEX (CORDEX region name)': pre-defined CORDEX boundary
    * 'us_states': an array of US states abbreviation is required
      (ex) us_states = ['CA','NV'])
    * 'countries': an array of country names is required
      (ex) countries = ['United States','Canada']
    * 'user': user_mask_file in a netCDF format with two dimensional mask
      variable is required.

    If boundary_type == 'rectangular', spatial and temporal bounds must follow
    the following guidelines.

    * Latitude values must be in the range [-90, 90]
    * Longitude values must be in the range [-180, 180]
    * Lat/Lon Min values must be less than the corresponding Lat/Lon Max
      values.

    Temporal bounds must be valid datetime objects.
    """

    def __init__(self, boundary_type='rectangular',
                 us_states=None, countries=None,
                 user_mask_file=None, mask_variable_name=None,
                 longitude_name=None, latitude_name=None,
                 lat_min=-90, lat_max=90, lon_min=-180, lon_max=180,
                 start=None, end=None):
        """Default Bounds constructor

        :param boundary_type: The type of spatial subset boundary.
        :type boundary_type: :mod:`string`

        :param us_states: US state abbreviations; used when boundary_type
            is 'us_states'.
        :type us_states: :class:`list`

        :param countries: Country names; used when boundary_type is
            'countries'.
        :type countries: :class:`list`

        :param user_mask_file: Path of a netCDF mask file; used when
            boundary_type is 'user'.
        :type user_mask_file: :mod:`string`

        :param mask_variable_name: Name of the mask variable in
            user_mask_file.
        :type mask_variable_name: :mod:`string`

        :param longitude_name: Name of the longitude variable in
            user_mask_file.
        :type longitude_name: :mod:`string`

        :param latitude_name: Name of the latitude variable in
            user_mask_file.
        :type latitude_name: :mod:`string`

        :param lat_min: The minimum latitude bound.
        :type lat_min: :class:`float`

        :param lat_max: The maximum latitude bound.
        :type lat_max: :class:`float`

        :param lon_min: The minimum longitude bound.
        :type lon_min: :class:`float`

        :param lon_max: The maximum longitude bound.
        :type lon_max: :class:`float`

        :param start: An optional datetime object for the starting
            datetime bound.
        :type start: :class:`datetime.datetime`

        :param end: An optional datetime object for the ending datetime bound.
        :type end: :class:`datetime.datetime`

        :raises: ValueError
        """
        self.boundary_type = boundary_type

        self._start = None
        self._end = None
        self.lat_min = None
        self.lat_max = None
        self.lon_min = None
        self.lon_max = None

        if start and self._validate_start(start):
            self._start = start

        if end and self._validate_end(end):
            self._end = end

        if boundary_type == 'us_states':
            self.masked_regions = utils.shapefile_boundary(boundary_type,
                                                           us_states)
        elif boundary_type == 'countries':
            self.masked_regions = utils.shapefile_boundary(boundary_type,
                                                           countries)
        elif boundary_type == 'user':
            file_object = netCDF4.Dataset(user_mask_file)
            try:
                # Slicing with [:] reads the data into memory, so the file
                # can be closed as soon as the three variables are loaded.
                self.mask_variable = \
                    file_object.variables[mask_variable_name][:]
                mask_longitude = file_object.variables[longitude_name][:]
                mask_latitude = file_object.variables[latitude_name][:]
            finally:
                file_object.close()

            if mask_longitude.ndim == 1 and mask_latitude.ndim == 1:
                # Expand 1-D axes to 2-D coordinate grids.
                self.mask_longitude, self.mask_latitude = \
                    numpy.meshgrid(mask_longitude, mask_latitude)
            elif mask_longitude.ndim == 2 and mask_latitude.ndim == 2:
                self.mask_longitude = mask_longitude
                self.mask_latitude = mask_latitude
        elif boundary_type == 'rectangular':
            self._set_lat_lon(lat_min, lat_max, lon_min, lon_max)

        if boundary_type[:6].upper() == 'CORDEX':
            # Whatever follows the 'CORDEX' prefix names the region.
            region = boundary_type[6:].replace(" ", "").lower()
            lat_min, lat_max, lon_min, lon_max = utils.CORDEX_boundary(region)
            self._set_lat_lon(lat_min, lat_max, lon_min, lon_max)

    def _set_lat_lon(self, lat_min, lat_max, lon_min, lon_max):
        """ Validate the rectangular bounds and store them as floats. """
        if self._validate_lat_lon(lat_max=lat_max, lat_min=lat_min,
                                  lon_max=lon_max, lon_min=lon_min):
            self.lat_min = float(lat_min)
            self.lat_max = float(lat_max)
            self.lon_min = float(lon_min)
            self.lon_max = float(lon_max)

    @property
    def start(self):
        """ Getter for start attribute. """
        return self._start

    @start.setter
    def start(self, value):
        """ Setter for start attribute.

        Falsy values (such as None) are ignored and leave the current
        start unchanged.
        """
        if value and self._validate_start(value):
            self._start = value

    @property
    def end(self):
        """ Getter for end attribute. """
        return self._end

    @end.setter
    def end(self, value):
        """ Setter for end attribute.

        Falsy values (such as None) are ignored and leave the current
        end unchanged.
        """
        if value and self._validate_end(value):
            self._end = value

    def _validate_start(self, value):
        """ Validate start is both the correct type and less than end.

        :returns: True when value is valid.
        :raises: ValueError
        """
        if not isinstance(value, dt.datetime):
            error = "Attempted to set start to invalid type: %s" % (type(value))
            logger.error(error)
            raise ValueError(error)

        if self._end:
            if value > self._end:
                error = "Attempted to set start to invalid value: %s" % (value)
                logger.error(error)
                raise ValueError(error)

        return True

    def _validate_end(self, value):
        """ Validate end is both the correct type and greater than start.

        :returns: True when value is valid.
        :raises: ValueError
        """
        if not isinstance(value, dt.datetime):
            error = "Attempted to set end to invalid type: %s" % (type(value))
            logger.error(error)
            raise ValueError(error)

        if self._start:
            if value < self._start:
                error = "Attempted to set end to invalid value: %s" % (value)
                logger.error(error)
                raise ValueError(error)

        return True

    def _validate_lat_lon(self, lat_max, lat_min, lon_max, lon_min):
        """ Confirm the min / max lat / lon are within expected ranges.

        :returns: True when all four bounds are valid.
        :raises: ValueError
        """
        if not (-90 <= float(lat_min) <= 90) or float(lat_min) > float(lat_max):
            error = "Attempted to set lat_min to invalid value: %s" % (lat_min)
            logger.error(error)
            raise ValueError(error)

        if not -90 <= float(lat_max) <= 90:
            error = "Attempted to set lat_max to invalid value: %s" % (lat_max)
            logger.error(error)
            raise ValueError(error)

        if not (-180 <= float(lon_min) <= 180) or float(lon_min) > float(lon_max):
            error = "Attempted to set lon_min to invalid value: %s" % (lon_min)
            logger.error(error)
            raise ValueError(error)

        if not -180 <= float(lon_max) <= 180:
            # Name the offending bound correctly: this is lon_max.
            error = "Attempted to set lon_max to invalid value: %s" % (lon_max)
            logger.error(error)
            raise ValueError(error)

        return True