# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import calendar
from datetime import timedelta, datetime
from time import strptime
from glob import glob
import re
import string
import os
from ocw.dataset import Dataset
import ocw.utils as utils
import netCDF4
import numpy
import numpy.ma as ma
LAT_NAMES = ['x', 'rlat', 'rlats', 'lat', 'lats', 'latitude', 'latitudes']
LON_NAMES = ['y', 'rlon', 'rlons', 'lon', 'lons', 'longitude', 'longitudes']
TIME_NAMES = ['time', 'times', 'date', 'dates', 'julian']
def _get_netcdf_variable_name(valid_var_names, netcdf, netcdf_var):
''' Determine if one of a set of variable names are in a NetCDF Dataset.
Looks for an occurrence of a valid_var_name in the NetCDF variable data.
This is useful for automatically determining the names of the lat, lon,
and time variable names inside of a dataset object.
:param valid_var_names: The possible variable names to search for in
the netCDF object.
:type valid_var_names: List of Strings
:param netcdf: The netCDF Dataset object in which to check for
valid_var_names.
    :type netcdf: netCDF4.Dataset
:param netcdf_var: The relevant variable name to search over in the
netcdf object. This is used to narrow down the search for valid
variable names by first checking the desired variable's dimension
values for one or more of the valid variable names.
    :returns: The name of the variable in the netCDF object that matches
        one of the supplied valid_var_names.
:raises ValueError: When unable to locate a single matching variable
name in the NetCDF Dataset from the supplied list of valid variable
names.
'''
# Check for valid variable names in netCDF variable dimensions
dimensions = netcdf.variables[netcdf_var].dimensions
dims_lower = [dim.encode().lower() for dim in dimensions]
intersect = set(valid_var_names).intersection(dims_lower)
if len(intersect) == 1:
# Retrieve the name of the dimension where we found the matching
# variable name
index = dims_lower.index(intersect.pop())
dimension_name = dimensions[index].encode()
# Locate all of the variables that share the dimension that we matched
# earlier. If the dimension's name matches then that variable is
# potentially what we want to return to the user.
possible_vars = []
for var in netcdf.variables.keys():
var_dimensions = netcdf.variables[var].dimensions
            # Skip any variables that are not 1D
if len(var_dimensions) != 1:
continue
if var_dimensions[0].encode() == dimension_name:
possible_vars.append(var)
        # If there are multiple variables with matching dimension names then we
        # aren't able to determine the correct variable name using the
        # variable dimensions. We need to try a different approach. Otherwise,
        # we're done!
if len(possible_vars) == 1:
return possible_vars[0]
# Check for valid variable names in netCDF variable names
variables = netcdf.variables.keys()
vars_lower = [var.encode().lower() for var in variables]
intersect = set(valid_var_names).intersection(vars_lower)
if len(intersect) == 1:
index = vars_lower.index(intersect.pop())
return variables[index]
# If we couldn't locate a single matching valid variable then we're unable
# to automatically determine the variable names for the user.
error = (
"Unable to locate a single matching variable name from the "
"supplied list of valid variable names. "
)
raise ValueError(error)
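
# A minimal usage sketch for _get_netcdf_variable_name. The file name
# 'example.nc' and the variable name 'tas' below are hypothetical placeholders:
#
#     >>> netcdf = netCDF4.Dataset('example.nc', mode='r')
#     >>> lat_name = _get_netcdf_variable_name(LAT_NAMES, netcdf, 'tas')
#     >>> lats = netcdf.variables[lat_name][:]
#
# The search first inspects the dimensions of 'tas' and, failing that, the
# variable names themselves, so it handles both coordinate-dimension and
# auxiliary-coordinate layouts.
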
def load_WRF_2d_files(file_path,
filename_pattern,
variable_name,
name=''):
''' Load multiple WRF (or nuWRF) original output files containing 2D fields such as precipitation and surface variables into a Dataset.
The dataset can be spatially subset.
    :param file_path: Directory containing the NetCDF files to load.
    :type file_path: :mod:`string`
    :param filename_pattern: File name patterns of the NetCDF files to load,
        appended to file_path and expanded with glob.
    :type filename_pattern: :list:`string`
:param variable_name: The variable name to load from the NetCDF file.
:type variable_name: :mod:`string`
:param name: (Optional) A name for the loaded dataset.
:type name: :mod:`string`
:returns: An OCW Dataset object with the requested variable's data from
the NetCDF file.
:rtype: :class:`dataset.Dataset`
:raises ValueError:
'''
WRF_files = []
for pattern in filename_pattern:
WRF_files.extend(glob(file_path + pattern))
WRF_files.sort()
file_object_first = netCDF4.Dataset(WRF_files[0])
lats = file_object_first.variables['XLAT'][0,:]
lons = file_object_first.variables['XLONG'][0,:]
times = []
for ifile, file in enumerate(WRF_files):
file_object = netCDF4.Dataset(file)
        time_struct_parsed = strptime(file[-19:], "%Y-%m-%d_%H:%M:%S")
for ihour in numpy.arange(24):
times.append(datetime(*time_struct_parsed[:6]) + timedelta(hours=ihour))
        values0 = file_object.variables[variable_name][:]
        if ifile == 0:
            values = values0
        else:
            values = numpy.concatenate((values, values0))
file_object.close()
times = numpy.array(times)
return Dataset(lats, lons, times, values, variable_name, name=name)
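
# A minimal usage sketch for load_WRF_2d_files. The directory, pattern, and
# variable name below are hypothetical; each matched file name is expected to
# end in a 'YYYY-mm-dd_HH:MM:SS' timestamp and to hold 24 hourly records:
#
#     >>> from ocw.data_source import local
#     >>> prec = local.load_WRF_2d_files('/data/wrf24/',
#     ...                                ['wrfout_d01_*'],
#     ...                                'RAINC',
#     ...                                name='WRF 2D precipitation')
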
def load_file(file_path,
variable_name,
              variable_unit=None,
elevation_index=0,
name='',
lat_name=None,
lon_name=None,
time_name=None):
''' Load a NetCDF file into a Dataset.
:param file_path: Path to the NetCDF file to load.
:type file_path: :mod:`string`
:param variable_name: The variable name to load from the NetCDF file.
:type variable_name: :mod:`string`
    :param variable_unit: (Optional) The units of the variable. If not given,
        the units attribute of the NetCDF variable is used when present.
    :type variable_unit: :mod:`string`
    :param elevation_index: (Optional) The elevation index for which data should
        be returned. Climate data is often 4-dimensional, with readings at
        multiple height/elevation levels. OCW expects 3D data, so a single level
        must be stripped out when loading. By default, the first elevation level
        is used. If desired you may specify a different elevation index to use.
:type elevation_index: :class:`int`
:param name: (Optional) A name for the loaded dataset.
:type name: :mod:`string`
:param lat_name: (Optional) The latitude variable name to extract from the
dataset.
:type lat_name: :mod:`string`
:param lon_name: (Optional) The longitude variable name to extract from the
dataset.
:type lon_name: :mod:`string`
:param time_name: (Optional) The time variable name to extract from the
dataset.
:type time_name: :mod:`string`
:returns: An OCW Dataset object with the requested variable's data from
the NetCDF file.
:rtype: :class:`dataset.Dataset`
    :raises ValueError: When the specified file path cannot be loaded by netCDF4
or when the lat/lon/time variable name cannot be determined
automatically.
'''
try:
netcdf = netCDF4.Dataset(file_path, mode='r')
except RuntimeError:
err = "Dataset filepath is invalid. Please ensure it is correct."
raise ValueError(err)
except:
err = (
"The given file cannot be loaded. Please ensure that it is a valid "
"NetCDF file. If problems persist, report them to the project's "
"mailing list."
)
raise ValueError(err)
if not lat_name:
lat_name = _get_netcdf_variable_name(LAT_NAMES, netcdf, variable_name)
if not lon_name:
lon_name = _get_netcdf_variable_name(LON_NAMES, netcdf, variable_name)
if not time_name:
time_name = _get_netcdf_variable_name(TIME_NAMES, netcdf, variable_name)
lats = netcdf.variables[lat_name][:]
lons = netcdf.variables[lon_name][:]
time_raw_values = netcdf.variables[time_name][:]
times = utils.decode_time_values(netcdf, time_name)
times = numpy.array(times)
values = ma.array(netcdf.variables[variable_name][:])
    # Honor a caller-supplied unit; otherwise fall back to the variable's
    # units attribute when it exists.
    if variable_unit is None:
        variable_unit = getattr(netcdf.variables[variable_name], 'units', None)
# If the values are 4D then we need to strip out the elevation index
if len(values.shape) == 4:
# Determine the set of possible elevation dimension names excluding
# the list of names that are used for the lat, lon, and time values.
dims = netcdf.variables[variable_name].dimensions
dimension_names = [dim_name.encode() for dim_name in dims]
lat_lon_time_var_names = [lat_name, lon_name, time_name]
elev_names = set(dimension_names) - set(lat_lon_time_var_names)
# Grab the index value for the elevation values
level_index = dimension_names.index(elev_names.pop())
# Strip out the elevation values so we're left with a 3D array.
        if level_index == 0:
            values = values[elevation_index, :, :, :]
        elif level_index == 1:
            values = values[:, elevation_index, :, :]
        elif level_index == 2:
            values = values[:, :, elevation_index, :]
        else:
            values = values[:, :, :, elevation_index]
origin = {
'source': 'local',
'path': file_path,
'lat_name': lat_name,
'lon_name': lon_name,
'time_name': time_name
}
if elevation_index != 0: origin['elevation_index'] = elevation_index
return Dataset(lats, lons, times, values, variable=variable_name,
units=variable_unit, name=name, origin=origin)
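
# A minimal usage sketch for load_file. The paths and variable names below are
# hypothetical placeholders:
#
#     >>> from ocw.data_source import local
#     >>> prec = local.load_file('/data/prec.monthly.nc', 'prec', name='obs')
#     >>> prec.values.shape            # (time, lat, lon)
#     >>> # For 4D variables, choose the vertical level with elevation_index:
#     >>> ta500 = local.load_file('/data/ta.nc', 'ta', elevation_index=2)
#
# When lat_name, lon_name, or time_name are omitted they are guessed from
# LAT_NAMES, LON_NAMES, and TIME_NAMES via _get_netcdf_variable_name().
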
def load_multiple_files(file_path,
filename_pattern,
variable_name,
dataset_name='ref',
variable_unit=None,
lat_name=None,
lon_name=None,
time_name=None):
    ''' Load multiple NetCDF files matching common filename patterns and
    return a list of OCW Dataset objects.
    :param file_path: Directory where the NetCDF files to load are stored.
    :type file_path: :mod:`string`
    :param filename_pattern: Common file name patterns, appended to file_path
        and expanded with glob.
    :type filename_pattern: :list:`string`
    :param variable_name: The variable name to load from the NetCDF files.
    :type variable_name: :mod:`string`
    :param dataset_name: (Optional) The dataset name to use when only a single
        file is read.
    :type dataset_name: :mod:`string`
    :param variable_unit: (Optional) The units of the variable. If not given,
        the units attribute of the NetCDF variable is used when present.
    :type variable_unit: :mod:`string`
:param lat_name: (Optional) The latitude variable name to extract from the
dataset.
:type lat_name: :mod:`string`
:param lon_name: (Optional) The longitude variable name to extract from the
dataset.
:type lon_name: :mod:`string`
:param time_name: (Optional) The time variable name to extract from the
dataset.
:type time_name: :mod:`string`
    :returns: A list of OCW Dataset objects.
    :rtype: :class:`list`
'''
data_filenames = []
for pattern in filename_pattern:
data_filenames.extend(glob(file_path + pattern))
data_filenames.sort()
# number of files
ndata = len(data_filenames)
if ndata == 1:
data_name = [dataset_name]
else:
data_name = []
data_filenames_reversed = []
for element in data_filenames:
data_filenames_reversed.append(element[::-1])
prefix = os.path.commonprefix(data_filenames)
postfix = os.path.commonprefix(data_filenames_reversed)[::-1]
for element in data_filenames:
data_name.append(element.replace(prefix,'').replace(postfix,''))
datasets = []
    for ifile, filename in enumerate(data_filenames):
datasets.append(load_file(filename, variable_name, variable_unit, name=data_name[ifile],
lat_name=lat_name, lon_name=lon_name, time_name=time_name))
return datasets
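
# A minimal usage sketch for load_multiple_files. The directory and pattern
# below are hypothetical. With more than one matching file, each dataset is
# named by whatever remains of its file name after the common prefix and
# suffix are stripped:
#
#     >>> from ocw.data_source import local
#     >>> model_datasets = local.load_multiple_files('/data/models/',
#     ...                                            ['*_prec.nc'],
#     ...                                            'prec')
#     >>> [ds.name for ds in model_datasets]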