#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Module for handling data input files. Requires netCDF and Numpy be
installed.
This module can easily open NetCDF, HDF and Grib files. Search the netCDF4
documentation for a complete list of supported formats.
"""
from os import path
import netCDF4
import numpy as np
import numpy.ma as ma
import sys
from toolkit import process
from utils import fortranfile
from utils import misc
VARIABLE_NAMES = {'time': ['time', 'times', 'date', 'dates', 'julian'],
'latitude': ['latitude', 'lat', 'lats', 'latitudes'],
'longitude': ['longitude', 'lon', 'lons', 'longitudes']
}
def findunique(seq):
    """Return the unique elements of seq (order is not preserved)."""
    keys = {}
    for e in seq:
        keys[e] = 1
    return keys.keys()
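# Example (a sketch; the result order is not guaranteed):
#
#     findunique([2000, 2001, 2001, 2002])   # -> e.g. [2000, 2001, 2002]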
def getVariableByType(filename, variableType):
"""
Function that will try to return the variable from a file based on a provided
parameter type.
Input::
filename - the file to inspect
variableType - time | latitude | longitude
Output::
variable name OR list of all variables in the file if a single variable
name match cannot be found.
"""
try:
f = netCDF4.Dataset(filename, mode='r')
except:
print "netCDF4Error:", sys.exc_info()[0]
raise
variableKeys = f.variables.keys()
f.close()
variableKeys = [variable.encode().lower() for variable in variableKeys]
variableMatch = VARIABLE_NAMES[variableType]
commonVariables = list(set(variableKeys).intersection(variableMatch))
if len(commonVariables) == 1:
return str(commonVariables[0])
else:
return variableKeys
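# Example usage (a sketch, assuming a hypothetical file 'model_output.nc'
# whose latitude variable is named 'lat'):
#
#     latName = getVariableByType('model_output.nc', 'latitude')
#     # -> 'lat' when exactly one candidate matches; otherwise the full list
#     #    of variable names, so the caller can pick one manually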
def getVariableRange(filename, variableName):
"""
Function to return the min and max values of the given variable in
the supplied filename.
Input::
filename - absolute path to a file
variableName - variable whose min and max values should be returned
Output::
variableRange - tuple of order (variableMin, variableMax)
"""
try:
f = netCDF4.Dataset(filename, mode='r')
except:
print "netCDF4Error:", sys.exc_info()[0]
raise
    varArray = f.variables[variableName][:]
    f.close()
    return (varArray.min(), varArray.max())
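# Example usage (a sketch; the path and variable name are hypothetical):
#
#     tasMin, tasMax = getVariableRange('/data/model_tas.nc', 'tas')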
def read_data_from_file_list(filelist, myvar, timeVarName, latVarName, lonVarName):
    '''
    Read in data from a list of model files into a single data structure.
    Input:
        filelist - list of filenames (including path)
        myvar - string containing name of variable to load in (as it appears in file)
        timeVarName, latVarName, lonVarName - names of the time, latitude
            and longitude variables in the files
    Output:
        data_dict - dictionary with keys 'lats' and 'lons' (2D arrays of
            latitude and longitude values), 'times' (list of times) and
            'data' (masked array containing data from all files)
    NB. originally written specifically for WRF netCDF output files;
        modified to make more general (Feb 2011)
    Peter Lean July 2010
    '''
    # Fail cleanly if 'filelist' is empty
    # TODO: raise an exception instead of exiting
    if len(filelist) == 0:
        print 'Error: no files have been passed to read_data_from_file_list()'
        sys.exit()
    filelist.sort()
    filename = filelist[0]
# Open the first file in the list to:
# i) read in lats, lons
# ii) find out how many timesteps in the file
# (assume same ntimes in each file in list)
# -allows you to create an empty array to store variable data for all times
tmp = netCDF4.Dataset(filename, mode='r')
latsraw = tmp.variables[latVarName][:]
lonsraw = tmp.variables[lonVarName][:]
    if latsraw.ndim == 1:
        lon, lat = np.meshgrid(lonsraw, latsraw)
    elif latsraw.ndim == 2:
        lon = lonsraw
        lat = latsraw
timesraw = tmp.variables[timeVarName]
ntimes = len(timesraw)
print 'Lats and lons read in for first file in filelist'
# Create a single empty masked array to store model data from all files
t2store = ma.zeros((ntimes * len(filelist), len(lat[:, 0]), len(lon[0, :])))
timestore = ma.zeros((ntimes * len(filelist)))
# Now load in the data for real
# NB. no need to reload in the latitudes and longitudes -assume invariant
i = 0
timesaccu = 0 # a counter for number of times stored so far in t2store
# NB. this method allows for missing times in data files
# as no assumption made that same number of times in each file...
for i, ifile in enumerate(filelist):
#print 'Loading data from file: ',filelist[i]
f = netCDF4.Dataset(ifile, mode='r')
t2raw = ma.array(f.variables[myvar][:])
timesraw = f.variables[timeVarName]
time = timesraw[:]
ntimes = len(time)
print 'file= ', i, 'ntimes= ', ntimes, filelist[i]
print 't2raw shape: ', t2raw.shape
# Flatten dimensions which needn't exist, i.e. level
# e.g. if for single level then often data have 4 dimensions, when 3 dimensions will do.
# Code requires data to have dimensions, (time,lat,lon)
# i.e. remove level dimensions
# Remove 1d axis from the t2raw array
        # Example: t2raw.shape == (365, 180, 360, 1) <maps to (time, lat, lon, height)>
        # After the squeeze you will be left with (365, 180, 360) instead
        t2tmp = t2raw.squeeze()
        # NB. if this happens to be data for a single time only, then we just
        # flattened it by accident; let's put it back...
if t2tmp.ndim == 2:
t2tmp = np.expand_dims(t2tmp, 0)
t2store[timesaccu + np.arange(ntimes), :, :] = t2tmp[:, :, :]
timestore[timesaccu + np.arange(ntimes)] = time
timesaccu += ntimes
f.close()
print 'Data read in successfully with dimensions: ', t2store.shape
    # TODO: search for duplicated entries (same time) and remove duplicates.
    # Quick check: if the number of unique times differs from the number of
    # (non-zero) stored times, some times are probably duplicated
    if len(np.unique(timestore)) != len(np.where(timestore != 0)[0]):
        print 'WARNING: Possible duplicated times'
    # Decode model times into python datetime objects. Note: timestore becomes a list (no longer an array) here
timestore, _ = process.getModelTimes(filename, timeVarName)
# Make sure latlon grid is monotonically increasing and that the domains
# are correct
lat, lon, t2store = checkLatLon(lat, lon, t2store)
data_dict = {'lats': lat, 'lons': lon, 'times': timestore, 'data': t2store}
return data_dict
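# Example usage (a sketch; file and variable names are hypothetical):
#
#     files = ['wrfout_2001.nc', 'wrfout_2002.nc']
#     model = read_data_from_file_list(files, 't2', 'time', 'lat', 'lon')
#     model['data'].shape   # -> (total number of times, nlats, nlons)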
def select_var_from_file(myfile, fmt='not set'):
    '''
    Routine to act as user interface to allow users to select the variable of interest from a file.
    Input:
        myfile - filename
        fmt - (optional) specify the netCDF4 file format if the filename suffix is non-standard
    Output:
        myvar - variable name in file
    Peter Lean September 2010
    '''
    if fmt == 'not set':
        f = netCDF4.Dataset(myfile, mode='r')
    else:
        f = netCDF4.Dataset(myfile, mode='r', format=fmt)
keylist = [key.encode().lower() for key in f.variables.keys()]
    for i, v in enumerate(keylist):
        print '[', i, '] ', f.variables[v].long_name, ' (', v, ')'
    user_selection = raw_input('Please select variable : [0 - ' + str(len(keylist) - 1) + '] ')
    myvar = keylist[int(user_selection)]
    f.close()
    return myvar
def select_var_from_wrf_file(myfile):
'''
Routine to act as user interface to allow users to select variable of interest from a wrf netCDF file.
Input:
myfile - filename
Output:
mywrfvar - variable name in wrf file
Peter Lean September 2010
'''
f = netCDF4.Dataset(myfile, mode='r', format='NETCDF4')
keylist = [key.encode().lower() for key in f.variables.keys()]
    for i, v in enumerate(keylist):
        try:
            print '[', i, '] ', f.variables[v].description, ' (', v, ')'
        except:
            print ''
    user_selection = raw_input('Please select WRF variable : [0 - ' + str(len(keylist) - 1) + '] ')
    mywrfvar = keylist[int(user_selection)]
    f.close()
    return mywrfvar
def read_data_from_one_file(ifile, myvar, latVarName, lonVarName, timeVarName, file_type):
"""
Purpose::
Read in data from one file at a time
Input::
filelist - list of filenames (including path)
myvar - string containing name of variable to load in (as it appears in file)s
lonVarName - name of the Longitude Variable
timeVarName - name of the Time Variable
fileType - type of file we are trying to parse
Output::
lat, lon - 2D arrays of latitude and longitude values
times - list of times
t2store - numpy array containing data from the file for the requested variable
varUnit - units for the variable given by t2store
"""
f = netCDF4.Dataset(ifile, mode='r')
try:
varUnit = f.variables[myvar].units.encode().upper()
except:
varUnit = raw_input('Enter the model variable unit: \n> ').upper()
t2raw = ma.array(f.variables[myvar][:])
t2tmp = t2raw.squeeze()
if t2tmp.ndim == 2:
t2tmp = np.expand_dims(t2tmp, 0)
lonsraw = f.variables[lonVarName][:]
latsraw = f.variables[latVarName][:]
    if latsraw.ndim == 1:
        lon, lat = np.meshgrid(lonsraw, latsraw)
    elif latsraw.ndim == 2:
        lon = lonsraw
        lat = latsraw
f.close()
print ' success read_data_from_one_file: VarName=', myvar, ' Shape(Full)= ', t2tmp.shape, ' Unit= ', varUnit
timestore = process.decode_model_timesK(ifile, timeVarName, file_type)
# Make sure latlon grid is monotonically increasing and that the domains
# are correct
lat, lon, t2store = checkLatLon(lat, lon, t2tmp)
return lat, lon, timestore, t2store, varUnit
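# Example usage (a sketch; file, variable and dimension names are hypothetical):
#
#     lat, lon, times, t2, varUnit = read_data_from_one_file(
#         'obs_precip.nc', 'precip', 'lat', 'lon', 'time', 'NETCDF4')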
def findTimeVariable(filename):
"""
Function to find what the time variable is called in a model file.
Input::
filename - file to crack open and check for a time variable
Output::
timeName - name of the input file's time variable
variableNameList - list of variable names from the input filename
"""
try:
f = netCDF4.Dataset(filename, mode='r')
except:
print("Unable to open '%s' to try and read the Time variable" % filename)
raise
variableNameList = [variable.encode() for variable in f.variables.keys()]
# convert all variable names into lower case
varNameListLowerCase = [x.lower() for x in variableNameList]
# Use "set" types for finding common variable name from in the file and from the list of possibilities
possibleTimeNames = set(['time', 'times', 'date', 'dates', 'julian'])
# Use the sets to find the intersection where variable names are in possibleNames
timeNameSet = possibleTimeNames.intersection(varNameListLowerCase)
    if len(timeNameSet) == 0:
        print("Unable to autodetect the Time Variable Name in '%s'" % filename)
        timeName = misc.askUserForVariableName(variableNameList, targetName="Time")
    else:
        timeName = timeNameSet.pop()
    f.close()
    return timeName, variableNameList
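# Example usage (a sketch with a hypothetical filename):
#
#     timeName, allVarNames = findTimeVariable('model_output.nc')
#     # falls back to an interactive prompt (misc.askUserForVariableName)
#     # when none of the names in VARIABLE_NAMES['time'] is present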
def findLatLonVarFromFile(filename):
"""
Function to find what the latitude and longitude variables are called in a model file.
Input::
-filename
Output::
-latVarName
-lonVarName
-latMin
-latMax
-lonMin
-lonMax
"""
try:
f = netCDF4.Dataset(filename, mode='r')
except:
print("Unable to open '%s' to try and read the Latitude and Longitude variables" % filename)
raise
variableNameList = [variable.encode() for variable in f.variables.keys()]
# convert all variable names into lower case
varNameListLowerCase = [x.lower() for x in variableNameList]
# Use "set" types for finding common variable name from in the file and from the list of possibilities
possibleLatNames = set(['latitude', 'lat', 'lats', 'latitudes'])
possibleLonNames = set(['longitude', 'lon', 'lons', 'longitudes'])
# Use the sets to find the intersection where variable names are in possibleNames
latNameSet = possibleLatNames.intersection(varNameListLowerCase)
lonNameSet = possibleLonNames.intersection(varNameListLowerCase)
if len(latNameSet) == 0 or len(lonNameSet) == 0:
print("Unable to autodetect Latitude and/or Longitude values in the file")
        latName = misc.askUserForVariableName(variableNameList, targetName="Latitude")
        lonName = misc.askUserForVariableName(variableNameList, targetName="Longitude")
else:
latName = latNameSet.pop()
lonName = lonNameSet.pop()
lats = np.array(f.variables[latName][:])
lons = np.array(f.variables[lonName][:])
latMin = lats.min()
latMax = lats.max()
# Convert the lons from 0:360 into -180:180
lons[lons > 180] = lons[lons > 180] - 360.
    lonMin = lons.min()
    lonMax = lons.max()
    f.close()
    return latName, lonName, latMin, latMax, lonMin, lonMax
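# Example usage (a sketch with a hypothetical filename; the returned longitude
# range uses the -180:180 convention):
#
#     latName, lonName, latMin, latMax, lonMin, lonMax = \
#         findLatLonVarFromFile('model_output.nc')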
def read_data_from_file_list_K(filelist, myvar, timeVarName, latVarName, lonVarName, file_type):
##################################################################################
# Read in data from a list of model files into a single data structure
# Input: filelist - list of filenames (including path)
# myvar - string containing name of variable to load in (as it appears in file)
# Output: lat, lon - 2D array of latitude and longitude values
# times - list of times
# t2store - numpy array containing data from all files
# Modified from read_data_from_file_list to read data from multiple models into a 4-D array
    # 1. The code now processes model data that completely cover the 20-yr period. Thus,
    #    all model data must have the same number of time levels (ntimes). Unlike in the
    #    original, ntimes is fixed here.
    # 2. Because one of the model datasets exceeds 240 months (243 months), the model data
    #    are truncated to 240 months using the ntimes determined from the first file.
##################################################################################
filelist.sort()
nfiles = len(filelist)
# Crash nicely if 'filelist' is zero length
if nfiles == 0:
        print 'Error: no files have been passed to read_data_from_file_list_K(): Exit'
sys.exit()
# Open the first file in the list to:
# i) read in lats, lons
# ii) find out how many timesteps in the file (assume same ntimes in each file in list)
# -allows you to create an empty array to store variable data for all times
tmp = netCDF4.Dataset(filelist[0], mode='r', format=file_type)
latsraw = tmp.variables[latVarName][:]
lonsraw = tmp.variables[lonVarName][:]
timesraw = tmp.variables[timeVarName]
if(latsraw.ndim == 1):
lon, lat = np.meshgrid(lonsraw, latsraw)
elif(latsraw.ndim == 2):
lon = lonsraw
lat = latsraw
    ntimes = len(timesraw)
    nygrd = len(lat[:, 0])
    nxgrd = len(lon[0, :])
print 'Lats and lons read in for first file in filelist'
# Create a single empty masked array to store model data from all files
#t2store = ma.zeros((ntimes*nfiles,nygrd,nxgrd))
t2store = ma.zeros((nfiles, ntimes, nygrd, nxgrd))
#timestore=ma.zeros((ntimes*nfiles))
## Now load in the data for real
## NB. no need to reload in the latitudes and longitudes -assume invariant
#timesaccu=0 # a counter for number of times stored so far in t2store
# NB. this method allows for missing times in data files
# as no assumption made that same number of times in each file...
for i, ifile in enumerate(filelist):
#print 'Loading data from file: ',filelist[i]
f = netCDF4.Dataset(ifile, mode='r')
t2raw = ma.array(f.variables[myvar][:])
timesraw = f.variables[timeVarName]
#ntimes=len(time)
#print 'file= ',i,'ntimes= ',ntimes,filelist[i]
## Flatten dimensions which needn't exist, i.e. level
## e.g. if for single level then often data have 4 dimensions, when 3 dimensions will do.
## Code requires data to have dimensions, (time,lat,lon)
## i.e. remove level dimensions
t2tmp = t2raw.squeeze()
        ## NB. if the data happen to be for a single time, we flattened it by accident; let's put it back...
if t2tmp.ndim == 2:
t2tmp = np.expand_dims(t2tmp, 0)
#t2store[timesaccu+np.arange(ntimes),:,:]=t2tmp[0:ntimes,:,:]
t2store[i, 0:ntimes, :, :] = t2tmp[0:ntimes, :, :]
#timestore[timesaccu+np.arange(ntimes)]=time
#timesaccu=timesaccu+ntimes
f.close()
print 'Data read in successfully with dimensions: ', t2store.shape
    # Decode model times into python datetime objects. Note: timestore becomes a list (no longer an array) here
ifile = filelist[0]
timestore, _ = process.getModelTimes(ifile, timeVarName)
# Make sure latlon grid is monotonically increasing and that the domains
# are correct
lat, lon, t2store = checkLatLon(lat, lon, t2store)
return lat, lon, timestore, t2store
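# Example usage (a sketch; one file per model, all assumed to share the same
# time levels, as required above):
#
#     lat, lon, times, data = read_data_from_file_list_K(
#         ['modelA.nc', 'modelB.nc'], 'pr', 'time', 'lat', 'lon', 'NETCDF4')
#     data.shape   # -> (nfiles, ntimes, nygrd, nxgrd)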
def find_latlon_ranges(filelist, lat_var_name, lon_var_name):
# Function to return the latitude and longitude ranges of the data in a file,
# given the identifying variable names.
#
# Input:
# filelist - list of filenames (data is read in from first file only)
# lat_var_name - variable name of the 'latitude' variable
# lon_var_name - variable name of the 'longitude' variable
#
# Output:
    #  latMin, latMax, lonMin, lonMax - self-explanatory
#
# Peter Lean March 2011
filename = filelist[0]
try:
f = netCDF4.Dataset(filename, mode='r')
lats = f.variables[lat_var_name][:]
latMin = lats.min()
latMax = lats.max()
lons = f.variables[lon_var_name][:]
lons[lons > 180] = lons[lons > 180] - 360.
        lonMin = lons.min()
        lonMax = lons.max()
        f.close()
        return latMin, latMax, lonMin, lonMax
except:
print 'Error: there was a problem with finding the latitude and longitude ranges in the file'
print ' Please check that you specified the filename, and variable names correctly.'
sys.exit()
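# Example usage (a sketch; only the first file in the list is read):
#
#     latMin, latMax, lonMin, lonMax = find_latlon_ranges(
#         ['obs_2001.nc'], 'lat', 'lon')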
def writeBN_lola(fileName, lons, lats):
    # write a binary data file that includes longitude (1-d) and latitude (1-d) values
    F = fortranfile.FortranFile(fileName, mode='w')
    ngrdY = lons.shape[0]
    ngrdX = lons.shape[1]
    tmpDat = ma.zeros(ngrdX)
    tmpDat[:] = lons[0, :]
    F.writeReals(tmpDat)
    tmpDat = ma.zeros(ngrdY)
    tmpDat[:] = lats[:, 0]
    F.writeReals(tmpDat)
    # release the temporary array
    tmpDat = 0
    F.close()
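# Example usage (a sketch; lons and lats are 2-D grids such as those returned
# by read_data_from_one_file):
#
#     writeBN_lola('grid_lola.bin', lons, lats)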
def writeBNdata(fileName, numOBSs, numMDLs, nT, ngrdX, ngrdY, numSubRgn, obsData, mdlData, obsRgnAvg, mdlRgnAvg):
    # write spatially- and regionally-regridded data into a binary data file
    missing = -1.e26
    F = fortranfile.FortranFile(fileName, mode='w')
    # construct a data array to replace the mask flag with a missing value (missing = -1.e26) for printing
    data = ma.zeros((nT, ngrdY, ngrdX))
for m in np.arange(numOBSs):
        data[:, :, :] = obsData[m, :, :, :]
        msk = data.mask
        for n in np.arange(nT):
            for j in np.arange(ngrdY):
                for i in np.arange(ngrdX):
                    if msk[n, j, i]:
                        data[n, j, i] = missing
    # write observed data (only one row may be written at a time)
tmpDat = ma.zeros(ngrdX)
for n in np.arange(nT):
for j in np.arange(ngrdY):
tmpDat[:] = data[n, j, :]
F.writeReals(tmpDat)
# write model data (dep. on the number of models).
for m in np.arange(numMDLs):
data[:, :, :] = mdlData[m, :, :, :]
msk = data.mask
for n in np.arange(nT):
for j in np.arange(ngrdY):
for i in np.arange(ngrdX):
if msk[n, j, i]:
data[n, j, i] = missing
for n in np.arange(nT):
for j in np.arange(ngrdY):
tmpDat[:] = data[n, j, :]
F.writeReals(tmpDat)
data = 0 # release the array allocated for data
# write data in subregions
if numSubRgn > 0:
        print 'Also included are the time series of the means over ', numSubRgn, ' subregions from obs and model data'
        tmpDat = ma.zeros(nT)
for m in np.arange(numOBSs):
for n in np.arange(numSubRgn):
tmpDat[:] = obsRgnAvg[m, n, :]
F.writeReals(tmpDat)
for m in np.arange(numMDLs):
for n in np.arange(numSubRgn):
tmpDat[:] = mdlRgnAvg[m, n, :]
F.writeReals(tmpDat)
tmpDat = 0 # release the array allocated for tmpDat
F.close()
def writeNCfile(fileName, numSubRgn, lons, lats, obsData, mdlData, obsRgnAvg, mdlRgnAvg, obsList, mdlList, subRegions):
# write an output file of variables up to 3 dimensions
# fileName: the name of the output data file
# numSubRgn : the number of subregions
    # lons[ngrdX]: longitudes
    # lats[ngrdY]: latitudes
    # obsData[numOBSs,nT,ngrdY,ngrdX]: the obs time series over the entire model domain
    # mdlData[numMDLs,nT,ngrdY,ngrdX]: the mdl time series over the entire model domain
    # obsRgnAvg[numOBSs,numSubRgn,nT]: the obs time series for all subregions
    # mdlRgnAvg[numMDLs,numSubRgn,nT]: the mdl time series for all subregions
dimO = obsData.shape[0] # the number of obs data
dimM = mdlData.shape[0] # the number of mdl data
dimT = mdlData.shape[1] # the number of time levels
dimY = mdlData.shape[2] # y-dimension
dimX = mdlData.shape[3] # x-dimension
dimR = obsRgnAvg.shape[1] # the number of subregions
f = netCDF4.Dataset(fileName, mode='w', format='NETCDF4')
print mdlRgnAvg.shape, dimM, dimR, dimT
#create global attributes
f.description = ''
# create dimensions
print 'Creating Dimensions within the NetCDF Object...'
f.createDimension('unity', 1)
f.createDimension('time', dimT)
f.createDimension('west_east', dimX)
f.createDimension('south_north', dimY)
f.createDimension('obs', dimO)
f.createDimension('models', dimM)
    # create the variables (double precision) to be written to the file
print 'Creating Variables...'
f.createVariable('lon', 'd', ('south_north', 'west_east'))
f.createVariable('lat', 'd', ('south_north', 'west_east'))
f.createVariable('oDat', 'd', ('obs', 'time', 'south_north', 'west_east'))
f.createVariable('mDat', 'd', ('models', 'time', 'south_north', 'west_east'))
if subRegions:
f.createDimension('regions', dimR)
f.createVariable('oRgn', 'd', ('obs', 'regions', 'time'))
f.createVariable('mRgn', 'd', ('models', 'regions', 'time'))
f.variables['oRgn'].varAttName = 'Observation time series: Subregions'
f.variables['mRgn'].varAttName = 'Model time series: Subregions'
loadDataIntoNetCDF(f, obsList, obsData, 'Observation')
print 'Loaded the Observations into the NetCDF'
loadDataIntoNetCDF(f, mdlList, mdlData, 'Model')
# create attributes and units for the variable
print 'Creating Attributes and Units...'
f.variables['lon'].varAttName = 'Longitudes'
f.variables['lon'].varUnit = 'degrees East'
f.variables['lat'].varAttName = 'Latitudes'
f.variables['lat'].varUnit = 'degrees North'
f.variables['oDat'].varAttName = 'Observation time series: entire domain'
f.variables['mDat'].varAttName = 'Model time series: entire domain'
# assign the values to the variable and write it
f.variables['lon'][:] = lons[:]
f.variables['lat'][:] = lats[:]
if subRegions:
f.variables['oRgn'][:] = obsRgnAvg[:]
f.variables['mRgn'][:] = mdlRgnAvg[:]
f.close()
def writeNCfile1(fileName, numDatasets, numObs, numMdl, numSubRgn, lons, lats, allData, dataRgn, dataList, subRegions):
# write an output file of variables up to 3 dimensions
# fileName: the name of the output data file
    # numDatasets: the total number of datasets (numObs + numMdl)
    # numObs : the number of obs datasets (including the obs ensemble, if it exists)
    # numMdl : the number of mdl datasets (including the mdl ensemble, if it exists)
    # numSubRgn : the number of subregions
    # lons[ngrdX]: longitudes
    # lats[ngrdY]: latitudes
    # allData[numDatasets,nT,ngrdY,ngrdX]: the combined obs and model time series over the entire model domain (t,y,x)
    # dataRgn[numDatasets,numSubRgn,nT]: the combined obs and model time series for all subregions
    # dataList : the names of the datasets
    # subRegions : sub-region info
dimN = allData.shape[0] # the number of all (obs + model) datasets
dimT = allData.shape[1] # the number of time levels
dimY = allData.shape[2] # y-dimension
dimX = allData.shape[3] # x-dimension
dimR = dataRgn.shape[1] # the number of subregions
f = netCDF4.Dataset(fileName, mode='w', format='NETCDF4')
print dataRgn.shape, dimN, dimR, dimT
#create global attributes
f.description = ''
# create dimensions
print 'Creating Dimensions within the NetCDF Object...'
f.createDimension('unity', 1)
f.createDimension('time', dimT)
f.createDimension('west_east', dimX)
f.createDimension('south_north', dimY)
f.createDimension('data', dimN)
    # create the variables (double precision) to be written to the file
print 'Creating Variables...'
f.createVariable('lon', 'd', ('south_north', 'west_east'))
f.createVariable('lat', 'd', ('south_north', 'west_east'))
f.createVariable('dGrd', 'd', ('data', 'time', 'south_north', 'west_east'))
if subRegions:
f.createDimension('regions', dimR)
f.createVariable('dRgn', 'd', ('data', 'regions', 'time'))
f.variables['dRgn'].varAttName = 'Sub-region mean time series'
# load the gridded data into the output file
loadDataIntoNetCDF1(f, dataList, allData)
    print 'Loaded the gridded data into the NetCDF'
# create attributes and units for the variable
print 'Creating Attributes and Units...'
f.variables['lon'].varAttName = 'Longitudes'
f.variables['lon'].varUnit = 'degrees East'
f.variables['lat'].varAttName = 'Latitudes'
f.variables['lat'].varUnit = 'degrees North'
f.variables['dGrd'].varAttName = 'Gridded time series'
# assign the values to the variable and write it
f.variables['lon'][:] = lons[:]
f.variables['lat'][:] = lats[:]
if subRegions:
f.variables['dRgn'][:] = dataRgn[:]
f.close()
def loadDataIntoNetCDF(fileObject, datasets, dataArray, dataType):
"""
Input::
fileObject - netCDF4 file object data will be loaded into
datasets - List of dataset names
dataArray - Multi-dimensional array of data to be loaded into the NetCDF file
dataType - String with value of either 'Model' or 'Observation'
Output::
No return value. netCDF4 file object is updated in place
"""
for datasetCount, dataset in enumerate(datasets):
if dataType.lower() == 'observation':
datasetName = dataset.replace(' ','')
elif dataType.lower() == 'model':
datasetName = path.splitext(path.basename(dataset))[0]
print "Creating variable %s" % datasetName
fileObject.createVariable(datasetName, 'd', ('time', 'south_north', 'west_east'))
        fileObject.variables[datasetName].varAttName = '%s time series: entire domain' % dataType
print 'Loading values into %s' % datasetName
fileObject.variables[datasetName][:] = dataArray[datasetCount,:,:,:]
def loadDataIntoNetCDF1(fileObject, datasets, dataArray):
"""
Input::
fileObject - netCDF4 file object data will be loaded into
datasets - List of dataset names
dataArray - Multi-dimensional array of data to be loaded into the NetCDF file
Output::
No return value. netCDF4 file object is updated in place
"""
for datasetCount, dataset in enumerate(datasets):
datasetName = path.splitext(path.basename(dataset))[0]
print "Creating variable %s" % datasetName
fileObject.createVariable(datasetName, 'd', ('time', 'south_north', 'west_east'))
fileObject.variables[datasetName].varAttName = 'Gridded time series: entire domain'
print 'Loading values into %s' % datasetName
fileObject.variables[datasetName][:] = dataArray[datasetCount,:,:,:]
def checkLatLon(latsin, lonsin, datain):
"""
Purpose::
Checks whether latitudes and longitudes are monotonically increasing
within the domains [-90, 90) and [-180, 180) respectively, and rearranges the input data
accordingly if they are not.
Input::
latsin - Array of latitudes read from a raw netcdf file
lonsin - Array of longitudes read from a raw netcdf file
datain - Array of data values read from a raw netcdf file.
The shape is assumed to be (..., nLat, nLon).
Output::
latsout - 2D array of (rearranged) latitudes
lonsout - 2D array of (rearranged) longitudes
dataout - Array of (rearranged) data
"""
# Avoid unnecessary shifting if all lons are higher than 180
if lonsin.min() > 180:
lonsin -= 360
# Make sure lats and lons are monotonically increasing
latsDecreasing = np.diff(latsin[:, 0]) < 0
lonsDecreasing = np.diff(lonsin[0]) < 0
# If all values are decreasing then they just need to be reversed
latsReversed, lonsReversed = latsDecreasing.all(), lonsDecreasing.all()
# If the lat values are unsorted then raise an exception
if not latsReversed and latsDecreasing.any():
raise ValueError('Latitudes must be monotonically increasing.')
# Perform same checks now for lons
if not lonsReversed and lonsDecreasing.any():
raise ValueError('Longitudes must be monotonically increasing.')
# Also check if lons go from [0, 360), and convert to [-180, 180)
# if necessary
lonsShifted = lonsin.max() > 180
latsout, lonsout, dataout = latsin[:], lonsin[:], datain[:]
# Now correct data if latlon grid needs to be shifted
if latsReversed:
latsout = latsout[::-1]
dataout = dataout[..., ::-1, :]
if lonsReversed:
lonsout = lonsout[..., ::-1]
dataout = dataout[..., ::-1]
if lonsShifted:
lat1d = latsout[:, 0]
dataout, lon1d = shiftgrid(180, dataout, lonsout[0], start=False)
lonsout, latsout = np.meshgrid(lon1d, lat1d)
return latsout, lonsout, dataout
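# Example usage (a sketch; a grid stored north-to-south on 0:360 longitudes
# comes back south-to-north on -180:180, with the data rearranged to match):
#
#     lats, lons, data = checkLatLon(lats, lons, data)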
def shiftgrid(lon0, datain, lonsin, start=True, cyclic=360.0):
"""
Purpose::
Shift global lat/lon grid east or west. This function is taken directly
from the (unreleased) basemap 1.0.7 source code as version 1.0.6 does not
currently support arrays with more than two dimensions.
https://github.com/matplotlib/basemap
Input::
lon0 - starting longitude for shifted grid (ending longitude if start=False).
lon0 must be on input grid (within the range of lonsin).
datain - original data with longitude the right-most dimension.
lonsin - original longitudes.
start - if True, lon0 represents the starting longitude of the new grid.
if False, lon0 is the ending longitude. Default True.
cyclic - width of periodic domain (default 360)
Output::
dataout - data on shifted grid
lonsout - lons on shifted grid
"""
    if np.fabs(lonsin[-1] - lonsin[0] - cyclic) > 1.e-4:
        # no cyclic point is included: use all of the data rather than raising
        # ValueError('cyclic point not included')
        start_idx = 0
    else:
        # the cyclic point is included: skip the duplicate end point
        start_idx = 1
if lon0 < lonsin[0] or lon0 > lonsin[-1]:
raise ValueError('lon0 outside of range of lonsin')
i0 = np.argmin(np.fabs(lonsin-lon0))
i0_shift = len(lonsin)-i0
    if ma.isMA(datain):
        dataout = ma.zeros(datain.shape, datain.dtype)
    else:
        dataout = np.zeros(datain.shape, datain.dtype)
    if ma.isMA(lonsin):
        lonsout = ma.zeros(lonsin.shape, lonsin.dtype)
    else:
        lonsout = np.zeros(lonsin.shape, lonsin.dtype)
    if start:
        lonsout[0:i0_shift] = lonsin[i0:]
    else:
        lonsout[0:i0_shift] = lonsin[i0:] - cyclic
    dataout[..., 0:i0_shift] = datain[..., i0:]
    if start:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx] + cyclic
    else:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx]
    dataout[..., i0_shift:] = datain[..., start_idx:i0 + start_idx]
    return dataout, lonsout
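# Example usage (a sketch; shift a hypothetical global 0:360 grid so that its
# longitudes run from -180 to 180):
#
#     dataout, lonsout = shiftgrid(180., datain, lonsin, start=False)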