blob: ed5091c04c70919f91fb8e5ba91ef34e939c4145 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Class factory
#
# Select the right handler depending on the file format
from __future__ import print_function
import os
import pdb
import sys
import magic
import scipy.io
import cdms2
import cdtime
import numpy.ma as ma
import numpy
import netCDF4
import re
from cdms2.grid import createUniformGrid
# ********************************************************************
# import_equation()
#
# provide equation dynamically
# ********************************************************************
def import_equation(eq):
d = eq.rfind(".")
EqName = eq[ d+1:len(eq) ]
m = __import__( eq[0:d], globals( ), locals( ), [ EqName ] )
return getattr( m, EqName )
# ********************************************************************
# HandlerGrads()
#
# Manage Grads file io
# ********************************************************************
class HandlerGrads(object):
'''
'''
def __new__(klass, *args, **kwargs):
# ------------------------------------------------------------------
# This return make sure __init__ is begin called after the class is
# created
# ------------------------------------------------------------------
return super(HandlerGrads, klass).__new__(klass, *args, **kwargs)
def __init__(self):
pass
def open(self, file, variable=None):
'''
Open ctl file and read data
'''
self.f=cdms2.open(file)
self.variable=variable
self.data=self.f(variable) # read variable
def getData(self, variable=None):
'''
Return data array
'''
if( variable != None ):
data=self.f(variable)
else:
data=self.data
return data
def getLongitude(self):
'''
Return Longitude.
'''
return self.data.getLongitude()
def getLatitude(self):
'''
Return Latitude.
'''
return self.data.getLatitude()
def getTime(self,timeunits=None):
'''
Return time in cdtime format.
'''
if(timeunits == 'internal'):
timeunits=self.getTimeUnits()
time_values = numpy.arange(len(self.data.getTime()))
time_bounds = numpy.arange(len(self.data.getTime())+1)
cur_time=[cdtime.reltime(i,timeunits)
for i in time_values]
return cur_time
def getTimeUnits(self,timeunits):
'''
return Time units (month since yyyy)
'''
if( timeunits == 'internal' ):
return self.data.getTime().units
return timeunits
# ********************************************************************
# HandlerNetCDF()
#
# Manage netCDF file io. Note, this API reads HDF4 and Netcdf4/HDF5
# ********************************************************************
class HandlerNetCDF(object):
'''
'''
def __new__(klass, *args, **kwargs):
# ------------------------------------------------------------------
# This return make sure __init__ is begin called after the class is
# created
# ------------------------------------------------------------------
return super(HandlerNetCDF, klass).__new__(klass, *args, **kwargs)
def __init__(self):
pass
def open(self, file, variable=None):
'''
Open ctl file and read data
'''
self.f=cdms2.open(file)
self.data=self.f(variable).astype('float32') # read variable
def getData(self):
'''
Return data array
'''
return self.data
def getLongitude(self):
'''
Return Longitude.
'''
return self.data.getLongitude()
def getLatitude(self):
'''
Return Latitude.
'''
return self.data.getLatitude()
def getTime(self,timeunits=None):
'''
Return time.
'''
if( timeunits == 'internal' ):
timeunits=self.getTimeUnits()
time_values = self.f.getAxis("time")[:]
deltime = time_values[ 1 ] - time_values[ 0 ]
time_bounds = time_values
numpy.append(time_bounds, time_values[-1] + deltime )
cur_time=[ cdtime.reltime( i, timeunits )
for i in time_values ]
return cur_time
def getTimeUnits(self,timeunits):
'''
return Time units (month since yyyy)
'''
if( timeunits == 'internal' ):
return self.data.getTime().units
return timeunits
# ********************************************************************
# HandlerNCAggregate()
#
# Aggreate a list of NetCDF files from a text file
# Each file must have the same lat/lon dimension
# ********************************************************************
class HandlerNCAggregate(object):
'''
'''
def __new__(klass, *args, **kwargs):
# ------------------------------------------------------------------
# This return make sure __init__ is begin called after the class is
# created
# ------------------------------------------------------------------
return super(HandlerNCAggregate, klass).__new__(klass, *args, **kwargs)
def __init__(self):
'''
Nothing to do
'''
pass
def open(self, file, variable=None):
'''
Make sure that all files in the list exist
'''
f = open(file,'r')
self.flist = f.readlines()
f.close()
for file in self.flist:
filename=file.strip()
if( not os.path.exists(filename) ):
print("File {} does not exist in filelist".format(filename))
# --------------------------------------------------------
# Extract General information from first file in the list
# ---------------------------------------------------------
firstfile = cdms2.open(self.flist[0].strip(),'r')
self.variable = firstfile(variable)
self.missing_value = self.variable.missing_value
# ----------------------------
# Read first file in the list
# -----------------------------
self.timeunits = self.variable.getTime().units
def getData(self, variable=None):
'''
Aggreate all data from the file list in the order given.
'''
filename = self.flist[0].strip()
f = cdms2.open( filename, 'r' )
if( variable != None ):
self.vartoread = variable
else:
self.vartoread = self.variable.id
data = f(self.vartoread)[:]
# ---------------------------
# Concatenate following files
# ---------------------------
for filename in self.flist[ 1: ]:
print("reading {}".format(filename.strip()))
f = cdms2.open( filename.strip(), 'r' )
data2 = f(self.vartoread)[:]
data = numpy.concatenate((data,data2), axis=0)
f.close()
data=ma.array(data=data, \
fill_value=self.missing_value, \
copy=0, \
dtype='float32' )
data=ma.masked_equal(data, self.missing_value, copy=0)
return data
def getLongitude(self):
'''
Assume same grid for all files.
'''
return self.variable.getLongitude()
def getLatitude(self):
'''
Assume same grid for all files.
'''
return self.variable.getLatitude()
def getTime(self,timeunits=None):
'''
Return time.
Assume that file time variable name will be the same in all files.
'''
flist=self.flist
nbFiles = len(self.flist)
# ----------------------------
# Read first file in the list
# -----------------------------
time = self.variable.getTime()[:]
# ---------------------------
# Concatenate following files
# ---------------------------
for filename in self.flist[ 1: ]:
f = cdms2.open( filename.strip(), 'r' )
time2 = f(self.variable.id).getTime()
timeunits2 = f(self.variable.id).getTime().units
match=re.match('.*since.*', timeunits2)
# ---------------------------------------------------
# if timeunits is not UDUNITS format, pass back value
# and let InputTimeUnits attribute to take care of
# the conversion.
# ----------------------------------------------------
if( not match ): timeunits2=self.timeunits
# --------------------------------
# Make sure we have the same units
# ---------------------------------
if( timeunits2 != self.timeunits ):
file_time = [ cdtime.reltime(time2[ i ], timeunits2 ) \
for i in range( len( time2 ) )]
time2 = [ file_time[i].torel( timeunits ).value \
for i in range( len( time2 ) ) ]
time2=time2[:]
time = numpy.concatenate((time,time2), axis=0)
f.close()
self.time = [cdtime.reltime(i,self.timeunits) for i in time]
return self.time
def getTimeUnits(self,timeunits=None):
'''
return Timeunits.
'''
# ---------------------------------------------------
# If file timeunits are UDUNITS compliante, overwrite
# timeunits argument
# ---------------------------------------------------
match = re.match('.*since.*',self.variable.getTime().units)
if( match ):
return self.variable.getTime().units
self.timeunits=timeunits
return timeunits
# ********************************************************************
# MatlabData()
#
# Manage Matlab data object.
#
# Note: Assume that data is in the key that is not internal to Matlab.
# Matlab internal keys are surrounded by underscores (__key__).
# ********************************************************************
class MatlabData(object):
'''
Convert Matlab data Array to cmor2 data Array.
Create lat/lon grid
'''
def __init__(self,oMatlab):
'''
Read data and flip y axis.
Assume data is -180 to 180 and will be shifted to get an array from
0 to 360 degrees.
'''
DataKey = [key for key in oMatlab.keys()
if key not in ['__version__', '__header__', '__globals__']]
# ---------------------------------------
# Read in Data as a Numpy masked array.
# ---------------------------------------
self.data=ma.array(data=oMatlab[DataKey[0]], \
fill_value=0.0, \
copy=0, \
dtype='float32' )
# ----------------------------------------------
# Assume fill value is 0 (Need to pass a value)
# -----------------------------------------------
self.data=ma.masked_values(self.data,0)
# -----------------------------------------------
# Transpose 3D matrix to get the time axis first
# -----------------------------------------------
self.data=self.data.transpose(2,0,1)
# -------------
# Flip y axis
# -------------
self.data=self.data[:,::-1,:]
# --------------------------------------------------
# Extract X lenght and Y length to compute Lat/Lon
# --------------------------------------------------
nlat = self.data.shape[1]
nlon = self.data.shape[2]
# --------------------------------------------------------------------
# shift array from -180 to 180 degrees to 0 to 360 degrees
# The last half of the array becomes the first half and the first half
# becomes the last half
# --------------------------------------------------------------------
flipped=ma.array( numpy.zeros( self.data.shape ), \
fill_value=0.0, \
dtype='float32' )
# -----------------------------------------
# Masked fill_value (default to 0 for now)
# -----------------------------------------
flipped=ma.masked_values(flipped,0)
flipped[:,:,-nlon/2:]=self.data[:,:,:nlon/2]
flipped[:,:,:nlon/2] =self.data[:,:,-nlon/2:]
# ---------------------------------
# New flipped array becomes the data.
# ----------------------------------
self.data=flipped
flipped=0 # In order to save memory
# -------------------------------------------------------
# Compute latitudes longitude from X length and Y Length
# Assume Global data on cylindrical rectangular grid.
# -------------------------------------------------------
deltaLat = 180.0/nlat
deltaLon = 360.0/nlon
startLat = -90 + (deltaLat/2.0)
startLon = 0 + (deltaLon/2.0)
# -------------------------------
# Used cdms2 API to create grid
# --------------------------------
self.grid= createUniformGrid( startLat, nlat, deltaLat, \
startLon, nlon, deltaLon)
def getData(self):
'''
return data array
'''
return self.data
def getLatitude(self):
'''
return lattitude array
'''
return self.grid.getLatitude()
def getLongitude(self):
'''
return longitude array
'''
return self.grid.getLongitude()
def getTime( self, cur_timeunits ):
'''
'''
nTime = self.data.shape[0]
time_values = numpy.arange(nTime)
fileTime =[ cdtime.reltime(i, cur_timeunits)
for i in time_values ]
return fileTime
def getTimeUnits(self,timeunits):
'''
return Time units (month since yyyy)
'''
return timeunits
def __getitem__(self, key):
'''
'''
return self.getData()
# ********************************************************************
# HandlerMatlab()
#
# Manage Matlab data file.
#
# Note: Assume that data is in the key that is not internal to Matlab.
# ********************************************************************
class HandlerMatlab(object):
'''
'''
def __new__(klass, *args, **kwargs):
# ------------------------------------------------------------------
# This return make sure __init__ is begin called after the class is
# created
# ------------------------------------------------------------------
return super(HandlerMatlab, klass).__new__(klass, *args, **kwargs)
def __init__(self):
pass
def open(self, file, variable=None):
'''
Open Matlab and read data
'''
self.f=scipy.io.loadmat(file)
self.oMatlab=MatlabData(self.f)
def getData(self):
'''
Return data array
'''
return self.oMatlab.getData()
def getLongitude(self):
'''
Return Longitude.
'''
return self.oMatlab.getLongitude()
def getLatitude(self):
'''
Return Latitude.
'''
return self.oMatlab.getLatitude()
def getTime(self,timeunits):
'''
Return time.
'''
return self.oMatlab.getTime(timeunits)
def getTimeUnits(self,timeunits):
'''
return Time units (month since yyyy)
'''
return self.oMatlab.getTimeUnits(timeunits)
# ********************************************************************
# HandlerFormats()
#
# Factory detecting file format and return a pointer to the related
# format class
#
# Reference:
# http://code.activestate.com/recipes/576687/
#
# ********************************************************************
class HandlerFormats(object):
'''
Rerturn Format Hanlder depending on file magic number.
'''
Formats= {'NetCDF': HandlerNetCDF,
'Hierar': HandlerNetCDF,
'NCList': HandlerNCAggregate,
'DSET' : HandlerGrads,
'Matlab': HandlerMatlab}
def __new__( klass, filename ):
MagicNumber = magic.from_file( filename )
try:
return HandlerFormats.Formats[MagicNumber[0:6]]()
except:
# --------------------------------------------------------------------------------
# Magic is too relax and come backup with 'Bio-Rad .PIC Image File'
# Since we will never read these files in the program, I just assume this will be
# a list of files.
# --------------------------------------------------------------------------------
if((MagicNumber[0:5] == 'ASCII') or (MagicNumber[0:5] == "Bio-R")):
f=open(filename,'r')
lines=f.readlines()
for line in lines:
if(line[0:4].upper() == 'DSET'):
return HandlerFormats.Formats['DSET']()
elif( line.strip().endswith("nc") ):
return HandlerFormats.Formats['NCList']()
def __init__(self, filename ):
pass