"""loading_tools
======================

The loading_tools module of cmipdata is a set of functions which use
the cdo python bindings and NetCDF4 to load data from input NetCDF
files listed in a cmipdata ensemble object into python numpy arrays.
Some processing can optionally be done during the loading, specifically
remapping, time-slicing, time-averaging and zonal-averaging.

.. moduleauthor:: Neil Swart <neil.swart@ec.gc.ca>
"""
# NOTE: after the next two lines the name ``cdo`` no longer refers to the
# imported module but to a single ``Cdo`` instance; the functions in this
# file call operators on that instance.
import cdo as cdo
cdo = cdo.Cdo() # recommended import
import os
import numpy as np
from netCDF4 import Dataset, num2date, date2num
import datetime
# clean out tmp to make space for CDO processing.
# NOTE(review): this shell command runs every time the module is imported and
# removes every path matching /tmp/cdo*. Presumably these are leftover CDO
# temporary files -- confirm nothing else (e.g. a concurrently running job)
# relies on that prefix.
os.system('rm -rf /tmp/cdo*')
def loadvar(ifile, varname, cdostr=None, **kwargs):
    """
    Load a variable from a NetCDF file with optional pre-processing.

    Load a CMIP5 netcdf variable "varname" from "ifile", optionally
    applying a chain of cdo operators to the file before the data is read.
    Requires netCDF4, CDO and the CDO python bindings.

    Parameters
    ----------
    ifile : str
        Path of the input NetCDF file.
    varname : str
        Name of the variable to load.
    cdostr : str, optional
        Whitespace-separated cdo operators. The first operator is called
        on the module-level ``cdo`` object and the remaining ones are
        passed, unchanged, in front of ``ifile`` as its chained input.
        ``None``, an empty string or a whitespace-only string means "no
        pre-processing".
    **kwargs
        Accepted so callers can forward a shared keyword dictionary;
        not used by this function.

    Returns
    -------
    var
        The array returned by the cdo bindings (documented by the original
        author as a masked array) with length-one dimensions squeezed out.
        No scale_factor/add_offset is applied by this function.

    Raises
    ------
    KeyError
        If ``varname`` is not a variable of ``ifile``.
    """
    # Check that the variable exists up front, and release the file handle
    # again before CDO reads the same file.
    with Dataset(ifile, 'r') as nc:
        if varname not in nc.variables:
            raise KeyError(varname)

    opslist = cdostr.split() if cdostr else []
    if not opslist:
        var = cdo.readMaArray(ifile, varname=varname)
    else:
        # Only the leading hyphen(s) are dropped so that hyphens inside
        # operator arguments (negative bounds, dates) are left intact.
        base_op = opslist[0].lstrip('-')
        # Remaining operators, if any, are chained in front of the file.
        cdo_input = ' '.join(opslist[1:] + [ifile])
        var = getattr(cdo, base_op)(input=cdo_input, returnMaArray=varname)

    return np.squeeze(var)
def _create_tempfile(ens, varname, ifileone, cdostr=None, **kwargs):
"""
_create_tempfile is called when modifications are made to the ensemeble without
creating new files. Creates a temporary file that can be used to determine dimensions of
the modified data.
"""
if(cdostr):
opslist = cdostr.split()
op = opslist[0].replace('-', '')
cdo_str = 'cdo ' + op + ' ' + ifileone + ' temporary_0.nc'
ex = os.system(cdo_str)
if len(opslist) > 1:
for i in range(1, len(opslist)):
op = opslist[i].replace('-', '')
cdo_str = 'cdo ' + op + ' ' + 'temporary_' + str(i-1) + '.nc' + ' temporary_' + str(i) + '.nc'
ex = os.system(cdo_str)
os.remove('temporary_' + str(i-1) + '.nc')
if i == len(opslist)-1:
os.rename('temporary_' + str(len(opslist)-1) + '.nc', 'temp123.nc')
else:
os.rename('temporary_0.nc', 'temp123.nc')
def loadfiles(ens, varname, toDatetime=False, **kwargs):
    """
    Load a variable "varname" from all files in ens into one array.

    The zeroth dimension of the result represents an input file and
    dimensions 1 to n are the dimensions of the input variable. Variable
    "varname" must have the same shape in all input files. Keyword argument
    toDatetime (defaults to False) is passed on to get_dimensions().
    Optionally specify any kwargs valid for loadvar (e.g. cdostr).

    The dimensions are read from the first file only; when a cdostr is
    given they are read from a temporary file, 'temp123.nc', produced by
    applying the cdostr to the first file.

    Requires netCDF4, cdo bindings and numpy

    Returns
    -------
    dictionary with keys data and dimensions
        data maps to a numpy masked array containing the data; values
        masked in the individual files stay masked
        dimensions has keys; models, realizations,
        and possibly lat, lon, and time

    Raises
    ------
    ValueError
        If the ensemble contains no files.
    """
    # Get all input files from the ensemble
    files = ens.objects('ncfile')
    ifiles = [f.name for f in files]
    if not ifiles:
        raise ValueError('ensemble contains no ncfile objects to load')

    # If a (non-empty) cdostr is being applied, create a temporary file to
    # determine the dimensions of the processed data.
    cdostr = kwargs.get('cdostr')
    if cdostr and cdostr.split():
        _create_tempfile(ens, varname, ifiles[0], **kwargs)
        try:
            dimensions = get_dimensions(
                'temp123.nc', varname, toDatetime=toDatetime)
        finally:
            # Never leave the scratch file behind, even on failure.
            if os.path.exists('temp123.nc'):
                os.remove('temp123.nc')
    else:
        dimensions = get_dimensions(ifiles[0], varname, toDatetime=toDatetime)

    # The first file fixes the expected shape; its data is reused instead
    # of being loaded (and pre-processed) a second time.
    first = loadvar(ifiles[0], varname, **kwargs)

    # A masked container keeps the mask of every assigned array; a plain
    # ndarray would silently store only the underlying fill values.
    varmat = np.ma.masked_all((len(ifiles),) + first.shape)
    varmat.fill_value = 999e99
    varmat[0] = first
    for i, ifile in enumerate(ifiles[1:], start=1):
        varmat[i] = loadvar(ifile, varname, **kwargs)

    dimensions['models'] = get_models(files)
    dimensions['realizations'] = get_realizations(files)
    return {"data": varmat,
            "dimensions": dimensions,
            }
def get_models(files):
    """Return a list with the name of the parent 'model' object of each file.

    The list is in the same order as ``files``; every element of ``files``
    must provide ``parentobject('model')`` returning an object with a
    ``name`` attribute.
    """
    return [ncfile.parentobject('model').name for ncfile in files]
def get_realizations(files):
    """Return a list with the name of the parent 'realization' of each file.

    The list is in the same order as ``files``; every element of ``files``
    must provide ``parentobject('realization')`` returning an object with a
    ``name`` attribute.
    """
    return [ncfile.parentobject('realization').name for ncfile in files]
def get_dimensions(ifile, varname, toDatetime=False):
    """Returns the dimensions of variable varname in file ifile as a dictionary.

    If one of the dimensions begins with lat (Lat, Latitude and Latitudes), it
    will be returned with a key of lat, and similarly for lon and time. Any
    other dimension is returned under its own name. If toDatetime is true,
    the time dimension is converted to a numpy array of datetime.datetime
    objects; otherwise the raw time values are returned.

    Parameters
    ----------
    ifile : str
        Path of the NetCDF file to read.
    varname : str
        Name of the variable whose dimensions are wanted.
    toDatetime : bool, optional
        Convert the time axis to datetime.datetime objects.

    Returns
    -------
    dictionary mapping a dimension key to the values of the variable in
    ifile that has the same name as the dimension.

    Raises
    ------
    KeyError
        If varname, or a variable named like one of its dimensions, does
        not exist in ifile.
    """
    dimensions = {}
    # The context manager closes the file once every axis has been read
    # into memory.
    with Dataset(ifile, 'r') as nc:
        for dimension in nc.variables[varname].dimensions:
            axis = nc.variables[dimension]
            lowered = dimension.lower()
            if lowered.startswith('lat'):
                dimensions['lat'] = axis[:]
            elif lowered.startswith('lon'):
                dimensions['lon'] = axis[:]
            elif lowered.startswith('time'):
                if toDatetime:
                    # Following Phil Austin's slice_nc
                    # Fall back to 'standard' when no calendar attribute
                    # is set on the time variable.
                    cal = getattr(axis, 'calendar', 'standard')
                    dates = num2date(axis[:], axis.units, cal)
                    # Rebuild each date as a plain datetime.datetime from
                    # its year..second fields.
                    dimensions['time'] = np.array(
                        [datetime.datetime(*item.timetuple()[:6])
                         for item in dates])
                else:
                    dimensions['time'] = axis[:]
            else:
                dimensions[dimension] = axis[:]
    return dimensions
# Running this module as a script does nothing; it is meant to be imported.
if __name__ == "__main__":
    pass