"""loading_tools
======================
The loading_tools module of cmipdata is a set of functions which use
the cdo python bindings and NetCDF4 to load data from input NetCDF
files listed in a cmipdata ensemble object into python numpy arrays.
Some processing can optionally be done during the loading, specifically
remapping, time-slicing, time-averaging and zonal-averaging.
.. moduleauthor:: Neil Swart <neil.swart@ec.gc.ca>
"""
try:
import cdo as cdo
cdo = cdo.Cdo() # recommended import
except ImportError:
pass
import os
import numpy as np
from netCDF4 import Dataset, num2date, date2num
import datetime
# Clean out CDO temporary files left in /tmp to make space for CDO processing.
# NOTE(review): this runs as a side effect of importing the module and
# recursively deletes every path matching /tmp/cdo*, whichever process
# created it -- confirm that no concurrently running CDO job relies on them.
os.system('rm -rf /tmp/cdo*')
def loadvar(ifile, varname, cdostr=None, **kwargs):
    """
    Load a variable from a NetCDF file with optional pre-processing.

    Load the netcdf variable "varname" from "ifile", optionally applying a
    string of cdo operators while the data is read.

    Requires netCDF4, CDO and the CDO python bindings.

    Parameters
    ----------
    ifile : str
        Path of the input NetCDF file.
    varname : str
        Name of the variable to load.
    cdostr : str, optional
        Whitespace-separated cdo operators. The first operator (with its
        dashes removed) is looked up as a method of the cdo bindings; any
        remaining operators are passed on unchanged, followed by "ifile",
        as the input of that method. When empty or None the variable is
        read without pre-processing.
    **kwargs
        Accepted so callers can forward a shared set of options; not used
        by this function.

    Returns
    -------
    var : masked array
        The loaded data with all length-one dimensions squeezed out.

    Raises
    ------
    KeyError
        If "varname" is not a variable of "ifile".
    """
    # Open the file with netCDF4 first so that an unreadable file or a
    # missing variable is reported before any cdo processing starts, and
    # close it again straight away so the handle is not leaked.
    nc = Dataset(ifile, 'r')
    try:
        nc.variables[varname]
    finally:
        nc.close()

    # NOTE(review): no scale_factor/add_offset is applied to the data here;
    # presumably the reader behind the cdo bindings already unpacks it --
    # confirm before re-introducing manual scaling.
    operators = cdostr.split() if cdostr else []

    if operators:
        # The leading operator becomes a method name, so drop its dash(es).
        base_op = operators[0].replace('-', '')
        chained = operators[1:]
        if chained:
            cdo_input = ' '.join(chained) + ' ' + ifile
        else:
            cdo_input = ifile
        var = getattr(cdo, base_op)(input=cdo_input, returnMaArray=varname)
    else:
        var = cdo.readMaArray(ifile, varname=varname)

    return np.squeeze(var)
def _create_tempfile(ens, varname, ifileone, cdostr=None, **kwargs):
    """
    Create the temporary file 'temp123.nc' by applying cdostr to ifileone.

    _create_tempfile is called when modifications are made to the ensemble
    without creating new files. The file it writes (in the current working
    directory) can be used to determine the dimensions of the modified data.
    The caller is responsible for deleting 'temp123.nc'.

    Parameters
    ----------
    ens, varname
        Unused; kept so the signature stays compatible with existing callers.
    ifileone : str
        Path of the NetCDF file the operators are applied to.
    cdostr : str, optional
        Whitespace-separated cdo operators. Each operator is run as its own
        cdo command, in the order given, with the output of one step used as
        the input of the next. Nothing is done when it is empty or None.
    **kwargs
        Ignored.

    Raises
    ------
    OSError
        If the cdo executable cannot be started or exits with a non-zero
        status.
    """
    operators = cdostr.split() if cdostr else []
    if not operators:
        return

    # Imported locally so this function does not depend on the module's
    # top-of-file import block.
    import subprocess

    # NOTE(review): operators are applied left to right here, whereas a single
    # chained cdo call evaluates its operators right to left -- confirm the
    # order does not matter for the dimensions of the result.
    source = ifileone
    for step, token in enumerate(operators):
        # Strip only the leading dash(es): hyphens inside the arguments
        # (negative numbers, ISO dates) belong to the operator.
        operator = token.lstrip('-')
        target = 'temporary_' + str(step) + '.nc'

        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact.
        command = ['cdo', operator, source, target]
        status = subprocess.call(command)
        if status != 0:
            raise OSError('cdo exited with status {0} while running: {1}'
                          .format(status, ' '.join(command)))

        # The previous intermediate file is no longer needed; the original
        # input file (step 0) must of course be left alone.
        if step > 0:
            os.remove(source)
        source = target

    os.rename(source, 'temp123.nc')
def loadfiles(ens, varname, **kwargs):
    """
    Load a variable "varname" from all files in ens, and load it into a matrix
    where the zeroth dimension represents an input file and dimensions 1 to n
    are the dimensions of the input variable. Variable "varname" must have the
    same shape in all ifiles. Optionally specify any kwargs valid for loadvar.

    Requires netCDF4, cdo bindings and numpy.

    Parameters
    ----------
    ens : ensemble object
        Must provide ``lister('ncfile')`` returning the input file names.
    varname : str
        Name of the variable to load from every file.
    **kwargs
        Forwarded to loadvar. Two keys are also interpreted here:
        ``cdostr`` (when set, the dimensions are read from a temporary file
        produced by applying it to the first input file) and ``toDatetime``
        (forwarded to get_dimensions).

    Returns
    -------
    dict
        - "data": a masked numpy array, varmat
        - "dimensions": the dimensions of the data

    Raises
    ------
    IndexError
        If the ensemble lists no files.
    """
    # Get all input files from the ensemble
    ifiles = ens.lister('ncfile')
    if len(ifiles) == 0:
        raise IndexError('the ensemble lists no files to load')

    to_datetime = kwargs.get('toDatetime', False)

    # If a cdostr is being applied, create a temporary file to determine the
    # dimensions of the processed data. An explicit cdostr=None counts as
    # "no processing".
    if kwargs.get('cdostr'):
        _create_tempfile(ens, varname, ifiles[0], **kwargs)
        try:
            dimensions = get_dimensions('temp123.nc', varname,
                                        toDatetime=to_datetime)
        finally:
            # Never leave the scratch file behind, even if reading it failed.
            if os.path.exists('temp123.nc'):
                os.remove('temp123.nc')
    else:
        dimensions = get_dimensions(ifiles[0], varname, toDatetime=to_datetime)

    # The first file defines the shape of the output and is loaded only once.
    first = loadvar(ifiles[0], varname, **kwargs)

    # A masked container keeps the mask of every file that is assigned into
    # it; a plain ndarray would silently drop those masks.
    varmat = np.ma.masked_all((len(ifiles),) + np.shape(first))
    # Same fill value callers received from the former sentinel-based code.
    varmat.fill_value = 999e99

    # "[i, ...]" (rather than "[i, :]") also works when the variable has been
    # reduced to a single value per file.
    varmat[0, ...] = first
    for i, ifile in enumerate(ifiles[1:], start=1):
        varmat[i, ...] = loadvar(ifile, varname, **kwargs)

    return {"data": varmat, "dimensions": dimensions}
def get_dimensions(ifile, varname, toDatetime=False):
    """Returns the dimensions of variable varname in file ifile as a dictionary.

    If one of the dimensions begins with lat (Lat, Latitude and Latitudes), it
    will be returned with a key of lat, and similarly for lon and time. Any
    other dimension is returned under its own name. If toDatetime=True, the
    time dimension is converted to an array of datetime.datetime objects.

    Parameters
    ----------
    ifile : str
        Path of the NetCDF file to read.
    varname : str
        Name of the variable whose dimensions are returned.
    toDatetime : bool, optional
        Convert the time values to datetimes; only the value True enables
        the conversion.

    Returns
    -------
    dict
        Maps each dimension key to the values of the variable of the same
        name as the dimension.

    Raises
    ------
    KeyError
        If varname, or a variable named after one of its dimensions, is not
        in the file.
    """
    nc = Dataset(ifile, 'r')
    try:
        dimensions = {}
        for dimension in nc.variables[varname].dimensions:
            coord = nc.variables[dimension]
            lowered = dimension.lower()

            if lowered.startswith('lat'):
                dimensions['lat'] = coord[:]
            elif lowered.startswith('lon'):
                dimensions['lon'] = coord[:]
            elif lowered.startswith('time'):
                if toDatetime is True:
                    # Following Phil Austin's slice_nc
                    # A time variable without a calendar attribute is
                    # treated as using the standard calendar.
                    calendar = getattr(coord, 'calendar', 'standard')
                    dates = num2date(coord[:], coord.units, calendar)
                    # Rebuild each date from its first six time-tuple
                    # fields (year .. second) as a datetime.datetime.
                    dimensions['time'] = np.array(
                        [datetime.datetime(*item.timetuple()[:6])
                         for item in dates])
                else:
                    dimensions['time'] = coord[:]
            else:
                dimensions[dimension] = coord[:]
    finally:
        # All values have been read by now, so the file can be released.
        nc.close()

    return dimensions