"""preprocessing_tools
======================
The preprocessing_tools module of cmipdata is a set of functions which use
os.system calls to Climate Data Operators (cdo) to systematically apply a
given processing on multiple NetCDF files, which are listed in cmipdata
ensemble objects.
.. moduleauthor:: Neil Swart <neil.swart@ec.gc.ca>
"""
import os
import glob
import classes as dc
import copy
import itertools
# ===========================================================================
# The next three operators work on multiple files across the ensemble,
# and cannot be chained together.
# ===========================================================================
def cat_exp_slices(ensemble, delete=True, output_prefix=''):
    """
    Concatenate multiple time-slice files per experiment.

    For every variable in the ensemble that is stored in more than one
    file (time-slices), the files are joined with ``cdo mergetime`` into one
    unified file. Files are joined per-model, per-experiment,
    per-realization, per-variable. To join files over experiments (e.g. to
    concatenate historical and rcp45) see cat_experiments.

    If the merge command exits with a non-zero status, the partial output is
    removed, the variable keeps its original time-slice files and no input is
    deleted.

    Parameters
    ----------
    ensemble : cmipdata Ensemble
        The ensemble on which to do the concatenation. It is deep-copied,
        the input object is not modified.
    delete : boolean
        If delete=True, delete the individual time-slice files after a
        successful join.
    output_prefix : str
        String prepended to the name of each concatenated file.

    Returns
    -------
    ens : cmipdata Ensemble
        An updated ensemble object, containing the names of the newly
        concatenated files.

    See also
    --------
    cat_experiments : Concatenate the files for two experiments.

    Examples
    --------
    For an ensemble with one model, one experiment and one realization::

        # before: six files
        #   ts_Amon_HadCM3_historical_r1i1p1_185912-188411.nc
        #   ...
        #   ts_Amon_HadCM3_historical_r1i1p1_198412-200512.nc
        ens = cd.cat_exp_slices(ens)
        # after: one file
        #   ts_Amon_HadCM3_historical_r1i1p1_185912-200512.nc
    """
    ens = copy.deepcopy(ensemble)

    # Set the env variable to skip repeated times
    os.environ["SKIP_SAME_TIME"] = "1"

    for var in ens.objects('variable'):
        files = var.children
        # Nothing to join when the variable has zero or one file.
        if len(files) <= 1:
            continue

        modfiles = [f.name for f in files]
        first_date = min(f.start_date for f in files)
        last_date = max(f.end_date for f in files)

        print('joining files')
        outfile = (output_prefix +
                   os.path.split(files[0].getNameWithoutDates())[1] + '_' +
                   str(first_date) + '-' + str(last_date) + '.nc')

        if os.path.isfile(outfile):
            print(outfile + ' already exists.')
        else:
            catstring = 'cdo mergetime ' + ' '.join(modfiles) + ' ' + outfile
            if os.system(catstring) != 0:
                # Keep the slices (in ens and on disk) when the merge failed,
                # otherwise the only copy of the data would be removed below.
                print('joining failed... deleting ' + outfile)
                os.system('rm -f ' + outfile)
                continue

        joined = dc.DataNode('ncfile', outfile, parent=var,
                             start_date=first_date, end_date=last_date)
        var.children = [joined]

        # delete the old files
        if delete is True:
            for cfile in modfiles:
                os.system('rm ' + cfile)

    ens.squeeze()
    return ens
def cat_experiments(ensemble, variable_name, exp1_name, exp2_name, delete=True, output_prefix=''):
    """Concatenate the files for two experiments.

    Experiments exp1 and exp2 are concatenated with ``cdo mergetime`` into a
    single file for each realization of each model in the ensemble that has
    files for both experiments.

    Realizations of exp1 that have no counterpart in exp2 are reported and
    are not part of the returned ensemble. Models that have only one of the
    two experiments are removed from the returned ensemble. Experiments other
    than exp1 and exp2 are removed from every model. A matching realization
    that lacks variable_name in either experiment is skipped with a message.

    Parameters
    ----------
    ensemble : cmipdata Ensemble
        The ensemble on which to do the concatenation. It is deep-copied,
        the input object is not modified.
    variable_name : str
        The name of the variable to be concatenated.
    exp1_name : str
        The name of the first experiment to be concatenated (e.g. 'historical').
    exp2_name : str
        The name of the second experiment to be concatenated (e.g. 'rcp45').
    delete : boolean
        If delete=True, every file listed in the input ensemble is removed
        from disk once the joins are done.
    output_prefix : str
        String prepended to the name of each concatenated file.

    Returns
    -------
    ens : cmipdata Ensemble
        An updated ensemble object, containing the names of the newly
        concatenated files.

    Examples
    --------
    1. Join the historical and rcp45 simulations for variable ts in ens::

        ens = cd.cat_experiments(ens, 'ts', exp1_name='historical', exp2_name='rcp45')
    """
    ens = copy.deepcopy(ensemble)

    # Set the env variable to skip repeated times
    os.environ["SKIP_SAME_TIME"] = "1"

    # Untouched copy of the inputs, used at the end to remove them from disk.
    del_ens = copy.deepcopy(ens)

    # model name -> the one experiment it has (the other is missing entirely)
    models_to_delete = {}
    # model name -> realizations of exp1 that are missing from exp2
    realizations_to_delete = {}

    for model in ens.children:
        e1 = model.getChild(exp1_name)
        e2 = model.getChild(exp2_name)

        # Drop every other experiment. Iterate over a snapshot because
        # model.delete() is called while walking the model's children.
        for e in list(model.children):
            if e is not e1 and e is not e2:
                model.delete(e)

        if e1 is not None and e2 is not None:
            e1_r_names = [r.name for r in e1.children]
            e2_r_names = [r.name for r in e2.children]

            realization_matches = set(e1_r_names).intersection(e2_r_names)
            realization_misses = set(e1_r_names).difference(e2_r_names)
            if realization_misses:
                realizations_to_delete[model.name] = realization_misses

            for realization_name in realization_matches:
                e1r = e1.getChild(realization_name)
                e2r = e2.getChild(realization_name)
                e1v = e1r.getChild(variable_name)
                e2v = e2r.getChild(variable_name)

                # getChild returns None for a missing child; without both
                # variables there is nothing to join for this realization.
                if e1v is None or e2v is None:
                    print("\n skip " + model.name + '_' + e1r.name + ': ' +
                          variable_name + ' is missing from one experiment')
                    continue

                members = list(e1v.children) + list(e2v.children)
                infiles = ' '.join(f.name for f in members)
                out_startdate = min(f.start_date for f in members)
                out_enddate = max(f.end_date for f in members)

                # construct the output filename
                outfile = (output_prefix +
                           e1v.name + '_' + e1v.realm + '_' + model.name + '_' +
                           e1.name + '-' + e2.name + '_' +
                           e1r.name + '_' +
                           str(out_startdate) + '-' +
                           str(out_enddate) + '.nc')

                # do the concatenation using CDO
                print("\n join " + model.name + '_' + e1r.name + ' ' + e1.name + ' to ' + e2.name)
                catstring = 'cdo mergetime ' + infiles + ' ' + outfile
                os.system(catstring)

                # Add a new joined experiment to ens, with a newly minted
                # realization, variable and file.
                # NOTE(review): a new experiment node is created for every
                # realization; whether model.add() merges nodes with the same
                # name is not visible here - confirm against classes.py.
                e = dc.DataNode('experiment', e1.name + '-' + e2.name, parent=model)
                model.add(e)
                r = dc.DataNode('realization', e1r.name, parent=e)
                e.add(r)
                v = dc.DataNode('variable', e1v.name, parent=r)
                r.add(v)
                f = dc.DataNode('ncfile', outfile, parent=v,
                                start_date=out_startdate,
                                end_date=out_enddate)
                v.add(f)

            # e1 and e2 have been replaced by the joined experiment
            model.delete(e1)
            model.delete(e2)
        elif e1 is None and e2 is None:
            pass
        elif e2 is None:
            models_to_delete[model.name] = e1.name
        elif e1 is None:
            models_to_delete[model.name] = e2.name

    # If delete=True, remove the original input files from disk, leaving only
    # the newly joined ones behind.
    if delete is True:
        for f in del_ens.objects('ncfile'):
            os.system('rm ' + f.name)

    # Remove models with missing experiments from ens, and report them
    print(' \n\n Models deleted from ensemble (missing one experiment completely): \n')
    print('\t Model \t Experiment \n')
    for model_name, missing_experiment in models_to_delete.items():
        ens.delete(ens.getChild(model_name))
        print('\t %s \t %s' % (model_name, missing_experiment))

    print(' \n\n Realizations deleted (missing from one experiment): \n')
    print('\t Model \t Realizations \n')
    for key, value in realizations_to_delete.items():
        print('\t %s \t %s' % (key, ' '.join(value)))

    ens.squeeze()
    return ens
def ens_stats(ens, variable_name, output_prefix=''):
    """Compute the ensemble mean and standard deviation.

    For each experiment in ens, the files of variable variable_name are first
    averaged over all realizations of each model (``cdo ensmean``), giving one
    intermediate 'R-MEAN' file per model, so that each model has a weight of
    one. The ensemble mean (``cdo ensmean``) and standard deviation
    (``cdo ensstd``) are then computed across those per-model files, and the
    intermediate files are removed.

    Output names are derived from the first model's file name with the
    realization replaced by 'R-MEAN' (or 'STD'), the model name removed and
    'ENS-MEAN_' / 'ENS-STD_' prepended.

    The ensemble must be homogenous: all files must be on the same grid and
    span the same time-frame within each experiment (see remap and
    time_slice), and there should be only one file per realization and
    experiment (see cat_exp_slices).

    Parameters
    ----------
    ens : cmipdata Ensemble
        The ensemble on which to compute the statistics.
    variable_name : str
        The name of the variable to process.
    output_prefix : str
        String prepended to the names of all files written.

    Returns
    -------
    meanfiles, stdevfiles : tuple of lists
        The names of the mean and standard deviation files created, one
        entry per experiment, in the order the experiments are first seen.

    Examples
    --------
    1. Compute the statistics for the ts variable::

        cd.ens_stats(ens, 'ts')
    """
    meanfiles = []
    stdevfiles = []

    # Group the files by experiment and model. First-seen order is recorded
    # explicitly so that the result does not depend on dict ordering.
    experiment_order = []
    grouped = {}  # experiment name -> (model order, {model name: [ncfile]})
    for f in ens.objects('ncfile'):
        table = f.getDictionary()
        if table['variable'] != variable_name:
            continue
        experimentname = table['experiment']
        modelname = table['model']
        if experimentname not in grouped:
            grouped[experimentname] = ([], {})
            experiment_order.append(experimentname)
        model_order, by_model = grouped[experimentname]
        if modelname not in by_model:
            by_model[modelname] = []
            model_order.append(modelname)
        by_model[modelname].append(f)

    # multiple output files for multiple experiments
    for experimentname in experiment_order:
        model_order, by_model = grouped[experimentname]
        files_to_mean = []
        outfilename = None

        for modelname in model_order:
            files = by_model[modelname]
            fnames = [f.name for f in files]
            # files[0].parent.parent is the realization node of the file
            rmean_name = os.path.split(fnames[0])[1].replace(
                files[0].parent.parent.name, 'R-MEAN')
            outfile = output_prefix + rmean_name

            # Name template for the ensemble files: taken from the same model
            # whose name is stripped, and before output_prefix is applied so
            # the prefix is not duplicated.
            if outfilename is None:
                outfilename = rmean_name.replace(modelname + '_', "")

            if not os.path.isfile(outfile):
                os.system('cdo ensmean ' + ' '.join(fnames) + ' ' + outfile)
            files_to_mean.append(outfile)

        in_files = ' '.join(files_to_mean)

        out_file = output_prefix + 'ENS-MEAN_' + outfilename
        os.system('cdo ensmean ' + in_files + ' ' + out_file)
        meanfiles.append(out_file)

        # Now do the standard deviation
        out_file = output_prefix + 'ENS-STD_' + outfilename.replace('R-MEAN', 'STD')
        os.system('cdo ensstd ' + in_files + ' ' + out_file)
        stdevfiles.append(out_file)

        # remove the intermediate per-model realization means
        for fname in files_to_mean:
            os.system('rm ' + fname)

    return meanfiles, stdevfiles
# =========================================================================
# The operators below this point work on a file-by-file basis and can be chained together
# (in principle, not implemented). Practically my_operator can be used to chain operations.
# =========================================================================
def areaint(ensemble, delete=True, output_prefix=''):
    """
    Calculate the area weighted integral for each file in the ensemble.

    Each file is multiplied by its grid-cell area and summed over the field
    (``cdo fldsum -mul <file> -gridarea <file>``). The output file name is
    the input base name prepended with output_prefix + 'area-integral_'.

    If cdo exits with a non-zero status, the partial output is removed and
    no output file is registered for that input.

    Parameters
    ----------
    ensemble : cmipdata Ensemble
        The ensemble on which to do the processing. It is deep-copied, the
        input object is not modified.
    delete : boolean
        If delete=True, delete the original input files.
    output_prefix : str
        String prepended to the name of each output file.

    Returns
    -------
    ens : cmipdata Ensemble
        An updated ensemble object, containing the names of the newly
        processed files.

    Examples
    --------
    1. Compute the area integral for all files in ens::

        ens = cd.areaint(ens)
    """
    ens = copy.deepcopy(ensemble)

    # loop over all files
    for f in ens.objects('ncfile'):
        var = f.parent
        outfile = output_prefix + 'area-integral_' + os.path.split(f.name)[1]
        cdostr = 'cdo fldsum -mul ' + f.name + ' -gridarea ' + f.name + ' ' + outfile

        if os.system(cdostr) != 0:
            # do not list an output that was never (fully) written
            print('deleting ' + outfile)
            os.system('rm -f ' + outfile)
        else:
            ncfile = dc.DataNode('ncfile', outfile, parent=var,
                                 start_date=f.start_date, end_date=f.end_date)
            var.add(ncfile)
        var.delete(f)

        # delete old files
        if delete is True:
            os.system('rm ' + f.name)

    return ens
def areamean(ensemble, delete=True, output_prefix=''):
    """
    Calculate the area mean for each file in the ensemble.

    Each file is processed with ``cdo fldmean``. The output file name is the
    input base name prepended with output_prefix + 'area-mean_'.

    If cdo exits with a non-zero status, the partial output is removed and
    no output file is registered for that input.

    Parameters
    ----------
    ensemble : cmipdata Ensemble
        The ensemble on which to do the processing. It is deep-copied, the
        input object is not modified.
    delete : boolean
        If delete=True, delete the original input files.
    output_prefix : str
        String prepended to the name of each output file.

    Returns
    -------
    ens : cmipdata Ensemble
        An updated ensemble object, containing the names of the newly
        processed files.

    Examples
    --------
    1. Compute the area mean for all files in ens::

        area_mean_ens = cd.areamean(ens)
    """
    ens = copy.deepcopy(ensemble)

    # loop over all files
    for f in ens.objects('ncfile'):
        var = f.parent
        outfile = output_prefix + 'area-mean_' + os.path.split(f.name)[1]
        cdostr = 'cdo fldmean ' + f.name + ' ' + outfile

        if os.system(cdostr) != 0:
            # do not list an output that was never (fully) written
            print('deleting ' + outfile)
            os.system('rm -f ' + outfile)
        else:
            ncfile = dc.DataNode('ncfile', outfile, parent=var,
                                 start_date=f.start_date, end_date=f.end_date)
            var.add(ncfile)
        var.delete(f)

        # delete old files
        if delete is True:
            os.system('rm ' + f.name)

    return ens
def zonmean(ensemble, delete=True, output_prefix=''):
    """
    Calculate the zonal mean for each file in the ensemble.

    Every file is run through ``cdo zonmean``; the result is written to a
    file named output_prefix + 'zonal-mean_' + the input base name. When cdo
    reports a failure the output file is removed again and the input is
    simply dropped from the returned ensemble.

    Parameters
    ----------
    ensemble : cmipdata Ensemble
        The ensemble on which to do the processing (a deep copy is modified).
    delete : boolean
        If delete=True, delete the original input files.
    output_prefix : str
        String prepended to the name of each output file.

    Returns
    -------
    ens : cmipdata Ensemble
        An updated ensemble object, containing the names of the newly
        processed files.

    Examples
    --------
    1. Compute the zonal mean for all files in ens::

        zonal_mean_ens = cd.zonmean(ens)
    """
    result = copy.deepcopy(ensemble)

    for src in result.objects('ncfile'):
        target = output_prefix + 'zonal-mean_' + os.path.split(src.name)[1]
        status = os.system('cdo zonmean ' + src.name + ' ' + target)
        owner = src.parent

        if status == 0:
            # success: register the new file under the same variable
            owner.add(dc.DataNode('ncfile', target, parent=owner,
                                  start_date=src.start_date,
                                  end_date=src.end_date))
        else:
            # failure: clean up whatever cdo left behind
            try:
                print('deleting ' + target)
                os.system('rm -f ' + target)
            except:
                pass

        # the input is replaced (or dropped) in either case
        owner.delete(src)

        if delete is True:
            os.system('rm ' + src.name)

    return result
def climatology(ensemble, delete=True, output_prefix=''):
    """
    Compute the monthly climatology for each file in the ensemble.

    The climatology is calculated over the full file-length using
    ``cdo ymonmean`` on the file's variable, and the output file name is the
    input base name prepended with output_prefix + 'climatology_'. To compute
    the climatology over a specific period, apply time_slice first.

    If cdo exits with a non-zero status, the partial output is removed and
    no output file is registered for that input.

    Parameters
    ----------
    ensemble : cmipdata Ensemble
        The ensemble on which to do the processing. It is deep-copied, the
        input object is not modified.
    delete : boolean
        If delete=True, delete the original input files.
    output_prefix : str
        String prepended to the name of each output file.

    Returns
    -------
    ens : cmipdata Ensemble
        An updated ensemble object, containing the names of the newly
        processed files. The new file nodes carry the start and end dates of
        their input files.

    Examples
    --------
    1. Compute the climatology::

        climatology_ens = cd.climatology(ens)
    """
    ens = copy.deepcopy(ensemble)

    # loop over all the files
    for f in ens.objects('ncfile'):
        var = f.parent
        outfile = output_prefix + 'climatology_' + os.path.split(f.name)[1]
        cdostr = 'cdo ymonmean -selvar,' + var.name + ' ' + f.name + ' ' + outfile

        if os.system(cdostr) != 0:
            # do not list an output that was never (fully) written
            print('deleting ' + outfile)
            os.system('rm -f ' + outfile)
        else:
            ncfile = dc.DataNode('ncfile', outfile, parent=var,
                                 start_date=f.start_date, end_date=f.end_date)
            var.add(ncfile)
        var.delete(f)

        # delete the old file
        if delete is True:
            os.system('rm ' + f.name)

    return ens
def remap(ensemble, remap='r360x180', method='remapdis', delete=True, output_prefix=''):
    """
    Remap files to a specified resolution.

    Every file in the ensemble is interpolated with the cdo operator named
    by method onto the grid named by remap, after selecting the file's
    variable. Output files are named output_prefix + 'remap_' + the input
    base name. When cdo reports a failure the output file is removed again
    and the input is dropped from the returned ensemble.

    Parameters
    ----------
    ensemble : cmipdata Ensemble
        The ensemble on which to do the remapping (a deep copy is modified).
    remap : str
        The cdo grid description to remap to, e.g. for a 1-degree grid
        remap='r360x180'.
    method : str
        The cdo remapping operator to use, 'remapdis' by default.
    delete : boolean
        If delete=True, delete the original input files.
    output_prefix : str
        String prepended to the name of each output file.

    Returns
    -------
    ens : cmipdata Ensemble
        An updated ensemble object, containing the names of the newly
        processed files.

    Examples
    --------
    1. Remap files to a one-degree grid::

        ens = cd.remap(ens, remap='r360x180')
    """
    result = copy.deepcopy(ensemble)

    for src in result.objects('ncfile'):
        target = output_prefix + 'remap_' + os.path.split(src.name)[1]
        owner = src.parent
        command = ('cdo ' + method + ',' + remap + ' -selvar,' +
                   owner.name + ' ' + src.name + ' ' + target)
        status = os.system(command)

        if status == 0:
            # success: register the remapped file under the same variable
            owner.add(dc.DataNode('ncfile', target, parent=owner,
                                  start_date=src.start_date,
                                  end_date=src.end_date))
        else:
            # failure: clean up whatever cdo left behind
            try:
                print('deleting ' + target)
                os.system('rm -f ' + target)
            except:
                pass

        # the input is replaced (or dropped) in either case
        owner.delete(src)

        if delete is True:
            os.system('rm ' + src.name)

    return result
def time_slice(ensemble, start_date, end_date, delete=True, output_prefix=''):
    """
    Limit the data to the period between start_date and end_date,
    for each file in the ensemble.

    Files whose date range already equals the requested range are left
    untouched. Files that cover the requested range are cut with
    ``cdo seldate`` and replaced by a file named with the new date range.
    Files that do not cover the requested range, or for which cdo fails, are
    dropped from the returned ensemble.

    Parameters
    ----------
    ensemble : cmipdata Ensemble
        The ensemble on which to do the processing. It is deep-copied, the
        input object is not modified.
    start_date : str
        Start date for the output file with format: YYYY-MM-DD
    end_date : str
        End date for the output file with format: YYYY-MM-DD
    delete : boolean
        If delete=True, delete the original input files (of every file that
        was processed or dropped).
    output_prefix : str
        String prepended to the name of each output file.

    Returns
    -------
    ens : cmipdata Ensemble
        An updated ensemble object, containing the names of the newly
        processed files.

    Examples
    --------
    1. Select data between 1 January 1979 and 31 December 2013::

        ens = cd.time_slice(ens, start_date='1979-01-01', end_date='2013-12-31')
    """
    ens = copy.deepcopy(ensemble)
    date_range = start_date + ',' + end_date

    # convert dates to CMIP YYYYMM format
    start_yyyymm = start_date.replace('-', '')[0:6]
    end_yyyymm = end_date.replace('-', '')[0:6]

    for f in ens.objects('ncfile'):
        print(f.name)

        # Nothing to do when the file already spans exactly the requested
        # range; re-slicing could also make cdo write onto its own input.
        if f.start_date == start_yyyymm and f.end_date == end_yyyymm:
            continue

        var = f.parent
        # check that the new date range is within the old date range
        if f.start_date <= start_yyyymm and f.end_date >= end_yyyymm:
            outfile = (output_prefix + os.path.split(f.getNameWithoutDates())[1] +
                       '_' + start_yyyymm + '-' + end_yyyymm + '.nc')
            print('time limiting...')
            cdostr = ('cdo -L seldate,' + date_range + ' -selvar,' +
                      var.name + ' ' + f.name + ' ' + outfile)

            if os.system(cdostr) != 0:
                # if the time slicing is unsuccessful, remove the new file
                print('deleting ' + outfile)
                os.system('rm -f ' + outfile)
            else:
                ncfile = dc.DataNode('ncfile', outfile, parent=var,
                                     start_date=start_yyyymm, end_date=end_yyyymm)
                var.add(ncfile)
        else:
            # var.parent is the realization, three levels up is the model
            print("%s %s is not in the date-range" % (var.parent.parent.parent.name, var.parent.name))

        var.delete(f)

        # delete the old file
        if delete is True:
            os.system('rm ' + f.name)

    ens.squeeze()
    return ens
def time_anomaly(ensemble, start_date, end_date, delete=False, output_prefix=''):
    """
    Compute the anomaly relative to the period between start_date and
    end_date, for each file in the ensemble.

    For every file that covers the whole base period, the time mean over the
    base period is subtracted from the file
    (``cdo sub <file> -timmean -seldate,... -selvar,... <file>``). The output
    file name is the input base name prepended with
    output_prefix + 'anomaly_'. Files that do not cover the base period, or
    for which cdo fails, are dropped from the returned ensemble.

    Parameters
    ----------
    ensemble : cmipdata Ensemble
        The ensemble on which to do the processing. It is deep-copied, the
        input object is not modified.
    start_date : str
        Start date for the base period with format: YYYY-MM-DD
    end_date : str
        End date for the base period with format: YYYY-MM-DD
    delete : boolean
        If delete=True, delete the original input files.
    output_prefix : str
        String prepended to the name of each output file.

    Returns
    -------
    ens : cmipdata Ensemble
        An updated ensemble object, containing the names of the newly
        processed files.

    Examples
    --------
    1. Compute the anomaly relative to the base period 1980 to 2010::

        ens = cd.time_anomaly(ens, start_date='1980-01-01', end_date='2010-12-31')
    """
    ens = copy.deepcopy(ensemble)
    date_range = start_date + ',' + end_date

    # convert dates to CMIP YYYYMM format
    start_yyyymm = start_date.replace('-', '')[0:6]
    end_yyyymm = end_date.replace('-', '')[0:6]

    # loop over all files
    for f in ens.objects('ncfile'):
        var = f.parent

        # The file must contain the whole base period: it has to start no
        # later than the period start and end no earlier than the period end.
        if f.start_date <= start_yyyymm and f.end_date >= end_yyyymm:
            outfile = output_prefix + 'anomaly_' + os.path.split(f.name)[1]
            cdostr = ('cdo sub ' + f.name + ' -timmean -seldate,' + date_range +
                      ' -selvar,' + var.name + ' ' + f.name + ' ' + outfile)

            if os.system(cdostr) != 0:
                # do not list an output that was never (fully) written
                print('deleting ' + outfile)
                os.system('rm -f ' + outfile)
            else:
                ncfile = dc.DataNode('ncfile', outfile, parent=var,
                                     start_date=f.start_date, end_date=f.end_date)
                var.add(ncfile)

        var.delete(f)

        # delete the old file
        if delete is True:
            os.system('rm ' + f.name)

    ens.squeeze()
    return ens
def my_operator(ensemble, my_cdo_str="", output_prefix='processed_', delete=False):
    """
    Apply a customized cdo operation to all files in the ensemble.

    For each file, my_cdo_str is filled in with str.format and run through
    the shell. The available fields are the entries of the file's
    getDictionary() (documented as model, experiment, realization and
    variable) plus infile and outfile. The output file name is the input
    base name prepended with output_prefix. When the command fails the
    output file is removed again and the input is dropped from the returned
    ensemble.

    Parameters
    ----------
    ensemble : cmipdata Ensemble
        The ensemble on which to do the processing (a deep copy is modified).
    my_cdo_str : str
        The (chain) of cdo commands to apply, with {infile} and {outfile}
        placeholders.
    output_prefix : str
        The string to prepend to the processed filenames.
    delete : boolean
        If delete=True, delete the original input files.

    Returns
    -------
    ens : cmipdata Ensemble
        An updated ensemble object, containing the names of the newly
        processed files.

    Examples
    --------
    1. Do an annual mean::

        my_cdo_str = 'cdo -yearmean {infile} {outfile}'
        my_ens = cd.my_operator(ens, my_cdo_str, output_prefix='annual_')

    2. Do a date selection and time mean::

        my_cdo_str = 'cdo sub {infile} -timmean -seldate,1991-01-01,2000-12-31 {infile} {outfile}'
        my_ens = cd.my_operator(ens, my_cdo_str, output_prefix='test_')
    """
    ensem = copy.deepcopy(ensemble)

    # Keep a pristine copy listing the inputs, for removal at the very end
    if delete is True:
        del_ens = copy.deepcopy(ensemble)

    for src in ensem.objects('ncfile'):
        target = output_prefix + os.path.split(src.name)[1]

        fields = src.getDictionary()
        fields['infile'] = src.name
        fields['outfile'] = target
        status = os.system(my_cdo_str.format(**fields))

        owner = src.parent
        if status == 0:
            # success: register the processed file under the same variable
            owner.add(dc.DataNode('ncfile', target, parent=owner,
                                  start_date=src.start_date,
                                  end_date=src.end_date))
        else:
            # failure: clean up whatever the command left behind
            try:
                print('Failed processing... deleting ' + target)
                os.system('rm -f ' + target)
            except:
                pass

        # the input is replaced (or dropped) in either case
        owner.delete(src)

    if delete is True:
        del_ens_files(del_ens)

    ensem.squeeze()
    return ensem
def del_ens_files(ensem):
    """Delete from disk all files listed in the ensemble ensem.

    Each file is removed with ``rm`` and its node is deleted from its parent
    variable; the ensemble is squeezed afterwards. ensem is modified in
    place and nothing is returned.
    """
    for node in ensem.objects('ncfile'):
        os.system('rm ' + node.name)
        node.parent.delete(node)

    ensem.squeeze()
def trends(ensemble, start_date, end_date, delete=False, output_prefix=''):
    """
    Compute linear trends over the period between start_date and end_date,
    for each file in the ensemble.

    For every file that covers the requested period, ``cdo trend`` is run on
    the selected dates and variable. It writes two files, named after the
    input base name with the new date range and prepended with
    output_prefix + 'intercept_' and output_prefix + 'slope_'. Both are
    registered in the returned ensemble in place of the input file. Files
    that do not cover the period, or for which cdo fails, are dropped from
    the returned ensemble.

    Parameters
    ----------
    ensemble : cmipdata Ensemble
        The ensemble on which to do the processing. It is deep-copied, the
        input object is not modified.
    start_date : str
        Start date of the trend period with format: YYYY-MM-DD
    end_date : str
        End date of the trend period with format: YYYY-MM-DD
    delete : boolean
        If delete=True, delete the original input files.
    output_prefix : str
        String prepended to the name of each output file.

    Returns
    -------
    ens : cmipdata Ensemble
        An updated ensemble object, containing the names of the newly
        processed files.

    Examples
    --------
    1. Compute trends between 1 January 1979 and 31 December 2013::

        ens = cd.trends(ens, start_date='1979-01-01', end_date='2013-12-31')
    """
    # copy the ens object
    ens = copy.deepcopy(ensemble)

    # set up the dates in cmip5 format
    date_range = start_date + ',' + end_date
    start_yyyymm = start_date.replace('-', '')[0:6]
    end_yyyymm = end_date.replace('-', '')[0:6]

    # loop over all files
    for f in ens.objects('ncfile'):
        var = f.parent

        # check the date range is within the file range
        if f.start_date <= start_yyyymm and f.end_date >= end_yyyymm:
            # Use the base name only: prepending 'slope_' to a name that
            # still contains a directory would give 'slope_/some/dir/...'.
            basename = (os.path.split(f.getNameWithoutDates())[1] + '_' +
                        start_yyyymm + '-' + end_yyyymm + '.nc')
            intercept_file = output_prefix + 'intercept_' + basename
            slope_file = output_prefix + 'slope_' + basename

            print('time limiting...')
            cdostr = ('cdo trend -seldate,' + date_range + ' ' +
                      '-selvar,' + var.name + ' ' + f.name + ' ' +
                      intercept_file + ' ' + slope_file)

            if os.system(cdostr) != 0:
                # if the trends are not successful the new files are deleted
                print('Failed processing... deleting ' + basename)
                os.system('rm -f ' + intercept_file)
                os.system('rm -f ' + slope_file)
            else:
                # Only the two files cdo actually writes are registered.
                for outfile in (intercept_file, slope_file):
                    ncfile = dc.DataNode('ncfile', outfile, parent=var,
                                         start_date=start_yyyymm,
                                         end_date=end_yyyymm)
                    var.add(ncfile)

        var.delete(f)

        # delete the old file
        if delete is True:
            os.system('rm ' + f.name)

    return ens