In [ ]:
import cdms2
DATAPATH = './data/Obs/mo/sst/HadISST/'
# open the nc file throgh cdms2 module
f = cdms2.open(DATAPATH + 'sst_HadISST_Climatology_1961-1990.nc')
# You can query the file
print f.listvariables()
In [ ]:
x = f('sst') # retrieves the whole dataset - a “slab”
x.shape
In [ ]:
# Selecting a specific area
x = f('sst', latitude=(0., 35.), longitude= (20., 100.))
x.shape
In [ ]:
# Selecting specific times
y = f('sst', time=('0-6-1', '0-6-31'))
print "y shape : ", y.shape # note : this is monthly climatology file.
# You can also select the specific time from a “slab”
z = x(time=('0-6-1', '0-6-31'))
print "z shape : ", y.shape
In [ ]:
# You can “squeeze” out “singleton” dimensions while retreiving data from file itself
y = f('sst', time=('0-6-1', '0-6-31'), squeeze=1)
print "squeezed", y.shape
In [ ]:
print z.shape
# You can “squeeze” out “singleton” dimensions to loaded array also
z = z(squeeze=1)
print z.shape
In [ ]:
# You can also change the order of returned dimensions
z = x(time=('0-6-1', '0-6-31'), order='xyt')
z.info()
In [ ]:
# You can query the order of returned dimensions!
print 'Order of dimensions for x is:', x.getOrder()
In [ ]:
# You can change the order of returned dimensions!
x2 = f('sst', latitude=(0., 35.), longitude=(20., 100.), order='xty')
print x2.info()
In [ ]:
# If I just want to ensure that time as my first axis
x3 = f('sst', order='t...')
x3.info()
In [ ]:
# You can check the axis and its properties
lat_axis = x2.getLatitude()
print lat_axis.info()
lat_bounds = lat_axis.getBounds()
#print lat_bounds
In [ ]:
# Axes can be retrieved by index position
my_ax = x.getAxis(0)
print my_ax.info()
In [1]:
#hideme
from IPython.core.display import Image
Image(filename='img/bounds.png')
Out[1]:
In [ ]:
x3 = f('sst', latitude=(32.1, 32.1, 'cob'), longitude=(100.3, 100.3, 'cob'))
x3.shape
In the args of latitute / longitude, we can pass thrid option as 'cob', 'oob', 'ccb', etc., * The first 2 positions are either 'c'losed or 'o'pen * The 3rd position is 'b'ound or 'e'xtend or .....
In [ ]:
import cdutil
data = f("sst", cdutil.region.NorthernHemisphere)
data.shape
In [ ]:
# specify NINO domain latitute & longitude
NINO34 = cdms2.selectors.Selector(cdutil.region.domain(latitude=(-5., 5.), longitude=(190., 240.)))
In [ ]:
# specify Indian domain latitute & longitude
India = cdms2.selectors.Selector(cdutil.region.domain(latitude=(0., 40.), longitude=(60., 100.)))
In [ ]:
Idata = f("sst", India)
Idata.shape
In [ ]:
# Suppose we have a slab
ds1 = f('sst', latitude=(0., 35.), longitude=(20., 100.), time=('0-6-1', '0-6-31'), order='tyx')
f.close()
print 'ds1.shape = ', ds1.shape, 'Axis order=', ds1.getOrder()
In [ ]:
# Let us now extract another dataset.
f2 = cdms2.open('data/Climatology/NCEP_NCAR_Climatology_ltm/Surface/slp.mon.ltm.nc')
ds2 = f2('slp', latitude=(0., 35.), longitude=(20., 100.), time=('1-6-1', '1-6-30'))
print 'ds2.shape = ', ds2.shape, 'Axis order =', ds2.getOrder()
In [ ]:
# Lets transform ds2 into the grid in ds1
ds3 = ds2.regrid(ds1.getGrid())
print 'ds3.shape = ', ds3.shape, 'Axis order =', ds3.getOrder()
In [ ]:
# getGrid() function returns the grid type and resolution.
ds1.getGrid()
In [ ]:
# import Horizontal function from regrid2 module
from regrid2 import Horizontal
# get the source grid
ingrid = ds2.getGrid()
# get the destination grid
outgrid = ds1.getGrid()
# generate the function by passing in & out grid as args
# Horizontal will be used to regrid over lat, lon region
horizontalRegridFunc = Horizontal(ingrid, outgrid)
# pass the data into the function which we generated in just above!
new_ds2 = horizontalRegridFunc(ds2)
print "original data shape", ds2.shape
print "after regrid", new_ds2.shape
>>> var_on_new_levels = var.pressureRegrid(levout)
In [ ]:
# open & get uwnd data with pressure level
f3 = cdms2.open('data/Climatology/NCEP_NCAR_Climatology_ltm/Levels/uwnd.jan.ltm.nc')
u = f3('uwnd')
f3.close()
print "U shape", u.shape
# get the level axis
u.getLevel()
In [ ]:
# this will return the actual value or index of the level axis
u.getLevel()[:]
Lets create axis which contains only 850 hPa and 200 hPa pressure levels
In [ ]:
# Lets create our own level axis
levAxis = cdms2.createAxis([850., 200.], id='level')
levAxis.designateLevel()
levAxis
In [ ]:
# do pressure regrid
u_850_200 = u.pressureRegrid(levAxis)
print "Original U shape", u.shape
print "After pressure regrided u shape", u_850_200.shape
u_850_200.getLevel()
In [ ]:
# import PressureRegridder function from regrid2 module
from regrid2 import PressureRegridder
# generate the function by passing in & out level axis as args
# PressureRegridder will be used to regrid over level axis
verticalRegridFunc = PressureRegridder(u.getLevel(), levAxis)
# pass the data into the function which we generated in just above!
u_new = verticalRegridFunc(u)
print "Original U shape", u.shape
print "After pressure regrided u shape", u_850_200.shape
In [ ]:
import numpy
lev_50_intervals = numpy.arange(1000, 10, -50)
print lev_50_intervals
# Lets create our own level axis
lev50Axis = cdms2.createAxis(lev_50_intervals, id='level')
lev50Axis.designateLevel()
print lev50Axis
In [ ]:
# do pressure regrid
u_50_intervals = u.pressureRegrid(lev50Axis)
print "Original U shape", u.shape
print "After pressure regrided u shape", u_50_intervals.shape
u_50_intervals.getLevel()
In [ ]:
# return the available module functions
import regrid2
dir(regrid2)
- get masked array from one dataset or create it by yourself - set that masked array to another dataset
In [ ]:
# get the mask array from ds1 (sst) data
sst_mask = cdms2.MV2.getmask(ds1(order='tyx')) # or 'ds1.mask' will return the same
print 'sst_mask.shape after reorder = ', sst_mask.shape
print sst_mask.__class__
# So we resize the mask with respect to ds3
sst_mask = sst_mask.resize(ds3.shape)
# ds3.info()
# Create new variable with new mask
ds4 = cdms2.createVariable(ds3[:], mask=sst_mask, id='masked_slp', fill_value=1.e+20)
ds4.comments = 'set mask from observed HadISST dataset'
ds4.info()
Try the following in your terminal:
$ cdscan -x "some_filename.xml" DATA_PATH/*.nc
You can also change the time axis while you are at it!
$ cdscan -x "some_filename.xml" -i 1 -r "months since 1-1-1" DATA_PATH2/*.nc
To know about more command line options
$ cdscan --help
In [ ]:
!ls data/Climatology/NCEP_NCAR_Climatology_ltm/Levels
We have monthly climatology in "12 uwnd" individual monthly and "5 vwnd" mixed monthly nc files. Lets do cdscan !
In [ ]:
# Lets find system uv-cdat cdscan path
import sys
cdscan = sys.path[1].split('lib')[0] + 'bin/cdscan'
print "cdscan command path is '%s'" % cdscan
print "* Use the above cdscan command system path to execute the follwoing code *.\n **Kindly change the cdscan path line startswith ! mark **"
In [ ]:
# make sure cdscan cmd path is same as above, otherwise change the below cmd
import os
current_dir = os.getcwd()
datapath = 'data/Climatology/NCEP_NCAR_Climatology_ltm/Levels'
if not current_dir.endswith(datapath): os.chdir(datapath)
# cdscan -x out.xml -r 'months since 1-1-1' *.nc
!/usr/local/uvcdat/1.5.1/bin/cdscan -x wnd.xml -r 'months since 1-1-1' *.nc
# back to previous directory
if not current_dir.endswith(datapath): os.chdir(current_dir)
print 'current', os.getcwd()
We did cdscan and created single xml file (wnd.xml) which has all individual nc files links and meta data in it
In [ ]:
!ls -lrth data/Climatology/NCEP_NCAR_Climatology_ltm/Levels
Look at the file size of the xml file which we created by cdscan command. It is in size of KB only know.
In [ ]:
# Lets access the xml file through cdms2
f4 = cdms2.open('data/Climatology/NCEP_NCAR_Climatology_ltm/Levels/wnd.xml')
print "Available Vars in wnd.xml : ",f4.listvariable()
print "\n U wind info\n\n", f4['uwnd'].info()
print "\n U Wind Time : ", f4['uwnd'].getTime().asComponentTime()
print "\n\n\nV wind info\n\n", f4['vwnd'].info()
print "\n V Wind Time : ", f4['vwnd'].getTime().asComponentTime()
f4.close()
Climatology, Departures, Anomalies Tools works on BOUNDS, NOT on time values, designed for monthly seasons, but one can create an engine for other kind of data (daily, yearly, etc...).
ac = cdutil.times.ANNUALCYCLE.climatology(data) # data may be monthlyDatasetOfManyYears - cdms variable
cdutil.setTimeBoundsMonthly(Obj)
cdutil.setTimeBoundsYearly(Obj)
cdutil.setTimeBoundsDaily(Obj, frequency=1)(Obj can be data slab or time axis)
MONSOON = cdutil.times.Seasons('JJAS')
In [ ]:
# Create your own seasons:
import cdutil, cdms2
MONSOON = cdutil.times.Seasons('JJAS')
f5 = cdms2.open('data/Climatology/NCEP_NCAR_Climatology_ltm/Levels/wnd.xml', 'r')
uwnd_fullyear_data = f5('uwnd', level=850)#, squeeze=1)
print 'uwnd_fullyear_data.shape =',uwnd_fullyear_data.shape
# set (monthly) bounds to time axis of data
cdutil.setTimeBoundsMonthly(uwnd_fullyear_data)
# get average over MONSOON season
monsoon_averaged_data = MONSOON(uwnd_fullyear_data)
print 'monsoon_averaged_data.shape =',monsoon_averaged_data.shape, 'averaged over JJAS only'
cdutil.setTimeBoundsMonthly(slab/axis)
In [2]:
#hideme
from IPython.core.display import Image
Image(filename='img/tbounds.png')
Out[2]:
longitude = [0, 90, 180, 270] ∴ bounds = [[-45, 45], [45, 135], [135, 225], [225, 315]]
timeax = [“1999-1-1”, “1999-2-1”, ..., “2100-12-1”]
Need to set the bounds sensibly: >>> cdutil.setTimeBoundsMonthly(timeax)
>>> mam_mean = cdutil.MAM(my_var)
In [ ]:
mam_mean = cdutil.MAM(uwnd_fullyear_data)
print "uwnd full year shape", uwnd_fullyear_data.shape
print "MAM mean shape", mam_mean.shape
In [ ]:
djf_mean = cdutil.DJF(uwnd_fullyear_data)
print "uwnd full year shape", uwnd_fullyear_data.shape
print "DJF mean shape", djf_mean.shape
>>> seas_mns = cdutil.SEASONALCYCLE(my_var)
In [ ]:
seas_mean = cdutil.SEASONALCYCLE(uwnd_fullyear_data)
print "uwnd full year shape", uwnd_fullyear_data.shape
print "Seasonal mean shape", seas_mean.shape
print "Since it has only one year data, DJF gives two different seasons like 'JF' & 'D'"
seas_mean.getTime().asComponentTime()
>>> annual_mean = cdutil.YEAR(my_var)
In [ ]:
annual_mean = cdutil.YEAR(uwnd_fullyear_data)
print "uwnd full year shape", uwnd_fullyear_data.shape
print "yearly mean shape", annual_mean.shape
>>> monthly_mean = cdutil.ANNUALCYCLE(my_var)
In [ ]:
monthly_mean = cdutil.ANNUALCYCLE(uwnd_fullyear_data)
print "uwnd full year shape", uwnd_fullyear_data.shape
print "monthly mean shape", monthly_mean.shape
print "**Since uwnd already monthly average dataset, we cant see any difference in it**"
EXERCISE: Try calculating the climatological annual cycle for the NCEP Reanalysis data you have read in.
Season extractors have 2 functions available:
>>> monthly_climatology = cdutil.ANNUALCYCLE.climatology(v)
In [ ]:
monthly_climatology = cdutil.ANNUALCYCLE.climatology(uwnd_fullyear_data)
print "uwnd full year shape", uwnd_fullyear_data.shape
print "monthly climatology shape", monthly_climatology.shape
>>> anomaly = cdutil.ANNUALCYCLE.departures(v, cli60_99)
Note that the second argument is optional but can be a pre‐computed climatology such as here cli60_99 is a 1960‐1999 climatology but the variable v is defined from 1900‐2000. If not given then the overall climatology for v is used.</pre>
In [ ]:
anomaly = cdutil.ANNUALCYCLE.departures(uwnd_fullyear_data)
print "uwnd full year shape", uwnd_fullyear_data.shape
print "monthly anomaly shape", monthly_climatology.shape
– Averaging over 4 time steps: >>> t.shape (4, 181, 360) >>> avg = (t[0]+t[1]+t[2]+t[3])/4
>>>MV2.average(x, axis=0, weights=None, returned=0) – computes the average value of the non‐masked elements of x along the selected axis. If weights is given, it must match the size and shape of x, and the value returned is:
– elements corresponding to those masked in x or weights are ignored. If returned, a 2‐tuple consisting of the average and the sum of the weights is returned.
In [ ]:
import cdms2, MV2
f = cdms2.open('data/Climatology/NCEP_NCAR_Climatology_ltm/Surface/slp.mon.ltm.nc')
data = f('slp')
print data.shape
zm = MV2.average(data, axis=2)
print zm.shape
print zm.info()
The "cdutil.averager()" function is the key to spatial and temporal averaging in UV-CDAT.
result = averager( V, axis=axisoptions, weights=weightoptions, action=actionoptions, returned=returnedoptions, combinewts=combinewtsoptions)
axisoptions has to be a string. You can pass axis='tyx', or '123', or 'x (plev)'. weightoptions is one of 'generate' | 'weighted' | 'equal' | 'unweighted' | array | Masked Variable actionoptions is 'average' | 'sum' [Default = 'average']. You can either return the weighted average or the weighted sum of the data.
In [ ]:
import cdutil
# define the your custom regions.
NINO3 = cdms2.selectors.Selector(cdutil.region.domain(latitude=(-5., 5.,'ccb'), longitude=(210., 270., 'ccb')))
fslp = cdms2.open('data/Obs/mo/sst/HadISST/sst_HadISST_Climatology_1961-1990.nc')
#'sst_HadISST_1870-1_2011-1.nc' !!!!!! use this for actual nino3 sst data
nino3_data = fslp('sst', NINO3)
cdutil.setTimeBoundsMonthly(nino3_data)
print "data shape : ", nino3_data.shape, "& its order : ", nino3_data.getOrder()
# Compute the Spatial average
nino3_average = cdutil.averager(nino3_data, axis='xy')
print "after averged over lat, lon shape : ", nino3_average.shape
# Anomaly from climatology computed over 1961-1990
nino3_slice = nino3_average(time=('0-1-16 12:0:0.0', '0-12-16 12:0:0.0'))
#nino3_slice = nino3_average(time=('1961-1-1', '1990-12-31')) # !!!!!! use this for actual nino3 sst data
nino3_clim = cdutil.ANNUALCYCLE.climatology(nino3_slice)
print "climatology shape : ", nino3_clim.shape
# Now departures
nino3_anomaly = cdutil.ANNUALCYCLE.departures(nino3_average, nino3_clim)
print "anomaly shape : ", nino3_anomaly.shape
>>> c1 = genutil.statistics.correlation(a, b, axis='t') >>> c1.shape >>> c2 = genutil.statistics.correlation(a, b, axis='xy') >>> c2.shape
In [3]:
#hideme
from IPython.core.display import Image
Image(filename='img/grids.png')
Out[3]:
Arulalan.TDate : 18.06.2014 Project Associate, Centre for Atmospheic Sciences, Indian Institute of Technology Delhi, India. Blog : http://tuxcoder.wordpress.com Repo : https://github.com/arulalant/UV-CDAT-IPython-Notebooks