ENV / ATM 415: Climate Laboratory

Some examples of working with data in NetCDF files

Live notes from class on Thursday April 7 2016


In [38]:
%matplotlib inline

In [39]:
import netCDF4 as nc
import numpy as np
import matplotlib.pyplot as plt

In [40]:
nc.Dataset('air.mon.1981-2010.ltm.nc')


Out[40]:
<type 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4_CLASSIC data model, file format UNDEFINED):
    description:  Data from NCEP initialized reanalysis (4x/day).  These are interpolated to pressure surfaces from model (sigma) surfaces.
    platform: Model
    Conventions: COARDS
    not_missing_threshold_percent: minimum 3% values input to have non-missing output value
    history: Created 2011/07/12 by doMonthLTM
Converted to chunked, deflated non-packed NetCDF4 2014/09
    title: monthly ltm air from the NCEP Reanalysis
    References: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis.derived.html
    dimensions(sizes): lon(144), lat(73), level(17), time(12), nbnds(2)
    variables(dimensions): float32 level(level), float32 lat(lat), float32 lon(lon), float64 time(time), float64 climatology_bounds(time,nbnds), float32 air(time,level,lat,lon), float32 valid_yr_count(time,level,lat,lon)
    groups: 

In [41]:
air = nc.Dataset('air.mon.1981-2010.ltm.nc')

In [42]:
print air


<type 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4_CLASSIC data model, file format UNDEFINED):
    description:  Data from NCEP initialized reanalysis (4x/day).  These are interpolated to pressure surfaces from model (sigma) surfaces.
    platform: Model
    Conventions: COARDS
    not_missing_threshold_percent: minimum 3% values input to have non-missing output value
    history: Created 2011/07/12 by doMonthLTM
Converted to chunked, deflated non-packed NetCDF4 2014/09
    title: monthly ltm air from the NCEP Reanalysis
    References: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis.derived.html
    dimensions(sizes): lon(144), lat(73), level(17), time(12), nbnds(2)
    variables(dimensions): float32 level(level), float32 lat(lat), float32 lon(lon), float64 time(time), float64 climatology_bounds(time,nbnds), float32 air(time,level,lat,lon), float32 valid_yr_count(time,level,lat,lon)
    groups: 


In [43]:
air.variables


Out[43]:
OrderedDict([(u'level', <type 'netCDF4._netCDF4.Variable'>
              float32 level(level)
                  units: millibar
                  long_name: Level
                  positive: down
                  GRIB_id: 100
                  GRIB_name: hPa
                  actual_range: [ 1000.    10.]
                  axis: Z
              unlimited dimensions: 
              current shape = (17,)
              filling on, default _FillValue of 9.96920996839e+36 used),
             (u'lat', <type 'netCDF4._netCDF4.Variable'>
              float32 lat(lat)
                  units: degrees_north
                  actual_range: [ 90. -90.]
                  long_name: Latitude
                  standard_name: latitude
                  axis: Y
              unlimited dimensions: 
              current shape = (73,)
              filling on, default _FillValue of 9.96920996839e+36 used),
             (u'lon', <type 'netCDF4._netCDF4.Variable'>
              float32 lon(lon)
                  units: degrees_east
                  long_name: Longitude
                  actual_range: [   0.   357.5]
                  standard_name: longitude
                  axis: X
              unlimited dimensions: 
              current shape = (144,)
              filling on, default _FillValue of 9.96920996839e+36 used),
             (u'time', <type 'netCDF4._netCDF4.Variable'>
              float64 time(time)
                  long_name: Time
                  delta_t: 0000-01-00 00:00:00
                  avg_period: 0030-00-00 00:00:00
                  prev_avg_period: 0000-01-00 00:00:00
                  standard_name: time
                  axis: T
                  climatology: climatology_bounds
                  climo_period: 1981/01/01 - 2010/12/31
                  interpreted_actual_range: 0001/01/01 00:00:00 - 0001/12/01 00:00:00
                  units: days since 1800-01-01 00:00:0.0
                  actual_range: [-657073. -656739.]
              unlimited dimensions: 
              current shape = (12,)
              filling on, default _FillValue of 9.96920996839e+36 used),
             (u'climatology_bounds', <type 'netCDF4._netCDF4.Variable'>
              float64 climatology_bounds(time, nbnds)
                  long_name: Climate Time Boundaries
                  units: hours since 1-1-1 00:00:0.0
              unlimited dimensions: 
              current shape = (12, 2)
              filling on, default _FillValue of 9.96920996839e+36 used),
             (u'air', <type 'netCDF4._netCDF4.Variable'>
              float32 air(time, level, lat, lon)
                  long_name: Monthly Long Term Mean of Air temperature
                  units: degC
                  add_offset: 0.0
                  scale_factor: 1.0
                  missing_value: -9.96921e+36
                  precision: 2
                  least_significant_digit: 1
                  var_desc: Air Temperature
                  level_desc: Multiple levels
                  statistic: Long Term Mean
                  parent_stat: Mean
                  valid_range: [-200.  300.]
                  actual_range: [-89.72233582  41.61600494]
                  dataset: NCEP Reanalysis Derived Products
              unlimited dimensions: 
              current shape = (12, 17, 73, 144)
              filling on, default _FillValue of 9.96920996839e+36 used),
             (u'valid_yr_count', <type 'netCDF4._netCDF4.Variable'>
              float32 valid_yr_count(time, level, lat, lon)
                  long_name: count of non-missing values used in mean
                  missing_value: 32767
                  add_offset: 0.0
                  scale_factor: 1.0
              unlimited dimensions: 
              current shape = (12, 17, 73, 144)
              filling on, default _FillValue of 9.96920996839e+36 used)])

In [44]:
air.variables['lat']


Out[44]:
<type 'netCDF4._netCDF4.Variable'>
float32 lat(lat)
    units: degrees_north
    actual_range: [ 90. -90.]
    long_name: Latitude
    standard_name: latitude
    axis: Y
unlimited dimensions: 
current shape = (73,)
filling on, default _FillValue of 9.96920996839e+36 used

In [45]:
lat = air.variables['lat'][:]

In [46]:
print lat


[ 90.   87.5  85.   82.5  80.   77.5  75.   72.5  70.   67.5  65.   62.5
  60.   57.5  55.   52.5  50.   47.5  45.   42.5  40.   37.5  35.   32.5
  30.   27.5  25.   22.5  20.   17.5  15.   12.5  10.    7.5   5.    2.5
   0.   -2.5  -5.   -7.5 -10.  -12.5 -15.  -17.5 -20.  -22.5 -25.  -27.5
 -30.  -32.5 -35.  -37.5 -40.  -42.5 -45.  -47.5 -50.  -52.5 -55.  -57.5
 -60.  -62.5 -65.  -67.5 -70.  -72.5 -75.  -77.5 -80.  -82.5 -85.  -87.5
 -90. ]

In [47]:
air


Out[47]:
<type 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4_CLASSIC data model, file format UNDEFINED):
    description:  Data from NCEP initialized reanalysis (4x/day).  These are interpolated to pressure surfaces from model (sigma) surfaces.
    platform: Model
    Conventions: COARDS
    not_missing_threshold_percent: minimum 3% values input to have non-missing output value
    history: Created 2011/07/12 by doMonthLTM
Converted to chunked, deflated non-packed NetCDF4 2014/09
    title: monthly ltm air from the NCEP Reanalysis
    References: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis.derived.html
    dimensions(sizes): lon(144), lat(73), level(17), time(12), nbnds(2)
    variables(dimensions): float32 level(level), float32 lat(lat), float32 lon(lon), float64 time(time), float64 climatology_bounds(time,nbnds), float32 air(time,level,lat,lon), float32 valid_yr_count(time,level,lat,lon)
    groups: 

In [48]:
air.variables['air']


Out[48]:
<type 'netCDF4._netCDF4.Variable'>
float32 air(time, level, lat, lon)
    long_name: Monthly Long Term Mean of Air temperature
    units: degC
    add_offset: 0.0
    scale_factor: 1.0
    missing_value: -9.96921e+36
    precision: 2
    least_significant_digit: 1
    var_desc: Air Temperature
    level_desc: Multiple levels
    statistic: Long Term Mean
    parent_stat: Mean
    valid_range: [-200.  300.]
    actual_range: [-89.72233582  41.61600494]
    dataset: NCEP Reanalysis Derived Products
unlimited dimensions: 
current shape = (12, 17, 73, 144)
filling on, default _FillValue of 9.96920996839e+36 used

In [49]:
lon = air.variables['lon'][:]

In [50]:
lon


Out[50]:
array([   0. ,    2.5,    5. ,    7.5,   10. ,   12.5,   15. ,   17.5,
         20. ,   22.5,   25. ,   27.5,   30. ,   32.5,   35. ,   37.5,
         40. ,   42.5,   45. ,   47.5,   50. ,   52.5,   55. ,   57.5,
         60. ,   62.5,   65. ,   67.5,   70. ,   72.5,   75. ,   77.5,
         80. ,   82.5,   85. ,   87.5,   90. ,   92.5,   95. ,   97.5,
        100. ,  102.5,  105. ,  107.5,  110. ,  112.5,  115. ,  117.5,
        120. ,  122.5,  125. ,  127.5,  130. ,  132.5,  135. ,  137.5,
        140. ,  142.5,  145. ,  147.5,  150. ,  152.5,  155. ,  157.5,
        160. ,  162.5,  165. ,  167.5,  170. ,  172.5,  175. ,  177.5,
        180. ,  182.5,  185. ,  187.5,  190. ,  192.5,  195. ,  197.5,
        200. ,  202.5,  205. ,  207.5,  210. ,  212.5,  215. ,  217.5,
        220. ,  222.5,  225. ,  227.5,  230. ,  232.5,  235. ,  237.5,
        240. ,  242.5,  245. ,  247.5,  250. ,  252.5,  255. ,  257.5,
        260. ,  262.5,  265. ,  267.5,  270. ,  272.5,  275. ,  277.5,
        280. ,  282.5,  285. ,  287.5,  290. ,  292.5,  295. ,  297.5,
        300. ,  302.5,  305. ,  307.5,  310. ,  312.5,  315. ,  317.5,
        320. ,  322.5,  325. ,  327.5,  330. ,  332.5,  335. ,  337.5,
        340. ,  342.5,  345. ,  347.5,  350. ,  352.5,  355. ,  357.5], dtype=float32)

In [51]:
air.variables['time']


Out[51]:
<type 'netCDF4._netCDF4.Variable'>
float64 time(time)
    long_name: Time
    delta_t: 0000-01-00 00:00:00
    avg_period: 0030-00-00 00:00:00
    prev_avg_period: 0000-01-00 00:00:00
    standard_name: time
    axis: T
    climatology: climatology_bounds
    climo_period: 1981/01/01 - 2010/12/31
    interpreted_actual_range: 0001/01/01 00:00:00 - 0001/12/01 00:00:00
    units: days since 1800-01-01 00:00:0.0
    actual_range: [-657073. -656739.]
unlimited dimensions: 
current shape = (12,)
filling on, default _FillValue of 9.96920996839e+36 used

In [52]:
time = air.variables['time'][:]

In [53]:
time


Out[53]:
array([-657073., -657042., -657014., -656983., -656953., -656922.,
       -656892., -656861., -656830., -656800., -656769., -656739.])

In [54]:
temperature = air.variables['air'][:]

In [55]:
temperature.shape


Out[55]:
(12, 17, 73, 144)

In [56]:
lat


Out[56]:
array([ 90. ,  87.5,  85. ,  82.5,  80. ,  77.5,  75. ,  72.5,  70. ,
        67.5,  65. ,  62.5,  60. ,  57.5,  55. ,  52.5,  50. ,  47.5,
        45. ,  42.5,  40. ,  37.5,  35. ,  32.5,  30. ,  27.5,  25. ,
        22.5,  20. ,  17.5,  15. ,  12.5,  10. ,   7.5,   5. ,   2.5,
         0. ,  -2.5,  -5. ,  -7.5, -10. , -12.5, -15. , -17.5, -20. ,
       -22.5, -25. , -27.5, -30. , -32.5, -35. , -37.5, -40. , -42.5,
       -45. , -47.5, -50. , -52.5, -55. , -57.5, -60. , -62.5, -65. ,
       -67.5, -70. , -72.5, -75. , -77.5, -80. , -82.5, -85. , -87.5, -90. ], dtype=float32)

In [57]:
lat[19]


Out[57]:
42.5

In [58]:
temperature[:,:,19,:].shape


Out[58]:
(12, 17, 144)

In [59]:
lon


Out[59]:
array([   0. ,    2.5,    5. ,    7.5,   10. ,   12.5,   15. ,   17.5,
         20. ,   22.5,   25. ,   27.5,   30. ,   32.5,   35. ,   37.5,
         40. ,   42.5,   45. ,   47.5,   50. ,   52.5,   55. ,   57.5,
         60. ,   62.5,   65. ,   67.5,   70. ,   72.5,   75. ,   77.5,
         80. ,   82.5,   85. ,   87.5,   90. ,   92.5,   95. ,   97.5,
        100. ,  102.5,  105. ,  107.5,  110. ,  112.5,  115. ,  117.5,
        120. ,  122.5,  125. ,  127.5,  130. ,  132.5,  135. ,  137.5,
        140. ,  142.5,  145. ,  147.5,  150. ,  152.5,  155. ,  157.5,
        160. ,  162.5,  165. ,  167.5,  170. ,  172.5,  175. ,  177.5,
        180. ,  182.5,  185. ,  187.5,  190. ,  192.5,  195. ,  197.5,
        200. ,  202.5,  205. ,  207.5,  210. ,  212.5,  215. ,  217.5,
        220. ,  222.5,  225. ,  227.5,  230. ,  232.5,  235. ,  237.5,
        240. ,  242.5,  245. ,  247.5,  250. ,  252.5,  255. ,  257.5,
        260. ,  262.5,  265. ,  267.5,  270. ,  272.5,  275. ,  277.5,
        280. ,  282.5,  285. ,  287.5,  290. ,  292.5,  295. ,  297.5,
        300. ,  302.5,  305. ,  307.5,  310. ,  312.5,  315. ,  317.5,
        320. ,  322.5,  325. ,  327.5,  330. ,  332.5,  335. ,  337.5,
        340. ,  342.5,  345. ,  347.5,  350. ,  352.5,  355. ,  357.5], dtype=float32)

In [60]:
360 - 73


Out[60]:
287

In [61]:
done = False
n = 0
while not done:
    if (lon[n] == 287.5):
        print n
        done = True
    else:
        n += 1


115

In [62]:
lon[115]


Out[62]:
287.5

In [63]:
lon_index_albany = 115
lat_index_albany = 19

In [64]:
temperature[:,:, lat_index_albany, lon_index_albany].shape


Out[64]:
(12, 17)

In [65]:
lev = air.variables['level']

In [66]:
lev


Out[66]:
<type 'netCDF4._netCDF4.Variable'>
float32 level(level)
    units: millibar
    long_name: Level
    positive: down
    GRIB_id: 100
    GRIB_name: hPa
    actual_range: [ 1000.    10.]
    axis: Z
unlimited dimensions: 
current shape = (17,)
filling on, default _FillValue of 9.96920996839e+36 used

In [67]:
lev = air.variables['level'][:]

In [68]:
lev


Out[68]:
array([ 1000.,   925.,   850.,   700.,   600.,   500.,   400.,   300.,
         250.,   200.,   150.,   100.,    70.,    50.,    30.,    20.,
          10.], dtype=float32)

In [69]:
lev[0]


Out[69]:
1000.0

In [70]:
temperature[:, 0, lat_index_albany, lon_index_albany]


Out[70]:
array([ -3.27266216,  -2.25366211,   1.3030045 ,   7.07533789,
        12.56800461,  17.8836689 ,  21.09667015,  20.80966949,
        17.19533539,  11.08667088,   5.73033762,  -0.55532897], dtype=float32)

In [71]:
plt.plot(temperature[:, 0, lat_index_albany, lon_index_albany])


Out[71]:
[<matplotlib.lines.Line2D at 0x1141a34d0>]

In [72]:
air.variables['air']


Out[72]:
<type 'netCDF4._netCDF4.Variable'>
float32 air(time, level, lat, lon)
    long_name: Monthly Long Term Mean of Air temperature
    units: degC
    add_offset: 0.0
    scale_factor: 1.0
    missing_value: -9.96921e+36
    precision: 2
    least_significant_digit: 1
    var_desc: Air Temperature
    level_desc: Multiple levels
    statistic: Long Term Mean
    parent_stat: Mean
    valid_range: [-200.  300.]
    actual_range: [-89.72233582  41.61600494]
    dataset: NCEP Reanalysis Derived Products
unlimited dimensions: 
current shape = (12, 17, 73, 144)
filling on, default _FillValue of 9.96920996839e+36 used

In [73]:
months = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']

In [37]:
plt.plot(temperature[:, 0, lat_index_albany, lon_index_albany])
plt.xticks(months)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-37-7529994bbf1f> in <module>()
      1 plt.plot(temperature[:, 0, lat_index_albany, lon_index_albany])
----> 2 plt.xticks(months)

/Users/br546577/anaconda/lib/python2.7/site-packages/matplotlib/pyplot.pyc in xticks(*args, **kwargs)
   1669         labels = ax.get_xticklabels()
   1670     elif len(args)==1:
-> 1671         locs = ax.set_xticks(args[0])
   1672         labels = ax.get_xticklabels()
   1673     elif len(args)==2:

/Users/br546577/anaconda/lib/python2.7/site-packages/matplotlib/axes/_base.pyc in set_xticks(self, ticks, minor)
   2848         ACCEPTS: sequence of floats
   2849         """
-> 2850         ret = self.xaxis.set_ticks(ticks, minor=minor)
   2851         self.stale = True
   2852         return ret

/Users/br546577/anaconda/lib/python2.7/site-packages/matplotlib/axis.pyc in set_ticks(self, ticks, minor)
   1596             xleft, xright = self.get_view_interval()
   1597             if xright > xleft:
-> 1598                 self.set_view_interval(min(ticks), max(ticks))
   1599             else:
   1600                 self.set_view_interval(max(ticks), min(ticks))

/Users/br546577/anaconda/lib/python2.7/site-packages/matplotlib/axis.pyc in set_view_interval(self, vmin, vmax, ignore)
   1928             if Vmin < Vmax:
   1929                 self.axes.viewLim.intervalx = (min(vmin, vmax, Vmin),
-> 1930                                                max(vmin, vmax, Vmax))
   1931             else:
   1932                 self.axes.viewLim.intervalx = (max(vmin, vmax, Vmin),

/Users/br546577/anaconda/lib/python2.7/site-packages/matplotlib/transforms.pyc in _set_intervalx(self, interval)
    986 
    987     def _set_intervalx(self, interval):
--> 988         self._points[:, 0] = interval
    989         self.invalidate()
    990     intervalx = property(BboxBase._get_intervalx, _set_intervalx)

ValueError: could not convert string to float: SEP

Plot soundings at Albany


In [74]:
jan = temperature[0, :, lat_index_albany, lon_index_albany]

In [75]:
print jan


[ -3.27266216  -6.17032766  -7.01099586 -11.81399632 -17.51132774
 -25.51899338 -35.95066071 -47.86932373 -52.46098709 -53.74898911
 -53.80265808 -57.7659874  -59.29999542 -59.80699539 -58.69099045
 -57.25065994 -51.58332443]

In [76]:
jan.shape


Out[76]:
(17,)

In [77]:
lev.shape


Out[77]:
(17,)

In [78]:
lev


Out[78]:
array([ 1000.,   925.,   850.,   700.,   600.,   500.,   400.,   300.,
         250.,   200.,   150.,   100.,    70.,    50.,    30.,    20.,
          10.], dtype=float32)

In [79]:
#  approximate vertical height axis in km
Z = -8 * np.log(lev/lev[0])
print Z


[ -0.           0.62369221   1.30015123   2.85339975   4.0866046
   5.54517746   7.3303256    9.63178253  11.09035492  12.87550354
  15.17695999  18.420681    21.27408028  23.96585846  28.05246353
  31.29618454  36.841362  ]

In [80]:
plt.plot(jan, Z, label='Jan')
plt.legend()
plt.xlabel('Temperature (deg C)')
plt.ylabel('height (km)')


Out[80]:
<matplotlib.text.Text at 0x113ddcb50>

In [81]:
for n in range(12):
    data = temperature[n, :, lat_index_albany, lon_index_albany]
    plt.plot(data, Z, label=months[n])
plt.legend()
plt.xlabel('Temperature (deg C)')
plt.ylabel('height (km)')
plt.xlim(-70, 50)


Out[81]:
(-70, 50)

In [82]:
#  plot cross section of zonal and annual average temperature as function of height and latitude

temp_zon = np.mean(temperature, axis=3)

In [83]:
temp_zon.shape


Out[83]:
(12, 17, 73)

In [84]:
temp_zon_ann = np.mean(temp_zon, axis=0)

In [85]:
temp_zon_ann.shape


Out[85]:
(17, 73)

In [86]:
temp_zon_ann = np.mean(temperature, axis=(0,3))

In [87]:
temp_zon_ann.shape


Out[87]:
(17, 73)

In [88]:
cax = plt.contourf(lat, Z, temp_zon_ann)
plt.colorbar(cax)
plt.xlabel('Latitude')
plt.ylabel('height (km)')
plt.title('Annual, zonal average observed air temperature', fontsize=14)


Out[88]:
<matplotlib.text.Text at 0x11569f550>

In [ ]:


In [ ]: