netCDF is a collection of formats for storing arrays
netCDF classic
netCDF 64 bit offset
NetCDF4
Example for an ocean model dataset:
Used to underpin interfaces to other languages such as python (e.g. python package netCDF4)
Include ncdump/ncgen to convert to and from human readable CDL format.
Command line utilities to extract, create and operate data in netCDF files.
> ncks -v u_wind -d lat,50.,51. -d lon,0.,5 inputfile.nc outputfile.nc
ncview, pyncview, panoply, etc...
ArcGIS, QGIS, Surfer,Ferret etc...
Alternative to plain netCDF4 access from python.
Brings the power of pandas to environmental sciences, by providing N-dimensional variants of the core pandas data structures:
worth using for multidimensional data even when not using
Pandas | xarray |
---|---|
Series | DataArray |
DataFrame | Dataset |
DataArray uses names of dimensions making it easier to track than by using axis numbers. It is possible to write:
da.sel(time='2000-01-01')
or da.mean(dim='time')
intead of df.mean(0)
HTML documentation: http://xarray.pydata.org/
In [390]:
# Import everything that we are going to need... but not more
import pandas as pd
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap, cm
%matplotlib inline
In [391]:
DF=pd.DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])],
orient='index', columns=['one', 'two', 'three'])
DF.mean(0)
Out[391]:
In [392]:
pd_s=pd.Series(range(3), index=list('abc'), name='foo')
print(pd_s)
print()
#conver 1D series to ND aware dataArray
print(xr.DataArray(pd_s))
$ ncdump data/cefas_GETM_nwes.nc | more
netcdf cefas_GETM_nwes {
dimensions:
latc = 360 ;
lonc = 396 ;
time = UNLIMITED ; // (6 currently)
level = 5 ;
variables:
double bathymetry(latc, lonc) ;
bathymetry:units = "m" ;
bathymetry:long_name = "bathymetry" ;
bathymetry:valid_range = -5., 4000. ;
bathymetry:_FillValue = -10. ;
bathymetry:missing_value = -10. ;
float h(time, level, latc, lonc) ;
h:units = "m" ;
h:long_name = "layer thickness" ;
h:_FillValue = -9999.f ;
h:missing_value = -9999.f ;
double latc(latc) ;
latc:units = "degrees_north" ;
double level(level) ;
level:units = "level" ;
double lonc(lonc) ;
lonc:units = "degrees_east" ;
float temp(time, level, latc, lonc) ;
temp:units = "degC" ;
temp:long_name = "temperature" ;
temp:valid_range = -2.f, 40.f ;
temp:_FillValue = -9999.f ;
temp:missing_value = -9999.f ;
double time(time) ;
time:long_name = "time" ;
time:units = "seconds since 1996-01-01 00:00:00" ;
Now let's go back to python and use xarray.
In [393]:
# Naughty datasets might require decode_cf=False
# Here it just needed decode_times=False
naughty_data = xr.open_dataset(
'http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods',
decode_times=False)
naughty_data
Out[393]:
In [394]:
GETM = xr.open_dataset('../data/cefas_GETM_nwes.nc4')
GETM
Out[394]:
In [395]:
GETM.dims
Out[395]:
In [396]:
print(type(GETM.coords['latc']))
GETM.coords['latc'].shape
Out[396]:
In [397]:
# List name of dataset attributes
GETM.attrs.keys()
Out[397]:
In [398]:
# List variable names
GETM.data_vars.keys()
Out[398]:
Extract variable from dataset
In [399]:
temp=GETM['temp']
print(type( temp ))
temp.shape
Out[399]:
Access variable attributes
In [633]:
# print varaible attributes
for at in temp.attrs:
print(at+':\t\t',end=' ')
print(temp.attrs[at])
In [400]:
temp[0,0,90,100]
Out[400]:
In [401]:
#positional by integer
print( temp[0,2,:,:].shape )
# positional by label
print( temp.loc['1996-02-02T01:00:00',:,:,:].shape )
# by name and integer
print( temp.isel(level=1,latc=90,lonc=100).shape )
# by name and label
print( temp.sel(time='1996-02-02T01:00:00').shape )
#temp.loc
In [538]:
#GETM.sel(level=1)['temp']
GETM['temp'].sel(level=1,lonc=-5.0,latc=-50.0, method='nearest')
Out[538]:
In [539]:
try:
GETM['temp'].sel(level=1,lonc=-5.0,latc=-50.0, method='nearest',tolerance=0.5)
except KeyError:
print('ERROR: outside tolerance of '+str(0.5))
In [600]:
# Define a general mapping function using basemap
def do_map(var,title,units):
latc=GETM.coords['latc'].values
lonc=GETM.coords['lonc'].values
# create figure and axes instances
fig = plt.figure()
ax = fig.add_axes()
# create polar stereographic Basemap instance.
m = Basemap(projection='stere', lon_0=0.,lat_0=60.,
llcrnrlat=49,urcrnrlat=60,
llcrnrlon=-10,urcrnrlon=15,resolution='l')
# bondaries resolution can be 'c','l','i','h' or 'f'
m.drawcoastlines(linewidth=0.5)
m.fillcontinents(color='0.8')
parallels = np.arange(-45,70,5)
m.drawparallels(parallels,labels=[1,0,0,0],fontsize=10)
m.drawparallels?
meridians = np.arange(-15,20,5)
m.drawmeridians(meridians,labels=[0,0,0,1],fontsize=10)
# create arrays of coordinates for contourf
lon2d,lat2d=np.meshgrid(lonc,latc)
# draw filled contours.
m.contourf(lon2d,lat2d,var,50,latlon=True)
# add colorbar.
cbar = m.colorbar(cmap=plt.cm.coolwarm,location='right')
cbar.set_label(units)
# add title
plt.title(title)
In [606]:
# Extract attributes
units=GETM['temp'].attrs['units']
var_long_name=GETM['temp'].attrs['long_name']
# and plot
do_map(var=time_ave.sel(level=21),
units=units,
title='Time averaged '+var_long_name)
In [611]:
# But often, this will do
time_ave.sel(level=21).plot()
Out[611]:
In [593]:
top=GETM['temp'].isel(time=0,level=4)
bottom=GETM['temp'].isel(time=0,level=0)
diff=top-bottom
diff.plot()
Out[593]:
In [634]:
# average over time
time_ave = GETM['temp'].mean('time')
#average over time and level (vertical)
timelev_ave=GETM['temp'].mean(['time','level'])
timelev_ave.plot()
Out[634]:
In [636]:
#zonal average (vertical)
timelon_ave=GETM['temp'].mean(['time','lonc']).isel(level=4)
timelon_ave.plot()
Out[636]:
In [637]:
ds=GETM[['temp']].mean('time','level')
ds.to_netcdf('../data/temp_avg_level_time.nc')
In [638]:
print(type( GETM[['temp']]) )
print(type( GETM['temp']) )
In [520]:
Out[520]:
In [608]:
# bathy = GETM
# bedtemp=GETM
# plt.scatter( , ,marker='.')