In [ ]:
import netCDF4 # Note: python is case-sensitive!
import numpy as np
Let's create a new, empty netCDF file named 'data/new.nc', opened for writing.
Be careful, opening a file with 'w' will clobber any existing data (unless clobber=False
is used, in which case an exception is raised if the file already exists).
mode='r' is the default. mode='a' opens an existing file and allows appending (it does not clobber existing data). The format can be one of NETCDF3_CLASSIC, NETCDF3_64BIT, NETCDF4_CLASSIC, or NETCDF4 (the default). NETCDF4_CLASSIC uses HDF5 for the underlying storage layer (as does NETCDF4) but enforces the classic netCDF 3 data model so the data can be read with older clients.
In [ ]:
# Make sure any dataset from a previous run is not still open.
# Catch only the expected failures: ncfile not defined yet (NameError)
# or already closed (RuntimeError) -- a bare except would hide real bugs.
try:
    ncfile.close()
except (NameError, RuntimeError):
    pass
# 'w' clobbers any existing data/new.nc (pass clobber=False to forbid that).
ncfile = netCDF4.Dataset('data/new.nc', mode='w', format='NETCDF4_CLASSIC')
print(ncfile)
The ncfile object we created is a container for dimensions, variables, and attributes. First, let's create some dimensions using the createDimension method. Dimensions are stored in the ncfile.dimensions dictionary. Setting the dimension length to 0 or None makes it unlimited, so it can grow. In NETCDF4 files, any variable's dimension can be unlimited. In NETCDF4_CLASSIC and NETCDF3* files, only one dimension per variable can be unlimited, and it must be the leftmost (slowest varying) dimension.
In [ ]:
# Create the file's dimensions: a fixed 73 x 144 lat/lon grid and an
# unlimited time axis that can grow as data is appended.
lat_dim = ncfile.createDimension('lat', 73)      # latitude axis
lon_dim = ncfile.createDimension('lon', 144)     # longitude axis
time_dim = ncfile.createDimension('time', None)  # unlimited axis (can be appended to)
# ncfile.dimensions maps name -> Dimension; print each (name, Dimension) pair.
for dim in ncfile.dimensions.items():
    print(dim)
netCDF attributes can be created just like you would for any python object.
In [ ]:
# Global (file-level) attributes are set as plain Python attributes on the Dataset.
ncfile.title = 'My model data'
print(ncfile.title)
Try adding some more attributes...
Now let's add some variables and store some data in them.
The createVariable method takes 3 mandatory args: the variable name, the data type, and a tuple of dimension names. Variables are stored in the ncfile.variables dictionary. In a NETCDF4 file, any unlimited dimension must be the leftmost one. createVariable also accepts optional keyword arguments (with format='NETCDF4') to control compression, chunking, fill_value, etc.
In [ ]:
# Define two variables with the same names as dimensions,
# a conventional way to define "coordinate variables".
lat = ncfile.createVariable('lat', np.float32, ('lat',))
lat.units = 'degrees_north'
lat.long_name = 'latitude'
lon = ncfile.createVariable('lon', np.float32, ('lon',))
lon.units = 'degrees_east'
lon.long_name = 'longitude'
time = ncfile.createVariable('time', np.float64, ('time',))
time.units = 'hours since 1800-01-01'
time.long_name = 'time'
# Define a 3D variable to hold the data.
# Note: the unlimited dimension must be leftmost in a classic-model file.
temp = ncfile.createVariable('temp', np.float64, ('time','lat','lon'))
temp.units = 'K'                        # degrees Kelvin
temp.standard_name = 'air_temperature'  # this is a CF standard name
print(temp)
In [ ]:
# Inspect the attributes the library pre-defines on every Variable.
print("-- Some pre-defined attributes for variable temp:")
print("temp.dimensions:", temp.dimensions)
print("temp.shape:", temp.shape)
print("temp.dtype:", temp.dtype)
print("temp.ndim:", temp.ndim)
In [ ]:
nlats = len(lat_dim); nlons = len(lon_dim); ntimes = 3
# Write latitudes, longitudes.
# Note: the ":" is necessary in these "write" statements.
# 73 x 144 is the classic 2.5-degree global grid: latitudes run pole to
# pole inclusive, longitudes 0E eastward up to (but excluding) 360E.
lat[:] = -90. + (180./(nlats-1))*np.arange(nlats)  # south pole to north pole (180./nlats never reached +90)
lon[:] = (360./nlons)*np.arange(nlons)             # Greenwich meridian eastward (original used nlats here: copy/paste bug)
# create a 3D array of random numbers
data_arr = np.random.uniform(low=280, high=330, size=(ntimes, nlats, nlons))
# Write the data.  This writes the whole 3D netCDF variable all at once.
temp[:,:,:] = data_arr  # appends data along the unlimited (time) dimension
print("-- Wrote data, temp.shape is now ", temp.shape)
# read data back from variable (by slicing it), print min and max
print("-- Min/Max values:", temp[:,:,:].min(), temp[:,:,:].max())
Let's add another time slice....
In [ ]:
# create a 2D array of random numbers
data_slice = np.random.uniform(low=280, high=330, size=(nlats, nlons))
# Writing at index 3 (one past the current size) grows the unlimited
# time dimension automatically.
temp[3,:,:] = data_slice  # appends the 4th time slice
print("-- Wrote more data, temp.shape is now ", temp.shape)
Note that we have not yet written any data to the time variable. It automatically grew as we appended data along the time dimension to the variable temp
, but the data is missing.
In [ ]:
# The time variable grew with the unlimited dimension, but no values
# have been written to it yet, so reading it back gives masked values.
print(time)
times_arr = time[:]
print(type(times_arr), times_arr)  # dashes indicate masked values (where data has not yet been written)
Let's write some data into the time variable.
In [ ]:
from datetime import datetime
from netCDF4 import date2num, num2date
# 1st 4 days of October.
dates = [datetime(2014,10,1,0), datetime(2014,10,2,0), datetime(2014,10,3,0), datetime(2014,10,4,0)]
print(dates)
# Convert datetimes to the numeric values implied by time.units
# ('hours since 1800-01-01').
times = date2num(dates, time.units)
print(times, time.units)  # numeric values
time[:] = times
# read time data back, convert to datetime instances, check values.
print(num2date(time[:], time.units))
In [ ]:
# first print the Dataset object to see what we've got
print(ncfile)
# close the Dataset -- flushes everything to disk.
ncfile.close()
print('Dataset is closed!')
Read SREF 24-h forecast precip probability (exercise from reading_netCDF notebook) write to a file (with compression).
In [ ]:
# Open a second file in full NETCDF4 mode (required for groups,
# compound types, and vlen types used below).
ncfile = netCDF4.Dataset('data/new2.nc', 'w', format='NETCDF4')
print(ncfile)
netCDF version 4 added support for organizing data in hierarchical groups. Opening a netCDF4.Dataset creates a special group, called the 'root group', which is similar to the root directory in a unix filesystem. Subgroups are created using the createGroup method and are stored in the groups dictionary. Here we create two groups to hold data for two different model runs.
In [ ]:
# Create two subgroups of the root group, one per model run.
grp1 = ncfile.createGroup('model_run1')
grp2 = ncfile.createGroup('model_run2')
# ncfile.groups maps name -> Group; print each (name, Group) pair.
for grp in ncfile.groups.items():
    print(grp)
Create some dimensions in the root group.
In [ ]:
# Root-group dimensions: the same 73 x 144 lat/lon grid as before,
# plus an unlimited (appendable) time axis.
NLATS, NLONS = 73, 144
lat_dim = ncfile.createDimension('lat', NLATS)
lon_dim = ncfile.createDimension('lon', NLONS)
time_dim = ncfile.createDimension('time', None)
Now create a variable in grp1 and grp2. The library will search recursively upwards in the group tree to find the dimensions (which in this case are defined one level up).
In [ ]:
# One compressed (zlib=True) temperature variable per group; the dimensions
# are found by searching upward from each group to the root group.
temp1 = grp1.createVariable('temp', np.float64, ('time','lat','lon'), zlib=True)
temp2 = grp2.createVariable('temp', np.float64, ('time','lat','lon'), zlib=True)
for grp in ncfile.groups.items():  # shows that each group now contains 1 variable
    print(grp)
Here we create a variable with a compound data type to represent complex data (there is no native complex data type in netCDF). Compound data types are created with the createCompoundType method.
In [ ]:
# create complex128 numpy structured data type (real + imaginary float64 fields)
complex128 = np.dtype([('real', np.float64), ('imag', np.float64)])
# using this numpy dtype, create a netCDF compound data type object;
# the string name can be used as a key to access the datatype from the cmptypes dictionary.
complex128_t = ncfile.createCompoundType(complex128, 'complex128')
# create a variable with this data type, write some data to it.
cmplxvar = grp1.createVariable('cmplx_var', complex128_t, ('time','lat','lon'))
# first create some complex random data
nlats = len(lat_dim); nlons = len(lon_dim)
data_arr_cmplx = np.random.uniform(size=(nlats,nlons)) + 1.j*np.random.uniform(size=(nlats,nlons))
# copy the complex data into a numpy structured array with the compound layout
data_arr = np.empty((nlats,nlons), complex128)
data_arr['real'] = data_arr_cmplx.real; data_arr['imag'] = data_arr_cmplx.imag
cmplxvar[0] = data_arr  # write the data to the variable (appending to time dimension)
print(cmplxvar)
data_out = cmplxvar[0]  # read one time slice of data back from the variable
print(data_out.dtype, data_out.shape, data_out[0,0])
netCDF 4 has support for variable-length or "ragged" arrays. These are arrays of variable-length sequences having the same type. Variable-length data types are created with the createVLType method.
In [ ]:
vlen_t = ncfile.createVLType(np.int64, 'phony_vlen')
A new variable can then be created using this datatype.
In [ ]:
vlvar = grp2.createVariable('phony_vlen_var', vlen_t, ('time','lat','lon'))
Since there is no native vlen datatype in numpy, vlen arrays are represented in python as object arrays (arrays of dtype object
).
In [ ]:
# Numpy has no native vlen dtype, so ragged data is held in an object array:
# each cell is an int sequence of random length 1..9.
vlen_data = np.empty((nlats, nlons), object)
for i in range(nlons):
    for j in range(nlats):
        size = np.random.randint(1, 10, size=1)                 # random length of sequence
        vlen_data[j,i] = np.random.randint(0, 10, size=size)    # generate random sequence
vlvar[0] = vlen_data  # append along unlimited dimension (time)
print(vlvar)
print('data =\n', vlvar[:])
Close the Dataset and examine the contents with ncdump.
In [ ]:
# Close the file so all buffered data is flushed to disk.
ncfile.close()
# IPython shell magic (not plain Python): dump the file header with ncdump.
!ncdump -h data/new2.nc