Convert existing UMaine time series NetCDF files from 4D (T,Z,Y,X) "grid" format to CF-1.6 "timeSeries" format


In [1]:
import glob
import os
import StringIO

import netCDF4
import numpy

In [2]:
# Directory searched for input NetCDF files, and the directory the converted
# files are written to.  Alternate input locations used previously:
#   /usgs/data2/emontgomery/stellwagen/Data/ARGO_MERCHANT
#   /usgs/data2/emontgomery/stellwagen/Data/MVCO_11
idir = '/usgs/data2/rsignell/data/umaine'
odir = '/usgs/data2/rsignell/data/umaine/cf'

# Work from the input directory so input files can be addressed by bare name.
os.chdir(idir)

In [3]:
# find netcdf files 
types = ('*.cdf', '*.nc') # the tuple of file types
files = []
for file in types:
    files.extend(glob.glob(file))
print files


['A01.sbe37.realtime.1m.nc']

Read a sample old file


In [4]:
file = files[-1]
print file
nc = netCDF4.Dataset(file)


A01.sbe37.realtime.1m.nc

In [5]:
# Names of all variables stored in the input file.
vars = list(nc.variables.keys())

In [6]:
# Variables treated as coordinates; everything else is handled as data.
coord_vars = [
    'time',
    'time2',
    'depth',
    'lat',
    'lon',
]

In [7]:
# find data variables by removing coordinate variables from the variable list
data_vars = [var for var in vars if var not in coord_vars]
print data_vars


[u'offset_time', u'conductivity', u'conductivity_qc', u'temperature', u'temperature_qc', u'salinity', u'salinity_qc', u'sigma_t', u'sigma_t_qc', u'time_created', u'time_modified']

In [8]:
nt = len(nc.dimensions['time'])
nz = len(nc.dimensions['depth'])
print nt,nz


1041 1

Write a sample "CF-1.6, timeSeries" File


In [9]:
id = file.split('.')[0]
print id


A01

In [10]:
# The output file keeps the input file name but lives in the output directory.
# Create that directory first if it is missing: opening a file for writing
# inside a directory that does not exist would fail.
if not os.path.isdir(odir):
    os.makedirs(odir)
ofile = os.path.join(odir, file)

In [11]:
# Open the output file for writing (replacing any existing file) and define
# its dimensions.
nco = netCDF4.Dataset(ofile, mode='w', clobber=True)

nco.createDimension('time', nt)
# A depth dimension is only defined when there is more than one depth level;
# with a single level, depth is written as a scalar variable instead.
if nz > 1:
    nco.createDimension('depth', nz)

# Width of the fixed-length character array that holds the station name.
nchar = 20
nco.createDimension('nchar', nchar)


Out[11]:
<netCDF4.Dimension at 0x27e7690>

In [12]:
# Define the coordinate variables of the output file.
time_v = nco.createVariable('time', 'f8', ('time',))

# Longitude and latitude are scalars.
lon_v = nco.createVariable('lon', 'f4')
lat_v = nco.createVariable('lat', 'f4')

# Depth is a 1-D variable when there are several levels, otherwise a scalar
# (an empty dimension tuple is what createVariable uses by default).
depth_dims = ('depth',) if nz > 1 else ()
depth_v = nco.createVariable('depth', 'f4', dimensions=depth_dims)

# Station name, stored one character per element.
station_v = nco.createVariable('site', 'S1', ('nchar',))

In [13]:
# Write global attributes.
#
# Start from the global attributes of the input file, then add or override the
# entries needed for a CF-1.6 discrete-sampling-geometry file and for
# discovery metadata.
g_attdict = nc.__dict__
g_attdict['Conventions'] = 'CF-1.6'

# More than one depth level means a profile at each time step.
g_attdict['featureType'] = 'timeSeriesProfile' if nz > 1 else 'timeSeries'

g_attdict['naming_authority'] = 'edu.maine'
g_attdict['id'] = id
# NOTE(review): 'source' and 'institution' name USGS / Woods Hole while the
# naming authority and creator below point at UMaine -- confirm these values
# are intended for this data set.
g_attdict['source'] = 'USGS'
g_attdict['institution'] = 'Woods Hole Coastal and Marine Science Center'
# Separate the two parts so the title reads "USGS A01" rather than "USGSA01".
g_attdict['title'] = g_attdict['source'] + ' ' + g_attdict['id']
g_attdict['keywords']='Oceans > Ocean Pressure > Water Pressure, Oceans > Ocean Temperature > Water Temperature, Oceans > Salinity/Density > Conductivity, Oceans > Salinity/Density > Salinity'
g_attdict['keywords_vocabulary']='GCMD Science Keywords'
g_attdict['standard_name_vocabulary'] = 'CF-1.6'

# 'contact' and 'institution_url' come from the input file.  Only derive the
# creator/publisher entries from them when they are actually present, so an
# input file without them does not abort the conversion with a KeyError.
contact = g_attdict.get('contact')
institution_url = g_attdict.get('institution_url')

if contact is not None:
    g_attdict['creator_email'] = contact
g_attdict['creator_name'] =  'Neil Pettigrew'
if institution_url is not None:
    g_attdict['publisher_url'] = institution_url
g_attdict['publisher_name'] =  'Neil Pettigrew'
g_attdict['publisher_phone'] = '207-581-4381'
if contact is not None:
    g_attdict['publisher_email'] = contact

nco.setncatts(g_attdict)

In [14]:
# Station identifier, stored as a fixed-width character array of length nchar.
station_v.setncattr('cf_role', 'timeseries_id')
station_v.setncattr('standard_name', 'station_id')
data = numpy.array([id], dtype='S' + repr(nchar))
station_v[:] = netCDF4.stringtochar(data)

# Remaining coordinate variables: for each one, set its attributes in the
# listed order and then copy the values from the same-named input variable.
coord_specs = [
    # time
    (time_v, 'time', [
        ('units', 'days since 1858-11-17 00:00:00 +0:00'),
        ('standard_name', 'time'),
        ('calendar', 'gregorian'),
    ]),
    # longitude
    (lon_v, 'lon', [
        ('units', 'degree_east'),
        ('standard_name', 'longitude'),
    ]),
    # latitude
    (lat_v, 'lat', [
        ('units', 'degree_north'),
        ('standard_name', 'latitude'),
    ]),
    # depth
    (depth_v, 'depth', [
        ('units', 'm'),
        ('standard_name', 'depth'),
        ('positive', 'down'),
        ('axis', 'Z'),
    ]),
]
for out_var, src_name, att_pairs in coord_specs:
    for att_name, att_value in att_pairs:
        out_var.setncattr(att_name, att_value)
    out_var[:] = nc.variables[src_name][:]

In [15]:
# Display the depth value(s) stored in the input file.
nc.variables['depth'][:]


Out[15]:
array([ 1.])

In [16]:
# Variable mapping: CF standard name -> list of (lower-case) input variable
# names that should receive it.  A variable is matched by exact name, so
# companion variables such as 'temperature_qc' are not affected.
#
# The short codes (u_1205, t_20, ...) are kept as they were; the plain names
# (temperature, salinity, conductivity, sigma_t) cover the variables found in
# the UMaine files, which otherwise would never match an entry.
d={}
d['eastward_sea_water_velocity']=['u_1205', 'u']
d['northward_sea_water_velocity']=['v_1206', 'v']
d['sea_water_temperature']=['t_20', 'temperature']
d['sea_water_salinity'] = ['s_40', 'salinity']
d['sea_water_electrical_conductivity'] = ['conductivity']
d['sea_water_sigma_t'] = ['sigma_t']

In [17]:
# Create the data variables.
#
# For every non-coordinate variable of the input file: create a same-named
# variable in the output file, copy its attributes (adding 'coordinates' and,
# where the mapping d has exactly one match, 'standard_name'), then copy the
# data.
var_v=[]
for varname in data_vars:
    ncvar = nc.variables[varname]

    # Keep only those dimensions of the input variable that were defined in
    # the output file, in their original order.  Dimensions that were not
    # carried over (e.g. 'depth' when there is a single level) are dropped, so
    # no variable is ever created on a dimension that does not exist.
    out_dims = tuple(dim for dim in ncvar.dimensions if dim in nco.dimensions)

    # Load the old variable attributes and modify them where necessary.
    attdict = ncvar.__dict__

    # _FillValue cannot be written together with the ordinary attributes; it
    # has to be handed to createVariable through its fill_value keyword.
    fill_value = attdict.pop('_FillValue', None)
    var = nco.createVariable(varname, ncvar.dtype, out_dims,
                             fill_value=fill_value)

    attdict['coordinates'] = 'time lon lat depth'

    # Assign standard_name if the variable name appears in the mapping.
    a = [k for (k, v) in d.items() if varname.lower() in v]
    if len(a) == 1:
        attdict['standard_name'] = a[0]

    # Write the variable attributes.
    var.setncatts(attdict)

    # Write the data, reshaped to the output variable's shape.  This removes
    # the dropped dimensions and raises a ValueError if one of them held more
    # than one element, instead of writing misaligned data.
    var[:] = numpy.reshape(ncvar[:], var.shape)
    var_v.append(var)

In [18]:
# Close the output file so that everything written is saved, and release the
# input file as well.
nco.close()
nc.close()