In [1]:
import glob
import os
import re
import StringIO

import netCDF4
import numpy
import pandas as pd
In [2]:
projs = '''
ARGO_MERCHANT,B. Butman,Argo Merchant Experiment,A moored array deployed after the ARGO MERCHANT ran aground onNantucket Shoals designed to help understand the fate of the spilled oil.
BUZZ_BAY,B. Butman,Currents and Sediment Transport in Buzzards Bay,Investigation of the near-bottom circulation in Buzzards Bay and consequent transport of fine-grained sediments that may be contaminated with PCBs from inner New Bedford Harbor.
CAMP,B. Butman,California Area Monitoring Program (CAMP),A four-year multi-disciplinary field and laboratory study to investigate the sediment transport regime in the vicinity of production drilling rigs in the Santa Barbara Basin
CAPE_COD_BAY,B. Butman,Currents and Sediment Transport in Cape Cod Bay,A pilot study to determine the effect of winter storms on sediment movement at two potential dredge spoil disposal areas.
CC_MISC,B. Butman,Transport studies - Nauset Inlet,Part of a collaborative study of sediment movement in Nauset Inlet.
DEEP_REEF,J. Lacey,Gulf of Mexico - Pinnacles,Pressure data from the Gulf of Mexico
DWDS_106,B. Butman,Sediment Transport at Deep Water Dump Site 106,Near-bottom current measurements to understand the fate and transport of sludge from the New York Metropolitan region discharged at the sea surface.
ECOHAB_II,R. Signell,Ecology of Harmful Algal Blooms (ECOHAB-II),A field program to continue investigating the transport and fate of toxic dinoflagellate blooms in the western Gulf of Maine.
ECOHAB_I,R. Signell,Ecology of Harmful Algal Blooms (ECOHAB-I),A field program to study the transport and fate of toxic dinoflagellate blooms in the western Gulf of Maine.
EUROSTRATAFORM,C. Sherwood,EuroSTRATAFORM,The EuroSTRATAFORM Po and Apennine Sediment Transport and Accumulation (PASTA) experiment was an international study of sediment-transport processes and formation of geological strata in the Adriatic Sea.
FARALLONES,M. Noble,Farallons,Program to measure the currents and circulation on the continental slope off San Francisco CA and thus infer the transport of dredged materialat the newly-established deep-water disposal site.
GB_SED,B. Butman,Georges Bank Current and Sediment Transport Studies,A series of studies to assess environmental hazards to petroleum development in the Georges Bank and New England Shelf region
GLOBEC_GB,R. Schlitz,GLOBEC Georges Bank Program,A moored array program to investigate the circulation and mixing of plankton on Georges Bank.
GLOBEC_GSC,R. Schlitz,GLOBEC Great South Channel Circulation Experiment,A moored array program to investigate the recirculation of water and plankton around Georges Bank
GULF_MAINE,B. Butman,Deep Circulation in the Gulf of Maine,A two-year field study to investigate the deep flow between the major basins in the Gulf of Maine and the effects on the distribution of suspended sediments.
HUDSON_SVALLEY,B. Butman,Circulation and Sediment Transport in the Hudson Shelf Valley,Field experiments have been carried out to understand the transport of sediments and associated contaminants in the Hudson Shelf Valley offshore of New York.
KARIN_RIDGE,M. Noble,Karin Ridge Experiment,Current measurements collected at 2 sites in Karin Ridge Seamount.
LYDONIA_C,B. Butman,Lydonia Canyon Dynamics Experiment,A major field experiment to determine the importance of submarine canyons in sediment transport along and across the continental margin.
MAB_SED,B. Butman,Sediment Transport Observations in the Middle Atlantic Bight,A series of studies to assess environmental hazards to petroleum development in the Middle Atlantic Bight.
MAMALA_BAY,D. Cacchione,Mamala bay Experiment,Current measurements collected at 350-450 meters in Mamala Bay near Waikiki Beach.
MBAY_CIRC,R. Signell, Massachusetts Bay Circulation Experiment,Current measurements collected at 6 sites in Massachusetts Bay throughout the year to map the tidal wind and density driven currents.
MBAY_IWAVE,B. Butman,Massachusetts Bay Internal Wave Experiment,A 1-month 4-element moored array experiment to measure the currents associated with large-amplitude internal waves generated by tidal flow across Stellwagen Bank.
MBAY_LTB,B. Butman,Long-term observations in Massachusetts Bay; Site B-Scituate,Measurements of currents and other oceanographic properties were made to assess the impact of sewage discharge from the proposed outfall site.
MBAY_LT,B. Butman,Long-term observations in Massachusetts Bay; Site A-Boston Harbor,Measurements of currents and other oceanographic properties were made to assess the impact of sewage discharge from the proposed outfall site.
MBAY_STELL,R. Signell,Monitoring on Stellwagen Bank,A year-long series of current measurements on the eastern flank of Stellwagen Bank to document the currents at the mouth of Massachusetts Bay driven by the Maine Coastal current.
MBAY_WEST,B. Butman,Currents and Sediment Transport in Western Massachusetts Bay,A pilot winter-time experiment to investigate circulation and sediment transport. Designed to provide information to aid in citing the new ocean outfall for the Boston sewer system.
MOBILE_BAY,B. Butman,Mobile Bay Study,Measure currents and transport out of Mobile Bay.
MONTEREY_BAY,M. Noble,Monterey Bay National Marine Sanctuary Program,Part of a large multi-disciplinary experiment to characterize the geologic environment and to generate a sediment budget.
MONTEREY_CAN,M. Noble,Monterey Canyon Experiment, A program to determine the mechanisms that govern the circulation within and the transport of sediment and water through Monterey Submarine Canyon.
MYRTLEBEACH,J. Warner,Myrtle Beach Experiment SC,Measurements collected as part of a larger study to understand the physical processes that control the transport of sediments in Long Bay South Carolina.
NE_SLOPE,B. Butman,Currents on the New England Continental Slope,A study designed to describe the currents and to investigate the transport of sediment from the shelf to the slope.
OCEANOG_C,B. Butman,Oceanographer Canyon Dynamics Experiment,A field experiment to determine the importance of submarine canyons in sediment transport along and across the continental margin.
ORANGE_COUNTY,M. Noble,Orange County Sanitation District Studies,Observations to monitor coastal ocean process that transport suspended material and associated comtaminants across the shelf
PONCHARTRAIN,R. Signell,Lake Ponchartrain Project,A series of moored array studies to investigate the circulation and particle transport in Lake Pontchartrain.
PV_SHELF04,M. Noble,Palos Verdes Shelf 2004,Additional observations to estimate the quantity and direction of sediment erosion and transport on the shelf near the White Point ocean outfalls.
PV_SHELF07,M. Noble,Palos Verdes Shelf 2007,Follow-up observations to evaluate how often coastal ocean processes move the DDT contaminated sediments near the White Point ocean outfalls.
PV_SHELF,M. Noble,Palos Verdes Shelf Study,Initial observations of currents and circulation near the White Point ocean outfalls determine how often coastal ocean processes move the DDT contaminated sediments in this region.
SAB_SED,B. Butman,Sediment Transport Observations in the Southern Atlantic Bight,A series of studies to assess environmental hazards to petroleum development in the South Atlantic Bight.
SOUTHERN_CAL,M. Noble,Southern California Project,A series of moorings were deployed to understand how coastal ocean processes that move sediments change with location on the shelf.
STRESS,B. Butman,Sediment Transport on Shelves and Slopes (STRESS),Experiment on the California continental margin to investigate storm-driven sediment transport.
WRIGHTSVILLE,R. Thieler,Wrightsville Beach Study, Measurements of bottom currents and waves to investigate the flow field and sediment transport in a rippled scour depression offshore of Wrightsville Beach NC.
DIAMONDSHOALS,J. Warner,Cape Hatteras- Diamond Shoals,This experiment was designed to investigate the ocean circulation and sediment transport dynamics at Diamond Shoals NC.
CHANDELEUR,C. Sherwood,Chandeleur Islands Oceanographic Measurements,A program to measure waves water levels and currents near the Chandeleur Islands Louisiana and adjacent berm construction site.
WFAL,N. Ganju,West Falmouth Harbor Fluxes,Oceanographic and water-quality observations made at six locations in West Falmouth Harbor and Buzzards Bay.
BW2011,N. Ganju, Blackwater 2011, Oceanographic and Water-Quality Measurements made at several sites in 2 watersheds in Blackwater National Wildlife Refuge.
MVCO_11,C. Sherwood, OASIS MVCO 2011, Near-seabed Oceanographic Observations made as part of the 2011 OASIS Project at the MVCO.
hurrIrene_bb,B. Butman, Observations in Buzzards Bay during and after a Hurricane, Oceanographic data collected in Buzzards Bay MA during Hurricane Irene August 2011.'''
In [3]:
project = pd.read_csv(StringIO.StringIO(projs.strip()), sep=",\s*",index_col='project_id',names=['project_id', 'project_pi', 'project_name','project_summary'])
In [4]:
project.ix['PV_SHELF']
Out[4]:
In [4]:
In [5]:
"""function nname=lookup_cf(long_name)
% LOOKUP_CF Get CF equivalent name for EPIC variable long_name
% return the new name string or [] if there's no equivalent
%
if(strfind(lower(long_name),'temp'))
nname='sea_water_temperature';
elseif (strfind(lower(long_name),'cond'))
nname='sea_water_electrical_conductivity';
elseif (strfind(lower(long_name),'sal'))
nname='sea_water_salinity';
elseif (strfind(lower(long_name),'sigma'))
nname='sea_water_sigma_theta';
% also have to deal with the min, max std of vels for burst stats files
elseif (strfind(lower(long_name),'east'))
nname='eastward_sea_water_velocity';
elseif (strfind(lower(long_name),'north'))
nname='northward_sea_water_velocity';
elseif (strfind(lower(long_name),'vertical'))
nname='upward_sea_water_velocity';
elseif (strfind(lower(long_name),'pitch'))
nname='platform_pitch_angle';
elseif (strfind(lower(long_name),'roll'))
nname='platform_roll_angle';
elseif (strfind(lower(long_name),'head'))
nname='platform_orientation';
elseif (strfind(lower(long_name),'pres'))
if ~isempty(strfind(lower(long_name),'dev')) || ~isempty(strfind(lower(long_name),'std'))
nname=[];
else
nname='sea_water_pressure';
end
elseif (strfind(lower(long_name),'cond'))
nname='sea_water_electrical_conductivity';
elseif (strfind(lower(long_name),'speed'))
if (strfind(lower(long_name),'rotor'))
nname=[];
else
nname='sea_water_speed';
end
elseif (strfind(lower(long_name),'direction'))
nname='direction_of_sea_water_velocity';
else
nname=[];
end
disp([long_name ' : ' nname])
""";
In [6]:
# variable mapping
d={}
d['eastward_sea_water_velocity']=['u_1205', 'u']
d['northward_sea_water_velocity']=['v_1206', 'v']
d['sea_water_temperature']=['t_20']
d['sea_water_salinity'] = ['s_40']
In [7]:
#os.chdir('/usgs/data2/emontgomery/stellwagen/Data/ARGO_MERCHANT')
root_dir='/usgs/data2/emontgomery/stellwagen/Data/MVCO_11'
root_dir='/usgs/data2/emontgomery/stellwagen/Data/'
os.chdir(root_dir)
In [8]:
# now find all the unique names, long_names & units
names = set()
long_names = set()
units = set()
for path, subdirs, files in os.walk(root_dir):
for name in files:
file= os.path.join(path, name)
try:
nc=netCDF4.Dataset(file)
for var in nc.variables.keys():
names.add(var)
try:
long_names.add(nc.variables[var].long_name)
except:
pass
try:
units.add(nc.variables[var].units)
except:
pass
except:
pass
names= list(names)
long_names = list(long_names)
units = list(units)
print len(names)
print len(long_names)
print len(units)
In [ ]:
# let's use Ellyn's approach of matching substrings in the long_names to deduce standard_names
In [9]:
# air temp, frtemp, laser temp factor, internal, temp diff
filter(lambda x:re.search(r'temp',x.lower()), long_names)
Out[9]:
In [10]:
# seconds, second
filter(lambda x:re.search(r'cond',x.lower()), long_names)
Out[10]:
In [11]:
filter(lambda x:re.search(r'sal',x.lower()), long_names)
Out[11]:
In [12]:
filter(lambda x:re.search(r'sigma',x.lower()), long_names)
Out[12]:
In [13]:
# deal with burst data, std dev, resolution velocity, variance
filter(lambda x:re.search(r'east',x.lower()), long_names)
Out[13]:
In [14]:
filter(lambda x:re.search(r'north',x.lower()), long_names)
Out[14]:
In [15]:
filter(lambda x:re.search(r'vertical',x.lower()), long_names)
Out[15]:
In [16]:
# wind, rotor speed
filter(lambda x:re.search(r'speed',x.lower()), long_names)
Out[16]:
In [17]:
# std dev
filter(lambda x:re.search(r'pitch',x.lower()), long_names)
Out[17]:
In [18]:
filter(lambda x:re.search(r'roll',x.lower()), long_names)
Out[18]:
In [20]:
# std dev
filter(lambda x:re.search(r'heading',x.lower()), long_names)
Out[20]:
In [22]:
# std, dev, wave height spectra, barometric, presscheck
pres = filter(lambda x:re.search(r'press',x.lower()), long_names)
pres
Out[22]:
In [23]:
filter(lambda x:re.search(r'std|dev',x.lower()), pres)
Out[23]:
In [25]:
# save only direction and current direction
filter(lambda x:re.search(r'direct',x.lower()), long_names)
Out[25]:
In [ ]:
f = open('/usgs/data2/notebook/names.txt','w')
f.write("\n".join(names))
f.close()
f = open('/usgs/data2/notebook/long_names.txt','w')
f.write("\n".join(long_names))
f.close()
f = open('/usgs/data2/notebook/units.txt','w')
f.write("\n".join(units))
f.close()
In [ ]:
In [ ]:
pwd
In [ ]:
# find netcdf files
types = ('*.cdf', '*.nc') # the tuple of file types
files = []
for file in types:
files.extend(glob.glob(file))
print files
In [ ]:
In [ ]:
file = files[-1]
print file
nc = netCDF4.Dataset(file)
In [ ]:
vars=nc.variables.keys()
In [ ]:
coord_vars = ['time','time2','depth','lat','lon']
In [ ]:
# find data variables by removing coordinate variables from the variable list
data_vars = [var for var in vars if var not in coord_vars]
print data_vars
In [ ]:
print nc.variables['u_1205']
In [ ]:
nt = len(nc.dimensions['time'])
nz = len(nc.dimensions['depth'])
print nt,nz
In [ ]:
ofile = '/usgs/data2/notebook/%s' % file
id = file.split('.')[0]
print id
In [ ]:
# create dimensions
nco = netCDF4.Dataset(ofile,'w',clobber=True)
nco.createDimension('time',nt)
if nz > 1:
nco.createDimension('depth',nz)
nchar=20
nco.createDimension('nchar',nchar)
In [ ]:
# create coordinate variables
time_v = nco.createVariable('time', 'f8', ('time'))
lon_v = nco.createVariable('lon','f4')
lat_v = nco.createVariable('lat','f4')
if nz > 1:
depth_v = nco.createVariable('depth','f4',dimensions='depth')
else:
depth_v = nco.createVariable('depth','f4')
station_v = nco.createVariable('site','S1',('nchar'))
In [ ]:
# write global attributes
g_attdict = nc.__dict__
g_attdict['Conventions'] = 'CF-1.6'
if nz>1:
g_attdict['featureType'] = 'timeSeriesProfile'
else:
g_attdict['featureType'] = 'timeSeries'
g_attdict['naming_authority'] = 'gov.usgs'
g_attdict['id'] = id
g_attdict['source'] = 'USGS'
g_attdict['institution'] = 'Woods Hole Coastal and Marine Science Center'
g_attdict['project'] = 'Coastal and Marine Geology Program'
g_attdict['title'] = g_attdict['source'] + g_attdict['id']
g_attdict['keywords']='Oceans > Ocean Pressure > Water Pressure, Oceans > Ocean Temperature > Water Temperature, Oceans > Salinity/Density > Conductivity, Oceans > Salinity/Density > Salinity'
g_attdict['keywords_vocabulary']='GCMD Science Keywords'
g_attdict['standard_name_vocabulary'] = 'CF-1.6'
g_attdict['creator_email'] = 'emontgomery@usgs.gov'
g_attdict['creator_name'] = 'Ellyn Montgomery'
nco.setncatts(g_attdict)
In [ ]:
# write station variable
station_v.cf_role = 'timeseries_id'
station_v.standard_name = 'station_id'
data = numpy.empty((1,),'S'+repr(nchar))
data[0] = id
station_v[:] = netCDF4.stringtochar(data)
# write time variable
time_v.units = 'seconds since 1858-11-17 00:00:00 +0:00'
time_v.standard_name = 'time'
time_v.calendar = 'gregorian'
time_v[:] = (nc.variables['time'][:]-2400001.)*3600.*24. + nc.variables['time2'][:]/1000.
# write lon variable
lon_v.units = 'degree_east'
lon_v.standard_name = 'longitude'
lon_v[:] = nc.variables['lon'][:]
# write lat variable
lat_v.units = 'degree_north'
lat_v.standard_name = 'latitude'
lat_v[:] = nc.variables['lat'][:]
# write depth variable
depth_v.units = 'm'
depth_v.standard_name = 'depth'
depth_v.positive = 'down'
depth_v.axis = 'Z'
depth_v[:] = nc.variables['depth'][:]
In [ ]:
nc.variables['depth'][:]
In [ ]:
# variable mapping
d={}
d['eastward_sea_water_velocity']=['u_1205', 'u']
d['northward_sea_water_velocity']=['v_1206', 'v']
d['sea_water_temperature']=['t_20']
d['sea_water_salinity'] = ['s_40']
In [ ]:
# create the data variables
var_v=[]
for varname in data_vars:
ncvar = nc.variables[varname]
# if time series variable
if size(ncvar) == nt:
var = nco.createVariable(varname,ncvar.dtype,('time'))
elif size(ncvar) == nz:
var = nco.createVariable(varname,ncvar.dtype,('depth'))
else:
var = nco.createVariable(varname,ncvar.dtype,('time','depth'))
# load old variable attributes and modify if necessary
attdict = ncvar.__dict__
# if dounpackshort and 'scale_factor' in attdict: del attdict['scale_factor']
attdict['coordinates'] = 'time lon lat depth'
# assign standard_name if in dictionary
a =[k for (k, v) in d.iteritems() if varname.lower() in v]
if len(a)==1: attdict['standard_name']=a[0]
# write variable attributes
var.setncatts(attdict)
# write the data
# print ncvar
var[:] = ncvar[:]
In [ ]:
nco.close()