All the USGS time series data has traditionally been stored in PMEL EPIC-compliant NetCDF files. These files use EPIC codes to identify variables; this notebook converts them to CF-1.6 compliant files.
In [74]:
import glob
import os
import re
import StringIO

import netCDF4
import numpy as np
import pandas as pd
In [81]:
projs = '''
ARGO_MERCHANT:B. Butman:Argo Merchant Experiment:A moored array deployed after the ARGO MERCHANT ran aground onNantucket Shoals designed to help understand the fate of the spilled oil.
BUZZ_BAY:B. Butman:Currents and Sediment Transport in Buzzards Bay:Investigation of the near-bottom circulation in Buzzards Bay and consequent transport of fine-grained sediments that may be contaminated with PCBs from inner New Bedford Harbor.
CAMP:B. Butman:California Area Monitoring Program (CAMP):A four-year multi-disciplinary field and laboratory study to investigate the sediment transport regime in the vicinity of production drilling rigs in the Santa Barbara Basin
CAPE_COD_BAY:B. Butman:Currents and Sediment Transport in Cape Cod Bay:A pilot study to determine the effect of winter storms on sediment movement at two potential dredge spoil disposal areas.
CC_MISC:B. Butman:Transport studies - Nauset Inlet:Part of a collaborative study of sediment movement in Nauset Inlet.
DEEP_REEF:J. Lacey:Gulf of Mexico - Pinnacles:Pressure data from the Gulf of Mexico
DWDS_106:B. Butman:Sediment Transport at Deep Water Dump Site 106:Near-bottom current measurements to understand the fate and transport of sludge from the New York Metropolitan region discharged at the sea surface.
ECOHAB_II:R. Signell:Ecology of Harmful Algal Blooms (ECOHAB-II):A field program to continue investigating the transport and fate of toxic dinoflagellate blooms in the western Gulf of Maine.
ECOHAB_I:R. Signell:Ecology of Harmful Algal Blooms (ECOHAB-I):A field program to study the transport and fate of toxic dinoflagellate blooms in the western Gulf of Maine.
EUROSTRATAFORM:C. Sherwood:EuroSTRATAFORM:The EuroSTRATAFORM Po and Apennine Sediment Transport and Accumulation (PASTA) experiment was an international study of sediment-transport processes and formation of geological strata in the Adriatic Sea.
FARALLONES:M. Noble:Farallons:Program to measure the currents and circulation on the continental slope off San Francisco CA and thus infer the transport of dredged materialat the newly-established deep-water disposal site.
GB_SED:B. Butman:Georges Bank Current and Sediment Transport Studies:A series of studies to assess environmental hazards to petroleum development in the Georges Bank and New England Shelf region
GLOBEC_GB:R. Schlitz:GLOBEC Georges Bank Program:A moored array program to investigate the circulation and mixing of plankton on Georges Bank.
GLOBEC_GSC:R. Schlitz:GLOBEC Great South Channel Circulation Experiment:A moored array program to investigate the recirculation of water and plankton around Georges Bank
GULF_MAINE:B. Butman:Deep Circulation in the Gulf of Maine:A two-year field study to investigate the deep flow between the major basins in the Gulf of Maine and the effects on the distribution of suspended sediments.
HUDSON_SVALLEY:B. Butman:Circulation and Sediment Transport in the Hudson Shelf Valley:Field experiments have been carried out to understand the transport of sediments and associated contaminants in the Hudson Shelf Valley offshore of New York.
KARIN_RIDGE:M. Noble:Karin Ridge Experiment:Current measurements collected at 2 sites in Karin Ridge Seamount.
LYDONIA_C:B. Butman:Lydonia Canyon Dynamics Experiment:A major field experiment to determine the importance of submarine canyons in sediment transport along and across the continental margin.
MAB_SED:B. Butman:Sediment Transport Observations in the Middle Atlantic Bight:A series of studies to assess environmental hazards to petroleum development in the Middle Atlantic Bight.
MAMALA_BAY:D. Cacchione:Mamala bay Experiment:Current measurements collected at 350-450 meters in Mamala Bay near Waikiki Beach.
MBAY_CIRC:R. Signell: Massachusetts Bay Circulation Experiment:Current measurements collected at 6 sites in Massachusetts Bay throughout the year to map the tidal wind and density driven currents.
MBAY_IWAVE:B. Butman:Massachusetts Bay Internal Wave Experiment:A 1-month 4-element moored array experiment to measure the currents associated with large-amplitude internal waves generated by tidal flow across Stellwagen Bank.
MBAY_LTB:B. Butman:Long-term observations in Massachusetts Bay; Site B-Scituate:Measurements of currents and other oceanographic properties were made to assess the impact of sewage discharge from the proposed outfall site.
MBAY_LT:B. Butman:Long-term observations in Massachusetts Bay; Site A-Boston Harbor:Measurements of currents and other oceanographic properties were made to assess the impact of sewage discharge from the proposed outfall site.
MBAY_STELL:R. Signell:Monitoring on Stellwagen Bank:A year-long series of current measurements on the eastern flank of Stellwagen Bank to document the currents at the mouth of Massachusetts Bay driven by the Maine Coastal current.
MBAY_WEST:B. Butman:Currents and Sediment Transport in Western Massachusetts Bay:A pilot winter-time experiment to investigate circulation and sediment transport. Designed to provide information to aid in citing the new ocean outfall for the Boston sewer system.
MOBILE_BAY:B. Butman:Mobile Bay Study:Measure currents and transport out of Mobile Bay.
MONTEREY_BAY:M. Noble:Monterey Bay National Marine Sanctuary Program:Part of a large multi-disciplinary experiment to characterize the geologic environment and to generate a sediment budget.
MONTEREY_CAN:M. Noble:Monterey Canyon Experiment: A program to determine the mechanisms that govern the circulation within and the transport of sediment and water through Monterey Submarine Canyon.
MYRTLEBEACH:J. Warner:Myrtle Beach Experiment SC:Measurements collected as part of a larger study to understand the physical processes that control the transport of sediments in Long Bay South Carolina.
NE_SLOPE:B. Butman:Currents on the New England Continental Slope:A study designed to describe the currents and to investigate the transport of sediment from the shelf to the slope.
OCEANOG_C:B. Butman:Oceanographer Canyon Dynamics Experiment:A field experiment to determine the importance of submarine canyons in sediment transport along and across the continental margin.
ORANGE_COUNTY:M. Noble:Orange County Sanitation District Studies:Observations to monitor coastal ocean process that transport suspended material and associated comtaminants across the shelf
PONCHARTRAIN:R. Signell:Lake Ponchartrain Project:A series of moored array studies to investigate the circulation and particle transport in Lake Pontchartrain.
PV_SHELF04:M. Noble:Palos Verdes Shelf 2004:Additional observations to estimate the quantity and direction of sediment erosion and transport on the shelf near the White Point ocean outfalls.
PV_SHELF07:M. Noble:Palos Verdes Shelf 2007:Follow-up observations to evaluate how often coastal ocean processes move the DDT contaminated sediments near the White Point ocean outfalls.
PV_SHELF:M. Noble:Palos Verdes Shelf Study:Initial observations of currents and circulation near the White Point ocean outfalls determine how often coastal ocean processes move the DDT contaminated sediments in this region.
SAB_SED:B. Butman:Sediment Transport Observations in the Southern Atlantic Bight:A series of studies to assess environmental hazards to petroleum development in the South Atlantic Bight.
SOUTHERN_CAL:M. Noble:Southern California Project:A series of moorings were deployed to understand how coastal ocean processes that move sediments change with location on the shelf.
STRESS:B. Butman:Sediment Transport on Shelves and Slopes (STRESS):Experiment on the California continental margin to investigate storm-driven sediment transport.
WRIGHTSVILLE:R. Thieler:Wrightsville Beach Study: Measurements of bottom currents and waves to investigate the flow field and sediment transport in a rippled scour depression offshore of Wrightsville Beach NC.
DIAMONDSHOALS:J. Warner:Cape Hatteras- Diamond Shoals:This experiment was designed to investigate the ocean circulation and sediment transport dynamics at Diamond Shoals NC.
CHANDELEUR:C. Sherwood:Chandeleur Islands Oceanographic Measurements:A program to measure waves water levels and currents near the Chandeleur Islands Louisiana and adjacent berm construction site.
WFAL:N. Ganju:West Falmouth Harbor Fluxes:Oceanographic and water-quality observations made at six locations in West Falmouth Harbor and Buzzards Bay.
BW2011:N. Ganju: Blackwater 2011: Oceanographic and Water-Quality Measurements made at several sites in 2 watersheds in Blackwater National Wildlife Refuge.
MVCO_11:C. Sherwood: OASIS MVCO 2011: Near-seabed Oceanographic Observations made as part of the 2011 OASIS Project at the MVCO.
HURRIRENE_BB:B. Butman: Observations in Buzzards Bay during and after a Hurricane: Oceanographic data collected in Buzzards Bay MA during Hurricane Irene August 2011.
FI12:J. Warner:Fire Island NY - Offshore: Oceanographic and meteorological observations were made at 7 sites on and around the sand ridges offshore of Fire Island NY in winter 2012 to study coastal processes.
BARNEGAT:N. Ganju:Light attenuation and sediment resuspension in Barnegat Bay New Jersey: Light attenuation is a critical parameter governing the ecological function of shallow estuaries. Near-bottom and mid-water observations of currents, pressure, chlorophyll, and fDOM were collected at three pairs of sites sequentially at different locations in the estuary to characterize the conditions.
'''
In [82]:
project = pd.read_csv(StringIO.StringIO(projs.strip()),
sep=':',index_col='project_id',
names=['project_id', 'project_pi', 'project_name','project_summary'])
In [84]:
project.head()
Out[84]:
In [85]:
project.ix['PV_SHELF'].project_pi
Out[85]:
In [ ]:
len(project)
In [88]:
proj=project.ix[['FI12','BARNEGAT','WFAL']]
In [89]:
len(proj)
Out[89]:
In [90]:
for index,row in proj.iterrows():
print index,row['project_pi']
In [91]:
""" this is Ellyn's old Matlab code:
function nname=lookup_cf(long_name)
% LOOKUP_CF Get CF equivalent name for EPIC variable long_name
% return the new name string or [] if there's no equivalent
%
if(strfind(lower(long_name),'temp'))
nname='sea_water_temperature';
elseif (strfind(lower(long_name),'cond'))
nname='sea_water_electrical_conductivity';
elseif (strfind(lower(long_name),'sal'))
nname='sea_water_salinity';
elseif (strfind(lower(long_name),'sigma'))
nname='sea_water_sigma_theta';
% also have to deal with the min, max std of vels for burst stats files
elseif (strfind(lower(long_name),'east'))
nname='eastward_sea_water_velocity';
elseif (strfind(lower(long_name),'north'))
nname='northward_sea_water_velocity';
elseif (strfind(lower(long_name),'vertical'))
nname='upward_sea_water_velocity';
elseif (strfind(lower(long_name),'pitch'))
nname='platform_pitch_angle';
elseif (strfind(lower(long_name),'roll'))
nname='platform_roll_angle';
elseif (strfind(lower(long_name),'head'))
nname='platform_orientation';
elseif (strfind(lower(long_name),'pres'))
if ~isempty(strfind(lower(long_name),'dev')) || ~isempty(strfind(lower(long_name),'std'))
nname=[];
else
nname='sea_water_pressure';
end
elseif (strfind(lower(long_name),'cond'))
nname='sea_water_electrical_conductivity';
elseif (strfind(lower(long_name),'speed'))
if (strfind(lower(long_name),'rotor'))
nname=[];
else
nname='sea_water_speed';
end
elseif (strfind(lower(long_name),'direction'))
nname='direction_of_sea_water_velocity';
else
nname=[];
end
disp([long_name ' : ' nname])
""";
In [92]:
#os.chdir('/usgs/data2/emontgomery/stellwagen/Data/ARGO_MERCHANT')
root_dir='/usgs/data2/emontgomery/stellwagen/Data/'
#root_dir='/usgs/data2/emontgomery/stellwagen/Data/MVCO_11'
odir='/usgs/data2/emontgomery/stellwagen/CF-1.6/'
os.chdir(root_dir)
In [70]:
# now find all the unique names, long_names & units
names = set()
long_names = set()
units = set()
epic_keys = set()
for path, subdirs, files in os.walk(root_dir):
for name in files:
file= os.path.join(path, name)
try:
nc=netCDF4.Dataset(file)
for var in nc.variables.keys():
names.add(var)
try:
long_names.add(nc.variables[var].long_name)
except:
pass
try:
units.add(nc.variables[var].units)
except:
pass
try:
epic_keys.add(nc.variables[var].epic_code)
except:
pass
except:
pass
In [72]:
names= list(names)
long_names = list(long_names)
units = list(units)
epic_keys = list(epic_keys)
In [73]:
print len(names)
print len(long_names)
print len(units)
print len(epic_keys)
In [42]:
# let's use Ellyn's approach of matching substrings in the long_names to deduce standard_names
In [43]:
# air temp, frtemp, laser temp factor, internal, temp diff
filter(lambda x:re.search(r'temp',x.lower()), long_names)
Out[43]:
In [44]:
# seconds, second
filter(lambda x:re.search(r'cond',x.lower()), long_names)
Out[44]:
In [45]:
filter(lambda x:re.search(r'sal',x.lower()), long_names)
Out[45]:
In [46]:
filter(lambda x:re.search(r'sigma',x.lower()), long_names)
Out[46]:
In [47]:
# deal with burst data, std dev, resolution velocity, variance
filter(lambda x:re.search(r'east',x.lower()), long_names)
Out[47]:
In [48]:
filter(lambda x:re.search(r'north',x.lower()), long_names)
Out[48]:
In [49]:
filter(lambda x:re.search(r'vertical',x.lower()), long_names)
Out[49]:
In [50]:
# wind, rotor speed
filter(lambda x:re.search(r'speed',x.lower()), long_names)
Out[50]:
In [51]:
# std dev
filter(lambda x:re.search(r'pitch',x.lower()), long_names)
Out[51]:
In [52]:
filter(lambda x:re.search(r'roll',x.lower()), long_names)
Out[52]:
In [53]:
# std dev
filter(lambda x:re.search(r'heading',x.lower()), long_names)
Out[53]:
In [54]:
# std, dev, wave height spectra, barometric, presscheck
pres = filter(lambda x:re.search(r'press',x.lower()), long_names)
pres
Out[54]:
In [55]:
filter(lambda x:re.search(r'std|dev',x.lower()), pres)
Out[55]:
In [56]:
# save only direction and current direction
filter(lambda x:re.search(r'direct',x.lower()), long_names)
Out[56]:
In [57]:
f = open('/usgs/data2/notebook/names.txt','w')
f.write("\n".join(names))
f.close()
f = open('/usgs/data2/notebook/long_names.txt','w')
f.write("\n".join(long_names))
f.close()
f = open('/usgs/data2/notebook/units.txt','w')
f.write("\n".join(units))
f.close()
In [93]:
d={}
In [94]:
d['sea_water_temperature']=['instrument transducer temp.', 'temperature (c)','fr temp',
'adp transducer temp.','adcp transducer temp.','transducer temp.','temp 1','temp 2',
'temperature','internal temperature','frtemp','temp 2 q','temp','temp lp','sea surface temperature (degrees C)']
In [95]:
d['sea_water_salinity'] = ['salinity 2 q','salinity 1','ctd salinity, pss-78','salinity','salinity (ppt)','salinity (psu)','salinity']
In [96]:
d['northward_sea_water_velocity']=['northward velocity','north','mean northward velocity','northward velocity','north lp']
In [97]:
d['eastward_sea_water_velocity']=['eastward velocity','east','mean eastward velocity','eastward velocity','east lp']
In [98]:
def grid2dsg(ifile,ofile,coord_vars=['time','time2','depth','lat','lon'],
project_name=None,project_pi=None,project_summary=None):
nc = netCDF4.Dataset(ifile)
id = '%s/%s' % (project_name,ifile.split('.')[0])
#id = ifile.split('.')[0]
vars=nc.variables.keys()
data_vars = [var for var in vars if var not in coord_vars]
nt = len(nc.dimensions['time'])
nz = len(nc.dimensions['depth'])
# create dimensions of output file
nco = netCDF4.Dataset(ofile,'w',clobber=True)
nco.createDimension('time',nt)
if nz > 1:
nco.createDimension('depth',nz)
nchar=20
nco.createDimension('nchar',nchar)
# create coordinate variables
time_v = nco.createVariable('time', 'f8', ('time'))
lon_v = nco.createVariable('lon','f4')
lat_v = nco.createVariable('lat','f4')
if nz > 1:
depth_v = nco.createVariable('depth','f4',dimensions='depth')
else:
depth_v = nco.createVariable('depth','f4')
station_v = nco.createVariable('site','S1',('nchar'))
# write global attributes
g_attdict = nc.__dict__
g_attdict['Conventions'] = 'CF-1.6'
if nz>1:
g_attdict['featureType'] = 'timeSeriesProfile'
else:
g_attdict['featureType'] = 'timeSeries'
g_attdict['naming_authority'] = 'gov.usgs'
g_attdict['id'] = id
g_attdict['source'] = 'USGS'
g_attdict['institution'] = 'USGS Woods Hole Coastal and Marine Science Center'
g_attdict['project'] = 'Coastal and Marine Geology Program'
g_attdict['title'] = '%s/%s/%s' % (g_attdict['source'],project_name,g_attdict['id'])
g_attdict['keywords']='Oceans > Ocean Pressure > Water Pressure, Oceans > Ocean Temperature > Water Temperature, Oceans > Salinity/Density > Conductivity, Oceans > Salinity/Density > Salinity'
g_attdict['keywords_vocabulary']='GCMD Science Keywords'
g_attdict['standard_name_vocabulary'] = 'CF-1.6'
g_attdict['creator_email'] = 'rsignell@usgs.gov'
g_attdict['creator_name'] = 'Rich Signell'
g_attdict['creator_phone'] = '+1 (508) 548-8700'
g_attdict['creator_url'] = 'http://www.usgs.gov'
g_attdict['publisher_email'] = 'emontgomery@usgs.gov'
g_attdict['publisher_name'] = 'Ellyn Montgomery'
g_attdict['publisher_phone'] = '+1 (508) 548-8700'
g_attdict['publisher_url'] = 'http://www.usgs.gov'
g_attdict['contributor_name'] = project_pi
g_attdict['contributor_role'] = 'principalInvestigator' #from esip ACDD
g_attdict['summary'] = project_summary
nco.setncatts(g_attdict)
# write station variable
station_v.cf_role = 'timeseries_id'
station_v.standard_name = 'station_id'
data = numpy.empty((1,),'S'+repr(nchar))
data[0] = ifile.split('.')[0]
station_v[:] = netCDF4.stringtochar(data)
# write time variable
time_v.units = 'milliseconds since 1858-11-17 00:00:00 +0:00'
time_v.standard_name = 'time'
time_v.calendar = 'gregorian'
time_v[:] = (np.int64(nc.variables['time'][:])-2400001)*3600*24*1000 + nc.variables['time2'][:]
# write lon variable
lon_v.units = 'degree_east'
lon_v.standard_name = 'longitude'
lon_v[:] = nc.variables['lon'][:]
# write lat variable
lat_v.units = 'degree_north'
lat_v.standard_name = 'latitude'
lat_v[:] = nc.variables['lat'][:]
# write depth variable
depth_v.units = 'm'
depth_v.standard_name = 'depth'
depth_v.positive = 'down'
depth_v.axis = 'Z'
depth_v[:] = nc.variables['depth'][:]
# create the data variables
var_v=[]
for varname in data_vars:
ncvar = nc.variables[varname]
# if time series variable
if size(ncvar) == nt:
var = nco.createVariable(varname,ncvar.dtype,('time'))
elif size(ncvar) == nz:
var = nco.createVariable(varname,ncvar.dtype,('depth'))
else:
var = nco.createVariable(varname,ncvar.dtype,('time','depth'))
# load old variable attributes and modify if necessary
attdict = ncvar.__dict__
# if dounpackshort and 'scale_factor' in attdict: del attdict['scale_factor']
attdict['coordinates'] = 'time lon lat depth'
# assign standard_name if in dictionary
a =[k for (k, v) in d.iteritems() if attdict['long_name'].strip().lower() in v]
if len(a)==1: attdict['standard_name']=a[0]
# write variable attributes
var.setncatts(attdict)
# write the data
# print ncvar
var[:] = ncvar[:]
nco.close()
In [99]:
root_idir='/usgs/data2/emontgomery/stellwagen/Data/'
#root_dir='/usgs/data2/emontgomery/stellwagen/Data/MVCO_11'
root_odir='/usgs/data2/emontgomery/stellwagen/CF-1.6/'
os.chdir(root_dir)
In [100]:
types = ('*.nc', '*.cdf')
coord_vars = ['time','time2','depth','lat','lon']
badfiles = []
goodfiles = []
for index,row in proj.iterrows():
idir = os.path.join(root_idir,index)
os.chdir(idir)
odir = os.path.join(root_odir,index)
if not os.path.exists(odir):
os.makedirs(odir)
ncfiles = []
for files in types:
ncfiles.extend(glob.glob(files))
print index, len(ncfiles)
project_name = index
project_pi = project.ix[index].ix['project_pi']
project_summary = project.ix[index].ix['project_summary']
print project_name,project_pi,project_summary
for ifile in ncfiles:
ofile = os.path.join(odir,ifile)
print ifile,ofile
try:
grid2dsg(ifile,ofile,coord_vars = coord_vars,
project_name = project_name,
project_pi = project_pi,
project_summary = project_summary)
goodfiles.append(ifile)
except:
badfiles.append(ifile)
In [101]:
len(badfiles)
Out[101]:
In [102]:
len(goodfiles)
Out[102]:
In [103]:
print badfiles
In [38]: