Creating netCDF files from a standard DataFrame


In [1]:
%matplotlib inline
from IPython.lib.pretty import pprint
import logging
# Show everything the 'gutils' logger emits (DEBUG and above) through a single
# stream handler. Assigning the handler list outright, rather than appending,
# keeps repeated runs of this cell from stacking duplicate handlers.
logger = logging.getLogger('gutils')
logger.setLevel(logging.DEBUG)
logger.handlers = [logging.StreamHandler()]

def plot_profiles(default_df):
    """Scatter-plot ``z`` against ``t``, colored by ``profile``.

    Parameters
    ----------
    default_df : pandas.DataFrame
        Frame with a datetime-like ``t`` column, a numeric ``z`` column and a
        ``profile`` column. The frame itself is not modified; a copy is
        transformed and plotted.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    import matplotlib.dates as mpd
    import matplotlib.pyplot as plt
    import pandas as pd

    df = default_df.copy()
    # Negate z so that larger z values are drawn lower on the plot.
    df['z'] = df.z.values * -1
    # Convert timestamps to matplotlib date numbers. DatetimeIndex.to_pydatetime()
    # is used because Series.dt.to_pydatetime() is deprecated in recent pandas.
    df['t'] = mpd.date2num(pd.DatetimeIndex(df.t).to_pydatetime())

    # Explicit figure/axes instead of implicit pyplot state.
    fig, ax = plt.subplots()
    df.plot.scatter(x='t', y='z', c='profile', cmap='tab20', ax=ax)
    # 't' now holds date numbers; tell the axis to render them as dates rather
    # than as raw floats, and slant the labels so they do not overlap.
    ax.xaxis_date()
    fig.autofmt_xdate()
    plt.show()

Helper for creating a standard DataFrame


In [19]:
from pathlib import Path

from gutils.slocum import SlocumReader
from gutils.filters import process_dataset

# Sample Slocum ASCII file from the gutils test resources, resolved relative
# to the directory the notebook is run from (two levels up).
base_dir = Path('.').absolute().parent.parent
ascii_folder = base_dir.joinpath('gutils', 'tests', 'resources', 'slocum')
ascii_file = ascii_folder.joinpath('usf_bass_2016_252_1_12_sbd.dat')

# Read the file and filter its profiles in a single call. Going by the summary
# the call logs, the filter thresholds are: depth (1 m), number of points (3),
# duration (10 s) and distance (1 m).
profile_filters = {
    'filter_z': 1,
    'filter_points': 3,
    'filter_time': 10,
    'filter_distance': 1,
}
standard, mode = process_dataset(
    file=str(ascii_file),
    reader_class=SlocumReader,
    tsint=1,
    **profile_filters
)

# Report how many distinct profiles are left, then plot them
profile_ids = standard.profile.unique()
print('Profiles : ', len(profile_ids))
plot_profiles(standard)


('Filtered 5/11 profiles from /data/Development/secoora/sgs/GUTILS/gutils/tests/resources/slocum/usf_bass_2016_252_1_12_sbd.dat', 'Depth (1m): 4', 'Points (3): 1', 'Time (10s): 0', 'Distance (1m): 0')
Profiles :  6

Create netCDF from a default template


In [3]:
import tempfile
import netCDF4 as nc4
from gutils.nc import read_attrs, create_netcdf

# Output folder: a fresh temporary directory passed as `output_path` to the
# create_netcdf() calls in this notebook. tempfile.mkdtemp() never removes the
# directory on its own, so delete it manually (e.g. shutil.rmtree) when done.
netcdf_output = tempfile.mkdtemp()

In [5]:
# Default "trajectory" metadata
# (read_attrs() is called with no arguments, i.e. no explicit template)
default_attrs = read_attrs()

# The required attributes to identify a unique deployment
# (they show up combined as the dataset `id`, e.g. "bass-20160909T1733")
default_attrs['glider'] = 'bass'
default_attrs['trajectory_date'] = '20160909T1733'

# Display the names of the variables this template carries metadata for
list(default_attrs['variables'].keys())


Out[5]:
['crs',
 'conductivity',
 'density',
 'depth',
 'm_altitude',
 'lat',
 'lat_uv',
 'lon',
 'lon_uv',
 'platform',
 'pressure',
 'profile_id',
 'profile_lat',
 'profile_lon',
 'profile_time',
 'salinity',
 'temperature',
 'time',
 'time_uv',
 'trajectory',
 'u',
 'v',
 'u_orig',
 'v_orig',
 'sci_oxy3835_oxygen',
 'sci_oxy3835_wphase_oxygen',
 'sci_oxy3835_wphase_saturation',
 'sci_bbfl2s_chlor_scaled',
 'sci_flbbcd_chlor_units',
 'sci_flntu_chlor_units',
 'sci_flntu_turb_units',
 'sci_bbfl2s_cdom_scaled',
 'sci_flbbcd_cdom_units']

In [8]:
# Write the standard DataFrame out as netCDF using the default template.
# Columns the template has no entry for are left out (the call logs them).
netcdf_files = create_netcdf(attrs=default_attrs, data=standard, output_path=netcdf_output, mode=mode)

# Collect the variable names of the first file written, then pretty-print them
with nc4.Dataset(netcdf_files[0]) as first_profile:
    variable_names = list(first_profile.variables.keys())
pprint(variable_names)


Excluded from output (absent from JSON config):
  * m_leakdetect_voltage
  * m_water_depth
  * m_pitch
  * drv_m_gps_lon
  * m_battpos
  * m_gps_lat
  * m_vehicle_temp
  * m_battery
  * m_mission_avg_speed_climbing
  * m_heading
  * m_mission_avg_speed_diving
  * m_ballast_pumped
  * sci_water_pressure
  * m_lat
  * drv_m_gps_lat
  * c_wpt_lat
  * sci_bbfl2s_bb_scaled
  * m_depth
  * sci_m_present_time
  * m_roll
  * m_gps_lon
  * m_avg_speed
  * c_heading
  * m_vacuum
  * m_depth_rate
  * m_present_time
  * m_lon
Created: /tmp/tmp8nebo2lv/bass_20160909T165243Z_1473439963_rt.nc
Created: /tmp/tmp8nebo2lv/bass_20160909T165732Z_1473440252_rt.nc
Created: /tmp/tmp8nebo2lv/bass_20160909T170019Z_1473440419_rt.nc
Created: /tmp/tmp8nebo2lv/bass_20160909T170518Z_1473440718_rt.nc
Created: /tmp/tmp8nebo2lv/bass_20160909T170805Z_1473440885_rt.nc
Created: /tmp/tmp8nebo2lv/bass_20160909T171146Z_1473441106_rt.nc
['trajectory',
 'crs',
 'time',
 'depth',
 'lat',
 'lon',
 'm_altitude',
 'sci_bbfl2s_cdom_scaled',
 'sci_bbfl2s_chlor_scaled',
 'sci_oxy3835_oxygen',
 'conductivity',
 'temperature',
 'pressure',
 'salinity',
 'density',
 'lat_uv',
 'lon_uv',
 'platform',
 'profile_id',
 'profile_lat',
 'profile_lon',
 'profile_time',
 'time_uv',
 'u',
 'v']

Create netCDF from an IOOS template


In [10]:
# IOOS NGDAC metadata
# (same call as for the default metadata, but naming the 'ioos_ngdac' template)
ioos_attrs = read_attrs(template='ioos_ngdac')

# The required attributes to identify a unique deployment
ioos_attrs['glider'] = 'bass'
ioos_attrs['trajectory_date'] = '20160909T1733'

# Display the names of the variables this template carries metadata for
list(ioos_attrs['variables'].keys())


Out[10]:
['crs',
 'conductivity',
 'density',
 'depth',
 'lat',
 'lat_uv',
 'lon',
 'lon_uv',
 'platform',
 'pressure',
 'profile_id',
 'profile_lat',
 'profile_lon',
 'profile_time',
 'salinity',
 'temperature',
 'time',
 'time_uv',
 'trajectory',
 'u',
 'v']

In [11]:
# Write the standard DataFrame out as netCDF using the IOOS NGDAC template.
# Columns the template has no entry for are left out (the call logs them).
netcdf_files = create_netcdf(attrs=ioos_attrs, data=standard, output_path=netcdf_output, mode=mode)

# Collect the variable names of the first file written, then pretty-print them
with nc4.Dataset(netcdf_files[0]) as first_profile:
    variable_names = list(first_profile.variables.keys())
pprint(variable_names)


Excluded from output (absent from JSON config):
  * m_leakdetect_voltage
  * m_water_depth
  * m_pitch
  * drv_m_gps_lon
  * m_battpos
  * m_gps_lat
  * m_vehicle_temp
  * m_battery
  * m_mission_avg_speed_climbing
  * m_heading
  * m_mission_avg_speed_diving
  * m_ballast_pumped
  * sci_water_pressure
  * m_lat
  * drv_m_gps_lat
  * c_wpt_lat
  * sci_bbfl2s_bb_scaled
  * m_depth
  * sci_bbfl2s_cdom_scaled
  * sci_m_present_time
  * m_roll
  * m_gps_lon
  * m_avg_speed
  * c_heading
  * m_vacuum
  * m_depth_rate
  * m_present_time
  * sci_oxy3835_oxygen
  * sci_bbfl2s_chlor_scaled
  * m_lon
  * m_altitude
Created: /tmp/tmp8nebo2lv/bass_20160909T165243Z_1473439963_rt.nc
Created: /tmp/tmp8nebo2lv/bass_20160909T165732Z_1473440252_rt.nc
Created: /tmp/tmp8nebo2lv/bass_20160909T170019Z_1473440419_rt.nc
Created: /tmp/tmp8nebo2lv/bass_20160909T170518Z_1473440718_rt.nc
Created: /tmp/tmp8nebo2lv/bass_20160909T170805Z_1473440885_rt.nc
Created: /tmp/tmp8nebo2lv/bass_20160909T171146Z_1473441106_rt.nc
['trajectory',
 'crs',
 'time',
 'depth',
 'lat',
 'lon',
 'conductivity',
 'temperature',
 'pressure',
 'salinity',
 'density',
 'lat_uv',
 'lon_uv',
 'platform',
 'profile_id',
 'profile_lat',
 'profile_lon',
 'profile_time',
 'time_uv',
 'u',
 'v']

Create netCDF without subsetting any variables

By default, only variables that have metadata defined for them are exported. Pass `subset=False` to export every variable.


In [13]:
# Write the netCDF files again with subsetting switched off, so columns
# without an entry in the template are exported as well.
netcdf_files = create_netcdf(
    attrs=default_attrs, data=standard, output_path=netcdf_output, subset=False, mode=mode
)

# Collect the variable names of the first file written, then pretty-print them
with nc4.Dataset(netcdf_files[0]) as first_profile:
    variable_names = list(first_profile.variables.keys())
pprint(variable_names)


Created: /tmp/tmp8nebo2lv/bass_20160909T165243Z_1473439963_rt.nc
Created: /tmp/tmp8nebo2lv/bass_20160909T165732Z_1473440252_rt.nc
Created: /tmp/tmp8nebo2lv/bass_20160909T170019Z_1473440419_rt.nc
Created: /tmp/tmp8nebo2lv/bass_20160909T170518Z_1473440718_rt.nc
Created: /tmp/tmp8nebo2lv/bass_20160909T170805Z_1473440885_rt.nc
Created: /tmp/tmp8nebo2lv/bass_20160909T171146Z_1473441106_rt.nc
['trajectory',
 'crs',
 'time',
 'depth',
 'lat',
 'lon',
 'c_heading',
 'c_wpt_lat',
 'm_altitude',
 'm_avg_speed',
 'm_ballast_pumped',
 'm_battery',
 'm_battpos',
 'm_depth',
 'm_depth_rate',
 'm_gps_lat',
 'm_gps_lon',
 'm_heading',
 'm_lat',
 'm_leakdetect_voltage',
 'm_lon',
 'm_mission_avg_speed_climbing',
 'm_mission_avg_speed_diving',
 'm_pitch',
 'm_present_time',
 'm_roll',
 'm_vacuum',
 'm_vehicle_temp',
 'm_water_depth',
 'sci_bbfl2s_bb_scaled',
 'sci_bbfl2s_cdom_scaled',
 'sci_bbfl2s_chlor_scaled',
 'sci_m_present_time',
 'sci_oxy3835_oxygen',
 'conductivity',
 'sci_water_pressure',
 'temperature',
 'drv_m_gps_lat',
 'drv_m_gps_lon',
 'pressure',
 'salinity',
 'density',
 'lat_uv',
 'lon_uv',
 'platform',
 'profile_id',
 'profile_lat',
 'profile_lon',
 'profile_time',
 'time_uv',
 'u',
 'v']

The netCDF files produced have their metadata calculated automatically


In [14]:
# Print the summary of the first file written: its global attributes,
# dimensions and variables.
with nc4.Dataset(netcdf_files[0]) as first_profile:
    print(first_profile)


<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    featureType: trajectory
    date_created: 2017-11-06T19:46:02Z
    cdm_data_type: trajectory
    Conventions: CF-1.6, Unidata Dataset Discovery v1.0
    format_version: IOOS_Glider_NetCDF_v3.0-noqartod.nc
    keywords: AUVS > Autonomous Underwater Vehicles, Oceans > Ocean Pressure > Water Pressure, Oceans > Ocean Temperature > Water Temperature, Oceans > Salinity/Density > Conductivity, Oceans > Salinity/Density > Density, Oceans > Salinity/Density > Salinity
    keywords_vocabulary: GCMD Science Keywords
    license: This data may be redistributed and used without restriction.  Data provided as is with no expressed or implied assurance of quality assurance or quality control
    Metadata_Conventions: CF-1.6, Unidata Dataset Discovery v1.0
    platform_type: Slocum Glider
    processing_level: Dataset taken from glider native file format and is provided as is with no expressed or implied assurance of quality assurance or quality control.
    source: Observational data from a profiling glider
    standard_name_vocabulary: CF-v25
    summary: Gliders are small, free-swimming, unmanned vehicles that use changes in buoyancy to move vertically and horizontally through the water column in a saw-tooth pattern. They are deployed for days to several months and gather detailed information about the physical, chemical and biological processes of the water.
    title: Glider Dataset
    geospatial_lon_min: -80.29825
    geospatial_lat_min: 28.36769
    geospatial_lon_max: -80.29801
    geospatial_lat_max: 28.3683
    geospatial_bounds: POLYGON ((28.368300 -80.298250, 28.368300 -80.298010, 28.367690 -80.298010, 28.367690 -80.298250, 28.368300 -80.298250))
    geospatial_vertical_min: 0.258264
    geospatial_vertical_units: m
    geospatial_vertical_max: 22.159835
    time_coverage_duration: P0DT0H4M44.92523S
    time_coverage_start: 2016-09-09T16:52:43Z
    time_coverage_end: 2016-09-09T16:57:28Z
    date_modified: 2017-11-06T19:46:02Z
    date_issued: 2017-11-06T19:46:02Z
    history: 2017-11-06T19:46:02Z - Created with the GUTILS package: "/home/kwilcox/miniconda3-py36/envs/gutils35/lib/python3.5/site-packages/ipykernel_launcher.py"
    id: bass-20160909T1733
    dimensions(sizes): obs(156)
    variables(dimensions): <class 'str'> trajectory(), int32 crs(), float64 time(obs), float64 depth(obs), float64 lat(obs), float64 lon(obs), float64 c_heading(obs), float64 c_wpt_lat(obs), float64 m_altitude(obs), float64 m_avg_speed(obs), float64 m_ballast_pumped(obs), float64 m_battery(obs), float64 m_battpos(obs), float64 m_depth(obs), float64 m_depth_rate(obs), float64 m_gps_lat(obs), float64 m_gps_lon(obs), float64 m_heading(obs), float64 m_lat(obs), float64 m_leakdetect_voltage(obs), float64 m_lon(obs), float64 m_mission_avg_speed_climbing(obs), float64 m_mission_avg_speed_diving(obs), float64 m_pitch(obs), float64 m_present_time(obs), float64 m_roll(obs), float64 m_vacuum(obs), float64 m_vehicle_temp(obs), float64 m_water_depth(obs), float64 sci_bbfl2s_bb_scaled(obs), float64 sci_bbfl2s_cdom_scaled(obs), float64 sci_bbfl2s_chlor_scaled(obs), float64 sci_m_present_time(obs), float64 sci_oxy3835_oxygen(obs), float64 conductivity(obs), float64 sci_water_pressure(obs), float64 temperature(obs), float64 drv_m_gps_lat(obs), float64 drv_m_gps_lon(obs), float64 pressure(obs), float64 salinity(obs), float64 density(obs), float64 lat_uv(), float64 lon_uv(), int32 platform(), int32 profile_id(), float64 profile_lat(), float64 profile_lon(), float64 profile_time(), float64 time_uv(), float64 u(), float64 v()
    groups: 

Built-in compliance checker


In [15]:
from types import SimpleNamespace
from gutils.nc import check_dataset

In [16]:
# Good file will return 0
# check_dataset() is handed an object exposing a `file` attribute; a
# SimpleNamespace supplies one without any further setup.
first = SimpleNamespace(file=netcdf_files[0])
check_dataset(first)


Out[16]:
0

In [17]:
# Bad file will return 1 and log the errors
# 'should_fail.nc' is looked up in the parent of the slocum resources folder.
bad = SimpleNamespace(file=str(ascii_folder.parent / 'should_fail.nc'))
check_dataset(bad)


WARNING: The following exceptions occured during the gliderdac checker (possibly indicate compliance checker issues):
gliderdac.check_primary_variable_attributes: 'pressure'
  File "/home/kwilcox/miniconda3-py36/envs/gutils35/lib/python3.5/site-packages/cc_plugin_glider/glider_dac.py", line 336, in check_primary_variable_attributes
    test = hasattr(dataset.variables[var], attribute)

Out[17]:
1

Full circle back to a DataFrame


In [18]:
from pocean.dsg import IncompleteMultidimensionalTrajectory as imt

# Names of the netCDF variables that act as the t/z/x/y axes
axis_names = {'t': 'time', 'z': 'depth', 'x': 'lon', 'y': 'lat'}

# Load the second file written back into a DataFrame and preview 20 rows
with imt(netcdf_files[1]) as ncd:
    df = ncd.to_dataframe(axes=axis_names)
df.head(20)


Out[18]:
time lon lat depth trajectory crs m_ballast_pumped m_battpos m_depth m_depth_rate ... conductivity sci_water_pressure temperature pressure salinity density profile_id profile_lat profile_lon profile_time
0 2016-09-09 16:57:32.110840 -80.298008 28.368306 22.288953 bass-20160909T1733 0.0 NaN NaN NaN NaN ... 5.57568 2.244 26.1661 22.44 36.058043 1023.878204 1473440252 28.368492 -80.297931 8.422883e+08
1 2016-09-09 16:57:33.152860 -80.298007 28.368308 NaN bass-20160909T1733 0.0 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 1473440252 28.368492 -80.297931 8.422883e+08
2 2016-09-09 16:57:33.543910 -80.298007 28.368308 NaN bass-20160909T1733 0.0 NaN NaN 22.4166 NaN ... NaN NaN NaN NaN NaN NaN 1473440252 28.368492 -80.297931 8.422883e+08
3 2016-09-09 16:57:36.266660 -80.298004 28.368315 22.199564 bass-20160909T1733 0.0 NaN NaN NaN NaN ... 5.57492 2.235 26.1712 22.35 36.048522 1023.869030 1473440252 28.368492 -80.297931 8.422883e+08
4 2016-09-09 16:57:38.257110 -80.298002 28.368320 NaN bass-20160909T1733 0.0 138.571 NaN 22.3583 NaN ... NaN NaN NaN NaN NaN NaN 1473440252 28.368492 -80.297931 8.422883e+08
5 2016-09-09 16:57:38.714020 -80.298002 28.368320 22.120107 bass-20160909T1733 0.0 NaN NaN NaN NaN ... 5.57575 2.227 26.1795 22.27 36.048077 1023.865736 1473440252 28.368492 -80.297931 8.422883e+08
6 2016-09-09 16:57:40.990780 -80.298000 28.368325 22.040650 bass-20160909T1733 0.0 NaN NaN NaN NaN ... 5.57473 2.219 26.1798 22.19 36.040431 1023.859532 1473440252 28.368492 -80.297931 8.422883e+08
7 2016-09-09 16:57:42.928130 -80.297998 28.368330 NaN bass-20160909T1733 0.0 NaN NaN 22.1016 NaN ... NaN NaN NaN NaN NaN NaN 1473440252 28.368492 -80.297931 8.422883e+08
8 2016-09-09 16:57:44.399540 -80.297996 28.368335 21.822142 bass-20160909T1733 0.0 NaN NaN NaN NaN ... 5.57489 2.197 26.2188 21.97 36.011053 1023.824139 1473440252 28.368492 -80.297931 8.422883e+08
9 2016-09-09 16:57:46.747740 -80.297994 28.368339 21.603634 bass-20160909T1733 0.0 NaN NaN NaN NaN ... 5.57698 2.175 26.2516 21.75 36.000620 1023.804980 1473440252 28.368492 -80.297931 8.422883e+08
10 2016-09-09 16:57:47.541660 -80.297993 28.368342 NaN bass-20160909T1733 0.0 NaN NaN 21.7126 NaN ... NaN NaN NaN NaN NaN NaN 1473440252 28.368492 -80.297931 8.422883e+08
11 2016-09-09 16:57:48.893710 -80.297992 28.368344 21.355329 bass-20160909T1733 0.0 NaN NaN NaN NaN ... 5.57869 2.150 26.2911 21.50 35.982194 1023.777543 1473440252 28.368492 -80.297931 8.422883e+08
12 2016-09-09 16:57:52.064449 -80.297988 28.368354 20.997770 bass-20160909T1733 0.0 NaN NaN NaN NaN ... 5.58893 2.114 26.4735 21.14 35.914137 1023.666959 1473440252 28.368492 -80.297931 8.422883e+08
13 2016-09-09 16:57:52.177760 -80.297988 28.368354 NaN bass-20160909T1733 0.0 NaN 0.757697 21.1798 NaN ... NaN NaN NaN NaN NaN NaN 1473440252 28.368492 -80.297931 8.422883e+08
14 2016-09-09 16:57:54.172060 -80.297986 28.368359 20.828922 bass-20160909T1733 0.0 NaN NaN NaN NaN ... 5.60057 2.097 26.6685 20.97 35.846707 1023.553431 1473440252 28.368492 -80.297931 8.422883e+08
15 2016-09-09 16:57:56.790370 -80.297984 28.368364 NaN bass-20160909T1733 0.0 NaN NaN 20.7441 NaN ... NaN NaN NaN NaN NaN NaN 1473440252 28.368492 -80.297931 8.422883e+08
16 2016-09-09 16:57:58.784520 -80.297982 28.368368 20.431633 bass-20160909T1733 0.0 NaN NaN NaN NaN ... 5.62320 2.057 26.9497 20.57 35.792107 1023.420723 1473440252 28.368492 -80.297931 8.422883e+08
17 2016-09-09 16:58:00.986540 -80.297980 28.368373 20.262785 bass-20160909T1733 0.0 NaN NaN NaN NaN ... 5.63257 2.040 27.0190 20.40 35.805904 1023.408150 1473440252 28.368492 -80.297931 8.422883e+08
18 2016-09-09 16:58:01.408420 -80.297979 28.368376 NaN bass-20160909T1733 0.0 NaN NaN 20.3513 NaN ... NaN NaN NaN NaN NaN NaN 1473440252 28.368492 -80.297931 8.422883e+08
19 2016-09-09 16:58:04.254120 -80.297976 28.368383 19.855563 bass-20160909T1733 0.0 NaN NaN NaN NaN ... 5.64353 1.999 27.0717 19.99 35.843907 1023.418080 1473440252 28.368492 -80.297931 8.422883e+08

20 rows × 29 columns


In [ ]: