Save Tower CSV data as NetCDF

Set local variables


In [12]:
# Remote logger export that the download cell fetches.
url = 'http://geoport.whoi.edu/thredds/fileServer/usgs/data2/notebook/data/CR3000_SN3557_Table1_MainTowerCR3000_ground_V6.CR3.txt'

# Local filename the download is written to and later read back from.
input_data = "data.txt"

# Directory and filename of the netCDF file produced at the end.
output_dir = "/data"
output_file = "julia.nc"

# Sentinel substituted for missing values in the loaded table.
fillvalue = -9999.9

Download the data


In [13]:
# urlretrieve moved from `urllib` (Python 2) to `urllib.request` (Python 3);
# import it from whichever location this kernel provides.
try:
    from urllib.request import urlretrieve  # Python 3
except ImportError:
    from urllib import urlretrieve  # Python 2

# Fetch `url` and store it locally as `input_data`; the (filename, headers)
# tuple returned by urlretrieve is displayed as the cell output.
urlretrieve(url, input_data)


Out[13]:
('data.txt', <httplib.HTTPMessage instance at 0x7f117c0ebb00>)

In [14]:
import pandas as pd

# Read the downloaded file into a DataFrame indexed by the parsed TIMESTAMP
# column.
#   * skiprows=[0, 2, 3] leaves file line 1 as the single header row
#     (presumably the skipped lines are logger metadata/units rows -- confirm
#     against the raw file).
#   * 'NAN' and empty fields are read as missing values.
#   * `tupleize_cols` is intentionally not passed: it only affected multi-row
#     headers and has been removed from pandas, where passing it raises a
#     TypeError.
df = pd.read_csv(input_data,
                 skiprows=[0, 2, 3],
                 parse_dates=True,
                 index_col='TIMESTAMP',
                 low_memory=False,
                 na_values=['NAN', ''])

# Replace every missing value with the configured fill value.
df = df.fillna(fillvalue)
df.head()


Out[14]:
RECORD Year Month DOM Hour Minute Second uSecond WeekDay Day_of_Year ... del_TsoilOpen_Std del_TsoilRiparian_Std TsoilTree_Avg TsoilGrass_Avg TsoilOpen_Avg TsoilRiparian_Avg TsoilTree_Std TsoilGrass_Std TsoilOpen_Std TsoilRiparian_Std
TIMESTAMP
2010-04-19 16:30:00 0 2010 4 19 16 30 0 0 2 109 ... 10.060 -9999.9 25.37 27.08 33.28 -9999.9 0.032 0.030 0.016 -9999.9
2010-04-19 17:00:00 0 2010 4 19 17 0 0 0 2 109 ... 6.272 -9999.9 25.16 26.96 33.20 -9999.9 0.059 0.026 0.026 -9999.9
2010-04-19 17:30:00 1 2010 4 19 17 30 0 0 2 109 ... 0.014 -9999.9 24.86 26.81 32.76 -9999.9 0.084 0.044 0.241 -9999.9
2010-04-19 18:00:00 2 2010 4 19 18 0 0 0 2 109 ... 0.010 -9999.9 24.56 26.58 31.80 -9999.9 0.092 0.083 0.291 -9999.9
2010-04-19 18:30:00 3 2010 4 19 18 30 0 0 2 109 ... 0.014 -9999.9 24.23 26.29 30.81 -9999.9 0.098 0.090 0.275 -9999.9

5 rows × 162 columns

Simple plot


In [23]:
import matplotlib.pyplot as plt
%matplotlib inline
df[['Tsoil10cmTree_Avg','Tsoil20cmTree_Avg']].plot(figsize=(12,4));


Create netCDF file


In [24]:
import numpy as np
def pd_to_secs(df):
    """Convert a frame's datetime index to whole seconds since 1970-01-01.

    Each index entry is turned into a time tuple and passed through
    ``calendar.timegm``, so the wall-clock fields are interpreted as UTC
    and any sub-second part is dropped.

    Parameters
    ----------
    df : object with a ``.index`` of datetime-like entries
        Every entry must provide a ``timetuple()`` method.

    Returns
    -------
    numpy.ndarray
        One ``int64`` value per index entry, in index order.
    """
    import calendar
    epoch_seconds = []
    for stamp in df.index:
        epoch_seconds.append(calendar.timegm(stamp.timetuple()))
    return np.asarray(epoch_seconds, dtype=np.int64)

def cf_safe_name(name):
    """Create a CF safe name for a group/dimension/variable.

    If ``name`` is a string:
      * a leading digit or underscore gets the prefix ``v_`` so the name
        starts with a letter;
      * every character outside ``[_a-zA-Z0-9]`` is replaced by ``_``.

    Any non-string ``name`` is returned unchanged.

    Works on both Python 2 (where ``basestring`` covers ``str`` and
    ``unicode``) and Python 3 (where ``basestring`` does not exist and
    ``str`` is used instead).
    """
    import re
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str  # Python 3
    if isinstance(name, string_types):
        if re.match('^[0-9_]', name):
            # Add a letter to the front
            name = "v_{}".format(name)
        return re.sub(r'[^_a-zA-Z0-9]', "_", name)
    return name

In [25]:
import os

# Remove any file left at the target path by an earlier run before the
# TimeSeries object is constructed.
out_file = os.path.join(output_dir, output_file)
if os.path.isfile(out_file):
    os.remove(out_file)

from pyaxiom.netcdf.sensors import TimeSeries

station_urn = 'urn:ioos:station:edu.princeton.ecohydrolab:MainTower'
epoch_seconds = pd_to_secs(df)  # index timestamps as seconds since 1970

# NOTE(review): the meaning of verticals=[10] (units, sign convention) is not
# visible in this notebook -- confirm against the pyaxiom documentation.
ts = TimeSeries(
    output_dir,
    latitude=0.39,
    longitude=36.7,
    station_name=station_urn,
    global_attributes={},
    times=epoch_seconds,
    verticals=[10],
    output_filename=output_file
)

In [26]:
# Add every DataFrame column to the TimeSeries as its own variable.
# NOTE(review): columns are visited in reverse order (`[::-1]`); the reason is
# not evident from this notebook -- kept as in the original.
for column in df.columns[::-1]:
    # Add units based on column name?
    var_attributes = dict()
    # Pass the configured `fillvalue` (the same value used to fill missing
    # entries when the table was loaded) rather than a duplicated literal, so
    # the declared fill value cannot drift from the data.
    ts.add_variable(cf_safe_name(column),
                    df[column].values,
                    attributes=var_attributes,
                    fillvalue=fillvalue)

In [ ]: