Meta Data Dictionary


This notebook contains everything you need to create a nice neat list of meta data dictionaries out of netcdf files. In this case we have made one meta data dictionary for each day in a five year span. The dictionaries are only created when there is data available on the given day, and there are up to 8 datafiles represented on each day. Each files contains data from various sensors and that is reported out in a whole slew of variables. Each variable has attributes associated with it in the netcdf file. These attributes are carried over into the dict and other attributes are added, such as a flag variable that can be raised for various problematic data situations (missing data, unreasonable data, ...)

Overview of Data Dict Structure

data_dict = {title: 'Mpala Flux Tower Data', conventions: 'CF 1.6', year: 2010, doy: 001, month: 01 date: 2010-01-01, files: [{filename: Table1, logger: 'CR3000_SN4709', frequency: 1, variables: [{var: 'Tsoil10cm_Avg', units : 'ft', flags : ['data missing']}]}]}

Setup


In [1]:
from __future__ import print_function
import pandas as pd
import datetime as dt
import numpy as np
import os
import xray
from posixpath import join

ROOTDIR = 'C:/Users/Julia/Documents/GitHub/MpalaTower/raw_netcdf_output/'
data = 'Table1'

datas = ['upper', 'Table1', 'lws', 'licor6262', 'WVIA',
         'Manifold', 'flux', 'ts_data', 'Table1Rain']
non_static_attrs = ['instrument', 'source', 'program', 'logger']
static_attrs = ['station_name', 'lat', 'lon', 'elevation',
                'Year', 'Month', 'DOM', 'Minute', 'Hour',
                'Day_of_Year', 'Second', 'uSecond', 'WeekDay']

# Setting expected ranges for units. It is ok to include multiple ways of writing
# the same unit, just put all the units in a list
flag_by_units = {}

temp_min = 0
temp_max = 40
temp = ['Deg C', 'C']
for unit in temp:
    flag_by_units.update({unit : {'min' : temp_min, 'max' : temp_max}})
    
percent_min = 0
percent_max = 100
percent = ['percent', '%']
for unit in percent:
    flag_by_units.update({unit : {'min' : percent_min, 'max' : percent_max}})

shf_min = ''
shf_max = ''
shf = ['W/m^2']

shf_cal_min = ''
shf_cal_max = ''
shf_cal = ['W/(m^2 mV)']

batt_min = 11
batt_max = 240
batt = ['Volts', 'V']
for unit in batt:
    flag_by_units.update({unit : {'min' : batt_min, 'max' : batt_max}})

PA_min = 15
PA_max = 25
PA = ['uSec']

In [2]:
def process_netcdf(input_dir, data, f, static_attrs):
    ds = xray.Dataset()
    ds = xray.open_dataset(join(input_dir, data, f),
                           decode_cf=True, decode_times=True)
    df = ds.to_dataframe()

    # drop from df, columns that don't change with time
    exclude = [var for var in static_attrs if var in df.columns]
    df_var = df.drop(exclude, axis=1)  # dropping vars like lat, lon
    df_clean = df_var.dropna(axis=1, how='all')  # dropping NAN vars

    # get some descriptive statistics on each of the variables
    df_int = df_clean.describe()
    df_summ = pd.DataFrame(df_int, dtype=str)
    for i in df_int:
        # this loop outputs rounded string values instead of floats
        # this helps to control the size of the final array
        for k in range(len(df_int)):
            try:
                precision = 2  # higher numbers improve precision
                sigfig = precision-int(np.log10(abs(df_int[i][k])))
                command = '%2.'+str(sigfig)+'f'
                df_summ[i][k] = command % df_int[i][k]
            except:
                pass
    return ds, df_var, df_clean, df_summ

In [3]:
def convert_to_sec(num, units):
    if units.startswith(('Min','min')):
        out = int(num)*60
    elif units.startswith('ms', 'mS'):
        out = float(num)/1000
    elif units.statswith(('s','S')):
        out = int(num)
    else:
        print('couldn\'t parse units')
        return (num, units)
    return out

In [4]:
convert_to_sec(10, 'Min')


Out[4]:
600

Local attribute dict


In [5]:
def generate_local_attrs(ds, df_summ, var, flag_by_units):
    local_attrs = {'var': var}

    local_attrs.update(ds[var].attrs)
    local_attrs.update(df_summ[var].to_dict())
    
    # check status of data and raise flags
    var_count = float(local_attrs['count'])
    var_max = float(local_attrs['max'])
    var_min = float(local_attrs['min'])
    var_mean = float(local_attrs['mean'])

    flags = []
    
    if len(ds['time'])*11/12 < var_count < len(ds['time']):
        flags.append('missing a little data')
    elif len(ds['time'])/2 < var_count <= len(ds['time'])*11/12:
        flags.append('missing some data')
    elif var_count <= len(ds['time'])/2:
        flags.append('missing lots of data')

    if var.startswith('del'):
        pass
    elif local_attrs['comment'] == 'Std':  # don't check std_dev
        pass
    else:
        try:
            if var_max > flag_by_units[local_attrs['units']]['max']:
                flags.append('contains high values')
            if var_min < flag_by_units[local_attrs['units']]['min']:
                flags.append('contains low values')
        except:
            pass

    if var.endswith(('total', 'Total', 'tot', 'Tot')):
        local_attrs.update({'total' : var_mean*var_count})
    
    local_attrs['flags'] = flags
    
    return local_attrs

Datafile attribute dict


In [6]:
def programmed_frequency(ROOTDIR, data_dict,j):
    data = data_dict['files'][j]['filename']
    program = data_dict['files'][j]['source'].split('CPU:')[1].split(',')[0]
    try:
        f = open(join(ROOTDIR, 'programs', program))
    except:
        print('program not found', program)
    lines = f.readlines()
    i= 0
    k = 0
    freq = {}
    DT = 'DataTable'
    DI = 'DataInterval'
    for i in range(len(lines)):
        if lines[i].startswith((DT, ' '+DT, '  '+DT)) and data in lines[i]:
            freq.update({'datatable': {data: {}}})
            k = i
        if lines[i].startswith((DI, ' '+DI, '  '+DI, '   '+DI, '    '+DI, '     '+DI)) and i <= (k+2):
            interval = lines[i].split(',')[1]
            print(interval)
            units = lines[i].split(',')[2]
            freq['datatable'][data].update({'interval': interval, 'units': units})
        i +=1
    try:
        num = int(interval)
    except:
        scan = freq['datatable'][data]['interval']
        for line in lines:
            if line.startswith('Const '+scan):
                a = line.split('=')[1]
                b = a.split()[0]
                num = int(b)
    freq['datatable'][data].update({'num':num})
    return freq

In [7]:
def generate_datafile_attrs(data, ds, df_var, df_clean, df_summ,
                            flag_by_units, non_static_attrs):
    datafile_attrs = {'filename': data, 'variables': []}

    # add in non_static datafile attributes from the global ones
    for attr in [d for d in non_static_attrs if d in ds.attrs]:
        datafile_attrs.update({attr : ds.attrs[attr]})

    # calculate average frequency
    times = ds.coords['time'].values
    freq = np.diff(times, axis=-1)
    a = freq.mean()
    freq = a.astype('timedelta64[s]')
    datafile_attrs['frequency'] = freq.astype(float)
    
    # populate the empty variable list with dataless variables
    empty_vars = [{'var': var, 'flags' : ['no data']} for var in df_var if var not in df_clean]
    empty_vars.sort()
    for var in empty_vars:
        datafile_attrs['variables'].append(var)
    
    # populate it with local attributes
    full_vars = list(df_summ.columns)
    full_vars.sort()
    for var in full_vars:
        local_attrs = generate_local_attrs(ds, df_summ, var, flag_by_units)
        datafile_attrs['variables'].append(local_attrs)

    return datafile_attrs

Process data into list of daily data dicts


In [18]:
from . import db


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-18-cba80396fc34> in <module>()
----> 1 from . import db

ValueError: Attempted relative import in non-package

In [10]:
data_dict = None
data_list = []
start = '2010-01-01'
end = dt.datetime.utcnow()
rng = pd.date_range(start, end, freq='D')
for date in rng:
    i = 0
    y = date.year
    m = date.month
    d = date.dayofyear
    f = 'raw_MpalaTower_%i_%03d.nc' % (y, d)
    if any(f in os.listdir(join(ROOTDIR, data)) for data in datas):
        Metadata.__init__(join(ROOTDIR, data, f))


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-10-023d6e088a8f> in <module>()
     11     f = 'raw_MpalaTower_%i_%03d.nc' % (y, d)
     12     if any(f in os.listdir(join(ROOTDIR, data)) for data in datas):
---> 13         Metadata.__init__(join(ROOTDIR, data, f))

NameError: name 'Metadata' is not defined

In [8]:
print(date)
        data_dict = {'year': y, 'month' : m, 'doy': d, 'date' : date, 'files': []}
        i += 1
    for data in datas:
        if f in os.listdir(join(ROOTDIR, data)):
            print(f, data)
            ds, df_var, df_clean, df_summ = process_netcdf(ROOTDIR, data, f, static_attrs)
            datafile_attrs = generate_datafile_attrs(data, ds, df_var, df_clean, df_summ,
                                                     flag_by_units, non_static_attrs)
            data_dict['files'].append(datafile_attrs)
    if i == 1:
        for attr in [d for d in ds.attrs if d not in non_static_attrs]:
            data_dict.update({attr : ds.attrs[attr]})
        data_list.append(data_dict)


2015-04-07 00:00:00
raw_MpalaTower_2015_097.nc upper
2015-04-08 00:00:00
raw_MpalaTower_2015_098.nc upper
raw_MpalaTower_2015_098.nc Table1
raw_MpalaTower_2015_098.nc flux
2015-04-09 00:00:00
raw_MpalaTower_2015_099.nc upper
raw_MpalaTower_2015_099.nc Table1
raw_MpalaTower_2015_099.nc lws
raw_MpalaTower_2015_099.nc Manifold
raw_MpalaTower_2015_099.nc flux
2015-04-10 00:00:00
raw_MpalaTower_2015_100.nc upper
raw_MpalaTower_2015_100.nc Table1
raw_MpalaTower_2015_100.nc lws
raw_MpalaTower_2015_100.nc WVIA
raw_MpalaTower_2015_100.nc Manifold
raw_MpalaTower_2015_100.nc flux
2015-04-11 00:00:00
raw_MpalaTower_2015_101.nc upper
raw_MpalaTower_2015_101.nc Table1
raw_MpalaTower_2015_101.nc lws
raw_MpalaTower_2015_101.nc WVIA
raw_MpalaTower_2015_101.nc Manifold
raw_MpalaTower_2015_101.nc flux
2015-04-12 00:00:00
raw_MpalaTower_2015_102.nc upper
raw_MpalaTower_2015_102.nc Table1
raw_MpalaTower_2015_102.nc lws
raw_MpalaTower_2015_102.nc WVIA
raw_MpalaTower_2015_102.nc Manifold
raw_MpalaTower_2015_102.nc flux
2015-04-13 00:00:00
raw_MpalaTower_2015_103.nc upper
raw_MpalaTower_2015_103.nc Table1
raw_MpalaTower_2015_103.nc lws
raw_MpalaTower_2015_103.nc WVIA
raw_MpalaTower_2015_103.nc Manifold
raw_MpalaTower_2015_103.nc flux
2015-04-14 00:00:00
raw_MpalaTower_2015_104.nc upper
raw_MpalaTower_2015_104.nc Table1
raw_MpalaTower_2015_104.nc lws
raw_MpalaTower_2015_104.nc WVIA
raw_MpalaTower_2015_104.nc Manifold
raw_MpalaTower_2015_104.nc flux

Send to internet


In [187]:
for data_dict in data_list:
    for j in range(len(data_dict['files'])):
        data = data_dict['files'][j]['filename']
        print(data)
        big_freq = programmed_frequency(ROOTDIR, data_dict,j)
        num = big_freq['datatable'][data]['num']
        units = big_freq['datatable'][data]['units']
        prog_freq = convert_to_sec(num, units)
        data_dict['files'][j]['prog_freq'] = prog_freq


upper
OUTPUT_INTERVAL
upper
OUTPUT_INTERVAL
Table1
10
flux
OUTPUT_INTERVAL
upper
OUTPUT_INTERVAL
Table1
10
lws
SHORT_INTERVAL
Manifold
1
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-187-f394cd2b5f0f> in <module>()
      6         num = big_freq['datatable'][data]['num']
      7         units = big_freq['datatable'][data]['units']
----> 8         prog_freq = convert_to_sec(num, units)
      9         data_dict['files'][j]['prog_freq'] = prog_freq

<ipython-input-169-274f11c5699b> in convert_to_sec(num, units)
      2     if units.startswith(('Min','min')):
      3         out = int(num)*60
----> 4     elif units.startswith('ms', 'mS'):
      5         out = float(num)/1000
      6     elif units.statswith(('s','S')):

TypeError: slice indices must be integers or None or have an __index__ method

In [180]:
big_freq


Out[180]:
{'datatable': {'upper': {'interval': 'OUTPUT_INTERVAL',
   'num': 10,
   'units': 'Min'}}}

In [6]:
def send(data_list):
    from pymongo import MongoClient
    from pylab import array, nbytes
    
    db_uri = 'mongodb://joey:joejoe@dogen.mongohq.com:10097/mpala_tower_metadata'
    client = MongoClient(db_uri)
    db = client.mpala_tower_metadata
    Metadata = db.metadata
    
    A = array(data_list)
    print(A.nbytes,'bytes')

    Metadata.remove({})
    Metadata.insert(data_list)

In [6]:
from pylab import array, nbytes
A = array(data_list)
print(A.nbytes,'bytes')


64 bytes

In [7]:
send(data_list)


64 bytes

In [139]:
freq = programmed_frequency(ROOTDIR, data_dict)


{'datatable': {'upper': {}}}
{'datatable': {'upper': {'units': 'Min', 'interval': 'OUTPUT_INTERVAL'}}}

In [146]:
freq['datatable'][data]['num']


Out[146]:
10

In [143]:
freq


Out[143]:
{'datatable': {'upper': {'interval': 'OUTPUT_INTERVAL',
   'num': 10,
   'units': 'Min'}}}

In [9]:
data_list[0]


Out[9]:
{u'Conventions': u'CF-1.6',
 u'acknowledgement': u'Funded by NSF and Princeton University',
 u'creator_email': u'kcaylor@princeton.edu',
 u'creator_name': u'Kelly Caylor',
 'date': Timestamp('2015-04-07 00:00:00', offset='D'),
 'doy': 97,
 u'featureType': u'timeSeries',
 'files': [{'filename': 'upper',
   'frequency': 600.0,
   'instrument': u'CR5000_SN2446',
   'source': u'Flux tower sensor data CR5000_SN2446_upper.dat, CPU:MainTowerCR5000_V7.CR5, 41490',
   'variables': [{'flags': ['no data'], 'var': u'Solar_MJ_1_Tot'},
    {'flags': ['no data'], 'var': u'Solar_MJ_Tot'},
    {'flags': ['no data'], 'var': u'Solar_Wm2_1_Avg'},
    {'flags': ['no data'], 'var': u'Solar_Wm2_Avg'},
    {'flags': ['no data'], 'var': u'Tsoil_Avg'},
    {'flags': ['no data'], 'var': u'Tsoil_Std'},
    {'flags': ['no data'], 'var': u'del_Tsoil_Avg'},
    {'flags': ['no data'], 'var': u'del_Tsoil_Std'},
    {'flags': ['no data'], 'var': u'shf_Avg'},
    {'flags': ['no data'], 'var': u'shf_Std'},
    {'flags': ['no data'], 'var': u'shf_cal_Avg'},
    {'flags': ['no data'], 'var': u'shf_cal_Std'},
    {'25%': '0.15',
     '50%': '0.42',
     '75%': '8.49',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '42.3',
     'mean': '5.89',
     'min': '-1.70',
     'std': '9.27',
     u'units': u'W/meter^2',
     'var': u'Albedo_Avg'},
    {'25%': '0.0038',
     '50%': '0.0095',
     '75%': '2.43',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '74.5',
     'mean': '4.00',
     'min': '0.0',
     'std': '10.7',
     u'units': u'none',
     'var': u'Albedo_Std'},
    {'25%': '407',
     '50%': '465',
     '75%': '516',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '105',
     'flags': ['missing some data'],
     'max': '569',
     'mean': '469',
     'min': '399',
     'std': '55.6',
     u'units': u'W/meter^2',
     'var': u'CG3DnCo_Avg'},
    {'25%': '0.60',
     '50%': '2.40',
     '75%': '6.85',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '105',
     'flags': ['missing some data'],
     'max': '20.4',
     'mean': '4.05',
     'min': '0.13',
     'std': '4.50',
     u'units': u'W/meter^2',
     'var': u'CG3DnCo_Std'},
    {'25%': '6.22',
     '50%': '10.2',
     '75%': '30.5',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '80.7',
     'mean': '20.5',
     'min': '-0.87',
     'std': '21.4',
     u'units': u'W/meter^2',
     'var': u'CG3Dn_Avg'},
    {'25%': '0.29',
     '50%': '0.58',
     '75%': '3.20',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '15.4',
     'mean': '2.29',
     'min': '0.094',
     'std': '3.22',
     u'units': u'W/meter^2',
     'var': u'CG3Dn_Std'},
    {'25%': '363',
     '50%': '386',
     '75%': '395',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '105',
     'flags': ['missing some data'],
     'max': '416',
     'mean': '379',
     'min': '336',
     'std': '20.9',
     u'units': u'W/meter^2',
     'var': u'CG3UpCo_Avg'},
    {'25%': '1.56',
     '50%': '2.35',
     '75%': '4.12',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '105',
     'flags': ['missing some data'],
     'max': '14.5',
     'mean': '3.32',
     'min': '0.56',
     'std': '2.78',
     u'units': u'W/meter^2',
     'var': u'CG3UpCo_Std'},
    {'25%': '-81.3',
     '50%': '-54.3',
     '75%': '-29.9',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '-1.45',
     'mean': '-54.1',
     'min': '-99.8',
     'std': '27.6',
     u'units': u'W/meter^2',
     'var': u'CG3Up_Avg'},
    {'25%': '1.61',
     '50%': '2.81',
     '75%': '4.36',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '14.5',
     'mean': '3.53',
     'min': '0.48',
     'std': '2.65',
     u'units': u'W/meter^2',
     'var': u'CG3Up_Std'},
    {'25%': '1.54',
     '50%': '3.69',
     '75%': '66.0',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '169',
     'mean': '38.8',
     'min': '-0.28',
     'std': '51.1',
     u'units': u'W/meter^2',
     'var': u'CM3Dn_Avg'},
    {'25%': '0.78',
     '50%': '1.06',
     '75%': '14.8',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '52.0',
     'mean': '9.79',
     'min': '0.65',
     'std': '14.5',
     u'units': u'W/meter^2',
     'var': u'CM3Dn_Std'},
    {'25%': '-1.33',
     '50%': '3.20',
     '75%': '443',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '1144.0',
     'mean': '248',
     'min': '-4.35',
     'std': '348',
     u'units': u'V',
     'var': u'CM3Up_Avg'},
    {'25%': '0.36',
     '50%': '3.97',
     '75%': '90.6',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '352',
     'mean': '61.8',
     'min': '0.15',
     'std': '99.7',
     u'units': u'W/meter^2',
     'var': u'CM3Up_Std'},
    {'25%': '17.5',
     '50%': '25.4',
     '75%': '29.8',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '105',
     'flags': ['missing some data'],
     'max': '32.0',
     'mean': '24.2',
     'min': '15.7',
     'std': '5.72',
     u'units': u'W/meter^2',
     'var': u'CNR1TC_Avg'},
    {'25%': '0.048',
     '50%': '0.17',
     '75%': '0.37',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '105',
     'flags': ['missing some data'],
     'max': '0.96',
     'mean': '0.25',
     'min': '0.0090',
     'std': '0.25',
     u'units': u'Deg C',
     'var': u'CNR1TC_Std'},
    {'25%': '291',
     '50%': '298',
     '75%': '303',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '105',
     'flags': ['missing some data'],
     'max': '305',
     'mean': '297',
     'min': '289',
     'std': '5.72',
     u'units': u'Deg C',
     'var': u'CNR1TK_Avg'},
    {'25%': '0.048',
     '50%': '0.17',
     '75%': '0.37',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '105',
     'flags': ['missing some data'],
     'max': '0.96',
     'mean': '0.25',
     'min': '0.0090',
     'std': '0.25',
     u'units': u'K',
     'var': u'CNR1TK_Std'},
    {'25%': '10.3',
     '50%': '14.7',
     '75%': '94.0',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '245',
     'mean': '59.4',
     'min': '4.47',
     'std': '70.7',
     u'units': u'W/meter^2',
     'var': u'DnTot_Avg'},
    {'25%': '0.86',
     '50%': '1.17',
     '75%': '16.5',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '66.8',
     'mean': '11.5',
     'min': '0.70',
     'std': '17.2',
     u'units': u'W/meter^2',
     'var': u'DnTot_Std'},
    {'25%': '-105',
     '50%': '-61.2',
     '75%': '-38.0',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '-11.4',
     'mean': '-74.7',
     'min': '-179',
     'std': '44.7',
     u'units': u'W/meter^2',
     'var': u'NetRl_Avg'},
    {'25%': '1.81',
     '50%': '3.81',
     '75%': '6.90',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '20.0',
     'mean': '4.76',
     'min': '0.39',
     'std': '3.72',
     u'units': u'W/meter^2',
     'var': u'NetRl_Std'},
    {'25%': '-2.76',
     '50%': '1.95',
     '75%': '376',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '975',
     'mean': '209',
     'min': '-8.17',
     'std': '297',
     u'units': u'K',
     'var': u'NetRs_Avg'},
    {'25%': '0.86',
     '50%': '3.50',
     '75%': '76.7',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '300',
     'mean': '52.8',
     'min': '0.69',
     'std': '84.9',
     u'units': u'W/meter^2',
     'var': u'NetRs_Std'},
    {'25%': '-43.5',
     '50%': '-28.0',
     '75%': '269',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '810',
     'mean': '134',
     'min': '-71.3',
     'std': '257',
     u'units': u'W/meter^2',
     'var': u'NetTot_Avg'},
    {'25%': '2.78',
     '50%': '6.91',
     '75%': '75.4',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '294',
     'mean': '52.8',
     'min': '0.93',
     'std': '81.6',
     u'units': u'W/meter^2',
     'var': u'NetTot_Std'},
    {'25%': '0.051',
     '50%': '9.20',
     '75%': '886',
     u'comment': u'WVc',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '2269.0',
     'mean': '498',
     'min': '0.0010',
     'std': '691',
     u'units': u'm/s',
     'var': u'PAR_Den_Avg'},
    {'25%': '0.31',
     '50%': '55.2',
     '75%': '5314.16210938',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '13614.5302734',
     'mean': '2989.23983714',
     'min': '0.0030',
     'std': '4145.27654107',
     'total': 430450.53654816,
     u'units': u'umol/s/m2',
     'var': u'PAR_Tot_Tot'},
    {'25%': '18.0',
     '50%': '21.2',
     '75%': '27.0',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '31.6',
     'mean': '22.4',
     'min': '16.1',
     'std': '4.81',
     u'units': u'C',
     'var': u'SensorBodyTemp'},
    {'25%': '18.0',
     '50%': '22.1',
     '75%': '27.6',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '31.8',
     'mean': '22.9',
     'min': '16.2',
     'std': '5.23',
     u'units': u'degC',
     'var': u'SensorBodyTemp_1'},
    {'25%': '18.0',
     '50%': '21.2',
     '75%': '27.0',
     u'comment': u'Smp',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '31.3',
     'mean': '22.4',
     'min': '16.2',
     'std': '4.79',
     u'units': u'degC',
     'var': u'SensorBodyTemp_Avg'},
    {'25%': '0.027',
     '50%': '0.080',
     '75%': '0.24',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '0.93',
     'mean': '0.16',
     'min': '0.0050',
     'std': '0.18',
     u'units': u'degC',
     'var': u'SensorBodyTemp_Std'},
    {'25%': '17.9',
     '50%': '23.1',
     '75%': '33.9',
     u'comment': u'Smp',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '46.8',
     'mean': '26.7',
     'min': '16.1',
     'std': '9.91',
     u'units': u'mV',
     'var': u'TargetTemp'},
    {'25%': '20.7',
     '50%': '24.2',
     '75%': '31.2',
     u'comment': u'Smp',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '35.1',
     'mean': '25.7',
     'min': '18.2',
     'std': '5.40',
     u'units': u'mV',
     'var': u'TargetTemp_1'},
    {'25%': '20.5',
     '50%': '24.2',
     '75%': '30.7',
     u'comment': u'Smp',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '34.2',
     'mean': '25.6',
     'min': '18.6',
     'std': '5.38',
     u'units': u'degC',
     'var': u'TargetTemp_1_Avg'},
    {'25%': '0.46',
     '50%': '0.56',
     '75%': '0.67',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '1.48',
     'mean': '0.59',
     'min': '0.33',
     'std': '0.20',
     u'units': u'degC',
     'var': u'TargetTemp_1_Std'},
    {'25%': '17.8',
     '50%': '22.9',
     '75%': '33.8',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '46.3',
     'mean': '26.6',
     'min': '16.2',
     'std': '9.68',
     u'units': u'degC',
     'var': u'TargetTemp_Avg'},
    {'25%': '0.23',
     '50%': '0.29',
     '75%': '0.95',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '3.57',
     'mean': '0.67',
     'min': '0.13',
     'std': '0.71',
     u'units': u'degC',
     'var': u'TargetTemp_Std'},
    {'25%': '0.057',
     '50%': '0.14',
     '75%': '0.53',
     u'comment': u'Smp',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '1.31',
     'mean': '0.32',
     'min': '-0.051',
     'std': '0.38',
     u'units': u'degC',
     'var': u'ThP_mV_irr'},
    {'25%': '0.12',
     '50%': '0.16',
     '75%': '0.22',
     u'comment': u'Smp',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '0.48',
     'mean': '0.18',
     'min': '-0.020',
     'std': '0.098',
     u'units': u'degC',
     'var': u'ThP_mV_irr_1'},
    {'25%': '-32.8',
     '50%': '-11.4',
     '75%': '369',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '1054.0',
     'mean': '194',
     'min': '-60.9',
     'std': '327',
     u'units': u'none',
     'var': u'UpTot_Avg'},
    {'25%': '2.97',
     '50%': '8.14',
     '75%': '90.5',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '353',
     'mean': '63.4',
     'min': '0.51',
     'std': '98.7',
     u'units': u'W/meter^2',
     'var': u'UpTot_Std'},
    {'25%': '13.2',
     '50%': '13.2',
     '75%': '13.2',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '13.2',
     'mean': '13.2',
     'min': '13.2',
     'std': '0.0052',
     u'units': u'Unnamed',
     'var': u'batt_volt_Avg'},
    {'25%': '0.016',
     '50%': '0.016',
     '75%': '0.016',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': ['contains low values'],
     'max': '0.017',
     'mean': '0.016',
     'min': '0.016',
     'std': '0.00030',
     u'units': u'V',
     'var': u'batt_volt_Std'},
    {'25%': '0.0',
     '50%': '0.0',
     '75%': '0.0',
     u'comment': u'Tot',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '0.0',
     'mean': '0.0',
     'min': '0.0',
     'std': '0.0',
     u'units': u'MJ/m2',
     'var': u'e_hmp_Avg'},
    {'25%': '0.0',
     '50%': '0.0',
     '75%': '0.0',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '0.0',
     'mean': '0.0',
     'min': '0.0',
     'std': '0.0',
     u'units': u'kPa',
     'var': u'e_hmp_Std'},
    {'25%': '-41.5',
     '50%': '-28.0',
     '75%': '233',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '728',
     'mean': '114',
     'min': '-70.3',
     'std': '226',
     u'units': u'W/meter^2',
     'var': u'net_rad_Avg'},
    {'25%': '3.03',
     '50%': '7.91',
     '75%': '67.0',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '250',
     'mean': '46.7',
     'min': '0.86',
     'std': '69.7',
     u'units': u'W/meter^2',
     'var': u'net_rad_Std'},
    {'25%': '29.7',
     '50%': '34.6',
     '75%': '42.0',
     u'comment': u'Smp',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '45.2',
     'mean': '35.7',
     'min': '27.6',
     'std': '6.32',
     u'units': u'Unnamed',
     'var': u'panel_temp_raw_Avg'},
    {'25%': '0.026',
     '50%': '0.061',
     '75%': '0.096',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '0.23',
     'mean': '0.071',
     'min': '0.0060',
     'std': '0.056',
     u'units': u'Unnamed',
     'var': u'panel_temp_raw_Std'},
    {'25%': '0.0',
     '50%': '0.0',
     '75%': '0.0',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '0.10',
     'mean': '0.0028',
     'min': '0.0',
     'std': '0.016',
     'total': 0.4032,
     u'units': u'degC',
     'var': u'rainfall_Tot'},
    {'25%': '0.0',
     '50%': '0.0',
     '75%': '0.0',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '0.0',
     'mean': '0.0',
     'min': '0.0',
     'std': '0.0',
     u'units': u'C',
     'var': u'rh_hmp_Avg'},
    {'25%': '0.0',
     '50%': '0.0',
     '75%': '0.0',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '0.0',
     'mean': '0.0',
     'min': '0.0',
     'std': '0.0',
     u'units': u'percent',
     'var': u'rh_hmp_Std'},
    {'25%': '0.0',
     '50%': '0.0',
     '75%': '0.0',
     u'comment': u'Std',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '0.0',
     'mean': '0.0',
     'min': '0.0',
     'std': '0.0',
     u'units': u'kPa',
     'var': u't_hmp_Avg'},
    {'25%': '0.0',
     '50%': '0.0',
     '75%': '0.0',
     u'comment': u'Avg',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '0.0',
     'mean': '0.0',
     'min': '0.0',
     'std': '0.0',
     u'units': u'C',
     'var': u't_hmp_Std'},
    {'25%': '1.40',
     '50%': '2.35',
     '75%': '4.10',
     u'comment': u'Tot',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '9.88',
     'mean': '2.84',
     'min': '0.28',
     'std': '1.84',
     u'units': u'mm',
     'var': u'wspeed_met_WVc_1_'},
    {'25%': '107',
     '50%': '207',
     '75%': '254',
     u'comment': u'WVc',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '352',
     'mean': '190',
     'min': '3.20',
     'std': '90.7',
     u'units': u'm/s',
     'var': u'wspeed_met_WVc_2_'},
    {'25%': '11.3',
     '50%': '18.0',
     '75%': '31.7',
     u'comment': u'WVc',
     u'content_coverage_type': u'physicalMeasurement',
     u'coordinates': u'time lon lat elevation',
     'count': '144',
     'flags': [],
     'max': '86.7',
     'mean': '25.3',
     'min': '3.51',
     'std': '19.2',
     u'units': u'm/s',
     'var': u'wspeed_met_WVc_3_'}]}],
 u'id': u'MPALA Tower',
 u'institution': u'Princeton University',
 u'license': u'MIT License',
 'month': 4,
 u'naming_authority': u'caylor.princeton.edu',
 u'summary': u'This raw data comes from the MPALA Flux Tower, which is maintained by the Ecohydrology Lab at Mpala Research Centre in Laikipia, Kenya. It is part of a long-term monitoring project that was originally funded by NSF and now runs with support from Princeton. Its focus is on using stable isotopes to better understand water balance in drylands, particularly transpiration and evaporation fluxes.',
 u'title': u'Flux Tower Data from MPALA',
 'year': 2015}

In [ ]: