OOI Equipment mapping

  • by Landung Setiawan
  • 6/14/2016
  • This notebook is for retrieving information from google sheets and then mapping to a JSON file, each instrument has its own JSON file configuration
  • The required libraries for this manipulation is gspread, oauth2client, and pycrypto

In [1]:
# Google Authentication Libraries
import oauth2client, gspread
import json

# oauth2client version check and gspread
oauth_ver = oauth2client.__version__
gspread_ver = gspread.__version__

print "oauth2client version : {}".format(oauth_ver) 
print "gspread version : {}".format(gspread_ver)


oauth2client version : 1.5.2
gspread version : 0.3.0

In [2]:
if oauth_ver < "2.0.2":
    from oauth2client.client import SignedJwtAssertionCredentials

    json_key = json.load(open('XXXX.json'))
    # Get scope for google sheets
    # Gather all spreadsheets shared with the client_email: XXXX@appspot.gserviceaccount.com
    scope = ['https://spreadsheets.google.com/feeds']
    
    # Retrieve credentials from JSON key of service account
    credentials = SignedJwtAssertionCredentials(json_key['client_email'], json_key['private_key'], scope)
    
    # Authorize gspread to connect to google sheets
    gc = gspread.authorize(credentials)
else:
    from oauth2client.service_account import ServiceAccountCredentials
    # Get scope for google sheets
    # Gather all spreadsheets shared with the client_email: XXXX@appspot.gserviceaccount.com
    scope = ['https://spreadsheets.google.com/feeds']

    # Retrieve credentials from JSON key of service account
    credentials = ServiceAccountCredentials.from_json_keyfile_name('XXXX.json', scope)

    # Authorize gspread to connect to google sheets
    gc = gspread.authorize(credentials)

In [3]:
# Get all spreadsheets available for NANOOS
gsheets = gc.openall()
# Get title of the spreadsheets
for i in range(0,len(gsheets)):
    print "{0} {1}".format(i,gsheets[i].title)


0 sensor_configurations_mappings
1 GOA-ON_structering
2 2016_Spring_NEMO
3 ooi_equipment
4 AGGI_Table
5 WINTER 2014 SCHEDULE
6 ocean_extents
7 nanoos_asset_list_20160427T092836
8 nanoos_asset_list_20160429T083128

In [4]:
# Open sensor_configurations_mappings only
sc = gc.open("sensor_configurations_mappings")

In [5]:
# Get all worksheets in a sheet
wks = sc.worksheets()
wks


Out[5]:
[<Worksheet 'instruments' id:o5yzc1h>, <Worksheet 'measurements' id:odfoenj>]

In [6]:
s1 = sc.get_worksheet(0)
s2 = sc.get_worksheet(1)
print s1, s2


<Worksheet 'instruments' id:o5yzc1h> <Worksheet 'measurements' id:odfoenj>

Parsing data to a pandas dataframe

  • Now that connection has been established, data is parsed to be viewed

In [7]:
# Import pandas and numpy to make data easier to view
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
print "pandas version: {}".format(pd.__version__)
print "numpy version: {}".format(np.__version__)


/home/lsetiawan/anaconda2/envs/uwapl_em_mc_1aui/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')
/home/lsetiawan/anaconda2/envs/uwapl_em_mc_1aui/lib/python2.7/site-packages/pytz/__init__.py:29: UserWarning: Module argparse was already imported from /home/lsetiawan/anaconda2/envs/uwapl_em_mc_1aui/lib/python2.7/argparse.pyc, but /home/lsetiawan/anaconda2/envs/uwapl_em_mc_1aui/lib/python2.7/site-packages/argparse-1.4.0-py2.7.egg is being added to sys.path
  from pkg_resources import resource_stream
pandas version: 0.18.1
numpy version: 1.10.4

In [8]:
# Getting all the values of sheet1
array1 = s1.get_all_values()
array2 = s2.get_all_values()

In [9]:
# Convert data into pandas dataframe
df = pd.DataFrame(array1)
df.columns = array1[0]
df.drop(df.index[0], inplace=True)
df = df.convert_objects(convert_numeric=True)
df.head()


/home/lsetiawan/anaconda2/envs/uwapl_em_mc_1aui/lib/python2.7/site-packages/ipykernel/__main__.py:5: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
Out[9]:
platform_label depth_m mfn base_url platform deployment data_logger instrument subtype raw_url
1 OOI_CE07SHSM 0 https://rawdata.oceanobservatories.org/files CE07SHSM D00003 cg_data/dcl11 metbk https://rawdata.oceanobservatories.org/files/C...
2 OOI_CE02SHSM 0 https://rawdata.oceanobservatories.org/files CE02SHSM D00003 cg_data/dcl11 metbk https://rawdata.oceanobservatories.org/files/C...
3 OOI_CE04OSSM 0 https://rawdata.oceanobservatories.org/files CE04OSSM D00002 cg_data/dcl11 metbk https://rawdata.oceanobservatories.org/files/C...
4 OOI_CE09OSSM 0 https://rawdata.oceanobservatories.org/files CE09OSSM D00003 cg_data/dcl11 metbk https://rawdata.oceanobservatories.org/files/C...
5 OOI_CE01ISSM -7 https://rawdata.oceanobservatories.org/files CE01ISSM D00005 dcl16 ctdbp1 ctd_type::2 https://rawdata.oceanobservatories.org/files/C...

In [10]:
# Convert data into pandas dataframe
df1 = pd.DataFrame(array2)
df1.columns = array2[0]
df1.drop(df1.index[0], inplace=True)
df1 = df1.convert_objects(convert_numeric=True)
df1.head()


/home/lsetiawan/anaconda2/envs/uwapl_em_mc_1aui/lib/python2.7/site-packages/ipykernel/__main__.py:5: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
Out[10]:
instrument data_products relative_depth_m OOI_units measurement_label notes
1 metbk air_temperature 4.1 degC A1_AirTemp
2 metbk barometric_pressure 4.3 mbar A1_BarPress
3 metbk relative_humidity 4.1 % A1_RelHumidity
4 metbk eastward_wind_velocity 4.7 m/s A1_WindSpeed
5 metbk northward_wind_velocity 4.7 m/s A1_WindSpeed

In [11]:
def createJSON(df):
    # Get Platforms
    json_data = df[['platform','instrument','depth_m','mfn','deployment','data_logger','subtype']].reset_index(drop=True)
    platforms = json_data['platform'].unique()
    mainkey = dict()
    prop = dict()
    
    # Gather Platform info together
    plat = [json_data.loc[json_data['platform'] == p] for p in platforms]
    
    # Create JSON
    for i in range(0, len(plat)):
        instrum = dict()
        mainkey = dict()
        for j in range(0, len(plat[i]['platform'].values)):
            platform_name = plat[i]['platform'].values[j]
            instrument_name = plat[i]['instrument'].values[j]
            depth_m = plat[i]['depth_m'].values[j]
            mfn = plat[i]['mfn'].values[j]
            deployment = plat[i]['deployment'].values[j]
            data_logger = plat[i]['data_logger'].values[j]
            subtype = plat[i]['subtype'].values[j]

            # Check for mfn
            if mfn != '':
                mfn = True
            else:
                mfn = False
            # Getting subtype
            if subtype != '':
                subtype = subtype.split('::')[1]
            else:
                subtype = None

            prop['depth_m'] = float(depth_m)
            prop['mfn'] = mfn
            prop['deployment'] = deployment
            prop['data_logger'] = data_logger
            prop['subtype'] = subtype
            instrum['{}'.format(instrument_name)] = prop
            mainkey['{}'.format(platform_name)] = instrum
            prop = dict()
            
        # prints the JSON structured dictionary
        print json.dumps(mainkey, sort_keys=True, indent=4, separators=(',', ': '))
        # Output to JSON file 
        fj = open("{}.json".format(platform_name), 'w')
        fj.write(json.dumps(mainkey, sort_keys=False, indent=4, separators=(',', ': ')))
        fj.close()
createJSON(df)


{
    "CE07SHSM": {
        "ctdbp1": {
            "data_logger": "cg_data/dcl27",
            "deployment": "D00003",
            "depth_m": -7.0,
            "mfn": false,
            "subtype": "1"
        },
        "ctdbp2": {
            "data_logger": "cg_data/dcl37",
            "deployment": "D00003",
            "depth_m": -87.0,
            "mfn": true,
            "subtype": "2"
        },
        "metbk": {
            "data_logger": "cg_data/dcl11",
            "deployment": "D00003",
            "depth_m": 0.0,
            "mfn": false,
            "subtype": null
        },
        "wavss": {
            "data_logger": "cg_data/dcl12",
            "deployment": "D00003",
            "depth_m": 0.0,
            "mfn": false,
            "subtype": null
        }
    }
}
{
    "CE02SHSM": {
        "ctdbp": {
            "data_logger": "cg_data/dcl27",
            "deployment": "D00003",
            "depth_m": -7.0,
            "mfn": false,
            "subtype": "1"
        },
        "metbk": {
            "data_logger": "cg_data/dcl11",
            "deployment": "D00003",
            "depth_m": 0.0,
            "mfn": false,
            "subtype": null
        },
        "wavss": {
            "data_logger": "cg_data/dcl12",
            "deployment": "D00003",
            "depth_m": 0.0,
            "mfn": false,
            "subtype": null
        }
    }
}
{
    "CE04OSSM": {
        "ctdbp": {
            "data_logger": "cg_data/dcl27",
            "deployment": "D00002",
            "depth_m": -7.0,
            "mfn": false,
            "subtype": "1"
        },
        "metbk": {
            "data_logger": "cg_data/dcl11",
            "deployment": "D00002",
            "depth_m": 0.0,
            "mfn": false,
            "subtype": null
        },
        "wavss": {
            "data_logger": "cg_data/dcl12",
            "deployment": "D00002",
            "depth_m": 0.0,
            "mfn": false,
            "subtype": null
        }
    }
}
{
    "CE09OSSM": {
        "ctdbp1": {
            "data_logger": "cg_data/dcl27",
            "deployment": "D00003",
            "depth_m": -7.0,
            "mfn": false,
            "subtype": "1"
        },
        "ctdbp2": {
            "data_logger": "cg_data/dcl37",
            "deployment": "D00001",
            "depth_m": -540.0,
            "mfn": true,
            "subtype": "2"
        },
        "metbk": {
            "data_logger": "cg_data/dcl11",
            "deployment": "D00003",
            "depth_m": 0.0,
            "mfn": false,
            "subtype": null
        },
        "wavss": {
            "data_logger": "cg_data/dcl12",
            "deployment": "D00003",
            "depth_m": 0.0,
            "mfn": false,
            "subtype": null
        }
    }
}
{
    "CE01ISSM": {
        "ctdbp1": {
            "data_logger": "dcl16",
            "deployment": "D00005",
            "depth_m": -7.0,
            "mfn": false,
            "subtype": "2"
        },
        "ctdbp2": {
            "data_logger": "dcl37",
            "deployment": "D00001",
            "depth_m": -25.0,
            "mfn": true,
            "subtype": "2"
        }
    }
}
{
    "CE02SHBP": {
        "": {
            "data_logger": "LJ01D",
            "deployment": "",
            "depth_m": -80.0,
            "mfn": false,
            "subtype": null
        }
    }
}
{
    "CE04OSBP": {
        "": {
            "data_logger": "LJ01C",
            "deployment": "",
            "depth_m": -580.0,
            "mfn": false,
            "subtype": null
        }
    }
}
{
    "CE06ISSM": {
        "ctdbp1": {
            "data_logger": "dcl16",
            "deployment": "D00004",
            "depth_m": -7.0,
            "mfn": false,
            "subtype": "2"
        },
        "ctdbp2": {
            "data_logger": "dcl37",
            "deployment": "D00004",
            "depth_m": -29.0,
            "mfn": true,
            "subtype": "2"
        }
    }
}

In [ ]: