IOOS System Test: Rapid Deployment gages from USGS

Can we obtain water level data from the rapid deployment gages deployed for Hurricane Irene?

This notebook is based on IOOS System Test: Inundation

Methodology:

  • USGS gage data (ASCII text files) obtained from http://ga.water.usgs.gov/flood/hurricane/irene/sites/datafiles/; 90% of the data was zipped up for use in the notebook (large files were removed for efficiency)
  • Station data is automatically unzipped
  • Process the data files (195 stations) and store the data in a dictionary for access
  • Plot water level data for the New Jersey area
  • Plot barometric pressure data for the New Jersey area
  • Plot gage locations on a map, overlaid with the Hurricane Irene track line
  • Plot a time series of maximum water level
  • Plot the locations of maximum water level

Import required libraries


In [1]:
import re
import os
import csv
import uuid
import zipfile

from datetime import datetime

import folium
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.dates as md
import matplotlib.pyplot as plt

from IPython.display import HTML, Javascript, display

from utilities import css_styles, inline_map
css_styles()


Out[1]:
Extract Data - if the data directory does not exist, extract it

In [2]:
def unzip(source_filename, dest_dir):
    """Extract all files from a zip archive into dest_dir."""
    with zipfile.ZipFile(source_filename) as zf:
        zf.extractall(dest_dir)

In [3]:
if not os.path.isdir("data_files"):
    print("Data Dir does not exist... Extracting.")
    unzip('sample_data_files.zip', os.getcwd())

In [4]:
files = os.listdir('data_files') 
print("Water Level Files: %s" % len(files))


Water Level Files: 195
Process Data - Read the data files and create a dict of the fields

In [5]:
def parse_metadata(fname):
    """Parse the '#' header lines of a gage data file into a metadata dict."""
    meta_data = {}
    non_decimal = re.compile(r'[^\d.]+')
    fields = {'Sensor location latitude': 'lat',
              'Sensor location longitude': 'lon',
              'Site id =': 'name',
              'Sensor elevation above NAVD 88 =': 'elevation',
              'Barometric sensor site (source of bp) =': 'bp_source',
              'Lowest recordable water elevation is': 'lowest_wl'}
    with open(os.path.join('data_files', fname)) as f:
        content = f.readlines()
        for k, ln in enumerate(content):
            content[k] = ln.strip()
            if content[k].startswith('#'):
                for fd in fields:
                    if fd in content[k]:
                        if fields[fd] == 'name':
                            meta_data[fields[fd]] = content[k].split(fd)[-1]
                        else:
                            # Keep only digits and '.' before converting to float.
                            val = content[k].split(fd)[-1]
                            val = float(non_decimal.sub('', val))
                            meta_data[fields[fd]] = val
                        if fields[fd] == 'lon':
                            # Longitudes are in the western hemisphere; negate them.
                            meta_data[fields[fd]] = -meta_data[fields[fd]]
    return meta_data
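
As a quick sanity check, the parser can be run on the first file from the listing above. The keys follow the fields mapping; the values shown in the comment are illustrative only, not actual output.

meta = parse_metadata(files[0])
print(meta)
# Expected shape (values illustrative only), e.g.:
# {'name': '...', 'lat': 40.12, 'lon': -74.56,
#  'elevation': 3.2, 'bp_source': 123456.0, 'lowest_wl': 2.1}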

In [6]:
divid = str(uuid.uuid4())

pb = HTML("""
<div style="border: 1px solid black; width:500px">
  <div id="%s" style="background-color:blue; width:0%%">&nbsp;</div>
</div> 
""" % divid)

display(pb)
full_data = {}
for count, fname in enumerate(files):
    meta_data = parse_metadata(fname)
    kw = dict(parse_dates=True, sep='\t', skiprows=29, index_col=0)
    actual_data = pd.read_csv(os.path.join('data_files', fname), **kw)
    full_data[fname] = {'meta': meta_data,
                        'data': actual_data}
    
    percent_complete = ((float(count+1) / float(len(files))) * 100.)
    display(Javascript("$('div#%s').width('%i%%')" %
                       (divid, int(percent_complete))))


 

Show the available fields from the processed data files


In [7]:
print("Data Fields: {}, {}, {}".format(actual_data.index.name,
                                       *actual_data.columns))


Data Fields: date_time_GMT, elevation, nearest_barometric_sensor_psi
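
The processed data is stored in full_data, keyed by file name, with a 'meta' dictionary and a 'data' DataFrame for each station. A quick way to inspect a single station (here the first file from the listing above) is:

station = full_data[files[0]]
print(station['meta'])
station['data'].head()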

Subtract the 'Sensor elevation above NAVD 88 (ft)' offset from the elevation data


In [8]:
for key, value in full_data.iteritems():
    offset = float(value['meta']['elevation'])
    value['data']['elevation'] -= offset

Plot all Water Level data in the NJ area


In [9]:
fig, ax = plt.subplots(figsize=(16, 3))

fig.suptitle('Water Elevation', fontsize=14)
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Elevation (ft)', fontsize=14)

for key, value in full_data.iteritems():
    try:
        if 'SSS-NJ' in key:
            df = value['data']
            ax.plot(df.index, df['elevation'])
    except Exception as e:
        print(e)


Plot all barometric pressure data in the NJ area


In [10]:
fig, ax = plt.subplots(figsize=(16, 3))

fig.suptitle('nearest_barometric_sensor_psi', fontsize=14)
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Nearest barometric sensor (psi)', fontsize=14)

for key, value in full_data.iteritems():
    try:
        if 'SSS-NJ' in key:
            df = value['data']
            ax.plot(df.index, df['nearest_barometric_sensor_psi'])
    except Exception as e:
        print(e)


Map the Gage locations


In [11]:
map = folium.Map(width=800, height=500, location=[30, -73], zoom_start=4)

# Generate the color map for the storms.
color_list = {"Tropical Storm": '#4AD200',
              "Category 1 Hurricane": '#CFD900',
              "Category 2 Hurricane": '#E16400',
              "Category 3 Hurricane": '#ff0000'}


# Add the track line.
with open('track.csv', 'rb') as csvfile:
    spamreader = csv.reader(csvfile, delimiter=',', quotechar='|')
    for row in spamreader:
        lon, lat = row[3], row[2]
        popup = "{} : {} <br> {}".format(row[0], row[1], row[6])
        map.circle_marker([lat, lon], popup=popup,
                          fill_color=color_list[row[6]],
                          radius=10000, line_color='#000000')

# Add the station.
for st in full_data:
    lat = full_data[st]['meta']['lat']
    lon = full_data[st]['meta']['lon']
    map.simple_marker([lat, lon], popup=st, clustered_marker=True)    

map.add_layers_to_map()
inline_map(map)


Out[11]:
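
Note: circle_marker, simple_marker (with clustered_marker=True), and add_layers_to_map belong to the old folium API this notebook was written against and were removed in later folium releases. A rough modern equivalent (a sketch only, assuming a recent folium release with the MarkerCluster plugin, and noting that CircleMarker's radius is in pixels rather than meters) would be:

from folium.plugins import MarkerCluster

m = folium.Map(width=800, height=500, location=[30, -73], zoom_start=4)
cluster = MarkerCluster().add_to(m)

# Inside the track loop:
#     folium.CircleMarker([lat, lon], radius=5, popup=popup, color='#000000',
#                         fill=True, fill_color=color_list[row[6]]).add_to(m)
# Inside the station loop:
#     folium.Marker([lat, lon], popup=st).add_to(cluster)

m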

Generate plot of the maximum water level from each gage


In [12]:
dt, dv = [], []

fig, ax = plt.subplots(figsize=(16, 3))
fig.suptitle('Max Water Level (ft), 2011', fontsize=14)

# Find the time and value of the maximum water level at each gage.
for key, value in full_data.iteritems():
    df = value['data']
    z = df['elevation'].values

    idx = np.argmax(z)
    dt.append(df.index[idx])
    dv.append(z[idx])

# Build the DataFrame and plot once, after the loop.
data_dict = {'elevation': dv,
             'dates': dt}
df = pd.DataFrame(data=data_dict, index=dt, columns=['elevation', 'dates'])

ax.scatter(df.index, df['elevation'])
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Water Level (ft)', fontsize=14)

ax.set_ylim(0, 20)
ax.set_xlim(md.date2num(datetime(2011, 8, 25, 20)),
            md.date2num(datetime(2011, 8, 30)))
ax.xaxis.set_major_formatter(md.DateFormatter('%B,%d\n%H:%M'))


Generate plot of the maximum water level and its location


In [13]:
mpl.rcParams['legend.fontsize'] = 10

x, y, zz, bpz = [], [], [], []

fig, ax = plt.subplots(figsize=(10, 10))

for key, value in full_data.iteritems():
    df = value['data']
    z = df['elevation'].values
    bp = df['nearest_barometric_sensor_psi'].values
    lon = value['meta']['lon']
    lat = value['meta']['lat']
    
    idx = np.argmax(z)
    bpz.append(bp[idx])
    zz.append(z[idx])
    x.append(lon)
    y.append(lat)

bpz = np.array(bpz) * 10.
pts = ax.scatter(x, y, c=zz, s=bpz)
ax.set_xlabel('Lon')
ax.set_ylabel('Lat')
title = ("Plot Showing Locations of Maximum Water Level\n"
         "Color coded by Maximum water level (ft)\n"
         "Sized by barometric pressure (psi)")
ax.set_title(title)
cb = fig.colorbar(pts)
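
A small finishing touch, not in the original notebook, is to label the colorbar so the color scale is self-explanatory:

cb.set_label('Max water level (ft)')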