CVC Data Summaries (with simple method hydrology)

Setup the basic working environment


In [ ]:
%matplotlib inline

import os
import sys
import datetime
import warnings

import numpy as np
import matplotlib.pyplot as plt
import pandas
import seaborn
seaborn.set(style='ticks', context='paper')

import wqio
from wqio import utils
import pybmpdb
import pynsqd

import pycvc

min_precip = 1.9999
big_storm_date = datetime.date(2013, 7, 8)

pybmpdb.setMPLStyle()
seaborn.set(style='ticks', rc={'text.usetex': False}, palette='deep')

POCs = [
    p['cvcname'] 
    for p in filter(
        lambda p: p['include'], 
        pycvc.info.POC_dicts
    )
]

warning_filter = "ignore" 
warnings.simplefilter(warning_filter)

## general result groupings
groups = [
    {'name': 'Overall', 'col': None},
    {'name': 'By Year', 'col': 'year'},
    {'name': 'By Season', 'col': 'season'},
    {'name': 'By Grouped Season', 'col': 'grouped_season'},
    {'name': 'By Storm Size', 'col': 'storm_bin'},
]

site_lists = [
    {
        'sites': ['ED-1'], 
        'name': 'ElmDrive', 
        'colors': [seaborn.color_palette()[0]],
        'markers': ['o'],
    }, {
        'sites': ['LV-1', 'LV-2', 'LV-4'],
        'name': 'Lakeview',
        'colors': seaborn.color_palette()[1:4],
        'markers': ['s', '^', 'v'],
    },
]

poc_lists = [
    {
        'params': POCs[6:],
        'units': 'mg/L', 
        'name': 'Nutrients'
    }, {
        'params': POCs[:6],
        'units': 'μg/L',
        'name': 'Metals'
    },
]

Load water quality data

External sources


In [ ]:
bmpdb = pycvc.external.bmpdb('black', 'D')
nsqdata = pycvc.external.nsqd('black', 'd')

CVC tidy data

Data using the Simple Method hydrology is suffixed with _simple. You could also use the SWMM Model hydrology with the _SWMM files.

Loads from the July 8, 2013 storm are removed here.


In [ ]:
# simple method file
tidy_file = 'output/tidy/wq_simple.csv'

# # SWMM file
# tidy_file = 'output/tidy/wq_swmm.csv'

datecols = [
    'start_date',
    'end_date',
    'samplestart',
    'samplestop',
]

wq = (
    pandas.read_csv(tidy_file, parse_dates=datecols)
        .pipe(pycvc.summary.classify_storms, 'total_precip_depth')
        .pipe(pycvc.summary.remove_load_data_from_storms, [big_storm_date], 'start_date')
)

Water Quality Summaries

Prevalence Tables


In [ ]:
with pandas.ExcelWriter('output/xlsx/CVCWQ_DataInventory.xlsx') as xl_prev_tables:
    raw = pycvc.summary.prevalence_table(wq, groupby_col='samplestart')
    raw.to_excel(xl_prev_tables, sheet_name='Raw', index=False)
    for g in groups:
        prevalence = pycvc.summary.prevalence_table(wq, groupby_col=g['col'])
        prevalence.to_excel(xl_prev_tables, sheet_name=g['name'], index=False)

Concentrations Stats


In [ ]:
summaryopts = dict(rescol='concentration', sampletype='composite')
with pandas.ExcelWriter('output/xlsx/CVCWQ_ConcStats.xlsx') as xl_conc:
    for g in groups:
        wq_stats = pycvc.summary.wq_summary(wq, groupby_col=g['col'], **summaryopts)
        wq_stats.to_excel(xl_conc, sheet_name=g['name'], index=False)

Load Stats


In [ ]:
summaryopts = dict(rescol='load_outflow', sampletype='composite')
with pandas.ExcelWriter('output/xlsx/CVCWQ_LoadStats.xlsx') as xl_loads:
    for g in groups:
        load_stats = pycvc.summary.wq_summary(wq, groupby_col=g['col'], **summaryopts)
        load_stats.to_excel(xl_loads, sheet_name=g['name'], index=False)

Total Loads Summary


In [ ]:
with pandas.ExcelWriter('output/xlsx/CVCWQ_LoadsTotals.xlsx') as xl_load_totals:
    for g in groups:
        load_totals = pycvc.summary.load_totals(wq, groupby_col=g['col'])
        load_totals.to_excel(xl_load_totals, sheet_name=g['name'], index=False)

Total Load Reduction Tables and Figures


In [ ]:
ed_nutrients = ['Nitrate + Nitrite', 'Orthophosphate (P)', 'Total Kjeldahl Nitrogen (TKN)', 'Total Phosphorus']
ed_metals = ['Cadmium (Cd)', 'Copper (Cu)', 'Lead (Pb)', 'Nickel (Ni)', 'Zinc (Zn)']
lv_nutrients = ['Nitrate (N)', 'Orthophosphate (P)', 'Total Kjeldahl Nitrogen (TKN)', 'Total Phosphorus']
lv_metals = ['Cadmium (Cd)', 'Copper (Cu)', 'Lead (Pb)', 'Nickel (Ni)', 'Iron (Fe)', 'Zinc (Zn)']

figures = [
    {
        'sites': ['ED-1'], 
        'name': 'ElmDrive_TSS', 
        'params': ['Total Suspended Solids'], 
        'leg_loc': (0.5, 0.05)
    }, {
        'sites': ['LV-2', 'LV-4'],
        'name': 'LakeViewTSS',
        'params': ['Total Suspended Solids'],
        'leg_loc': (0.5, 0.05)
    }, {
        'sites': ['ED-1'],
        'name': 'ElmDrive_Nutrients',
        'params': ed_nutrients,
        'leg_loc': (0.6, 0.03)
    }, {
        'sites': ['LV-2', 'LV-4'],
        'name': 'LakeView_Nutrients',
        'params': lv_nutrients,
        'leg_loc': (0.6, 0.03)
    }, {
        'sites': ['ED-1'],
        'name': 'ElmDrive_Metals',
        'params': ed_metals,
        'leg_loc': (0.6, 0.03)
    }, {
        'sites': ['LV-2', 'LV-4'],
        'name': 'LakeView_Metals',
        'params': lv_metals,
        'leg_loc': (0.57, 0.02)
    },
]

with pandas.ExcelWriter('output/xlsx/CVCWQ_LoadReductionPct.xlsx') as xl_load_pct:
    for g in groups:
        reduction = (
            wq.pipe(pycvc.summary.load_reduction_pct, groupby_col=g['col'])
        )
        reduction.to_excel(xl_load_pct, sheet_name=g['name'], index=False)
        
        if g['col'] is not None and g['col'] != 'season':
            for f in figures:
                _params = f['params']
                _sites = f['sites']
                fg = pycvc.viz.reduction_plot(
                    reduction.query("site in @_sites and parameter in @_params"), 
                    _params, 
                    'parameter', 
                    'site', 
                    g['col'], 
                    f['leg_loc'],
                    lower='load_red_lower',
                    reduction='load_red',
                    upper='load_red_upper',

                )
                fg.set_axis_labels(x_var='', y_var='Load Reduction (%)')
                for ax in fg.axes:
                    ax.set_ylim(top=100)
                    if g['col'] == 'storm_bin':
                        utils.figutils.rotateTickLabels(ax, 20, 'x')

                fg.savefig('output/img/LoadReduction/{}_{}.png'.format(f['name'], g['col']))

Faceted Plots

Combine NSQD, BMP DB datasets with CVC data


In [ ]:
bmptidy = pycvc.external.combine_wq(wq, bmpdb, 'category')
nsqdtidy = pycvc.external.combine_wq(wq, nsqdata, 'primary_landuse')

Boxplots with external sources


In [ ]:
bmps = [
    'Bioretention', 'Detention Basin',
    'Manufactured Device', 'Retention Pond',
    'Wetland Channel',
]

LUs = [
    'Commercial', 'Freeway', 'Industrial', 
    'Institutional',  'Residential', 'Open Space',
]

for sl in site_lists:
    for pocs in poc_lists:
        box_opts = dict(
            sites=sl['sites'], 
            params=pocs['params'],
            units=pocs['units'], 
        )

        bmppal = sl['colors'].copy() + seaborn.color_palette('BuPu', n_colors=len(bmps))
        fg1 = pycvc.viz.external_boxplot(bmptidy, categories=bmps, palette=bmppal, **box_opts)
        fg1name = 'Boxplot_BMPBD_{}_{}.png'.format(sl['name'], pocs['name'])
        pycvc.viz.savefig(fg1.fig, fg1name, extra='Megafigure')
        
        nsqdpal = sl['colors'].copy() + seaborn.color_palette('RdPu', n_colors=len(LUs))
        fg2 = pycvc.viz.external_boxplot(nsqdtidy, categories=LUs, palette=nsqdpal, **box_opts)
        fg2name = 'Boxplot_NSQD_{}_{}.png'.format(sl['name'], pocs['name'])
        pycvc.viz.savefig(fg2.fig, fg2name, extra='Megafigure')

Time series, probability and seasonal box and whisker plots


In [ ]:
for sl in site_lists:
    for pocs in poc_lists:
        # common options for the plots
        plot_opts = dict(
            sites=sl['sites'],
            params=pocs['params'],
            units=pocs['units'], 
            palette=sl['colors'], 
            markers=sl['markers'],
        )
        
        # plots
        ts = pycvc.viz.ts_plot(wq, 'samplestart', 'concentration', **plot_opts)
        pp = pycvc.viz.prob_plot(wq, 'concentration', **plot_opts)
        bp = pycvc.viz.seasonal_boxplot(wq, 'concentration', params=pocs['params'],
                                        units=pocs['units'])
                                        
        # output filenames
        tsname = 'TimeSeries_{}_{}.png'.format(sl['name'], pocs['name'])
        ppname = 'ProbPlot_{}_{}.png'.format(sl['name'], pocs['name'])
        bpname = 'Boxplot_Seasonal_{}_{}.png'.format(sl['name'], pocs['name'])
        
        # save the figures
        pycvc.viz.savefig(ts.fig, tsname, extra='MegaFigure')
        pycvc.viz.savefig(pp.fig, ppname, extra='MegaFigure')
        pycvc.viz.savefig(bp.fig, bpname, extra='MegaFigure')