Load, filter, and export the NSQD dataset

The cell below imports the libraries we need and defines some functions that help us clean up the NSQD data.



In [1]:

    
import numpy 

import wqio
import pynsqd
import pycvc

def get_cvc_parameter(nsqdparam):
    """Translate an NSQD parameter name into its CVC parameter name.

    Parameters
    ----------
    nsqdparam : object
        NSQD parameter name; compared for equality against the
        ``'nsqdname'`` entry of every dictionary in
        ``pycvc.info.POC_dicts``.

    Returns
    -------
    object
        The ``'cvcname'`` entry of the first matching dictionary, or
        ``numpy.nan`` when no dictionary matches.
    """
    matches = [
        poc for poc in pycvc.info.POC_dicts
        if poc['nsqdname'] == nsqdparam
    ]
    if not matches:
        # No CVC equivalent was found: flag with NaN instead of raising.
        return numpy.nan
    return matches[0]['cvcname']


def fix_nsqd_bacteria_units(df, unitscol='units'):
    """Relabel bacteria units from 'MPN/100 mL' to 'CFU/100 mL'.

    The input dataframe is not modified; the replacement is applied to a
    copy, which makes the function safe to re-run and to use in a
    ``DataFrame.pipe`` chain.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing a units column.
    unitscol : column label, optional (default = 'units')
        Name of the column in ``df`` that holds the units.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` in which every 'MPN/100 mL' value of ``unitscol``
        is replaced by 'CFU/100 mL'. All other values are left as they are.
    """
    fixed = df.copy()
    # NOTE(review): presumably done so the NSQD units match the CVC
    # convention -- confirm against the CVC data.
    fixed[unitscol] = fixed[unitscol].replace(to_replace='MPN/100 mL', value='CFU/100 mL')
    return fixed


# NSQD-side names ('nsqdname') of every entry in pycvc.info.POC_dicts;
# used to restrict the NSQD records to these parameters.
nsqd_params = [poc['nsqdname'] for poc in pycvc.info.POC_dicts]

Create a raw data set, then compute season and apply basic filters

(also export to CSV file)



In [2]:

    
# Load the full NSQD table. It is never modified, so `clean_data` can be
# rebuilt from it by simply re-running this cell.
raw_data = pynsqd.NSQData().data

clean_data = (
    raw_data
        # Row filters run first so that the row-wise `apply` calls below
        # only process the rows that are kept. Selecting 'Residential'
        # also excludes 'Unknown' land uses, so no separate filter for
        # 'Unknown' is needed.
        .query("primary_landuse == 'Residential'")
        .query("parameter in @nsqd_params")
        .query("fraction == 'Total'")
        .query("epa_rain_zone == 1")
        # Constant station label for every NSQD record.
        # NOTE(review): presumably chosen to match the CVC station names -- confirm.
        .assign(station='outflow')
        # Look up the CVC name of each NSQD parameter (NaN if there is none).
        .assign(cvcparam=lambda df: df['parameter'].apply(get_cvc_parameter))
        .assign(season=lambda df: df['start_date'].apply(wqio.utils.getSeason))
        # Swap the NSQD parameter names for the CVC ones.
        .drop('parameter', axis=1)
        .rename(columns={'cvcparam': 'parameter'})
        .pipe(fix_nsqd_bacteria_units)
)

Show the sample counts for each parameter



In [3]:

    
# Number of records per parameter (rows) and season (columns).
# NaN marks a parameter/season combination without any records.
(
    clean_data
        .groupby(by=['parameter', 'season'])
        .size()
        .unstack(level='season')
)









    Out[3]:






  
    
      season
      autumn
      spring
      summer
      winter
    
    
      parameter
      
      
      
      
    
  
  
    
      Cadmium (Cd)
      35
      35
      49
      22
    
    
      Copper (Cu)
      61
      105
      106
      29
    
    
      Dissolved Chloride (Cl)
      9
      12
      16
      NaN
    
    
      Escherichia coli
      4
      7
      3
      5
    
    
      Lead (Pb)
      58
      85
      81
      31
    
    
      Nickel (Ni)
      11
      9
      10
      7
    
    
      Nitrate + Nitrite
      97
      123
      153
      29
    
    
      Orthophosphate (P)
      5
      8
      5
      5
    
    
      Total Kjeldahl Nitrogen (TKN)
      99
      124
      156
      37
    
    
      Total Oil & Grease
      5
      12
      4
      5
    
    
      Total Phosphorus
      115
      174
      203
      41
    
    
      Total Suspended Solids
      104
      151
      185
      35
    
    
      Zinc (Zn)
      64
      101
      125
      29

Export TSS to a CSV file



In [4]:

    
# Keep only the Total Suspended Solids records and write them to disk
# without the dataframe index.
clean_data[clean_data['parameter'] == 'Total Suspended Solids'].to_csv(
    'NSQD_Res_TSS.csv',
    index=False,
)

season	autumn	spring	summer	winter
parameter
Cadmium (Cd)	35	35	49	22
Copper (Cu)	61	105	106	29
Dissolved Chloride (Cl)	9	12	16	NaN
Escherichia coli	4	7	3	5
Lead (Pb)	58	85	81	31
Nickel (Ni)	11	9	10	7
Nitrate + Nitrite	97	123	153	29
Orthophosphate (P)	5	8	5	5
Total Kjeldahl Nitrogen (TKN)	99	124	156	37
Total Oil & Grease	5	12	4	5
Total Phosphorus	115	174	203	41
Total Suspended Solids	104	151	185	35
Zinc (Zn)	64	101	125	29