In [1]:
%matplotlib inline
import os
import sys
import platform
import matplotlib
import pandas as pd
import numpy as np
import scipy
import statsmodels.api as sm
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import matplotlib.ticker as tick
from matplotlib.backends.backend_pdf import PdfPages
from datetime import datetime,timedelta
from pylab import rcParams
rcParams['figure.figsize'] = 15, 15
#rcParams['figure.figsize'] = 10, 10

In [2]:
# custom functions for transducer data import
import Snake_Valley_Data_Import as svdi
import wellapplication as wa
# Helper objects from the wellapplication package: presumably a USGS
# web-service client and a Piper-diagram plotter -- confirm against the
# wellapplication documentation.
USGS = wa.usgs()
piper = wa.piper()


ok

In [3]:
# Record the runtime environment so results can be tied to exact library versions.
print("Operating System {0} {1}".format(platform.system(), platform.release()))
print("Python Version {0}".format(sys.version))
print("Pandas Version {0}".format(pd.__version__))
print("Numpy Version {0}".format(np.__version__))
print("Matplotlib Version {0}".format(matplotlib.__version__))
print("Well Application Version {0}".format(wa.__version__))
print("Scipy Version {0}".format(scipy.__version__))


Operating System Windows 7
Python Version 2.7.10 (default, May 23 2015, 09:40:32) [MSC v.1500 32 bit (Intel)]
Pandas Version 0.18.1
Numpy Version 1.11.0
Matplotlib Version 1.5.1
Well Application Version 0.2.13
Scipy Version 0.17.0

In [5]:
# Root drive differs by OS: a mapped network drive on Windows, a mounted
# external volume elsewhere.
drive = 'M:' if platform.system() == 'Windows' else '/media/p/Transcend/'

# Output (GIS) folder and raw transducer archive, both built off that root.
outputFolder = drive + '/GIS/'
raw_archive_folder = drive + '/PROJECTS/Snake Valley Water/Transducer Data/Raw_data_archive/'

Climate Data

Import Raw Data


In [6]:
def impClimateData(folder, filename, prefix, yearCutoff):
    """Read a GHCN climate CSV export and derive precipitation-departure series.

    Parameters
    ----------
    folder : str
        Directory containing the file (must end with a path separator).
    filename : str
        Name of the CSV export; the first 15 rows are metadata and skipped.
    prefix : str
        Station prefix used to label the derived columns (e.g. 'Eskd').
    yearCutoff : int
        Records before Jan 1 of this year are discarded (was hard-coded to 1960).

    Returns
    -------
    pandas.DataFrame indexed by date ('dt') with '<prefix> Precipitation',
    '<prefix> Snow Depth', '<prefix> Snow Fall', and the cumulative departure
    columns '<prefix> cumdept' and '<prefix> stdcumdept'.
    """
    # parse_dates=[0]/index_col=0 is equivalent to the old dict form
    # parse_dates={'dt':[0]}; the index is renamed 'dt' explicitly.
    df = pd.read_csv(folder + filename, skiprows=15, parse_dates=[0],
                     index_col=0, na_values='M')
    df.index.name = 'dt'
    df[prefix + ' Precipitation'] = pd.to_numeric(df['Precipitation'], errors='coerce')
    df[prefix + ' Snow Depth'] = pd.to_numeric(df['Snow Depth'], errors='coerce')
    # BUG FIX: this column previously copied 'Snow Depth' again instead of
    # 'Snow Fall' (assumes the export has a 'Snow Fall' column -- verify).
    df[prefix + ' Snow Fall'] = pd.to_numeric(df['Snow Fall'], errors='coerce')
    # Honor the yearCutoff argument; pd.datetime has been removed from pandas.
    df = df[df.index >= datetime(yearCutoff, 1, 1)]
    # Vectorized cumulative departure from the station mean; the original
    # recomputed the mean/std inside a per-row lambda.
    precip = df[prefix + ' Precipitation']
    df[prefix + ' cumdept'] = (precip - precip.mean()).cumsum()
    df[prefix + ' stdcumdept'] = ((precip - precip.mean()) / precip.std()).cumsum()
    return df

In [7]:
# Load each station's GHCN export; the prefix labels that station's columns.
# Prefixes presumably correspond to Eskdale, Partoun, Callao, Fish Springs,
# and Ibapah stations -- TODO confirm against the USC station IDs in the names.
Eskd = impClimateData(outputFolder, 'UCC_ghcn_USC00422607_2016_03_28_1459219796.csv', 'Eskd', 1960)
Part = impClimateData(outputFolder, 'UCC_ghcn_USC00426708_2016_03_28_1459222060.csv', 'Part', 1960)
Calo = impClimateData(outputFolder, 'UCC_ghcn_USC00421144_2016_03_28_1459222774.csv', 'Calo', 1960)
Fish = impClimateData(outputFolder, 'UCC_ghcn_USC00422852_2016_03_28_1459223049.csv', 'Fish', 1960)
IBAH = impClimateData(outputFolder, 'UCC_ghcn_USC00424174_2016_03_28_1459224176.csv', 'IBAH', 1960)

Merge Datasets


In [8]:
# Outer-join the station tables on their date indexes: pair the stations
# first, then fold the pairs into one combined climate table. The pairing
# order is kept because it determines the _x/_y suffixes on the raw columns.
PC = Part.merge(Calo, left_index=True, right_index=True, how='outer')
EF = Eskd.merge(Fish, left_index=True, right_index=True, how='outer')
IBEF = IBAH.merge(EF, left_index=True, right_index=True, how='outer')
climate = IBEF.merge(PC, left_index=True, right_index=True, how='outer')

In [9]:
# Average the per-station cumulative departure curves into basin-wide series,
# then smooth the standardized version with a centered 60-sample window.
stations = ['Eskd', 'Fish', 'Calo', 'Part']
climate['avgDept'] = climate[[s + ' cumdept' for s in stations]].mean(axis=1)
climate['StdAvgDept'] = climate[[s + ' stdcumdept' for s in stations]].mean(axis=1)
climate['movStdAvgDept'] = climate['StdAvgDept'].rolling(window=60, center=True).mean()

UGS Data

Import Data


In [10]:
# Load the half-day UGS transducer record (DateTime-indexed) and the site
# qualifier table mapping WellID -> monitoring type / well name.
UGS = pd.read_csv(outputFolder + "halfDayUGSdata.csv", low_memory=False, 
                 index_col='DateTime', parse_dates=True)
g = pd.read_csv(outputFolder + 'SitesWQualifier.csv')
#wells = list(g['WellID'].values)
siteDict = pd.Series(g['MonType'].values, index=g['WellID']).to_dict()


#UGS = UGS[UGS['site_no'].isin([11,12,13,14,15,16,17,18,19,20,21,24,25,36,37,38,70,73,74,60,69,58,59])]
# Restrict to the subset of wells used in this analysis.
UGS = UGS[UGS['site_no'].isin([69,18,31,21,74,70,14,24,37,58,59])]
# assign site types (dropped the stray positional `1`, which was being passed
# as Series.apply's deprecated convert_dtype argument)
UGS['MonType'] = UGS['site_no'].apply(lambda x: siteDict[x])


# WellID -> well name lookup. Replaces an O(n^2) per-row filter loop; the
# drop_duplicates keeps the first match, exactly as the old loop did.
g_first = g.drop_duplicates(subset='WellID', keep='first')
wellid = dict(zip(g_first['WellID'], g_first['Well']))

UGS['julian'] = UGS.index.to_julian_date()

TypeDict = {'S':'Wetland Monitoring Sites','P':'Agricultural Monitoring Sites',
            'W':'Sites Outside of Pumping and Evapotranspiration Influence'}

UGS['well_name'] = UGS['site_no'].apply(lambda x: wellid[x])

# Site 69 is reclassified as outside pumping/ET influence.
# .loc replaces the DataFrame.ix indexer removed from modern pandas.
UGS.loc[UGS['site_no'] == 69, 'MonType'] = 'W'
# pd.datetime was removed from pandas; use datetime.datetime directly.
UGS = UGS[(UGS.index > datetime(2010,1,1)) & (UGS.index < datetime(2015,1,1))]

In [11]:
# Monthly means per site: group the key hydrograph columns by site, resample
# each site's series to month-end, then flatten back to a DateTime index.
cols = ['stdWL', 'lev_va', 'avgDiffWL', 'diff', 'site_no']
monthly = UGS[cols].groupby('site_no').resample('1M').mean()
monthly = monthly.drop(['site_no'], axis=1).reset_index()
UGSa = monthly.set_index('DateTime')
UGSa['well_name'] = UGSa['site_no'].apply(lambda x: wellid[x])

In [12]:
# One standardized hydrograph trace per well on a shared axis.
# (Loop variables renamed: the original `g` shadowed the qualifier DataFrame.)
plt.figure()
for well_name, well_df in UGSa.groupby('well_name'):
    well_df['stdWL'].plot(label=well_name)

plt.legend()
plt.ylabel("Standardized Deviation from Mean Water Level Elevation")
plt.xlabel("Date Time")
plt.savefig(outputFolder+"localwells.svg")


Plot Monthly Data


In [90]:
pdf = PdfPages(outputFolder + 'monthly.pdf')

# Day-of-year column for seasonal (within-year) grouping.
UGS['doy'] = UGS.index.dayofyear


def _median_by(frame, key, col):
    """Median of `col` per value of `key`; returns (group index, medians)."""
    med = frame.groupby([key])[col].median()
    return med.index, med


grp = "doy"
# Removed two stray plt.figure() calls (before the subplots and before
# pdf.close) that only produced blank figures/pages.
f, ax = plt.subplots(2, sharex=True)

# --- Top panel: deviation from mean water elevation (ft) ---
data = 'avgDiffWL'
title = 'Deviation from Mean Water Elevation (ft)'
x, y = _median_by(UGS, grp, data)
x1, y1 = _median_by(UGS[UGS['MonType'] == 'P'], grp, data)
x2, y2 = _median_by(UGS[UGS['MonType'] == 'W'], grp, data)
x3, y3 = _median_by(UGS[UGS['MonType'] == 'S'], grp, data)

ax[0].plot(x, y, color='blue', label='All')
ax[0].plot(x1, y1, color='red', label='Near Pumping')
ax[0].plot(x2, y2, color='green', label='Away from Pumping & Springs')
ax[0].plot(x3, y3, color='purple', label='Near Springs')

ax[0].set_ylabel(title)
ax[0].grid()
ax[0].legend(loc=3)

# --- Bottom panel: standardized deviation from mean water elevation ---
data = 'stdWL'
title = 'Standardized Deviation from Mean Water Elevation'
x, y = _median_by(UGS, grp, data)
x1, y1 = _median_by(UGS[UGS['MonType'] == 'P'], grp, data)
x2, y2 = _median_by(UGS[UGS['MonType'] == 'W'], grp, data)
x3, y3 = _median_by(UGS[UGS['MonType'] == 'S'], grp, data)

ax[1].plot(x, y, color='blue', label='All')
ax[1].plot(x1, y1, color='red', label='Near Pumping')
ax[1].plot(x2, y2, color='green', label='Away from Pumping & Springs')
ax[1].plot(x3, y3, color='purple', label='Near Springs')

ax[1].set_ylabel(title)
ax[1].grid()
ax[1].set_xlabel('month')
# Month abbreviations placed every 30 days on the day-of-year axis.
dtrng = pd.date_range('1/1/2014', '12/31/2014', freq='1M')
datelabels = [d.strftime('%b') for d in dtrng]
ax[1].set_ylim(-2, 1)
# BUG FIX: range(0,365,30) yields 13 ticks but only 12 labels exist; modern
# matplotlib raises on the mismatch. 12 ticks at 0..330 match Jan..Dec.
ax[1].set_xticks(range(0, 360, 30))
ax[1].set_xticklabels(datelabels)
ax[0].set_xticks(range(0, 360, 30))
ax[0].set_xticklabels(datelabels)
plt.xlim(0, 365)
plt.title('Median Seasonal Variations in UGS Snake Valley Wells')
f.subplots_adjust(hspace=0.05)
pdf.savefig()

pdf.close()


<matplotlib.figure.Figure at 0x1b5d9130>
<matplotlib.figure.Figure at 0x1fe3cf70>

In [77]:
# Scratch check: ~12 thirty-day intervals per year (Python 2 floor division
# yields 12), matching the 30-day x-tick spacing used in the seasonal plots.
365/30


Out[77]:
12

Plot Yearly Data


In [14]:
pdf = PdfPages(outputFolder + 'UGS_yearly.pdf')

# pd.TimeGrouper was removed from pandas; pd.Grouper is the replacement.
grp = pd.Grouper(freq="M")


def _median_series(frame, key, col):
    """Median of `col` for each group of `key`; returns (group index, medians)."""
    med = frame.groupby([key])[col].median()
    return med.index, med


# Removed a stray plt.figure() call that only emitted an empty figure.
f, ax = plt.subplots(2, sharex=True)

# --- Top panel: monthly medians of deviation from mean water elevation ---
data = 'avgDiffWL'
title = 'Deviation from Mean Water Elevation (ft)'
x, y = _median_series(UGS, grp, data)
x1, y1 = _median_series(UGS[UGS['MonType'] == 'P'], grp, data)
x2, y2 = _median_series(UGS[UGS['MonType'] == 'W'], grp, data)
x3, y3 = _median_series(UGS[UGS['MonType'] == 'S'], grp, data)

ax[0].xaxis_date()

ax[0].plot(x, y, color='blue', label='All')
ax[0].plot(x1, y1, color='red', label='Near Pumping')
ax[0].plot(x2, y2, color='green', label='Away from Pumping & Springs')
ax[0].plot(x3, y3, color='purple', label='Near Springs')

ax[0].set_ylabel(title)
ax[0].grid()
ax[0].legend(loc=3)

# --- Bottom panel: monthly medians of the standardized deviation ---
data = 'stdWL'
title = 'Standardized Deviation from Mean Water Elevation'
x, y = _median_series(UGS, grp, data)
x1, y1 = _median_series(UGS[UGS['MonType'] == 'P'], grp, data)
x2, y2 = _median_series(UGS[UGS['MonType'] == 'W'], grp, data)
x3, y3 = _median_series(UGS[UGS['MonType'] == 'S'], grp, data)

ax[1].plot(x, y, color='blue', label='All')
ax[1].plot(x1, y1, color='red', label='Near Pumping')
ax[1].plot(x2, y2, color='green', label='Away from Pumping & Springs')
ax[1].plot(x3, y3, color='purple', label='Near Springs')
f.subplots_adjust(hspace=0.05)

ax[1].set_ylabel(title)
ax[1].grid()

pdf.savefig()
pdf.close()


<matplotlib.figure.Figure at 0x13cf8030>

UGS Seasonal Decomposition & Clustering


In [15]:
# Working table restricted to rows with an actual water-level measurement;
# dropna returns a fresh frame, so the added column does not touch UGS.
pivTab = UGS.dropna(subset=['lev_va'])
pivTab['well_name'] = pivTab['site_no'].apply(lambda s: wellid[s])
siteList = list(pivTab['site_no'].unique())

In [16]:
def clusterDecomp(df, grps, well_id, wls, component):
    '''
    Seasonally decompose each well's series and hierarchically cluster the
    wells on the pairwise correlation of the chosen component.

    INPUT
    -----
    df = time-indexed pandas dataframe; NOTE: mutated in place (gains
         'residuals', 'seasonal', 'trend', and 'date' columns)
    grps = number of groups to use
    well_id = field in df identifying individual wells
    wls = field in df that you want to analyze
    component = seasonal decomposition field that you want to cluster; options: 'trend', 'residuals','seasonal', or the wls field
    
    RETURNS
    -------
    trends = pandas dataframe with cluster data (input rows + 'cluster' id)
    col_linkage = scipy hierarchical-linkage matrix
    labs = labels for dendrogram
    '''
    # Explicitly load the scipy submodules used below; a bare `import scipy`
    # does not make scipy.cluster / scipy.spatial available (the original
    # relied on a transitive import for scipy.spatial).
    from scipy import cluster, spatial  # noqa: F401
    
    # conduct analysis for each site
    for site in df[well_id].unique():
        mask = df[well_id] == site
        # seasonal decomposition magic is here; 365*2+1 half-day samples
        # presumably spans one year of record -- confirm sampling rate.
        # (Newer statsmodels renamed `freq` to `period`; update on upgrade.)
        sd = sm.tsa.seasonal_decompose(df.loc[mask, wls].values, freq=365*2+1)
        # populate fields with the seasonal decomposition output
        # (.loc replaces the .ix indexer removed from modern pandas)
        df.loc[mask, 'residuals'] = sd.resid
        df.loc[mask, 'seasonal'] = sd.seasonal
        df.loc[mask, 'trend'] = sd.trend
    
    # duplicate the date index so it survives reset_index/pivot
    df['date'] = df.index
    
    # pivot the data: one column per well. BUG FIX: the column key was
    # hard-coded to 'well_name'; it now honors the well_id argument
    # (identical behavior for all existing calls, which pass 'well_name').
    seasonal_decomp = df.reset_index().pivot(index='date', columns=well_id, values=component).dropna()
    
    # correlate the data
    correlations_array = np.asarray(seasonal_decomp.corr(method='pearson'))
    
    # cluster the correlations
    col_linkage = scipy.cluster.hierarchy.linkage(scipy.spatial.distance.pdist(correlations_array.T), method='average')
    
    # group the clusters and match them to well ids
    clusters = scipy.cluster.hierarchy.cut_tree(col_linkage, n_clusters=grps)
    clusterdf = pd.DataFrame({well_id: seasonal_decomp.columns, 'cluster': np.concatenate(clusters.tolist())})
    
    # append cluster group to data
    trends = pd.merge(df, clusterdf, on=well_id)
    trends.reset_index(inplace=True)
    trends.set_index('date', inplace=True)
    
    # make labels for the dendrograms
    labs = [i + ' ' + str(clusterdf[clusterdf[well_id] == i]['cluster'].values) for i in seasonal_decomp.columns]

    return trends, col_linkage, labs

In [17]:
grps = 5

# Cluster wells on the trend, seasonal, and raw standardized components.
trends, tlink, tlabs = clusterDecomp(pivTab, grps, 'well_name', 'stdWL', 'trend')
season, slink, slabs = clusterDecomp(pivTab, grps, 'well_name', 'stdWL', 'seasonal')
raw, rlink, rlabs = clusterDecomp(pivTab, grps, 'well_name', 'stdWL', 'stdWL')

# Removed the stray unused `trendhydro = plt.figure()` -- it only produced
# an empty figure.
f, ax = plt.subplots(2, sharex=True)
for i in range(grps):
    title = 'group ' + str(i) + 'T'
    A = trends[trends['cluster'] == i]
    # Per-date median smoothed with a 24-sample rolling mean; computed once
    # (the original recomputed the whole chain separately for x and y).
    smoothed = A['trend'].groupby(A.index).median().rolling(window=24).mean()
    ax[0].plot(smoothed.index, smoothed, label=title)
ax[0].legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
ax[0].set_title('Results of Cluster Analysis on Trend Data')
ax[0].set_ylabel('Standardized Deviation from Mean Water Elevation')
for i in range(grps):
    title = 'group ' + str(i) + 'S'
    A = season[season['cluster'] == i]
    smoothed = A['seasonal'].groupby(A.index).median().rolling(window=24).mean()
    ax[1].plot(smoothed.index, smoothed, label=title)
ax[1].set_ylabel('Standardized Deviation from Mean Water Elevation')
ax[1].legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
ax[1].set_title('Results of Cluster Analysis on Seasonal Data')
f.subplots_adjust(hspace=0.05, right=0.8)
plt.savefig(outputFolder + 'clusterTrends.pdf')


c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\statsmodels\tsa\filters\filtertools.py:28: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  return np.r_[[np.nan] * head, x, [np.nan] * tail]
<matplotlib.figure.Figure at 0x13c8a2b0>

In [18]:
plt.figure()
plt.subplot(211, title='Trend Clusters')
# dendrogram() returns a dict; the old `a,b,c,d,e = ...` unpacked its keys,
# which breaks on newer scipy (a sixth key was added) and was never used.
scipy.cluster.hierarchy.dendrogram(tlink, labels=tlabs, color_threshold=0.90)
plt.xticks(rotation=90)
plt.subplot(212, title='Seasonal Clusters')
scipy.cluster.hierarchy.dendrogram(slink, labels=slabs, color_threshold=0.90)
plt.xticks(rotation=90)
plt.subplots_adjust(hspace=0.5)
plt.savefig(outputFolder + 'multDendro.pdf')



In [19]:
# Persist one row per well for the trend and seasonal cluster assignments
# (column-wise max; the cluster id is constant per well, so max returns it).
for frame, fname in ((trends, 'trend_groups1.csv'), (season, 'seasonal_groups1.csv')):
    frame.groupby('well_name').max().to_csv(outputFolder + fname, index_label='Well')

In [20]:
# Wide table: one standardized-water-level column per well, complete rows only.
pivotWells = (pivTab.reset_index()
              .pivot(index='DateTime', columns='well_name', values='stdWL')
              .dropna())

Determine Slopes


In [24]:
from datetime import timedelta

In [39]:
# Worked example on the statsmodels stack-loss dataset: robust linear model
# with the Huber norm, used here to sanity-check RLM output formatting.
import statsmodels.api as sm

stack = sm.datasets.stackloss.load()
stack.exog = sm.add_constant(stack.exog)
rlm_results = sm.RLM(stack.endog, stack.exog, M=sm.robust.norms.HuberT()).fit()
print(rlm_results.summary())
print(rlm_results.params)


                    Robust linear Model Regression Results                    
==============================================================================
Dep. Variable:                      y   No. Observations:                   21
Model:                            RLM   Df Residuals:                       17
Method:                          IRLS   Df Model:                            3
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 13 Jun 2016                                         
Time:                        14:21:01                                         
No. Iterations:                    19                                         
==============================================================================
                 coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
const        -41.0265      9.792     -4.190      0.000       -60.218   -21.835
x1             0.8294      0.111      7.472      0.000         0.612     1.047
x2             0.9261      0.303      3.057      0.002         0.332     1.520
x3            -0.1278      0.129     -0.994      0.320        -0.380     0.124
==============================================================================

If the model instance has been used for another fit with different fit
parameters, then the fit options might not be the correct ones anymore .
[-41.02649835   0.82938433   0.92606597  -0.12784672]

In [49]:
def RLM(svws, site):
    '''
    Fit a robust linear model of water level vs. days since first record
    for one site.

    INPUT
    -----
    svws = dict of site -> time-indexed DataFrame with 'julian' and 'lev_va'
    site = key into svws (and into the module-level wellid/siteDict lookups)

    RETURNS
    -------
    x0, x2, y2, const, y, slope, x_prime, y_hat, wellId, wellName, montype
    '''
    x0 = svws[site]['julian']
    y = svws[site]['lev_va']
    # Regressor: days elapsed since the first observation.
    x1 = svws[site].index.to_julian_date() - svws[site].index.to_julian_date()[0]
    x = sm.add_constant(x1)

    est = sm.RLM(y, x).fit()
    wellId = site
    slope = est.params[1]
    const = est.params[0]
    wellName = wellid[site]
    montype = siteDict[site]

    # BUG FIX: the prediction grid must span the *offset* regressor x1 the
    # model was fit on, not the absolute julian dates in x0 (which placed the
    # grid far outside the training range, producing meaningless y_hat).
    x_prime = np.linspace(x1.min(), x1.max(), 100)[:, np.newaxis]
    x_prime = sm.add_constant(x_prime)
    y_hat = est.predict(x_prime)

    # Fitted line re-expressed against calendar dates for plotting.
    x2 = [svws[site].index[0] + timedelta(i) for i in x1]
    y2 = [i * slope + const for i in x1]

    return x0, x2, y2, const, y, slope, x_prime, y_hat, wellId, wellName, montype

def OLS(svws, site):
    '''
    Fit an ordinary-least-squares trend of water level vs. days since first
    record for one site.

    INPUT
    -----
    svws = dict of site -> time-indexed DataFrame with a 'lev_va' column
    site = key into svws (and into the module-level wellid/siteDict lookups)

    RETURNS
    -------
    x0, x2, y2, const, y, slope, x_prime, y_hat, wellId, wellName, montype, rsqrd
    '''
    x0 = svws[site].index.to_julian_date()
    # Regressor: days elapsed since the first observation (reuses x0 rather
    # than recomputing the julian dates twice).
    x1 = x0 - x0[0]
    y = svws[site]['lev_va']
    x = sm.add_constant(x1)

    est = sm.OLS(y, x).fit()
    wellId = site
    slope = est.params[1]
    const = est.params[0]
    wellName = wellid[site]
    montype = siteDict[site]

    # BUG FIX: the prediction grid must span the *offset* regressor x1 the
    # model was fit on, not the absolute julian dates in x0.
    x_prime = np.linspace(x1.min(), x1.max(), 100)[:, np.newaxis]
    x_prime = sm.add_constant(x_prime)
    y_hat = est.predict(x_prime)
    rsqrd = round(float(est.rsquared), 2)

    # Fitted line re-expressed against calendar dates for plotting.
    x2 = [svws[site].index[0] + timedelta(i) for i in x1]
    y2 = [i * slope + const for i in x1]

    return x0, x2, y2, const, y, slope, x_prime, y_hat, wellId, wellName, montype, rsqrd

In [55]:
pdf = PdfPages(outputFolder + 'OLSRLM.pdf')

pivTab = UGS.dropna(subset=['lev_va'])

siteList = list(pivTab.site_no.unique())
svws = {}
svwsEarly = {}
svwsLate = {}
svwsPiv = {}

RLMslope, RLMslope_er, RLMslope_lt, OLSslope, OLSslope_er, OLSslope_lt = [],[],[],[],[],[]

OLSrsqrd, OLSrsqrd_er, OLSrsqrd_lt = [], [], []

wellName, wellId, montype = [], [], []

for site in siteList:
    #fig = plt.figure(figsize=(20,10))
    svws[site] = pivTab[pivTab.site_no == site]
    svwsEarly[site] = svws[site][svws[site].index < pd.datetime(2011,5,1)]
    svwsLate[site] = svws[site][svws[site].index > pd.datetime(2012,5,1)]
    
    n = plt.figure(wellid[site])

    x0, x2, y2, const, y, slope, x_prime, y_hat, wellId1, wellName1, montype1 = RLM(svws, site)
    print(slope, const)
    plt.scatter(x2, y, color='blue', label='intermediate')
    plt.plot(x2, y2, c='red', alpha=0.9, zorder = 3, linewidth=2.0, label='RLM fit for all data m= %.5f'%(slope))
    RLMslope.append(slope)

    x0, x2, y2, const, y, slope, x_prime, y_hat, wellId1, wellName1, montype1 = RLM(svwsEarly, site)
    print(slope, const)
    plt.scatter(x2, y, color = 'green', label='early')
    plt.plot(x2, y2, c='lightgreen', alpha=0.9, zorder = 3, linewidth=2.0, label='RLM fit Early m= %.5f'%(slope))
    RLMslope_er.append(slope)
    
    x0, x2, y2, const, y, slope, x_prime, y_hat, wellId1, wellName1, montype1 = RLM(svwsLate, site)
    print(slope, const)
    plt.scatter(x2, y, color='purple', label='late')
    plt.plot(x2, y2, c='pink', alpha=0.9, zorder = 3, linewidth=2.0, label='RLM fit Late m= %.5f'%(slope))
    RLMslope_lt.append(slope)
    

    x0, x2, y2, const, y, slope, x_prime, y_hat, wellId1, wellName1, montype1, rsqrd = OLS(svws, site)
    #plt.scatter(x0,y)
    #plt.plot(x_prime[:, 1], y_hat, c='red', alpha=0.9, zorder = 3, linewidth=2.0, label='OLS fit m= ' + str(slope))
    OLSslope.append(slope)
    OLSrsqrd.append(rsqrd)
    
    x0, x2, y2, const, y, slope, x_prime, y_hat, wellId1, wellName1, montype1, rsqrd = OLS(svwsEarly, site)
    #plt.scatter(x0,y, label='early')
    #plt.plot(x_prime[:, 1], y_hat, c='red', alpha=0.9, zorder = 3, linewidth=2.0, label='OLS fit Early m= ' + str(slope))
    OLSslope_er.append(slope)
    OLSrsqrd_er.append(rsqrd)
    
    x0, x2, y2, y, const, slope, x_prime, y_hat, wellId1, wellName1, montype1, rsqrd = OLS(svwsLate, site)
    #plt.scatter(x0,y, label='late')
    #plt.plot(x_prime[:, 1], y_hat, c='red', alpha=0.9, zorder = 3, linewidth=2.0, label='OLS fit Late m= ' + str(slope))
    OLSslope_lt.append(slope)
    OLSrsqrd_lt.append(rsqrd)
    
    wellId.append(wellId1)    
    wellName.append(wellName1)
    montype.append(montype1)

    plt.legend(scatterpoints=1)
    plt.title(str(wellid[site]))
    plt.xlabel('Date')
    ax = plt.gca()
    ax.get_yaxis().get_major_formatter().set_useOffset(False)
    plt.ylabel('Water Level Elevation (ft)')
    plt.grid()
    plt.tight_layout()
    pdf.savefig(n)
    plt.close()
pdf.close()    

RLMOLS = pd.DataFrame({'RLM slope (ft/day)':RLMslope, 'RLM slope early (ft/day)':RLMslope_er, 'RLM slope late (ft/day)':RLMslope_lt, 
              'OLS slope (ft/day)':OLSslope, 'OLS slope early (ft/day)':OLSslope_er, 'OLS slope late (ft/day)':OLSslope_lt , 
             'OLS r-squared':OLSrsqrd, 'OLS r-squared early':OLSrsqrd_er, 'OLS r-squared late':OLSrsqrd_lt, 
             'Well Name':wellName, 'Well ID':wellId, 'Monitoring Type':montype})
RLMOLS[u'RLM slope (ft/yr)'] = RLMOLS[u'RLM slope (ft/day)'] * 365.25
RLMOLS[u'OLS slope (ft/yr)'] = RLMOLS[u'OLS slope (ft/day)'] * 365.25
RLMOLS[u'RLM early slope (ft/yr)'] = RLMOLS[u'RLM slope early (ft/day)'] * 365.25
RLMOLS[u'OLS early slope (ft/yr)'] = RLMOLS[u'OLS slope early (ft/day)'] * 365.25
RLMOLS[u'RLM late slope (ft/yr)'] = RLMOLS[u'RLM slope late (ft/day)'] * 365.25
RLMOLS[u'OLS late slope (ft/yr)'] = RLMOLS[u'OLS slope late (ft/day)'] * 365.25
RLMOLS.to_csv(outputFolder+'RLMOLS.csv')

RLMOLS.groupby('Monitoring Type')['OLS slope (ft/yr)'].agg({'min':np.min, 'mean':np.mean,'median':np.median,
                                                        'max':np.max, 'std':np.std, 'cnt':(lambda x: np.count_nonzero(~np.isnan(x)))}).reset_index()


(-0.00057305037645607841, 4893.575250145419)
(-0.00065129519859562224, 4893.5798064911123)
(-0.00074092762832041192, 4893.1793677572196)
(-0.00061793604039931966, 4893.0603480163227)
(-0.00072041697572042956, 4893.0588055128883)
(-0.0007991686199239588, 4892.6318113120815)
(-0.00060290211985896715, 4986.2203191681119)
(-0.00063067501078815924, 4986.2330966509307)
(-0.00056855588142195115, 4985.6885455838046)
(-0.00087238209040440065, 4950.68095371443)
(-0.0017008704863023527, 4950.8112997650605)
(-0.0014322458200455163, 4950.2522636635722)
(-0.00013181060675801146, 5451.441495403903)
(0.0002246108914062267, 5451.3511627716807)
(-0.00027132565309090362, 5451.407797578132)
(-0.0011740447855923168, 4984.4657372874044)
(-0.00093577301522353162, 4984.0405789806882)
(-0.0018248910417699125, 4983.7315925784696)
(-0.00050844930564631708, 4817.2022766731534)
(-0.0016899682358678045, 4817.6844191538976)
(0.00082617103636701411, 4816.0769973040087)
(-9.3597445909616474e-05, 4817.5080995739099)
(-0.0010081021818240388, 4817.8277190102544)
(0.00076324386795409774, 4816.970757083247)
(-0.00023138841920433174, 4848.301980995644)
(-0.00051492661552150765, 4848.339633940167)
(-0.000287879786705093, 4848.1337428178294)
(-0.0015245093585438338, 5078.6023781039821)
(-0.0019526945332140472, 5078.7987099519214)
(-0.00099620103624168191, 5077.0210385971468)
(-0.001097814177976122, 5036.1267212379507)
(-0.001656370686747774, 5036.2281560580632)
(-0.0014239230853041809, 5035.3767568957946)
Out[55]:
Monitoring Type std cnt min max median mean
0 P 0.097252 4.0 -0.556290 -0.322284 -0.408615 -0.423951
1 S 0.067715 3.0 -0.162951 -0.028261 -0.083363 -0.091525
2 W 0.086463 4.0 -0.229702 -0.048203 -0.215918 -0.177435

USGS Data

Download Data

  • 160203 -- Great Salt Lake: The Great Salt Lake Basin, excluding the Bear, Weber, and Jordan River Basins. Idaho, Nevada, Utah. Area = 22400 sq.mi.

    • 16020301 -- Hamlin-Snake Valleys. Nevada, Utah. Area = 3100 sq.mi.

In [53]:
# HUC-8 basin code -> name lookup for the study area.
HUCName = {16020301:'Hamlin-Snake Valleys'}

# Pull station metadata and water-level records for the HUC via the
# wellapplication USGS helper (network calls), then archive the site list.
allsites = USGS.getStationInfoFromHUC(16020301)
alldata = USGS.getWLfromHUC(16020301)
siteList = list(alldata.site_no.unique())
allsites.to_csv(outputFolder+'usgs_well_locals.csv')

Add Standardizing Fields


In [54]:
# Per-site standardization: first difference, departure from the site mean,
# and departure expressed in site standard deviations.
for site in siteList:
    mask = alldata.site_no == site        # hoisted; was recomputed per line
    levels = alldata.loc[mask, 'lev_va']  # .loc replaces the removed .ix
    mean = levels.mean()
    std = levels.std()
    alldata.loc[mask, 'diff'] = levels.diff()
    alldata.loc[mask, 'avgDiffWL'] = levels - mean
    alldata.loc[mask, 'stdWL'] = (levels - mean) / std

In [55]:
# Move the index out, coerce the measurement date to datetime, then re-index
# on it (non-inplace form of the same three steps).
alldata = alldata.reset_index()
alldata['lev_dt'] = pd.to_datetime(alldata['lev_dt'])
alldata = alldata.set_index('lev_dt')

In [56]:
# Drop the leftover integer index column and add calendar helper fields
# derived from the datetime index.
alldata = alldata.drop(['index'], axis=1)
idx = alldata.index
alldata['year'] = idx.year
alldata['month'] = idx.month
alldata['doy'] = idx.dayofyear
alldata['julian'] = idx.to_julian_date()

Build Gantt Charts


In [57]:
balldata = alldata.reset_index()
balldata.drop_duplicates(subset = ['site_no','lev_dt'], inplace=True)
snakepivdata = balldata.pivot(index= 'lev_dt', columns = 'site_no', values = 'stdWL').sort_index()
pivdata = balldata.pivot(index= 'lev_dt', columns = 'site_no', values = 'lev_va')
pivdata.sort_index(inplace=True)
pivdata = pivdata.resample('A').mean()

info, fig = wa.gantt(pivdata)
infoless = info[info['count']>4]
snakelist = list(infoless.index.values)
df = alldata[alldata['site_no'].isin(snakelist)]
print(len(snakelist))
info, fig = wa.gantt(pivdata, snakelist)
plt.ylabel('USGS Site Number')
plt.title('Measurement Intervals for USGS Wells in Snake Valley')
plt.tight_layout()
plt.savefig(outputFolder+'SnakeGantt.pdf')


45
<matplotlib.figure.Figure at 0x2c3811d0>