In [2]:
%matplotlib inline
import os
import sys
import platform
import matplotlib
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import matplotlib.ticker as tick
from matplotlib.backends.backend_pdf import PdfPages
from datetime import datetime,timedelta
from pylab import rcParams
rcParams['figure.figsize'] = 15, 15


C:\WinPython-64bit-2.7.10.3\python-2.7.10.amd64\lib\site-packages\pandas\computation\__init__.py:19: UserWarning: The installed version of numexpr 2.4.4 is not supported in pandas and will be not be used

  UserWarning)

Print specs of major packages and system


In [3]:
print("Operating System " + platform.system() + " " + platform.release())
print("Python Version " + str(sys.version))
print("Pandas Version " + str(pd.__version__))
print("Numpy Version " + str(np.__version__))
print("Matplotlib Version " + str(matplotlib.__version__))


Operating System Windows 8
Python Version 2.7.10 (default, May 23 2015, 09:44:00) [MSC v.1500 64 bit (AMD64)]
Pandas Version 0.18.1
Numpy Version 1.10.4
Matplotlib Version 1.5.0rc3

Set data location


In [4]:
if platform.system() == 'Windows':
    drive = 'E:'
else:
    drive = '/media/p/Transcend/'
outputFolder = drive + '/GIS/'
raw_archive_folder = drive + '/PROJECTS/Snake Valley Water/Transducer Data/Raw_data_archive'

Import custom scripts and connection info


In [5]:
# custom functions for transducer data import
import Snake_Valley_Data_Import as svdi
import wellapplication as wa
USGS = wa.usgs()

Get and Modify Data From Database

Skip if you don't need fresh data

Create Query to get list of wells


In [6]:
# get connection info for MySQL database
sys.path.append(raw_archive_folder)
import engineGetter
engine = engineGetter.getEngine()


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-6-a02911004c0d> in <module>()
      2 sys.path.append(raw_archive_folder)
      3 import engineGetter
----> 4 engine = engineGetter.getEngine()

E:/PROJECTS/Snake Valley Water/Transducer Data/Raw_data_archive\engineGetter.pyc in getEngine()
      1 from sqlalchemy import create_engine
      2 def getEngine():
----> 3         return create_engine("mysql+pymysql://paulinkenbrandt:"+'5Z9966Mj22jcR4s45z'+"@168.180.168.172/groundwater")
      4 
      5 def modCheck():

C:\WinPython-64bit-2.7.10.3\python-2.7.10.amd64\lib\site-packages\sqlalchemy\engine\__init__.py in create_engine(*args, **kwargs)
    384     strategy = kwargs.pop('strategy', default_strategy)
    385     strategy = strategies.strategies[strategy]
--> 386     return strategy.create(*args, **kwargs)
    387 
    388 

C:\WinPython-64bit-2.7.10.3\python-2.7.10.amd64\lib\site-packages\sqlalchemy\engine\strategies.py in create(self, name_or_url, **kwargs)
     73                 if k in kwargs:
     74                     dbapi_args[k] = pop_kwarg(k)
---> 75             dbapi = dialect_cls.dbapi(**dbapi_args)
     76 
     77         dialect_args['dbapi'] = dbapi

C:\WinPython-64bit-2.7.10.3\python-2.7.10.amd64\lib\site-packages\sqlalchemy\dialects\mysql\pymysql.py in dbapi(cls)
     47     @classmethod
     48     def dbapi(cls):
---> 49         return __import__('pymysql')
     50 
     51     if py3k:

ImportError: No module named pymysql
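
The ImportError just means the pymysql driver named in the connection URL is not installed in this WinPython environment; installing it and re-running the cell above should clear the error (the driver name is dictated by the "mysql+pymysql://" prefix).


In [ ]:
# install the missing MySQL driver into the active environment, then re-run the engine cell
!pip install pymysql
import pymysql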

In [ ]:
quer = "SELECT * FROM groundwater.well WHERE WellID <= 75 OR WellID = 136"
g = pd.read_sql_query(sql=quer,con=engine)
wells = list(g['WellID'].values)

In [ ]:
g.to_clipboard()

Use the well list to populate a dictionary of water-level elevations


In [ ]:
wellData = {}
for well in wells:
    quer = "SELECT DateTime, WaterElevation FROM groundwater.reading where WellID = " + str(int(well)) + " AND DateTime >= '2009'"
    wellData[well] = pd.read_sql_query(sql=quer, con=engine, parse_dates='DateTime', index_col = 'DateTime')
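
Building the query by string concatenation is safe enough here because WellID comes straight from the database as an integer, but read_sql_query also takes bound parameters, which is the safer habit if the filter values ever come from elsewhere. A sketch of the same loop using SQLAlchemy's text() (already a dependency through the engine); the :wid/:start names are illustrative:


In [ ]:
# same per-well query written with bound parameters instead of string concatenation
from sqlalchemy import text

quer = text("SELECT DateTime, WaterElevation FROM groundwater.reading "
            "WHERE WellID = :wid AND DateTime >= :start")
wellData = {}
for well in wells:
    wellData[well] = pd.read_sql_query(sql=quer, con=engine,
                                       params={'wid': int(well), 'start': '2009-01-01'},
                                       parse_dates='DateTime', index_col='DateTime')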

In [ ]:
for well in wells:
    wellData[well] = wellData[well].resample('6H').interpolate()
    mean = wellData[well]['WaterElevation'].mean()
    std = wellData[well]['WaterElevation'].std()
    wellData[well]['avgDiffWL'] = wellData[well]['WaterElevation'] - mean
    wellData[well]['stdWL'] = wellData[well]['avgDiffWL']/std

Combine the resampled (6-hour) water-level data into a single table, add year, month, and day-of-year columns, and save it to CSV.


In [ ]:
AllHourly = pd.concat(wellData)
AllHourly.reset_index(inplace=True)
AllHourly.set_index('DateTime',inplace=True)
AllHourly.rename(columns={'level_0':'site_no','WaterElevation':'lev_va'}, inplace=True)
AllHourly['year'] = AllHourly.index.year
AllHourly['month'] = AllHourly.index.month
AllHourly['doy'] = AllHourly.index.dayofyear
AllHourly['diff'] = AllHourly['lev_va'].diff()

In [ ]:
AllHourly.to_csv(outputFolder + "QuarterDayUGSdata.csv")

Load the Data You Saved Earlier


In [5]:
df = pd.read_csv(outputFolder + "halfDayUGSdata.csv", low_memory=False, 
                 index_col='DateTime', parse_dates=True)
g = pd.read_csv(outputFolder + 'SitesWQualifier.csv')
wells = list(g['WellID'].values)

In [6]:
df.head(10)


Out[6]:
site_no lev_va avgDiffWL stdWL year month doy diff
DateTime
2009-01-01 00:00:00 1 5218.80 2.974755 1.083472 2009 1 1 NaN
2009-01-01 12:00:00 1 5218.77 2.944755 1.072545 2009 1 1 -0.03
2009-01-02 00:00:00 1 5218.86 3.034755 1.105325 2009 1 2 0.09
2009-01-02 12:00:00 1 5218.83 3.004755 1.094398 2009 1 2 -0.03
2009-01-03 00:00:00 1 5218.73 2.904755 1.057976 2009 1 3 -0.10
2009-01-03 12:00:00 1 5218.73 2.904755 1.057976 2009 1 3 0.00
2009-01-04 00:00:00 1 5218.67 2.844755 1.036123 2009 1 4 -0.06
2009-01-04 12:00:00 1 5218.77 2.944755 1.072545 2009 1 4 0.10
2009-01-05 00:00:00 1 5218.77 2.944755 1.072545 2009 1 5 0.00
2009-01-05 12:00:00 1 5218.73 2.904755 1.057976 2009 1 5 -0.04

In [7]:
siteDict = pd.Series(g['MonType'].values,index = g['WellID']).to_dict()
df['MonType'] = df['site_no'].apply(lambda x: siteDict[x],1)

In [8]:
wellid = {}

for well in g.WellID:
    wellid[well] = g[g['WellID'] == well]['Well'].values[0]

In [9]:
df['julian'] = df.index.to_julian_date()

Monthly and Seasonal Trend Plots


In [10]:
def monthlyPlot(df, grp, data, title):
    grpd = df.groupby([grp])[data]
    x = grpd.median().index
    y = grpd.median()
    y1 = grpd.quantile(q=0.25)
    y2 = grpd.quantile(q=0.75)
    y3 = grpd.quantile(q=0.1)
    y4 = grpd.quantile(q=0.9)
    
    n = plt.figure()
    plt.plot(x, y, 'bo-',color = 'blue', label = 'Median', )
    plt.fill_between(x,y2,y1,alpha=0.2, label = '25-75%-tiles', color = 'green')
    plt.fill_between(x,y3,y1,alpha=0.2, color='red', label = '10-90%-tiles')
    plt.fill_between(x,y2,y4,alpha=0.2, color='red')
    if grp == 'month':
        plt.xlim(0,13)
        plt.xticks(range(0,13))
    #plt.ylim(-2.25,2.25)
    #plt.yticks(np.arange(-2.25,2.50,0.25))
        plt.xlabel('month')
    plt.ylabel(title)
    plt.grid()
    plt.legend(loc=3)
    
    return n

TypeDict = {'S':'Wetland Monitoring Sites','P':'Agricultural Monitoring Sites','W':'Sites Outside of Pumping and Evapotranspiration Influence'}

In [11]:
df.index.max  # note: no parentheses here, so this displays the bound method; df.index.max() would return the latest timestamp


Out[11]:
<bound method DatetimeIndex.max of DatetimeIndex(['2009-01-01 00:00:00', '2009-01-01 12:00:00',
               '2009-01-02 00:00:00', '2009-01-02 12:00:00',
               '2009-01-03 00:00:00', '2009-01-03 12:00:00',
               '2009-01-04 00:00:00', '2009-01-04 12:00:00',
               '2009-01-05 00:00:00', '2009-01-05 12:00:00',
               ...
               '2015-11-27 00:00:00', '2015-11-27 12:00:00',
               '2015-11-28 00:00:00', '2015-11-28 12:00:00',
               '2015-11-29 00:00:00', '2015-11-29 12:00:00',
               '2015-11-30 00:00:00', '2015-11-30 12:00:00',
               '2015-12-01 00:00:00', '2015-12-01 12:00:00'],
              dtype='datetime64[ns]', name=u'DateTime', length=374300, freq=None)>

In [13]:
pdf = PdfPages(outputFolder + 'monthly.pdf')
grp = "month"

data = "diff"
title = 'Change in Water Level (ft)'
monthlyPlot(df, grp, data, title)
plt.title('All Wells')
for f, h in df.groupby('MonType'):
    n = monthlyPlot(h, grp, data, title)
    plt.title(TypeDict[f])
    pdf.savefig(n)
    plt.close()

data = 'avgDiffWL'
title = 'Deviation from Mean Water Level (ft)'
monthlyPlot(df, grp, data, title)
plt.title('All Wells')
for f, h in df.groupby('MonType'):
    n = monthlyPlot(h, grp, data, title)
    plt.title(TypeDict[f])
    pdf.savefig(n)
    plt.close()
    
data = 'stdWL'
title = 'Standardized Deviation from Mean Water Level'
monthlyPlot(df, grp, data, title)
plt.title('All Wells')
for f, h in df.groupby('MonType'):
    n = monthlyPlot(h, grp, data, title)
    plt.title(TypeDict[f])
    pdf.savefig(n)
    plt.close()
    
grp = pd.TimeGrouper("M")
data = "diff"
title = 'Change in Water Level (ft)'
monthlyPlot(df, grp, data, title)
plt.title('All Wells')
plt.ylim(-0.1,0.1)
for f, h in df.groupby('MonType'):
    n = monthlyPlot(h, grp, data, title)
    plt.ylim(-0.1,0.1)
    plt.title(TypeDict[f])
    pdf.savefig(n)
    plt.close()
    
grp = pd.TimeGrouper("M")
data = "stdWL"
title = 'Standardized Deviation from Mean Water Level'
monthlyPlot(df, grp, data, title)
plt.ylim(-3,3)
plt.title('All Wells')
for f, h in df.groupby('MonType'):
    n = monthlyPlot(h, grp, data, title)
    plt.title(TypeDict[f])
    plt.ylim(-3,3)
    pdf.savefig(n)
    plt.close()
    
grp = pd.TimeGrouper("M")
data = 'avgDiffWL'
title = 'Deviation from Mean Water Level (ft)'
monthlyPlot(df, grp, data, title)
plt.title('All Wells')
for f, h in df.groupby('MonType'):
    n = monthlyPlot(h, grp, data, title)
    plt.title(TypeDict[f])
    pdf.savefig(n)
    plt.close()
    
pdf.close()



In [ ]:
data = 'stdWL'
title = 'Standardized Deviation from Mean Water Level'
monthlyPlot(df, grp, data, title)
plt.title('All Wells')
for f, h in df.groupby('MonType'):
    n = monthlyPlot(h, grp, data, title)
    plt.title(TypeDict[f])
    pdf.savefig(n)
    plt.close()

Regression on Raw Data


In [14]:
def RLM(svws, site):
    '''
    RETURNS
    x0, slope, x_prime, y_hat, wellId, wellName, montype
    '''
    
    x0 = svws[site]['julian']
    y = svws[site]['lev_va']

    x = sm.add_constant(x0)
    data = svws[site]['lev_va'].to_frame()

    est = sm.RLM(y, x).fit()
    wellId = site
    slope = est.params[1]
    wellName = wellid[site]
    montype = siteDict[site]
    x_prime = np.linspace(x0.min(),x0.max(),100)[:, np.newaxis]
    x_prime = sm.add_constant(x_prime)
    y_hat = est.predict(x_prime)
    
    return x0, y, slope, x_prime, y_hat, wellId, wellName, montype

def OLS(svws, site):
    '''
    RETURNS
    x0, slope, x_prime, y_hat, wellId, wellName, montype
    '''
    
    x0 = svws[site]['julian']
    y = svws[site]['lev_va']

    x = sm.add_constant(x0)
    data = svws[site]['lev_va'].to_frame()

    est = sm.OLS(y, x).fit()
    wellId = site
    slope = est.params[1]
    wellName = wellid[site]
    montype = siteDict[site]
    x_prime = np.linspace(x0.min(),x0.max(),100)[:, np.newaxis]
    x_prime = sm.add_constant(x_prime)
    y_hat = est.predict(x_prime)
    rsqrd = (round(float(est.rsquared),2))
    
    return x0, y, slope, x_prime, y_hat, wellId, wellName, montype, rsqrd

In [15]:
pdf = PdfPages(outputFolder + 'OLSRLM.pdf')

pivTab = df.dropna(subset=['lev_va'])

siteList = list(pivTab.site_no.unique())
svws = {}
svwsEarly = {}
svwsLate = {}
svwsPiv = {}

RLMslope, RLMslope_er, RLMslope_lt, OLSslope, OLSslope_er, OLSslope_lt = [],[],[],[],[],[]

OLSrsqrd, OLSrsqrd_er, OLSrsqrd_lt = [], [], []

wellName, wellId, montype = [], [], []

for site in siteList:
    #fig = plt.figure(figsize=(20,10))
    svws[site] = pivTab[pivTab.site_no == site]
    svwsEarly[site] = svws[site][svws[site].index < pd.datetime(2011,5,1)]
    svwsLate[site] = svws[site][svws[site].index > pd.datetime(2012,5,1)]
    
    n = plt.figure(wellid[site])
    
    x0, y, slope, x_prime, y_hat, wellId1, wellName1, montype1 = RLM(svws, site)
    
    
    plt.scatter(x0,y)
    plt.plot(x_prime[:, 1], y_hat, c='red', alpha=0.9, zorder = 3, linewidth=2.0, label='RLM fit m= ' + str(slope))
    RLMslope.append(slope)

    x0, y, slope, x_prime, y_hat, wellId1, wellName1, montype1 = RLM(svwsEarly, site)
    plt.scatter(x0,y, label='early')
    plt.plot(x_prime[:, 1], y_hat, c='red', alpha=0.9, zorder = 3, linewidth=2.0, label='RLM fit Early m= ' + str(slope))
    RLMslope_er.append(slope)
    
    x0, y, slope, x_prime, y_hat, wellId1, wellName1, montype1 = RLM(svwsLate, site)
    plt.scatter(x0,y, label='late')
    plt.plot(x_prime[:, 1], y_hat, c='red', alpha=0.9, zorder = 3, linewidth=2.0, label='RLM fit Late m= ' + str(slope))
    RLMslope_lt.append(slope)
    

    x0, y, slope, x_prime, y_hat, wellId1, wellName1, montype1, rsqrd = OLS(svws, site)
    #plt.scatter(x0,y)
    #plt.plot(x_prime[:, 1], y_hat, c='red', alpha=0.9, zorder = 3, linewidth=2.0, label='OLS fit m= ' + str(slope))
    OLSslope.append(slope)
    OLSrsqrd.append(rsqrd)
    
    x0, y, slope, x_prime, y_hat, wellId1, wellName1, montype1, rsqrd = OLS(svwsEarly, site)
    #plt.scatter(x0,y, label='early')
    #plt.plot(x_prime[:, 1], y_hat, c='red', alpha=0.9, zorder = 3, linewidth=2.0, label='OLS fit Early m= ' + str(slope))
    OLSslope_er.append(slope)
    OLSrsqrd_er.append(rsqrd)
    
    x0, y, slope, x_prime, y_hat, wellId1, wellName1, montype1, rsqrd = OLS(svwsLate, site)
    #plt.scatter(x0,y, label='late')
    #plt.plot(x_prime[:, 1], y_hat, c='red', alpha=0.9, zorder = 3, linewidth=2.0, label='OLS fit Late m= ' + str(slope))
    OLSslope_lt.append(slope)
    OLSrsqrd_lt.append(rsqrd)
    
    wellId.append(wellId1)    
    wellName.append(wellName1)
    montype.append(montype1)

    plt.legend()
    plt.title(str(wellid[site]))
    plt.xlabel('Julian Day')
    plt.ylabel('Water Level Elevation (ft)')
    plt.grid()
    plt.tight_layout()
    pdf.savefig(n)
    plt.close()
pdf.close()    

RLMOLS = pd.DataFrame({'RLM slope (ft/day)':RLMslope, 'RLM slope early (ft/day)':RLMslope_er, 'RLM slope late (ft/day)':RLMslope_lt, 
              'OLS slope (ft/day)':OLSslope, 'OLS slope early (ft/day)':OLSslope_er, 'OLS slope late (ft/day)':OLSslope_lt , 
             'OLS r-squared':OLSrsqrd, 'OLS r-squared early':OLSrsqrd_er, 'OLS r-squared late':OLSrsqrd_lt, 
             'Well Name':wellName, 'Well ID':wellId, 'Monitoring Type':montype})
RLMOLS[u'RLM slope (ft/yr)'] = RLMOLS[u'RLM slope (ft/day)'] * 365.25
RLMOLS[u'OLS slope (ft/yr)'] = RLMOLS[u'OLS slope (ft/day)'] * 365.25
RLMOLS[u'RLM early slope (ft/yr)'] = RLMOLS[u'RLM slope early (ft/day)'] * 365.25
RLMOLS[u'OLS early slope (ft/yr)'] = RLMOLS[u'OLS slope early (ft/day)'] * 365.25
RLMOLS[u'RLM late slope (ft/yr)'] = RLMOLS[u'RLM slope late (ft/day)'] * 365.25
RLMOLS[u'OLS late slope (ft/yr)'] = RLMOLS[u'OLS slope late (ft/day)'] * 365.25
RLMOLS.to_csv(outputFolder+'RLMOLS.csv')

RLMOLS.groupby('Monitoring Type')['OLS slope (ft/yr)'].agg({'min':np.min, 'mean':np.mean,'median':np.median,
                                                        'max':np.max, 'std':np.std, 'cnt':(lambda x: np.count_nonzero(~np.isnan(x)))}).reset_index()

In [17]:
RLMOLS.groupby('Monitoring Type')['OLS slope (ft/yr)'].agg({'min':np.min, 'mean':np.mean,'median':np.median,
                                                        'max':np.max, 'std':np.std, 'cnt':(lambda x: np.count_nonzero(~np.isnan(x)))}).reset_index()


Out[17]:
Monitoring Type std cnt min max median mean
0 P 0.482057 27.0 -1.667593 -0.011758 -0.416859 -0.590087
1 S 0.058530 17.0 -0.098835 0.129458 -0.024602 -0.011464
2 W 0.174813 32.0 -0.544534 0.210549 -0.032956 -0.088056
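
Passing a renaming dict to SeriesGroupBy.agg, as in the cell above, was deprecated in later pandas releases; the same per-network summary can be written with a plain list of aggregation names (count already ignores NaN, matching the count_nonzero lambda):


In [ ]:
# same summary without the dict-renaming form of .agg
(RLMOLS.groupby('Monitoring Type')['OLS slope (ft/yr)']
       .agg(['min', 'mean', 'median', 'max', 'std', 'count'])
       .reset_index())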

In [ ]:
del(RLMOLS)

Seasonal Decomposition


In [ ]:
data

In [18]:
seasDecomp = {}

for site in siteList: 
    svws = pivTab[pivTab.site_no == site]
    data = svws['stdWL']
    sd =  sm.tsa.seasonal_decompose(data.values, freq=365*2+1)
    sdData = pd.DataFrame({'residuals':sd.resid,'seasonal':sd.seasonal,'trend':sd.trend})
    svws.reset_index(inplace=True)
    sdData1 = pd.concat([sdData,svws], axis=1)
    sdData1 = sdData1.dropna()
    sdData1.reset_index(inplace=True)
    seasDecomp[site] = sdData1.set_index(['DateTime'])

In [84]:
pdf = PdfPages(outputFolder + 'decompose2.pdf')

svws = {}
sd = {}
for site in siteList:  
    svws[site] = pivTab[pivTab.site_no == site]
    data = svws[site]['lev_va']
    sd[site] =  sm.tsa.seasonal_decompose(data.values, freq=365*4+1)
    resplot = sd[site].plot()
    plt.title(wellid[site])
    pdf.savefig(resplot)
    plt.close()
pdf.close()


---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input-84-eb807061b73e> in <module>()
      7     data = svws[site]['lev_va']
      8     sd[site] =  sm.tsa.seasonal_decompose(data.values, freq=365*4+1)
----> 9     resplot = sd[site].plot()
     10     plt.title(wellid[site])
     11     pdf.savefig(resplot)

c:\python27\arcgis10.3\lib\site-packages\statsmodels\tsa\seasonal.pyc in plot(self)
    144             axes[3].set_xlim(0, self.nobs)
    145 
--> 146         fig.tight_layout()
    147         return fig
    148 

c:\python27\arcgis10.3\lib\site-packages\matplotlib\figure.pyc in tight_layout(self, renderer, pad, h_pad, w_pad, rect)
   1747 
   1748         if renderer is None:
-> 1749             renderer = get_renderer(self)
   1750 
   1751         kwargs = get_tight_layout_figure(self, self.axes, subplotspec_list,

c:\python27\arcgis10.3\lib\site-packages\matplotlib\tight_layout.pyc in get_renderer(fig)
    217 
    218         if canvas and hasattr(canvas, "get_renderer"):
--> 219             renderer = canvas.get_renderer()
    220         else:
    221             # not sure if this can happen

c:\python27\arcgis10.3\lib\site-packages\matplotlib\backends\backend_agg.pyc in get_renderer(self, cleared)
    484 
    485         if need_new_renderer:
--> 486             self.renderer = RendererAgg(w, h, self.figure.dpi)
    487             self._lastKey = key
    488         elif cleared:

c:\python27\arcgis10.3\lib\site-packages\matplotlib\backends\backend_agg.pyc in __init__(self, width, height, dpi)
     91         self.height = height
     92         if __debug__: verbose.report('RendererAgg.__init__ width=%s, height=%s'%(width, height), 'debug-annoying')
---> 93         self._renderer = _RendererAgg(int(width), int(height), dpi, debug=False)
     94         self._filter_renderers = []
     95 

MemoryError: In RendererAgg: Out of memory
Error in callback <function post_execute at 0x061CE730> (for post_execute):
---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
c:\python27\arcgis10.3\lib\site-packages\matplotlib\pyplot.pyc in post_execute()
    145             def post_execute():
    146                 if matplotlib.is_interactive():
--> 147                     draw_all()
    148 
    149             # IPython >= 2

c:\python27\arcgis10.3\lib\site-packages\matplotlib\_pylab_helpers.pyc in draw_all(cls, force)
    148         for f_mgr in cls.get_all_fig_managers():
    149             if force or f_mgr.canvas.figure.stale:
--> 150                 f_mgr.canvas.draw_idle()
    151 
    152 atexit.register(Gcf.destroy_all)

c:\python27\arcgis10.3\lib\site-packages\matplotlib\backend_bases.pyc in draw_idle(self, *args, **kwargs)
   2024         if not self._is_idle_drawing:
   2025             with self._idle_draw_cntx():
-> 2026                 self.draw(*args, **kwargs)
   2027 
   2028     def draw_cursor(self, event):

c:\python27\arcgis10.3\lib\site-packages\matplotlib\backends\backend_agg.pyc in draw(self)
    467         if __debug__: verbose.report('FigureCanvasAgg.draw', 'debug-annoying')
    468 
--> 469         self.renderer = self.get_renderer(cleared=True)
    470         # acquire a lock on the shared font cache
    471         RendererAgg.lock.acquire()

c:\python27\arcgis10.3\lib\site-packages\matplotlib\backends\backend_agg.pyc in get_renderer(self, cleared)
    484 
    485         if need_new_renderer:
--> 486             self.renderer = RendererAgg(w, h, self.figure.dpi)
    487             self._lastKey = key
    488         elif cleared:

c:\python27\arcgis10.3\lib\site-packages\matplotlib\backends\backend_agg.pyc in __init__(self, width, height, dpi)
     91         self.height = height
     92         if __debug__: verbose.report('RendererAgg.__init__ width=%s, height=%s'%(width, height), 'debug-annoying')
---> 93         self._renderer = _RendererAgg(int(width), int(height), dpi, debug=False)
     94         self._filter_renderers = []
     95 

MemoryError: In RendererAgg: Out of memory
---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
c:\python27\arcgis10.3\lib\site-packages\IPython\core\formatters.pyc in __call__(self, obj)
    335                 pass
    336             else:
--> 337                 return printer(obj)
    338             # Finally look for special method names
    339             method = _safe_get_formatter_method(obj, self.print_method)

c:\python27\arcgis10.3\lib\site-packages\IPython\core\pylabtools.pyc in <lambda>(fig)
    205 
    206     if 'png' in formats:
--> 207         png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs))
    208     if 'retina' in formats or 'png2x' in formats:
    209         png_formatter.for_type(Figure, lambda fig: retina_figure(fig, **kwargs))

c:\python27\arcgis10.3\lib\site-packages\IPython\core\pylabtools.pyc in print_figure(fig, fmt, bbox_inches, **kwargs)
    115 
    116     bytes_io = BytesIO()
--> 117     fig.canvas.print_figure(bytes_io, **kw)
    118     data = bytes_io.getvalue()
    119     if fmt == 'svg':

c:\python27\arcgis10.3\lib\site-packages\matplotlib\backend_bases.pyc in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, **kwargs)
   2178                     orientation=orientation,
   2179                     dryrun=True,
-> 2180                     **kwargs)
   2181                 renderer = self.figure._cachedRenderer
   2182                 bbox_inches = self.figure.get_tightbbox(renderer)

c:\python27\arcgis10.3\lib\site-packages\matplotlib\backends\backend_agg.pyc in print_png(self, filename_or_obj, *args, **kwargs)
    525 
    526     def print_png(self, filename_or_obj, *args, **kwargs):
--> 527         FigureCanvasAgg.draw(self)
    528         renderer = self.get_renderer()
    529         original_dpi = renderer.dpi

c:\python27\arcgis10.3\lib\site-packages\matplotlib\backends\backend_agg.pyc in draw(self)
    467         if __debug__: verbose.report('FigureCanvasAgg.draw', 'debug-annoying')
    468 
--> 469         self.renderer = self.get_renderer(cleared=True)
    470         # acquire a lock on the shared font cache
    471         RendererAgg.lock.acquire()

c:\python27\arcgis10.3\lib\site-packages\matplotlib\backends\backend_agg.pyc in get_renderer(self, cleared)
    484 
    485         if need_new_renderer:
--> 486             self.renderer = RendererAgg(w, h, self.figure.dpi)
    487             self._lastKey = key
    488         elif cleared:

c:\python27\arcgis10.3\lib\site-packages\matplotlib\backends\backend_agg.pyc in __init__(self, width, height, dpi)
     91         self.height = height
     92         if __debug__: verbose.report('RendererAgg.__init__ width=%s, height=%s'%(width, height), 'debug-annoying')
---> 93         self._renderer = _RendererAgg(int(width), int(height), dpi, debug=False)
     94         self._filter_renderers = []
     95 

MemoryError: In RendererAgg: Out of memory
<matplotlib.figure.Figure at 0x72cf9070>
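
The MemoryError appears to come from the Agg renderer trying to build a canvas while the inline backend is also re-drawing every figure statsmodels creates; turning interactive drawing off, not caching every DecomposeResult, and closing each figure right after it is saved usually keeps memory in check. A sketch of the same loop with those changes (not a guaranteed fix):


In [ ]:
# save each decomposition figure straight to the PDF without keeping figures or results alive
plt.ioff()                          # stop the inline backend from re-drawing open figures
pdf = PdfPages(outputFolder + 'decompose2.pdf')
for site in siteList:
    data = pivTab[pivTab.site_no == site]['lev_va']
    fig = sm.tsa.seasonal_decompose(data.values, freq=365*4+1).plot()
    fig.suptitle(wellid[site])
    pdf.savefig(fig)
    plt.close(fig)                  # free the canvas before moving to the next site
pdf.close()
plt.ion()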

In [21]:
seasonal_decomp = pd.concat(sdData, copy=False)
seasonal_decomp.reset_index(inplace=True)
seasonal_decomp.columns = ['site','day','residuals','seasonal','trend']
trendPiv = seasonal_decomp.pivot(index='day',columns='site',values='trend')
trendPiv.dropna(inplace=True)
seasoPiv = seasonal_decomp.pivot(index='day',columns='site',values='seasonal')
seasoPiv.dropna(inplace=True)
residPiv = seasonal_decomp.pivot(index='day',columns='site',values='residuals')
residPiv.dropna(inplace=True)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-21-4075a0c07802> in <module>()
----> 1 seasonal_decomp = pd.concat(sdData, copy=False)
      2 seasonal_decomp.reset_index(inplace=True)
      3 seasonal_decomp.columns = ['site','day','residuals','seasonal','trend']
      4 trendPiv = seasonal_decomp.pivot(index='day',columns='site',values='trend')
      5 trendPiv.dropna(inplace=True)

c:\python27\arcgis10.3\lib\site-packages\pandas\tools\merge.pyc in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, copy)
    832                        keys=keys, levels=levels, names=names,
    833                        verify_integrity=verify_integrity,
--> 834                        copy=copy)
    835     return op.get_result()
    836 

c:\python27\arcgis10.3\lib\site-packages\pandas\tools\merge.pyc in __init__(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity, copy)
    847             raise TypeError('first argument must be an iterable of pandas '
    848                             'objects, you passed an object of type '
--> 849                             '"{0}"'.format(type(objs).__name__))
    850 
    851         if join == 'outer':

TypeError: first argument must be an iterable of pandas objects, you passed an object of type "DataFrame"
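
As the TypeError says, pd.concat wants an iterable of pandas objects; sdData is the single per-site frame left over from the loop above, so the seasDecomp dictionary built in In [18] is presumably what was meant. A sketch of the corrected cell (the concat key comes back as 'level_0' after reset_index):


In [ ]:
# concatenate the per-site decomposition frames collected in seasDecomp
seasonal_decomp = pd.concat(seasDecomp, copy=False)
seasonal_decomp.reset_index(inplace=True)
seasonal_decomp.rename(columns={'level_0': 'site', 'DateTime': 'day'}, inplace=True)
trendPiv = seasonal_decomp.pivot(index='day', columns='site', values='trend').dropna()
seasoPiv = seasonal_decomp.pivot(index='day', columns='site', values='seasonal').dropna()
residPiv = seasonal_decomp.pivot(index='day', columns='site', values='residuals').dropna()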

In [ ]:
import seaborn as sns
sns.clustermap(trendPiv.corr(method = 'spearman'),figsize=(18, 18))

In [ ]:
import seaborn as sns
sns.clustermap(seasoPiv.corr(method = 'spearman'),figsize=(18, 18))

In [ ]:
sns.clustermap(residPiv.corr(method = 'spearman'),figsize=(18, 18))

In [ ]:
trendPiv[[2,11,74,36]].plot()
seasoPiv[[61,40,58,65]].plot()
seasoPiv[[57,59,67,5,62,4,64,63,64,66]].plot()
seasoPiv[[1,70,36,72,38,74]].plot()
seasoPiv[[25,73,15,17,24,75,10,19,49,14,51,9,23,30]].plot()
seasoPiv[[55,56,68,2,35,69,60,37]].plot()

In [ ]:
trendPiv[[1,2,33,34,35,6,7,8,49,50,51,56]].plot()
trendPiv[[43,44,45,46,48,61,63,64,75]].plot()
trendPiv[[58,20,74,19,42,14,11,17,15,16,21,18,70,66,13,28,69,24,25]].plot()
trendPiv[[4,55,30,73,65,39,57,72]].plot()
trendPiv[[27,68,26,9,10]].plot()

Periodogram


In [ ]:
from scipy import signal

In [ ]:
svws = {}

for site in siteList[0:8]:
    data = seasoPiv[site]
    #svws[site] = pivTab[pivTab.site_no == site]
    #data = svws[site]['lev_va']
    #data = data.resample('1D')
    f, Pxx_den = signal.periodogram(data, 1, 'flattop', scaling='spectrum')
    plt.semilogy(f, np.sqrt(Pxx_den))
    #plt.xlim(0,400)
plt.grid(which='both')
print(1/(365.25*4))
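
The printed value, 1/(365.25*4), is the frequency in cycles per sample that an annual cycle would have in a series sampled four times per day; a spectral peak can be converted back to a period the same way. A sketch using the first site (illustrative only):


In [ ]:
# locate the dominant spectral peak and express it as a period in samples
site = siteList[0]
f, Pxx = signal.periodogram(seasoPiv[site], fs=1, window='flattop', scaling='spectrum')
peak = f[np.argmax(Pxx[1:]) + 1]    # skip the zero-frequency bin
print('peak frequency = %.6f cycles/sample, period = %.1f samples' % (peak, 1.0/peak))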

Color Hydros


In [ ]:
pdf = PdfPages(outputFolder + 'ColorHydros.pdf')

pivTab = dfs[['year','doy','lev_va','site_no']]
siteList = list(pivTab.site_no.unique())

svws = {}
svwsPiv = {}

for site in siteList:
    fig = plt.figure(figsize=(20,10))
    svws[site] = pivTab[pivTab.site_no == site]
    svwsPiv[site] = svws[site].pivot("year", "doy", "lev_va")
    sns.heatmap(svwsPiv[site], robust=True, cmap="jet_r", fmt="d")
    plt.xticks(np.arange(0,370,30),['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'], rotation=45)
    plt.xlabel("Day of Calendar Year")
    plt.title(wellid[site])
    pdf.savefig(fig)
    plt.close()

pdf.close()

In [ ]:
pieData = pd.read_excel("U:/GWP/Wetland/WaterMonitoring/PiezometerData/Compiled/SVPiezoDailyAverages2009-Dec2014.xlsx")
pieData['year'] = pieData['Date'].apply(lambda x: int(x.year),1)
pieData.reset_index(inplace=True)
pieData.set_index('Date',inplace=True)
pieData['doy'] = pieData.index.dayofyear

pdf = PdfPages(outputFolder + 'piezometers.pdf')

colms = list(pieData.columns)

svws = {}
svwsPiv = {}

for site in colms:
    if 'Water Elevation' in site:
        fig = plt.figure(figsize=(20,10))
        svws[site] = pieData[['doy','year', site]]      
        svws[site].drop_duplicates(subset = ['year','doy'], inplace=True)
        svwsPiv[site] = svws[site].pivot("year", "doy", site)
        sns.heatmap(svwsPiv[site], robust=True, cmap="jet_r", fmt="d")
        plt.xticks(np.arange(0,370,10), np.arange(0,370,10), rotation=90)
        plt.xlabel("Day of Calendar Year")
        plt.title(site)
        pdf.savefig(fig)
        plt.close()
        
pdf.close()

Climate Data


In [ ]:
HalsSCAN = {}
TuleSCAN = {}
WhlrSNOTEL = {}

# import multiple files downloaded from respective sites
for i in range(2010,2017):
    HalsSCAN[i] = pd.read_csv(outputFolder + '2164_ALL_YEAR='+str(int(i))+'.csv', skiprows=3, 
                              parse_dates={'dt':[1,2]}, index_col='dt', na_values=-99.9)
    TuleSCAN[i] = pd.read_csv(outputFolder + '2163_ALL_YEAR='+str(int(i))+'.csv', skiprows=3, 
                              parse_dates={'dt':[1,2]}, index_col='dt', na_values=-99.9)
    WhlrSNOTEL[i] = pd.read_csv(outputFolder + '1147_ALL_YEAR='+str(int(i))+'.csv', skiprows=3,
                                parse_dates={'dt':[1,2]}, index_col='dt', na_values=-99.9)
    

    
Hals = pd.concat(HalsSCAN)
Tule = pd.concat(TuleSCAN)
Whlr = pd.concat(WhlrSNOTEL)

Whlr.reset_index(inplace=True)
Whlr.set_index('dt', inplace=True)
Tule.reset_index(inplace=True)
Tule.set_index('dt', inplace=True)
Hals.reset_index(inplace=True)
Hals.set_index('dt', inplace=True)

Whlr.drop(Whlr.columns[0],inplace=True,axis=1)
Tule.drop(Tule.columns[0],inplace=True,axis=1)
Hals.drop(Hals.columns[0],inplace=True,axis=1)

WhlrDaily = Whlr.resample('D', how='mean')
TuleDaily = Tule.resample('D', how='mean')
HalsDaily = Hals.resample('D', how='mean')
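
resample('D', how='mean') still runs on the pandas 0.18 used here but raises a FutureWarning; on later releases the how= keyword is removed, and the equivalent accessor spelling is:


In [ ]:
# daily means without the deprecated how= keyword
WhlrDaily = Whlr.resample('D').mean()
TuleDaily = Tule.resample('D').mean()
HalsDaily = Hals.resample('D').mean()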

In [ ]:
HW = pd.merge(Hals, Whlr, left_index=True, right_index=True)
HWT = pd.merge(HW, Tule, left_index=True, right_index=True)

In [ ]:
WTD = pd.merge(WhlrDaily,TuleDaily, left_index=True, right_index=True)
WTH = pd.merge(WTD,HalsDaily, left_index=True, right_index=True)

In [64]:
Eskd = pd.read_csv(outputFolder + 'UCC_ghcn_USC00422607_2016_03_28_1459219796.csv', skiprows=15, parse_dates={'dt':[0]}, 
                   index_col='dt', na_values='M')
Eskd['Eskd Precipitation'] = pd.to_numeric(Eskd['Precipitation'], errors='coerce')
Eskd['Eskd Snow Depth'] = pd.to_numeric(Eskd['Snow Depth'], errors='coerce')
Eskd['Eskd Snow Fall'] = pd.to_numeric(Eskd['Snow Fall'], errors='coerce')
Eskd = Eskd[Eskd.index >= pd.datetime(1960,1,1)]
Eskd['Eskd cumdept'] = Eskd['Eskd Precipitation'].apply(lambda x: x- Eskd['Eskd Precipitation'].mean()).cumsum()
Eskd['Eskd stdcumdept'] = Eskd['Eskd Precipitation'].apply(lambda x: (x- Eskd['Eskd Precipitation'].mean())/Eskd['Eskd Precipitation'].std()).cumsum()

In [65]:
Part = pd.read_csv(outputFolder + 'UCC_ghcn_USC00426708_2016_03_28_1459222060.csv', skiprows=15, parse_dates={'dt':[0]}, 
                   index_col='dt', na_values='M')
Part['Part Precipitation'] = pd.to_numeric(Part['Precipitation'], errors='coerce')
Part['Part Snow Depth'] = pd.to_numeric(Part['Snow Depth'], errors='coerce')
Part['Part Snow Fall'] = pd.to_numeric(Part['Snow Fall'], errors='coerce')
Part = Part[Part.index >= pd.datetime(1960,1,1)]
Part['Part cumdept'] = Part['Part Precipitation'].apply(lambda x: x- Part['Part Precipitation'].mean()).cumsum()
Part['Part stdcumdept'] = Part['Part Precipitation'].apply(lambda x: (x- Part['Part Precipitation'].mean())/Part['Part Precipitation'].std()).cumsum()

In [66]:
Calo = pd.read_csv(outputFolder + 'UCC_ghcn_USC00421144_2016_03_28_1459222774.csv', skiprows=15, parse_dates={'dt':[0]}, 
                   index_col='dt', na_values='M')
Calo['Calo Precipitation'] = pd.to_numeric(Calo['Precipitation'], errors='coerce')
Calo['Calo Snow Depth'] = pd.to_numeric(Calo['Snow Depth'], errors='coerce')
Calo['Calo Snow Fall'] = pd.to_numeric(Calo['Snow Fall'], errors='coerce')
Calo = Calo[Calo.index >= pd.datetime(1960,1,1)]
Calo['Calo cumdept'] = Calo['Calo Precipitation'].apply(lambda x: x- Calo['Calo Precipitation'].mean()).cumsum()
Calo['Calo stdcumdept'] = Calo['Calo Precipitation'].apply(lambda x: (x- Calo['Calo Precipitation'].mean())/Calo['Calo Precipitation'].std()).cumsum()
Calo.drop(['Snow Depth','Snow Fall', 'Precipitation'], axis=1, inplace=1)

In [67]:
Fish = pd.read_csv(outputFolder + 'UCC_ghcn_USC00422852_2016_03_28_1459223049.csv', skiprows=15, parse_dates={'dt':[0]}, 
                   index_col='dt', na_values='M')
Fish['Fish Precipitation'] = pd.to_numeric(Fish['Precipitation'], errors='coerce')
Fish['Fish Snow Depth'] = pd.to_numeric(Fish['Snow Depth'], errors='coerce')
Fish['Fish Snow Fall'] = pd.to_numeric(Fish['Snow Fall'], errors='coerce')
Fish = Fish[Fish.index >= pd.datetime(1960,1,1)]
Fish['Fish cumdept'] = Fish['Fish Precipitation'].apply(lambda x: x- Fish['Fish Precipitation'].mean()).cumsum()
Fish['Fish stdcumdept'] = Fish['Fish Precipitation'].apply(lambda x: (x- Fish['Fish Precipitation'].mean())/Fish['Fish Precipitation'].std()).cumsum()
Fish.drop(['Snow Depth','Snow Fall', 'Precipitation'], axis=1, inplace=1)

In [68]:
IBAH = pd.read_csv(outputFolder + 'UCC_ghcn_USC00424174_2016_03_28_1459224176.csv', skiprows=15, parse_dates={'dt':[0]}, 
                   index_col='dt', na_values='M')
IBAH['IBAH Precipitation'] = pd.to_numeric(IBAH['Precipitation'], errors='coerce')
IBAH['IBAH Snow Depth'] = pd.to_numeric(IBAH['Snow Depth'], errors='coerce')
IBAH['IBAH Snow Fall'] = pd.to_numeric(IBAH['Snow Fall'], errors='coerce')
IBAH = IBAH[IBAH.index >= pd.datetime(1960,1,1)]
IBAH['IBAH cumdept'] = IBAH['IBAH Precipitation'].apply(lambda x: x- IBAH['IBAH Precipitation'].mean()).cumsum()
IBAH['IBAH stdcumdept'] = IBAH['IBAH Precipitation'].apply(lambda x: (x- IBAH['IBAH Precipitation'].mean())/IBAH['IBAH Precipitation'].std()).cumsum()

In [69]:
PC = pd.merge(Part,Calo, left_index=True,right_index=True,how='outer')
EF = pd.merge(Eskd,Fish, left_index=True,right_index=True,how='outer')
IBEF = pd.merge(IBAH,EF, left_index=True,right_index=True,how='outer')
IBEFPC = pd.merge(IBEF,PC, left_index=True,right_index=True,how='outer')
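
The five station cells above (In [64] through In [68]) repeat the same read / coerce / cumulative-departure pattern; a small helper like the hypothetical load_ghcn below would do that work once, assuming the same GHCN export column names ('Precipitation', 'Snow Depth', 'Snow Fall'):


In [ ]:
# hypothetical helper equivalent to the per-station GHCN cells above
def load_ghcn(path, prefix, start=pd.datetime(1960, 1, 1)):
    st = pd.read_csv(path, skiprows=15, parse_dates={'dt': [0]}, index_col='dt', na_values='M')
    st[prefix + ' Precipitation'] = pd.to_numeric(st['Precipitation'], errors='coerce')
    st[prefix + ' Snow Depth'] = pd.to_numeric(st['Snow Depth'], errors='coerce')
    st[prefix + ' Snow Fall'] = pd.to_numeric(st['Snow Fall'], errors='coerce')
    st = st[st.index >= start]
    p = st[prefix + ' Precipitation']
    st[prefix + ' cumdept'] = (p - p.mean()).cumsum()
    st[prefix + ' stdcumdept'] = ((p - p.mean()) / p.std()).cumsum()
    return st

# e.g. Eskd = load_ghcn(outputFolder + 'UCC_ghcn_USC00422607_2016_03_28_1459219796.csv', 'Eskd')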

In [70]:
IBEFPC[['Eskd stdcumdept','IBAH stdcumdept','Fish stdcumdept','Calo stdcumdept','Part stdcumdept']].plot()
plt.xlim('1/1/1950','1/1/2016')


Out[70]:
(-7305L, 16801L)

In [71]:
IBEFPC['avgDept'] = IBEFPC[['Eskd cumdept','IBAH cumdept','Fish cumdept','Calo cumdept','Part cumdept']].mean(axis=1)
IBEFPC['StdAvgDept'] = IBEFPC[['Eskd stdcumdept','IBAH stdcumdept','Fish stdcumdept','Calo stdcumdept','Part stdcumdept']].mean(axis=1)

In [72]:
#IBEFPC['avgDept'].plot()

IBEFPC['movAvgDept'] = pd.rolling_mean(IBEFPC['avgDept'],60, center=True)
IBEFPC['movAvgDept'].plot()


c:\python27\arcgis10.3\lib\site-packages\ipykernel\__main__.py:3: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(window=60,center=True).mean()
  app.launch_new_instance()
Out[72]:
<matplotlib.axes._subplots.AxesSubplot at 0x38983b30>
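
As the FutureWarning notes, pd.rolling_mean is replaced by the Series.rolling accessor on newer pandas; the two moving-average cells here can be written as:


In [ ]:
# modern spelling of the deprecated pd.rolling_mean calls
IBEFPC['movAvgDept'] = IBEFPC['avgDept'].rolling(window=60, center=True).mean()
IBEFPC['movStdAvgDept'] = IBEFPC['StdAvgDept'].rolling(window=60, center=True).mean()
IBEFPC['movAvgDept'].plot()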

In [73]:
IBEFPC['movStdAvgDept'] = pd.rolling_mean(IBEFPC['StdAvgDept'], 60, center=True)


c:\python27\arcgis10.3\lib\site-packages\ipykernel\__main__.py:1: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(window=60,center=True).mean()
  if __name__ == '__main__':

In [74]:
IBEFPC['movStdAvgDept'].plot()


Out[74]:
<matplotlib.axes._subplots.AxesSubplot at 0x38989a90>

In [76]:
IBEFPC.to_clipboard()
IBEFPC.to_csv(outputFolder+'climate.csv')

In [ ]:
All = pd.merge(IBEFPC, WTH, left_index=True, right_index=True)

Correlation


In [ ]:
df.columns

In [ ]:
d = df.pivot(columns='site_no', values='stdWL')
d.dropna(inplace=True)
d = d.resample('1D').mean()              # resample() needs an explicit aggregation on pandas >= 0.18
d = d.interpolate(method='time')

In [ ]:
from scipy import signal

In [ ]:
d.columns

In [ ]:
for col in d.columns:
    d['dt'+str(col)] = signal.detrend(d[col].values)

In [ ]:
dp = pd.merge(d, IBEFPC, left_index=True, right_index=True, how='inner')

In [ ]:
dp.dropna(inplace=True)
dp = dp.resample('1D').mean()            # resample() needs an explicit aggregation on pandas >= 0.18
dp = dp.interpolate(method='time')

In [ ]:
useCols = [col for col in d.columns if 'dt' in str(col)]

In [82]:
# use only detrended data
d2 = d.filter(regex='dt')

In [ ]:
d2.plot()

In [83]:
import seaborn as sns

corrmat = d2.corr(method = 'spearman') #default is 'pearson'
sns.set(context="paper", font="monospace")
rcParams['figure.figsize'] = 25, 25
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(18, 12))

# Draw the heatmap using seaborn
sns.heatmap(corrmat, vmax=.8, square=True)

# Use matplotlib directly to emphasize known networks
networks = corrmat.columns.get_level_values('site_no')
for i, network in enumerate(networks):
    if i and network != networks[i - 1]:
        ax.axhline(len(networks) - i, c="w")
        ax.axvline(i, c="w")
f.tight_layout()


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-83-0ff1788df7de> in <module>()
      8 
      9 # Draw the heatmap using seaborn
---> 10 sns.heatmap(corrmat, vmax=.8, square=True)
     11 
     12 # Use matplotlib directly to emphasize known networks

c:\python27\arcgis10.3\lib\site-packages\seaborn\matrix.pyc in heatmap(data, vmin, vmax, cmap, center, robust, annot, fmt, annot_kws, linewidths, linecolor, cbar, cbar_kws, cbar_ax, square, ax, xticklabels, yticklabels, mask, **kwargs)
    433     plotter = _HeatMapper(data, vmin, vmax, cmap, center, robust, annot, fmt,
    434                           annot_kws, cbar, cbar_kws, xticklabels, yticklabels,
--> 435                           mask)
    436 
    437     # Add the pcolormesh kwargs here

c:\python27\arcgis10.3\lib\site-packages\seaborn\matrix.pyc in __init__(self, data, vmin, vmax, cmap, center, robust, annot, fmt, annot_kws, cbar, cbar_kws, xticklabels, yticklabels, mask)
    145         # Determine good default values for the colormapping
    146         self._determine_cmap_params(plot_data, vmin, vmax,
--> 147                                     cmap, center, robust)
    148 
    149         # Save other attributes to the object

c:\python27\arcgis10.3\lib\site-packages\seaborn\matrix.pyc in _determine_cmap_params(self, plot_data, vmin, vmax, cmap, center, robust)
    161         calc_data = plot_data.data[~np.isnan(plot_data.data)]
    162         if vmin is None:
--> 163             vmin = np.percentile(calc_data, 2) if robust else calc_data.min()
    164         if vmax is None:
    165             vmax = np.percentile(calc_data, 98) if robust else calc_data.max()

c:\python27\arcgis10.3\lib\site-packages\numpy\core\_methods.pyc in _amin(a, axis, out, keepdims)
     27 
     28 def _amin(a, axis=None, out=None, keepdims=False):
---> 29     return umr_minimum(a, axis, None, out, keepdims)
     30 
     31 def _sum(a, axis=None, dtype=None, out=None, keepdims=False):

ValueError: zero-size array to reduction operation minimum which has no identity
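
The ValueError means the matrix handed to sns.heatmap has no finite values, which typically happens when the earlier d.dropna() (or the 'dt' column filter) leaves an empty frame, so every Spearman correlation is NaN. A small guard before plotting, assuming the same d2/corrmat names:


In [ ]:
# sanity-check the detrended frame and its correlation matrix before plotting
print(d2.shape)
corrmat = d2.corr(method='spearman')
print(int(corrmat.notnull().values.sum()), 'finite correlations')
if corrmat.notnull().values.any():
    sns.heatmap(corrmat, vmax=.8, square=True)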

In [ ]:
collist = corrmat.columns
six = {}
eigth = {}
sevfive = {}
for i in collist:
    ssn = corrmat[(corrmat[i] > 0.6) & (corrmat[i]<1.0)].index.tolist()
    esn = corrmat[(corrmat[i] > 0.8) & (corrmat[i]<1.0)].index.tolist()
    sfsn = corrmat[(corrmat[i] > 0.75) & (corrmat[i]<1.0)].index.tolist()
    six[wellid[i]] = [wellid[j] for j in ssn]
    eigth[wellid[i]] = [wellid[j] for j in esn]
    sevfive[wellid[i]] = [wellid[j] for j in sfsn]

In [ ]:
pd.DataFrame.from_dict(eigth, orient='index').to_clipboard()

In [ ]:
sns.clustermap(d2.corr(method = 'spearman'),figsize=(18, 18))

In [ ]:
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault)

In [ ]:
%matplotlib inline
#mpl.rcParams.update(mpl.rcParamsDefault)
inline_rc = dict(mpl.rcParams)
mpl.rcParams.update(inline_rc)
rcParams['figure.figsize'] = 15, 15

In [ ]:
d[[69,37,36,38,136,34,35]].plot()

In [ ]:
d[[136,33,34,35,38,36,37,1,2]].plot()
d[[69,73,28,21,70,24,25,17,20,42,19,15,16,14,13,74,11,18]].plot()

In [ ]:
A1 = [59,58,60,63,68,41,67,61,64]
A2 = [136,33,34,35,72,39,57,66,5,62,4,71,40,65]
B1 = [55,52,53,6,7,8,12,54]
B2 = [3,47]
B3 = [28,13,24,25,20,19,74,15,16,11,14,18,17,42,21,70]
B4 = [73,22,23,30,29,31,1,2,69,38,36,37]
C1 = [27,26,9,10]
C2 = [32,48,51,49,50,45,46,56,43,44,75]
ClustGroups = [A1,A2,B1,B2,B3,B4,C1,C2]

In [ ]:
fc = {}
grpNames = ['A1','A2','B1','B2','B3','B4','C1','C2']
for i in range(len(ClustGroups)):
    fc[grpNames[i]] = [wellid[j] for j in ClustGroups[i]]

In [ ]:
d.columns

In [ ]:
rcParams['figure.figsize'] = 15, 15
d[[59,58,60,63,68,41,67,61,64]].plot()

In [ ]:
d[[136,33,34,35,72,39,57,66,5,62,4,71,40,65]].plot()

In [ ]:
d[[55,52,53]].plot()
d[[6,7,8,12,54]].plot()

In [ ]:
d[[28,13,24,25,20,19,74,15,16,11,14,18,17,42,21,70]].plot()

In [ ]:
fc['B3']

In [ ]:
A1

In [ ]:
wellid[3]

In [ ]:
for column in IBEFPC.columns:
    if ' Snow Depth' in column:
        print(column)
        IBEFPC[column].plot(label=column)
        plt.legend()

In [ ]:
TuleDaily[u'SMS.I-1:-40 (pct)  (loam)'].plot()

In [ ]:
PW17B = df[df.site_no==44]
CentTuleMX = df[df.site_no==75]

In [ ]:
PW17andTule = pd.merge(PW17B, TuleDaily, left_index=True, right_index=True, how='inner' )
TuleandTule = pd.merge(CentTuleMX, TuleDaily, left_index=True, right_index=True, how='inner' )

In [ ]:
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
y1 = PW17andTule.lev_va
y2 = PW17andTule[u'SMS.I-1:-4 (pct)  (loam)']
x = PW17andTule.index


ax1.plot(x,y1,color='red', label='PW17B water level')

ax2.plot(x,y2,color='blue', label='soil moisture Tule')
#plt.legend()

In [ ]:
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
y1 = TuleandTule.lev_va
y2 = TuleandTule[u'SMS.I-1:-20 (pct)  (loam)']
x = TuleandTule.index


ax1.plot(x,y1,color='red', label='Central Tule MX water level')

ax2.plot(x,y2,color='blue', label='soil moisture Tule')
#plt.legend()

In [ ]:
Whlr['WTEQ.I-1 (in) '].plot()

In [ ]:
WhlrDaily = Whlr.resample('D', how='mean')

In [ ]:
TuleFile = outputFolder + "UCC_ghcn_USR0000TULE_2016_03_06_1457300476.csv"
EskdaleFile = outputFolder + "UCC_ghcn_USC00422607_2016_03_06_1457300462.csv"
PartoonFile = outputFolder + "UCC_ghcn_USC00426708_2016_03_02_1456937496.csv"

Partoon = pd.read_csv(PartoonFile, na_values=['M','S', 'T'], skiprows=15, index_col=0, parse_dates=True)
Tule = pd.read_csv(TuleFile, na_values=['M','S', 'T'], skiprows=15, index_col=0, parse_dates=True)
Eskdale = pd.read_csv(EskdaleFile, na_values=['M','S', 'T'], skiprows=15, index_col=0, parse_dates=True)

Partoon = Partoon[Partoon.index > pd.datetime(1949,1,1)]

Partoon = Partoon.fillna(0)
Eskdale = Eskdale.fillna(0)

In [ ]:
def cumdept(df, prec='Precipitation'):
    df['date'] = df.index
    df['month'] = df.date.apply(lambda x: x.month, 1)
    df['dept'] = df[prec].apply(lambda x: x - df[prec].mean(),1)
    df['cumDept'] = df['dept'].cumsum()
    return df

In [ ]:
Partoon = cumdept(Partoon)
Eskdale = cumdept(Eskdale)

In [ ]:
Partoon['cumDept'].plot()
Eskdale['cumDept'].plot()

In [ ]:
url = 'http://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customSingleStationReport/daily/1147:NV:SNTL%7Cid=%22%22%7Cname/POR_BEGIN,POR_END/WTEQ::value,PREC::value,TMAX::value,TMIN::value,TAVG::value,PRCP::value'

In [ ]:
SNOTEL = pd.read_csv(url, skiprows=7, index_col=0 )

In [ ]:
SNOTEL.columns

In [ ]:
Eskdale['Precipitation'].plot()

In [ ]:
SNOTEL['Snow Water Equivalent (in)'].plot()

In [ ]:
def pptmonthlyPlot(df, grp, data, title):
    grpd = df.groupby([grp])[data]
    x = grpd.median().index
    y = grpd.mean()
    y1 = grpd.quantile(q=0.25)
    y2 = grpd.quantile(q=0.75)
    y3 = grpd.quantile(q=0.1)
    y4 = grpd.quantile(q=0.9)
    
    n = plt.figure()
    plt.plot(x, y, 'bo-', color='blue', label='Mean')
    plt.fill_between(x,y2,y1,alpha=0.2, label = '25-75%-tiles', color = 'green')
    plt.fill_between(x,y3,y1,alpha=0.2, color='red', label = '10-90%-tiles')
    plt.fill_between(x,y2,y4,alpha=0.2, color='red')
    if grp == 'month':
        plt.xlim(0,13)
        plt.xticks(range(0,13))
    #plt.ylim(-2.25,2.25)
    #plt.yticks(np.arange(-2.25,2.50,0.25))
        plt.xlabel('month')
    plt.ylabel(title)
    plt.grid()
    plt.legend(loc=3)
    
    return n


grp = 'month'
#pd.TimeGrouper("M")

data = 'cumDept'
title = 'Precip'
pptmonthlyPlot(pPPT, grp, data, title)
plt.title('Partoon')

In [ ]:
SNO = pd.read_csv(outputFolder+"1247_ALL_YEAR=2013.csv",skiprows=3, parse_dates={'datetime':[1,2]}, index_col='datetime')

In [ ]:
SNO[u'WTEQ.I-1 (in) '].plot()

In [ ]:
SNO2 = wa.transport.smoother(SNO, u'WTEQ.I-1 (in) ')

In [ ]:
SNO2[u'WTEQ.I-1 (in) '].plot()

In [ ]:
df.reset_index(inplace=True)

In [ ]:
import scipy
from scipy.stats import norm
def mk_ts(df, const, group1, orderby = 'year', alpha = 0.05):
    '''
    df = dataframe
    const = variable tested for trend
    group1 = variable to group by
    orderby = variable to order by (typically a date)
    '''
    
    def mk_test(x, alpha):
        """
        this perform the MK (Mann-Kendall) test to check if there is any trend present in 
        data or not

        Input:
            x:   a vector of data
            alpha: significance level

        Output:
            trend: tells the trend (increasing, decreasing or no trend)
            h: True (if trend is present) or False (if trend is absence)
            p: p value of the sifnificance test
            z: normalized test statistics 

        Examples
        --------
          >>> x = np.random.rand(100)
          >>> trend,h,p,z = mk_test(x,0.05) 

        Credit: http://pydoc.net/Python/ambhas/0.4.0/ambhas.stats/
        """
        n = len(x)
        ta = n*(n-1)/2
        # calculate S 
        s = 0
        for k in xrange(n-1): # this does not work with nan values
            for j in xrange(k+1,n):
                s += np.sign(x[j] - x[k])

        # calculate the unique data
        unique_x = np.unique(x)
        g = len(unique_x)

        # calculate the var(s)
        if n == g: # there is no tie
            var_s = (n*(n-1)*(2*n+5))/18
        else: # there are some ties in data
            tp = np.zeros(unique_x.shape)
            for i in xrange(len(unique_x)):
                tp[i] = sum(unique_x[i] == x)
            var_s = (n*(n-1)*(2*n+5) - np.sum(tp*(tp-1)*(2*tp+5)))/18
        
        # calculate z value
        if s > 0.0:
            z = (s - 1)/np.sqrt(var_s)
        elif s == 0.0:
            z = 0
        elif s < 0.0:
            z = (s + 1)/np.sqrt(var_s)
        else:
            print('problem with s')

        # calculate the p_value
        p = 2*(1-norm.cdf(abs(z))) # two tail test
        h = abs(z) > norm.ppf(1-alpha/2) 

        if (z<0) and h:
            trend = 'decreasing'
        elif (z>0) and h:
            trend = 'increasing'
        else:
            trend = 'no trend'

        return pd.Series({'trend':trend, 'varS':round(var_s,3), 'p':round(p,3), 'z':round(z,3), 's':round(s,3), 'n':n, 'ta':ta})

    def zcalc(Sp, Varp):
        if Sp > 0:
            return (Sp - 1)/Varp**0.5
        elif Sp < 0:
            return (Sp + 1)/Varp**0.5
        else:
            return 0   
    
    df.is_copy = False
    
    df[const] = pd.to_numeric(df.ix[:,const])
    # remove null values
    df[const].dropna(inplace=True)
    # remove index
    df.reset_index(inplace=True, drop=True)
    # sort by groups, then time
    df.sort_values(by=[group1,orderby],axis=0, inplace=True)
    
    # group by group and apply mk_test
    dg = df.groupby(group1).apply(lambda x: mk_test(x.loc[:,const].dropna().values, alpha))
    Var_S = dg.loc[:,'varS'].sum()
    S = dg.loc[:,'s'].sum()
    N = dg.loc[:,'n'].sum()
    Z = zcalc(S,Var_S)
    P = 2*(1-norm.cdf(abs(Z)))
    group_n = len(dg)
    h = abs(Z) > norm.ppf(1-alpha/2) 
    tau = S/dg.loc[:,'ta'].sum()

    if (Z<0) and h:
        trend = 'decreasing'
    elif (Z>0) and h:
        trend = 'increasing'
    else:
        trend = 'no trend'
    
    
    return pd.Series({'S':S, 'Z':round(Z,2), 'p':P, 'trend':trend, 'group_n':group_n, 'sample_n':N, 'Var_S':Var_S, 'tau':round(tau,2)})
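
A quick sanity check of mk_ts on a toy frame with a known trend (names and values purely illustrative) confirms the plumbing before running it on the real monitoring networks; two steadily rising 'sites' should come back as 'increasing':


In [ ]:
# toy check: two sites with monotonically rising water levels
toy = pd.DataFrame({'site_no': [1]*10 + [2]*10,
                    'year': list(range(2000, 2010))*2,
                    'lev_va': list(np.arange(10.0)) + list(np.arange(5.0, 15.0))})
print(mk_ts(toy, const='lev_va', group1='site_no', orderby='year', alpha=0.05))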

In [ ]:
def USGSchemTab(df,chem,group, alpha=0.1):
    def func(y,x):
        # make conc nulls match value nulls
        if np.isnan(x)==True:
            return np.nan
        elif y=='<':
            return 0
        else:
            return 1
    
    def sigtest(x, c):
        if x >= alpha:
            return np.nan
        elif c < 0:
            return '(-)'
        else:
            return '(+)'
    df.is_copy = False
    
    df[chem+'cond'] = df.apply(lambda row: func(row[chem+'rem'], row[chem]), axis=1)
    df[chem+'cond1'] = df.apply(lambda row: func(row[chem+'rem1'], row[chem]), axis=1)
    
    df.set_index([group],inplace=True)
    unit = df.index.get_level_values(0).unique()
    
    network,n,T1,p1,chng = [],[],[],[],[]
    for i in unit:
        network.append(i)
        n.append(len(df.loc[i,chem]))
        T1.append(scipy.stats.wilcoxon(df.loc[i,chem].values,df.loc[i,chem+'1'].values, zero_method='zsplit')[0])
        p1.append(scipy.stats.wilcoxon(df.loc[i,chem].values,df.loc[i,chem+'1'].values, zero_method='zsplit')[1])
        chng.append(round((df.loc[i,chem+'1'] - df.loc[i,chem]).median(),2))
    refdict = {'ntwrk':network,'n':n,'T':T1,'p':p1,'chng':chng}
    df.reset_index(inplace=True)
    Nresframe = pd.DataFrame(refdict)
    Nresframe.is_copy = False
    Nresframe['signif'] = Nresframe[['p','chng']].apply(lambda x: sigtest(x[0],x[1]),1)
    
    return Nresframe

In [ ]:
df.columns

In [ ]:
#dfs.drop('level_0', axis=1, inplace=True)
dfs.reset_index(inplace=True)
combo, names = [],[]
# we will group stations by network to analyze each network.
for k, v in dfs.groupby('MonType'):
    combo.append(mk_ts(v, 'lev_va','site_no','lev_dt', 0.10))
    names.append(k)
pd.DataFrame(combo, index=names)

In [ ]:
combo, names = [],[]
# we will group stations by network to analyze each network.
for k, v in df.groupby('MonType'):
    combo.append(mk_ts(v, 'lev_va','site_no','lev_dt', 0.10))
    names.append(k)
pd.DataFrame(combo, index=names)

In [ ]:
import statsmodels.api as sm

In [ ]:
from statsmodels.graphics.api import qqplot

In [ ]:
d = []
for k, v in dfs.groupby('MonType'):
    d.append(sm.stats.stattools.durbin_watson(v.lev_va.values))
d

In [ ]:
df.lev_va.values
dfs = df.dropna(subset=['lev_va'])

In [ ]:
for k, v in dfs.groupby('site_no'):
    fig = plt.figure(figsize=(12,8))
    ax1 = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(v.lev_va.values.squeeze(), lags=40, ax=ax1)
    ax2 = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(v.lev_va, lags=40, ax=ax2)

In [ ]:
#dfs.drop('level_0', axis=1, inplace=True)
dfs.reset_index(inplace=True)
dfs.set_index("lev_dt",inplace=True)
yearMonitorType = dfs.groupby(['MonType','site_no',"Year"])['lev_va'].median().to_frame()

In [ ]:
types = list(yearMonitorType.index.levels[0])

d = []
for t in types:
    d.append(sm.stats.stattools.durbin_watson(yearMonitorType.loc[t,:].values))
    fig = plt.figure(figsize=(12,8))
    ax1 = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(yearMonitorType.loc[t,:].values.squeeze(), lags=40, ax=ax1)
    ax2 = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(yearMonitorType.loc[t,:].values, lags=40, ax=ax2)
d

In [ ]:
monthly = LongStats.groupby(['site_no',pd.TimeGrouper("M")])['lev_va'].agg({'mean':np.mean,'median':np.median,
                                                                'standard':np.std, 'cnt':(lambda x: np.count_nonzero(np.isfinite(x))), 
                                                                'err':(lambda x: 1.96*wa.avgMeths.sumstats(x))})
weekly = LongStats.groupby(['site_no',pd.TimeGrouper("W")])['lev_va'].agg({'mean':np.mean,'median':np.median,
                                                                'standard':np.std, 'cnt':(lambda x: np.count_nonzero(np.isfinite(x))), 
                                                                'err':(lambda x: 1.96*wa.avgMeths.sumstats(x))})

In [ ]:
sites = pd.read_excel(r"M:\GIS\site.xlsx")

In [ ]:
monthly.reset_index(inplace=True)

In [ ]:
weekly.reset_index(inplace=True)

In [ ]:
print(monthly.columns,sites.columns)

In [ ]:
monthlyData = pd.merge(monthly,sites, left_on='site_no', right_on='WellID', how='left')
monthlyData.to_csv(r"M:\GIS\monthlyData.csv")

In [ ]:
x = LongStats.groupby(pd.TimeGrouper("M"))['stdWL'].median().index
y = LongStats.groupby(pd.TimeGrouper("M"))['stdWL'].median()
y1 = LongStats.groupby(pd.TimeGrouper("M"))['stdWL'].quantile(q=0.25)
y2 = LongStats.groupby(pd.TimeGrouper("M"))['stdWL'].quantile(q=0.75)
y3 = LongStats.groupby(pd.TimeGrouper("M"))['stdWL'].quantile(q=0.1)
y4 = LongStats.groupby(pd.TimeGrouper("M"))['stdWL'].quantile(q=0.9)

plt.plot(x, y, color = 'blue', label = 'Median')
#plt.xlim(200800,201600)
#plt.ylim(-5,5)
#plt.plot(x, y1, color = 'red', label = '25%-tile')
#plt.plot(x, y2, color = 'red', label = '75%-tile')
plt.fill_between(x,y2,y1,alpha=0.2, label = '25-75%-tiles', color = 'green')
plt.fill_between(x,y3,y1,alpha=0.2, color='red', label = '10-90%-tiles')
plt.fill_between(x,y2,y4,alpha=0.2, color='red')
plt.grid()
plt.legend()
plt.show()

In [ ]:
wlLongStatsGroups = wlLongStats.groupby(['date'])['stdWL'].agg({'mean':np.mean,'median':np.median,
                                                                'standard':np.std, 'cnt':(lambda x: np.count_nonzero(np.isfinite(x))), 
                                                                'err':(lambda x: 1.96*wa.avgMeths.sumstats(x))})
wlLongStatsGroups2 = wlLongStats.groupby(['date'])['levDiff'].agg({'mean':np.mean,'median':np.median, 'standard':np.std, 'cnt':(lambda x: np.count_nonzero(~np.isnan(x))), 
                                                                   'err':(lambda x: 1.96*wa.avgMeths.sumstats(x))})

wlLongStatsGroups['meanpluserr'] = wlLongStatsGroups['mean'] + wlLongStatsGroups['err']
wlLongStatsGroups['meanminuserr'] = wlLongStatsGroups['mean'] - wlLongStatsGroups['err']

wlLongStatsGroups2['meanpluserr'] = wlLongStatsGroups2['mean'] + wlLongStatsGroups2['err']
wlLongStatsGroups2['meanminuserr'] = wlLongStatsGroups2['mean'] - wlLongStatsGroups2['err']

In [ ]:
x = wlLongStats.groupby(['date'])['stdWL'].median().index.values
y = wlLongStats.groupby(['date'])['stdWL'].median().values
y1 = wlLongStats.groupby(['date'])['stdWL'].quantile(q=0.25).values
y2 = wlLongStats.groupby(['date'])['stdWL'].quantile(q=0.75).values


plt.plot(x, y)
plt.plot(x, y1)
plt.plot(x, y2)
plt.fill_between(x,y2,y1,alpha=0.5)

In [ ]:
TallDaily = AllDaily.pivot(index='DateTime',columns='level_0',values='WaterElevation')

In [ ]:
UnstackedDaily = AllDaily.unstack(0)

In [ ]:
UnstackedDaily['WaterElevation'][range(1,20)].plot()

In [ ]:
daily[5].plot()

In [ ]:
g.drop(g.columns[0],inplace=True,axis=1)

In [ ]:
g