In [77]:
# import some modules using standard naming conventions
from pylab import *
import pandas as pd
from IPython.core.display import HTML
from matplotlib.dates import MonthLocator,DateFormatter

# full path name of spreadsheet file from Coalition for Buzzards Bay
#xls_path='/usgs/data2/notebook/Falmouth_data.xlsx'
#xls_path='D:\crs\docs\personal\science_project\Falmouth data for Sarah and Lily_mod.xlsx'
xls_path='/usgs/data2/notebook/Falmouth_data_for_Sarah_and_Lily_mod.xlsx'
# use the pandas ExcelFile class to make an object out of the xls file
xls_file = pd.ExcelFile(xls_path)

# this version parses the sheet as a time series and converts known bad or missing entries to NaN
asn = xls_file.parse('all stations, raw nutrients',index_col='Date',parse_dates=True,
    na_values=['?',None,'NS','no sample left','Sample Destroyed','machine error','#VALUE!'])
asn.index=pd.to_datetime(asn.index)

# this version just reads it in
#asn = xls_file.parse('all stations, raw nutrients') # simpler version

# this version makes the index 'Date', but does not make the DataFrame a time series
#asn = xls_file.parse('all stations, raw nutrients',
#    na_values=['?',None,'NS','no sample left','Sample Destroyed','machine error','#VALUE!'])
#asn.set_index('Date',drop=False)


(The parse also prints two defined ranges from the workbook: 'all stations, raw nutrients'!$1:$1 and 'CBB WF2 nutrient data'!$4:$6, each flagged True.)

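As a quick sanity check before parsing, the worksheet names can be listed from the ExcelFile object created above; a minimal sketch (not part of the original run, and assuming a pandas version that exposes sheet_names on ExcelFile):

    # list the worksheet names available in the workbook
    print(xls_file.sheet_names)
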
In [78]:
# define a function that looks in column var of a and replaces the detection-limit string lim with the numeric value val
def convert_detection_limit(a,var,lim,val):
    try:
        # where() comes from pylab/numpy; overwrite the rows equal to lim
        a[var][where(a[var]==lim)[0]] = val
    except:
        print('No detection values found for %s' % var)
    return a

# for example, fix detection limits for 4 nutrients
asn=convert_detection_limit(asn,'PO4 uM','<0.1',0.05)
asn=convert_detection_limit(asn,'NH4 uM','<0.1',0.05)
asn=convert_detection_limit(asn,'NO3 uM','<0.05',0.025)
asn=convert_detection_limit(asn,'Pheo ug/L','<0.05',0.025)

In [79]:
# reuse the same function to replace the 'S' and '1M' depth codes with numeric values
asn=convert_detection_limit(asn,'Depth (m)','S',0.0)
asn=convert_detection_limit(asn,'Depth (m)','1M',1.0)

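A more idiomatic alternative to the where()-based lookup above is the pandas Series.replace method, which swaps matching values in one call; a sketch, not what the notebook actually ran:

    # equivalent substitutions using Series.replace (sketch)
    asn['PO4 uM'] = asn['PO4 uM'].replace('<0.1', 0.05)
    asn['Depth (m)'] = asn['Depth (m)'].replace({'S': 0.0, '1M': 1.0})
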
In [80]:
asn.ix[0:4,8:10].head()


Out[80]:
            Secchi m  Total depth m
1993-07-15       NaN            3.7
1993-07-15       NaN            3.7
1993-08-01       NaN            3.5
1993-08-01       NaN            3.5

In [36]:
asn.tail()


Out[36]:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5 entries, 0001-255-255 00:00:00 to 0001-255-255 00:00:00
Columns: 242 entries, Embayment to None.210
dtypes: float64(214), object(28)

In [37]:
# select all the rows for two stations
qh2=asn[asn['Station']=='QH2'] #quissett harbor, qh2
mg4=asn[asn['Station']=='MG4'] #megansett harbor, mg4
# subtract the mean Nitrate conc and plot
qh2m = qh2['NO3 uM'] - qh2['NO3 uM'].mean()
qh2m.plot(linestyle='none',marker='o')
# print just a few columns and render as HTML
new=mg4.reindex(columns=['Embayment','Station','Depth (m)','Temp C','Salt ppt','PO4 uM','POM uM','TDN uM'])
HTML(new[1:5].to_html())
#TODO - Also Remove 999 and <xx from columns


Out[37]:
                    Embayment Station  Depth (m)  Temp C  Salt ppt     PO4 uM    POM uM   TDN uM
1992-09-10   MEGANSETT HARBOR     MG4       0.15     NaN      30.7  0.4962779   3.48496  11.7205
1992-09-16   MEGANSETT HARBOR     MG4       0.15     NaN      29.7  0.5571862  3.910275  13.0195
1992-09-16   MEGANSETT HARBOR     MG4        999     NaN      29.7  0.5571862  3.965878  15.9115
1993-07-14   MEGANSETT HARBOR     MG4        999     NaN      31.7  0.4228856   5.97483  10.4845

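The 999 depth sentinel flagged in the TODO above could be handled the same way, by mapping it to NaN so it drops out of statistics and plots; a sketch (not run in the notebook, and assuming the sentinel may be stored as either a number or a string in this object-dtype column):

    # treat the 999 depth sentinel as missing data (sketch)
    mg4['Depth (m)'] = mg4['Depth (m)'].replace([999, '999'], nan)
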
In [38]:
figure() # if you don't create a figure first, the xlabel and ylabel commands won't work
plot(qh2['POM uM'],qh2['NO3 uM'],linestyle='none',marker='o')
xlabel('POM uM')
ylabel('NO3 uM')
title('Scatter plot')


Out[38]:
<matplotlib.text.Text at 0x5bf5a50>

In [39]:
# plot the first 30 rows of the column labelled 'Salt ppt'
asn['Salt ppt'][0:30].plot()


Out[39]:
<matplotlib.axes.AxesSubplot at 0x3f47490>

In [40]:
# These are the threshold values used to calculate the Buzzards Bay Env. Index
# (the first value scores 0, the second scores 100):
#   Oxygen saturation (mean of lowest 20%)   40  %      90   %
#   Transparency                             0.6 m      3    m
#   Chlorophyll                              10  ug/l   3    ug/l
#   DIN                                      10  uM     1    uM
#   Total Organic N                          0.6 ppm    0.28 ppm

# Define a function to calculate index for transparency
def bbei(val):
    val_0 = 0.6
    val_100 = 3.0
    # calculate according to formula
    score = (log(val)-log(val_0))/(log(val_100)-log(val_0))
    # limit score to be between 0 and 1
    score = min(score,1.0)
    score = max(score,0.0)
    return score

print(bbei(2))


0.748070363587

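Applied to a whole column, the same index can be computed for every Secchi reading with Series.apply; a sketch (not in the original notebook) using the 'Secchi m' column shown earlier:

    # score every non-missing Secchi depth (sketch)
    secchi_score = asn['Secchi m'].dropna().apply(bbei)
    print(secchi_score.describe())
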
In [41]:
# More general function for several parameters
def bbei(val,param):
    # Test the start of param to see what kind of
    # measurement it is, then assign the limit values
    # (two characters are needed to tell Transparency from Total organic N)
    if(param[0].lower()=='o'):
        # limits for Oxygen saturation
        val_0 = 40.0
        val_100 = 90.0
    elif(param[:2].lower()=='tr'):
        # limits for Transparency (secchi depth)
        val_0 = 0.6
        val_100 = 3.0
    elif(param[0].lower()=='c'):
        # limits for Chlorophyll
        val_0 = 10.0
        val_100 = 3.0
    elif(param[0].lower()=='d'):
        # limits for DIN
        val_0 = 10.0
        val_100 = 1.0
    elif(param[:2].lower()=='to'):
        # limits for Total organic N
        val_0 = 0.6
        val_100 = 0.28
    else:
        raise ValueError('unrecognized parameter: %s' % param)
    # calculate index, limit to between 0 and 1, and return
    return max(min( (log(val)-log(val_0))/(log(val_100)-log(val_0)) ,1),0)

print(bbei(2,param='DIN'))


0.698970004336

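The generalized version can be applied the same way; for example, scoring every DIN measurement (a sketch, using the 'DIN uM' column plotted below):

    # score every non-missing DIN value (sketch)
    din_score = asn['DIN uM'].dropna().apply(lambda v: bbei(v, 'DIN'))
    print(din_score.head())
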
In [42]:
# multiple scatter plots on the same page
figure()
subplot(121)
plot( qh2['DIN uM'],qh2['TON uM'],linestyle='none',marker='o')
ylabel('TON uM')
xlabel('DIN uM')
subplot(122)
plot( qh2['TDN uM'],qh2['TON uM'],linestyle='none',marker='o')
xlabel('TDN uM')


Out[42]:
<matplotlib.text.Text at 0x603dd50>

In [43]:
# include three Stations
wf=asn[asn['Station'].isin(['WF5','WF5pw','WF5pw/CBBWF2'])]
figure(figsize=(12,5))
wf['POM uM'].plot(linestyle='none',marker='o')


Out[43]:
<matplotlib.axes.AxesSubplot at 0x5be1ed0>

In [44]:
type(wf['POM uM'])


Out[44]:
pandas.core.series.TimeSeries

In [45]:
wf.values


Out[45]:
array([[West Falmouth Hbr, WF5pw, Snug, ..., nan, nan, nan],
       [West Falmouth Hbr, WF5pw, Snug, ..., nan, nan, nan],
       [West Falmouth Hbr, WF5pw, Snug, ..., nan, nan, nan],
       ..., 
       [West Falmouth, WF5pw/CBBWF2, Snug, ..., nan, nan, nan],
       [West Falmouth, WF5pw/CBBWF2, Snug, ..., nan, nan, nan],
       [West Falmouth, WF5pw/CBBWF2, Snug, ..., nan, nan, nan]], dtype=object)

In [46]:
wfd=wf[wf['Depth (m)'] >= 0.5]
wfd['Depth (m)'].values


Out[46]:
array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.52, 1.0,
       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.0, 1.0, 1.0, 1.0,
       1.0, 1.0, 1.0], dtype=object)

In [47]:
# print just a few columns and render as HTML
new=wfd.reindex(columns=['Embayment','Station','Depth (m)','Temp C','Salt ppt','NO3 uM','PO4 uM','POM uM','TDN uM'])
HTML(new[100: ].to_html())


Out[47]:
<class 'pandas.tseries.index.DatetimeIndex'> Length: 0, Freq: None, Timezone: None Empty DataFrame

In [48]:
model = pd.ols(x=wf['Year'],y=wf['NH4 uM'])
model


Out[48]:
-------------------------Summary of Regression Analysis-------------------------

Formula: Y ~ <x> + <intercept>

Number of Observations:         295
Number of Degrees of Freedom:   2

R-squared:         0.0071
Adj R-squared:     0.0037

Rmse:              1.1210

F-stat (1, 293):     2.0941, p-value:     0.1489

Degrees of Freedom: model 1, resid 293

-----------------------Summary of Estimated Coefficients------------------------
      Variable       Coef    Std Err     t-stat    p-value    CI 2.5%   CI 97.5%
--------------------------------------------------------------------------------
             x     0.0162     0.0112       1.45     0.1489    -0.0057     0.0381
     intercept   -31.1477    22.3587      -1.39     0.1646   -74.9708    12.6753
---------------------------------End of Summary---------------------------------

In [49]:
figure(figsize=(12,5))
model.y_fitted.plot(marker='x')
wf['NH4 uM'].plot(linestyle='none',marker='o',title='NH4 (uM) in West Falmouth Harbor')


Out[49]:
<matplotlib.axes.AxesSubplot at 0x5fc6e50>

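As an independent cross-check on the fitted trend, the slope can also be estimated with numpy's polyfit on the non-missing (Year, NH4) pairs; a sketch, not part of the original analysis:

    # least-squares slope via polyfit on the valid pairs (sketch)
    ok = wf['Year'].notnull() & wf['NH4 uM'].notnull()
    slope, intercept = polyfit(wf['Year'][ok].values.astype(float),
                               wf['NH4 uM'][ok].values.astype(float), 1)
    print('slope = %.4f uM per year' % slope)
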
In [64]:
wf_month=wf.groupby(lambda x: x.month).mean()

In [69]:
wf_month['DO mg/L'].head()


Out[69]:
7    5.905087
8    5.617639
Name: DO mg/L, dtype: float64

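An equivalent way to group by calendar month is to pass the DatetimeIndex month attribute to groupby directly, and the monthly means can then be plotted; a sketch:

    # group by calendar month using the index attribute (sketch)
    wf_month = wf.groupby(wf.index.month).mean()
    wf_month['DO mg/L'].plot(kind='bar', title='Mean DO (mg/L) by month, West Falmouth')
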
In [52]:
asn['Station'].unique()


Out[52]:
array([QH2, QH1, QH3, LSM1, GPP1, FLP1, MAC1, WF1pw, WF1, WF1pw/CBBWF2,
       WF5pw, WF5, WF5pw/CBBWF2, WF2pw, WF2, WF2pw/CBBWF4, WF3pw, WF3,
       WF3pw/CBBWF5, WF4pw, WF4, WF4pw/CBBWF1, WF6pw, WF6, WF6pw/CBBWF3,
       WF7pw, WF7, WF7pw/CBBWF9, WF8pw, WF8, nan, HB2, WH1, WH2, WH3, FC1,
       RH1, SQ2, SQ1, MG1, MG2, MG3, MG4, CBB1, MB1], dtype=object)

In [53]:
asn['Embayment'].unique()


Out[53]:
array([QUISSETT HARBOR, Little Sipp Marsh, GUNNING POINT POND, Flume Pond,
       Mashapaquit Creek, West Falmouth Hbr, West Falmouth, HERRING BROOK,
       Wild Harbor, nan, Wild Harbor River, Fiddler Cove, Rands Harbor,
       SQUETEAGUE HARBOR, MEGANSETT HARBOR, CBB BUOY, Manomet Bay], dtype=object)

In [72]:
pd.__version__


Out[72]:
'0.11.0'
