In [77]:
# import some modules using standard naming conventions
from pylab import *
import pandas as pd
from IPython.core.display import HTML
from matplotlib.dates import MonthLocator,DateFormatter
# full path name of spreadsheet file from Coalition for Buzzards Bay
#xls_path='/usgs/data2/notebook/Falmouth_data.xlsx'
#xls_path='D:\crs\docs\personal\science_project\Falmouth data for Sarah and Lily_mod.xlsx'
xls_path='/usgs/data2/notebook/Falmouth_data_for_Sarah_and_Lily_mod.xlsx'
# use the panda ExcelFile method to make an object out of the xls file
xls_file = pd.ExcelFile(xls_path)
# this version makes a time series and removes some weird values
asn = xls_file.parse('all stations, raw nutrients',index_col='Date',parse_dates=True,
na_values=['?',None,'NS','no sample left','Sample Destroyed','machine error','#VALUE!'])
asn.index=pd.to_datetime(asn.index)
# this version just reads it in
#asn = xls_file.parse('all stations, raw nutrients') # simpler version
# this version makes the index 'Date', but does not make the DataFrame a time series
#asn = xls_file.parse('all stations, raw nutrients',
# na_values=['?',None,'NS','no sample left','Sample Destroyed','machine error','#VALUE!'])
#asn.set_index('Date',drop='false')
In [78]:
# define a function that looks in column var of a and replaces lim with val
def convert_detection_limit(a,var,lim,val):
try:
a[var][where(a[var]==lim)[0]] = val
except:
print 'No detection values found for %s' % var
return a
# for example, fix detction limits for 4 nutrients
asn=convert_detection_limit(asn,'PO4 uM','<0.1',0.05)
asn=convert_detection_limit(asn,'NH4 uM','<0.1',0.05)
asn=convert_detection_limit(asn,'NO3 uM','<0.05',0.025)
asn=convert_detection_limit(asn,'Pheo ug/L','<0.05',0.025)
In [79]:
# try to use same function to remove S from sample depths
asn=convert_detection_limit(asn,'Depth (m)','S',0.0)
asn=convert_detection_limit(asn,'Depth (m)','1M',1.0)
In [80]:
asn.ix[0:4,8:10].head()
Out[80]:
In [36]:
asn.tail()
Out[36]:
In [37]:
# find the rows for QH2
#find all the data at station
qh2=asn[asn['Station']=='QH2'] #quissett harbor, qh2
mg4=asn[asn['Station']=='MG4'] #megansett harbor, mg4
# subtract the mean Nitrate conc and plot
qh2m = qh2['NO3 uM'] - qh2['NO3 uM'].mean()
qh2m.plot(linestyle='none',marker='o')
# print just a few columns and render as HTML
new=mg4.reindex(columns=['Embayment','Station','Depth (m)','Temp C','Salt ppt','PO4 uM','POM uM','TDN uM'])
HTML(new[1:5].to_html())
#TODO - Also Remove 999 and <xx from columns
Out[37]:
In [37]:
In [38]:
figure # if you don't use this, the xlabel and ylabel commands won't work
plot(qh2['POM uM'],qh2['NO3 uM'],linestyle='none',marker='o')
xlabel('POM uM')
ylabel('NO3 uM')
title('Scatter plot')
Out[38]:
In [39]:
# plot the first 31 rows of column labelled 'Salt ppt'
asn['Salt ppt'][0:30].plot()
Out[39]:
In [40]:
# These are the values used to calculate the Buzz. Bay Env. Index
#Oxygen saturation (mn of lowest 20%) 40 % 90 %
#Transparency 0.6 m 3 m
#Chlorophyll 10 ug/l 3 ug/l
#DIN 10 uM 1 uM
#Total Organic N 0.6 ppm 0.28 ppm
# Define a function to calculate index for transparency
def bbei(val):
val_0 = 0.6
val_100 = 3.0
# calculate according to formula
score = (log(val)-log(val_0))/(log(val_100)-log(val_0))
# limit score to be between 0 and 1
score = min(score,1.0)
score = max(score,0.0)
return score
print(bbei(2))
In [41]:
# More general function for several parameters
def bbei(val,param):
# First test the first character of param to see
# what kind of measurement, the assing values
if(param[0].lower()=='o'):
# limits for Oxygen saturation
val_0 = 40.0
val_100 = 90.0
elif(param[0].lower()=='t'):
# limits for Transparency (secchi depth)
val_0 = 0.6
val_100 = 3.0
elif(param[0].lower()=='c'):
# limits for Chlorophyll
val_0 = 10.0
val_100 = 3.0
elif(param[0].lower()=='d'):
# limits for DIN
val_0 = 10.0
val_100 = 1.0
elif(param[0].lower()=='t'):
# limits for Total organic N
val_0 = 0.6
val_100 = 0.28
# calculate index, limit to between 0 and 1, and return
return max(min( (log(val)-log(val_0))/(log(val_100)-log(val_0)) ,1),0)
print(bbei(2,param='DIN'))
In [42]:
# multiple scatter plots on the same page
figure
subplot(121)
plot( qh2['DIN uM'],qh2['TON uM'],linestyle='none',marker='o')
ylabel('TON uM')
xlabel('DIN uM')
subplot(122)
plot( qh2['TDN uM'],qh2['TON uM'],linestyle='none',marker='o')
xlabel('TDN uM')
Out[42]:
In [43]:
# include three Stations
wf=asn[asn['Station'].isin(['WF5','WF5pw','WF5pw/CBBWF2'])]
figure(figsize=(12,5))
wf['POM uM'].plot(linestyle='none',marker='o')
Out[43]:
In [44]:
type(wf['POM uM'])
Out[44]:
In [45]:
wf.values
Out[45]:
In [46]:
wfd=wf[wf['Depth (m)'] >= 0.5]
wfd['Depth (m)'].values
Out[46]:
In [47]:
# print just a few columns and render as HTML
new=wfd.reindex(columns=['Embayment','Station','Depth (m)','Temp C','Salt ppt','NO3 uM','PO4 uM','POM uM','TDN uM'])
HTML(new[100: ].to_html())
Out[47]:
In [48]:
model = pd.ols(x=wf['Year'],y=wf['NH4 uM'])
model
Out[48]:
In [49]:
figure(figsize=(12,5))
model.y_fitted.plot(marker='x')
wf['NH4 uM'].plot(linestyle='none',marker='o',title='NH4 (uM) in West Falmouth Harbor')
Out[49]:
In [64]:
wf_month=wf.groupby(lambda x: x.month).mean()
In [69]:
wf_month['DO mg/L'].head()
Out[69]:
In [52]:
asn['Station'].unique()
Out[52]:
In [53]:
asn['Embayment'].unique()
Out[53]:
In [72]:
pd.__version__
Out[72]:
In [ ]: