Retrieve data: the data must first be saved as text files on Dropbox
In [1]:
import urllib

import numpy as np
import pandas as pd
In [2]:
# Download the ICO2 sensor log and the YSI log from Dropbox and store each one
# in the data directory under the given file name.
# NOTE(review): both URLs embed a token_hash query parameter -- confirm it is
# acceptable to keep these in a shared notebook.
ico2_url = 'https://dl.dropboxusercontent.com/s/9x75i5prcl95bhk/ICO2sensordata_Sage_clipped_asc.txt?token_hash=AAG2bFyl5yuW9RtMWbagkFiPP1nCYM-gPcDFcC5Ay6i48w&dl=1'
ysi_url = 'https://dl.dropboxusercontent.com/s/7rpzn295mlvvmhj/YSI_Sage.txt?token_hash=AAFqTDOTebMZeGMuUI4uN2LdulqPV13doh3oK8D2tI7OeQ&dl=1'
urllib.urlretrieve(ico2_url, '/usgs/data2/notebook/data/ICO2_Sage_clipped_asc.txt')
urllib.urlretrieve(ysi_url, '/usgs/data2/notebook/data/YSI_Sage.txt')
Out[2]:
In [3]:
# Download the lab pH measurements from Dropbox into the data directory.
lab_ph_url = 'https://dl.dropboxusercontent.com/s/yxex97qk8deh5tj/lab_pH.txt?token_hash=AAEb0-rZ0Y2nlOsMuFsL2kyltvyiuqYng600F8FQLVJz2w&dl=1'
urllib.urlretrieve(lab_ph_url, '/usgs/data2/notebook/data/lab_pH.txt')
Out[3]:
In [4]:
# Parse the ICO2 log as fixed-width text. Each (start, end) pair selects the
# character span of one field; everything outside the spans is discarded.
col_specs = [
    (0, 10),   # date
    (11, 19),  # time
    (23, 30),  # co2
    (32, 37),  # temp
    (58, 62),  # press
]
# Columns 0 and 1 (date, time) are parsed together as one datetime, which is
# then used as the index. Reading stops after nrows rows.
df = pd.read_fwf(
    '/usgs/data2/notebook/data/ICO2_Sage_clipped_asc.txt',
    colspecs=col_specs,
    skiprows=2,
    parse_dates=[[0, 1]],
    index_col=0,
    names=['date', 'time', 'co2', 'temp', 'press'],
    header=None,
    nrows=500000
)
In [5]:
# Scale the raw pressure column by 1/10.
# The column is overwritten in place, so running this cell a second time
# divides the values again.
df['press'] = df['press'] / 10.0
In [6]:
# Load the YSI log: whitespace-delimited text with the first two lines skipped
# and the date and time columns parsed together into a datetime index.
# The separator is r"\s+" (one or more whitespace characters) rather than
# r"\s*": a pattern that can match the empty string is not a valid field
# delimiter, and on Python 3.7+ regex splitting on it breaks every line into
# single characters.
df2 = pd.read_csv('/usgs/data2/notebook/data/YSI_Sage.txt',skiprows=[0,1],parse_dates =[[0,1]], index_col=0, sep=r"\s+",
                  names=['date','time','pH','depth', 'temp', 'sal'],header=None)
In [7]:
# Load the lab pH measurements: whitespace-delimited text with the first two
# lines skipped and the date and time columns parsed together into a datetime
# index.
# The separator is r"\s+" (one or more whitespace characters) rather than
# r"\s*": a pattern that can match the empty string is not a valid field
# delimiter, and on Python 3.7+ regex splitting on it breaks every line into
# single characters.
df3 = pd.read_csv('/usgs/data2/notebook/data/lab_pH.txt',skiprows=[0,1], parse_dates =[[0,1]], index_col=0, sep=r"\s+",
                  names=['date','time','Rave', 'Rstdev', 'pHave', 'pHstdev'],header=None)
In [8]:
# Keep only the rows recorded while the instrument was in the water.
deploy_start = '2013-06-04 20:38:51'
deploy_end = '2013-06-11 17:17:11'
df = df[deploy_start:deploy_end]
df2 = df2[deploy_start:deploy_end]
# The lab pH window starts earlier (19:49:51) than the two sensor windows.
df3 = df3['2013-06-04 19:49:51':deploy_end]
In [9]:
# Overview of each frame: one panel per column, all panels sharing the x axis.
panel_opts = dict(subplots=True, sharex=True)
df.plot(**panel_opts)
df2.plot(**panel_opts)
Out[9]:
Data synthesis: calculating the 30 min, 10 min, and 1 min means in order to make the data more manageable
In [22]:
# ICO2 frame: mean of every column within each 30, 10 and 1 minute bin.
df_30min = df.resample('30min', how='mean')
df_10min = df.resample('10min', how='mean')
df_1min = df.resample('1min', how='mean')

# YSI frame: the same three bin widths.
df2_30min = df2.resample('30min', how='mean')
df2_10min = df2.resample('10min', how='mean')
df2_1min = df2.resample('1min', how='mean')
Explore the data. In this next section the colors have meaning: blue is for CO2, green is for pH, red is for depth, black is for temp (cyan for the YSI temp where the two are compared), and yellow is for pressure
In [16]:
# 10 min mean CO2 over the clipped record, drawn on a wide figure.
co2_10min = df_10min['co2']
co2_10min.plot(figsize=(15, 6))
Out[16]:
In [21]:
# Store the YSI pH lowered by a constant 0.3 in its own column.
# In top-to-bottom order the resampled df2_* frames are built before this
# cell, so they do not contain 'pH_adjusted'.
df2['pH_adjusted'] = df2['pH'] - 0.3
In [23]:
# Constructs a figure comparing the lab pH averages (green dots) with the
# offset-corrected YSI pH (green line, 30 min means).
plt.figure()
df3['pHave'].plot(style='go')
# df2_30min is resampled before 'pH_adjusted' is added to df2, so on a
# top-to-bottom run it has no such column and indexing it raises KeyError.
# Apply the same 0.3 offset to the 30 min mean pH instead; subtracting a
# constant commutes with the mean, so the curve is the same.
(df2_30min['pH'] - 0.3).plot(figsize=(15,4),secondary_y=False,style='g')
Out[23]:
In [24]:
# New figure: 30 min mean CO2 on the primary axis, 30 min mean pH (green) on a
# secondary y axis.
plt.figure()
co2_30min = df_30min['co2']
ph_30min = df2_30min['pH']
co2_30min.plot()
ph_30min.plot(figsize=(15, 4), secondary_y=True, style='g')
Out[24]:
In [25]:
# Lower the CO2-sensor temperature by 5.3 degrees so that it matches the YSI
# temperature, then overlay the two: adjusted sensor in black, YSI in cyan.
df['temp_adjusted'] = df['temp'] - 5.3
plt.figure()
df['temp_adjusted'].plot(style='k')
df2['temp'].plot(figsize=(15, 4), secondary_y=False, style='c')
Out[25]:
In [26]:
# New figure: 10 min mean CO2 on the primary axis, 10 min mean depth (red) on
# a secondary y axis.
plt.figure()
co2_10min = df_10min['co2']
depth_10min = df2_10min['depth']
co2_10min.plot()
depth_10min.plot(figsize=(15, 4), secondary_y=True, style='r')
Out[26]:
In [27]:
# 10 min means from the ICO2 frame: CO2 in blue on the primary axis, temp in
# black on a secondary y axis.
co2_temp_10min = df_10min[['co2', 'temp']]
co2_temp_10min.plot(figsize=(15, 4), secondary_y='temp', style=['b', 'k']);
In [28]:
# New figure: 10 min mean sensor temp (black) on the primary axis, 10 min mean
# depth (red) on a secondary y axis.
plt.figure()
temp_10min = df_10min['temp']
depth_10min = df2_10min['depth']
temp_10min.plot(style='k')
depth_10min.plot(figsize=(15, 4), secondary_y=True, style='r')
Out[28]:
In [29]:
# 10 min means from the YSI frame: depth in red on the primary axis, pH in
# green on a secondary y axis.
depth_ph_10min = df2_10min[['depth', 'pH']]
depth_ph_10min.plot(figsize=(15, 4), secondary_y='pH', style=['r', 'g']);
In [30]:
# Look at the correlation between pH and CO2 using the 10 min means.
# np.corrcoef is called explicitly: the bare name `corrcoef` is not imported
# anywhere in this notebook and only resolves when a pylab-style star import
# has populated the namespace.
# The result is the correlation-coefficient matrix of the two series.
np.corrcoef(df_10min['co2'],df2_10min['pH'])
Out[30]:
In [31]:
# Scatter of 10 min mean CO2 (y) against 10 min mean pH (x) as green dots,
# with grid lines switched on.
# NOTE(review): `plot` and `grid` are bare names that this notebook never
# imports -- presumably supplied by a pylab-style namespace; confirm.
ph_10min = df2_10min['pH']
co2_10min = df_10min['co2']
plot(ph_10min, co2_10min, 'go')
grid();
1 min means for Aleck
In [32]:
# Write the co2, temp and press columns of the 1 min means to a text file.
one_min_path = '/usgs/data2/notebook/data/ICO2_Sage_1min.txt'
df_1min.to_csv(one_min_path, cols=['co2', 'temp', 'press'])
In [33]:
# Read the exported 1 min file back in, using read_csv's default options.
df5 = pd.read_csv('/usgs/data2/notebook/data/ICO2_Sage_1min.txt')
In [34]:
# Preview the first rows of the re-read file. `head` has to be called: the
# bare attribute only displays the bound method's repr, not a preview.
df5.head()
Out[34]:
In [ ]: