Reading the Docklight CO2 instrument text file with all of the original characters

Sage Pond continuous (collected from 2013-06-04 20:38:51 to 2013-06-11 17:17:11) and stopping just before tidal effort

Retrieve data: The data must first be in a text file on dropbox



In [1]:

    
import pandas as pd
import urllib



In [2]:

    
#gets data from dropbox and saves it into the data directory under the given name
urllib.urlretrieve('https://dl.dropboxusercontent.com/s/9x75i5prcl95bhk/ICO2sensordata_Sage_clipped_asc.txt?token_hash=AAG2bFyl5yuW9RtMWbagkFiPP1nCYM-gPcDFcC5Ay6i48w&dl=1','/usgs/data2/notebook/data/ICO2_Sage_clipped_asc.txt')
urllib.urlretrieve('https://dl.dropboxusercontent.com/s/7rpzn295mlvvmhj/YSI_Sage.txt?token_hash=AAFqTDOTebMZeGMuUI4uN2LdulqPV13doh3oK8D2tI7OeQ&dl=1','/usgs/data2/notebook/data/YSI_Sage.txt')









    Out[2]:





('/usgs/data2/notebook/data/YSI_Sage.txt',
 <httplib.HTTPMessage instance at 0x288b098>)



In [3]:

    
urllib.urlretrieve('https://dl.dropboxusercontent.com/s/yxex97qk8deh5tj/lab_pH.txt?token_hash=AAEb0-rZ0Y2nlOsMuFsL2kyltvyiuqYng600F8FQLVJz2w&dl=1','/usgs/data2/notebook/data/lab_pH.txt')









    Out[3]:





('/usgs/data2/notebook/data/lab_pH.txt',
 <httplib.HTTPMessage instance at 0x2ae13f8>)



In [4]:

    
# reads columns with fixed width and chops out un-needed characters.
col_specs=[(0,10), (11,19), (23,30), (32,37), (58,62)]
df = pd.read_fwf('/usgs/data2/notebook/data/ICO2_Sage_clipped_asc.txt',colspecs=col_specs, skiprows=2,parse_dates =[[0,1]], index_col=0, 
    names=['date','time','co2','temp','press'],header=None,nrows=500000)



In [5]:

    
df['press']=df['press']/10.



In [6]:

    
df2 = pd.read_csv('/usgs/data2/notebook/data/YSI_Sage.txt',skiprows=[0,1],parse_dates =[[0,1]], index_col=0, sep=r"\s*", 
    names=['date','time','pH','depth', 'temp', 'sal'],header=None)



In [7]:

    
df3 = pd.read_csv('/usgs/data2/notebook/data/lab_pH.txt',skiprows=[0,1], parse_dates =[[0,1]], index_col=0, sep=r"\s*", 
    names=['date','time','Rave', 'Rstdev', 'pHave', 'pHstdev'],header=None)



In [8]:

    
# clip to time when instrument was in water
df = df['2013-06-04 20:38:51':'2013-06-11 17:17:11']
df2 = df2['2013-06-04 20:38:51':'2013-06-11 17:17:11']
df3 = df3['2013-06-04 19:49:51':'2013-06-11 17:17:11']



In [9]:

    
df.plot(subplots=True,sharex=True)
df2.plot(subplots=True,sharex=True)









    Out[9]:





array([<matplotlib.axes.AxesSubplot object at 0x2f389d0>,
       <matplotlib.axes.AxesSubplot object at 0xd3f84d0>,
       <matplotlib.axes.AxesSubplot object at 0xaa10490>,
       <matplotlib.axes.AxesSubplot object at 0x9f70510>], dtype=object)

Data syntesis: calculating the 30 min, 10 min, and 1 min means in order to make the data more manageable



In [22]:

    
df_30min = df.resample('30min',how='mean')
df2_30min = df2.resample('30min',how='mean')

df_10min = df.resample('10min',how='mean')
df2_10min = df2.resample('10min',how='mean')

df_1min = df.resample('1min',how='mean')
df2_1min = df2.resample('1min',how='mean')

Explore the data: In this next section the colors have meaning: blue is for CO2, green is for pH, red is for depth, black is for temp, and yellow is for pressure



In [16]:

    
df_10min['co2'].plot(figsize=(15,6))









    Out[16]:





<matplotlib.axes.AxesSubplot at 0x2ac61d0>



In [21]:

    
df2['pH_adjusted']=df2['pH']-.3



In [23]:

    
#costructs figure and draws relevant information from different data frames
plt.figure()
df3['pHave'].plot(style='go')
df2_30min['pH_adjusted'].plot(figsize=(15,4),secondary_y=False,style='g')









    Out[23]:





<matplotlib.axes.AxesSubplot at 0xcf2c910>



In [24]:

    
#costructs figure and draws relevant information from different data frames
plt.figure()
df_30min['co2'].plot()
df2_30min['pH'].plot(figsize=(15,4),secondary_y=True, style='g')









    Out[24]:





<matplotlib.axes.AxesSubplot at 0xb2b61d0>



In [25]:

    
#adjusts temperature from co2 sensor (black) down 5.3 degrees, to match YSI (cyan)
df['temp_adjusted']=df['temp']-5.3
plt.figure()
df['temp_adjusted'].plot(style='k')
df2['temp'].plot(figsize=(15,4),secondary_y=False, style='c')









    Out[25]:





<matplotlib.axes.AxesSubplot at 0x426dc50>



In [26]:

    
#costructs figure and draws relevant information from different data frames
plt.figure()
df_10min['co2'].plot()
df2_10min['depth'].plot(figsize=(15,4),secondary_y=True, style='r')









    Out[26]:





<matplotlib.axes.AxesSubplot at 0x3d8bb10>



In [27]:

    
df_10min[['co2','temp']].plot(figsize=(15,4),secondary_y = 'temp', style=['b','k']);



In [28]:

    
plt.figure()
df_10min['temp'].plot(style='k')
df2_10min['depth'].plot(figsize=(15,4),secondary_y=True, style='r')









    Out[28]:





<matplotlib.axes.AxesSubplot at 0x7907850>



In [29]:

    
df2_10min[['depth','pH']].plot(figsize=(15,4),secondary_y = 'pH', style=['r','g']);



In [30]:

    
# look at correlation between pH and CO2
corrcoef(df_10min['co2'],df2_10min['pH'])









    Out[30]:





array([[ 1.        , -0.84751757],
       [-0.84751757,  1.        ]])



In [31]:

    
plot(df2_10min['pH'],df_10min['co2'],'go');
grid();

1 min means for Aleck



In [32]:

    
df_1min.to_csv('/usgs/data2/notebook/data/ICO2_Sage_1min.txt', cols=['co2','temp','press'])



In [33]:

    
df5=pd.read_csv('/usgs/data2/notebook/data/ICO2_Sage_1min.txt')



In [34]:

    
df5.head









    Out[34]:





<bound method DataFrame.head of <class 'pandas.core.frame.DataFrame'>
Int64Index: 9880 entries, 0 to 9879
Data columns (total 4 columns):
date_time    9880  non-null values
co2          9880  non-null values
temp         9880  non-null values
press        9880  non-null values
dtypes: float64(3), object(1)>



In [ ]: