Reading the Docklight CO2 instrument text file with all of the original characters


In [29]:
import urllib

import numpy as np
import pandas as pd

try:  # Python 3 keeps urlretrieve in urllib.request
    from urllib.request import urlretrieve
except ImportError:  # Python 2 exposes it directly on urllib
    from urllib import urlretrieve

In [30]:
# Download the raw Docklight log from Dropbox and save it in the data directory.
# `urlretrieve` comes from the import cell, which resolves it for both Python 2
# and Python 3 (the original `urllib.urlretrieve` spelling only exists on Python 2).
# The call is left as the last expression so the (local_path, headers) tuple is displayed.
# NOTE(review): the URL embeds a `token_hash` and the target is an absolute,
# machine-specific path -- consider moving both into a config cell / environment.
urlretrieve(
    'https://dl.dropboxusercontent.com/s/4deyqskikyf02qr/ICO2sensordata_uglyish_asc.txt?token_hash=AAHPGWW6wb1xnWJl1JFAqoF0v0yiHlASR1_UQLB1z7f-Jg&dl=1',
    '/usgs/data2/notebook/data/ICO2_uglyish_asc.txt',
)


Out[30]:
('/usgs/data2/notebook/data/ICO2_uglyish_asc.txt',
 <httplib.HTTPMessage instance at 0xf2e3a70>)

In [31]:
# Sample of the raw Docklight log, shown for reference only: this cell just echoes the string.
# It starts with the "Docklight Log File" banner and a truncated record; every full record
# carries a date, a time and G, T, HT, RH and P fields, and ends with a literal <CR><LF> marker.
'''
Docklight Log File (ASCII) - Started 6/14/2013 11:34:17.033 
 RH0000   P1040X<CR><LF>
2013/06/04 19:42:23 - G 498.00 T26.78    HT0000 RH0000   P1040X<CR><LF>
2013/06/04 19:42:25 - G 498.00 T26.78    HT0000 RH0000   P1040X<CR><LF>
2013/06/04 19:42:27 - G 498.00 T26.78    HT0000 RH0000   P1040X<CR><LF>
2013/06/04 19:42:29 - G 498.00 T26.79    HT0000 RH0000   P1040X<CR><LF>
2013/06/04 19:42:31 - G 498.00 T26.80    HT0000 RH0000   P1042X<CR><LF>
'''


Out[31]:
'\nDocklight Log File (ASCII) - Started 6/14/2013 11:34:17.033 \n RH0000   P1040X<CR><LF>\n2013/06/04 19:42:23 - G 498.00 T26.78    HT0000 RH0000   P1040X<CR><LF>\n2013/06/04 19:42:25 - G 498.00 T26.78    HT0000 RH0000   P1040X<CR><LF>\n2013/06/04 19:42:27 - G 498.00 T26.78    HT0000 RH0000   P1040X<CR><LF>\n2013/06/04 19:42:29 - G 498.00 T26.79    HT0000 RH0000   P1040X<CR><LF>\n2013/06/04 19:42:31 - G 498.00 T26.80    HT0000 RH0000   P1042X<CR><LF>\n'

In [32]:
# Parse the fixed-width log into a DataFrame indexed by timestamp.
# Each (start, stop) pair is a half-open character range that keeps only a
# field's value and drops the separators and letter tags around it
# (the "-", "G", "T", "HT", "RH", "P" prefixes and the trailing "X<CR><LF>").
col_specs = [
    (0, 10),   # date
    (11, 19),  # time
    (23, 30),  # CO2
    (32, 37),  # Temperature
    (43, 47),  # Humidity
    (50, 54),  # Relative Humidity
    (58, 62),  # Pressure
]

# skiprows=2 skips the first two lines of the file -- presumably the banner and
# the truncated record visible in the sample text. parse_dates=[[0, 1]] merges
# the date and time columns into a single timestamp column, which index_col=0
# then turns into the index. nrows caps the read at the first 500000 records.
# NOTE(review): nested-list `parse_dates` appears to be deprecated in recent
# pandas releases -- confirm against the installed version before upgrading.
df = pd.read_fwf(
    '/usgs/data2/notebook/data/ICO2_uglyish_asc.txt',
    colspecs=col_specs,
    skiprows=2,
    header=None,
    names=['date', 'time', 'CO2', 'Temperature', 'Humidity', 'Relative Humidity', 'Pressure'],
    parse_dates=[[0, 1]],
    index_col=0,
    nrows=500000,
)

In [33]:
# Largest CO2 reading among the loaded rows (quick sanity check on the parse).
df.CO2.max()


Out[33]:
8052.0

In [34]:
# Full-resolution CO2 record drawn on a wide figure.
df.CO2.plot(figsize=(15, 6))


Out[34]:
<matplotlib.axes.AxesSubplot at 0xf2d4b10>

In [35]:
# Average every column over 5-minute bins; bins that contain no samples come out as NaN.
# `.resample(...).mean()` replaces the `how='mean'` keyword, which was deprecated
# and later removed from pandas, so the old spelling fails on current releases.
df_5min = df.resample('5min').mean()

# Show the binned temperature over a window wider than the deployment so the
# NaN gaps at either end reveal where the usable record starts and stops.
df_5min.loc['2013-06-04 12:00:00':'2013-06-11 18:00:00', 'Temperature']


Out[35]:
date_time
2013-06-04 19:40:00    26.804177
2013-06-04 19:45:00    26.888800
2013-06-04 19:50:00    27.030991
2013-06-04 19:55:00          NaN
2013-06-04 20:00:00          NaN
2013-06-04 20:05:00          NaN
2013-06-04 20:10:00          NaN
2013-06-04 20:15:00          NaN
2013-06-04 20:20:00          NaN
2013-06-04 20:25:00          NaN
2013-06-04 20:30:00          NaN
2013-06-04 20:35:00    28.801429
2013-06-04 20:40:00    29.013267
2013-06-04 20:45:00    29.330200
2013-06-04 20:50:00    29.563600
...
2013-06-11 16:50:00    25.417733
2013-06-11 16:55:00    25.456533
2013-06-11 17:00:00    25.470533
2013-06-11 17:05:00    25.500467
2013-06-11 17:10:00    25.520400
2013-06-11 17:15:00    25.476970
2013-06-11 17:20:00          NaN
2013-06-11 17:25:00          NaN
2013-06-11 17:30:00          NaN
2013-06-11 17:35:00          NaN
2013-06-11 17:40:00          NaN
2013-06-11 17:45:00          NaN
2013-06-11 17:50:00          NaN
2013-06-11 17:55:00          NaN
2013-06-11 18:00:00          NaN
Freq: 5T, Name: Temperature, Length: 1997, dtype: float64

In [36]:
# Keep only the 5-minute bins recorded while the instrument was in the water.
df_5min = df_5min.loc['2013-06-04 20:35:00':'2013-06-11 17:15:00']

In [37]:
# CO2 on the primary axis with temperature on a secondary y-axis.
df_5min.loc[:, ['CO2', 'Temperature']].plot(secondary_y='Temperature', figsize=(15, 4));



In [38]:
# Both humidity channels on a shared axis.
df_5min.loc[:, ['Humidity', 'Relative Humidity']].plot(figsize=(15, 4));



In [39]:
# Pressure record over the clipped period.
df_5min.Pressure.plot(figsize=(15, 4));



In [40]:
# Correlation matrix between the 5-minute mean temperature and CO2.
# The off-diagonal entry is the correlation coefficient between the two series.
# `np.corrcoef` is called explicitly: the bare `corrcoef` name only exists when
# the kernel was started in pylab mode and fails on a fresh kernel otherwise.
np.corrcoef(df_5min['Temperature'], df_5min['CO2'])


Out[40]:
array([[ 1.        , -0.32966428],
       [-0.32966428,  1.        ]])

In [41]:
# Scatter of CO2 against temperature (green circles, grid on).
# Drawn through pandas' own plotting so the cell no longer depends on the bare
# `plot` / `grid` names that only exist when the kernel runs in pylab mode.
ax = df_5min.plot(x='Temperature', y='CO2', style='go', grid=True, legend=False)
ax.set_xlabel('Temperature')
ax.set_ylabel('CO2');



In [14]: