Reading the Docklight CO2 instrument text file


In [1]:
import numpy as np
import pandas as pd

In [2]:
# For reference, the first lines of the raw Docklight log are reproduced in the
# string below. The string is not assigned to anything; the trailing ';' only
# keeps the notebook from echoing it back as cell output.
'''
Docklight Log File (ASCII) - Started 6/14/2013 11:34:17.033 
 RH0000   P1040X<CR><LF>
2013/06/04 19:42:23 - G 498.00 T26.78    HT0000 RH0000   P1040X<CR><LF>
2013/06/04 19:42:25 - G 498.00 T26.78    HT0000 RH0000   P1040X<CR><LF>
2013/06/04 19:42:27 - G 498.00 T26.78    HT0000 RH0000   P1040X<CR><LF>
2013/06/04 19:42:29 - G 498.00 T26.79    HT0000 RH0000   P1040X<CR><LF>
2013/06/04 19:42:31 - G 498.00 T26.80    HT0000 RH0000   P1042X<CR><LF>
''';

In [3]:
# Half-open [start, stop) character offsets of each field within a log line,
# in the same order as `col_names`. The single-letter field tags in the log
# (G, T, HT, RH, P) fall outside these ranges, so only the values are read.
col_specs = [(0,10), (11,19), (23,30), (32,37), (43,47), (50,54), (58,62)]
col_names = ['date','time','CO2','Temperature','Humidity','Relative Humidity','Pressure']

# NOTE(review): absolute path from the original author's machine -- point this
# at your local copy of the log before running.
data_file = '/usgs/data2/notebook/data/ICO2sensordata_v1.txt'

# skiprows=2 drops the Docklight header line and the partial record after it.
# The date and time fields are read as plain strings and combined below, because
# combining columns through a nested `parse_dates` list is deprecated in pandas.
df = pd.read_fwf(data_file, colspecs=col_specs, skiprows=2, header=None,
                 names=col_names, dtype={'date': str, 'time': str}, nrows=500000)

# Build the timestamp index with an explicit format. Rows whose timestamp does
# not parse (e.g. a truncated line) become NaT and are dropped so that the
# index is a clean DatetimeIndex for the resampling done later.
stamps = pd.to_datetime(df.pop('date') + ' ' + df.pop('time'),
                        format='%Y/%m/%d %H:%M:%S', errors='coerce')
df.index = pd.DatetimeIndex(stamps, name='date_time')
df = df[pd.notnull(df.index)]

In [4]:
# Largest CO2 value among the loaded rows
df.loc[:, 'CO2'].max()


Out[4]:
8052.0

In [5]:
# Plot the CO2 column at its original sampling rate
df.loc[:, 'CO2'].plot(figsize=(12, 6))


Out[5]:
<matplotlib.axes.AxesSubplot at 0x3456f50>

In [6]:
# Average the raw samples into 5-minute bins; bins containing no samples come
# out as NaN. The aggregation is chained as a method because the `how='mean'`
# keyword is no longer accepted by `DataFrame.resample` in current pandas.
df_5min = df.resample('5min').mean()

In [7]:
# First 20 five-minute temperature means, selected by position
df_5min['Temperature'].iloc[:20]


Out[7]:
date_time
2013-06-04 19:40:00    26.804177
2013-06-04 19:45:00    26.888800
2013-06-04 19:50:00    27.030991
2013-06-04 19:55:00          NaN
2013-06-04 20:00:00          NaN
2013-06-04 20:05:00          NaN
2013-06-04 20:10:00          NaN
2013-06-04 20:15:00          NaN
2013-06-04 20:20:00          NaN
2013-06-04 20:25:00          NaN
2013-06-04 20:30:00          NaN
2013-06-04 20:35:00    28.801429
2013-06-04 20:40:00    29.013267
2013-06-04 20:45:00    29.330200
2013-06-04 20:50:00    29.563600
2013-06-04 20:55:00    29.785733
2013-06-04 21:00:00    29.965400
2013-06-04 21:05:00    30.098667
2013-06-04 21:10:00    30.078000
2013-06-04 21:15:00    29.631400
Freq: 5T, Name: Temperature, dtype: float64

In [8]:
# Five-minute temperature means between the two timestamps (both ends included)
df_5min.loc['2013-06-11 12:00:00':'2013-06-11 18:00:00', 'Temperature']


Out[8]:
date_time
2013-06-11 12:00:00    24.309733
2013-06-11 12:05:00    24.347000
2013-06-11 12:10:00    24.381133
2013-06-11 12:15:00    24.431333
2013-06-11 12:20:00    24.486600
2013-06-11 12:25:00    24.562800
2013-06-11 12:30:00    24.635800
2013-06-11 12:35:00    24.711800
2013-06-11 12:40:00    24.784000
2013-06-11 12:45:00    24.848733
2013-06-11 12:50:00    24.905400
2013-06-11 12:55:00    24.976733
2013-06-11 13:00:00    25.056267
2013-06-11 13:05:00    25.145200
2013-06-11 13:10:00    25.214200
...
2013-06-11 16:50:00    25.417733
2013-06-11 16:55:00    25.456533
2013-06-11 17:00:00    25.470533
2013-06-11 17:05:00    25.500467
2013-06-11 17:10:00    25.520400
2013-06-11 17:15:00    25.476970
2013-06-11 17:20:00          NaN
2013-06-11 17:25:00          NaN
2013-06-11 17:30:00          NaN
2013-06-11 17:35:00          NaN
2013-06-11 17:40:00          NaN
2013-06-11 17:45:00          NaN
2013-06-11 17:50:00          NaN
2013-06-11 17:55:00          NaN
2013-06-11 18:00:00          NaN
Freq: 5T, Name: Temperature, Length: 73, dtype: float64

In [9]:
# Restrict the 5-minute table to the period the instrument spent in the water;
# the label-based slice keeps both end timestamps.
df_5min = df_5min.loc['2013-06-04 20:35:00':'2013-06-11 17:15:00']

In [10]:
# CO2 on the left axis, Temperature on a secondary (right) axis
df_5min.loc[:, ['CO2', 'Temperature']].plot(secondary_y='Temperature', figsize=(12, 4));



In [11]:
# Both humidity columns on a shared axis
df_5min.loc[:, ['Humidity', 'Relative Humidity']].plot(figsize=(12, 4));



In [12]:
# Pressure column over the clipped period
df_5min.loc[:, 'Pressure'].plot(figsize=(12, 4));



In [13]:
# Correlation-coefficient matrix between the 5-minute Temperature and CO2
# columns. Called through the numpy namespace so the cell does not rely on the
# kernel having been started in pylab mode (where `corrcoef` is a bare name).
np.corrcoef(df_5min['Temperature'], df_5min['CO2'])


Out[13]:
array([[ 1.        , -0.32966428],
       [-0.32966428,  1.        ]])

In [14]:
# CO2 against Temperature as green circle markers with a grid. Drawn through
# pandas' own plotting (already used elsewhere in this notebook) so the cell
# does not rely on the bare pylab names `plot` and `grid` being in the kernel
# namespace.
ax = df_5min.plot(x='Temperature', y='CO2', style='go', grid=True, legend=False)
ax.set_ylabel('CO2');



In [14]: