Retrieve data: the data must first be saved as a text file on Dropbox.
In [1]:
import datetime
import urllib

import numpy as np
import pandas as pd
In [2]:
# Retrieve the CO2 sensor log from Dropbox. To obtain the URL, right-click the
# file in Dropbox from a web browser and select "Share link", then right-click
# the download button and select "Copy link address". Paste that address into
# source_url below; datafile is the desired final destination on disk.
source_url = 'https://dl.dropboxusercontent.com/s/qbnp51ab1dfaiia/ICO2sensordata_Hamblin_clipped_asc.txt?token_hash=AAGDuJOiJnjRlr81kRaPy1tlvMaWwkeFtSj3Jz3QMfRlPA&dl=1'
datafile = '/usgs/data2/notebook/data/ICO2_Hamblin_clipped_asc.txt'

# Try opening the file to see if it has already been downloaded.
try:
    with open(datafile):
        pass
except IOError:
    # Only a missing/unreadable file should trigger the download. A bare
    # `except:` would also swallow KeyboardInterrupt and unrelated errors.
    print('File not found. Downloading %s from Dropbox...' % datafile)
    # NOTE: urllib.urlretrieve is the Python 2 location of this function.
    urllib.urlretrieve(source_url, datafile)
In [3]:
# One accumulator list per parsed sensor field.
date, co2, temp, press = [], [], [], []
# strptime pattern for the timestamp at the start of each sensor record.
fmt = '%Y/%m/%d %H:%M:%S'
In [4]:
def _to_float(field):
    """Convert one raw text field to float; blank or empty fields become NaN."""
    field = field.strip()
    return float(field) if field else np.nan


# Start from empty accumulators so that re-running this cell does not append a
# second copy of every record to lists left over from an earlier run.
date, co2, temp, press = [], [], [], []

# `with` guarantees the file handle is closed, even if a record fails to parse.
with open(datafile) as f:
    for line in f:
        if not line.strip():
            continue  # tolerate blank lines (e.g. a trailing newline)
        # Each record is split on fixed marker strings:
        #   <timestamp> '- G' <co2> 'T' <temp> 'HT0000 RH0000 P' <press> 'X' <rest>
        # Tuple unpacking makes a malformed record raise ValueError rather
        # than being silently mis-parsed.
        date_val, frag1 = line.split('- G')
        frag2, frag3 = frag1.split('HT0000 RH0000 P')
        co2_val, temp_val = frag2.split('T')
        press_val, _rest = frag3.split('X')

        date.append(datetime.datetime.strptime(date_val.strip(), fmt))
        co2.append(_to_float(co2_val))
        temp.append(_to_float(temp_val))
        press.append(_to_float(press_val))

d = {}
d['date'] = np.asarray(date)
d['co2'] = np.asarray(co2)
d['temp'] = np.asarray(temp)
p = np.asarray(press)
# Correct for pressure values where the last decimal place was dropped:
# readings below 500 are scaled up by 10 (NaN compares False and is left alone).
p = np.where(p < 500, p * 10., p)
d['press'] = p

df = pd.DataFrame(d)
df.index = pd.to_datetime(df['date'])
# Scale the stored pressure down by 10 (the raw value appears to carry one
# implied decimal place -- TODO confirm against the sensor documentation).
df['press'] = df['press'] / 10.
In [135]:
# Download the YSI data file from Dropbox, then load it into df2.
ysi_url = 'https://dl.dropboxusercontent.com/s/skihdky9rqchx66/YSI_Hamblin.txt?token_hash=AAH8XcrQL5NTwRK1QG1JYHmOKWAMfbsoZFDdoGiRnp8beA&dl=1'
ysi_path = '/usgs/data2/notebook/data/YSI_Hamblin.txt'
urllib.urlretrieve(ysi_url, ysi_path)

# The first row is skipped and column names are supplied explicitly.
# Columns 2 and 3 ('date', 'time') are merged into a single parsed datetime
# column, which index_col=0 then selects as the index.
ysi_columns = ['foo1','foo2','date', 'time', 'pH','depth','temp','sal']
df2 = pd.read_csv(
    ysi_path,
    skiprows=[0],
    parse_dates=[[2,3]],
    index_col=0,
    sep=r"\s*",
    names=ysi_columns,
    header=None,
)
In [137]:
# Download the lab pH data file from Dropbox, then load it into df3.
lab_url = 'https://dl.dropboxusercontent.com/s/yxex97qk8deh5tj/lab_pH.txt?token_hash=AAEb0-rZ0Y2nlOsMuFsL2kyltvyiuqYng600F8FQLVJz2w&dl=1'
lab_path = '/usgs/data2/notebook/data/lab_pH.txt'
urllib.urlretrieve(lab_url, lab_path)

# The first two rows are skipped and column names are supplied explicitly.
# Columns 0 and 1 ('date', 'time') are merged into a single parsed datetime
# column, which index_col=0 then selects as the index.
lab_columns = ['date','time','Rave', 'Rstdev', 'pHave', 'pHstdev']
df3 = pd.read_csv(
    lab_path,
    skiprows=[0,1],
    parse_dates=[[0,1]],
    index_col=0,
    sep=r"\s*",
    names=lab_columns,
    header=None,
)
In [138]:
# Clip all three frames to the study period. Note that the YSI frame (df2)
# is kept 30 minutes longer than the other two.
period_start = '2013-06-19 00:00:00'
period_end = '2013-06-26 02:00:00'
df = df[period_start:period_end]
df2 = df2[period_start:'2013-06-26 02:30:00']
df3 = df3[period_start:period_end]
Data synthesis: calculate the 30-minute, 10-minute, and 1-minute means in order to make the data more manageable.
In [142]:
def _bin_means(frame, rule):
    """Return `frame` resampled into `rule`-wide bins, aggregated with the mean."""
    return frame.resample(rule, how='mean')


# Binned means of the CO2 sensor frame (df) and the YSI frame (df2).
df_30min, df2_30min = _bin_means(df, '30min'), _bin_means(df2, '30min')
df_10min, df2_10min = _bin_means(df, '10min'), _bin_means(df2, '10min')
df_1min, df2_1min = _bin_means(df, '1min'), _bin_means(df2, '1min')
Explore the data: in this next section the plot colors have meaning: blue is CO2, green is pH, red is depth, black is temperature, and yellow is pressure.
In [141]:
# CO2 trace over the clipped period.
co2_series = df['co2']
co2_series.plot(figsize=(12, 4))
Out[141]:
In [119]:
# Construct a figure combining data from two frames: YSI pH (green) on the
# primary axis and sensor CO2 on a secondary y-axis.
plt.figure()
ysi_ph = df2['pH']
ysi_ph.plot(style='g')
sensor_co2 = df['co2']
sensor_co2.plot(figsize=(15, 6), secondary_y=True)
Out[119]:
In [13]:
# Construct a figure combining data from two frames: lab-measured mean pH
# (green circles) on the primary axis and sensor CO2 on a secondary y-axis.
plt.figure()
lab_ph = df3['pHave']
lab_ph.plot(style='go')
sensor_co2 = df['co2']
sensor_co2.plot(figsize=(15, 6), secondary_y=True)
Out[13]:
In [21]:
# CO2 (blue) on the primary axis, sensor temperature (black) on a secondary axis.
co2_and_temp = df[['co2','temp']]
co2_and_temp.plot(figsize=(12, 4), secondary_y='temp', style=['b','k']);
Making the data manageable: in this section the data are resampled to produce more manageably sized data sets.
In [130]:
# Compute 30-minute averages of every column in the sensor frame, then plot
# averaged CO2 (blue) with averaged temperature (black) on a secondary axis.
df_30min = df.resample('30min', how = 'mean')
co2_and_temp_30min = df_30min[['co2','temp']]
co2_and_temp_30min.plot(figsize=(12, 4), secondary_y='temp', style=['b','k']);
In [60]:
# Construct a figure combining data from two frames: 30-minute mean YSI pH
# (green) on the primary axis and 30-minute mean CO2 on a secondary y-axis.
plt.figure()
ysi_ph_30min = df2_30min['pH']
ysi_ph_30min.plot(style='g')
sensor_co2_30min = df_30min['co2']
sensor_co2_30min.plot(figsize=(15, 6), secondary_y=True)
Out[60]:
In [123]:
In [131]:
# Adjust the temperature from the CO2 sensor (black) down by 5.3 degrees so
# that it matches the YSI temperature (cyan).
TEMP_OFFSET = 5.3
df['temp_adjusted'] = df['temp'] - TEMP_OFFSET
# df_30min was resampled before 'temp_adjusted' existed on df, so looking the
# column up there raises KeyError on a fresh top-to-bottom run. Derive it from
# the 30-minute means instead; subtracting a constant commutes with the mean,
# so this equals resampling the adjusted column.
df_30min['temp_adjusted'] = df_30min['temp'] - TEMP_OFFSET

plt.figure()
df2_30min['temp'].plot(style='c')
df_30min['temp_adjusted'].plot(figsize=(15, 6), secondary_y=False, style='k')
Out[131]:
In [24]:
# 30-minute mean temperature (black) with pressure (yellow) on a secondary axis.
temp_and_press_30min = df_30min[['temp','press']]
temp_and_press_30min.plot(figsize=(12, 4), secondary_y='press', style=['k','y']);
In [81]:
# Recalculate the 30-minute means of the sensor frame (df) and the YSI frame
# (df2) so that they reflect the current contents of both frames.
df_30min, df2_30min = (
    frame.resample('30min', how = 'mean') for frame in (df, df2)
)
In [84]:
# Scatter 30-minute mean pH against 30-minute mean CO2 (green circles).
# df and df2 are clipped to different end times, so the two resampled Series
# may differ in length. Building a frame from both aligns them on their
# timestamps, and dropna() keeps only bins where both readings exist.
co2_ph_pairs = pd.DataFrame({'co2': df_30min['co2'], 'pH': df2_30min['pH']}).dropna()
plt.plot(co2_ph_pairs['co2'], co2_ph_pairs['pH'], 'go');
plt.grid();
In [85]:
# Look at the correlation between pH and CO2 (30-minute means).
# The two Series are aligned on their timestamps and bins missing either
# reading are dropped first; otherwise mismatched lengths raise an error and
# a single NaN turns the whole correlation matrix into NaN.
co2_ph_aligned = pd.DataFrame({'co2': df_30min['co2'], 'pH': df2_30min['pH']}).dropna()
np.corrcoef(co2_ph_aligned['co2'], co2_ph_aligned['pH'])
Out[85]:
In [ ]: