In [ ]:
import pandas as pd
import matplotlib.pyplot as pl
import datetime as dt
%matplotlib notebook
In [ ]:
pd.options.display.max_rows = 20
pd.options.display.max_columns = 100
In [ ]:
pwv = pd.read_csv('pwv_APEX_3h.csv', na_values='NaN')
In [ ]:
measurements = pd.read_csv('sc_measurements.csv')
sources = pd.read_csv('sc_sources.csv')
In [ ]:
targets = pd.read_csv('sg_targets.csv')
In [ ]:
execblocks = pd.read_csv('aqua_exeblock.csv', index_col=0)
execblocks.rename(columns={'SB_UID.1': 'SB_UID'}, inplace=True)
In [ ]:
pwv.values
In [ ]:
pwv
In [ ]:
# Get a quick summary of the dataframe
pwv.info()
In [ ]:
# Get the first columns
pwv.head()
In [ ]:
# Get the last rows
pwv.tail()
In [ ]:
# Extract/Slice data (rows)
pwv[:3]
In [ ]:
pwv.ix[:3,1:3]
In [ ]:
pwv[['Month', 'Day']][:3]
In [ ]:
pwv['PWV']
In [ ]:
?pd.read_csv
In [ ]:
print pwv.PWV.min()
print pwv.PWV.max()
print pwv.PWV.mean()
print pwv.PWV.median()
print pwv.PWV.std()
In [ ]:
pwv.PWV.describe()
In [ ]:
pwv.describe()
In [ ]:
pl.figure(1)
pwv.PWV.hist(bins=40)
In [ ]:
pl.figure(2)
pwv.PWV.plot()
In [ ]:
pl.figure(3)
pwv.plot(y='PWV')
In [ ]:
execblocks.info()
In [ ]:
execblocks.head()
In [ ]:
execblocks[['QA0STATUS', 'SE_STATUS']][:7]
In [ ]:
execblocks['QA0STATUS'].unique()
In [ ]:
execblocks['QA0STATUS'].value_counts()
In [ ]:
pl.figure(7)
execblocks['QA0STATUS'].value_counts().plot(kind='bar')
In [ ]:
execblocks['SB_UID'].value_counts()
In [ ]:
pl.figure()
execblocks['SB_UID'].value_counts()[:10].plot(kind='bar')
In [ ]:
execblocks['QA0STATUS'] == "Pass"
In [ ]:
execblocks[execblocks['QA0STATUS'] == "Pass"]
In [ ]:
execblocks.query('QA0STATUS == "Pass"')
In [ ]:
execblocks.query('QA0STATUS == "Pass" and SE_STATUS == "FAIL"')
In [ ]:
sblist = execblocks['SB_UID'].value_counts()[:10].index.values
sblist
In [ ]:
mostobserved = execblocks.query('SB_UID in @sblist')
mostobserved.head()
In [ ]:
mostobserved.groupby(['SB_UID', 'QA0STATUS']).aggregate({'EXECBLOCKUID': pd.np.count_nonzero, 'delta': pd.np.mean})
In [ ]:
table1 = mostobserved.groupby(['SB_UID', 'QA0STATUS']).aggregate({'EXECBLOCKUID': pd.np.count_nonzero, 'delta': pd.np.mean})
table1.unstack()
In [ ]:
table1.to_excel('table1.xls')
In [ ]:
execblocks.dropna().apply(lambda x: x['delta'] * 45., axis=1)
In [ ]:
execblocks['day'] = execblocks.apply(lambda x: x['STARTTIME'][:10], axis=1)
In [ ]:
execblocks
In [ ]:
effi = execblocks.groupby(['day', 'QA0STATUS']).aggregate({'EXECBLOCKUID': pd.np.count_nonzero})
effi
In [ ]:
In [ ]:
effipl = effi.unstack()['EXECBLOCKUID'].reset_index()
In [ ]:
effipl['Date'] = effipl.apply(lambda x: dt.datetime.strptime(x['day'], '%Y-%m-%d'), axis=1)
In [ ]:
effipl
In [ ]:
ax = effipl.plot(x='Date', y='Pass')
effipl.plot(x='Date', y='Fail', ax=ax)
In [ ]:
pwv['Date'] = pwv.apply(lambda x:
dt.datetime(int(x['Year']), int(x['Month']), int(x['Day']), int(x['Hour'])),
axis=1)
In [ ]:
pwv.set_index('Date', inplace=True, drop=False)