notebook.community

Edit and run



In [1]:

    
%pylab inline









    



Populating the interactive namespace from numpy and matplotlib



In [2]:

    
import seaborn as sns
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
cmap = sns.cubehelix_palette(light=1, as_cmap=True)
sns.palplot(sns.cubehelix_palette(light=1))
import pandas as pd
pd.set_option('display.max_columns', 100)



In [3]:

    
nrows = 53
df = pd.read_csv('../data/cln_20150206_cary5000.csv')
df.set_index('wavelength', inplace=True)
df.columns









    Out[3]:





Index([u'Baseline 100%T', u'empty_1', u'empty_2_1', u'empty_2_2', u'empty_2_3', u'empty_2_4', u'empty_2_5', u'empty_2_6', u'empty_2_7', u'empty_2_8', u'empty_2_9', u'empty_2_10', u'empty_2_11', u'empty_2_12', u'empty_2_13', u'empty_2_14', u'empty_2_15', u'empty_3_1', u'empty_3_2', u'empty_3_3', u'empty_3_4', u'empty_3_5', u'VG05_1_1', u'VG05_1_2', u'VG05_1_3', u'VG05_1_4', u'VG05_1_5', u'Baseline 100%T.1', u'empty_4_1', u'empty_4_2', u'VG09-12_1_0', u'VG09-12_1_2', u'VG09-12_1_4', u'VG09-12_1_6', u'VG09-12_1_8', u'VG09-12_1_10', u'VG09-12_1_12', u'VG09-12_1_14', u'VG09-12_1_16', u'VG09-12_1_18', u'VG09-12_1_20', u'VG09-12_1_22', u'VG09-12_1_24', u'VG09-12_1_26', u'VG09-12_1_28', u'VG09-12_1_30', u'VG09-12_1_32', u'VG09-12_1_34', u'VG09-12_1_36', u'VG09-12_1_38', u'VG09-12_1_40', u'VG09-12_1_42', u'VG09-12_1_44', u'VG09-12_1_46', u'VG09-12_1_48', u'VG09-12_1_50', u'empty_5_1', u'empty_5_2', u'empty_5_3', u'empty_5_4', u'empty_5_5'], dtype='object')



In [4]:

    
log = pd.read_csv('../data/logDT_20150206_cary5000', names=['time'])
log.set_index(df.columns, inplace=True)
log['DT'] = pd.to_datetime(log.time)
log.head()









    Out[4]:






  
    
      
      time
      DT
    
  
  
    
      Baseline 100%T
       2/6/2015 1:45:42 PM
      2015-02-06 13:45:42
    
    
      empty_1
       2/6/2015 1:48:36 PM
      2015-02-06 13:48:36
    
    
      empty_2_1
       2/6/2015 1:50:53 PM
      2015-02-06 13:50:53
    
    
      empty_2_2
       2/6/2015 1:51:57 PM
      2015-02-06 13:51:57
    
    
      empty_2_3
       2/6/2015 1:52:58 PM
      2015-02-06 13:52:58



In [5]:

    
experiments = pd.concat([log['DT'], df.apply(np.mean, axis=0), df.apply(np.std, axis=0)], axis=1, copy=True)
experiments.columns=['time','mean', 'stddev']
df.drop('empty_4_2', inplace=True, axis=1)
experiments.drop('empty_4_2', inplace=True)



In [6]:

    
experiments.head()









    Out[6]:






  
    
      
      time
      mean
      stddev
    
  
  
    
      Baseline 100%T
      2015-02-06 13:45:42
       0.108719
       0.030123
    
    
      empty_1
      2015-02-06 13:48:36
       0.995787
       0.000618
    
    
      empty_2_1
      2015-02-06 13:50:53
       0.992650
       0.001644
    
    
      empty_2_2
      2015-02-06 13:51:57
       0.990831
       0.001585
    
    
      empty_2_3
      2015-02-06 13:52:58
       0.990616
       0.001257

Re-normalize everything after the second baseline.



In [7]:

    
baseline2_id = experiments.index.get_loc('Baseline 100%T.1')
ids = experiments.index[baseline2_id:]
rebaseline = experiments['mean'][baseline2_id]/experiments['mean'][0]
corr_experiments = experiments.copy()

corr_experiments['rebaseline'] = 1
corr_experiments['rebaseline'][ids] = rebaseline
corr_experiments['mean'][ids] = corr_experiments['mean'][ids]*rebaseline









    



-c:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
-c:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [8]:

    
plt.plot_date(experiments.time, experiments['mean'], label='Raw')
plt.plot_date(experiments.loc['Baseline 100%T'].time,
              experiments.loc['Baseline 100%T']['mean']/experiments.loc['Baseline 100%T']['mean'],
              label='Baseline 1')
plt.plot_date(experiments.loc['Baseline 100%T.1'].time,
              experiments.loc['Baseline 100%T.1']['mean']/experiments.loc['Baseline 100%T']['mean'],
              label='Baseline 2')

plt.plot_date(experiments.time[ids],corr_experiments['mean'][ids], label='Rebaselined')

fit_inds = range(6,17) + range(55,60)
plt.plot_date(experiments.time[fit_inds],corr_experiments['mean'][fit_inds], label='Points in fit')


plt.legend(loc='best')
plt.xticks(rotation=30)
plt.ylim(0.99, 1.02)
plt.title('Baseline drift in Cary 5000')
plt.xlabel('Time')
plt.ylabel('Mean Transmission')









    Out[8]:





<matplotlib.text.Text at 0x1052a80d0>

Run a linear model.



In [9]:

    
from astroML.linear_model import LinearRegression



In [10]:

    
X = corr_experiments['time'][fit_inds]
X = X.values.astype(np.float)
X_all = corr_experiments['time'].values.astype(np.float)
corr_experiments['time_num'] = X_all
y = corr_experiments['mean'][fit_inds].values
dy = corr_experiments['stddev'][fit_inds].values



In [11]:

    
model = LinearRegression()
model.fit(X[:, None], y, dy)









    Out[11]:





<astroML.linear_model.linear_regression.LinearRegression at 0x1095123d0>



In [12]:

    
y_pred = model.predict(X_all[:, None])
corr_experiments['est_mean'] = y_pred



In [16]:

    
corr_experiments.head()









    Out[16]:






  
    
      
      time
      mean
      stddev
      rebaseline
      time_num
      est_mean
    
  
  
    
      Baseline 100%T
      2015-02-06 13:45:42
       0.108719
       0.030123
       1
       1.423230e+18
       0.988276
    
    
      empty_1
      2015-02-06 13:48:36
       0.995787
       0.000618
       1
       1.423231e+18
       0.989113
    
    
      empty_2_1
      2015-02-06 13:50:53
       0.992650
       0.001644
       1
       1.423231e+18
       0.989773
    
    
      empty_2_2
      2015-02-06 13:51:57
       0.990831
       0.001585
       1
       1.423231e+18
       0.990081
    
    
      empty_2_3
      2015-02-06 13:52:58
       0.990616
       0.001257
       1
       1.423231e+18
       0.990374



In [14]:

    
plt.plot(corr_experiments.time_num, corr_experiments['est_mean'], '.')
plt.plot(corr_experiments.time_num, corr_experiments['mean'], '.')

ratio = corr_experiments['mean']/corr_experiments['est_mean']

plt.plot(corr_experiments.time_num, ratio, '.')

plt.ylim(0.98, 1.02)









    Out[14]:





(0.98, 1.02)

Divide everything by this new projected baseline.



In [17]:

    
corr_experiments.to_csv('../data/rebaseline_20150206_cary5000.csv')

	time	DT
Baseline 100%T	2/6/2015 1:45:42 PM	2015-02-06 13:45:42
empty_1	2/6/2015 1:48:36 PM	2015-02-06 13:48:36
empty_2_1	2/6/2015 1:50:53 PM	2015-02-06 13:50:53
empty_2_2	2/6/2015 1:51:57 PM	2015-02-06 13:51:57
empty_2_3	2/6/2015 1:52:58 PM	2015-02-06 13:52:58

	time	mean	stddev
Baseline 100%T	2015-02-06 13:45:42	0.108719	0.030123
empty_1	2015-02-06 13:48:36	0.995787	0.000618
empty_2_1	2015-02-06 13:50:53	0.992650	0.001644
empty_2_2	2015-02-06 13:51:57	0.990831	0.001585
empty_2_3	2015-02-06 13:52:58	0.990616	0.001257