In [1]:
%pylab inline
In [2]:
import pandas as pd
import seaborn as sns

# Notebook-sized plot styling: larger fonts and thicker lines.
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})

# Cubehelix palette starting from white; kept as a colormap in `cmap`
# and previewed as swatches below.
cmap = sns.cubehelix_palette(light=1, as_cmap=True)
sns.palplot(sns.cubehelix_palette(light=1))

# Let wide frames display up to 100 columns.
pd.set_option('display.max_columns', 100)
In [3]:
# NOTE(review): `nrows` is not referenced by any other visible cell -- confirm it is still needed.
nrows = 53

# Load the cleaned scan table and index it by the 'wavelength' column.
# The remaining column labels are reused as the row index of the time log in the next cell.
df = pd.read_csv('../data/cln_20150206_cary5000.csv').set_index('wavelength')

# Display the column labels.
df.columns
Out[3]:
In [4]:
# Read the headerless log as a single 'time' column and label its rows with the
# column labels of `df` (assumes one log line per column of `df`, in the same order).
log = pd.read_csv('../data/logDT_20150206_cary5000', names=['time']).set_index(df.columns)

# Parsed timestamps live alongside the raw strings.
log['DT'] = pd.to_datetime(log['time'])

log.head()
Out[4]:
In [5]:
# One row per column of `df`: timestamp, mean and standard deviation over wavelength.
experiments = pd.concat(
    [log['DT'], df.apply(np.mean, axis=0), df.apply(np.std, axis=0)],
    axis=1,
)
experiments.columns = ['time', 'mean', 'stddev']

# Remove the 'empty_4_2' entry from both tables.
# errors='ignore' keeps this cell re-runnable: on a second run `df` no longer has the
# column, and a strict drop would raise KeyError before `experiments` was cleaned,
# leaving a stale all-NaN 'empty_4_2' row behind.
df.drop(columns='empty_4_2', inplace=True, errors='ignore')
experiments.drop(index='empty_4_2', inplace=True, errors='ignore')
In [6]:
# Preview the first five rows of the per-experiment summary table.
experiments.head(n=5)
Out[6]:
Re-normalize everything after the second baseline.
In [7]:
# Integer position of the second baseline row, and the labels of every row from there on.
baseline2_id = experiments.index.get_loc('Baseline 100%T.1')
ids = experiments.index[baseline2_id:]

# Scale factor: mean of the second baseline relative to the mean of the first row.
# NOTE(review): assumes the first row is the first baseline ('Baseline 100%T') -- confirm.
# .iloc makes the positional lookup explicit; plain [] with an integer on a
# label-indexed Series is deprecated.
rebaseline = experiments['mean'].iloc[baseline2_id] / experiments['mean'].iloc[0]

corr_experiments = experiments.copy()

# Rows before the second baseline keep a factor of 1; later rows get `rebaseline`.
# Single-step .loc assignment replaces frame[col][rows] = ..., which writes to a
# temporary (SettingWithCopyWarning) and is a silent no-op under copy-on-write.
corr_experiments['rebaseline'] = 1.0
corr_experiments.loc[ids, 'rebaseline'] = rebaseline
corr_experiments.loc[ids, 'mean'] = corr_experiments.loc[ids, 'mean'] * rebaseline
In [8]:
# Baseline drift overview. plt.plot with an 'o' marker replaces the deprecated
# plt.plot_date (whose default format is also 'o'); datetime x-values are handled natively.
plt.plot(experiments['time'], experiments['mean'], 'o', label='Raw')

# Both baselines are shown relative to the first baseline's mean.
baseline1 = experiments.loc['Baseline 100%T']
baseline2 = experiments.loc['Baseline 100%T.1']
plt.plot(baseline1['time'], baseline1['mean'] / baseline1['mean'], 'o',
         label='Baseline 1')
plt.plot(baseline2['time'], baseline2['mean'] / baseline1['mean'], 'o',
         label='Baseline 2')

plt.plot(experiments.loc[ids, 'time'], corr_experiments.loc[ids, 'mean'], 'o',
         label='Rebaselined')

# Integer row positions used for the linear fit in a later cell.
# NOTE(review): the position ranges are hard-coded -- confirm which measurements they select.
# list(...) is required on Python 3, where two range objects cannot be added.
fit_inds = list(range(6, 17)) + list(range(55, 60))

# .iloc: fit_inds holds positions, not index labels.
plt.plot(experiments['time'].iloc[fit_inds], corr_experiments['mean'].iloc[fit_inds], 'o',
         label='Points in fit')

plt.legend(loc='best')
plt.xticks(rotation=30)
plt.ylim(0.99, 1.02)
plt.title('Baseline drift in Cary 5000')
plt.xlabel('Time')
plt.ylabel('Mean Transmission')
Out[8]:
Run a linear model.
In [9]:
from astroML.linear_model import LinearRegression
In [10]:
# Numeric time axis for the regression: the datetime column cast to float
# (a count in the column's datetime64 unit; presumably nanoseconds -- confirm).
# The builtin `float` replaces `np.float`, an alias removed in NumPy 1.24.
X_all = corr_experiments['time'].values.astype(float)
corr_experiments['time_num'] = X_all

# Fit inputs: times, means and standard deviations at the selected row positions.
# .iloc is used because fit_inds holds integer positions, not index labels.
X = corr_experiments['time'].iloc[fit_inds].values.astype(float)
y = corr_experiments['mean'].iloc[fit_inds].values
dy = corr_experiments['stddev'].iloc[fit_inds].values
In [11]:
# Fit mean transmission against numeric time on the selected points, passing the
# per-point standard deviations `dy` as the third argument.
# X is 1-D, so it is reshaped to a single-column 2-D array for the fit.
model = LinearRegression()
model.fit(X[:, np.newaxis], y, dy)
Out[11]:
In [12]:
# Evaluate the fitted line at every experiment's time (single-column 2-D input)
# and store it next to the measured means.
y_pred = model.predict(X_all[:, np.newaxis])
corr_experiments['est_mean'] = y_pred
In [16]:
# Preview the first five rows, now including the 'time_num' and 'est_mean' columns.
corr_experiments.head(n=5)
Out[16]:
In [14]:
# Compare the measured means with the fitted baseline, and show their ratio.
# Labels and a legend are added so the three series can be told apart.
plt.plot(corr_experiments['time_num'], corr_experiments['est_mean'], '.',
         label='Fitted baseline')
plt.plot(corr_experiments['time_num'], corr_experiments['mean'], '.',
         label='Rebaselined mean')

# Measured mean divided by the fitted baseline.
ratio = corr_experiments['mean'] / corr_experiments['est_mean']
plt.plot(corr_experiments['time_num'], ratio, '.', label='Mean / fitted baseline')

plt.legend(loc='best')
plt.xlabel('Time (numeric timestamp)')
plt.ylabel('Mean Transmission')
plt.ylim(0.98, 1.02)
Out[14]:
Divide everything by this new projected baseline.
In [17]:
# Write the corrected table to disk.
# NOTE(review): `ratio` (mean / est_mean) is not a column of corr_experiments, so it is
# not written here -- confirm downstream code recomputes it from 'mean' and 'est_mean'.
output_path = '../data/rebaseline_20150206_cary5000.csv'
corr_experiments.to_csv(output_path)