Data preprocessing

  1. Load simulation/data dataframe and apply specified quality cuts
  2. Extract desired features from dataframe
  3. Get separate testing and training datasets
  4. Feature selection

Load simulation, format feature and target matrices

In [4]:
# Load the IC79 simulation and data dataframes (simulation first, then data).
df_sim, df_data = (
    comp.load_dataframe(datatype=kind, config='IC79')
    for kind in ('sim', 'data')
)

# Report how many events each sample contains.
n_sim, n_data = len(df_sim), len(df_data)
print('{} simulation events'.format(n_sim))
print('{} data events'.format(n_data))

In [5]:
# 75 evenly spaced values spanning 1.4 to 9.5, handed to the verification
# plot as the binning for the 'lap_beta' column of both dataframes.
beta_bins = np.linspace(1.4, 9.5, 75)

# The label needs $...$ so Matplotlib's mathtext renders \beta as the Greek
# letter (matching the 'Laputop $\\beta$' labels used elsewhere in this
# notebook); without the delimiters the backslash sequence is shown literally.
plotting.make_verification_plot(df_data, df_sim, 'lap_beta', beta_bins,
                                'Laputop $\\beta$')

In [10]:
# 75 evenly spaced values from 0 to 30, used as the binning for the
# 'charge_nhits_ratio' column in the data/simulation verification plot.
charge_hits_bins = np.linspace(0, 30, num=75)
plotting.make_verification_plot(
    df_data, df_sim,
    'charge_nhits_ratio', charge_hits_bins,
    'Charge/Hits ratio',
)

In [11]:
# 75 evenly spaced values from -50 to 0, used as the binning for the
# 'lap_rlogl' column in the data/simulation verification plot.
rlogl_bins = np.linspace(-50, 0, num=75)
plotting.make_verification_plot(
    df_data, df_sim,
    'lap_rlogl', rlogl_bins,
    'Laputop rlogl',
)

In [14]:
# 75 evenly spaced values from 0 to 2.5, used as the binning for the
# 'log_s125' column in the data/simulation verification plot.
# NOTE(review): the plotted column is 'log_s125' but the axis label reads
# 'S125' -- confirm whether the label should mention the logarithm.
s125_bins = np.linspace(0, 2.5, num=75)
plotting.make_verification_plot(
    df_data, df_sim,
    'log_s125', s125_bins,
    'S125',
)

In [12]:
# Smallest and largest 'log_s125' values in the simulation dataframe,
# displayed as a (min, max) tuple.
(
    df_sim['log_s125'].min(),
    df_sim['log_s125'].max(),
)

(0.026478313225486924, 2.1315707028243409)

# Overlay the simulation (MC) and data rates as a function of Laputop beta,
# with error bars, on a logarithmic y-axis.
# NOTE(review): beta_midpoints, rate_sim/rate_sim_err and
# rate_data/rate_data_err are not defined in any cell visible here --
# confirm they are computed earlier so the notebook survives Run All.
fig, ax = plt.subplots()
ax.errorbar(beta_midpoints, rate_sim, yerr=rate_sim_err,
            label='MC', marker='.', ms=8)
ax.errorbar(beta_midpoints, rate_data, yerr=rate_data_err,
            label='Data', marker='.', ms=8)
# Clip non-positive values instead of masking them on the log axis.
# NOTE(review): newer Matplotlib releases renamed `nonposy` to
# `nonpositive`; kept as-is to match the environment this notebook ran in.
ax.set_yscale("log", nonposy='clip')
ax.set_xlabel('Laputop $\\beta$')
ax.set_ylabel('Rate')
# The errorbar calls set label=..., but the labels are only shown once a
# legend is drawn.
ax.legend()

In [7]:
# Ratio of the two rates and its uncertainty, from the project helper.
# Argument order suggests data is the numerator and simulation the
# denominator -- TODO confirm against comp.ratio_error.
# NOTE(review): the recorded output shows divide-by-zero / invalid-value
# warnings from this ratio, so some entries may be inf or NaN.
ratio, ratio_err = comp.ratio_error(
    rate_data, rate_data_err,
    rate_sim, rate_sim_err,
)

# Plot the ratio versus Laputop beta, with a dotted reference line at 1.
fig, ax = plt.subplots()
ax.errorbar(
    beta_midpoints, ratio,
    yerr=ratio_err,
    marker='.', ms=8,
)
ax.axhline(1.0, marker='None', ls=':')
ax.set_xlabel('Laputop $\\beta$')

/home/jbourbeau/cr-composition/composition/analysis/ RuntimeWarning: divide by zero encountered in true_divide
  ratio = num/den
/home/jbourbeau/cr-composition/composition/analysis/ RuntimeWarning: invalid value encountered in true_divide
  ratio = num/den

