In [68]:
import pandas
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd

In [21]:
# Load the event table from the HDF5 file sitting next to the notebook.
df = pd.read_hdf('xenon_test_data.h5')

In [11]:
# Preview the first five rows of the loaded frame.
df.head(n=5)


Out[11]:
run_number event_number s2_area_tailcut_set_by s2_over_tdiff tailcut_set_by s1_1_50p_width s1_1_aft s1_1_area s1_1_center_time s1_1_largest_hit_area ... s2_1_largest_hit_area s2_1_posrec_goodness_of_fit s2_1_posrec_goodness_of_fit_nn s2_1_rise_time s2_1_x s2_1_x_nn s2_1_y s2_1_y_nn s2_area_tot unknown_tot
0 8311 0 0.000000e+00 0.000000 0 NaN NaN NaN NaN NaN ... 24853.009766 5235.186035 5601.398438 2097.895490 39.483608 39.881187 5.400494 5.610683 2.248347e+06 104.887558
1 8311 1 1.807614e+06 0.006926 1 NaN NaN NaN NaN NaN ... 33773.347656 6311.308594 7602.618164 1732.494589 3.000274 3.363387 38.043476 38.652359 1.073046e+06 57.850218
2 8311 2 3.333279e+05 0.024004 1 NaN NaN NaN NaN NaN ... 9166.551758 3222.168213 4315.865723 1890.164546 17.401590 18.061518 22.682072 23.084337 7.985587e+05 38.340455
3 8311 3 6.279493e+05 0.013589 1 NaN NaN NaN NaN NaN ... 5824.653809 1523.922852 1529.161743 3616.212444 -4.440406 -4.329351 33.723080 33.980186 2.214664e+05 37.059336
4 8311 4 1.807614e+06 0.003888 4 NaN NaN NaN NaN NaN ... 103230.492188 57072.730469 59293.273438 1676.072953 0.600055 0.041201 40.203674 41.187222 2.451366e+06 105.301023

5 rows × 260 columns


In [18]:
# Selecting a single column yields a Series; show its type and first values.
(type(df.s2), df.s2.head())


Out[18]:
(pandas.core.series.Series, 0    1.807614e+06
 1    3.333279e+05
 2    6.279493e+05
 3    1.461776e+05
 4    1.135693e+06
 Name: s2, dtype: float64)

In [25]:
# Every column name available in the frame, as a plain Python list.
df.columns.tolist()


Out[25]:
['run_number',
 'event_number',
 's2_area_tailcut_set_by',
 's2_over_tdiff',
 'tailcut_set_by',
 's1_1_50p_width',
 's1_1_aft',
 's1_1_area',
 's1_1_center_time',
 's1_1_largest_hit_area',
 's1_1_posrec_goodness_of_fit',
 's1_1_rise_time',
 's1_1_x',
 's1_1_y',
 'alt_s1_interaction_z',
 'alt_s2_interaction_s2_range_50p_area',
 'alt_s2_interaction_s2_range_80p_area',
 'alt_s2_interaction_x',
 'alt_s2_interaction_y',
 'alt_s2_interaction_z',
 'other_s2_area',
 'other_s2_delay_main_s1',
 'other_s2_delay_main_s2',
 'other_s2_pattern_fit',
 'r_pos_correction',
 's1_area_fraction_top_probability',
 's1_largest_hit_area',
 's1_pattern_fit',
 's1_range_80p_area',
 's1_rise_time',
 's1_tight_coincidence',
 's2_pattern_fit',
 's2_range_80p_area',
 's2_rise_time',
 'sum_s1s_before_main_s2',
 'x_nn',
 'x_tpff',
 'y_nn',
 'y_tpff',
 'z_pos_correction',
 'nearest_1e5pe_event',
 'nearest_1e6pe_event',
 'nearest_3e5pe_event',
 'nearest_busy',
 'nearest_busy_off',
 'nearest_busy_on',
 'nearest_event',
 'nearest_hev',
 'nearest_hev_off',
 'nearest_hev_on',
 'nearest_muon_veto_trigger',
 'nearest_s2_area',
 'next_1e5pe_event',
 'next_1e6pe_event',
 'next_3e5pe_event',
 'next_busy',
 'next_busy_off',
 'next_busy_on',
 'next_event',
 'next_hev',
 'next_hev_off',
 'next_hev_on',
 'next_muon_veto_trigger',
 'next_s2_area',
 'previous_1e5pe_event',
 'previous_1e6pe_event',
 'previous_3e5pe_event',
 'previous_busy',
 'previous_busy_off',
 'previous_busy_on',
 'previous_event',
 'previous_hev',
 'previous_hev_off',
 'previous_hev_on',
 'previous_muon_veto_trigger',
 'previous_s2_area',
 'area_before_main_s2',
 'n_true_peaks',
 'total_peak_area',
 'event_duration',
 'event_time',
 'ds_s1_b_n_distinct_channels',
 'int_a_drift_time',
 'int_a_s1_area_correction',
 'int_a_s1_pattern_fit',
 'int_a_s2_area_correction',
 'int_a_x',
 'int_a_y',
 'int_a_z',
 'int_b_drift_time',
 'int_b_s1_area_correction',
 'int_b_s1_pattern_fit',
 'int_b_s2_area_correction',
 'int_b_x',
 'int_b_y',
 'int_b_z',
 's1_2',
 's1_2_area_fraction_top',
 's1_2_center_time',
 's1_2_n_contributing_channels',
 's1_2_range_50p_area',
 's1_a',
 's1_a_area_fraction_top',
 's1_a_center_time',
 's1_a_n_contributing_channels',
 's1_a_range_50p_area',
 's1_b',
 's1_b_area_fraction_top',
 's1_b_center_time',
 's1_b_n_contributing_channels',
 's1_b_range_50p_area',
 's2_2',
 's2_2_area_fraction_top',
 's2_2_center_time',
 's2_2_n_contributing_channels',
 's2_2_range_50p_area',
 's2_a',
 's2_a_area_fraction_top',
 's2_a_center_time',
 's2_a_n_contributing_channels',
 's2_a_range_50p_area',
 's2_b',
 's2_b_area_fraction_top',
 's2_b_center_time',
 's2_b_n_contributing_channels',
 's2_b_range_50p_area',
 'cs1',
 'cs2',
 'cs2_bottom',
 'cs2_top',
 'r',
 'r_3d_nn',
 'r_3d_tpf',
 'r_correction_2d',
 'r_correction_3d_nn',
 'r_correction_3d_tpf',
 'r_observed',
 'r_observed_nn',
 'r_observed_tpf',
 's1_xyz_correction',
 's2_lifetime_correction',
 's2_xy_correction_bottom',
 's2_xy_correction_top',
 's2_xy_correction_tot',
 'ts',
 'x',
 'x_3d_nn',
 'x_3d_tpf',
 'x_observed_nn',
 'x_observed_tpf',
 'y',
 'y_3d_nn',
 'y_3d_tpf',
 'y_observed_nn',
 'y_observed_tpf',
 'z_3d_nn',
 'z_3d_tpf',
 'z_correction_2d',
 'z_correction_3d_nn',
 'z_correction_3d_tpf',
 'z_observed',
 'lone_hit_area',
 'lone_hit_area_fraction_top',
 'lone_hit_center_time',
 'lone_hit_hit_time_std',
 'lone_hit_n_contributing_channels',
 'lone_hit_n_hits',
 'lone_hit_n_saturated_channels',
 'lone_hit_range_50p_area',
 'lone_hit_range_90p_area',
 'lone_hit_x',
 'lone_hit_y',
 's1_area',
 's1_center_time',
 's1_hit_time_std',
 's1_n_contributing_channels',
 's1_n_hits',
 's1_n_saturated_channels',
 's1_range_90p_area',
 's1_x',
 's1_y',
 's2_area',
 's2_center_time',
 's2_hit_time_std',
 's2_n_contributing_channels',
 's2_n_hits',
 's2_n_saturated_channels',
 's2_range_90p_area',
 's2_x',
 's2_y',
 'unknown_area',
 'unknown_area_fraction_top',
 'unknown_center_time',
 'unknown_hit_time_std',
 'unknown_n_contributing_channels',
 'unknown_n_hits',
 'unknown_n_saturated_channels',
 'unknown_range_50p_area',
 'unknown_range_90p_area',
 'unknown_x',
 'unknown_y',
 'drift_time',
 'largest_coincidence',
 'largest_other_s1',
 'largest_other_s2',
 'largest_unknown',
 'largest_veto',
 's1',
 's1_area_fraction_top',
 's1_range_50p_area',
 's2',
 's2_area_fraction_top',
 's2_range_50p_area',
 'x_pax',
 'y_pax',
 'z',
 'area_before_largest_s1',
 'area_before_largest_s2',
 'n_interactions',
 'n_peaks',
 'n_pulses',
 'n_s1',
 'n_s2',
 's1_0_50p_width',
 's1_0_aft',
 's1_0_area',
 's1_0_center_time',
 's1_0_largest_hit_area',
 's1_0_posrec_goodness_of_fit',
 's1_0_rise_time',
 's1_0_x',
 's1_0_y',
 's1_area_tot',
 's2_0_50p_width',
 's2_0_aft',
 's2_0_area',
 's2_0_center_time',
 's2_0_largest_hit_area',
 's2_0_left',
 's2_0_posrec_goodness_of_fit',
 's2_0_posrec_goodness_of_fit_nn',
 's2_0_rise_time',
 's2_0_x',
 's2_0_x_nn',
 's2_0_y',
 's2_0_y_nn',
 's2_1_50p_width',
 's2_1_aft',
 's2_1_area',
 's2_1_center_time',
 's2_1_largest_hit_area',
 's2_1_posrec_goodness_of_fit',
 's2_1_posrec_goodness_of_fit_nn',
 's2_1_rise_time',
 's2_1_x',
 's2_1_x_nn',
 's2_1_y',
 's2_1_y_nn',
 's2_area_tot',
 'unknown_tot',
 'CutAllEnergy',
 'CutFiducialCylinder1T',
 'CutInteractionExists',
 'CutS2Threshold',
 'CutS2AreaFractionTop',
 'CutS2SingleScatter',
 'CutDAQVeto',
 'CutEndOfRunCheck',
 'CutBusyTypeCheck',
 'CutBusyCheck',
 'CutHEVCheck',
 'CutS1SingleScatter',
 'CutS1AreaFractionTop',
 'CutS2PatternLikelihood',
 'CutS2Tails',
 'CutInteractionPeaksBiggest',
 'CutLowEnergyBackground',
 'CutS1LowEnergyRange',
 'CutS2SingleScatterSimple',
 'CutS1PatternLikelihood',
 'CutS2Width',
 'CutS2WidthHigh',
 'CutS2WidthLow',
 'CutS1MaxPMT',
 'CutPreS2Junk']

In [26]:
# Histogram of event_number over all rows.
df.event_number.hist()


Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x7efe3f961f60>

In [15]:
# Shell escape: print the kernel's current working directory.
!pwd


/ML_platform_tests/tutorial/chris

In [28]:
# Count how many rows have each value of the CutAllEnergy flag.
df.CutAllEnergy.value_counts()


Out[28]:
False    101166
True         68
Name: CutAllEnergy, dtype: int64

In [30]:
# Keep only the rows whose CutAllEnergy flag is set.
df2 = df[df.CutAllEnergy]

In [36]:
# cs1 against cs2 for the CutAllEnergy selection, drawn as point markers.
plt.plot(df2['cs1'], df2['cs2'], '.')


Out[36]:
[<matplotlib.lines.Line2D at 0x7efe3f825c88>]

In [39]:
# x against z for the CutAllEnergy selection, drawn as point markers.
plt.plot(df2['x'], df2['z'], '.')


Out[39]:
[<matplotlib.lines.Line2D at 0x7efe3d0be940>]

In [38]:
# x against y for every row of the unfiltered frame, drawn as point markers.
plt.plot(df['x'], df['y'], '.')


Out[38]:
[<matplotlib.lines.Line2D at 0x7efe3d0824e0>]

In [42]:
# Radial distance in the x-y plane, sqrt(x^2 + y^2).
# NOTE(review): the column listing shown earlier already contains an 'r'
# column, so this assignment overwrites those values on df in place.
df['r'] = np.sqrt(df.x ** 2 + df.y ** 2)

In [46]:
# Histogram of the radial distance column over all rows.
df.r.hist()


Out[46]:
<matplotlib.axes._subplots.AxesSubplot at 0x7efe380c1da0>

In [57]:
E = 40  # NOTE(review): not referenced by any other visible cell
# Zoomed view of cs1 against cs2 for the CutAllEnergy selection:
# the axes are clipped to 0-300 horizontally and 0-20000 vertically.
plt.scatter(df2.cs1, df2.cs2)
plt.xlim(0, 300)
plt.ylim(0, 2e4)


Out[57]:
(0, 20000.0)

In [63]:
# First rows of just the cs1 and cs2 columns of the selection.
df2[['cs1', 'cs2']].head()


Out[63]:
cs1 cs2
152 778.113314 39673.505884
682 11.019045 1750.988897
1897 250.581678 7822.072664
3796 1266.924975 76882.724028
5781 13.925521 2272.624209

In [71]:
# Preview event_time converted to pandas timestamps.
pd.to_datetime(df2.event_time).head()


Out[71]:
152    2017-03-30 05:28:01.948269350
682    2017-03-30 05:28:57.160364610
1897   2017-03-30 05:31:05.326684670
3796   2017-03-30 05:34:32.639400520
5781   2017-03-30 05:37:59.041098160
Name: event_time, dtype: datetime64[ns]

In [72]:
# Index the selected events by their timestamp. This modifies df2 in place;
# event_time also remains available as a regular column.
df2.index = pd.to_datetime(df2.event_time)

In [76]:
# z of the selected events, plotted against the frame's index.
df2.z.plot()


Out[76]:
<matplotlib.axes._subplots.AxesSubplot at 0x7efe30a2c0f0>

In [78]:
# Count how many rows have each value of the CutLowEnergyBackground flag.
df.CutLowEnergyBackground.value_counts()


Out[78]:
False    101213
True         21
Name: CutLowEnergyBackground, dtype: int64

In [82]:
# Rows passing the CutLowEnergyBackground flag, shown in the cs1-cs2 plane
# with logarithmic scaling on both axes.
df3 = df[df['CutLowEnergyBackground']]
plt.scatter(df3.cs1, df3.cs2)
plt.xlabel('cs1 [PE]')
# The vertical axis carries cs2, so it must be labeled cs2 (not cs1).
plt.ylabel('cs2 [PE]')
plt.xscale('log')
plt.yscale('log')



In [86]:
# Rows passing the CutAllEnergy flag: s2 against the z coordinate.
# Note that this rebinds df3 to a different selection than the previous cell.
df3 = df[df['CutAllEnergy']]
plt.scatter(df3.s2, df3.z)
plt.xlabel('s2 [PE]')
# z is a position coordinate (it is plotted against x elsewhere), so PE is
# the wrong unit for this axis.
# NOTE(review): unit assumed to be cm - confirm against the data source.
plt.ylabel('z [cm]')


Out[86]:
Text(0,0.5,'z [PE]')

In [ ]: