In [2]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from multihist import Histdd, Hist1d
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import hax
from hax import cuts
hax.init(pax_version_policy='6.8',
         minitree_paths=['./sr1_s1shape_minitrees/',
                         '/project2/lgrandi/xenon1t/minitrees/pax_v6.8.0/',
                         '/project/lgrandi/xenon1t/minitrees/pax_v6.8.0/'])
from pax import units, configuration
pax_config = configuration.load_configuration('XENON1T')
tpc_r = pax_config['DEFAULT']['tpc_radius']
tpc_z = -pax_config['DEFAULT']['tpc_length']
Kr83m selection cuts, similar to those in Adam's note: https://github.com/XENON1T/FirstResults/blob/master/PositionReconstructionSignalCorrections/S2map/s2-correction-xy-kr83m-fit-in-bins.ipynb
In [3]:
# Get SR0 krypton datasets
dsets = hax.runs.datasets
dsets = dsets[dsets['source__type'] == 'Kr83m']
dsets = dsets[dsets['trigger__events_built'] > 10000] # Want a lot of Kr, not diffusion mode
dsets = hax.runs.tags_selection(dsets, include='sciencerun0')
# Sample ten datasets randomly (with fixed seed, so the analysis is reproducible)
dsets = dsets.sample(10, random_state=0)
dsets.number.values
Out[3]:
In [4]:
# Suppress the rootpy warning about root2rec... too lazy to fix.
import warnings
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    data = hax.minitrees.load(dsets.number,
                              'Basics DoubleScatter Corrections'.split(),
                              num_workers=5,
                              preselection=['int_b_x > -60.0',
                                            '600 < s1_b_center_time - s1_a_center_time < 2000',
                                            '-90 < z < -5'])
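For reference, hax applies the preselection strings above during loading; an equivalent post-hoc selection in plain pandas (a sketch, assuming the same column names from the Basics and DoubleScatter minitrees) would be:
# Hedged sketch: the same preselection applied after loading
mask = ((data['int_b_x'] > -60.0)
        & (data['s1_b_center_time'] - data['s1_a_center_time'] > 600)
        & (data['s1_b_center_time'] - data['s1_a_center_time'] < 2000)
        & (data['z'] > -90) & (data['z'] < -5))
data_alt = data[mask]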
In [5]:
from hax.treemakers.peak_treemakers import PeakExtractor

dt = 10 * units.ns
wv_length = pax_config['BasicProperties.SumWaveformProperties']['peak_waveform_length']
waveform_ts = np.arange(-wv_length/2, wv_length/2 + 0.1, dt)

class GetS1s(PeakExtractor):
    __version__ = '0.0.1'
    uses_arrays = True
    # (We don't actually need all these properties, but they are useful for checking if something is wrong.)
    peak_fields = ['area', 'range_50p_area', 'area_fraction_top',
                   'n_contributing_channels', 'left', 'hit_time_std', 'n_hits',
                   'type', 'detector', 'center_time', 'index_of_maximum',
                   'sum_waveform']
    peak_cut_list = ['detector == "tpc"', 'type == "s1"']

    def get_data(self, dataset, event_list=None):
        # Get the event list from the dataframe selected above
        event_list = data[data['run_number'] == hax.runs.get_run_number(dataset)]['event_number'].values
        return PeakExtractor.get_data(self, dataset, event_list=event_list)

    def extract_data(self, event):
        peak_data = PeakExtractor.extract_data(self, event)
        # Convert sum waveforms from the arcane pyroot buffer type to proper numpy arrays
        for p in peak_data:
            p['sum_waveform'] = np.array(list(p['sum_waveform']))
        return peak_data
In [6]:
s1s = hax.minitrees.load(dsets.number, GetS1s, num_workers=5)
Pandas object arrays are very memory-inefficient: it takes about 25 MB per dataset to store the waveforms in this format (even compressed). If we wanted to extract more than O(10) datasets, we would already run into trouble at the extraction stage.
The least we can do is convert to a sensible format (a waveform matrix plus an ordinary dataframe) now. Unfortunately, the dataframe retains its 'object' dtype even after the sum waveform column is deleted; converting to and from a record array removes this.
In [7]:
waveforms = np.vstack(s1s['sum_waveform'].values)
del s1s['sum_waveform']
s1s = pd.DataFrame(s1s.to_records())
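As a quick sanity check (a sketch; the exact dtypes depend on the minitree contents), one can verify that the bulky object column is gone and that the waveform matrix has one row per S1:
# Hedged sanity check after the conversion:
print(waveforms.shape)
print(s1s.dtypes)
assert len(s1s) == len(waveforms)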
Merge with the per-event data, which is useful e.g. for making position-dependent selections.
In [8]:
merged_data = hax.minitrees._merge_minitrees(s1s, data)
del merged_data['index']
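hax.minitrees._merge_minitrees is a private helper; should it change, an equivalent plain-pandas merge (a sketch, assuming both frames carry run_number and event_number columns) would be:
# Many S1 rows per event, so this is a many-to-one merge:
merged_alt = s1s.merge(data, on=['run_number', 'event_number'],
                       suffixes=('', '_event'))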
In [9]:
np.savez_compressed('sr0_kr_s1s.npz', waveforms=waveforms)
merged_data.to_hdf('sr0_kr_s1s.hdf5', 'data')
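To reload the saved files later without re-running the extraction:
# Load the waveform matrix and the merged dataframe back from disk:
waveforms = np.load('sr0_kr_s1s.npz')['waveforms']
merged_data = pd.read_hdf('sr0_kr_s1s.hdf5', 'data')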
In [10]:
len(s1s)
Out[10]:
In [11]:
from pax import units
plt.hist(s1s.left * 10 * units.ns / units.ms, bins=np.linspace(0, 2.5, 100));
plt.yscale('log')
The S1 is usually at the trigger position in the event window.
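To make this quantitative, one can locate the mode of the distribution (a sketch; the binning matches the histogram above):
# The most common S1 left-edge time should coincide with the trigger position:
counts, edges = np.histogram(s1s.left * 10 * units.ns / units.ms,
                             bins=np.linspace(0, 2.5, 100))
print('Most common S1 time: %.2f ms' % edges[np.argmax(counts)])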
In [12]:
plt.hist(s1s.area, bins=np.logspace(0, 3, 100));
plt.axvline(35, color='r')
plt.yscale('log')
plt.xscale('log')
In [13]:
np.sum(s1s['area'] > 35)/len(s1s)
Out[13]:
Single-electron contamination is thus not severe.
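As a cross-check (a sketch, assuming the run_number column is present in the S1 dataframe), the same fraction can be computed per run to verify it is stable across the sampled datasets:
# Fraction of S1s above 35 PE in each run:
s1s.groupby('run_number')['area'].apply(lambda a: (a > 35).mean())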