In [1]:
%matplotlib inline

import numpy as np
import pandas as pd

import ROOT
import root_numpy

import matplotlib.pyplot as plt

PATH = "/root/minibias2.root"

import warnings
warnings.filterwarnings('ignore')

import json


Welcome to ROOTaaS 6.06/02

In [2]:
import os.path as osp

In [4]:
with open("./branches_photon.config") as f:
    features = json.load(f)

In [5]:
features['per_event']


Out[5]:
{u'PF': {u'batch': 1536,
  u'branches': [u'recoPFCandidates_particleFlow__RECO.obj.pt_',
   u'recoPFCandidates_particleFlow__RECO.obj.eta_',
   u'recoPFCandidates_particleFlow__RECO.obj.phi_'],
  u'read_each': 8},
 u'calo': {u'batch': 64,
  u'branches': [u'recoCaloJets_ak5CaloJets__RECO.obj.pt_',
   u'recoCaloJets_ak5CaloJets__RECO.obj.eta_',
   u'recoCaloJets_ak5CaloJets__RECO.obj.phi_',
   u'recoCaloJets_ak5CaloJets__RECO.obj.mass_',
   u'recoCaloJets_ak5CaloJets__RECO.obj.vertex_.fCoordinates.fX',
   u'recoCaloJets_ak5CaloJets__RECO.obj.vertex_.fCoordinates.fY',
   u'recoCaloJets_ak5CaloJets__RECO.obj.vertex_.fCoordinates.fZ'],
  u'read_each': 1},
 u'muons': {u'batch': 24,
  u'branches': [u'recoMuons_muons__RECO.obj.pt_',
   u'recoMuons_muons__RECO.obj.eta_',
   u'recoMuons_muons__RECO.obj.phi_',
   u'recoMuons_muons__RECO.obj.mass_',
   u'recoMuons_muons__RECO.obj.vertex_.fCoordinates.fX',
   u'recoMuons_muons__RECO.obj.vertex_.fCoordinates.fY',
   u'recoMuons_muons__RECO.obj.vertex_.fCoordinates.fZ'],
  u'read_each': 1},
 u'photons': {u'batch': 12,
  u'branches': [u'recoPhotons_photons__RECO.obj.pt_',
   u'recoPhotons_photons__RECO.obj.eta_',
   u'recoPhotons_photons__RECO.obj.phi_',
   u'recoPhotons_photons__RECO.obj.vertex_.fCoordinates.fX',
   u'recoPhotons_photons__RECO.obj.vertex_.fCoordinates.fY',
   u'recoPhotons_photons__RECO.obj.vertex_.fCoordinates.fZ'],
  u'read_each': 1}}

In [6]:
features['per_lumisection']


Out[6]:
[u'EventAuxiliary.id_.run_',
 u'EventAuxiliary.id_.luminosityBlock_',
 u'EventAuxiliary.time_.timeHigh_',
 u'EventAuxiliary.time_.timeLow_',
 u'LumiScalerss_scalersRawToDigi__RECO.obj.instantLumi_']

In [119]:
def get_index(leaves, indxes):
    """
    Produces names of branches with indexes in them.
    """
    return [ leaf + "[%d]" % index for leaf in leaves for index in indxes ]

def split_by_events(data_root, leaves, batch_size, test_leaf = 0):
    """
    Turns data from root numpy into matrix <N events> by <number of features>.
    Returns if another batch is needed by testing test_leaf (usually pt - momentum) against exact float zero.
    If any particles has test_leaf equal to exact zeros, they are just doesn't exist in original root file, so
    needed to be truncated.
    Otherwise, event might be read incompletely, i.e. number of particles > batch_size.
    """
    events = list()

    for event_i in xrange(data_root.shape[0]):
        event = data_root[event_i]
        d = np.array([ event[i] for i in xrange(len(leaves) * batch_size) ]).reshape(len(leaves), -1).T
        
        event_idx = d[:, test_leaf] > 0.0
        
        ### other features must be exact zero also
        assert np.all(d[np.logical_not(event_idx), :] == 0.0)
        
        events.append(d[event_idx, :])

    need_another_batch = np.any([
        event.shape[0] == batch_size for event in events
    ])
    
    return need_another_batch, events
    
    
def read_batch(path, treename, leaves, batch_size, each = 1, test_leaf = 0):
    event_batches = None
    need_another_batch = True
    
    batch_offset = 0

    while need_another_batch:
        branches = get_index(leaves, np.arange(batch_size)[::each] + batch_offset)
        
        data_root = root_numpy.root2array(path, treename='Events', branches=branches, )

        need_another_batch, events = split_by_events(data_root, leaves, batch_size / each, test_leaf = 0)
        
        batch_offset += batch_size
        
        if event_batches is None:
            event_batches = [ [event] for event in events ]
        else:
            assert len(event_batches) == len(events)
            event_batches = [
                batches + [batch] for batches, batch in zip(event_batches, events)
            ]
    

    return [ np.vstack(event) for event in event_batches ]

def read_lumidata(path, lumifeatures):
    names = [ f.split('.')[-1] for f in lumifeatures ]

    lumidata = root_numpy.root2array(path, treename='Events', branches=lumifeatures)
    lumi = np.zeros(shape=(lumidata.shape[0], len(lumifeatures)))

    for i in xrange(lumidata.shape[0]):
        lumi[i, :] = np.array([ lumidata[i][j] for j in range(len(lumifeatures)) ])
    
    lumi = pd.DataFrame(lumi, columns=names)
    lumi['luminosityBlock_'] = lumi['luminosityBlock_'].astype('int64')
    lumi['run_'] = lumi['run_'].astype('int64')
    
    return lumi
    
def read_lumisection(path, features):
    lumi = read_lumidata(path, features['per_lumisection'])
    
    events = dict()
    for category in features['per_event']:
        fs = features['per_event'][category]['branches']
        read_each = features['per_event'][category]['read_each']
        batch_size = features['per_event'][category]['batch']
        
        assert batch_size > read_each
        assert batch_size % read_each == 0
        
        events[category] = read_batch(path, treename='Events', leaves=fs,
                                      batch_size=batch_size, each=read_each, test_leaf=0)
    
    return lumi, events

In [139]:
def get_percentile_paticles(event, n = 3, test_feature = 0):
    sort_idx = np.argsort(event[:, test_feature])
    event = event[sort_idx, :]

    if event.shape[0] >= n:
        ### preserving the last event with maximal momentum (test_feature)
        fetch_idx = [i * (event.shape[0] / n) for i in xrange(n-1)] + [event.shape[0] - 1]
        return event[fetch_idx, :]
    else:
        missing = n - event.shape[0]
        return np.vstack([
            np.zeros(shape=(missing, event.shape[1])),
            event
        ])
    
def integrate(event):
    try:
        pt = event[:, 0]
        eta = event[:, 1]
        phi = event[:, 2]
    
        theta = 2.0 * np.arctan(np.exp(-eta))
    
        px = np.sum(pt * np.cos(theta))
        py = np.sum(pt * np.sin(theta) * np.cos(phi))
        pz = np.sum(pt * np.sin(theta) * np.sin(phi))
        return np.array([px, py, pz])
    except:
        return np.zeros(shape=3)
    
    
def process_channel(channel, branches, prefix = "", n = 3, test_feature=0):
    selected = np.array([
        get_percentile_paticles(event, n = n, test_feature = test_feature).flatten()
        for event in channel
    ]).astype('float32')
    
    total_momentum = np.array([
        integrate(event[:3, :])
        for event in channel
    ]).astype('float32')
    
    branch_names = [ prefix + '_' + branch.split(".")[-1] for branch in branches ]
    
    feature_names = [
        "%s_q%d" % (branch, q + 1) for q in range(n) for branch in branch_names
    ] + [
        prefix + '_' + "P%s" % component for component in list("xyz")
    ] 
    
    df = pd.DataFrame(np.hstack([selected, total_momentum]), columns = feature_names)
    return df

In [158]:
def process(data, features, lumidata, n = 3, test_feature = 0):
    channels = list()
    names = list(lumidata.columns)

    for category in data:
        d = process_channel(data[category], features[category]['branches'],
                            prefix = category, n = n, test_feature = test_feature)
        
        names += list(d.columns)
        
        channels.append(d)
    
    df = pd.concat([lumidata] + channels, axis=1, names = names, ignore_index=True)
    df.columns = names
    return df

In [162]:
for cat in events:
    plt.hist([ event.shape[0] for event in events[cat] ])
    plt.title(cat)
    plt.show()



In [159]:
c = process(events, features['per_event'], lumidata)

In [160]:
c.to_pickle("c.pickled")

In [161]:
c


Out[161]:
run_ luminosityBlock_ timeHigh_ timeLow_ instantLumi_ muons_pt__q1 muons_eta__q1 muons_phi__q1 muons_mass__q1 muons_fX_q1 ... calo_pt__q3 calo_eta__q3 calo_phi__q3 calo_mass__q3 calo_fX_q3 calo_fY_q3 calo_fZ_q3 calo_Px calo_Py calo_Pz
0 146644 1224 1285463010 95585 19.096954 0.000000 0.000000 0.000000 0.000000 0.000000 ... 46.347794 0.376394 1.177975 6.275603 0.095045 0.013055 -4.442827 55.848282 16.159065 38.907120
1 146644 1224 1285463010 377120 19.096954 0.000000 0.000000 0.000000 0.000000 0.000000 ... 51.387669 2.291085 -0.824405 12.670225 0.093539 0.012319 -2.976851 66.443405 -24.959877 14.404644
2 146644 1224 1285463010 735485 19.095190 0.000000 0.000000 0.000000 0.000000 0.000000 ... 46.546787 -0.192384 -1.388777 6.448455 0.092027 0.024673 12.028056 -38.463974 6.724241 -33.914780
3 146644 1224 1285463010 993188 19.095190 0.000000 0.000000 0.000000 0.000000 0.000000 ... 42.410381 -0.826786 -2.899447 6.521376 0.094940 0.012578 2.479975 -57.288063 -24.574667 -5.050786
4 146644 1224 1285463011 614948 19.095190 0.000000 0.000000 0.000000 0.000000 0.000000 ... 16.376713 -0.097495 -1.976657 1.653828 0.089640 0.014461 -10.516521 -6.239570 -1.939950 -7.766619
5 146644 1224 1285463011 545676 19.095190 0.000000 0.000000 0.000000 0.000000 0.000000 ... 34.949863 0.082293 1.151670 3.676551 0.089812 0.017895 -3.835010 -5.585491 7.096694 29.687090
6 146644 1224 1285463011 845529 19.095190 0.000000 0.000000 0.000000 0.000000 0.000000 ... 3.341619 -3.082617 0.844027 0.429878 0.101291 0.020989 3.308096 -3.327605 0.203133 0.228466
7 146644 1224 1285463011 846952 19.095190 0.000000 0.000000 0.000000 0.000000 0.000000 ... 38.243462 0.595802 -0.955439 5.103288 0.091806 0.012328 -6.555016 -1.221660 14.850039 -21.151955
8 146644 1224 1285463012 497167 19.095190 0.000000 0.000000 0.000000 0.000000 0.000000 ... 40.053913 -3.820030 0.598588 6.644757 0.097805 0.019607 3.335063 -88.108727 -7.162678 -2.472482
9 146644 1224 1285463012 446391 19.095190 0.000000 0.000000 0.000000 0.000000 0.000000 ... 40.608257 -0.393190 1.728520 6.359371 0.090439 0.023509 -4.255210 -39.123051 -5.210696 33.882893
10 146644 1224 1285463013 107100 19.095190 0.000000 0.000000 0.000000 0.000000 0.000000 ... 35.681240 -1.347164 2.067433 3.620281 0.102451 0.020197 -2.797534 -39.293259 -7.440313 13.324841
11 146644 1224 1285463013 257204 19.095190 0.000000 0.000000 0.000000 0.000000 0.000000 ... 44.155815 2.476231 1.642416 7.108055 0.099888 -0.000078 -5.775013 86.748604 8.100287 -8.159184
12 146644 1224 1285463013 800888 19.091549 0.000000 0.000000 0.000000 0.000000 0.000000 ... 72.485558 1.481400 2.963045 8.049902 0.091614 0.015224 4.710814 116.279808 15.055243 -8.680600
13 146644 1224 1285463013 734284 19.091549 0.000000 0.000000 0.000000 0.000000 0.000000 ... 44.495419 -0.576743 -1.522375 4.656542 0.086804 0.006931 0.284983 10.421871 3.374393 -34.588581
14 146644 1224 1285463013 762473 19.091549 0.000000 0.000000 0.000000 0.000000 0.000000 ... 50.902336 -1.270379 0.669376 5.520372 0.094071 0.021373 15.582873 -123.716438 12.427835 9.654193
15 146644 1224 1285463014 763851 19.091549 0.000000 0.000000 0.000000 0.000000 0.000000 ... 12.323133 0.603625 -2.374398 2.210751 0.095039 0.017868 -5.214990 14.934483 -4.680154 -1.566565
16 146644 1224 1285463015 821874 19.091549 0.000000 0.000000 0.000000 0.000000 0.000000 ... 39.022274 0.235087 -0.428322 5.021188 0.080390 0.017236 -1.587094 49.542419 31.651072 -15.039971
17 146644 1224 1285463015 633265 19.091549 0.000000 0.000000 0.000000 0.000000 0.000000 ... 43.663513 -2.314735 2.162484 6.168704 0.101227 0.019013 -0.540754 -37.495975 -6.817596 5.530629
18 146644 1224 1285463015 727881 19.091549 0.000000 0.000000 0.000000 0.000000 0.000000 ... 114.790825 2.061392 -2.155095 11.129515 0.091582 0.011812 5.548553 192.235443 16.485250 33.640118
19 146644 1224 1285463016 279479 19.091549 0.000000 0.000000 0.000000 0.000000 0.000000 ... 77.838341 0.847089 2.164245 5.382624 0.094922 0.025441 -3.264156 70.109116 -5.213368 4.516318
20 146644 1224 1285463017 271965 19.118963 0.000000 0.000000 0.000000 0.000000 0.000000 ... 44.117477 0.652714 -0.136162 5.599448 0.085252 0.024102 -5.364876 31.728914 26.248301 -3.037836
21 146644 1224 1285463017 170146 19.118963 0.000000 0.000000 0.000000 0.000000 0.000000 ... 90.549698 0.642228 2.806741 10.802125 0.091690 0.011569 2.373115 -10.563128 -56.546398 19.790443
22 146644 1224 1285463017 678527 19.118963 0.000000 0.000000 0.000000 0.000000 0.000000 ... 27.723021 1.229086 1.575010 4.919202 0.091482 0.025966 -2.889679 18.259146 -0.230540 8.317849
23 146644 1224 1285463017 500678 19.118963 0.000000 0.000000 0.000000 0.000000 0.000000 ... 23.308996 -1.894489 -0.465002 4.338595 0.089601 0.022710 0.158611 -33.290260 -7.215164 0.359009
24 146644 1224 1285463018 164232 19.118963 0.000000 0.000000 0.000000 0.000000 0.000000 ... 308.370697 2.002419 0.098382 38.858437 0.093685 0.026864 15.195956 316.103882 -145.152527 -38.886753
25 146644 1224 1285463018 717520 19.118963 0.000000 0.000000 0.000000 0.000000 0.000000 ... 39.872818 2.633668 -0.385257 4.338812 0.087471 0.025447 2.552140 78.905022 3.299271 -1.623478
26 146644 1224 1285463018 330699 19.118963 0.000000 0.000000 0.000000 0.000000 0.000000 ... 55.556271 0.993327 -3.111747 3.013359 0.101982 0.010788 2.769473 52.887817 -35.719685 -1.369861
27 146644 1224 1285463018 701247 19.118963 1.091426 2.281927 1.803388 0.105658 0.064863 ... 37.179459 2.014039 0.209763 6.600767 0.091538 0.023917 3.679019 63.358337 -6.761423 -7.422030
28 146644 1224 1285463019 295707 19.118963 0.000000 0.000000 0.000000 0.000000 0.000000 ... 52.643932 -2.076859 0.460579 6.422886 0.089132 0.017712 2.831056 -61.900944 -24.040382 -11.681614
29 146644 1224 1285463019 10793 19.118963 0.000000 0.000000 0.000000 0.000000 0.000000 ... 38.305386 -1.925141 1.107501 7.208207 0.089463 0.025304 -6.471725 -74.520172 -3.432788 -9.363762
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
11971 149291 671 1288332510 905553 99.206566 0.000000 0.000000 0.000000 0.000000 0.000000 ... 97.459457 -2.105803 -0.320418 3.251329 0.092903 0.020981 2.706471 -154.983307 11.067964 -3.621290
11972 149291 671 1288332511 94962 99.206566 0.000000 0.000000 0.000000 0.000000 0.000000 ... 70.814629 -2.148298 2.028113 6.101872 0.100924 0.017639 -1.006991 -65.518517 2.379776 -4.565071
11973 149291 671 1288332512 56147 99.206566 0.000000 0.000000 0.000000 0.000000 0.000000 ... 80.259117 -0.474285 -2.057880 12.459437 0.091394 0.017236 -4.448534 -56.148167 2.078008 -23.704744
11974 149291 671 1288332512 402952 99.206566 0.000000 0.000000 0.000000 0.000000 0.000000 ... 52.448250 -0.930994 -1.414776 9.073656 0.104558 0.019288 1.745430 -50.207829 16.104172 1.649970
11975 149291 671 1288332512 851042 99.243645 0.000000 0.000000 0.000000 0.000000 0.000000 ... 46.876003 -0.191494 0.590796 6.937904 0.102391 0.014412 -2.182069 26.841898 23.804972 15.618079
11976 149291 671 1288332513 44898 99.243645 0.000000 0.000000 0.000000 0.000000 0.000000 ... 53.466305 -0.961904 0.313754 5.161201 0.095817 0.014986 3.132270 -67.951180 -5.340895 -11.647705
11977 149291 671 1288332512 974203 99.243645 0.000000 0.000000 0.000000 0.000000 0.000000 ... 36.662605 -2.709517 3.073610 9.698652 0.095068 0.012134 1.249357 -30.735035 7.267669 11.002167
11978 149291 671 1288332513 393570 99.243645 0.000000 0.000000 0.000000 0.000000 0.000000 ... 65.011589 0.046134 -2.771478 9.448819 0.097171 0.029528 4.321219 59.628925 -28.449892 -8.064112
11979 149291 671 1288332513 736285 99.243645 0.000000 0.000000 0.000000 0.000000 0.000000 ... 27.956614 -1.966944 0.346945 6.053647 0.086549 0.028266 -13.957143 -49.671337 -3.479212 -0.764197
11980 149291 671 1288332513 846018 99.243645 0.000000 0.000000 0.000000 0.000000 0.000000 ... 44.704945 -1.673876 2.868509 5.118092 0.096738 0.018633 4.548218 -0.669325 -4.855789 2.152866
11981 149291 671 1288332514 90916 99.243645 0.000000 0.000000 0.000000 0.000000 0.000000 ... 47.912907 2.520449 2.517056 10.709692 0.098858 0.021494 8.752708 32.840660 28.479765 -13.723590
11982 149291 671 1288332512 607834 99.206566 0.000000 0.000000 0.000000 0.000000 0.000000 ... 132.785797 -0.815445 0.348402 10.543168 0.098752 0.031402 -0.352882 12.886535 41.461193 15.783162
11983 149291 671 1288332513 742776 99.243645 0.000000 0.000000 0.000000 0.000000 0.000000 ... 93.384247 -1.585089 -2.172143 14.152468 0.095629 0.013943 -5.708961 -162.773285 -13.979457 -20.779009
11984 149291 671 1288332515 476447 99.243645 0.000000 0.000000 0.000000 0.000000 0.000000 ... 51.470802 0.715882 -3.022818 9.038096 0.091148 0.013490 3.191944 1.783207 -2.359090 3.184626
11985 149291 671 1288332515 650650 99.243645 0.000000 0.000000 0.000000 0.000000 0.000000 ... 44.449158 -1.569316 1.637585 4.436020 0.094303 0.018967 0.989992 -79.425896 2.102893 12.296255
11986 149291 671 1288332516 228481 99.226578 0.000000 0.000000 0.000000 0.000000 0.000000 ... 153.775558 0.313026 -1.342291 8.584235 0.098822 0.020554 2.096098 -30.704254 4.964280 -94.805939
11987 149291 671 1288332516 417090 99.226578 0.000000 0.000000 0.000000 0.000000 0.000000 ... 42.849735 1.438365 -0.795665 2.570987 0.102278 0.015823 7.829156 11.975058 5.558404 -7.653160
11988 149291 671 1288332515 474936 99.243645 0.873380 -2.117153 0.747618 0.105658 0.117739 ... 71.571915 -2.066651 -1.909290 10.571046 0.093656 0.017092 1.016485 -97.003181 -2.309530 27.154245
11989 149291 671 1288332516 385966 99.226578 0.000000 0.000000 0.000000 0.000000 0.000000 ... 60.151520 -1.074450 1.164549 8.088524 0.092859 0.018351 -2.179997 -81.561012 10.569630 21.970407
11990 149291 671 1288332516 216654 99.226578 1.026563 1.910375 -2.579506 0.105658 0.074314 ... 48.970528 1.008250 -0.654812 5.969172 0.088136 0.011066 7.370581 86.888512 20.030807 -19.687300
11991 149291 671 1288332517 26667 99.226578 0.000000 0.000000 0.000000 0.000000 0.000000 ... 20.072474 1.855440 -0.878599 1.296617 0.095682 0.020704 -9.723393 4.361507 -1.168688 -0.613179
11992 149291 671 1288332516 853531 99.226578 0.000000 0.000000 0.000000 0.000000 0.000000 ... 46.564445 -2.590379 -2.275867 1.277273 0.093686 0.022095 1.568694 -91.128967 3.882661 6.505018
11993 149291 671 1288332516 586936 99.226578 0.000000 0.000000 0.000000 0.000000 0.000000 ... 83.446167 -2.349231 1.696467 12.626002 0.097314 0.026476 7.524070 -130.592178 -22.286766 -59.112728
11994 149291 671 1288332517 555856 99.226578 0.000000 0.000000 0.000000 0.000000 0.000000 ... 38.282055 0.471276 -1.739896 6.639951 0.094780 0.023198 3.079193 3.439795 5.900452 -18.280853
11995 149291 671 1288332517 512817 99.226578 0.000000 0.000000 0.000000 0.000000 0.000000 ... 48.318756 1.701066 2.515383 8.597171 0.093792 0.022604 -5.974466 54.595005 3.520764 4.154252
11996 149291 671 1288332517 916712 99.226578 0.000000 0.000000 0.000000 0.000000 0.000000 ... 68.583397 1.335959 3.020314 1.579753 0.101462 0.017972 10.512605 57.874725 -33.236389 4.014710
11997 149291 671 1288332517 229770 99.226578 0.000000 0.000000 0.000000 0.000000 0.000000 ... 52.881054 -2.074050 1.783942 9.732655 0.098739 0.026340 4.701293 -14.339216 2.186086 -16.119562
11998 149291 671 1288332517 747577 99.226578 0.000000 0.000000 0.000000 0.000000 0.000000 ... 57.316307 0.441282 2.856360 12.872575 0.086521 0.023686 -1.934383 83.880005 -35.770206 10.668344
11999 149291 671 1288332517 923203 99.226578 0.000000 0.000000 0.000000 0.000000 0.000000 ... 51.785309 -1.748952 2.028631 5.981611 0.092001 0.009764 -14.984174 -67.806549 -2.345484 10.310633
12000 149291 671 1288332515 279391 99.243645 0.000000 0.000000 0.000000 0.000000 0.000000 ... 73.160019 -0.878903 -2.958134 2.101510 0.103056 0.013483 2.724434 -109.100014 -41.498108 -10.064618

12001 rows × 86 columns


In [120]:
%%time

lumidata, events = read_lumisection("root://eospublic.cern.ch//eos/opendata/cms/Run2010B/Photon/AOD/Apr21ReReco-v1/0000/041D347A-D271-E011-908B-0017A477003C.root", features)


CPU times: user 2min 20s, sys: 358 ms, total: 2min 21s
Wall time: 2min 28s


In [9]:
%%time

#data = read_lumisection("../../minibias2.root", features)


CPU times: user 7 µs, sys: 0 ns, total: 7 µs
Wall time: 16 µs

In [10]:
import cPickle

with open('minibias2.pickled', 'w') as f:
    cPickle.dump(a, f)

In [11]:
for category in a['events']:
    plt.hist( [ x.shape[0] for x in a['events'][category] ], bins=20)
    plt.title(category)
    plt.show()



In [ ]: