Models are built by HistFactory
from measurements that consist of channels which hold samples.
The measurement is the object which collects all the information that will then be turned into a model by HistFactory
. It is the top level combination of the channels that form the model.
A channel is a representation of observed data. Each channel is required to have a histogram of measured data associated with it. [3] Loosely, channels can be though of as "regions" in an analysis (e.g., signal regions, control regions, validation regions).
Samples are used to describe the channel (data) they are associated with. They can represent signals and backgrounds. Each sample has a histogram describing its shape and a list of systematics describing how that shape transforms based on a number of parameters. [3]
In [1]:
import ROOT
ROOT.RooMsgService.instance().setGlobalKillBelow(5)
%jsroot on
import sys
import os
# Don't require pip install to test out
sys.path.append(os.getcwd() + '/../../src')
from dfgmark import histfactorybench as hfbench
In [2]:
help(hfbench.sample_func_histogram)
In [3]:
help(hfbench.make_sample)
In [4]:
help(hfbench.make_channel)
In [5]:
help(hfbench.make_model)
In [6]:
help(hfbench.benchmark_fit)
In [7]:
help(hfbench.benchmark_fit_with_workspace)
Now let's create the samples (1 signal), the channel (1 signal region), and the model.
First, set the parameters for the model and make the signal sample histogram
In [8]:
n_events = 1000
n_bins = 10
range_low = 0
range_high = 10
frac_sig = 1
# pdf of a falling line
f1 = ROOT.TF1("f1","[0]*x + [1]", 0, 10)
f1.SetParameters(-1/50, 1/5)
signal_hist = hfbench.sample_func_histogram(
f1, n_events=n_events, range_low=0, range_high=10, hist_name="signal_hist")
data_hist = hfbench.sample_func_histogram(
f1, n_events=n_events, range_low=0, range_high=10, hist_name="data_hist")
In [9]:
c = ROOT.TCanvas()
signal_hist.Draw()
data_hist.SetLineColor(1)
data_hist.Draw("SAME")
c.Draw()
Now make the samples and add them and the channel data histogram to the channel
In [10]:
samples = []
samples.append(hfbench.make_sample("signal", signal_hist, 0.01, 0.01))
channels = []
SR = hfbench.make_channel(channel_name="SR", channel_data=data_hist, channel_samples=samples)
channels.append(SR)
Define the parameter of interest (POI) and then make the workspace and model
In [11]:
POI = ["SignalStrength"]
ws, model = hfbench.make_model(n_events, n_bins, channels, POI)
In [12]:
ws.Print()
In [13]:
model.graphVizTree("model.dot")
In [14]:
%%bash
dot -Tpdf -o Images/model.pdf model.dot
In [15]:
#hfbench.benchmark_fit(ws.var("obs_x_SR"), model, n_events, n_events, verbose=True)
hfbench.benchmark_fit_with_workspace(ws, model, n_events, n_events, verbose=True);
In [16]:
# check information on the fit
hfbench.benchmark_fit_with_workspace(ws, model, n_events, 1, verbose=True);
In [17]:
model.Print("v")
In [18]:
mc = ws.obj("ModelConfig")
mc.LoadSnapshot()
pl = ROOT.RooStats.ProfileLikelihoodCalculator(ws.data("obsData"), mc)
ROOT.SetOwnership(pl, False)
##ws.data("obsData").Print("v")
##mc.Print("v")
model.Print("v")
x = ws.var("obs_x_SR")
#pl.SetConfidenceLevel(0.99)
pl.SetConfidenceLevel(0.683)
interval = pl.GetInterval()
firstPOI = mc.GetParametersOfInterest().first()
lowerLimit = interval.LowerLimit(firstPOI)
upperLimit = interval.UpperLimit(firstPOI)
print("68% interval on {0} is [{1},{2}]\n".format(firstPOI.GetName(),lowerLimit,upperLimit))
plot = ROOT.RooStats.LikelihoodIntervalPlot(interval)
plot.SetRange(-3,3)
plot.SetNPoints(50)
plot.SetMaximum(0.1)
c = ROOT.TCanvas()
#c.SetLogy(True)
plot.Draw()
c.Draw()
c.SaveAs("nll.pdf")
c.IsA().Destructor(c) # As currently not exiting properly
In [19]:
c = ROOT.TCanvas()
f = ws.var("obs_x_SR").frame()
#print(ws.var("obs_x_SR"))
ws.data("obsData").plotOn(f)
f.Draw()
c.Draw()
print(ws.data("obsData"))
#h_test = super(ROOT.RooDataSet,ws.data("obsData")).createHistogram("h_test", ws.var("obs_x_SR"), ROOT.RooFit.Binning(n_bins, range_low, range_high))
h_test = hfbench.roodataset_to_hist("h_test", ws.var("obs_x_SR"), ws.data("obsData"))
c_test = ROOT.TCanvas()
h_test.Draw()
c_test.Draw()
Plot the channel (signal sample and data) together
In [20]:
model_config = ws.obj("ModelConfig")
hist_factory_nav = ROOT.RooStats.HistFactory.HistFactoryNavigation(model_config)
ROOT.SetOwnership(hist_factory_nav, False)
# draw the histogram stack
stack = hist_factory_nav.GetChannelStack("SR")
channel_data = hist_factory_nav.GetDataHist(ws.data("obsData"),"SR","channel_data")
channel_data.Sumw2(False)
channel_data.Sumw2()
sig_hist = hist_factory_nav.GetSampleHist("SR", "signal", "sig_hist")
fit_frame = ws.var("obs_x_SR").frame()
model.plotOn(fit_frame, ROOT.RooFit.Normalization(n_events), ROOT.RooFit.LineStyle(1)) #kDashed = 2
c1 = ROOT.TCanvas()
stack.Draw()
channel_data.Draw("SAME")
fit_frame.Draw("SAME")
leg = ROOT.TLegend(0.65, 0.65, 0.9, 0.9)
for hist in stack.GetHists():
samplename = hist.GetName().rsplit('_')[0]
leg.AddEntry(hist, samplename,"f")
leg.AddEntry(channel_data, "channel data","lep")
#leg.AddEntry(fit_frame, "fit to data","lep")
leg.AddEntry("", "fit to data","lep")
leg.Draw()
c1.Draw()
In [21]:
# To avoid a segfault at kerenel exit
canvas_list = [c, c_test, c1]
for canvas in canvas_list:
canvas.IsA().Destructor(canvas)
RooFit
and HistFactory