In [105]:
#Necessary imports
# lib for numeric calculations
import numpy as np
# standard lib for python plotting
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
# seaborn lib for more option in DS
import seaborn as sns
# so to obtain pseudo-random numbers
import random
# fits a curve to data
from scipy.optimize import leastsq #least squares
from scipy.optimize import curve_fit # non linear least squares
import math
In [97]:
Nbins = 100
def gauss(x, *p):
""" Gaussian model function"""
I, mu, sigma = p
return I * np.exp( - (x - mu) ** 2 / (2. * sigma ** 2))
# Create the data and bin information
data = np.random.normal(0, 1, size=10000)
bin_values, bin_edges = np.histogram(data, density=False, bins=Nbins)
bin_centres = (bin_edges[:-1] + bin_edges[1:])/2
# assign params
init_coeff = [0., 1., 1.]
# Fit the data
fit_coeff, var_matrix = curve_fit(gauss, bin_centres, bin_values, p0=init_coeff)
fit_coeff, var_matrix = curve_fit(gauss, bin_centres, bin_values, p0=init_coeff)
hist_fit = gauss(bin_centres, *fit_coeff)
# and visualize
plt.plot(bin_centres, hist_fit, label='Fitted data', color='red', linewidth=2)
hist = plt.hist(data, bins=Nbins, color='blue', alpha=.3)
In [122]:
def calc_chisq(hist, function, params, _ndf_func=None, prob=None):
"""
"""
bin_values, bin_edges = np.histogram(data, density=False, bins=Nbins)
bin_centres = (bin_edges[:-1] + bin_edges[1:])/2
resHisto = None
newHname = ''
#hist->Sumw2();
chisq = 0.0
nbins = len(bin_values)
entries_counted = 0.;
###n_entries = hist->Integral(1,nbins);
n_entries = 0
for i in bin_values:
n_entries += i
first_index = 0
last_index = 0
expected = 0.
observed = 0
err_sq = 0.
nbins_post_rebinning = 0
tmps = []
histo_map = {}
for i in range(nbins):
first_index = i
last_index = i
observed = bin_values[i]
expected = function(bin_centres[i], *params)
##err_sq = (hist->GetBinError(i))*(hist->GetBinError(i));
#if observed < 6, keep adding bins
while expected < 6. and i < nbins-1:
i += 1
# assert (i<=nbins)
last_index = i;
observed += bin_values[i]
expected += function(bin_centres[i], *params)
#err_sq += (hist->GetBinError(i))*(hist->GetBinError(i));
entries_counted += observed
# make sure that there are > 6 events in the remaining bins
if n_entries - entries_counted < 6:
# finish it up if there are < 6 events remaining
while i < nbins-1:
i += 1
last_index = i
observed += bin_values[i]
#err_sq += (hist->GetBinError(i))*(hist->GetBinError(i));
expected += function(bin_centres[i], *params)
entries_counted += bin_values[i]
#temp = (observed - expected) * (observed - expected) / err_sq
temp = (observed - expected) * (observed - expected) / expected
chisq += temp;
tmps.append(temp)
histo_map[i] = math.sqrt(temp)
# Set the values in the residual plot
for j in range(first_index, last_index):
#if temp==0:
# histo_map[j] = 0
# continue
if (expected - observed) > 0:
histo_map[j] = math.sqrt(temp)
else:
histo_map[j] = -1.* math.sqrt(temp)
nbins_post_rebinning +=1
print "First Index " , first_index , " , Last Index " , last_index
print "(last_index- first_index +1) " , (last_index- first_index +1)
print "The number of observed for this bin was ", observed
print "The number of expected for this bin was ", expected
print "I've counted through bin ", last_index
#ndf = nbins_post_rebinning -_ndf_func;
#prob = TMath::Prob(chisq,ndf);
print "**********************************************************************"
print "**********************************************************************"
print "**********************************************************************"
print "**********************************************************************"
print "The chi sq for this fit is ", chisq
print "There were this many bins after rebinning ", nbins_post_rebinning
print "and this many free parameters in the original fit ", _ndf_func
print "So the probability for this fit is ", prob
print "**********************************************************************"
print "**********************************************************************"
print "**********************************************************************"
print "**********************************************************************"
return histo_map
"""
//Considering all the bins
for (int i=1; i<=nbins; i++){
first_index = i;
last_index = i;
observed = hist->GetBinContent(i);
err_sq = (hist->GetBinError(i))*(hist->GetBinError(i));
expected = func->Eval(hist->GetBinCenter(i));
//if observed < 6, keep adding bins
while (expected < 6. && i<nbins){
i++;
assert (i<=nbins);
last_index = i;
observed += hist->GetBinContent(i);
err_sq += (hist->GetBinError(i))*(hist->GetBinError(i));
expected += func->Eval(hist->GetBinCenter(i));
}
entries_counted += observed;
//make sure that there are < 6 events in the remaining bins
if(n_entries-entries_counted <6){
//finish it up if there are < 6 events remaining
while(i<nbins){
i++;
last_index = i;
observed += hist->GetBinContent(i);
err_sq += (hist->GetBinError(i))*(hist->GetBinError(i));
expected += func->Eval(hist->GetBinCenter(i));
entries_counted += hist->GetBinContent(i);
}
}
cout << "First Index " << first_index << " Last Index " << last_index << endl;
cout << "(last_index- first_index +1) " << (last_index- first_index +1) << endl;
cout << "The number of observed for this bin was " << observed << endl;
cout << "The number of expected for this bin was " << expected << endl;
cout << "I've counted through bin " << last_index << endl;
// double temp = (observed-expected)*(observed-expected)/err_sq;
double temp = (observed-expected)*(observed-expected)/expected;
chisq += temp;
//Set the values in the residual plot
for (int j=first_index;j<=last_index;j++){
if ((expected-observed) > 0)
resHisto->SetBinContent(j,TMath::Sqrt(temp));
else
resHisto->SetBinContent(j,-1.*TMath::Sqrt(temp));
}
nbins_post_rebinning ++;
}
Int_t ndf = nbins_post_rebinning -_ndf_func;
#prob = TMath::Prob(chisq,ndf);
cout << "**********************************************************************" << endl;
cout << "**********************************************************************" << endl;
cout << "**********************************************************************" << endl;
cout << "**********************************************************************" << endl;
cout << "The chi sq for this fit is " << chisq << endl;
cout << "There were this many bins after rebinning " << nbins_post_rebinning << endl;
cout << "and this many free parameters in the original fit " << _ndf_func << endl;
cout << "So the probability for this fit is " << prob << endl;
cout << "**********************************************************************" << endl;
cout << "**********************************************************************" << endl;
cout << "**********************************************************************" << endl;
cout << "**********************************************************************" << endl;
resHisto->SetFillColor(4); resHisto->SetFillStyle(1001);
return resHisto;
"""
Out[122]:
In [123]:
#plt.plot(bin_centres, hist_fit, label='Fitted data', color='red', linewidth=2)
#hist = plt.hist(data, bins=Nbins, color='blue', alpha=.3)
histo_map = calc_chisq(hist, gauss, coeff)
#bin_width = bin_edges[1:] - bin_edges[:-1]
#plt.bar(bin_edges[:-1], tmps, width=bin_width)
plt.show()
In [128]:
histo_map.values()
bin_width = bin_edges[1:] - bin_edges[:-1]
plt.bar(bin_edges[:-1], histo_map.values(), width=bin_width)
Out[128]:
In [ ]: