In [105]:
#Necessary imports
# lib for numeric calculations
import numpy as np

# standard lib for python plotting
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

# seaborn lib for more option in DS
import seaborn as sns

# so to obtain pseudo-random numbers
import random 
                                     
# fits a curve to data
from scipy.optimize import leastsq #least squares
from scipy.optimize import curve_fit # non linear least squares

import math

Plot and Fit the Data


In [97]:
Nbins = 100
def gauss(x, *p):
    """ Gaussian model function"""
    I, mu, sigma = p
    return I * np.exp( - (x - mu) ** 2 / (2. * sigma ** 2))
# Create the data and bin information
data = np.random.normal(0, 1, size=10000)
bin_values, bin_edges = np.histogram(data, density=False, bins=Nbins)
bin_centres = (bin_edges[:-1] + bin_edges[1:])/2
# assign params
init_coeff = [0., 1., 1.]
# Fit the data
fit_coeff, var_matrix = curve_fit(gauss, bin_centres, bin_values, p0=init_coeff)
fit_coeff, var_matrix = curve_fit(gauss, bin_centres, bin_values, p0=init_coeff)


hist_fit = gauss(bin_centres, *fit_coeff)
# and visualize
plt.plot(bin_centres, hist_fit, label='Fitted data', color='red', linewidth=2)
hist = plt.hist(data, bins=Nbins, color='blue', alpha=.3)


See the Fit Residuals


In [122]:
def calc_chisq(hist, function, params, _ndf_func=None,  prob=None):
    """
    """
    bin_values, bin_edges = np.histogram(data, density=False, bins=Nbins)
    bin_centres = (bin_edges[:-1] + bin_edges[1:])/2
 
    resHisto = None
    newHname = ''

    #hist->Sumw2();
  
    chisq = 0.0
    nbins = len(bin_values)
  
    entries_counted = 0.;
    ###n_entries = hist->Integral(1,nbins);
    n_entries = 0
    for i in bin_values:
        n_entries += i
    
    first_index = 0
    last_index = 0
    expected = 0.
    observed = 0
    err_sq = 0.
    nbins_post_rebinning = 0
 
    tmps = []
    histo_map = {}
    
    for i in range(nbins):
        first_index = i
        last_index = i
        observed = bin_values[i]
        expected = function(bin_centres[i], *params) 
        ##err_sq = (hist->GetBinError(i))*(hist->GetBinError(i));


        
        
        #if observed < 6, keep adding bins
        while expected < 6. and i < nbins-1:
            i += 1
            #  assert (i<=nbins)
            last_index = i;
            observed += bin_values[i]
            expected += function(bin_centres[i], *params)   
            #err_sq += (hist->GetBinError(i))*(hist->GetBinError(i));

        entries_counted += observed
        
        
        
        # make sure that there are > 6 events in the remaining bins
        if n_entries - entries_counted < 6:
            # finish it up if there are < 6 events remaining
            while i < nbins-1:
                i += 1 
                last_index = i
                observed += bin_values[i]
                #err_sq += (hist->GetBinError(i))*(hist->GetBinError(i));
                expected += function(bin_centres[i], *params)  
                entries_counted +=  bin_values[i]
      
    
    
       #temp = (observed - expected) * (observed - expected) / err_sq
        temp = (observed - expected) * (observed - expected) / expected 
        chisq += temp;
        
        tmps.append(temp)
        histo_map[i] = math.sqrt(temp)

        # Set the values in the residual plot
        
        for j in range(first_index, last_index):
            #if temp==0:
            #    histo_map[j] = 0
            #    continue
            if (expected - observed) > 0: 
                histo_map[j] = math.sqrt(temp)
            else:
                histo_map[j] = -1.* math.sqrt(temp)

        nbins_post_rebinning +=1 
    

    print "First Index " , first_index , " ,    Last Index " , last_index
    print "(last_index- first_index +1) " , (last_index- first_index +1) 
    print "The number of observed for this bin was ", observed 
    print "The number of expected for this bin was ", expected 
    print "I've counted through bin ", last_index  

    #ndf = nbins_post_rebinning -_ndf_func;
    #prob = TMath::Prob(chisq,ndf); 
    print  "**********************************************************************" 
    print  "**********************************************************************" 
    print  "**********************************************************************" 
    print  "**********************************************************************" 
    print  "The chi sq for this fit is ", chisq
    print  "There were this many bins after rebinning ", nbins_post_rebinning 
    print  "and this many free parameters in the original fit ", _ndf_func  
    print  "So the probability for this fit is ", prob  
    print  "**********************************************************************" 
    print  "**********************************************************************" 
    print  "**********************************************************************" 
    print  "**********************************************************************" 

    return histo_map


"""
  //Considering all the bins
  for (int i=1; i<=nbins; i++){
    first_index = i;
    last_index = i;
    observed = hist->GetBinContent(i);
    err_sq = (hist->GetBinError(i))*(hist->GetBinError(i));
    expected = func->Eval(hist->GetBinCenter(i));    
    //if observed < 6, keep adding bins
    while (expected < 6. && i<nbins){
      i++;
      assert (i<=nbins);
      last_index = i;
      observed += hist->GetBinContent(i);
      err_sq += (hist->GetBinError(i))*(hist->GetBinError(i));
      expected += func->Eval(hist->GetBinCenter(i));    
    }
    entries_counted += observed;

    //make sure that there are < 6 events in the remaining bins
    if(n_entries-entries_counted <6){
      //finish it up if there are < 6 events remaining
      while(i<nbins){
	i++;
	last_index = i;
	observed += hist->GetBinContent(i);
	err_sq += (hist->GetBinError(i))*(hist->GetBinError(i));
	expected += func->Eval(hist->GetBinCenter(i));    
	entries_counted += hist->GetBinContent(i);
      }
    }
  
    cout << "First Index " << first_index << "     Last Index " << last_index << endl;
    cout << "(last_index- first_index +1) " << (last_index- first_index +1) << endl;
    cout << "The number of observed for this bin was " << observed << endl;
    cout << "The number of expected for this bin was " << expected << endl;
    cout << "I've counted through bin " << last_index << endl;

    //    double temp = (observed-expected)*(observed-expected)/err_sq;
    double temp = (observed-expected)*(observed-expected)/expected;
    chisq += temp;
    //Set the values in the residual plot
    for (int j=first_index;j<=last_index;j++){
      if ((expected-observed) > 0) 
	resHisto->SetBinContent(j,TMath::Sqrt(temp));
      else 
	resHisto->SetBinContent(j,-1.*TMath::Sqrt(temp));
    }


    nbins_post_rebinning ++;
  }
  Int_t ndf = nbins_post_rebinning -_ndf_func;
  #prob = TMath::Prob(chisq,ndf); 
  cout << "**********************************************************************" << endl;
  cout << "**********************************************************************" << endl;
  cout << "**********************************************************************" << endl;
  cout << "**********************************************************************" << endl;
  cout << "The chi sq for this fit is " << chisq << endl;
  cout << "There were this many bins after rebinning " << nbins_post_rebinning << endl;
  cout << "and this many free parameters in the original fit " << _ndf_func << endl;
  cout << "So the probability for this fit is " << prob << endl;
  cout << "**********************************************************************" << endl;
  cout << "**********************************************************************" << endl;
  cout << "**********************************************************************" << endl;
  cout << "**********************************************************************" << endl;
 
  resHisto->SetFillColor(4); resHisto->SetFillStyle(1001);
  return resHisto;
"""


Out[122]:
'\n  //Considering all the bins\n  for (int i=1; i<=nbins; i++){\n    first_index = i;\n    last_index = i;\n    observed = hist->GetBinContent(i);\n    err_sq = (hist->GetBinError(i))*(hist->GetBinError(i));\n    expected = func->Eval(hist->GetBinCenter(i));    \n    //if observed < 6, keep adding bins\n    while (expected < 6. && i<nbins){\n      i++;\n      assert (i<=nbins);\n      last_index = i;\n      observed += hist->GetBinContent(i);\n      err_sq += (hist->GetBinError(i))*(hist->GetBinError(i));\n      expected += func->Eval(hist->GetBinCenter(i));    \n    }\n    entries_counted += observed;\n\n    //make sure that there are < 6 events in the remaining bins\n    if(n_entries-entries_counted <6){\n      //finish it up if there are < 6 events remaining\n      while(i<nbins){\n\ti++;\n\tlast_index = i;\n\tobserved += hist->GetBinContent(i);\n\terr_sq += (hist->GetBinError(i))*(hist->GetBinError(i));\n\texpected += func->Eval(hist->GetBinCenter(i));    \n\tentries_counted += hist->GetBinContent(i);\n      }\n    }\n  \n    cout << "First Index " << first_index << "     Last Index " << last_index << endl;\n    cout << "(last_index- first_index +1) " << (last_index- first_index +1) << endl;\n    cout << "The number of observed for this bin was " << observed << endl;\n    cout << "The number of expected for this bin was " << expected << endl;\n    cout << "I\'ve counted through bin " << last_index << endl;\n\n    //    double temp = (observed-expected)*(observed-expected)/err_sq;\n    double temp = (observed-expected)*(observed-expected)/expected;\n    chisq += temp;\n    //Set the values in the residual plot\n    for (int j=first_index;j<=last_index;j++){\n      if ((expected-observed) > 0) \n\tresHisto->SetBinContent(j,TMath::Sqrt(temp));\n      else \n\tresHisto->SetBinContent(j,-1.*TMath::Sqrt(temp));\n    }\n\n\n    nbins_post_rebinning ++;\n  }\n  Int_t ndf = nbins_post_rebinning -_ndf_func;\n  #prob = TMath::Prob(chisq,ndf); \n  cout << "**********************************************************************" << endl;\n  cout << "**********************************************************************" << endl;\n  cout << "**********************************************************************" << endl;\n  cout << "**********************************************************************" << endl;\n  cout << "The chi sq for this fit is " << chisq << endl;\n  cout << "There were this many bins after rebinning " << nbins_post_rebinning << endl;\n  cout << "and this many free parameters in the original fit " << _ndf_func << endl;\n  cout << "So the probability for this fit is " << prob << endl;\n  cout << "**********************************************************************" << endl;\n  cout << "**********************************************************************" << endl;\n  cout << "**********************************************************************" << endl;\n  cout << "**********************************************************************" << endl;\n \n  resHisto->SetFillColor(4); resHisto->SetFillStyle(1001);\n  return resHisto;\n'

In [123]:
#plt.plot(bin_centres, hist_fit, label='Fitted data', color='red', linewidth=2)
#hist = plt.hist(data, bins=Nbins, color='blue', alpha=.3)
histo_map = calc_chisq(hist, gauss, coeff)

#bin_width = bin_edges[1:] - bin_edges[:-1]  
#plt.bar(bin_edges[:-1], tmps, width=bin_width) 


plt.show()


First Index  99  ,    Last Index  99
(last_index- first_index +1)  1
The number of observed for this bin was  1
The number of expected for this bin was  0.0596267915721
I've counted through bin  99
**********************************************************************
**********************************************************************
**********************************************************************
**********************************************************************
The chi sq for this fit is  113.749968762
There were this many bins after rebinning  100
and this many free parameters in the original fit  None
So the probability for this fit is  None
**********************************************************************
**********************************************************************
**********************************************************************
**********************************************************************

In [128]:
histo_map.values()
bin_width = bin_edges[1:] - bin_edges[:-1]  
plt.bar(bin_edges[:-1], histo_map.values(), width=bin_width)


Out[128]:
<Container object of 100 artists>

In [ ]: