In [1]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [2]:
from astroML.stats import binned_statistic

# generate data
xdata = np.random.randn(1000)
# set bins
bins=np.arange(-5., 5., .5)
# bin data
n,bin_edges = binned_statistic(xdata,xdata,statistic='count',bins=bins)
# convert bin_edge to bin_center
xpos = bins[:-1]+0.25

In [3]:
xpos, n


Out[3]:
(array([-4.75, -4.25, -3.75, -3.25, -2.75, -2.25, -1.75, -1.25, -0.75,
        -0.25,  0.25,  0.75,  1.25,  1.75,  2.25,  2.75,  3.25,  3.75,  4.25]),
 array([   0.,    0.,    1.,    1.,    5.,    9.,   45.,   85.,  136.,
         193.,  193.,  170.,   96.,   44.,   15.,    6.,    1.,    0.,    0.]))

If you don't care about the confidence interval of parameter


In [5]:
from lmfit.models import GaussianModel
# initialize the gaussian model
gm = GaussianModel()
# take a look at the parameter names
print gm.param_names
# I get RuntimeError since my numpy version is a little old


set(['sigma', 'center', 'amplitude'])

In [6]:
# guess parameters
par_guess = gm.guess(n,x=xpos)
# fit data
result = gm.fit(n, par_guess, x=xpos, method='leastsq')
# quick look at result
print result.fit_report()


[[Model]]
    Model(gaussian)
[[Fit Statistics]]
    # function evals   = 21
    # data points      = 19
    # variables        = 3
    chi-square         = 311.662
    reduced chi-square = 19.479
[[Variables]]
    sigma:       0.99084468 +/- 0.016391 (1.65%) (init= 0.75)
    center:      0.07393627 +/- 0.016391 (22.17%) (init= 0)
    amplitude:   500.006455 +/- 7.163327 (1.43%) (init= 289.5)
    fwhm:        2.33326087 +/- 0.038598 (1.65%)  == '2.3548200*sigma'
[[Correlations]] (unreported correlations are <  0.100)
    C(sigma, amplitude)          =  0.577 

In [7]:
# get best fit error and stderr
print result.params['amplitude'].value,result.params['amplitude'].stderr
print result.params['center'].value,result.params['center'].stderr
print result.params['sigma'].value,result.params['sigma'].stderr


500.0064558 7.16332780507
0.0739362774797 0.016391203505
0.99084468134 0.01639124672

In [8]:
fig = plt.figure()
plt.hist(xdata, bins=bins)
plt.plot(xpos, result.best_fit, 'green')


Out[8]:
[<matplotlib.lines.Line2D at 0x7fb8bac68210>]

If you want the confidence intervals


In [9]:
import lmfit
def my_gaussian_model(p, x, y):
    a = np.float(p['a'])
    b = np.float(p['b'])
    c = np.float(p['c'])
    return a/np.sqrt(2.*c) * np.exp( -np.power(x-b,2.)/2./np.power(c, 2.)) - y
pars = lmfit.Parameters()
pars.add_many(('a',0.1), ('b',0.1), ('c',0.1))

In [10]:
# initialize the minimizer
mini = lmfit.Minimizer(my_gaussian_model, pars, (xpos, n))

# do the minimization
result = mini.minimize(method='leastsq')

# print the fit report
print lmfit.fit_report(mini.params)

# NOTE
# the parameter 'a' in function my_gaussian_model is different from the built-in model in lmfit
# so the amplitude value is a little different


[[Variables]]
    a:   283.398719 +/- 3.315086 (1.17%) (init= 0.1)
    b:   0.07393573 +/- 0.016391 (22.17%) (init= 0.1)
    c:   0.99084415 +/- 0.016391 (1.65%) (init= 0.1)
[[Correlations]] (unreported correlations are <  0.100)

In [11]:
# predit the confidence interval of all parameters
ci, trace = lmfit.conf_interval(mini, sigmas=[0.68,0.95],
                                trace=True, verbose=False)
# ci = lmfit.conf_interval(mini)
lmfit.printfuncs.report_ci(ci)


     95.00%    68.00%     0.00%    68.00%    95.00%
a 276.37442 280.00241 283.39872 286.79468 290.42302
c   0.95680   0.97421   0.99084   1.00779   1.02609
b   0.03912   0.05709   0.07394   0.09077   0.10869

In [12]:
print ci.values()


[[(0.95, 276.37442131841283), (0.68, 280.002413883596), (0.0, 283.39871983546033), (0.68, 286.794684757274), (0.95, 290.4230160561106)], [(0.95, 0.9568030716402023), (0.68, 0.9742107677647883), (0.0, 0.99084407879876579), (0.68, 1.0077873699112407), (0.95, 1.0260932669362002)], [(0.95, 0.03911599012373461), (0.68, 0.05709015222817265), (0.0, 0.073935690008488361), (0.68, 0.09076606864537777), (0.95, 0.10869064689812913)]]

In [13]:
a,b,prob = trace['a']['a'], trace['a']['b'], trace['a']['prob']

cx, cy, grid = lmfit.conf_interval2d(mini,  'a','b',30,30)
plt.contourf(cx, cy, grid, np.linspace(0,1,11))
plt.xlabel('a')
plt.colorbar()
plt.ylabel('b')


Out[13]:
<matplotlib.text.Text at 0x7fb8bab47f10>

Other ways to do Gaussian fitting

use the astropy.modeling

astropy provides full documentation on their website

from astropy.modeling import models
blablabla

use the scipy.optimize

import scipy.optimize as opt
opt.minimize(...)

Conclusion:

I am too lazy, so I use lmfit which provides simple error estimations


In [ ]: