Parameter estimation by maximum likelihood method



In [6]:

    
%pylab inline









    



Populating the interactive namespace from numpy and matplotlib



In [7]:

    
from scipy.stats import norm
from lmfit import minimize, Parameters

Create a normally distributed random variable



In [11]:

    
# create some data
mymean = 28.74
mysigma = 8.33 # standard deviation!
rv_norm = norm(loc = mymean, scale = mysigma)
data = rv_norm.rvs(size = 150)
plt.hist(data, bins=20, facecolor='red', alpha=.3);

plt.ylabel('Number of events');
plt.xlim(0,100);

Define a model function



In [12]:

    
def mynorm(x, params):
    mu, sigma = params
    # scipy implementation
    mynorm = norm(loc = mu, scale = sigma)
    return mynorm.pdf(x)

mynorm(0, [0,1]) # 0.39









    Out[12]:





0.3989422804014327

Loglikelihood function to be minimize



In [13]:

    
def loglikelihood(params, data):
    mu = params['mean'].value
    sigma = params['std'].value
    
    l1 = np.log( mynorm(data, [mu, sigma]) ).sum()
    return(-l1) # return negative loglikelihood to minimize

myfoo = Parameters()
myfoo.add('mean', value = 20)
myfoo.add('std', value = 5.0)
loglikelihood(myfoo, data)









    Out[13]:





794.4880315495127



In [14]:

    
myparams = Parameters()
myparams.add('mean', value = 20.3)
myparams.add('std', value = 5.0)



In [15]:

    
out = minimize(fcn = loglikelihood, params=myparams, method='nelder', args=(data,))
print(out.userfcn(myparams, data)) # ~523.631337424









    



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-15-92b2e991c59d> in <module>()
      1 out = minimize(fcn = loglikelihood, params=myparams, method='nelder', args=(data,))
----> 2 print(out.userfcn(myparams, data)) # ~523.631337424

AttributeError: 'MinimizerResult' object has no attribute 'userfcn'



In [16]:

    
from lmfit import report_errors
report_errors(myparams)









    



[[Variables]]
    mean:   20.3000000 (init= 20.3)
    std:    5          (init= 5)
[[Correlations]] (unreported correlations are <  0.100)

The estimated mean and standard deviation should be identical to the mean and the standard deviation of the sample population



In [17]:

    
np.mean(data), np.std(data)









    Out[17]:





(28.812147384304652, 7.7946507327263665)

Plot histogram and model together



In [18]:

    
# Compute binwidth
counts, binedge = np.histogram(data, bins=20);

bincenter = [0.5 * (binedge[i] + binedge[i+1]) for i in xrange(len(binedge)-1)]
binwidth = (max(bincenter) - min(bincenter)) / len(bincenter)



In [19]:

    
# Adjust PDF function to data
ivar = np.linspace(0, 100, 100)
params =  [ myparams['mean'].value, myparams['std'].value ]
mynormpdf = mynorm(ivar, params)*binwidth*len(data)



In [20]:

    
# Plot everything together
plt.hist(data, bins=20, facecolor='white', histtype='stepfilled');

plt.plot(ivar, mynormpdf);
plt.ylabel('Number of events');



In [ ]: