In [2]:
# == Basic import == #
# plot within the notebook
%matplotlib inline
# No annoying warnings
import warnings
warnings.filterwarnings('ignore')
In [3]:
import numpy as np
import matplotlib.pyplot as mpl
In [4]:
sigma_int = 0.10
mu = -0.5
error = 0.12
error_noise = 0.03 # This means that the errors will be 0.12 +/- 0,03
npoints = 1000
In [5]:
errors = np.random.normal(loc=error, scale=error_noise, size=npoints)
data = np.random.normal(loc=mu, scale=sigma_int, size=npoints) + np.random.normal(loc=0,scale=errors)
In [6]:
fig = mpl.figure(figsize=[13,5])
ax = fig.add_subplot(111)
ax.errorbar(np.arange(npoints), data, yerr=errors, marker="o", ls="None",
mfc=mpl.cm.Blues(0.6,0.6), mec=mpl.cm.binary(0.8,1), ecolor="0.7",
ms=13, mew=2)
Out[6]:
The probability p to observe a point i with a value x given it's gaussian error dx and assuming the system has an intrinsic dispersion sigma is:
$$ p = G(x_i\ |\ \mu,\ \sqrt{dx_i ^2+ sigma^2}) $$where $G$ is the gaussian probability distribution function (pdf).
In Python you can measure $p$ using the scipy.stats norm class:
from scipy.stats import norm
import numpy as np
p = norm.pdf(x, loc= mu, scale= np.sqrt(dx**2 + sigma**2))
The likelihood to observe your sample given your model (here $\mu$ and $\sigma$) is simply the product of the probability to observe every given points of your sample. The best model with then be the one maximizing the Likelihood $\mathcal{L}$: $$ \mathcal{L} = \prod p_i $$
In practive we work with the log of the likelihood such that the formula is based on the sum of the log of the individual probabilities:
$$ \log\mathcal{L} = \sum \log(p_i) $$
In [46]:
from scipy.optimize import minimize
from scipy import stats
class WeightedMean( object ):
""" Class that allows us to fit for a weighted mean including intrinsic dispersion"""
PARAMETERS = [mu, sigma_int]
def __init__(self, data, errors):
""" Initialize the class
Parameters
----------
data: [array]
measured value
errors: [array]
measured errors
Return
------
Void
"""
# ---------------
# Test the input
if len(data) != len(errors):
raise ValueError("data and errors must have the same size (given %d vs. %d)"%(len(data),len(errors)))
self.data = np.asarray(data)
self.errors = np.asarray(errors)
self.npoints = len(self.data)
def set_guesses(self,guesses):
""" Set the 2 guesses for the fit: mu and sigma_intrinsic
Return
------
Void
"""
if len(guesses) != len(self.PARAMETERS):
raise ValueError("guess must have %d entries (%d given)"%(len(self.PARAMETERS), len(guesses)))
self.guesses = np.asarray(guesses)
def fit(self, guesses):
""" fit the parameters to the data
This uses scipy.optimize.minimize
"""
self.set_guesses(guesses)
self.fitout = minimize(self.minus_loglikelihood, self.guesses)
print self.fitout
def minus_loglikelihood(self, parameters):
""" The sum of the minus loglikelihood used for the fit
Parameters
----------
parameters: [array]
list of the values for the free parameters of the model
Return
------
float (- sum loglikelihood)
"""
mu, sigma_int = parameters
return - np.sum( np.log( stats.norm.pdf(self.data, loc=mu, scale=np.sqrt(sigma_int**2 + self.errors**2)) ))
In [47]:
wmean = WeightedMean(data,errors)
In [48]:
wmean.fit()
In [50]:
np.sqrt( 2.76810002e-05)
Out[50]:
In [ ]: