In [ ]:
# == Basic import == #
# plot within the notebook
%matplotlib inline
# No annoying warnings
import warnings
warnings.filterwarnings('ignore')
In [ ]:
import numpy as np
import matplotlib.pyplot as plt
In [ ]:
sigma_int = 0.10    # Intrinsic dispersion to recover
mu = -0.5           # Mean value to recover
error = 0.12        # Typical noise of the data
error_noise = 0.03  # Scatter of the noise (so the errors are not all identical): 0.12 +/- 0.03
npoints = 1000      # Number of points
In [ ]:
# Draw one Gaussian error bar per point, then draw the data:
# intrinsic scatter around mu, plus measurement noise of width `errors`
errors = np.random.normal(loc=error, scale=error_noise, size=npoints)
data = np.random.normal(loc=mu, scale=sigma_int, size=npoints) + np.random.normal(loc=0, scale=errors)
In [ ]:
fig = plt.figure(figsize=[13, 5])
ax = fig.add_subplot(111)
ax.errorbar(np.arange(npoints), data, yerr=errors, marker="o", ls="None",
            mfc=plt.cm.Blues(0.6, 0.6), mec=plt.cm.binary(0.8, 1), ecolor="0.7",
            ms=13, mew=2)
Some Theory to understand what is going on
The probability $p_i$ of observing a point $i$ with value $x_i$, given its Gaussian error $dx_i$ and assuming the system has an intrinsic dispersion $\sigma$, is:
$$ p_i = G(x_i\ |\ \mu,\ \sqrt{dx_i^2 + \sigma^2}) $$where $G$ is the Gaussian probability distribution function (pdf). The measurement error and the intrinsic dispersion add in quadrature because they are two independent Gaussian scatters.
In Python you can compute $p$ using the norm class of scipy.stats:
from scipy.stats import norm
import numpy as np
p = norm.pdf(x, loc=mu, scale=np.sqrt(dx**2 + sigma**2))
The likelihood of observing your sample given your model (here $\mu$ and $\sigma$) is simply the product of the probabilities of observing each point of your sample. The best model will then be the one maximizing the likelihood $\mathcal{L}$: $$ \mathcal{L} = \prod_i p_i $$
In practice we work with the log of the likelihood, so that the formula becomes the sum of the logs of the individual probabilities:
$$ \log\mathcal{L} = \sum_i \log(p_i) $$You should be able to code the minus_loglikelihood ($=-\log\mathcal{L}$); a sketch follows below. [Remark: scipy.stats.norm accepts vectors, which speeds things up.]
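For reference, here is a minimal sketch of such a function, assuming data and errors are the arrays built above (the standalone signature is just one possible choice; in the exercise below the same logic lives inside a class method):

from scipy import stats
import numpy as np

def minus_loglikelihood(parameters, data, errors):
    """ - sum of the loglikelihood of the sample for a model [mu, sigma_int] """
    mu, sigma_int = parameters
    pdf = stats.norm.pdf(data, loc=mu, scale=np.sqrt(errors**2 + sigma_int**2))
    return -np.sum(np.log(pdf))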
In [ ]:
from scipy.optimize import minimize
from scipy import stats
class WeightedMean( object ):
    """ Class allowing to fit a weighted mean including intrinsic dispersion """

    PARAMETERS = ["mu", "sigma_int"]

    def __init__(self, data, errors):
        """ Initialize the class

        Parameters
        ----------
        data: [array]
            measured values

        errors: [array]
            measured errors

        Return
        ------
        Void
        """
        # => Input test: data and errors must have the same size
        # => Create self.data, self.errors (they should be numpy arrays)
        # => Create self.npoints, which is just the number of data points

    def set_guesses(self, guesses):
        """ Set the 2 guesses for the fit: mu and sigma_int

        Return
        ------
        Void
        """
        # => Input test: guesses must have the same size as the number of parameters
        self.guesses = np.asarray(guesses)  # record the guesses as a numpy array

    def fit(self, guesses=[0, 1], verbose=True):
        """ Fit the parameters to the data

        This uses scipy.optimize.minimize

        Parameters
        ----------
        guesses: [array]
            Initial values for the fit.

        verbose: [bool]
            print the results if True

        Return
        ------
        Void (creates self.fitout)
        """
        if not hasattr(self, "guesses") or self.guesses is None:
            # Set the guesses using the appropriate method
            self.set_guesses(guesses)

        # => Fill in the "***" of the following call
        self.fitout = minimize("***", "***")
        if verbose:
            print(self.fitout)

    def minus_loglikelihood(self, parameters):
        """ The sum of the minus loglikelihood used for the fit

        Parameters
        ----------
        parameters: [array]
            list of the values for the free parameters of the model

        Return
        ------
        float (- sum loglikelihood)
        """
        mu, sigma_int = parameters
        # => Return the - sum of the log of the individual likelihoods (using stats.norm.pdf)
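For reference, one possible way to fill in the blanks, assuming the attribute names suggested by the comments above (a sketch, not the only valid solution):

# in __init__, after checking that data and errors have the same size:
self.data, self.errors = np.asarray(data), np.asarray(errors)
self.npoints = len(self.data)

# in fit(), the minimizer call:
self.fitout = minimize(self.minus_loglikelihood, self.guesses)

# in minus_loglikelihood(), after unpacking mu and sigma_int:
return -np.sum(np.log(stats.norm.pdf(self.data, loc=mu,
                      scale=np.sqrt(self.errors**2 + sigma_int**2))))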
In [ ]:
wmean = WeightedMean(data,errors)
wmean.fit()
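Once the blanks are filled, the best-fit values are stored in wmean.fitout.x (ordered as PARAMETERS) and should land close to the values used to simulate the data. A quick check:

best_mu, best_sigma = wmean.fitout.x
print("fitted mu = %.3f (truth: %.2f)" % (best_mu, mu))
print("fitted sigma_int = %.3f (truth: %.2f)" % (best_sigma, sigma_int))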