Based on the Intrinsic Dispersion
fitting class
you created last week create a new class
that allow to fit a step.
Let's imagine a physical process having a transition at a given threshold value. This could for instance be applied to astrophysics. Cepheids (pulsating stars) have an average magnitude (luminosity in log-scale) m0
if their pulsating period is lower than, say, 10
and an average magnitude m1
otherwise (nb: this is not true in real life). Each group has a natural dispersion of sigma0
and sigma1
, respectively.
The pulsation and magnitude of each stars have been measured (see data) and we want to derive the average magnitudes (m0
, m1
) and natural dispersions (sigma0
, sigma1
) for the Cepheids.
The data are recorded in data/Cepheid_StepFit.dat
The data have errors both on the pulsation and manitude measurements. All errors are gaussian errors
Because of the errors on the pulsation measurements, a given star will have a probability p0
to belong to the low-pulsation group (below 10
) and a probability p1=1-p0
to belong to the high-pulsation one.
In [2]:
# No annoying warnings
import warnings
warnings.filterwarnings('ignore')
# == Basic import == #
# plot within the notebook
%matplotlib inline
import numpy as np
import matplotlib.pyplot as mpl
In [22]:
from scipy.optimize import minimize
from scipy import stats
In [23]:
import astropy.table as t
In [24]:
table = t.Table.read("data/Cepheid_StepFit.dat", format="ascii")
In [25]:
table
Out[25]:
In [35]:
class StepFit( object ):
""" Class to fit a bimodal guassian distribution """
FREEPARAMETERS = ["mean_a", "mean_b", "sigma_a", "sigma_b"]
NAME = "StepFit"
def __init__(self, x, y, xerr, yerr, xcut=None):
""" Initialize the instance
Parameter:
----------
x,y,dx,dy: [N-length array]
x and y-axis data with their respective errors.
Return
------
Void
"""
if len(x) != len(xerr) or len(y) != len(yerr) or len(x) != len(y):
raise ValueError("x, y , xerr and yerr must have the same size.")
self.x = np.asarray(x)
self.xerr = np.abs(np.asarray(xerr))
self.y = np.asarray(y)
self.yerr = np.abs(np.asarray(yerr))
if xcut is not None:
self.set_proba(xcut)
# ---------------- #
# Main Methods #
# ---------------- #
def fit(self, guesses, verbose=True):
""" """
# - Setup
self.set_guesses(guesses)
# - Fit
self.fitout = minimize(self.minus_loglikelihood, self.guesses)
if verbose:
print self.fitout
def minus_loglikelihood(self, parameters):
""" The - sum (log(Likelihood)) for the given parameters
This method can be used for fitting procedure
Parameters
----------
parameters: [N-length array]
Values setting the model. N must be the size of mode's
freeparameters (see self.FREEPARAMETERS)
Return
------
float
"""
# - Test if the probability has been initialized
if "proba_a" not in dir(self):
raise AttributeError(" The probility of each object to be from 'a' or 'b' has not been set.")
mean_a,mean_b, sigma_a, sigma_b = parameters
Likelihood = self.proba_a * stats.norm.pdf(self.y,
loc=mean_a, scale= np.sqrt(sigma_a**2 + self.yerr**2)) +\
(1-self.proba_a) * stats.norm.pdf(self.y,
loc=mean_b, scale= np.sqrt(sigma_b**2 + self.yerr**2))
return -np.sum(np.log(Likelihood))
# --------- #
# SETTER #
# --------- #
def set_proba(self, xcut):
""" define the probability of each points
to be above or below the xcut given x and xerr.
Parameters
----------
xcut: [float]
Value defining the location of the step.
Return
------
Void
"""
if type(xcut) is not float:
raise TypeError("xcut must be a float.")
self.xcut = xcut
self.proba_a = stats.norm.cdf(xcut, loc=self.x, scale=self.xerr)
def set_guesses(self, guesses):
""" set the initial fit parameters
Parameters
----------
guesses: [N-length array]
the Initial values for the fit procedure.
N must be the number of free parameters.
(see self.FREEPARAMETERS)
Return
------
Void
"""
if len(guesses) != len(self.FREEPARAMETERS):
raise ValueError("guess must have %d entries (%d given)"%(len(self.FREEPARAMETERS), len(guesses)))
self.guesses = np.asarray(guesses)
# --------- #
# PLOTTER #
# --------- #
def plot(self, xbins=10, ybins=10, **kwargs):
""" plot the data and the associated best fit (if any) """
has_proba = "proba_a" in dir(self)
fit_done = "fitout" in dir(self)
# ----------- #
# Axes #
# ----------- #
fig = mpl.figure(figsize=[10,8])
ax_size=0.63
# rect (left, lower, width, heigth) in fig coords.
ax = fig.add_axes([0.15,0.15,ax_size,ax_size])
axhx = fig.add_axes([0.15,0.8,ax_size,0.18],sharex=ax)
axhy = fig.add_axes([0.8,0.15,0.18,ax_size],sharey=ax)
# - remove the ticks labels
[[label.set_visible(False) for label in lticks]
for lticks in [axhx.get_xticklabels(),axhx.get_yticklabels()]]
[[label.set_visible(False) for label in lticks]
for lticks in [axhy.get_xticklabels(),axhy.get_yticklabels()]]
# ----------- #
# Scatter #
# ----------- #
# - Scatter
prop = dict(marker="o", s=80, linewidths = 1, cmap = mpl.cm.Blues, zorder=6)
prop["facecolors"] = 0.5 if not has_proba else self.proba_a
for k,v in kwargs.items():
prop[k] = v
# Python 3 only
# prop = **prop + **kwargs
ax.scatter(self.x,self.y, **prop)
ax.errorbar(self.x, self.y, xerr=self.xerr, yerr=self.yerr,
ecolor="0.7", zorder=0.3, marker="s",ls="None")
# ----------- #
# Histo #
# ----------- #
prop_hist = dict(histtype="step", lw=2,ec="k")
if not has_proba:
axhx.hist(self.x, xbins, **prop_hist)
axhy.hist(self.y, ybins,
orientation="horizontal",**prop_hist)
else:
# - b-group
axhx.hist(self.x, xbins, weights = 1-self.proba_a,
fill=False,
**prop_hist)
axhy.hist(self.y, ybins, weights = 1-self.proba_a,
fill=False,
orientation="horizontal",
**prop_hist)
# - a-group
prop_hist["lw"] = 1
prop_hist["ec"] = prop["cmap"](1.,0.8)
axhx.hist(self.x, xbins, weights = self.proba_a,
fill=True, fc= prop["cmap"](1.,0.4),
**prop_hist)
axhy.hist(self.y, ybins, weights = self.proba_a,
fill=True, fc= prop["cmap"](1.,0.4),
orientation="horizontal",
**prop_hist)
# ------------- #
# Fit Results #
# ------------- #
if fit_done:
mean_a,mean_b,sigma_a, sigma_b = self.fitout["x"]
mean_a_err,mean_b_err = np.sqrt(self.fitout["hess_inv"][0][0]),np.sqrt(self.fitout["hess_inv"][1][1])
ax.plot([10,20], [mean_b,mean_b], scalex=False, lw=0.5,color="k")
ax.plot([0,10], [mean_a,mean_a], scalex=False, lw=0.5,color=prop["cmap"](0.9,0.9))
ax.fill_between([10,20], [mean_b+mean_b_err,mean_b+mean_b_err],
[mean_b-mean_b_err,mean_b-mean_b_err],
color="k", alpha=0.3)
ax.fill_between([0,10], [mean_a+mean_a_err,mean_a+mean_a_err],
[mean_a-mean_a_err,mean_a-mean_a_err],
color=prop["cmap"](0.9,0.9), alpha=0.3)
ax.set_ylim(-1.5,1.5)
ax.set_xlim(0,20)
In [36]:
step = StepFit(table["pulsation"], table["mag"], table["puls_err"], table["mag_err"], xcut=10.)
In [39]:
step.fit([-1,1,0.3,0.4], verbose=False)
step.plot(marker="s",s=130, cmap=mpl.cm.jet)
In [9]:
print step.fitout
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [15]:
x = np.random.random(100)
y = np.random.random(100)
yerr = np.random.random(100)/3.
In [18]:
def scatter_it(x,y, yerr, prop={},**kwargs):
"""
**kwargs goes to matplotlib's scatter method
"""
fig = mpl.figure()
ax = fig.add_subplot(111)
ax.plot(x,y, **kwargs)
ax.errorbar(x,y,yerr=yerr,**prop)
In [20]:
scatter_it(x,y,yerr,prop=dict(ecolor="0.7"), ls="None", marker="o")
In [ ]:
mpl.cm.
mpl.scatter