Based on the Intrinsic Dispersion fitting class you created last week create a new class that allow to fit a step.
Let's imagine a physical process having a transition at a given threshold value. This could for instance be applied to astrophysics. Cepheids (pulsating stars) have an average magnitude (luminosity in log-scale) m0 if their pulsating period is lower than, say, 10 and an average magnitude m1 otherwise (nb: this is not true in real life). Each group has a natural dispersion of sigma0 and sigma1, respectively.
The pulsation and magnitude of each stars have been measured (see data) and we want to derive the average magnitudes (m0, m1) and natural dispersions (sigma0, sigma1) for the Cepheids.
The data are recorded in data/Cepheid_StepFit.dat
The data have errors both on the pulsation and manitude measurements. All errors are gaussian errors
Because of the errors on the pulsation measurements, a given star will have a probability p0 to belong to the low-pulsation group (below 10) and a probability p1=1-p0 to belong to the high-pulsation one.
In [2]:
# No annoying warnings
import warnings
warnings.filterwarnings('ignore')
# == Basic import == #
# plot within the notebook
%matplotlib inline
import numpy as np
import matplotlib.pyplot as mpl
In [22]:
from scipy.optimize import minimize
from scipy import stats
In [23]:
import astropy.table as t
In [24]:
table = t.Table.read("data/Cepheid_StepFit.dat", format="ascii")
In [25]:
table
Out[25]:
In [35]:
class StepFit( object ):
""" Class to fit a bimodal guassian distribution """
FREEPARAMETERS = ["mean_a", "mean_b", "sigma_a", "sigma_b"]
NAME = "StepFit"
def __init__(self, x, y, xerr, yerr, xcut=None):
""" Initialize the instance
Parameter:
----------
x,y,dx,dy: [N-length array]
x and y-axis data with their respective errors.
Return
------
Void
"""
if len(x) != len(xerr) or len(y) != len(yerr) or len(x) != len(y):
raise ValueError("x, y , xerr and yerr must have the same size.")
self.x = np.asarray(x)
self.xerr = np.abs(np.asarray(xerr))
self.y = np.asarray(y)
self.yerr = np.abs(np.asarray(yerr))
if xcut is not None:
self.set_proba(xcut)
# ---------------- #
# Main Methods #
# ---------------- #
def fit(self, guesses, verbose=True):
""" """
# - Setup
self.set_guesses(guesses)
# - Fit
self.fitout = minimize(self.minus_loglikelihood, self.guesses)
if verbose:
print self.fitout
def minus_loglikelihood(self, parameters):
""" The - sum (log(Likelihood)) for the given parameters
This method can be used for fitting procedure
Parameters
----------
parameters: [N-length array]
Values setting the model. N must be the size of mode's
freeparameters (see self.FREEPARAMETERS)
Return
------
float
"""
# - Test if the probability has been initialized
if "proba_a" not in dir(self):
raise AttributeError(" The probility of each object to be from 'a' or 'b' has not been set.")
mean_a,mean_b, sigma_a, sigma_b = parameters
Likelihood = self.proba_a * stats.norm.pdf(self.y,
loc=mean_a, scale= np.sqrt(sigma_a**2 + self.yerr**2)) +\
(1-self.proba_a) * stats.norm.pdf(self.y,
loc=mean_b, scale= np.sqrt(sigma_b**2 + self.yerr**2))
return -np.sum(np.log(Likelihood))
# --------- #
# SETTER #
# --------- #
def set_proba(self, xcut):
""" define the probability of each points
to be above or below the xcut given x and xerr.
Parameters
----------
xcut: [float]
Value defining the location of the step.
Return
------
Void
"""
if type(xcut) is not float:
raise TypeError("xcut must be a float.")
self.xcut = xcut
self.proba_a = stats.norm.cdf(xcut, loc=self.x, scale=self.xerr)
def set_guesses(self, guesses):
""" set the initial fit parameters
Parameters
----------
guesses: [N-length array]
the Initial values for the fit procedure.
N must be the number of free parameters.
(see self.FREEPARAMETERS)
Return
------
Void
"""
if len(guesses) != len(self.FREEPARAMETERS):
raise ValueError("guess must have %d entries (%d given)"%(len(self.FREEPARAMETERS), len(guesses)))
self.guesses = np.asarray(guesses)
# --------- #
# PLOTTER #
# --------- #
def plot(self, xbins=10, ybins=10, **kwargs):
""" plot the data and the associated best fit (if any) """
has_proba = "proba_a" in dir(self)
fit_done = "fitout" in dir(self)
# ----------- #
# Axes #
# ----------- #
fig = mpl.figure(figsize=[10,8])
ax_size=0.63
# rect (left, lower, width, heigth) in fig coords.
ax = fig.add_axes([0.15,0.15,ax_size,ax_size])
axhx = fig.add_axes([0.15,0.8,ax_size,0.18],sharex=ax)
axhy = fig.add_axes([0.8,0.15,0.18,ax_size],sharey=ax)
# - remove the ticks labels
[[label.set_visible(False) for label in lticks]
for lticks in [axhx.get_xticklabels(),axhx.get_yticklabels()]]
[[label.set_visible(False) for label in lticks]
for lticks in [axhy.get_xticklabels(),axhy.get_yticklabels()]]
# ----------- #
# Scatter #
# ----------- #
# - Scatter
prop = dict(marker="o", s=80, linewidths = 1, cmap = mpl.cm.Blues, zorder=6)
prop["facecolors"] = 0.5 if not has_proba else self.proba_a
for k,v in kwargs.items():
prop[k] = v
# Python 3 only
# prop = **prop + **kwargs
ax.scatter(self.x,self.y, **prop)
ax.errorbar(self.x, self.y, xerr=self.xerr, yerr=self.yerr,
ecolor="0.7", zorder=0.3, marker="s",ls="None")
# ----------- #
# Histo #
# ----------- #
prop_hist = dict(histtype="step", lw=2,ec="k")
if not has_proba:
axhx.hist(self.x, xbins, **prop_hist)
axhy.hist(self.y, ybins,
orientation="horizontal",**prop_hist)
else:
# - b-group
axhx.hist(self.x, xbins, weights = 1-self.proba_a,
fill=False,
**prop_hist)
axhy.hist(self.y, ybins, weights = 1-self.proba_a,
fill=False,
orientation="horizontal",
**prop_hist)
# - a-group
prop_hist["lw"] = 1
prop_hist["ec"] = prop["cmap"](1.,0.8)
axhx.hist(self.x, xbins, weights = self.proba_a,
fill=True, fc= prop["cmap"](1.,0.4),
**prop_hist)
axhy.hist(self.y, ybins, weights = self.proba_a,
fill=True, fc= prop["cmap"](1.,0.4),
orientation="horizontal",
**prop_hist)
# ------------- #
# Fit Results #
# ------------- #
if fit_done:
mean_a,mean_b,sigma_a, sigma_b = self.fitout["x"]
mean_a_err,mean_b_err = np.sqrt(self.fitout["hess_inv"][0][0]),np.sqrt(self.fitout["hess_inv"][1][1])
ax.plot([10,20], [mean_b,mean_b], scalex=False, lw=0.5,color="k")
ax.plot([0,10], [mean_a,mean_a], scalex=False, lw=0.5,color=prop["cmap"](0.9,0.9))
ax.fill_between([10,20], [mean_b+mean_b_err,mean_b+mean_b_err],
[mean_b-mean_b_err,mean_b-mean_b_err],
color="k", alpha=0.3)
ax.fill_between([0,10], [mean_a+mean_a_err,mean_a+mean_a_err],
[mean_a-mean_a_err,mean_a-mean_a_err],
color=prop["cmap"](0.9,0.9), alpha=0.3)
ax.set_ylim(-1.5,1.5)
ax.set_xlim(0,20)
In [36]:
step = StepFit(table["pulsation"], table["mag"], table["puls_err"], table["mag_err"], xcut=10.)
In [39]:
step.fit([-1,1,0.3,0.4], verbose=False)
step.plot(marker="s",s=130, cmap=mpl.cm.jet)
In [9]:
print step.fitout
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [15]:
x = np.random.random(100)
y = np.random.random(100)
yerr = np.random.random(100)/3.
In [18]:
def scatter_it(x,y, yerr, prop={},**kwargs):
"""
**kwargs goes to matplotlib's scatter method
"""
fig = mpl.figure()
ax = fig.add_subplot(111)
ax.plot(x,y, **kwargs)
ax.errorbar(x,y,yerr=yerr,**prop)
In [20]:
scatter_it(x,y,yerr,prop=dict(ecolor="0.7"), ls="None", marker="o")
In [ ]:
mpl.cm.
mpl.scatter