This section follows the section 7.10 of Modern Statistical Methods in Astronomy by Feigelson and Babu
The statsmodels package provides an interface to fitting that is similar to what's available in R.
In [ ]:
import statsmodels
statsmodels.__version__
In [ ]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.sandbox.regression.predstd import wls_prediction_std
import matplotlib.pyplot as plt
The file SDSS_QSO.dat is from Feigelson's Astrostatistics school, September 2014 at Caltech
In [ ]:
df = pd.read_csv('data/SDSS_QSO.dat', delim_whitespace=True)
In [ ]:
df.head()
In [ ]:
df.describe()
In [ ]:
# Keep only rows with positive magnitudes, and 18 < i < 22
In [ ]:
df = df[(df.u_mag > 10) & (df.i_mag > 18) & (df.i_mag < 22) ]
In [ ]:
# Set a limit on u magnitude errors
df.loc[df.sig_u_mag<0.02,'sig_u_mag'] = 0.02
df.sig_u_mag.min()
In [ ]:
mod_ols = smf.ols('u_mag ~ i_mag', data=df)
res_ols = mod_ols.fit()
In [ ]:
print(res_ols.summary())
In [ ]:
print('Parameters: ', res_ols.params)
print('R2: ', res_ols.rsquared)
print('AIC', res_ols.aic)
In [ ]:
In [ ]:
mod_wls = smf.wls('u_mag ~ i_mag', data=df, weights=df.sig_u_mag**-2)
In [ ]:
res_wls = mod_wls.fit()
In [ ]:
print(res_wls.summary())
In [ ]:
i_range = np.arange(18,22,0.2)
In [ ]:
p_ols = res_ols.predict({"i_mag": i_range})
p_ols
In [ ]:
%matplotlib inline
prstd, iv_l, iv_u = wls_prediction_std(res_wls,sm.add_constant(i_range), weights=1/(0.02*0.02))
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(df.i_mag, df.u_mag, marker='.', c='k', s=0.5, label="data", alpha=0.5)
ax.plot(i_range, res_wls.predict({"i_mag":i_range}), 'b-', label="Weighted LS")
ax.plot(i_range, res_ols.predict({"i_mag":i_range}), 'g--.', label="Ordinary LS")
ax.plot(i_range, iv_u, 'r--', label='95% confidence')
ax.plot(i_range, iv_l, 'r--')
ax.legend(loc='best')
plt.xlabel('SDSS i (mag)')
plt.ylabel('SDSS u (mag)')
plt.xlim(17.9, 21.6)
This example is drawn from Figure 8.2 of the AstroML book.
Generate data for 100 random supernovae.
In [ ]:
from astroML.cosmology import Cosmology
from astroML.datasets import generate_mu_z
import numpy as np
In [ ]:
#------------------------------------------------------------
# Generate data
z_sample, mu_sample, dmu = generate_mu_z(100, random_state=0)
cosmo = Cosmology()
z = np.linspace(0.01, 2, 1000)
mu_true = np.asarray(map(cosmo.mu, z))
Put the data into a Pandas dataframe
In [ ]:
sndata = pd.DataFrame({"z": z_sample, "mu": mu_sample, "mu_err": dmu})
In [ ]:
fig = plt.figure(figsize=(5, 5))
plt.errorbar(z_sample, mu_sample, dmu, fmt='.k', ecolor='gray', lw=1)
plt.xlabel(r'$z$')
plt.ylabel(r'$\mu$')
Fit a straight line
In [ ]:
snmod_wls = smf.wls("mu ~ z", data=sndata, weights=dmu**-2)
In [ ]:
snres_wls = snmod_wls.fit()
In [ ]:
print(snres_wls.summary())
In [ ]:
z_range = np.arange(0,1.8,0.1)
Overplot the straight line fit
In [ ]:
fig = plt.figure(figsize=(5, 5))
plt.errorbar(z_sample, mu_sample, dmu, fmt='.k', ecolor='gray', lw=1)
plt.plot(z_sample, snres_wls.fittedvalues, 'b-', label="Weighted LS")
plt.xlabel(r'$z$')
plt.ylabel(r'$\mu$')
In the cells below, fit 2nd and 4th degree polynomials and overplot on the data
In [ ]:
snmod_wls_2nd = smf.wls("mu ~ z + np.power(z,2)", data=sndata, weights=dmu**-2)