Regression/Machine learning example

This example is adapted from the astroML website; it estimates photometric redshifts ("photo-z") for SDSS galaxies with k-nearest-neighbors regression on four photometric colors (u-g, g-r, r-i, i-z).


In [ ]:
# Author: Jake VanderPlas <vanderplas@astro.washington.edu>
# License: BSD
#   The figure is an example from astroML: see http://astroML.github.com
import numpy as np
from matplotlib import pyplot as plt

from sklearn.neighbors import KNeighborsRegressor

#from astroML.datasets import fetch_sdss_galaxy_colors

n_neighbors = 2

In [ ]:
# data = fetch_sdss_galaxy_colors()  # the astroML fetcher was not working here, so read a local FITS copy instead
from astropy.table import Table
data = Table.read('data/sdss_galaxies_qsos_50k.fits')

N = len(data)

In [ ]:
# peek at the first ten rows of the table
data[:10]

In [ ]:
# shuffle the rows with a random permutation (np.random.shuffle on an
# astropy Table swaps rows through views and can silently corrupt them)
np.random.seed(0)
data = data[np.random.permutation(N)]

In [ ]:
# put colors in a matrix
X = np.zeros((N, 4))
X[:, 0] = data['u'] - data['g']
X[:, 1] = data['g'] - data['r']
X[:, 2] = data['r'] - data['i']
X[:, 3] = data['i'] - data['z']
z = data['specz']
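
The feature matrix can also be built more compactly with np.column_stack; the sketch below is only an illustration, and the finite-value mask is an extra precaution assumed here rather than part of the original example.

In [ ]:
# Sketch: equivalent, more compact construction of the color matrix.
# The finite-value mask is an added precaution, not in the original example.
X_alt = np.column_stack([data['u'] - data['g'],
                         data['g'] - data['r'],
                         data['r'] - data['i'],
                         data['i'] - data['z']])
good = np.all(np.isfinite(X_alt), axis=1) & np.isfinite(data['specz'])
X_alt, z_alt = X_alt[good], np.asarray(data['specz'])[good]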

In [ ]:
# divide into training and testing data
Ntrain = N // 2  # integer division, so Ntrain can be used as a slice index
Xtrain = X[:Ntrain]
ztrain = z[:Ntrain]

Xtest = X[Ntrain:]
ztest = z[Ntrain:]
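
The same 50/50 split can be done with scikit-learn's train_test_split; this is only an alternative sketch, with shuffle=False assumed so that it matches the manual split above (the rows were already shuffled earlier).

In [ ]:
# Sketch: the same 50/50 split via scikit-learn.
# shuffle=False keeps the order produced by the earlier permutation.
from sklearn.model_selection import train_test_split

Xtrain_alt, Xtest_alt, ztrain_alt, ztest_alt = train_test_split(
    X, z, test_size=0.5, shuffle=False)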

In [ ]:
knn = KNeighborsRegressor(n_neighbors, weights='uniform')
%time zpred = knn.fit(Xtrain, ztrain).predict(Xtest)
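
The choice n_neighbors = 2 is somewhat arbitrary; a cross-validated search over a few values is sketched below. The parameter grid and the 5-fold setting are assumptions for illustration, not part of the original example.

In [ ]:
# Sketch: pick n_neighbors by 5-fold cross-validation on the training set.
# The grid of candidate values is an arbitrary choice for illustration.
from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(KNeighborsRegressor(weights='uniform'),
                    param_grid={'n_neighbors': [1, 2, 4, 8, 16]},
                    scoring='neg_mean_squared_error', cv=5)
grid.fit(Xtrain, ztrain)
print(grid.best_params_, np.sqrt(-grid.best_score_))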

In [ ]:
axis_lim = np.array([-0.1, 2.5])

rms = np.sqrt(np.mean((ztest - zpred) ** 2))
print("RMS error = %.2g" % rms)
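
Photo-z studies often quote robust statistics alongside the RMS, since a few catastrophic outliers can dominate it. The sketch below computes the normalized median absolute deviation of (z_phot - z_spec)/(1 + z_spec) and an outlier fraction; the 0.15 outlier threshold is a common convention assumed here, not taken from the original example.

In [ ]:
# Sketch: robust photo-z quality metrics (conventions assumed, not from the original).
dz = (zpred - ztest) / (1 + ztest)
sigma_nmad = 1.4826 * np.median(np.abs(dz - np.median(dz)))
outlier_frac = np.mean(np.abs(dz) > 0.15)
print("sigma_NMAD = %.3g, outlier fraction = %.3g" % (sigma_nmad, outlier_frac))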

In [ ]:
%matplotlib inline
ax = plt.axes()
plt.scatter(ztest, zpred, c='k', lw=0, s=4)
plt.plot(axis_lim, axis_lim, '--k')
plt.plot(axis_lim, axis_lim + rms, ':k')
plt.plot(axis_lim, axis_lim - rms, ':k')
plt.xlim(axis_lim)
plt.ylim(axis_lim)

plt.text(0.99, 0.02, "RMS error = %.2g" % rms,
         ha='right', va='bottom', transform=ax.transAxes,
         bbox=dict(ec='w', fc='w'), fontsize=16)

plt.title('Photo-z: Nearest Neighbor Regression')
plt.xlabel(r'$\mathrm{z_{spec}}$', fontsize=14)
plt.ylabel(r'$\mathrm{z_{phot}}$', fontsize=14)
plt.show()
