In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display
from astropy.io import ascii
from astropy.table import Table, Column
%matplotlib inline
In [2]:
from astropy.coordinates import SkyCoord
from astropy import units as u
In [3]:
cat = ascii.read('cat_lau.csv')
In [4]:
RA = cat['RAh'] + cat['RAm']*(1/60.) + cat['RAs']*(1./3600.)
Dec= -1 * (cat['DEd']*u.deg + cat['DEm']*u.arcmin + cat['DEs']*u.arcsec)
coord = SkyCoord(ra=RA*15.*u.deg, dec=Dec)
In [5]:
cat['l'] = coord.galactic.l.deg
cat['b'] = coord.galactic.b.deg
cat['J-Ks'] = cat['J$_{2\\prime\\prime}$'] - cat['Ks$_{2\\prime\\prime}$']
cat['H-Ks'] = cat['H$_{2\\prime\\prime}$'] - cat['Ks$_{2\\prime\\prime}$']
cat['J-H'] = cat['J$_{2\\prime\\prime}$'] - cat['H$_{2\\prime\\prime}$']
In [6]:
plt.figure(figsize=(12, 6))
#plt.suptitle(r'$(2 \pi R_{1/2}^2)$', fontsize=14)
plt.subplot(121)
plt.scatter(cat['l'][cat['b']>0.],
cat['b'][cat['b']>0.],
c=cat['J-Ks'][cat['b']>0.],
s=3.*2*np.pi*cat['R$_{1/2}$'][cat['b']>0.]**2.,
cmap='RdBu_r')
plt.colorbar(label=r'$J-Ks$')
plt.title(r'Tile d115 $area=(2 \pi R_{1/2}^2)$')
plt.xlabel(r'$l$')
plt.ylabel(r'$b$')
plt.grid()
plt.subplot(122)
plt.scatter(cat['l'][cat['b']<0.],
cat['b'][cat['b']<0.],
c=cat['J-Ks'][cat['b']<0.],
s=3.*2*np.pi*cat['R$_{1/2}$'][cat['b']<0.]**2.,
cmap='RdBu_r')
plt.colorbar(label=r'$J-Ks$')
plt.title(r'Tile d010 $area=(2 \pi R_{1/2}^2)$')
plt.xlabel(r'$l$')
plt.ylabel(r'$b$')
plt.grid()
plt.tight_layout()
plt.show()
In [7]:
plt.figure(figsize=(12, 6))
#plt.suptitle(r'$(2 \pi R_{1/2}^2)$', fontsize=14)
plt.subplot(121)
plt.scatter(cat['l'][cat['b']>0.],
cat['b'][cat['b']>0.],
c=cat['Visual'][cat['b']>0.]=='True',
s=3.*2*np.pi*cat['R$_{1/2}$'][cat['b']>0.]**2.)
#cmap='tab10')
plt.colorbar(label=r'Visual?')
plt.title(r'Tile d115 $area=(2 \pi R_{1/2}^2)$')
plt.xlabel(r'$l$')
plt.ylabel(r'$b$')
plt.grid()
plt.subplot(122)
plt.scatter(cat['l'][cat['b']<0.],
cat['b'][cat['b']<0.],
c=cat['Visual'][cat['b']<0.]=='True',
s=3.*2*np.pi*cat['R$_{1/2}$'][cat['b']<0.]**2.)
#cmap='RdBu_r')
plt.colorbar(label=r'Visual?')
plt.title(r'Tile d010 $area=(2 \pi R_{1/2}^2)$')
plt.xlabel(r'$l$')
plt.ylabel(r'$b$')
plt.grid()
plt.tight_layout()
plt.show()
In [8]:
filtrado = (cat['l']>308.8) & (cat['b']<-2.)
plt.subplot(121)
plt.scatter(cat['J'][filtrado], cat['J-Ks'][filtrado])
plt.xlabel('J')
plt.ylabel('J-Ks')
plt.subplot(122)
plt.hist(cat['J-Ks'][filtrado])
plt.xlabel(r'$J-Ks$')
Out[8]:
In [9]:
d010 = ascii.read('d010_resto.dat', format='ipac')
d115 = ascii.read('d115_resto.dat', format='ipac')
In [4]:
print cat.colnames
In [541]:
gxs = cat[['J', 'H', 'Ks', 'J-Ks', 'J-H', 'H-Ks', 'R$_{1/2}$', 'C', '$\\epsilon$', 'n']].to_pandas()
In [542]:
d010['J-Ks'] = d010['MAG_APER_J_C'] - d010['MAG_APER_Ks_C']
d010['H-Ks'] = d010['MAG_APER_H_C'] - d010['MAG_APER_Ks_C']
d010['J-H'] = d010['MAG_APER_J_C'] - d010['MAG_APER_H_C']
d115['J-Ks'] = d115['MAG_APER_J_C'] - d115['MAG_APER_Ks_C']
d115['H-Ks'] = d115['MAG_APER_H_C'] - d115['MAG_APER_Ks_C']
d115['J-H'] = d115['MAG_APER_J_C'] - d115['MAG_APER_H_C']
In [543]:
strs_d010= d010[['MAG_PSF_J_C','MAG_PSF_H_C','MAG_PSF_Ks_C', 'J-Ks', 'J-H', 'H-Ks',
'FLUX_RADIUS_051','C','ELLIPTICITY','SPHEROID_SERSICN']].to_pandas()
In [544]:
strs_d115= d115[['MAG_PSF_J_C','MAG_PSF_H_C','MAG_PSF_Ks_C', 'J-Ks', 'J-H', 'H-Ks',
'FLUX_RADIUS_051','C','ELLIPTICITY','SPHEROID_SERSICN']].to_pandas()
In [545]:
strs = pd.concat([strs_d010, strs_d115])
In [546]:
strs = strs[strs.MAG_PSF_J_C < 30]
gxs = gxs[gxs.J<30]
In [547]:
pd.scatter_matrix(gxs, alpha=0.3, figsize=(8, 8), diagonal='kde')
plt.show()
In [548]:
strs = strs[(strs.MAG_PSF_J_C>12.)&(strs.MAG_PSF_H_C>12.)&(strs.MAG_PSF_Ks_C>12.)&(strs.C>0.)]
strs = strs[(strs.MAG_PSF_J_C<25.)&(strs.MAG_PSF_H_C<25.)&(strs.MAG_PSF_Ks_C<25.)&(strs.FLUX_RADIUS_051<5.)]
print 'now we have {} stars'.format(len(strs))
print r'After $R_{1/2}<1.005$ we have'+ ' {} stars'.format(len(strs[strs.FLUX_RADIUS_051>1.005]))
strs = strs[strs.FLUX_RADIUS_051>1.005]
In [549]:
pd.scatter_matrix(strs, alpha=0.3, figsize=(8, 8), diagonal='kde')
plt.show()
In [551]:
bins = np.arange(9, 20, 1.)
plt.figure(figsize=(6, 2))
plt.subplot(131)
plt.hist(strs['MAG_PSF_Ks_C'], log=True, bins=bins)
plt.xlabel(r'Ks mag')
plt.subplot(132)
plt.hist(strs['MAG_PSF_H_C'], log=True, bins=bins)
plt.xlabel(r'H mag')
plt.subplot(133)
plt.hist(strs['MAG_PSF_J_C'], log=True, bins=bins)
plt.xlabel(r'J mag')
plt.show()
In [552]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn import neighbors
import sklearn.cross_validation as cv
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
In [553]:
def experiment(clf, x, y, nfolds=10):
skf = StratifiedKFold(n_splits=nfolds)
probabilities = np.array([])
predictions = np.array([])
y_testing = np.array([])
for train, test in skf.split(x, y):
x_train = x[train]
y_train = y[train]
clf.fit(x_train, y_train)
x_test = x[test]
y_test = y[test]
pr = clf.predict(x_test)
probs = clf.predict_proba(x_test)[:, 0]
probabilities = np.hstack([probabilities, probs])
predictions = np.hstack([predictions, pr])
y_testing = np.hstack([y_testing, y_test])
print metrics.classification_report(y_testing, predictions)
fpr, tpr, thresholds = metrics.roc_curve(y_testing, 1.-probabilities)
pr_rec_curve = metrics.precision_recall_curve(y_testing, 1.-probabilities)
roc_auc = metrics.auc(fpr, tpr)
return {'fpr': fpr,
'tpr': tpr,
'thresh': thresholds,
'roc_auc': roc_auc,
'pr_rec_curve': pr_rec_curve,
'y_test': y_testing,
'predictions': predictions,
'probabilities': probabilities,
'confusion_matrix': metrics.confusion_matrix(y_testing, predictions),
}
In [554]:
#choices = np.random.choice(40000, 2000)
strs2 = strs
In [555]:
X = np.vstack((strs2.as_matrix(), gxs.as_matrix()))
Y = np.hstack((np.repeat(0, len(strs2)), np.repeat(1, len(gxs))))
In [556]:
res_Dtree = experiment(DecisionTreeClassifier(max_leaf_nodes=12), X, Y, 10)
In [557]:
res_knn = experiment(neighbors.KNeighborsClassifier(n_neighbors=3, weights='uniform'), X, Y, 10)
In [563]:
res_svc = experiment(svm.SVC(kernel='linear', probability=True), X, Y, 10)
In [574]:
res_Rforest = experiment(RandomForestClassifier(n_estimators=1000,
max_features=8,
min_samples_leaf=20,
n_jobs=8), X, Y, 10)
In [565]:
res_C45 = experiment(DecisionTreeClassifier(criterion='entropy', min_samples_leaf=15), X, Y, 10)
In [575]:
plt.figure(figsize=(6, 6))
#plt.figaspect(.8)
fpr = res_Dtree['fpr']
tpr = res_Dtree['tpr']
roc_auc = metrics.auc(fpr, tpr)
plt.plot(fpr, tpr, lw=1.5, color='blue', label='Dtree AUC={:06.4f}'.format(roc_auc))
fpr = res_C45['fpr']
tpr = res_C45['tpr']
roc_auc = metrics.auc(fpr, tpr)
plt.plot(fpr, tpr, lw=1.5, color='red', label='C45 AUC={:06.4f}'.format(roc_auc))
fpr = res_Rforest['fpr']
tpr = res_Rforest['tpr']
roc_auc = metrics.auc(fpr, tpr)
plt.plot(fpr, tpr, lw=1.5, color='green', label='Rforest AUC={:06.4f}'.format(roc_auc))
fpr = res_svc['fpr']
tpr = res_svc['tpr']
roc_auc = metrics.auc(fpr, tpr)
plt.plot(fpr, tpr, lw=1.5, color='black', label='SVM AUC={:06.4f}'.format(roc_auc))
fpr = res_knn['fpr']
tpr = res_knn['tpr']
roc_auc = metrics.auc(fpr, tpr)
plt.plot(fpr, tpr, lw=1.5, color='magenta', label='KNN AUC={:06.4f}'.format(roc_auc))
#plt.xlim(0, 0.2)
#plt.ylim(0.8, 1)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.title('Receiver Operating Characteristic Curve')
Out[575]:
In [576]:
plt.figure(figsize=(6, 6))
#plt.figaspect(.8)
pr, rec, thr = res_Dtree['pr_rec_curve']
plt.plot(rec, pr, lw=1.5, color='blue', label='Dtree AUC={:06.4f}'.format(roc_auc))
pr, rec, thr = res_C45['pr_rec_curve']
plt.plot(rec, pr, lw=1.5, color='red', label='C45 AUC={:06.4f}'.format(roc_auc))
pr, rec, thr = res_Rforest['pr_rec_curve']
plt.plot(rec, pr, lw=1.5, color='green', label='Rforest AUC={:06.4f}'.format(roc_auc))
pr, rec, thr = res_svc['pr_rec_curve']
plt.plot(rec, pr, lw=1.5, color='black', label='SVM AUC={:06.4f}'.format(roc_auc))
pr, rec, thr = res_knn['pr_rec_curve']
plt.plot(rec, pr, lw=1.5, color='magenta', label='KNN AUC={:06.4f}'.format(roc_auc))
#plt.xlim(0, 0.2)
#plt.ylim(0.8, 1)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.legend(loc='lower right')
plt.title('Precision-Recall Curve')
Out[576]:
In [569]:
clf = DecisionTreeClassifier(criterion='entropy', max_leaf_nodes=10)
clf.fit(X, Y)
Out[569]:
In [570]:
from IPython.display import Image
from sklearn.tree import export_graphviz
import pydotplus
dot_data = export_graphviz(clf, out_file=None,
feature_names=gxs.columns,
class_names=['star', 'galaxy'],
filled=True,
rounded=True,
special_characters=True)
graph = pydotplus.graph_from_dot_data(dot_data)
Image(graph.create_png())
Out[570]:
In [ ]: