In [1]:
import pynbody as pb
import numpy as np
import matplotlib.pyplot as plt
%matplotlib nbagg
In [2]:
# Load the GRAFIC-format snapshot and convert all arrays to physical
# units in place.
s=pb.load('test_grafic++.02000')
s.physical_units()
In [250]:
h=s.halos()
In [ ]:
In [3]:
# Quick sanity check: mass of the first dark-matter particle, and the
# total mass of the snapshot.
print s.d['mass'][0]
mtot = s['mass'].sum()
In [4]:
# 2-D histogram of the dark-matter particle positions projected on x-y.
plt.figure()
hg = pb.plot.hist2d(s.d['x'], s.d['y'], colorbar=True)
In [5]:
# Same projection but in the x-z plane.
plt.figure()
p=pb.plot.hist2d(s.d['x'], s.d['z'], colorbar=True)
In [29]:
# Select a cube of half-width d kpc around the origin by intersecting a
# band-pass filter on each coordinate axis.  The unit strings are built
# once instead of being copy-pasted three times.
d=2300
lo, hi = str(-d)+' kpc', str(d)+' kpc'
f = pb.filt.BandPass('x', lo, hi)
for axis in ('y', 'z'):
    f = f & pb.filt.BandPass(axis, lo, hi)
In [30]:
len(s[f])
Out[30]:
In [480]:
# y-z projection of the dark matter inside the selected cube.
# NOTE(review): this rebinds `h`, shadowing the halo catalogue loaded earlier.
plt.figure()
h = pb.plot.hist2d(s[f].d['y'], s[f].d['z'], colorbar=True)
In [ ]:
In [92]:
# Pull the filtered dark-matter positions into a plain (3, N) array whose
# rows are the x, y and z coordinates.
data = s.d[f]
subdata = np.array([data[axis] for axis in ('x', 'y', 'z')])
In [277]:
plt.close('all')
from matplotlib.widgets import Slider, Button, RadioButtons
fig=plt.figure(figsize=(10,10))
ax=fig.add_subplot(211)
ax2=fig.add_subplot(212)
H, b, e=np.histogram2d(subdata[0], subdata[1], bins=300)
G, b, e=np.histogram2d(subdata[0], subdata[2], bins=300)
ax.imshow(np.log10(H.T+1), origin='lower', extent=(-d, d, -d, d))
ax2.imshow(np.log10(G.T+1), origin='lower', extent=(-d, d, -d, d))
ax.set_xlim(-d, d)
ax.set_ylim(-d, d)
ax2.set_xlim(-d, d)
ax2.set_ylim(-d, d)
ind=np.array(np.random.random(10)*len(subdata[0]), dtype='int')
print repr(ind)
for p in range(10):
xp, yp, zp = subdata[0][ind], subdata[1][ind], subdata[2][ind]
ax.plot(xp[p],yp[p],marker='.',color='w')
ax.text(xp[p],yp[p],str(p),size=16,color='w')
ax2.plot(xp[p],zp[p],marker='.',color='w')
ax2.text(xp[p],zp[p],str(p),size=16,color='w')
In [169]:
# void = 0, filament = 1, halo = 2
train_class = [1,2,1, 2,1,0,1,1, 0,1, 2,1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 0, 2, 1, 0, 2,2, 1]
train_ind = [222302,66250 ,126701 ,118113 , 57768 , 79664 ,152720 , 30732, 131215 , 32862, 59165 ,123622 ,158104 , 75093 , 58209 ,173018, 126477, 139393 , 44318 ,177947, 126922, 69881, 10218, 62286, 62323, 176011, 84562, 105399,
53284, 189209]
In [603]:
np.bincount(train_class) / 30.
Out[603]:
In [481]:
subdata.shape
Out[481]:
In [567]:
# For every particle find its 20 nearest neighbours in 3-D position space.
from sklearn.neighbors import NearestNeighbors
nbrs = NearestNeighbors(n_neighbors=20, algorithm='ball_tree').fit(subdata.T)
# distances/indices have shape (N, 20); since the query set equals the fit
# set, column 0 is each particle itself at distance 0.
distances, indices = nbrs.kneighbors(subdata.T)
Out[567]:
In [607]:
def _neighbor_spread(pos, nbr_idx, k):
    """Per-particle spread of the axis offsets to the k-th listed neighbour.

    pos is the (3, N) coordinate array; nbr_idx is the (N, n_neighbors)
    index array from kneighbors.  Returns a length-N array: max over axes
    minus min over axes of |pos - pos[neighbour]|.
    """
    dist = abs(pos - pos[:, nbr_idx.T[k]])
    return dist.max(axis=0) - dist.min(axis=0)

# Neighbour-spread features at three neighbour ranks; the original cell
# computed these with three copy-pasted blocks.
diff5 = _neighbor_spread(subdata, indices, 4)    # 5th neighbour
diff10 = _neighbor_spread(subdata, indices, 9)   # 10th neighbour
diff20 = _neighbor_spread(subdata, indices, -1)  # last == 20th neighbour
In [608]:
features=[s[f].d['rho'], s[f].d['v_disp'], diff5, diff10, diff20]
In [622]:
# Sanity check: distribution of the 20th-neighbour spread feature.
plt.figure()
plt.hist(diff20, bins=20)
Out[622]:
In [609]:
len(features)
Out[609]:
In [610]:
features=np.array(features).T
In [611]:
# Train a gradient-boosted classifier on the hand-labelled particles,
# tuning hyper-parameters with a cross-validated grid search
# (library-default CV splitting).
X_train = features[train_ind]
y_train = train_class
#print y_train
#print X_train
# NOTE(review): sklearn.cross_validation / sklearn.grid_search are the
# pre-0.18 module names (later moved to sklearn.model_selection);
# cross_val_score and ShuffleSplit are imported but unused in this cell.
from sklearn.cross_validation import cross_val_score, ShuffleSplit
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.grid_search import GridSearchCV
grid = GridSearchCV(GradientBoostingClassifier(),
{"n_estimators": [10, 50, 100],
"learning_rate": [0.0001, 0.001, 0.01, 0.1],
"max_leaf_nodes": [2, 4, 8, 16]}, n_jobs=-1)
grid.fit(X_train, y_train)
Out[611]:
In [556]:
grid.best_estimator_
Out[556]:
In [612]:
# Partial dependence of the class-0 (void) decision on the first three
# features, plus the fitted feature importances.
# NOTE(review): sklearn.ensemble.partial_dependence and its `label`
# keyword are the old (pre-0.22) API.
from sklearn.ensemble.partial_dependence import plot_partial_dependence
plot_partial_dependence(grid.best_estimator_, X_train, [0, 1, 2], label=0)
print grid.best_estimator_.feature_importances_
In [613]:
# Classify every particle in the sub-volume with the tuned model.
# NOTE(review): the name `rf` is misleading -- this is the
# GradientBoostingClassifier picked by the grid search, not a random forest.
rf = grid.best_estimator_
rf_preds = rf.predict(features)
rf_predprob = rf.predict_proba(features)
Relative importance of each of the five input features:
In [614]:
rf.feature_importances_
Out[614]:
In [615]:
print 1. * sum(rf_preds == 0)/len(rf_preds)
print 1. * sum(rf_preds == 1)/len(rf_preds)
print 1. * sum(rf_preds == 2)/len(rf_preds)
In [616]:
def plot_confidence(class_prob, title):
    """Scatter the filtered DM particles in the y-z plane, coloured by the
    classifier's per-particle probability for one class (greyscale)."""
    plt.figure()
    plt.scatter(s[f].d['y'], s[f].d['z'], edgecolor="None", c=class_prob,
                marker='.', vmin=0, vmax=0.9)
    plt.colorbar()
    plt.set_cmap("Greys")
    plt.title(title)

v = 0  # class index: 0 = void
plot_confidence(rf_predprob[:, v], 'Void Confidence')
Out[616]:
In [617]:
# Per-particle probability map for class 1 (filament) in the y-z plane.
# TODO(review): three near-identical confidence-map cells -- consider
# extracting a shared plotting helper.
plt.figure()
#v = pb.plot.hist2d(s[f].d['y'], s[f].d['z'])
v = 1
plt.scatter(s[f].d['y'], s[f].d['z'], edgecolor="None", c=rf_predprob[:,v], marker='.', vmin=0, vmax=0.9)
plt.colorbar()
plt.set_cmap("Greys")
plt.title('Filament Confidence')
Out[617]:
In [618]:
# Per-particle probability map for class 2 (halo) in the y-z plane.
plt.figure()
#v = pb.plot.hist2d(s[f].d['y'], s[f].d['z'])
v = 2
plt.scatter(s[f].d['y'], s[f].d['z'], edgecolor="None", c=rf_predprob[:,v], marker='.', vmin=0, vmax=0.9)
plt.colorbar()
plt.set_cmap("Greys")
plt.title('Halo Confidence')
Out[618]:
In [601]:
# Over-plot the particles classified as halos (class 2) in white on top of
# the x-y density map.  The original reused the single name `v` first for
# the hist2d return value and then for the class index; use distinct names.
plt.figure()
pb.plot.hist2d(s[f].d['x'], s[f].d['y'])
halo_cls = 2
is_halo = rf_preds == halo_cls
plt.scatter(s[f].d['x'][is_halo], s[f].d['y'][is_halo], edgecolor="None", c='w', marker='.')
Out[601]:
In [202]:
# Log-scaled hexbin density map of the x-y plane with the predicted voids
# (class 0) over-plotted in white.
plt.figure()
v = plt.hexbin(s[f].d['x'], s[f].d['y'], bins = 'log', mincnt = 3)
plt.colorbar()
plt.scatter(s[f].d['x'][rf_preds == 0], s[f].d['y'][rf_preds == 0], edgecolor="None", c='w',s = 4)
Out[202]:
In [548]:
overdensity = np.array(features[:,0]/np.mean(s[f]['rho']))
In [602]:
# Feature-space view: log overdensity vs velocity dispersion, coloured by
# predicted class, with the hand-labelled training points as large stars.
plt.figure()
plt.xlabel('Density')
plt.ylabel('V_disp')
# NOTE(review): `classes` is defined but never used (no legend is drawn).
classes = ["Void","Filaments","Halo"]
plt.set_cmap('jet')
plt.scatter(np.log10(overdensity), features[:,1], marker='.', edgecolor='none', c=rf_preds)
plt.scatter(np.log10(overdensity[train_ind]), np.array(features[:,1])[train_ind], marker='*', edgecolor='m', c=train_class, s=200)
Out[602]:
Results from clustering the positional data:
In [36]:
# Density map with cluster labels over-plotted.
# NOTE(review): `sub`, `nobkd` and `labels` are not defined in any visible
# cell -- this relies on hidden kernel state and will fail on a fresh
# Restart-and-Run-All.
pb.plot.hist2d(s[f].d['x'], s[f].d['y'], colorbar=True)
plt.scatter(sub[nobkd,0], sub[nobkd,1], s=8, edgecolor='none' , c=labels[nobkd])
Out[36]: