In [12]:
import h5py, numpy, sklearn.metrics.pairwise, sklearn.cross_validation
# Path to the HDF5 training file, relative to the kernel's working directory.
# The next cell opens it read-only and reads the 'astro', 'cnn_outputs' and
# 'labels' datasets from it.
TRAINING_H5 = '../training.h5'
In [21]:
# Estimate the spread of squared pairwise distances between training examples.
#
# Outputs (notebook globals used by later cells):
#   features     -- stratified 40% subsample of the stacked feature matrix
#   distances    -- full pairwise distance matrix of `features`
#                   (pairwise_distances uses the Euclidean metric by default)
#   sq_distances -- flat array of squared distances between *distinct* rows
#   low, med, up -- 25th / 50th / 75th percentiles of `sq_distances`

# Fixed seed so the stratified subsample, and therefore the percentiles, are
# reproducible across kernel restarts.
SUBSAMPLE_SEED = 0

with h5py.File(TRAINING_H5, 'r') as f:
    # Materialise everything we need as in-memory arrays while the file is
    # open, so nothing below depends on a live h5py dataset handle.
    features_all = numpy.hstack([f['astro'], f['cnn_outputs']])
    labels = numpy.asarray(f['labels'])

# Keep 40% of the rows, preserving the label proportions; the rest is discarded.
features, _ = sklearn.cross_validation.train_test_split(
    features_all,
    train_size=0.4,
    stratify=labels,
    random_state=SUBSAMPLE_SEED,
)

# n_jobs=-1 spreads the computation over all available cores.
distances = sklearn.metrics.pairwise.pairwise_distances(features, n_jobs=-1)

# Exclude the diagonal: every row is at distance 0 from itself, and those
# zeros would pull the percentiles downward. Each unordered pair still appears
# twice ((i, j) and (j, i)), which does not bias the percentiles.
off_diagonal = ~numpy.eye(len(distances), dtype=bool)
sq_distances = distances[off_diagonal]  # boolean indexing returns a flat copy
sq_distances **= 2                      # in place on the copy; `distances` is untouched

low, med, up = numpy.percentile(sq_distances, [25, 50, 75])
In [22]:
# Display the reciprocals of the 25th / 50th / 75th percentiles of the squared
# pairwise distances as a 3-tuple.
# NOTE(review): presumably candidate kernel-width (gamma) values -- confirm.
tuple(1 / quartile for quartile in (low, med, up))
Out[22]:
In [20]:
# Display the reciprocal of the 25th-percentile squared distance on its own.
# NOTE(review): this repeats the first element of the tuple shown by the
# previous cell, and its execution count (20) is earlier than that of the cell
# that computes `low` (21), so it was last run against an older value of `low`.
1/low
Out[20]:
In [ ]: