A body of black

that carried no reflection

defying its own room

un-earthly eggs of decreation

(In Flames)


Analysis of potentially hazardous asteroids' orbital parameters

Using data on already discovered asteroids, we will analyse the orbital parameters of near-Earth asteroids and identify families of orbits with a high probability of containing yet undiscovered potentially hazardous asteroids.


In [1]:
import pickle
from copy import copy, deepcopy
import numpy as np
import pandas as pd
import scipy.stats as ss
from sklearn import neighbors, svm
import matplotlib as mpl

# Import Asterion modules
import read_database as rdb
import generate_orbits as go
import learn_data as ld
import visualize_data as vd
import asterion_learn as al

# Plotting settings
%matplotlib inline
# font = {'size': 32}
# font = {'size': 25}
font = {'size': 14}
mpl.rc('font', **font)

Asteroid database

Load asteroid database:


In [2]:
# columns = ['a', 'e', 'i', 'w', 'om', 'q', 'H', 'neo', 
#                'pha', 'moid', 'per', 'n', 'ma', 'epoch']

columns = ['a', 'e', 'i', 'w', 'om', 'q', 'H', 'neo', 'pha', 'moid']
database = rdb.load_database(columns, jobtime=True)
database.head(n=5)


Loading asteroid database...
Asteroid database loaded in 6.207178 seconds.
Out[2]:
neo pha H e a q i om w moid
0 N N 3.34 0.075705 2.768134 2.558572 10.591705 80.314273 72.814757 1.59307
1 N N 4.13 0.230835 2.772779 2.132725 34.841162 173.087844 309.999215 1.23093
2 N N 5.33 0.256601 2.668831 1.984005 12.990102 169.862997 248.236797 1.03424
3 N N 3.20 0.089067 2.361348 2.151031 7.140406 103.842512 151.111635 1.13866
4 N N 6.85 0.191552 2.574343 2.081222 5.367954 141.591962 358.878896 1.09307

Extract NEAs


In [3]:
neas, num_neas = rdb.get_neas(database, columns)
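
The extraction above relies on the 'neo' flag already present in the database (see the table head, where it holds 'N'/'Y' values). A minimal pandas sketch of such a filter, assuming rdb.get_neas selects on that flag (the actual helper may also validate or reorder the columns):

def get_neas_sketch(db):
    # keep only rows flagged as near-Earth objects in the source data
    neas = db[db.neo == 'Y']
    return neas, len(neas)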

Exclude NEAs with extreme orbital parameters


In [4]:
print len(neas[neas.a > 4])
print len(neas[neas.i > 90])


12
2

In [5]:
neas = neas[neas.a < 4]
neas = neas[neas.i < 90]

Extract close-approaching NEAs


In [6]:
neas_close = neas[neas.q <= 1.08]

Extract groups of NEAs


In [7]:
# apollos, num_apollos = rdb.get_apollos(neas)
# atens, num_atens = rdb.get_atens(neas)
# amors, num_amors = rdb.get_amors(neas)

# print "num_neas:", num_neas
# print "apollos:", num_apollos, "ratio:", num_apollos/float(num_neas)
# print "atens:", num_atens, "ratio:", num_atens/float(num_neas)
# print "amors:", num_amors, "ratio:", num_amors/float(num_neas)

Extract PHAs and NHAs


In [8]:
haz_real, nohaz_real = rdb.get_hazMOID(neas_close)
print "Number of real PHAs", len(haz_real)
print "Number of real NHAs:", len(nohaz_real)


Number of real PHAs: 6606
Number of real NHAs: 4476
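
rdb.get_hazMOID splits the close-approaching NEAs into PHAs and NHAs by their Earth MOID. A hedged sketch of such a split, assuming the conventional 0.05 AU threshold on the 'moid' column (the actual threshold and column handling live inside read_database):

def split_by_moid(db, threshold=0.05):
    # MOID <= 0.05 AU: potentially hazardous; otherwise: not hazardous
    haz = db[db.moid <= threshold]
    nohaz = db[db.moid > threshold]
    return haz, nohaz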

In [9]:
# vd.display_allparams([neas_close, neas_close], vd.combs, vd.colnames)

Plot orbital distributions of real PHAs and NHAs in the w-q plane


In [10]:
cutcol=['w', 'q']
vd.plot_distribution2d(cutcol, haz=haz_real, nohaz=nohaz_real, invertaxes=[0,1], labels=True)



Generate virtual asteroids

Specify the subset of orbital parameters to be fitted and generated


In [11]:
names = ['a', 'i', 'w', 'om', 'q']
full_names = [vd.colnames[nm] for nm in names]
data_subset = neas_close[names]

Fit continuous distributions to the asteroids' orbital parameters


In [12]:
statdists = [ss.rayleigh, ss.lognorm, ss.uniform, go.HarmonicDistribution(), ss.johnsonsu]
distlist = go.get_param_distributions(data_subset, names, statdists, n=30, verbose=True)
go.plot_param_distributions(distlist, full_names, figsize=(16, 12))


Data cdf(xmax): 1.000000 	rayleigh_cdf(xmax): 0.999225
Data cdf(xmax): 1.000000 	lognorm_cdf(xmax): 0.982480
Data cdf(xmax): 1.000000 	uniform_cdf(xmax): 1.000000
Data cdf(xmax): 1.000000 	harmonic_cdf(xmax): 0.999970
Data cdf(xmax): 1.000000 	johnsonsu_cdf(xmax): 0.923356
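
go.get_param_distributions pairs each orbital parameter with a statistical law (the semi-major axis with a Rayleigh law in the cell above). A minimal sketch of a single such fit, using only standard scipy.stats calls:

# fit a Rayleigh law to the semi-major axis column and report its CDF at xmax,
# mirroring the "rayleigh_cdf(xmax)" diagnostic printed above
a_vals = data_subset['a'].values
loc, scale = ss.rayleigh.fit(a_vals)
frozen_a = ss.rayleigh(loc=loc, scale=scale)
print "rayleigh_cdf(xmax): %f" % frozen_a.cdf(a_vals.max())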

Generate random asteroids from the fitted continuous distributions


In [13]:
rnum_str = '2e5'
rnum = int(float(rnum_str))
distdict = {name:dist for name, dist in zip(names, distlist)}
randdata = go.gen_orbits(distdict, num=rnum)
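
go.gen_orbits is assumed here to draw each parameter independently from its fitted distribution and to derive the remaining elements such as eccentricity; a hypothetical sketch of the sampling step, assuming the fitted objects expose a scipy-like .rvs() method:

def gen_orbits_sketch(distdict, num):
    # draw 'num' independent samples for every fitted orbital parameter
    cols = {name: dist.rvs(size=num) for name, dist in distdict.items()}
    return pd.DataFrame(cols)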

Check that the generated dataset contains only asteroids with elliptical orbits


In [14]:
e_rand = randdata['e']
print "Total number of generated asteroids:", len(randdata)
print "Number of failed asteroids:", len(e_rand[e_rand < 0])
print "Number of asteroids with hyperbolic orbits:", len(e_rand[e_rand > 1])


Total number of generated asteroids: 200000
Number of failed asteroids: 0
Number of asteroids with hyperbolic orbits: 0
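
These checks are consistent with the eccentricity being derived from the sampled semi-major axis and perihelion distance through q = a(1 - e), i.e. e = 1 - q/a (an assumption about go.gen_orbits, not a documented fact): e < 0 corresponds to q > a, while e > 1 would require a negative perihelion distance.

# assumed relation behind the generated 'e' column: e = 1 - q / a
e_check = 1.0 - randdata['q'] / randdata['a']
print "max |e_check - e|:", np.abs(e_check - randdata['e']).max()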

Calculate MOID for the generated virtual asteroids, or load previously computed results


In [15]:
haz_dump = './asteroid_data/neas_haz_gen_%s.p' % rnum_str
nohaz_dump = './asteroid_data/neas_nohaz_gen_%s.p' % rnum_str
try:
    # load cached MOID results from a previous run, if available
    haz_gen = rdb.loadObject(haz_dump)
    nohaz_gen = rdb.loadObject(nohaz_dump)
except:
    # otherwise compute MOID for the generated orbits and cache the split
    rdb.calc_moid(randdata, jobtime=True)
    haz_gen, nohaz_gen = rdb.get_hazMOID(randdata)

    rdb.dumpObject(haz_gen, haz_dump)
    rdb.dumpObject(nohaz_gen, nohaz_dump)
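
rdb.loadObject and rdb.dumpObject are assumed to be thin pickle wrappers used to cache the expensive MOID computation between notebook runs; a minimal sketch of that pattern:

def load_object(path):
    # read a previously pickled object from disk
    with open(path, 'rb') as f:
        return pickle.load(f)

def dump_object(obj, path):
    # pickle an object to disk so the next run can skip recomputation
    with open(path, 'wb') as f:
        pickle.dump(obj, f)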

In [16]:
print "Number of generated non-uniform PHAs", len(haz_gen)
print "Number of generated non-uniform NHAs:", len(nohaz_gen)


Number of generated non-uniform PHAs: 97817
Number of generated non-uniform NHAs: 102183

Generate an additional set of uniformly distributed virtual asteroids


In [17]:
names = ['a', 'i', 'w', 'om', 'q']
data_subset = neas_close[names]
unum_str = '3e4'
unum = int(float(unum_str))
statdists_u = [ss.uniform]*len(names)

distlist_u = go.get_param_distributions(data_subset, names, statdists_u, n=30, verbose=True)
distdict_u = {name:dist for name, dist in zip(names, distlist_u)}
randdata_u = go.gen_orbits(distdict_u, num=unum)


Data cdf(xmax): 1.000000 	uniform_cdf(xmax): 1.000000
Data cdf(xmax): 1.000000 	uniform_cdf(xmax): 1.000000
Data cdf(xmax): 1.000000 	uniform_cdf(xmax): 1.000000
Data cdf(xmax): 1.000000 	uniform_cdf(xmax): 1.000000
Data cdf(xmax): 1.000000 	uniform_cdf(xmax): 1.000000

Check that the generated dataset contains only asteroids with elliptical orbits


In [18]:
e_randu = randdata_u['e']
print "Total number of generated uniform asteroids:", len(randdata_u)
print "Number of failed asteroids:", len(e_randu[e_randu < 0])
print "Number of asteroids with hyperbolic orbits:", len(e_randu[e_randu > 1])


Total number of generated uniform asteroids: 30000
Number of failed asteroids: 0
Number of asteroids with hyperbolic orbits: 0

Calculate MOID for the uniformly distributed virtual asteroids, or load previously computed results


In [19]:
haz_dump = './asteroid_data/neas_haz_genu_%s.p' % unum_str
nohaz_dump = './asteroid_data/neas_nohaz_genu_%s.p' % unum_str
try:
    haz_genu = rdb.loadObject(haz_dump)
    nohaz_genu = rdb.loadObject(nohaz_dump)
except:
    rdb.calc_moid(randdata_u, jobtime=True)
    haz_genu, nohaz_genu = rdb.get_hazMOID(randdata_u)

    rdb.dumpObject(haz_genu, haz_dump)
    rdb.dumpObject(nohaz_genu, nohaz_dump)

In [20]:
print "Number of generated uniform PHAs", len(haz_genu)
print "Number of generated uniform NHAs:", len(nohaz_genu)


Number of generated uniform PHAs: 7173
Number of generated uniform NHAs: 22827

Analyze virtual asteroids' orbital parameters


In [21]:
datasets_real = [haz_real, nohaz_real]
datasets_gen = [haz_gen, nohaz_gen]
datasets_genu = [haz_genu, nohaz_genu]

Investigate NEA distributions for pairs of orbital parameters


In [22]:
# vd.plot_alldistcombs(haz_real, nohaz_real)

Plot distributions in the w-q plane for the non-uniformly generated virtual asteroids


In [23]:
cols = ['w', 'q']
vd.plot_distributions2d(cols, haz_gen, nohaz_gen, labels=True, invertaxes=[0,1])


Plot distributions in the w-q plane for the uniformly generated virtual asteroids


In [24]:
vd.plot_distributions2d(cols, haz_genu, nohaz_genu, labels=True, invertaxes=[0,1])



Extract several clusters of NEAs

Augment the uniform datasets with their symmetric copies mirrored about the vertical planes at w=90 and w=180


In [25]:
haz_genu = ld.add_doublemirror_column(haz_genu, 'w', 180)
nohaz_genu = ld.add_doublemirror_column(nohaz_genu, 'w', 180)
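
ld.add_doublemirror_column is assumed to append mirror copies of the dataset reflected in the 'w' column; a hypothetical sketch of a single reflection about a centre value c (w -> 2c - w, wrapped into [0, 360)), while the actual "double mirror" presumably applies both reflections:

def add_mirror_copy(df, col, centre):
    # reflect the column about 'centre' and stack the copy onto the original rows
    mirrored = df.copy()
    mirrored[col] = (2.0 * centre - mirrored[col]) % 360.0
    return pd.concat([df, mirrored], ignore_index=True)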

In [26]:
# haz_gen_mir = ld.add_doublemirror_column(haz_gen, 'w', 180)
# nohaz_gen_mir = ld.add_doublemirror_column(nohaz_gen, 'w', 180)

In [27]:
vd.plot_distributions2d(cols, haz_genu, nohaz_genu, labels=True, invertaxes=[0,1])


Cut the w and q columns from the datasets


In [28]:
haz_real_cut, nohaz_real_cut = ld.cut_params(haz_real, nohaz_real, cols)
haz_gen_cut, nohaz_gen_cut = ld.cut_params(haz_gen, nohaz_gen, cols)
haz_genu_cut, nohaz_genu_cut = ld.cut_params(haz_genu, nohaz_genu, cols)
# haz_gen_mir_cut, nohaz_gen_mir_cut = ld.cut_params(haz_gen_mir, nohaz_gen_mir, cols)

Normalize datasets


In [29]:
bounds = ld.common_bounds([haz_real_cut, nohaz_real_cut, 
                           haz_gen_cut, nohaz_gen_cut,
                           haz_genu_cut, nohaz_genu_cut])

haz_real_cut, haz_real_sc = ld.normalize_dataset(haz_real_cut, bounds=bounds)
nohaz_real_cut, nohaz_real_sc = ld.normalize_dataset(nohaz_real_cut, bounds=bounds)

haz_gen_cut, haz_gen_sc = ld.normalize_dataset(haz_gen_cut, bounds=bounds)
nohaz_gen_cut, nohaz_gen_sc = ld.normalize_dataset(nohaz_gen_cut, bounds=bounds)

haz_genu_cut, haz_genu_sc = ld.normalize_dataset(haz_genu_cut, bounds=bounds)
nohaz_genu_cut, nohaz_genu_sc = ld.normalize_dataset(nohaz_genu_cut, bounds=bounds)

# haz_gen_mir_cut, haz_gen_mir_sc = ld.normalize_dataset(haz_gen_mir_cut, bounds=bounds)
# nohaz_gen_mir_cut, nohaz_gen_mir_sc = ld.normalize_dataset(nohaz_gen_mir_cut, bounds=bounds)
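
ld.common_bounds and ld.normalize_dataset appear to implement min-max scaling with bounds shared across all datasets, so that real and generated asteroids end up in the same unit square. A sketch under that assumption (the layout of 'bounds' is guessed as per-column (min, max) pairs):

def normalize_minmax(data, bounds):
    # scale every column to [0, 1] using bounds common to all datasets
    lo = np.array([b[0] for b in bounds], dtype=float)
    hi = np.array([b[1] for b in bounds], dtype=float)
    scale = hi - lo
    return (np.asarray(data, dtype=float) - lo) / scale, scale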

Get common scales for each column in the datasets


In [30]:
scale_sets = [haz_gen_sc, nohaz_gen_sc, 
              haz_genu_sc, nohaz_genu_sc, 
              haz_real_sc, nohaz_real_sc] 

scales = ld.common_scales(scale_sets)

Find NEA clusters in the uniform dataset of NHAs


In [31]:
# DBSCAN parameters
eps = [0.022]
min_samples = [105]
dens_layers = zip(eps, min_samples)

extracted_clusters = al.extract_dbclusters(nohaz_genu_cut, dens_layers)
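
al.extract_dbclusters is assumed to wrap scikit-learn's DBSCAN, applying one (eps, min_samples) pair per density layer. A minimal sketch for a single layer, returning one point array per cluster and discarding noise:

from sklearn.cluster import DBSCAN

def extract_clusters_sketch(points, eps, min_samples):
    # label -1 marks noise; every other label is a density-connected cluster
    labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(points)
    return [points[labels == k] for k in sorted(set(labels)) if k != -1]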

In [32]:
vd.plot_densclusters(extracted_clusters, labels=cols, cmap='brg', 
                     scales=scales, invertaxes=[0, 1], figsize=(9,8))


Merge symmetric clusters


In [33]:
merged_clusters = al.merge_dbclusters(extracted_clusters, [[1,2,6], [4,5], [3,7]], merge_rest=False)
vd.plot_densclusters(merged_clusters, scales=scales, labels=cols, cmap='brg', invertaxes=[0, 1], figsize=(9,8))


Transform clusters


In [34]:
c1, c2, c3, c4 = merged_clusters

Split the last cluster at the middle value of the 'q' parameter


In [35]:
qsplit = 0.46
c4_q = c4[:,1]
c41 = c4[np.where(c4_q <= qsplit)]
c42 = c4[np.where(c4_q > qsplit)]

Merge clusters


In [36]:
c1_ = np.concatenate((c3, c42))
c2_ = deepcopy(c1)
c3_ = np.concatenate((c2, c41))

Set IDs for transformed clusters


In [37]:
c1_[:,2], c2_[:,2], c3_[:,2] = 1.0, 2.0, 3.0

Build new set of clusters


In [38]:
split_clusters = [c1_, c2_, c3_]
vd.plot_densclusters(split_clusters, scales=scales, labels=cols, cmap='brg', invertaxes=[0, 1], figsize=(9,8))


Extract 'XX' cluster from the dataset of generated uniform PHAs


In [39]:
eps = [0.028]
min_samples = [80]
dens_layers = zip(eps, min_samples)
extracted_clusters_X = al.extract_dbclusters(haz_genu_cut, dens_layers)

In [40]:
vd.plot_densclusters(extracted_clusters_X, scales=scales, labels=cols, cmap='spring', 
                     invertaxes=[0, 1], figsize=(9,8))


Edit IDs of the asteroids in the PHA belt and add them to the cluster set


In [41]:
c5, c6 = extracted_clusters_X
c5[:,2] = 4.0
split_clusters.append(c5)

In [42]:
vd.plot_densclusters(split_clusters, scales=scales, labels=cols, cmap='rainbow', 
                     invertaxes=[0, 1], figsize=(9,8))



Outline domains of NEAs using SVM


In [43]:
clf = svm.SVC(gamma=6., C=1000)
xtrain, clf, haz_prob = al.classify_dbclusters(split_clusters, clf, haz_real_cut, nohaz_real_cut)
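
al.classify_dbclusters presumably stacks the cluster points, uses their ID column as the class label, and fits the SVC on that training set; a sketch of the assumed training step:

def fit_cluster_svc(clusters, clf):
    # columns 0-1 hold the normalized (w, q) coordinates, column 2 the cluster ID
    xtrain = np.concatenate(clusters)
    clf.fit(xtrain[:, :2], xtrain[:, 2])
    return clf

# example: fit_cluster_svc(split_clusters, svm.SVC(gamma=6., C=1000))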

In [45]:
vd.plot_clf2d(clf, cols, num=400, haz_cut=haz_real_cut, nohaz_cut=nohaz_real_cut, figsize=(10,9),
              clustprobs=haz_prob[1:], invertaxes=[0, 1], scales=scales, cmap='winter')


Classify asteroid datasets by cluster IDs


In [ ]:
haz_real_clf = clf.predict(haz_real_cut)
nohaz_real_clf = clf.predict(nohaz_real_cut)

In [ ]:
haz_gen_clf = clf.predict(haz_gen_cut)
nohaz_gen_clf = clf.predict(nohaz_gen_cut)

In [ ]:
haz_genu_clf = clf.predict(haz_genu_cut)
nohaz_genu_clf = clf.predict(nohaz_genu_cut)

Export domains of real and virtual NEAs as separate datasets


In [ ]:
dirpath = './asteroid_data/'      
domain_ids = range(1, len(split_clusters) + 1)

In [ ]:
def export_subgroups(db_suffix, datasets, datasets_clf):
    """Dump each classified domain of a dataset (and its complement) as pickle files."""
    haz_nmb = dirpath + 'haz_' + db_suffix + '_dom%d.p'
    nohaz_nmb = dirpath + 'nohaz_' + db_suffix + '_dom%d.p'
    haz_db, nohaz_db = datasets
    haz_clf, nohaz_clf = datasets_clf

    for i in domain_ids:
        # rows assigned to domain i, and the remaining rows of each dataset
        haz_inds = np.where(haz_clf == i)[0]
        nohaz_inds = np.where(nohaz_clf == i)[0]
        haz_rest_inds = np.where(haz_clf != i)[0]
        nohaz_rest_inds = np.where(nohaz_clf != i)[0]
        
        haz_gen_dom = haz_db.iloc[haz_inds]
        nohaz_gen_dom = nohaz_db.iloc[nohaz_inds]
        haz_gen_rest = haz_db.iloc[haz_rest_inds]
        nohaz_gen_rest = nohaz_db.iloc[nohaz_rest_inds]
        
        # file names: '<kind>_<suffix>_dom<i>.p' and '<kind>_<suffix>_dom<i>_rest.p'
        haz_name = haz_nmb % i
        nohaz_name = nohaz_nmb % i
        haz_name_rest = haz_name[:-2] + '_rest.p'
        nohaz_name_rest = nohaz_name[:-2] + '_rest.p'
        
        rdb.dumpObject(haz_gen_dom, haz_name)
        rdb.dumpObject(nohaz_gen_dom, nohaz_name)
        rdb.dumpObject(haz_gen_rest, haz_name_rest)
        rdb.dumpObject(nohaz_gen_rest, nohaz_name_rest)

In [ ]:
export_subgroups('real', [haz_real, nohaz_real], [haz_real_clf, nohaz_real_clf])
export_subgroups('gen', [haz_gen, nohaz_gen], [haz_gen_clf, nohaz_gen_clf])
export_subgroups('genu', [haz_genu, nohaz_genu], [haz_genu_clf, nohaz_genu_clf])