In [1]:
import pickle
from copy import copy, deepcopy
import numpy as np
import pandas as pd
from sklearn import neighbors, svm
import matplotlib as mpl
# Import Asterion modules
import read_database as rdb
import learn_data as ld
import asterion_learn as al
import visualize_data as vd
# Plotting settings for the current notebook
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Alternative (larger) font size, kept for reference.
# font = {'size': 25}
font = {'size': 14}
mpl.rc('font', **font)
# Two corner points (0, 0) and (1, 1); in the code visible here it is only
# referenced from commented-out plotting calls.
plotgrid = np.array([[0.0, 0.0], [1.0, 1.0]])
Load generated and real NEAs from the domain #1
In [2]:
# Directory with the dumped asteroid datasets (relative to the notebook).
dirpath = './asteroid_data/'
# Base names of the dumps; each is combined with every entry of name_suffixes.
real_datasets = ['haz_real', 'nohaz_real']
gen_datasets = ['haz_gen', 'nohaz_gen']
# NOTE(review): identical to gen_datasets, so the "genu" frames are loaded from
# the same files as the "gen" frames -- confirm this is intended.
genu_datasets = ['haz_gen', 'nohaz_gen']
# First suffix: in-domain dump; second suffix: the "rest" dump.
name_suffixes = ['_dom1.p', '_dom1_rest.p']
In [3]:
def _dump_paths(datasets):
    """Return dump paths for `datasets`: each dataset with the first suffix, then each with the second."""
    return [dirpath + ds + ns for ns in name_suffixes for ds in datasets]

# The suffix-major ordering is what the four-way unpacking below relies on:
# (haz, nohaz) for '_dom1.p' followed by (haz, nohaz) for '_dom1_rest.p'.
dumps_real = _dump_paths(real_datasets)
dumps_gen = _dump_paths(gen_datasets)
dumps_genu = _dump_paths(genu_datasets)

# NOTE(review): rdb.loadObject presumably unpickles the '.p' files -- only load
# dumps from a trusted source.
haz_real, nohaz_real, haz_real_rest, nohaz_real_rest = [rdb.loadObject(path) for path in dumps_real]
haz_gen, nohaz_gen, haz_gen_rest, nohaz_gen_rest = [rdb.loadObject(path) for path in dumps_gen]
haz_genu, nohaz_genu, haz_genu_rest, nohaz_genu_rest = [rdb.loadObject(path) for path in dumps_genu]
In [4]:
gen_num = sum(map(len, [haz_gen, nohaz_gen]))
real_num = sum(map(len, [haz_real, nohaz_real]))
print "Number of virtual asteroids in the domain:", gen_num
print "Number of real asteroids in the domain:", real_num
Investigate distributions of orbital parameters for the domain #1
In [5]:
# vd.plot_alldistcombs(haz_gen, nohaz_gen, labels=True)
In [6]:
# Columns shown in the 2D distribution plot.
cutcol = ['w', 'q']
# q value used for the reference line here and for the dataset split in the next cell.
q_split = 1.066
# End points of a line at constant q_split running from 0 to 360 in the other coordinate.
p1, p2 = [[0.0, q_split], [360.0, q_split]]
vd.plot_distributions2d(cutcol, haz_gen, nohaz_gen, line=[p1, p2], invertaxes=[0,1], labels=True)
In [7]:
# Split every dataset on column 'q' at q_split. The first element of each
# returned pair is kept as the "dom1" subset used by the following cells; the
# '__'-suffixed second element is not used by the cells visible here.
# NOTE(review): which side of q_split ends up in the first element depends on
# ld.split_by_colval -- confirm against its implementation.
haz_gen_dom1, haz_gen_dom1__ = ld.split_by_colval(haz_gen, 'q', q_split)
nohaz_gen_dom1, nohaz_gen_dom1__ = ld.split_by_colval(nohaz_gen, 'q', q_split)
haz_genu_dom1, haz_genu_dom1__ = ld.split_by_colval(haz_genu, 'q', q_split)
nohaz_genu_dom1, nohaz_genu_dom1__ = ld.split_by_colval(nohaz_genu, 'q', q_split)
haz_real_dom1, haz_real_dom1__ = ld.split_by_colval(haz_real, 'q', q_split)
nohaz_real_dom1, nohaz_real_dom1__ = ld.split_by_colval(nohaz_real, 'q', q_split)
In [8]:
gen_dom1_num = sum(map(len, [haz_gen_dom1, nohaz_gen_dom1]))
real_dom1_num = sum(map(len, [haz_real_dom1, nohaz_real_dom1]))
print gen_dom1_num
print real_dom1_num
In [ ]:
In [9]:
# Accumulators for the Atira/Aten group: one independent list per
# (virtual/real, extracted hazardous / trapped non-hazardous) combination.
(haz_gen_extracted_aa, nohaz_gen_trapped_aa,
 haz_real_extracted_aa, nohaz_real_trapped_aa) = [], [], [], []
Atiras
In [10]:
# rdb.get_atiras results are unpacked as (subset, count) for each dom1 dataset;
# the counts are summed per population (virtual / real) and reported.
haz_gen_atiras, haz_gen_atiras_num = rdb.get_atiras(haz_gen_dom1)
nohaz_gen_atiras, nohaz_gen_atiras_num = rdb.get_atiras(nohaz_gen_dom1)
atiras_gen_num = haz_gen_atiras_num + nohaz_gen_atiras_num
haz_real_atiras, haz_real_atiras_num = rdb.get_atiras(haz_real_dom1)
nohaz_real_atiras, nohaz_real_atiras_num = rdb.get_atiras(nohaz_real_dom1)
atiras_real_num = haz_real_atiras_num + nohaz_real_atiras_num
print "Number of virtual Atiras:", atiras_gen_num
print "Number of real Atiras:", atiras_real_num
Atens
In [11]:
# rdb.get_atens results are unpacked as (subset, count) for each dom1 dataset;
# the counts are summed per population (virtual / real) and reported.
haz_gen_atens, haz_gen_atens_num = rdb.get_atens(haz_gen_dom1)
nohaz_gen_atens, nohaz_gen_atens_num = rdb.get_atens(nohaz_gen_dom1)
atens_gen_num = haz_gen_atens_num + nohaz_gen_atens_num
haz_real_atens, haz_real_atens_num = rdb.get_atens(haz_real_dom1)
nohaz_real_atens, nohaz_real_atens_num = rdb.get_atens(nohaz_real_dom1)
atens_real_num = haz_real_atens_num + nohaz_real_atens_num
print "Number of virtual Atens:", atens_gen_num
print "Number of real Atens:", atens_real_num
Atiras + Atens
In [12]:
# Stack the Atira and Aten subsets into one group per class and population.
haz_gen_atiras_atens = pd.concat([haz_gen_atiras, haz_gen_atens])
nohaz_gen_atiras_atens = pd.concat([nohaz_gen_atiras, nohaz_gen_atens])
haz_real_atiras_atens = pd.concat([haz_real_atiras, haz_real_atens])
nohaz_real_atiras_atens = pd.concat([nohaz_real_atiras, nohaz_real_atens])

# Group sizes: per class first, then the virtual / real totals.
haz_gen_atiras_atens_num = len(haz_gen_atiras_atens)
nohaz_gen_atiras_atens_num = len(nohaz_gen_atiras_atens)
haz_real_atiras_atens_num = len(haz_real_atiras_atens)
nohaz_real_atiras_atens_num = len(nohaz_real_atiras_atens)
atiras_atens_gen_num = haz_gen_atiras_atens_num + nohaz_gen_atiras_atens_num
atiras_atens_real_num = haz_real_atiras_atens_num + nohaz_real_atiras_atens_num
In [13]:
# Report virtual Atira/Aten counts; the "weight" is the group's share of the
# virtual dom1 population (float() avoids Python 2 integer division).
print "Number of virtual PHAs in the group:", haz_gen_atiras_atens_num
print "Number of virtual NHAs in the group:", nohaz_gen_atiras_atens_num
print "Number of virtual Atiras and Atens:", atiras_atens_gen_num
print "Virtual Atiras and Atens group weight:", float(atiras_atens_gen_num)/gen_dom1_num
In [14]:
# Report real Atira/Aten counts; the "weight" is the group's share of the
# real dom1 population (float() avoids Python 2 integer division).
print "Number of real PHAs in the group:", haz_real_atiras_atens_num
print "Number of real NHAs in the group:", nohaz_real_atiras_atens_num
print "Number of real Atiras and Atens:", atiras_atens_real_num
print "Real Atiras and Atens group weight:", float(atiras_atens_real_num)/real_dom1_num
Plot distributions of 'a' and 'i' parameters
In [15]:
# Plot the virtual Atira/Aten group over the 'a' and 'i' columns.
cutcol = ['a', 'i']
vd.plot_distributions2d(cutcol, haz_gen_atiras_atens, nohaz_gen_atiras_atens, labels=True)
Cut a and i columns and normalize datasets
In [16]:
# Cut the 'a' and 'i' columns from the virtual and real groups. The result is
# unpacked as (pairs, scales): pairs[k] holds the (haz, nohaz) cut for the k-th
# input list, in the order the lists were passed.
cutcol = ['a', 'i']
pairs, atiras_atens_ai_sc = ld.cut_normalize(cutcol,
[haz_gen_atiras_atens, nohaz_gen_atiras_atens],
[haz_real_atiras_atens, nohaz_real_atiras_atens])
haz_gen_cut, nohaz_gen_cut = pairs[0]
haz_real_cut, nohaz_real_cut = pairs[1]
Find decision surface with SVM
In [17]:
# Build the training set from the virtual cuts, then construct and fit the
# linear SVM in one expression (fit() returns the fitted estimator).
xtrain, ytrain = ld.mix_up(haz_gen_cut, nohaz_gen_cut)
clf_atiras_atens_ai = svm.LinearSVC().fit(xtrain, ytrain)
In [18]:
# reload(al)
# clf_aa_ai = svm.LinearSVC()
# splitres = al.split_by_clf(clf_aa_ai, cutcol, haz_gen_atiras_atens,
# nohaz_gen_atiras_atens)
# haz_gen_aa_ai, nohaz_gen_aa_ai = splitres[0]
# haz_gen_aa_ai__, nohaz_gen_aa_ai__ = splitres[1]
# aa_ai_sc = splitres[2]
Estimate split quality for virtual Atiras & Atens
In [19]:
# Evaluate the classifier on the virtual Atira/Aten cuts. The result is indexed
# as four positional row selectors:
#   [0] / [1] -> hazardous / non-hazardous rows kept as the "_ai" subsets
#   [2] / [3] -> hazardous / non-hazardous rows kept as the "_ai__" remainder
# NOTE(review): .iloc assumes ld.cut_normalize preserved the row order of the
# source frames -- confirm.
predicted_gen = al.clf_split_quality(clf_atiras_atens_ai, haz_gen_cut, nohaz_gen_cut)
haz_gen_atiras_atens_ai = haz_gen_atiras_atens.iloc[predicted_gen[0]]
# Fix: non-hazardous selectors must index the non-hazardous frame (they were
# previously applied to haz_gen_atiras_atens), matching the Apollo/Amor cells.
nohaz_gen_atiras_atens_ai = nohaz_gen_atiras_atens.iloc[predicted_gen[1]]
haz_gen_atiras_atens_ai__ = haz_gen_atiras_atens.iloc[predicted_gen[2]]
nohaz_gen_atiras_atens_ai__ = nohaz_gen_atiras_atens.iloc[predicted_gen[3]]
Estimate split quality for real Atiras & Atens
In [20]:
# Evaluate the classifier on the real Atira/Aten cuts. The result is indexed
# as four positional row selectors:
#   [0] / [1] -> hazardous / non-hazardous rows kept as the "_ai" subsets
#   [2] / [3] -> hazardous / non-hazardous rows kept as the "_ai__" remainder
# NOTE(review): .iloc assumes ld.cut_normalize preserved the row order of the
# source frames -- confirm.
predicted_real = al.clf_split_quality(clf_atiras_atens_ai, haz_real_cut, nohaz_real_cut)
haz_real_atiras_atens_ai = haz_real_atiras_atens.iloc[predicted_real[0]]
# Fix: non-hazardous selectors must index the non-hazardous frame (they were
# previously applied to haz_real_atiras_atens), matching the Apollo/Amor cells.
nohaz_real_atiras_atens_ai = nohaz_real_atiras_atens.iloc[predicted_real[1]]
haz_real_atiras_atens_ai__ = haz_real_atiras_atens.iloc[predicted_real[2]]
nohaz_real_atiras_atens_ai__ = nohaz_real_atiras_atens.iloc[predicted_real[3]]
Plot decision boundary
In [21]:
# Draw the fitted classifier over the virtual cuts; relies on cutcol still
# being ['a', 'i'] and on the scales returned by the matching cut_normalize call.
vd.plot_clf2d(clf_atiras_atens_ai, cutcol, haz_cut=haz_gen_cut, nohaz_cut=nohaz_gen_cut,
num=400, scales=atiras_atens_ai_sc, cmap='winter', figsize=(8,8))
In [22]:
# Record this division's subsets in the Atira/Aten accumulators.
# NOTE: not idempotent -- re-running this cell without re-running the cell that
# resets the lists appends the same subsets again.
haz_gen_extracted_aa.append(haz_gen_atiras_atens_ai)
nohaz_gen_trapped_aa.append(nohaz_gen_atiras_atens_ai)
haz_real_extracted_aa.append(haz_real_atiras_atens_ai)
nohaz_real_trapped_aa.append(nohaz_real_atiras_atens_ai)
Divisions quality for virtual Atiras & Atens
In [23]:
# Summarize the accumulated subsets against the whole virtual Atira/Aten group.
vd.print_summary(haz_gen_extracted_aa, nohaz_gen_trapped_aa,
haz_gen_atiras_atens, nohaz_gen_atiras_atens, 'virtual')
Divisions quality for real Atiras & Atens
In [24]:
# Summarize the accumulated subsets against the whole real Atira/Aten group.
vd.print_summary(haz_real_extracted_aa, nohaz_real_trapped_aa,
haz_real_atiras_atens, nohaz_real_atiras_atens, 'real')
In [25]:
# Accumulators for the Apollo group: one independent list per
# (virtual/real, extracted hazardous / trapped non-hazardous) combination.
(haz_gen_extracted_ap, nohaz_gen_trapped_ap,
 haz_real_extracted_ap, nohaz_real_trapped_ap) = [], [], [], []
In [26]:
# rdb.get_apollos results are unpacked as (subset, count) for each dom1 dataset;
# the counts are summed per population (virtual / real).
haz_gen_apollo, haz_gen_apollo_num = rdb.get_apollos(haz_gen_dom1)
nohaz_gen_apollo, nohaz_gen_apollo_num = rdb.get_apollos(nohaz_gen_dom1)
apollo_gen_num = haz_gen_apollo_num + nohaz_gen_apollo_num
haz_real_apollo, haz_real_apollo_num = rdb.get_apollos(haz_real_dom1)
nohaz_real_apollo, nohaz_real_apollo_num = rdb.get_apollos(nohaz_real_dom1)
apollo_real_num = haz_real_apollo_num + nohaz_real_apollo_num
In [27]:
# Report virtual Apollo counts; the "weight" is the group's share of the
# virtual dom1 population (float() avoids Python 2 integer division).
print "Number of virtual PHAs in the group:", haz_gen_apollo_num
print "Number of virtual NHAs in the group:", nohaz_gen_apollo_num
print "Number of virtual Apollo:", apollo_gen_num
print "Apollo group weight:", float(apollo_gen_num)/gen_dom1_num
In [28]:
# Report real Apollo counts; the "weight" is the group's share of the
# real dom1 population (float() avoids Python 2 integer division).
print "Number of real PHAs in the group:", haz_real_apollo_num
print "Number of real NHAs in the group:", nohaz_real_apollo_num
print "Number of real Apollo:", apollo_real_num
print "Apollo group weight:", float(apollo_real_num)/real_dom1_num
In [29]:
# vd.display_allparams([apollos_haz, apollos_nohaz], vd.combs, vd.colnames)
Amplify datasets by their symmetric copies over 'w' parameter
In [30]:
# Single-mirror variant, kept for reference:
# haz_gen_apollo_se = ld.add_mirror_column(haz_gen_apollo, 'w', 180.0)
# nohaz_gen_apollo_se = ld.add_mirror_column(nohaz_gen_apollo, 'w', 180.0)
# Active variant: extend the virtual Apollos with "double mirror" copies over
# column 'w' (180.0 is presumably the mirror value -- confirm in learn_data).
haz_gen_apollo_se = ld.add_doublemirror_column(haz_gen_apollo, 'w', 180.0)
nohaz_gen_apollo_se = ld.add_doublemirror_column(nohaz_gen_apollo, 'w', 180.0)
In [31]:
# Plot the mirrored virtual Apollos over the 'w' and 'q' columns.
cutcol = ['w', 'q']
vd.plot_distributions2d(cutcol, haz_gen_apollo_se, nohaz_gen_apollo_se, invertaxes=[0,1], labels=True)
Cut off annoying tips
In [32]:
# Threshold on q separating the "big q" (bq) rows from the "small q" (sq) tips.
q_tip = 0.7

# Plain virtual Apollos: rows above the threshold ...
haz_gen_apollo_bq = haz_gen_apollo.loc[haz_gen_apollo.q > q_tip]
nohaz_gen_apollo_bq = nohaz_gen_apollo.loc[nohaz_gen_apollo.q > q_tip]
# ... and rows at or below it.
haz_gen_apollo_sq = haz_gen_apollo.loc[haz_gen_apollo.q <= q_tip]
nohaz_gen_apollo_sq = nohaz_gen_apollo.loc[nohaz_gen_apollo.q <= q_tip]

# Mirrored virtual Apollos above the threshold.
haz_gen_apollo_se_bq = haz_gen_apollo_se.loc[haz_gen_apollo_se.q > q_tip]
nohaz_gen_apollo_se_bq = nohaz_gen_apollo_se.loc[nohaz_gen_apollo_se.q > q_tip]
Cut w, q and i columns and normalize datasets
In [33]:
# Cut the 'w', 'q' and 'i' columns from three dataset pairs. pairs[k] holds the
# (haz, nohaz) cut for the k-th input list: virtual, real, and the mirrored
# q > 0.7 virtual set that is used for training.
cutcol = ['w', 'q', 'i']
pairs, apollo_wqi_sc = ld.cut_normalize(cutcol,
[haz_gen_apollo, nohaz_gen_apollo],
[haz_real_apollo, nohaz_real_apollo],
[haz_gen_apollo_se_bq, nohaz_gen_apollo_se_bq])
haz_gen_cut, nohaz_gen_cut = pairs[0]
haz_real_cut, nohaz_real_cut = pairs[1]
haz_gen_se_cut, nohaz_gen_se_cut = pairs[2]
Train SVM
In [34]:
# Train on the mirrored q > 0.7 cuts; fit() returns the fitted estimator, so
# construction and fitting are chained.
xtrain, ytrain = ld.mix_up(haz_gen_se_cut, nohaz_gen_se_cut)
# Alternative setting tried earlier: gamma=35.0, C=0.05, class_weight={0: 1.2}
clf_apollo_wqi = svm.SVC(gamma=40.0, C=0.05, class_weight={0: 1.2}).fit(xtrain, ytrain)
In [35]:
# # reload(al)
# cutcol = ['w', 'q', 'i']
# # clf_apollo_wqi3 = neighbors.KNeighborsClassifier()
# # splitres = al.split_by_clf(clf_apollo_wqi3, cutcol, haz_gen_apollo_se_bq,
# # nohaz_gen_apollo_se_bq,
# # haz_gen_apollo,
# # nohaz_gen_apollo)
# clf_apollo_wqi3 = svm.SVC(gamma=40.0, C=0.05, class_weight={0: 1.2}) #class_weight={0: 1.5}
# # clf_apollo_wqi3 = svm.NuSVC(gamma=10.0, class_weight={0: 1.5}) #class_weight={0: 1.5}
# #(20 0.5), (30 0.1) (5 0.01)
# splitres = al.split_by_clf(clf_apollo_wqi3, cutcol, haz_gen_apollo_se_bq,
# nohaz_gen_apollo_se_bq,
# haz_gen_apollo,
# nohaz_gen_apollo)
# haz_gen_apollo_wqi3, nohaz_gen_apollo_wqi3 = splitres[0]
# haz_gen_apollo_wqi3__, nohaz_gen_apollo_wqi3__ = splitres[1]
# apollo_wqi3_sc = splitres[2]
# clf_apollo_wqi = clf_apollo_wqi3
Estimate split quality for virtual Apollos
In [36]:
# Evaluate the w-q-i classifier on the (unmirrored) virtual Apollo cuts. The
# result is indexed as four positional row selectors: [0]/[1] pick the
# hazardous / non-hazardous "_wqi" subsets, [2]/[3] the "_wqi__" remainders.
# NOTE(review): .iloc assumes ld.cut_normalize preserved row order -- confirm.
predicted_gen = al.clf_split_quality(clf_apollo_wqi, haz_gen_cut, nohaz_gen_cut)
haz_gen_apollo_wqi = haz_gen_apollo.iloc[predicted_gen[0]]
nohaz_gen_apollo_wqi = nohaz_gen_apollo.iloc[predicted_gen[1]]
haz_gen_apollo_wqi__ = haz_gen_apollo.iloc[predicted_gen[2]]
nohaz_gen_apollo_wqi__ = nohaz_gen_apollo.iloc[predicted_gen[3]]
Estimate split quality for real Apollos
In [37]:
# Evaluate the w-q-i classifier on the real Apollo cuts. The result is indexed
# as four positional row selectors: [0]/[1] pick the hazardous / non-hazardous
# "_wqi" subsets, [2]/[3] the "_wqi__" remainders.
# NOTE(review): .iloc assumes ld.cut_normalize preserved row order -- confirm.
predicted_real = al.clf_split_quality(clf_apollo_wqi, haz_real_cut, nohaz_real_cut)
haz_real_apollo_wqi = haz_real_apollo.iloc[predicted_real[0]]
nohaz_real_apollo_wqi = nohaz_real_apollo.iloc[predicted_real[1]]
haz_real_apollo_wqi__ = haz_real_apollo.iloc[predicted_real[2]]
nohaz_real_apollo_wqi__ = nohaz_real_apollo.iloc[predicted_real[3]]
In [ ]:
Prepare w-q domain mask to exclude out-of-domain points from the plot
In [38]:
# Inputs for the w-q domain mask: all in-domain virtual asteroids vs. the
# out-of-domain "rest". Both sides now come from the same "genu" family of
# frames (previously the in-domain side was built from haz_gen / nohaz_gen
# while the rest side used the *_genu_rest frames).
genu = pd.concat((haz_genu, nohaz_genu))
genu_rest = pd.concat((haz_genu_rest, nohaz_genu_rest))
# Keep the first two scale entries of the ['w', 'q', 'i'] cut, i.e. presumably
# the ones for 'w' and 'q' (assumes one entry per cut column -- confirm).
apollo_wq_sc = apollo_wqi_sc[:2]
In [39]:
# Separate column list for the mask so that `cutcol` keeps its current value
# for the decision-surface plot further below.
cutcol_ = ['w', 'q']
clfmask = svm.SVC(gamma=10.0, C=1e3) # class_weight={1: 2}
# Fit the mask classifier on in-domain vs. rest samples using the w-q scales.
clfmask = al.sgmask_clf2d_fit(clfmask, cutcol_, genu, genu_rest, apollo_wq_sc)
In [40]:
# Visual check of the fitted w-q domain mask.
vd.plot_clf2d(clfmask, cutcol_, num=250, figsize=(6,6), scales=apollo_wq_sc,
labels=True, cmap='Blues', invertaxes=[0, 1])
In [41]:
# cutcol_ = ['w', 'q']
# # labels = [vd.colnames[nm] for nm in cutcol]
# clfmask = svm.SVC(gamma=10.0, C=1e3) # class_weight={1: 2}
# clfmask = al.sgmask_clf(haz_gen_apollo_se, nohaz_gen_apollo_se,
# haz_genu_rest, nohaz_genu_rest, clfmask, cutcol_)
# # clfmask = al.sgmask_clf(haz_gen_apollo, nohaz_gen_apollo,
# # haz_genu_rest, nohaz_genu_rest, clfmask, cutcol)
In [42]:
# plotgrid = np.array([[0.0, 0.0], [1.0, 1.0]])
# scales = ld.dfcommon_bounds([haz_gen_apollo_se, nohaz_gen_apollo_se], cutcol_)
# # vd.plot_classifier(plotgrid, clfmask, num=200, figsize=(6,6), scales=scales,
# # labels = labels, cmap='Blues', invertaxes=[0, 1])
# vd.plot_clf2d(clfmask, cutcol_, num=200, figsize=(6,6), scales=scales,
# labels=True, cmap='Blues', invertaxes=[0, 1])
In [ ]:
Plot decision surface
In [43]:
# Relies on cutcol still being ['w', 'q', 'i'] from the cut_normalize cell
# (the mask cells use the separate cutcol_ name).
# cutcol = ['w', 'q', 'i']
# clfmask is passed as a mask to exclude out-of-domain points from the plot.
vd.plot_clf3d(clf_apollo_wqi, cutcol, num=250, labels=True, figsize=(10,9), mode='2d',
scales=apollo_wqi_sc, clf_masks=[(clfmask, 0)], invertaxes=[0, 1])
In [44]:
# Record the first Apollo division (w-q-i) in the Apollo accumulators.
# NOTE: not idempotent -- re-running this cell without resetting the lists
# appends the same subsets again.
haz_gen_extracted_ap.append(haz_gen_apollo_wqi)
nohaz_gen_trapped_ap.append(nohaz_gen_apollo_wqi)
haz_real_extracted_ap.append(haz_real_apollo_wqi)
nohaz_real_trapped_ap.append(nohaz_real_apollo_wqi)
Amplify datasets by their symmetric copies over the 'w' parameter
In [45]:
# Extend the remainder of the first Apollo division with "double mirror"
# copies over column 'w'; used as the training set for the second division.
haz_gen_apollo_wqi__se = ld.add_doublemirror_column(haz_gen_apollo_wqi__, 'w', 180.0)
nohaz_gen_apollo_wqi__se = ld.add_doublemirror_column(nohaz_gen_apollo_wqi__, 'w', 180.0)
Cut w, q and a columns and normalize datasets
In [46]:
# reload(ld)
# Cut the 'w', 'q' and 'a' columns from the remainders of the first division.
# pairs[k] holds the (haz, nohaz) cut for the k-th input list: virtual
# remainder, real remainder, and the mirrored virtual remainder (training set).
cutcol = ['w', 'q', 'a']
pairs, apollo_wqa_sc = ld.cut_normalize(cutcol, [haz_gen_apollo_wqi__, nohaz_gen_apollo_wqi__],
[haz_real_apollo_wqi__, nohaz_real_apollo_wqi__],
[haz_gen_apollo_wqi__se, nohaz_gen_apollo_wqi__se])
haz_gen_cut, nohaz_gen_cut = pairs[0]
haz_real_cut, nohaz_real_cut = pairs[1]
haz_gen_se_cut, nohaz_gen_se_cut = pairs[2]
Train SVM
In [47]:
# Train the second Apollo classifier on the mirrored remainder cuts; fit()
# returns the fitted estimator, so construction and fitting are chained.
xtrain, ytrain = ld.mix_up(haz_gen_se_cut, nohaz_gen_se_cut)
clf_apollo_wqa = svm.SVC(gamma=40.0, C=0.1, class_weight={0: 1.5}).fit(xtrain, ytrain)
In [48]:
# # reload(al)
# cutcol = ['w', 'q', 'a']
# clf_apollo_wqa3 = svm.SVC(gamma=40.0, C=0.1, class_weight={0: 1.5}) #class_weight={0: 1.5}
# # (30 0.5)
# splitres = al.split_by_clf(clf_apollo_wqa3, cutcol, haz_gen_apollo_wqi3__se,
# nohaz_gen_apollo_wqi3__se,
# haz_gen_apollo_wqi3__,
# nohaz_gen_apollo_wqi3__)
# haz_gen_apollo_wqa3, nohaz_gen_apollo_wqa3 = splitres[0]
# haz_gen_apollo_wqa3__, nohaz_gen_apollo_wqa3__ = splitres[1]
# apollo_wqa3_sc = splitres[2]
Estimate split quality for virtual Apollos
In [49]:
# Evaluate the w-q-a classifier on the virtual remainder cuts. Selectors index
# the remainder frames of the first division: [0]/[1] pick the hazardous /
# non-hazardous "_wqa" subsets, [2]/[3] the "_wqa__" remainders.
# NOTE(review): .iloc assumes ld.cut_normalize preserved row order -- confirm.
predicted_gen = al.clf_split_quality(clf_apollo_wqa, haz_gen_cut, nohaz_gen_cut)
haz_gen_apollo_wqa = haz_gen_apollo_wqi__.iloc[predicted_gen[0]]
nohaz_gen_apollo_wqa = nohaz_gen_apollo_wqi__.iloc[predicted_gen[1]]
haz_gen_apollo_wqa__ = haz_gen_apollo_wqi__.iloc[predicted_gen[2]]
nohaz_gen_apollo_wqa__ = nohaz_gen_apollo_wqi__.iloc[predicted_gen[3]]
Estimate split quality for real Apollos
In [50]:
# Evaluate the w-q-a classifier on the real remainder cuts. Selectors index
# the remainder frames of the first division: [0]/[1] pick the hazardous /
# non-hazardous "_wqa" subsets, [2]/[3] the "_wqa__" remainders.
# NOTE(review): .iloc assumes ld.cut_normalize preserved row order -- confirm.
predicted_real = al.clf_split_quality(clf_apollo_wqa, haz_real_cut, nohaz_real_cut)
haz_real_apollo_wqa = haz_real_apollo_wqi__.iloc[predicted_real[0]]
nohaz_real_apollo_wqa = nohaz_real_apollo_wqi__.iloc[predicted_real[1]]
haz_real_apollo_wqa__ = haz_real_apollo_wqi__.iloc[predicted_real[2]]
nohaz_real_apollo_wqa__ = nohaz_real_apollo_wqi__.iloc[predicted_real[3]]
In [51]:
# Relies on cutcol still being ['w', 'q', 'a'] from the cut_normalize cell and
# on clfmask fitted in the w-q mask cell.
# clf_masks = [(clfmask, 0)]
# cutcol = ['w', 'q', 'a']
# labels = [vd.colnames[nm] for nm in cutcol]
vd.plot_clf3d(clf_apollo_wqa, cutcol, num=250, labels=True, figsize=(10,9), mode='2d',
scales=apollo_wqa_sc, clf_masks=[(clfmask, 0)], invertaxes=[0, 1])
In [52]:
# Record the second Apollo division (w-q-a) in the Apollo accumulators.
# NOTE: not idempotent -- re-running this cell without resetting the lists
# appends the same subsets again.
haz_gen_extracted_ap.append(haz_gen_apollo_wqa)
nohaz_gen_trapped_ap.append(nohaz_gen_apollo_wqa)
haz_real_extracted_ap.append(haz_real_apollo_wqa)
nohaz_real_trapped_ap.append(nohaz_real_apollo_wqa)
Divisions quality for virtual Apollos
In [53]:
# Summarize both Apollo divisions against the whole virtual Apollo group.
vd.print_summary(haz_gen_extracted_ap, nohaz_gen_trapped_ap, haz_gen_apollo, nohaz_gen_apollo, 'virtual')
Divisions quality for real Apollos
In [54]:
# Summarize both Apollo divisions against the whole real Apollo group.
vd.print_summary(haz_real_extracted_ap, nohaz_real_trapped_ap, haz_real_apollo, nohaz_real_apollo, 'real')
In [ ]:
In [55]:
# Accumulators for the Amor group: one independent list per
# (virtual/real, extracted hazardous / trapped non-hazardous) combination.
(haz_gen_extracted_am, nohaz_gen_trapped_am,
 haz_real_extracted_am, nohaz_real_trapped_am) = [], [], [], []
In [56]:
# rdb.get_amors results are unpacked as (subset, count) for each dom1 dataset;
# the counts are summed per population (virtual / real).
haz_gen_amor, haz_gen_amors_num = rdb.get_amors(haz_gen_dom1)
nohaz_gen_amor, nohaz_gen_amors_num = rdb.get_amors(nohaz_gen_dom1)
amors_gen_num = haz_gen_amors_num + nohaz_gen_amors_num
haz_real_amor, haz_real_amors_num = rdb.get_amors(haz_real_dom1)
nohaz_real_amor, nohaz_real_amors_num = rdb.get_amors(nohaz_real_dom1)
amors_real_num = haz_real_amors_num + nohaz_real_amors_num
In [57]:
# Report virtual Amor counts; the "weight" is the group's share of the
# virtual dom1 population (float() avoids Python 2 integer division).
print "Number of virtual PHAs in the group:", haz_gen_amors_num
print "Number of virtual NHAs in the group:", nohaz_gen_amors_num
print "Number of virtual Amor:", amors_gen_num
print "Amor group weight:", float(amors_gen_num)/gen_dom1_num
In [58]:
# Report real Amor counts; the "weight" is the group's share of the
# real dom1 population (float() avoids Python 2 integer division).
print "Number of real PHAs in the group:", haz_real_amors_num
print "Number of real NHAs in the group:", nohaz_real_amors_num
print "Number of real Amor:", amors_real_num
print "Amor group weight:", float(amors_real_num)/real_dom1_num
In [59]:
# Extend the virtual Amors with "double mirror" copies over column 'w'.
haz_gen_amor_se = ld.add_doublemirror_column(haz_gen_amor, 'w', 180.0)
nohaz_gen_amor_se = ld.add_doublemirror_column(nohaz_gen_amor, 'w', 180.0)
In [60]:
# Extend factor for column 'w'; the same value is passed to the decision
# surface plot below via extend_factors=[ef, 0].
ef = 0.25
haz_gen_amor_see = ld.extend_by_copies(haz_gen_amor_se, 'w', extend_factor=ef)
nohaz_gen_amor_see = ld.extend_by_copies(nohaz_gen_amor_se, 'w', extend_factor=ef)
In [61]:
# Columns for the first Amor division; also consumed by the next cells
# (cut_normalize and the decision-surface plot) through the shared `cutcol`.
cutcol = ['w', 'i']
# labels = [vd.colnames[nm] for nm in cutcol]
# vd.display_param2d(cutcol, labels, [haz_gen_amor_see, nohaz_gen_amor_see])
vd.plot_distributions2d(cutcol, haz_gen_amor_see, nohaz_gen_amor_see, labels=True)
Cut w and i columns and normalize datasets
In [62]:
# Uses cutcol = ['w', 'i'] set in the previous cell. pairs[k] holds the
# (haz, nohaz) cut for the k-th input list: virtual, real, and the
# mirrored + extended virtual set (training set).
pairs, amor_wi_sc = ld.cut_normalize(cutcol, [haz_gen_amor, nohaz_gen_amor],
[haz_real_amor, nohaz_real_amor],
[haz_gen_amor_see, nohaz_gen_amor_see])
haz_gen_cut, nohaz_gen_cut = pairs[0]
haz_real_cut, nohaz_real_cut = pairs[1]
haz_gen_see_cut, nohaz_gen_see_cut = pairs[2]
Train SVM
In [63]:
# Train the first Amor classifier on the mirrored + extended cuts; fit()
# returns the fitted estimator, so construction and fitting are chained.
xtrain, ytrain = ld.mix_up(haz_gen_see_cut, nohaz_gen_see_cut)
clf_amor_wi = svm.SVC(gamma=30., C=0.1, class_weight={1: 5.5}).fit(xtrain, ytrain)
In [64]:
# reload(al)
# clf_amor_wi = svm.SVC(gamma=30., C=0.1, class_weight={1: 5.5})
# # clf = svm.SVC(gamma=12., C=0.02, class_weight={1: 5.5})
# # clf = svm.SVC(gamma=10., C=0.08, class_weight={1: 5.5})
# splitres = al.split_by_clf(clf_amor_wi, cutcol, haz_gen_amor_see,
# nohaz_gen_amor_see,
# haz_gen_amor,
# nohaz_gen_amor)
# haz_gen_amor_wi, nohaz_gen_amor_wi = splitres[0]
# haz_gen_amor_wi__, nohaz_gen_amor_wi__ = splitres[1]
# amor_wi_sc = splitres[2]
Estimate split quality for virtual Amors
In [65]:
# Evaluate the w-i classifier on the virtual Amor cuts. The result is indexed
# as four positional row selectors: [0]/[1] pick the hazardous / non-hazardous
# "_wi" subsets, [2]/[3] the "_wi__" remainders.
# NOTE(review): .iloc assumes ld.cut_normalize preserved row order -- confirm.
predicted_gen = al.clf_split_quality(clf_amor_wi, haz_gen_cut, nohaz_gen_cut)
haz_gen_amor_wi = haz_gen_amor.iloc[predicted_gen[0]]
nohaz_gen_amor_wi = nohaz_gen_amor.iloc[predicted_gen[1]]
haz_gen_amor_wi__ = haz_gen_amor.iloc[predicted_gen[2]]
nohaz_gen_amor_wi__ = nohaz_gen_amor.iloc[predicted_gen[3]]
Estimate split quality for real Amors
In [66]:
# Evaluate the w-i classifier on the real Amor cuts. The result is indexed as
# four positional row selectors: [0]/[1] pick the hazardous / non-hazardous
# "_wi" subsets, [2]/[3] the "_wi__" remainders.
# NOTE(review): .iloc assumes ld.cut_normalize preserved row order -- confirm.
predicted_real = al.clf_split_quality(clf_amor_wi, haz_real_cut, nohaz_real_cut)
haz_real_amor_wi = haz_real_amor.iloc[predicted_real[0]]
nohaz_real_amor_wi = nohaz_real_amor.iloc[predicted_real[1]]
haz_real_amor_wi__ = haz_real_amor.iloc[predicted_real[2]]
nohaz_real_amor_wi__ = nohaz_real_amor.iloc[predicted_real[3]]
Plot decision surface
In [67]:
# Draw the w-i classifier over the virtual cuts; extend_factors reuses the
# `ef` value applied to column 'w' when the training set was extended.
vd.plot_clf2d(clf_amor_wi, cutcol, num=400, haz_cut=haz_gen_cut, nohaz_cut=nohaz_gen_cut, s=2,
figsize=(8,8), scales=amor_wi_sc, labels=True, cmap='winter', extend_factors=[ef, 0])
# plotbounds=plt_bounds
In [68]:
# vd.display_param2d(cutcol, labels, [haz_gen_amor_wi, nohaz_gen_amor_wi])
In [69]:
# Extend the subsets selected by the first Amor division with mirrored copies
# over column 'w'; used as the training set for the second division.
# NOTE(review): this uses add_mirror_column, whereas the other augmentation
# cells use add_doublemirror_column -- confirm the difference is intended.
haz_gen_amor_wi_se = ld.add_mirror_column(haz_gen_amor_wi, 'w', 180.0)
nohaz_gen_amor_wi_se = ld.add_mirror_column(nohaz_gen_amor_wi, 'w', 180.0)
In [70]:
# ef = 0.25
# haz_gen_amor_wi_see = ld.extend_by_copies(haz_gen_amor_wi_se , 'w', extend_factor=ef)
# nohaz_gen_amor_wi_see = ld.extend_by_copies(nohaz_gen_amor_wi_se , 'w', extend_factor=ef)
In [71]:
# Plot the mirrored first-division subsets over the 'w' and 'om' columns.
cutcol = ['w', 'om']
vd.plot_distributions2d(cutcol, haz_gen_amor_wi_se, nohaz_gen_amor_wi_se, labels=True)
Cut w, om and q columns and normalize datasets
In [72]:
# Cut the 'w', 'om' and 'q' columns from the first-division subsets. pairs[k]
# holds the (haz, nohaz) cut for the k-th input list: virtual, real, and the
# mirrored virtual set (training set).
cutcol = ['w', 'om', 'q']
pairs, amor_womq_sc = ld.cut_normalize(cutcol, [haz_gen_amor_wi, nohaz_gen_amor_wi],
[haz_real_amor_wi, nohaz_real_amor_wi],
[haz_gen_amor_wi_se, nohaz_gen_amor_wi_se])
haz_gen_cut, nohaz_gen_cut = pairs[0]
haz_real_cut, nohaz_real_cut = pairs[1]
haz_gen_se_cut, nohaz_gen_se_cut = pairs[2]
Train SVM
In [73]:
# Train the second Amor classifier on the mirrored cuts; fit() returns the
# fitted estimator, so construction and fitting are chained.
xtrain, ytrain = ld.mix_up(haz_gen_se_cut, nohaz_gen_se_cut)
clf_amor_womq = svm.SVC(gamma=20.0, C=8.0, class_weight={0: 2.4}).fit(xtrain, ytrain)
In [74]:
# cutcol = ['w', 'om', 'q']
# # clf = svm.SVC(gamma=10.0, C=1000, class_weight={0: 2.0})
# clf_amor_womq = svm.SVC(gamma=20.0, C=8.0, class_weight={0: 2.4})
# splitres = al.split_by_clf(clf_amor_womq, cutcol, haz_gen_amor_wi_se,
# nohaz_gen_amor_wi_se,
# haz_gen_amor_wi,
# nohaz_gen_amor_wi)
# haz_gen_amor_wqom, nohaz_gen_amor_wqom = splitres[0]
# haz_gen_amor_wqom__, nohaz_gen_amor_wqom__ = splitres[1]
# amor_wqom_sc = splitres[2]
Estimate split quality for virtual Amors
In [75]:
# Evaluate the w-om-q classifier on the virtual first-division cuts. Selectors
# index the "_wi" frames: [0]/[1] pick the hazardous / non-hazardous "_womq"
# subsets, [2]/[3] the "_womq__" remainders.
# NOTE(review): .iloc assumes ld.cut_normalize preserved row order -- confirm.
predicted_gen = al.clf_split_quality(clf_amor_womq, haz_gen_cut, nohaz_gen_cut)
haz_gen_amor_womq = haz_gen_amor_wi.iloc[predicted_gen[0]]
nohaz_gen_amor_womq = nohaz_gen_amor_wi.iloc[predicted_gen[1]]
haz_gen_amor_womq__ = haz_gen_amor_wi.iloc[predicted_gen[2]]
nohaz_gen_amor_womq__ = nohaz_gen_amor_wi.iloc[predicted_gen[3]]
Estimate split quality for real Amors
In [76]:
# Evaluate the w-om-q classifier on the real first-division cuts. Selectors
# index the "_wi" frames: [0]/[1] pick the hazardous / non-hazardous "_womq"
# subsets, [2]/[3] the "_womq__" remainders.
# NOTE(review): .iloc assumes ld.cut_normalize preserved row order -- confirm.
predicted_real = al.clf_split_quality(clf_amor_womq, haz_real_cut, nohaz_real_cut)
haz_real_amor_womq = haz_real_amor_wi.iloc[predicted_real[0]]
nohaz_real_amor_womq = nohaz_real_amor_wi.iloc[predicted_real[1]]
haz_real_amor_womq__ = haz_real_amor_wi.iloc[predicted_real[2]]
nohaz_real_amor_womq__ = nohaz_real_amor_wi.iloc[predicted_real[3]]
In [ ]:
In [77]:
# Re-state the columns explicitly so this plot does not depend on the last
# value left in `cutcol`; no domain mask is passed for this plot.
cutcol = ['w', 'om', 'q']
vd.plot_clf3d(clf_amor_womq, cutcol, num=250, labels=True, figsize=(10,9), mode='2d', scales=amor_womq_sc)
In [78]:
# Record the final Amor subsets (second division) in the Amor accumulators.
# NOTE: not idempotent -- re-running this cell without resetting the lists
# appends the same subsets again.
haz_gen_extracted_am.append(haz_gen_amor_womq)
nohaz_gen_trapped_am.append(nohaz_gen_amor_womq)
haz_real_extracted_am.append(haz_real_amor_womq)
nohaz_real_trapped_am.append(nohaz_real_amor_womq)
Divisions quality for virtual Amors
In [79]:
# Summarize the Amor divisions against the whole virtual Amor group.
vd.print_summary(haz_gen_extracted_am, nohaz_gen_trapped_am, haz_gen_amor, nohaz_gen_amor, 'virtual')
Divisions quality for real Amors
In [80]:
# Summarize the Amor divisions against the whole real Amor group.
vd.print_summary(haz_real_extracted_am, nohaz_real_trapped_am, haz_real_amor, nohaz_real_amor, 'real')
In [ ]:
Virtual asteroids
In [81]:
# Concatenate the per-group accumulators (Atira/Aten, Apollo, Amor) into new
# lists; the per-group lists themselves are left unchanged.
haz_gen_extracted = haz_gen_extracted_aa + haz_gen_extracted_ap + haz_gen_extracted_am
nohaz_gen_trapped = nohaz_gen_trapped_aa + nohaz_gen_trapped_ap + nohaz_gen_trapped_am
In [82]:
# Overall summary for virtual asteroids, measured against the full loaded
# haz_gen / nohaz_gen datasets (not the q-split dom1 subsets).
vd.print_summary(haz_gen_extracted, nohaz_gen_trapped, haz_gen, nohaz_gen, 'virtual')
Real asteroids
In [83]:
# Concatenate the per-group accumulators (Atira/Aten, Apollo, Amor) into new
# lists; the per-group lists themselves are left unchanged.
haz_real_extracted = haz_real_extracted_aa + haz_real_extracted_ap + haz_real_extracted_am
nohaz_real_trapped = nohaz_real_trapped_aa + nohaz_real_trapped_ap + nohaz_real_trapped_am
In [84]:
# Overall summary for real asteroids, measured against the full loaded
# haz_real / nohaz_real datasets (not the q-split dom1 subsets).
vd.print_summary(haz_real_extracted, nohaz_real_trapped, haz_real, nohaz_real, 'real')
In [ ]:
In [ ]: