In [13]:
import pickle as pk
import matplotlib.pylab as plt
import numpy as np
from numpy.linalg import norm
from math import sqrt, exp
%matplotlib nbagg
from PyKEP import *
import seaborn as sns
sns.set_style("whitegrid")

In [14]:
from matplotlib import rc
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
## for Palatino and other serif fonts use:
#rc('font',**{'family':'serif','serif':['Palatino']})
rc('text', usetex=True)

We load the data file obtained from the low-thrust optimization of many legs (GTOC7). Each row contains:

ast1, ast2, $t_1$, $t_2$, $m^*$, $m_i$, $\Delta V^*$, $\Delta V$, $\Delta V_L$, $\Delta V_D$, $m^*_L$, $m^*_D$

We want to learn $\Delta V$ (i.e. the final mass) from the rest, which can be computed quickly using a Lambert solver and the Damon model. The ast1 and ast2 ids can be used with the gtoc7 asteroids to retrieve the orbital parameters.
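For orientation, here is a minimal sketch of how the two ids map to PyKEP gtoc7 planets and their osculating elements. It assumes the column order listed above and the PyKEP imports from the first cell; the names are only illustrative.

# Illustrative column layout of one row of learn.pkl (order as listed above).
COLUMNS = ["ast1", "ast2", "t1", "t2", "m_star", "m_i",
           "dV_star", "dV", "dV_L", "dV_D", "m_star_L", "m_star_D"]

def asteroid_elements(row):
    """Osculating elements (a, e, i, W, w, M) of the two GTOC7 asteroids of a row."""
    el1 = planet.gtoc7(int(row[0])).osculating_elements(epoch(0))
    el2 = planet.gtoc7(int(row[1])).osculating_elements(epoch(0))
    return el1, el2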


In [15]:
a = pk.load(open("learn.pkl","rb"))
print("DATA = ", a)
print("ONE ROW = ", a[0])


DATA =  [[ 11830.          10256.          10962.60236546 ...,   4906.74618359
    2544.20354737   1476.60103897]
 [  6846.           6073.          11098.51009776 ...,   6586.44735414
    1251.97554234    775.87757295]
 [ 15646.           8853.          10472.90296903 ...,   4091.14474818
    2968.07001635   1992.14286758]
 ..., 
 [  4952.          14339.          10326.68381981 ...,   4057.38011553
    2788.37388882   1485.32852058]
 [ 12662.          11131.           9791.69375163 ...,   4885.57653231
    2874.03260757   2312.48314192]
 [ 14007.           7294.          10253.04425878 ...,   4728.99722101
    2062.81669447   1294.33686228]]
ONE ROW =  [ 11830.          10256.          10962.60236546  11220.6581701
   1427.64964893   1352.56164444   4894.91224655   4225.21779204
   2629.03746956   4906.74618359   2544.20354737   1476.60103897]

Visualizing the data

We plot the data with a low alpha value so as to show the data density. The plots are the result of domain knowledge, i.e. manual feature engineering.

First we plot the starting mass (as a fraction of the maximum allowed $m^*$) vs the $\Delta V$ (as a fraction of $\Delta V^*$). This last fraction essentially measures the duration of the thrust arc: 1 -> no coast, 0 -> full coast. We observe that no trajectory has a $\Delta V$ higher than the one corresponding to the maximum mass (obvious, as that corresponds to no coast arc: one cannot thrust for longer). A strange feature that should be discussed / checked is that many trajectories show no coast arc even when they are very light.


In [29]:
plt.figure()
skip=10
plt.subplot(1,3,1)
plt.scatter(  a[::skip,5]/a[::skip,4], a[::skip,7] / a[::skip,6], marker='o', color = 'k', alpha=0.05)
#plt.hexbin(  a[:,5]/a[:,4], a[:,7] / a[:,6], bins='log')
plt.xlabel("$ m_i / m^*$")
plt.ylabel("$ \Delta V / \Delta V^*$")
#plt.xlim(0.3,1)
#plt.ylim(0.5,1.02)


Out[29]:
<matplotlib.text.Text at 0x7f0d8093bc50>

Second, we plot the starting mass (as a fraction of the maximum allowed $m^*$) vs the $\Delta V$ (as a fraction of $\Delta V_L$). We observe a few low-thrust trajectories whose $\Delta V$ improves on the Lambert one. These are not extremely frequent, but correspond to cases where a two-impulse orbital transfer is not optimal.


In [30]:
plt.subplot(1,3,2)
plt.scatter(  a[::skip,5]/a[::skip,4], a[::skip,7] / a[::skip,8], marker='o', alpha=0.05, edgecolor="none", color = 'k')
#plt.hexbin(  a[:,5]/a[:,4], a[:,7] / a[:,8], bins='log')
plt.xlabel("$ {m_i} / {m^*}$")
plt.ylabel("$ {\Delta V} / {\Delta V_L}$")
plt.xlim(0.3,1)
plt.ylim(0.5,2.3)


Out[30]:
(0.5, 2.3)

Third, we plot the starting mass (as a fraction of the maximum allowed $m^*$) vs $\Delta V - \Delta V_L$ (as a fraction of $\Delta V^*$). This third plot shows the highest correlation between variables.


In [31]:
plt.subplot(133)
plt.scatter(  a[::skip,5]/a[::skip,4], (a[::skip,7]-a[::skip,8]) / (a[::skip,6]), marker='o', alpha=0.05,edgecolor="none", color = 'k')
#plt.hexbin(  a[:,5]/a[:,4], (a[:,7]-a[:,8]) / (a[:,6]), bins='log')
plt.xlim(0.3,1)
plt.ylim(-1,0.6)
plt.xlabel("$ {m_i} / {m^*}$")
plt.ylabel("$ {(\Delta V - \Delta V_L} ) / {\Delta V^*}$")

In [32]:
plt.tight_layout(1)

In [33]:
plt.figure()
plt.scatter(  a[:,5]/a[:,4],  ( 1. - np.exp(-a[:,7] / 3000 / 9.80665)), marker='o', alpha=0.02,edgecolor="none", c=np.array([a[:,5]  / max(a[:,8])]*3).transpose())
plt.xlim(0.3,1)
#plt.ylim(-2,1)
plt.xlabel("$ ({m_i} / {m^*}) $")
plt.ylabel("$ (m_i - m_f ) / m_i $")


Out[33]:
<matplotlib.text.Text at 0x7f0d80800e10>

Now we do some machine learning

From each line: ast1, ast2, $t_1$, $t_2$, $m^*$, $m_i$, $\Delta V^*$, $\Delta V$, $\Delta V_L$, $\Delta V_D$, $m^*_L$, $m^*_D$

As attributes we use $t_2-t_1$, $m_i$, $m^*_D$, $m^*_L$, together with a few quantities derived from the asteroid ephemerides (the two mean motions, the cosine of the angle between the orbital angular momenta, and the norms of the relative position and velocity), as these are all available at the cost of (at most) one Lambert problem solution.


In [34]:
X = list()
#X_queries = list()
y = list()
for row in a:
    a1 = planet.gtoc7(int(row[0])).osculating_elements(epoch(0))[0] 
    a2 = planet.gtoc7(int(row[1])).osculating_elements(epoch(0))[0]
    n1 = sqrt(MU_SUN/a1**3)
    n2 = sqrt(MU_SUN/a2**3)
    #e1 = planet.gtoc7(int(row[0])).osculating_elements(epoch(0))[1] 
    #e2 = planet.gtoc7(int(row[1])).osculating_elements(epoch(0))[1]
    r1,v1 = planet.gtoc7(int(row[0])).eph(row[2]) # MJD2000 assumed in data
    r2,v2 = planet.gtoc7(int(row[1])).eph(row[3])
    h1 = np.cross(r1,v1)
    h2 = np.cross(r2,v2)
    # cosine of the angle between the two orbital angular momenta
    cost = np.dot(h1,h2) / norm(h1) / norm(h2)
    X.append(np.array((row[3] - row[2], row[5], row[11], row[10], cost, n1, n2, norm([a-b for a,b in zip(r1,r2)]),norm([a-b for a,b in zip(v1,v2)]))))
    #X_queries.append(np.array((row[3] - row[2], row[11], row[5], row[9], row[8], row[10])))
    # target: propellant mass consumed, from Tsiolkovsky's equation (Isp = 3000 s)
    y.append(row[5] * ( 1 - np.exp(-row[7] / 3000 / 9.80665)) )
X = np.array(X)
y = np.array(y)
#X_queries = np.array(X_queries)
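The regression target y built above is the propellant mass consumed, obtained from Tsiolkovsky's rocket equation with the values hard-coded in the cell ($I_{sp} = 3000$ s, $g_0 = 9.80665$ m/s$^2$); predicting it is thus equivalent to predicting $\Delta V$ or the final mass:

$$ y = m_i - m_f = m_i \left( 1 - e^{-\frac{\Delta V}{I_{sp} g_0}} \right) $$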

In [35]:
print(X[0])
print(y[0])


[  2.58055805e+02   1.35256164e+03   1.47660104e+03   2.54420355e+03
   9.99065966e-01   4.81712020e-08   5.12879740e-08   3.54455760e+11
   1.45237988e+04]
180.946956378

In [36]:
# Split data in train and test data
# A random permutation, to split the data randomly
N = 30000
np.random.seed(23)
indices = np.random.permutation(len(X))
traj_X_train = X[indices[:-N]]
traj_y_train = y[indices[:-N]]
traj_X_test  = X[indices[-N:]]
traj_y_test  = y[indices[-N:]]
#traj_X_query_test = X_queries[indices[-N:]]
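Equivalently, the split could be done with scikit-learn's own helper. A sketch, assuming a scikit-learn version where train_test_split lives in sklearn.model_selection:

from sklearn.model_selection import train_test_split

traj_X_train, traj_X_test, traj_y_train, traj_y_test = train_test_split(
    X, y, test_size=N, random_state=23)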

Decision trees


In [37]:
#from sklearn import linear_model
#regr = linear_model.LinearRegression()
from sklearn import tree
regr = tree.DecisionTreeRegressor()
regr.fit(traj_X_train, traj_y_train)

# The root mean squared error on the test set
print("RMSE", sqrt(np.mean((regr.predict(traj_X_test)-traj_y_test)**2)))
#print("RMSE (queries)", sqrt(np.mean((regr.predict(traj_X_query_test)-traj_y_test)**2)))


# regr.score returns the coefficient of determination R^2:
# 1 is perfect prediction, 0 means the model does no better
# than predicting the mean.
print("EVS", regr.score(traj_X_test, traj_y_test))


RMSE 19.213834191121016
EVS 0.918086862323

Ensemble


In [43]:
#from sklearn import linear_model
#regr = linear_model.LinearRegression()
from sklearn import ensemble
regr = ensemble.GradientBoostingRegressor(max_depth=8)
regr.fit(traj_X_train, traj_y_train)

# Error metrics on the test set
print("MAE", np.mean(np.abs((regr.predict(traj_X_test)-traj_y_test))))
#print("MAX", sqrt(np.mean((lambert_prediction-traj_y_test)**2)))
print("RMSE", sqrt(np.mean((regr.predict(traj_X_test)-traj_y_test)**2)))
print("Max", max(np.abs(regr.predict(traj_X_test)-traj_y_test)))


MAE 7.5079551643
RMSE 12.189544692481228
Max 145.609473384
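Tree ensembles also expose feature importances, which give a quick sanity check on which attributes drive the prediction. A minimal sketch, assuming the feature order used when assembling X above:

# Assumed order of the columns of X (see the feature-construction cell above).
feature_names = ["t2 - t1", "m_i", "m*_D", "m*_L", "cos(h1,h2)",
                 "n1", "n2", "|r1 - r2|", "|v1 - v2|"]
for name, importance in sorted(zip(feature_names, regr.feature_importances_),
                               key=lambda t: -t[1]):
    print(name, importance)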

In [39]:
plt.figure()
h = plt.hist(regr.predict(traj_X_test)-traj_y_test, 100)
#plt.xlabel("error (Kg) on the test set")
plt.axes()


Out[39]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0d807dd438>

Let's see what would happen if, instead, we predicted the final mass as:

$$ m_f = m_i \, \exp\left(\frac{-\Delta V_L}{I_{sp}\, g_0}\right) $$

In [40]:
lambert_prediction = a[indices[-N:], 5] * (1 - np.exp( - a[indices[-N:], 8] / 3000 / 9.80665) )
# Error metrics on the test set
print("MAE", np.mean(np.abs((lambert_prediction-traj_y_test))))
#print("MAX", sqrt(np.mean((lambert_prediction-traj_y_test)**2)))
print("RMSE", sqrt(np.mean((lambert_prediction-traj_y_test)**2)))
print("Max", max(np.abs(lambert_prediction-traj_y_test)))


MAE 31.4457383414
RMSE 42.776498713068996
Max 639.826315194

In [41]:
plt.figure()
h = plt.hist(regr.predict(traj_X_test)-traj_y_test, 100, alpha=0.8,normed=False, log=False)
h = plt.hist(lambert_prediction-traj_y_test, 100, alpha=0.5, normed=False, log=False)
plt.xlabel("Error (Kg)")
plt.ylabel("N. instances")
plt.xlim(-200,200)


Out[41]:
(-200, 200)

In [20]:
plt.tight_layout(1)
import time

We try them all :)


In [60]:
from sklearn import ensemble, neighbors, tree, svm
import time

regressors = [
             ensemble.RandomForestRegressor(),
             ensemble.AdaBoostRegressor(),
             ensemble.BaggingRegressor(),
             ensemble.BaggingRegressor(ensemble.ExtraTreesRegressor()),
             ensemble.ExtraTreesRegressor(),
             ensemble.GradientBoostingRegressor(max_depth=7),
             tree.DecisionTreeRegressor(),
             tree.ExtraTreeRegressor(),
             #svm.LinearSVR(), 
             #svm.NuSVR(), 
             #svm.SVR() 
             ]

# keep only AdaBoost for this run (overriding the list defined above)
regressors = [ensemble.AdaBoostRegressor(loss='square', n_estimators=100, learning_rate=1.5)]
    
for regr in regressors:
    tstart = time.time()
    regr.fit(traj_X_train, traj_y_train)
    tend = time.time()

    print(type(regr))
    # Error metrics on the test set
    print("MAE", np.mean(np.abs((regr.predict(traj_X_test)-traj_y_test))))
    #print("MAX", sqrt(np.mean((lambert_prediction-traj_y_test)**2)))
    print("RMSE", sqrt(np.mean((regr.predict(traj_X_test)-traj_y_test)**2)))
    print("Max", max(np.abs(regr.predict(traj_X_test)-traj_y_test)))


    # regr.score returns the coefficient of determination R^2:
    # 1 is perfect prediction, 0 means the model does no better
    # than predicting the mean.
    print("EVS", regr.score(traj_X_test, traj_y_test))
    print("cpu time", tend-tstart)


<class 'sklearn.ensemble.weight_boosting.AdaBoostRegressor'>
MAE 24.7813340118
RMSE 30.47651669718731
Max 127.180459974
EVS 0.79391036946
cpu time 6.184276342391968

In [479]:
regressors[0].name()


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-479-7186718883c6> in <module>()
----> 1 regressors[0].name()

AttributeError: 'KNeighborsRegressor' object has no attribute 'name'

In [456]:
type(regressors[0])


Out[456]:
sklearn.ensemble.forest.RandomForestRegressor
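The failed .name() call and the type() check above suggest the idiomatic way of getting a printable name for each regressor in the comparison loop:

for regr in regressors:
    print(type(regr).__name__)  # e.g. "AdaBoostRegressor"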
