In [1]:
    
import pickle as pk
import matplotlib.pylab as plt
import numpy as np
from numpy.linalg import norm
from math import sqrt, exp
%matplotlib nbagg
from PyKEP import *
import seaborn as sns
sns.set_style("whitegrid")
import sklearn
from sklearn import *
import pandas as pd
import lasagne
import theano
import theano.tensor as T
import tqdm
    
    
In [2]:
    
from matplotlib import rc
# Render all figure text with a sans-serif font (Helvetica).
font_config = {'family': 'sans-serif', 'sans-serif': ['Helvetica']}
rc('font', **font_config)
## For Palatino or another serif font use instead:
#rc('font',**{'family':'serif','serif':['Palatino']})
#rc('text', usetex=True)
    
We load the data file obtained from the optimization (in low-thrust) of many legs (GTOC7):
ast1, ast2, $t_1$, $t_2$, $m^*$, $m_i$, $\Delta V^*$, $\Delta V$, $\Delta V_L$, $\Delta V_D$, $m^*_L$, $m^*_D$
We want to learn $\Delta V$ (i.e. the final mass) from the rest (known quickly using a Lambert solver and the Damon model). The ast1 and ast2 ids can be used with the GTOC7 asteroids to look up the orbital parameters.
In [3]:
    
# Load the training legs produced by the GTOC7 low-thrust optimisations.
# NOTE(review): pickle.load can execute arbitrary code if the file is
# untrusted — acceptable for a local research artefact only.
a = pk.load(open("learn.pkl","rb"))
# One row per leg: asteroid ids, epochs [days], masses [kg] and the various
# Delta-V estimates (optimal, Lambert, Damon) listed in the markdown above.
data = pd.DataFrame(a, columns=['ast1', 'ast2', 't1', 't2', 'm_star', 'm_initial', 'dv_star',
                                'dv_optimal', 'dv_lambert', 'dv_damon', 'm_star_lambert', 'm_star_damon'])
# Peek at the first record to sanity-check the column mapping.
data.iloc[0]
    
    Out[3]:
From each line ast1, ast2, $t_1$, $t_2$, $m^*$, $m_i$, $\Delta V^*$, $\Delta V$, $\Delta V_L$, $\Delta V_D$, $m^*_L$, $m^*_D$
As attributes we use $t_2-t_1$, $m^*_D$, $m_i$, $\Delta V_D$, $\Delta V_L$, $m^*_L$, as these are the ones we can compute at the cost of a single Lambert problem solution
In [4]:
    
def comp_v_inf(r1, r2, v1, v2, t):
    """Solve the single-revolution Lambert problem from r1 to r2 over time t
    and return the departure and arrival excess velocities, stacked into one
    6-vector (v1_inf followed by v2_inf)."""
    sol = lambert_problem(r1, r2, t, MU_SUN, False, 1)
    departure_excess = sol.get_v1()[0] - v1
    arrival_excess = sol.get_v2()[0] - v2
    return np.hstack((departure_excess, arrival_excess))
    
In [5]:
    
def compute_m_D_star(v1_inf, v2_inf, DeltaT, T_max, I_sp, g_0):
    v2p1 = v2_inf + v1_inf
    v2m1 = v2_inf - v1_inf
    
    alpha = np.sum(v2p1 * v2p1, axis=1) / np.sum(v2p1 * v2m1, axis=1)
    
    tau = 0.5 * (alpha + 1 - np.sign(alpha) * np.sqrt(1 + alpha**2))
    
    # aD = damon(v1_inf, v2_inf, DeltaT)[-1] / DeltaT # this function is not vectorized
    aD = norm((v2m1.transpose() * tau - v2p1.transpose()) / (tau * DeltaT), axis=0)
    
    return 2 * T_max / aD / (1 + np.exp((-aD * DeltaT) / (I_sp * g_0)))
    
In [6]:
    
# a, e, i, Omega, omega, M
# Osculating Keplerian elements of both asteroids of every leg at epoch 0;
# resulting shape: (N, 2, 6).
orbital_elements = np.array([(
        planet.gtoc7(int(ast1)).osculating_elements(epoch(0)),
        planet.gtoc7(int(ast2)).osculating_elements(epoch(0)),
    ) for ast1, ast2 in zip(data['ast1'], data['ast2'])])
# Cartesian ephemerides (position, velocity) of ast1 at t1 and ast2 at t2;
# resulting shape: (N, 2, 2, 3).
rv = np.array([(
        planet.gtoc7(int(ast1)).eph(t1),
        planet.gtoc7(int(ast2)).eph(t2)
    ) for ast1, ast2, t1, t2 in zip(data['ast1'], data['ast2'], data['t1'], data['t2'])])
# Semi-major axes of departure/arrival orbits.
a1 = orbital_elements[:, 0, 0]
a2 = orbital_elements[:, 1, 0]
r1 = rv[:, 0, 0, :]  # departure position
v1 = rv[:, 0, 1, :]  # departure velocity
r2 = rv[:, 1, 0, :]  # arrival position
v2 = rv[:, 1, 1, :]  # arrival velocity
    
In [7]:
    
# Mean motions of the two orbits.
n1 = np.sqrt(MU_SUN/a1**3)
n2 = np.sqrt(MU_SUN/a2**3)
# Specific angular momentum vectors of the two orbits.
h1 = np.cross(r1, v1)
h2 = np.cross(r2, v2)
# Cosine of the angle between the two orbital planes.
cost = np.sum(h1 * h2, axis=1) / norm(h1, axis=1) / norm(h2, axis=1)
# Transfer time in seconds; columns 2 and 3 of the raw pickle are t1, t2
# in days (same values as data['t1'] / data['t2']).
DeltaT = (a[:, 3] - a[:, 2]) * DAY2SEC
# Excess velocities from one Lambert arc per leg, stacked as (N, 6).
v_inf = np.array([comp_v_inf(rve[0, 0, :], rve[1, 0, :], rve[0, 1, :], rve[1, 1, :], dt) for rve, dt in zip(rv, DeltaT)])
v1_inf = v_inf[:, :3]  # departure excess velocity components
v2_inf = v_inf[:, 3:]  # arrival excess velocity components
    
In [8]:
    
# Engine constants: max thrust [N], specific impulse [s], standard gravity [m/s^2].
T_max = 0.3
I_sp = 3000
g_0 = 9.80665
#m_D_star = compute_m_D_star(v1_inf, v2_inf, DeltaT, T_max, I_sp, g_0) # this value should equal to data['m_star_damon']
# NOTE(review): m_i * (1 - exp(-dv/(I_sp*g_0))) is the Tsiolkovsky *propellant*
# mass, whereas the final mass would be m_i * exp(-dv/(I_sp*g_0)) — confirm
# which quantity is intended; the same form is at least used consistently
# for the Lambert and Damon comparisons below.
m_final = data['m_initial'] * (1 - np.exp(-data['dv_optimal'] / I_sp / g_0))
    
In [9]:
    
# Scalar features: transfer time, plane-angle cosine and six vector norms —
# these become columns 0-7 after the transpose below.
base_features = np.vstack((DeltaT, cost, norm(r1, axis=1), norm(r2, axis=1),
                           norm(v1_inf, axis=1), norm(v2_inf, axis=1), norm(v1, axis=1), norm(v2, axis=1)))
# Append the raw v_inf components (cols 8-13), the cheap Lambert/Damon
# estimates (cols 14-17: dv_lambert, dv_damon, m_star_lambert, m_star_damon),
# the flattened ephemerides and the flattened orbital elements.
base_features = np.hstack((base_features.transpose(), v_inf,
                           data[['dv_lambert', 'dv_damon', 'm_star_lambert', 'm_star_damon']],
                           rv.reshape(-1, 3 * 4), orbital_elements.reshape(-1, 2 * 6)))
    
In [10]:
    
# First experiment: learn m_final with the *true* m_star as an extra feature.
X = np.hstack((base_features, data[['m_initial', 'm_star']]))
#y = data['m_star']
y = m_final
    
In [11]:
    
# NOTE(review): sklearn.cross_validation was deprecated in scikit-learn 0.18
# in favour of sklearn.model_selection (identical train_test_split signature).
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(X, y, test_size=0.3, random_state=0)
    
In [12]:
    
# Candidate regressors; the commented entries were tried and disabled
# (out-of-memory / not applicable cases annotated inline).
regressors = [
#    sklearn.linear_model.LinearRegression(),
#    sklearn.linear_model.PassiveAggressiveRegressor(),
#    sklearn.linear_model.RANSACRegressor(),
#    sklearn.linear_model.SGDRegressor(),
#    sklearn.linear_model.TheilSenRegressor(),
    #sklearn.linear_model.ARDRegression(), # out of memory...
#    sklearn.linear_model.BayesianRidge(),
#    sklearn.linear_model.ElasticNet(),
#    sklearn.linear_model.Lars(),
#    sklearn.linear_model.Lasso(),
    #sklearn.neighbors.RadiusNeighborsRegressor(), # out of memory...
#    sklearn.neighbors.KNeighborsRegressor(),
#    sklearn.tree.DecisionTreeRegressor(),#min_samples_split=13, min_samples_leaf=5),
#    sklearn.ensemble.ExtraTreesRegressor(),
#    sklearn.ensemble.AdaBoostRegressor(),
    sklearn.ensemble.GradientBoostingRegressor(max_depth=8),
#    sklearn.ensemble.BaggingRegressor(),
#    sklearn.ensemble.RandomForestRegressor(),
    #sklearn.gaussian_process.GaussianProcess(), # out of memory...
    #sklearn.isotonic.IsotonicRegression(), # X is not 1D
    #sklearn.kernel_ridge.KernelRidge(), # out of memory...
#    sklearn.svm.SVR(),
#    sklearn.svm.LinearSVR(),
#    sklearn.svm.NuSVR(),
#    sklearn.cross_decomposition.PLSRegression(),
]
# Fit every enabled candidate and keep the one with the lowest test MSE.
best = None
best_mse = np.inf
for regr in regressors:
    print(type(regr))
    regr.fit(X_train, y_train)
    mse = sklearn.metrics.mean_squared_error(y_test, regr.predict(X_test))
    if mse < best_mse:
        best_mse = mse
        best = regr
# BUG FIX: the MAE used to be computed from `regr` (the last regressor of the
# loop) rather than from the winning model; the two only coincide when a
# single candidate is enabled. Report the best model's MAE.
mae = sklearn.metrics.mean_absolute_error(y_test, best.predict(X_test))
print("And the winner is", type(best))
print("MAE", mae)
print("RMSE", np.sqrt(best_mse))
    
    
In [13]:
    
# Second experiment: predict m_star itself from the cheap features only.
X = base_features
y = data['m_star']
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(X, y, test_size=0.3, random_state=0)
    
In [14]:
    
# error of the damon model
# Baseline to beat: discrepancy between the analytical Damon estimate and
# the optimised m_star over the whole dataset (no learning involved).
mse = sklearn.metrics.mean_squared_error(data['m_star'], data['m_star_damon'])
mae = sklearn.metrics.mean_absolute_error(data['m_star'], data['m_star_damon'])
print("MAE", mae)
print("RMSE", np.sqrt(mse))
    
    
In [15]:
    
# learn to predict m_star
m_star_predictor = ensemble.GradientBoostingRegressor(max_depth=8)
m_star_predictor.fit(X_train, y_train)
# error
# Held-out error of the learned m_star model, to be compared against the
# analytical Damon baseline computed in the previous cell.
mse = sklearn.metrics.mean_squared_error(y_test, m_star_predictor.predict(X_test))
mae = sklearn.metrics.mean_absolute_error(y_test, m_star_predictor.predict(X_test))
print("MAE", mae)
print("RMSE", np.sqrt(mse))
    
    
In [16]:
    
# create a prediction of m_star as feature
# NOTE(review): predicting over the full X includes the rows the model was
# trained on, so the downstream m_final learner sees optimistically accurate
# m_star values for ~70% of its data — potential leakage to keep in mind.
m_star_predicted = m_star_predictor.predict(X)
    
In [17]:
    
# training data to learn m_final
# Features: cheap base features, initial mass and the *predicted* m_star
# (in place of the true one used in the first experiment).
X = np.hstack((base_features, data[['m_initial']], m_star_predicted.reshape(-1, 1)))
y = m_final
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(X, y, test_size=0.3, random_state=0)
    
In [18]:
    
# learn to predict m_final
# Same model family/hyperparameters as the m_star predictor, now trained on
# the feature set that includes the predicted m_star.
m_final_predictor = ensemble.GradientBoostingRegressor(max_depth=8)
m_final_predictor.fit(X_train, y_train)
# error
mse = sklearn.metrics.mean_squared_error(y_test, m_final_predictor.predict(X_test))
mae = sklearn.metrics.mean_absolute_error(y_test, m_final_predictor.predict(X_test))
print("MAE", mae)
print("RMSE", np.sqrt(mse))
    
    
In [19]:
    
# what if we use the m_star prediction for a regressor that learned with m_star?
# regressors[0] was fit on features ending in (m_initial, true m_star); the
# current X_test has the same width but its last column is the *predicted*
# m_star instead — this measures how much that substitution hurts.
mse = sklearn.metrics.mean_squared_error(y_test, regressors[0].predict(X_test))
mae = sklearn.metrics.mean_absolute_error(y_test, regressors[0].predict(X_test))
print("MAE", mae)
print("RMSE", np.sqrt(mse))
    
    
In [20]:
    
# the error of the m_final predictor (trained with the m_star prediction)
plt.figure()
h = plt.hist(y_test - m_final_predictor.predict(X_test), 100)
plt.xlabel("error (kg) on the test set")
# NOTE(review): plt.axes() adds a brand-new (empty) axes on top of the
# histogram — probably a leftover; plt.show() (or nothing) seems intended.
plt.axes()
    
    
    
    Out[20]:
Let's see what would happen if, instead, we predict the final mass as:
$$ m_f = m_i \exp\left(\frac{-\Delta V_L}{I_{sp} g_0}\right) $$
In [21]:
    
# error of the lambert model
# Same mass formula as m_final but fed with the Lambert Delta-V instead of
# the optimal one — how good is the purely ballistic estimate?
m_final_lambert = data['m_initial'] * (1 - np.exp(-data['dv_lambert'] / I_sp / g_0))
mse = sklearn.metrics.mean_squared_error(m_final, m_final_lambert)
mae = sklearn.metrics.mean_absolute_error(m_final, m_final_lambert)
print("MAE", mae)
print("RMSE", np.sqrt(mse))
    
    
In [22]:
    
# error of the damon model
# Same mass formula again, now using the Damon Delta-V estimate.
m_final_damon = data['m_initial'] * (1 - np.exp(-data['dv_damon'] / I_sp / g_0))
mse = sklearn.metrics.mean_squared_error(m_final, m_final_damon)
mae = sklearn.metrics.mean_absolute_error(m_final, m_final_damon)
print("MAE", mae)
print("RMSE", np.sqrt(mse))
    
    
In [23]:
    
# plot lambert versus machine learning
# Column -2 of X is m_initial and column 14 is dv_lambert (see the feature
# construction in cells 9 and 17), so this recomputes the Lambert mass model
# on the test rows only.
m_final_lambert_test = X_test[:, -2] * (1 - np.exp(-X_test[:, 14] / I_sp / g_0))
plt.figure()
h = plt.hist(y_test - m_final_lambert_test, 100, alpha=0.8,normed=False, log=False)
h = plt.hist(y_test - m_final_predictor.predict(X_test), 100, alpha=0.5, normed=False, log=False)
plt.xlabel("Error (kg)")
plt.ylabel("N. instances")
plt.xlim(-200,200)
# NOTE(review): `normed` and the positional pad of tight_layout are removed
# in newer matplotlib (use density=... and tight_layout(pad=1)).
plt.tight_layout(1)
    
    
    
    
In [41]:
    
class DeepLearningRegressor:
    """Fully-connected feed-forward regression network (lasagne/theano) with
    a scikit-learn-like fit/predict interface.

    Inputs and targets are both standardised with `scaler`; the network is a
    stack of dense hidden layers (lasagne's default nonlinearity) followed by
    a single linear output, trained on mean squared error with Nesterov
    momentum.
    """
    # NOTE(review): `layers` uses a mutable default list — never mutated here,
    # but a tuple would be the safer idiom.
    def __init__(self, layers=[20, 40, 20, 10, 5], learning_rate=0.001, batch_size=16, num_epochs=500, scaler=preprocessing.StandardScaler):
        self.layers = layers
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.num_epochs = num_epochs
        
        # Separate scalers for inputs and targets; fitted in fit().
        self.input_scaler = scaler()
        self.output_scaler = scaler()
        
        # Optional validation split; populated in fit() when provided.
        self.X_val = None
        self.y_val = None
    
    @staticmethod
    def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
        """Yield (inputs, targets) mini-batches of exactly `batchsize` rows;
        the trailing len(inputs) % batchsize rows are dropped. With
        shuffle=True a fresh random permutation is drawn on every call."""
        assert len(inputs) == len(targets)
        if shuffle:
            indices = np.arange(len(inputs))
            np.random.shuffle(indices)
        for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
            if shuffle:
                excerpt = indices[start_idx:start_idx + batchsize]
            else:
                excerpt = slice(start_idx, start_idx + batchsize)
            yield inputs[excerpt], targets[excerpt]
    def fit(self, X, y, X_val=None, y_val=None):
        """Standardise the data, build the theano graph and train.

        X is (N, n_features); y is 1-D and reshaped to a column vector.
        When X_val/y_val are given, a validation loss is tracked per epoch.
        """
        self.X_train = self.input_scaler.fit_transform(X)
        self.y_train = self.output_scaler.fit_transform(y.reshape((-1, 1)))
        
        if y_val is not None:
            # Reuse the scalers fitted on the training data.
            self.X_val = self.input_scaler.transform(X_val)
            self.y_val = self.output_scaler.transform(y_val.reshape((-1, 1)))
        
        # input layer
        nn = lasagne.layers.InputLayer((None, X.shape[-1]))
        input_var = nn.input_var
        
        # hidden layers (ReLu)
        for nodes in self.layers:
            nn = lasagne.layers.DenseLayer(nn, nodes)
        
        # output layer (linear)
        nn = lasagne.layers.DenseLayer(nn, 1, nonlinearity=lasagne.nonlinearities.linear)
        
        output_var = lasagne.layers.get_output(nn)
        target_var = T.matrix('target')
        loss = lasagne.objectives.squared_error(target_var, output_var).mean()
        
        params = lasagne.layers.get_all_params(nn)
        updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=self.learning_rate)
        
        # Compiled theano functions: training step (with updates),
        # validation loss, and prediction.
        self.train_fn = theano.function([input_var, target_var], loss, updates=updates)
        self.val_fn = theano.function([input_var, target_var], loss)
        self.pred_fn = theano.function([input_var], output_var)
        
        self.train_loss = []  # mean training loss per epoch (scaled units)
        self.val_loss = []    # mean validation loss per epoch, if tracked
        
        self.keep_training()
    
    def keep_training(self):
        """Run num_epochs (more) epochs; can be called again to continue
        training from the current parameters."""
        for epoch in tqdm.trange(self.num_epochs):
            train_batches = 0
            train_err = 0
            val_batches = 0
            val_err = 0
            for inputs, targets in DeepLearningRegressor.iterate_minibatches(self.X_train, self.y_train, self.batch_size, shuffle=True):
                train_err += self.train_fn(inputs, targets)
                train_batches += 1
            self.train_loss.append(train_err / train_batches)
            if self.y_val is not None:
                for inputs, targets in DeepLearningRegressor.iterate_minibatches(self.X_val, self.y_val, self.batch_size, shuffle=False):
                    val_err += self.val_fn(inputs, targets)
                    val_batches += 1
                self.val_loss.append(val_err / val_batches)
                
    def predict(self, X):
        """Predict targets for X, undoing both input and output scaling."""
        return self.output_scaler.inverse_transform(self.pred_fn(self.input_scaler.transform(X)))
    
    def plot_loss(self):
        """Plot the per-epoch training and validation losses (scaled units)."""
        plt.figure()
        plt.plot(self.train_loss)
        plt.plot(self.val_loss)
    
In [39]:
    
# Train the neural network on the same split, using the test split for
# validation-loss monitoring.
m_nn_predictor = DeepLearningRegressor()
m_nn_predictor.fit(X_train, y_train, X_test, y_test)
# error
mse = sklearn.metrics.mean_squared_error(y_test, m_nn_predictor.predict(X_test))
mae = sklearn.metrics.mean_absolute_error(y_test, m_nn_predictor.predict(X_test))
print("MAE", mae)
print("RMSE", np.sqrt(mse))
    
    
    
    
In [42]:
    
# Training and validation loss curves per epoch (scaled-target units).
m_nn_predictor.plot_loss()
    
    
    
    Out[42]: