In [63]:
# Imports
%load_ext autoreload
%autoreload 2
%matplotlib inline
from matplotlib import pyplot as plt
import csv
import importlib
from tqdm import tqdm
from scripts import proj1_helpers, helpers
from scripts import implementation, feature_processing, k_fold, model_linear, model_logistic
import numpy as np


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [2]:
train_path = '../data/train.csv'
test_path  = '../data/test.csv'

In [3]:
# loading data
y, X, idx = proj1_helpers.load_csv_data(train_path)
y_t, X_t, ids_t = proj1_helpers.load_csv_data(test_path)

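As a quick sanity check (under the assumption that load_csv_data returns labels in {-1, +1}, the raw feature matrix and the event ids), the shapes and label values can be inspected directly:

print(y.shape, X.shape, idx.shape)   # one label, feature row and id per training event
print(np.unique(y))                  # labels are assumed to be -1 (background) and +1 (signal)
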
In [10]:
deg = 5
X_p, (x_mean, x_std) = feature_processing.process_X(X, deg)
X_t_p, _ = feature_processing.process_X(X_t, deg, (x_mean, x_std))


100%|██████████| 5/5 [00:41<00:00,  6.94s/it]
100%|██████████| 5/5 [01:39<00:00, 16.63s/it]

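For reference, a minimal sketch of the kind of preprocessing process_X appears to perform here: per-column polynomial expansion up to degree deg, standardization with training-set statistics (reused for the test set), and a bias column. The actual feature_processing.process_X may differ; everything below is an assumption for illustration.

def process_X_sketch(X, deg, stats=None):
    # Hypothetical stand-in for feature_processing.process_X (not the project's actual code).
    X_poly = np.hstack([X ** d for d in range(1, deg + 1)])   # powers of each raw feature
    if stats is None:
        mean, std = X_poly.mean(axis=0), X_poly.std(axis=0)
        std[std == 0] = 1.0                                   # guard constant columns
    else:
        mean, std = stats                                     # reuse training statistics
    X_std = (X_poly - mean) / std
    return np.hstack([np.ones((X.shape[0], 1)), X_std]), (mean, std)
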
In [12]:
# Logistic regression setup: convert labels to {0, 1} and initialize the weight vector to zero
y_01 = helpers.y_to_01(np.array([y]).T)
np.random.seed(1)
w0 = np.zeros((X_p.shape[1], 1))

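The label conversion is presumably the usual affine remap of the ±1 targets into {0, 1}, which the logistic loss expects; a minimal equivalent of helpers.y_to_01 (an assumption, not the actual helper) is:

y_col = np.array([y]).T        # column vector of shape (N, 1)
y_01_sketch = (y_col + 1) / 2  # -1 -> 0, +1 -> 1
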
In [162]:
model = implementation.reg_logistic_regression_newton  # alternative: reg_logistic_regression_newton_batch
model_args = {'initial_w': w0, 'max_iters': 20, 'gamma': 1e-1, 'debug': False}  # the batch variant also takes 'batch_size': 50000
loss = model_linear.compute_accuracy_loss

lambdas = np.logspace(-6, -3, 15)

idx_min, rmse_all, lambdas = k_fold.cross_validation_select(X_p, y_01, model, loss, kw_model = model_args, seed = 1,
                                                            k_fold = 3, lambdas = lambdas, do_plot = True, do_tqdm = False)


100%|██████████| 20/20 [00:36<00:00,  1.89s/epoch, acc=-0.81, grad=728, loss=7.15e+04, w=2.74e+03]     
100%|██████████| 20/20 [00:39<00:00,  1.96s/epoch, acc=-0.81, grad=258, loss=7.15e+04, w=950]   
100%|██████████| 20/20 [00:47<00:00,  2.25s/epoch, acc=-0.81, grad=273, loss=7.14e+04, w=797]
100%|██████████| 20/20 [00:37<00:00,  1.78s/epoch, acc=-0.81, grad=586, loss=7.16e+04, w=2.39e+03]     
100%|██████████| 20/20 [00:47<00:00,  2.68s/epoch, acc=-0.81, grad=225, loss=7.15e+04, w=672]
100%|██████████| 20/20 [00:41<00:00,  2.62s/epoch, acc=-0.81, grad=265, loss=7.14e+04, w=663]
100%|██████████| 20/20 [00:42<00:00,  1.91s/epoch, acc=-0.81, grad=434, loss=7.16e+04, w=1.99e+03]     
100%|██████████| 20/20 [00:38<00:00,  1.98s/epoch, acc=-0.81, grad=207, loss=7.15e+04, w=516]
100%|██████████| 20/20 [00:43<00:00,  2.07s/epoch, acc=-0.81, grad=254, loss=7.14e+04, w=594]
100%|██████████| 20/20 [00:36<00:00,  1.82s/epoch, acc=-0.81, grad=309, loss=7.16e+04, w=1.58e+03]
100%|██████████| 20/20 [00:36<00:00,  1.83s/epoch, acc=-0.81, grad=195, loss=7.15e+04, w=437]
100%|██████████| 20/20 [00:41<00:00,  2.33s/epoch, acc=-0.81, grad=240, loss=7.14e+04, w=554]
100%|██████████| 20/20 [00:42<00:00,  2.07s/epoch, acc=-0.81, grad=222, loss=7.16e+04, w=1.2e+03] 
100%|██████████| 20/20 [00:45<00:00,  2.47s/epoch, acc=-0.81, grad=183, loss=7.15e+04, w=397]
100%|██████████| 20/20 [00:40<00:00,  1.90s/epoch, acc=-0.81, grad=221, loss=7.14e+04, w=521]
100%|██████████| 20/20 [00:40<00:00,  1.92s/epoch, acc=-0.81, grad=168, loss=7.16e+04, w=884]
100%|██████████| 20/20 [00:45<00:00,  2.37s/epoch, acc=-0.81, grad=171, loss=7.15e+04, w=375]
100%|██████████| 20/20 [01:46<00:00,  4.05s/epoch, acc=-0.81, grad=198, loss=7.14e+04, w=486]
100%|██████████| 20/20 [01:01<00:00,  2.59s/epoch, acc=-0.81, grad=136, loss=7.16e+04, w=651]
100%|██████████| 20/20 [00:53<00:00,  2.60s/epoch, acc=-0.81, grad=156, loss=7.15e+04, w=357]
100%|██████████| 20/20 [00:54<00:00,  3.02s/epoch, acc=-0.81, grad=174, loss=7.14e+04, w=446]
100%|██████████| 20/20 [00:49<00:00,  2.25s/epoch, acc=-0.81, grad=116, loss=7.16e+04, w=493]
100%|██████████| 20/20 [00:55<00:00,  2.55s/epoch, acc=-0.81, grad=142, loss=7.15e+04, w=338]
100%|██████████| 20/20 [00:47<00:00,  2.61s/epoch, acc=-0.81, grad=150, loss=7.14e+04, w=403]
100%|██████████| 20/20 [00:42<00:00,  2.11s/epoch, acc=-0.81, grad=100, loss=7.16e+04, w=391]
100%|██████████| 20/20 [00:43<00:00,  2.26s/epoch, acc=-0.81, grad=127, loss=7.15e+04, w=316]
100%|██████████| 20/20 [00:54<00:00,  3.42s/epoch, acc=-0.81, grad=129, loss=7.14e+04, w=359]
100%|██████████| 20/20 [01:07<00:00,  2.45s/epoch, acc=-0.81, grad=92.1, loss=7.16e+04, w=324]
100%|██████████| 20/20 [00:39<00:00,  2.13s/epoch, acc=-0.81, grad=113, loss=7.15e+04, w=291]
100%|██████████| 20/20 [00:42<00:00,  2.21s/epoch, acc=-0.81, grad=112, loss=7.14e+04, w=318]
100%|██████████| 20/20 [00:39<00:00,  1.99s/epoch, acc=-0.81, grad=86, loss=7.16e+04, w=276]  
100%|██████████| 20/20 [01:04<00:00,  2.73s/epoch, acc=-0.81, grad=101, loss=7.15e+04, w=263]
100%|██████████| 20/20 [00:44<00:00,  2.12s/epoch, acc=-0.82, grad=98, loss=7.14e+04, w=279] 
100%|██████████| 20/20 [00:44<00:00,  2.24s/epoch, acc=-0.81, grad=79, loss=7.16e+04, w=236]  
100%|██████████| 20/20 [00:42<00:00,  2.13s/epoch, acc=-0.81, grad=89.4, loss=7.15e+04, w=232]
100%|██████████| 20/20 [00:42<00:00,  2.12s/epoch, acc=-0.81, grad=85.8, loss=7.14e+04, w=241]
100%|██████████| 20/20 [00:41<00:00,  2.08s/epoch, acc=-0.81, grad=71.6, loss=7.16e+04, w=200]
100%|██████████| 20/20 [00:42<00:00,  2.11s/epoch, acc=-0.81, grad=77.3, loss=7.15e+04, w=198]
100%|██████████| 20/20 [00:42<00:00,  2.14s/epoch, acc=-0.81, grad=74.1, loss=7.14e+04, w=203]
100%|██████████| 20/20 [00:35<00:00,  1.76s/epoch, acc=-0.81, grad=64.9, loss=7.16e+04, w=166]
100%|██████████| 20/20 [00:35<00:00,  1.86s/epoch, acc=-0.81, grad=69, loss=7.15e+04, w=166]  
100%|██████████| 20/20 [00:35<00:00,  1.80s/epoch, acc=-0.81, grad=65, loss=7.14e+04, w=167]  
100%|██████████| 20/20 [00:44<00:00,  2.63s/epoch, acc=-0.81, grad=59.6, loss=7.17e+04, w=136] 
100%|██████████| 20/20 [00:43<00:00,  1.88s/epoch, acc=-0.81, grad=64.3, loss=7.16e+04, w=141] 
100%|██████████| 20/20 [00:37<00:00,  1.88s/epoch, acc=-0.81, grad=59.9, loss=7.15e+04, w=137] 

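Note that compute_accuracy_loss appears to report the negative accuracy (the acc values above hover around -0.81), so minimizing it maximizes accuracy. For context, a simplified sketch of what cross_validation_select presumably does (all names and signatures below are assumptions): for each lambda, train on k-1 folds, evaluate on the held-out fold, and return the index of the lambda with the lowest mean validation loss.

def cross_validation_select_sketch(X, y, model, loss, lambdas, k_fold=3, seed=1, kw_model=None):
    # Hypothetical version of k_fold.cross_validation_select (not the project's actual code).
    np.random.seed(seed)
    folds = np.array_split(np.random.permutation(X.shape[0]), k_fold)
    mean_losses = []
    for lambda_ in lambdas:
        fold_losses = []
        for i in range(k_fold):
            val = folds[i]
            tr = np.concatenate([folds[j] for j in range(k_fold) if j != i])
            w, _ = model(y[tr], X[tr], lambda_=lambda_, **(kw_model or {}))
            fold_losses.append(loss(y[val], X[val], w))   # assumed signature loss(y, tx, w)
        mean_losses.append(np.mean(fold_losses))
    mean_losses = np.array(mean_losses)
    return int(np.argmin(mean_losses)), mean_losses, lambdas
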
In [75]:
lambda_ = lambdas[idx_min]
print(lambda_)


4.64158883361e-06

In [76]:
np.random.seed(42)
w11, l = implementation.reg_logistic_regression_batch(y_01, X_p, lambda_ = lambda_,
                                                      initial_w = w0, max_iters = 1000, gamma = 1e-5,
                                                      debug = 'plot', batch_size = 50000)


 83%|████████▎ | 834/1000 [03:57<00:43,  3.78epoch/s, acc=-0.79, diff=278, grad=1.14e+03, loss=1.11e+05, w=4.02]     

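For context, a minimal sketch of the mini-batch gradient step that reg_logistic_regression_batch presumably performs for L2-regularized logistic regression (the actual implementation, its loss bookkeeping and its debug/plot output will differ):

def sigmoid(t):
    return 1.0 / (1.0 + np.exp(-t))

def reg_logistic_sgd_sketch(y, tx, lambda_, initial_w, max_iters, gamma, batch_size):
    # Hypothetical mini-batch SGD on the penalized negative log-likelihood (an assumption).
    w = initial_w.copy()
    n = tx.shape[0]
    for _ in range(max_iters):
        batch = np.random.choice(n, batch_size, replace=False)
        p = sigmoid(tx[batch] @ w)
        grad = tx[batch].T @ (p - y[batch]) + 2 * lambda_ * w   # gradient on the mini-batch
        w = w - gamma * grad
    p = sigmoid(tx @ w)
    loss = -np.sum(y * np.log(p + 1e-12) + (1 - y) * np.log(1 - p + 1e-12)) + lambda_ * np.sum(w ** 2)
    return w, loss
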
In [103]:
np.random.seed(42)
w12, l = implementation.reg_logistic_regression_batch(y_01, X_p, lambda_ = lambda_,
                                                      initial_w = w11, max_iters = 300, gamma = 1e-6,
                                                      debug = 'plot', batch_size = 50000)


 83%|████████▎ | 250/300 [01:13<00:12,  3.85epoch/s, acc=-0.79, diff=267, grad=862, loss=1.11e+05, w=4.08]

In [113]:
np.random.seed(42)
w101, l = implementation.reg_logistic_regression_batch(y_01, X_p, lambda_ = 0.1,
                                                      initial_w = w0, max_iters = 300, gamma = 1e-5,
                                                      debug = 'plot', batch_size = 50000)


 83%|████████▎ | 250/300 [01:13<00:15,  3.17epoch/s, acc=-0.78, diff=924, grad=1.27e+03, loss=1.14e+05, w=2.59]     

In [146]:
np.random.seed(42)
w01, l = implementation.reg_logistic_regression_newton(y_01, X_p, lambda_ = 1e-4,
                                                      initial_w = w0, max_iters = 50, gamma = 1e-1,
                                                      debug = 'plot')


100%|██████████| 50/50 [02:39<00:00,  5.00s/epoch, acc=-0.82, grad=22.2, loss=1.01e+05, w=481]

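The Newton variant presumably uses the full Hessian of the penalized logistic loss, X^T S X + 2*lambda*I with S = diag(p*(1-p)), and damps the step by gamma. A sketch under those assumptions (reusing sigmoid from above; not the project's actual implementation):

def reg_logistic_newton_sketch(y, tx, lambda_, initial_w, max_iters, gamma):
    # Hypothetical damped Newton iteration for L2-regularized logistic regression.
    w = initial_w.copy()
    for _ in range(max_iters):
        p = sigmoid(tx @ w)                                       # (N, 1) predicted probabilities
        grad = tx.T @ (p - y) + 2 * lambda_ * w
        s = (p * (1 - p)).ravel()                                 # Hessian weights
        hess = tx.T @ (tx * s[:, None]) + 2 * lambda_ * np.eye(tx.shape[1])
        w = w - gamma * np.linalg.solve(hess, grad)               # damped Newton step
    return w
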
In [165]:
np.random.seed(42)
w010, l = implementation.reg_logistic_regression_newton(y_01, X_p, lambda_ = 1e-4,
                                                      initial_w = w01, max_iters = 20, gamma = 0.1,
                                                      debug = 'plot')


100%|██████████| 20/20 [01:05<00:00,  3.30s/epoch, acc=-0.82, grad=3.84, loss=1.01e+05, w=493]

In [167]:
y_pred = proj1_helpers.predict_labels(w010, X_t_p)

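predict_labels presumably thresholds the linear score tx @ w at zero (equivalently, sigmoid(score) > 0.5) and maps back to ±1 labels for the submission; a minimal equivalent under that assumption:

def predict_labels_sketch(w, tx):
    # Assumed behaviour of proj1_helpers.predict_labels (not its actual code).
    scores = tx @ w
    return np.where(scores > 0, 1, -1).ravel()   # score > 0 <=> probability > 0.5
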
In [168]:
output_path = '../data/logreg_1_submission.csv'
proj1_helpers.create_csv_submission(ids_t, y_pred, output_path)

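The submission file is assumed to follow the usual Id,Prediction format, one row per test event; a minimal sketch of create_csv_submission using the csv module already imported above:

def create_csv_submission_sketch(ids, y_pred, path):
    # Assumed output format: a header row 'Id,Prediction', then one row per event.
    with open(path, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['Id', 'Prediction'])
        writer.writeheader()
        for i, p in zip(ids, y_pred):
            writer.writerow({'Id': int(i), 'Prediction': int(p)})
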
In [30]:
# Workaround: clear stale tqdm locks left over from an interrupted run, then re-import tqdm
tqdm.get_lock().locks = []
from tqdm import tqdm