In [23]:
# Imports
%load_ext autoreload
%autoreload 2
%matplotlib inline
from matplotlib import pyplot as plt
import csv
import importlib
from tqdm import tqdm
from scripts import proj1_helpers, helpers, implementation, feature_processing, k_fold, model_linear, model_logistic, plots
import numpy as np
In [3]:
train_path = '../data/train.csv'
test_path = '../data/test.csv'
In [ ]:
# Load the training and test data
y, X, idx = proj1_helpers.load_csv_data(train_path)
y_t, X_t, ids_t = proj1_helpers.load_csv_data(test_path)
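For readers without the course helpers, here is a minimal sketch of what a loader like `proj1_helpers.load_csv_data` plausibly does; the column layout (id, label, then numeric features) and the 's'/'b' label encoding are assumptions based on the Higgs-style CSV format:
In [ ]:
import numpy as np

def load_csv_data_sketch(path):
    """Hypothetical loader: columns are id, label ('s'/'b'), then numeric features."""
    data = np.genfromtxt(path, delimiter=",", skip_header=1, dtype=str)
    ids = data[:, 0].astype(int)
    y = np.where(data[:, 1] == "s", 1, -1)  # signal -> +1, background -> -1
    X = data[:, 2:].astype(float)
    return y, X, ids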
In [32]:
# Model: ridge regression, scored by classification accuracy on the validation folds
model = implementation.ridge_regression
loss = model_linear.compute_accuracy_loss  # alternative: model_linear.compute_loss_reg
lambdas = [0]  # no regularization for the baselines; alternative: np.logspace(-5, 1, 10)
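`implementation.ridge_regression` is not shown in this notebook; for reference, a minimal sketch of the closed-form solution it presumably computes (the 2·N scaling of lambda is a common course convention and an assumption here):
In [ ]:
import numpy as np

def ridge_regression_sketch(y, tx, lambda_):
    """Solve (X^T X + 2 N lambda I) w = X^T y for the ridge weights w."""
    n, d = tx.shape
    a = tx.T @ tx + 2 * n * lambda_ * np.eye(d)
    return np.linalg.solve(a, tx.T @ y)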
In [31]:
np.min(X)  # sanity check: the most negative entry (the missing-value sentinel, expected to be -999)
Out[31]:
Baseline: no preprocessing
In [36]:
X_p = feature_processing.add_polynomial(X, [], max_degrees = [])  # pass-through: no polynomial features added
idx_min, loss_val_all, lambdas = k_fold.cross_validation_select(X_p, y, model, loss, seed = 1, k_fold = 5, lambdas = lambdas, do_plot = False, do_tqdm = False)
np.mean(loss_val_all, axis=2)  # mean loss over the k folds
Out[36]:
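The call above hides the cross-validation loop. A minimal sketch of the k-fold scheme that `k_fold.cross_validation_select` presumably runs; the `train_fn`/`loss_fn` signatures are assumptions:
In [ ]:
import numpy as np

def cross_validation_sketch(X, y, train_fn, loss_fn, k_fold=5, seed=1):
    """Hypothetical k-fold loop: train on k-1 folds, score on the held-out fold."""
    rng = np.random.RandomState(seed)
    folds = np.array_split(rng.permutation(len(y)), k_fold)
    losses = []
    for k in range(k_fold):
        val = folds[k]
        tr = np.concatenate([folds[j] for j in range(k_fold) if j != k])
        w = train_fn(y[tr], X[tr])
        losses.append(loss_fn(y[val], X[val], w))
    return losses
A fixed lambda would be bound via a closure, e.g. `train_fn = lambda y, X: ridge_regression_sketch(y, X, 0)`.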
In [38]:
# Standardization only
X_p = feature_processing.add_polynomial(X, [], max_degrees = [])
X_p, _, _ = feature_processing.standardize(X_p)  # zero mean, unit variance per feature
idx_min, loss_val_all, lambdas = k_fold.cross_validation_select(X_p, y, model, loss, seed = 1, k_fold = 5, lambdas = lambdas, do_plot = False, do_tqdm = False)
np.mean(loss_val_all, axis=2)
Out[38]:
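`feature_processing.standardize` returns three values, presumably the scaled data plus the per-column statistics so the same transform can be reapplied to the test set. A minimal sketch under that assumption:
In [ ]:
import numpy as np

def standardize_sketch(X):
    """Scale each column to zero mean and unit variance; return the statistics too."""
    mean = X.mean(axis=0)
    std = X.std(axis=0)
    std[std == 0] = 1.0  # guard against constant columns
    return (X - mean) / std, mean, std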
In [39]:
# Standardization plus imputation: add 0/1 missingness indicators, then replace the sentinel by column means
need_impute = [0, 5, 6, 12, 23, 24, 25, 26, 27, 28]
X_p = feature_processing.indicator_missing(X, need_impute)
X_p = feature_processing.impute_with_mean(X_p, need_impute)
X_p = feature_processing.add_polynomial(X_p, [], max_degrees = [])
X_p, _, _ = feature_processing.standardize(X_p)
idx_min, loss_val_all, lambdas = k_fold.cross_validation_select(X_p, y, model, loss, seed = 1, k_fold = 5, lambdas = lambdas, do_plot = False, do_tqdm = False)
np.mean(loss_val_all, axis=2)
Out[39]:
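A minimal sketch of the two imputation helpers used above, assuming missing entries are marked with the -999 sentinel (the usual convention in the Higgs dataset; an assumption here):
In [ ]:
import numpy as np

MISSING = -999.0  # assumed missing-value sentinel

def indicator_missing_sketch(X, columns):
    """Append one 0/1 indicator column per feature with missing entries."""
    flags = (X[:, columns] == MISSING).astype(float)
    return np.hstack([X, flags])

def impute_with_mean_sketch(X, columns):
    """Replace the sentinel by the per-column mean of the observed entries."""
    X = X.copy()
    for c in columns:
        mask = X[:, c] == MISSING
        X[mask, c] = X[~mask, c].mean()
    return X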
In [43]:
# As above, plus one-hot encoding of the categorical feature
need_impute = [0, 5, 6, 12, 23, 24, 25, 26, 27, 28]
categorical = [23]
X_p = feature_processing.indicator_missing(X, need_impute)
X_p = feature_processing.impute_with_mean(X_p, need_impute)
X_p = feature_processing.add_polynomial(X_p, [], max_degrees = [])
X_p = feature_processing.binarize_categorical(X_p, categorical)  # one-hot encode column 23
X_p, _, _ = feature_processing.standardize(X_p)
idx_min, loss_val_all, lambdas = k_fold.cross_validation_select(X_p, y, model, loss, seed = 1, k_fold = 5, lambdas = lambdas, do_plot = False, do_tqdm = False)
np.mean(loss_val_all, axis=2)
Out[43]:
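`binarize_categorical` presumably one-hot encodes the given columns; a minimal sketch (dropping the original column is an assumption):
In [ ]:
import numpy as np

def binarize_categorical_sketch(X, columns):
    """One-hot encode the given columns and drop the originals."""
    keep = [c for c in range(X.shape[1]) if c not in columns]
    parts = [X[:, keep]]
    for c in columns:
        values = np.unique(X[:, c])
        parts.append((X[:, c][:, None] == values[None, :]).astype(float))
    return np.hstack(parts)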
In [42]:
# Full pipeline: imputation, indicators, one-hot encoding, and degree-3 polynomial features
need_impute = [0, 5, 6, 12, 23, 24, 25, 26, 27, 28]
categorical = [23]
need_poly = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29]  # every feature except the categorical column 23
X_p = feature_processing.indicator_missing(X, need_impute)
X_p = feature_processing.impute_with_mean(X_p, need_impute)
X_p = feature_processing.add_polynomial(X_p, need_poly, max_degrees = 3)
X_p = feature_processing.binarize_categorical(X_p, categorical)
X_p, _, _ = feature_processing.standardize(X_p)
idx_min, loss_val_all, lambdas = k_fold.cross_validation_select(X_p, y, model, loss, seed = 1, k_fold = 5, lambdas = lambdas, do_plot = False, do_tqdm = False)
np.mean(loss_val_all, axis=2)
Out[42]:
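For reference, a minimal sketch of the degree-3 expansion that `add_polynomial` presumably performs; appending pure powers (no cross terms) and the scalar `max_degree` form are assumptions, since the original also accepts a list for `max_degrees`:
In [ ]:
import numpy as np

def add_polynomial_sketch(X, columns, max_degree=3):
    """Append powers x**2 ... x**max_degree of the selected columns."""
    extras = [X[:, c][:, None] ** d for c in columns for d in range(2, max_degree + 1)]
    return np.hstack([X] + extras) if extras else X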
In [71]:
# Final configuration: imputation, indicators, and one-hot encoding, with a small lambda grid
need_impute = [0, 5, 6, 12, 23, 24, 25, 26, 27, 28]
categorical = [23]
need_poly = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29]  # kept for reference; unused below
X_p = feature_processing.indicator_missing(X, need_impute)
X_p = feature_processing.impute_with_mean(X_p, need_impute)
X_p = feature_processing.add_polynomial(X_p, [])  # alternative: add_polynomial(X_p, need_poly, max_degrees = 2)
X_p = feature_processing.binarize_categorical(X_p, categorical)
X_p, _, _ = feature_processing.standardize(X_p)
lambdas1 = np.logspace(-6, -5, 2)  # lambda grid searched below
In [72]:
idx_min, loss_val_all, lambdas = k_fold.cross_validation_select(X_p, y, model, loss, seed = 1, k_fold = 5, lambdas = lambdas1, do_plot = True, do_tqdm = True)
np.mean(loss_val_all, axis=2)
Out[72]:
In [49]:
# Workaround: clear tqdm's internal lock list after an interrupted run so new progress bars can start
tqdm.get_lock().locks = []
In [73]:
np.min(np.mean(loss_val_all, axis=2)[1])  # best (minimum) mean loss over the lambda grid
Out[73]:
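From here, the usual final steps would be to refit with the selected lambda and write a submission. A hedged sketch, assuming `ridge_regression` returns the weight vector, that `predict_labels`/`create_csv_submission` exist in `proj1_helpers` as in the standard course skeleton, and that `X_t` gets the identical preprocessing as `X`:
In [ ]:
# Hypothetical final steps -- not run above.
best_lambda = lambdas[idx_min]  # assumes idx_min indexes the lambda grid
w = implementation.ridge_regression(y, X_p, best_lambda)  # may return (w, loss) instead
# X_t_p = <apply the same preprocessing pipeline to X_t>
# y_pred = proj1_helpers.predict_labels(w, X_t_p)
# proj1_helpers.create_csv_submission(ids_t, y_pred, 'predictions.csv')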