In [1]:
# Imports
%load_ext autoreload
%autoreload 2
%matplotlib inline
from matplotlib import pyplot as plt
import csv
import importlib
from tqdm import tqdm
from scripts import proj1_helpers, helpers
from scripts import implementation, feature_processing, k_fold, model_linear, model_logistic
import numpy as np

In [2]:
# Locations of the input datasets and of the output CSV, all under one data directory
data_dir = '../data'
train_path = data_dir + '/train.csv'
test_path = data_dir + '/test.csv'
output_path = data_dir + '/lr_cv_augm.csv'

In [ ]:
# Read both CSV files with the project's loader.
# Each call is unpacked into three values; judging by the target names these are
# (labels, feature matrix, row ids) — confirm against proj1_helpers.load_csv_data.
load_csv = proj1_helpers.load_csv_data
y, X, idx = load_csv(train_path)
y_t, X_t, ids_t = load_csv(test_path)

In [ ]:
# Feature-processing setting: passed to feature_processing.process_X
# (presumably the polynomial expansion degree — confirm in feature_processing)
deg = 5

# Optimiser settings: passed to implementation.reg_logistic_regression_newton
max_iters = 80
gamma = 0.1
lambda_ = 2e-4

In [ ]:
print("Preprocessing data")

# Process the training matrix; the second returned value is unpacked as
# (x_mean, x_std) — presumably the normalisation statistics, confirm in process_X.
train_processed = feature_processing.process_X(X, deg)
X_p, (x_mean, x_std) = train_processed

# Process the test matrix, handing the training (x_mean, x_std) pair back in.
X_t_p, _ = feature_processing.process_X(X_t, deg, (x_mean, x_std))

# --- Logistic regression setup ---

# Wrap y in an outer list and transpose (a column vector when y is 1-D),
# then map the labels from -1/1 to 0/1 with the project helper.
y_column = np.array([y]).T
y_01 = helpers.y_to_01(y_column)

# Initial weights: a zero column with one row per column of X_p.
n_features = X_p.shape[1]
w0 = np.zeros((n_features, 1))

In [ ]:
# Fix NumPy's global RNG state before fitting
np.random.seed(42)

# Fit on the processed training data, starting from w0.
# The call's two return values are unpacked into w1 and l (by the names, presumably
# the fitted weights and the loss — confirm in implementation).
w1, l = implementation.reg_logistic_regression_newton(
    y_01,
    X_p,
    lambda_=lambda_,
    initial_w=w0,
    max_iters=max_iters,
    gamma=gamma,
    debug=False,
)

In [ ]:
# Element-wise product of the first two columns of the unprocessed training matrix
X[:, 0] * X[:, 1]

In [ ]:
# Display column 0 of X, the matrix loaded from train_path
X[:, 0]

In [ ]:
# Display column 1 of X, the matrix loaded from train_path
X[:, 1]

In [ ]:
# Shape of X after appending the column-0 * column-1 product as one extra column
np.concatenate((X, (X[:, 0] * X[:, 1])[:, np.newaxis]), axis=1).shape

In [ ]: