In [1]:
# Imports
%load_ext autoreload
%autoreload 2
%matplotlib inline
from matplotlib import pyplot as plt
import csv
import importlib
from tqdm import tqdm
from scripts import proj1_helpers, helpers
from scripts import implementation, feature_processing, k_fold, model_linear, model_logistic
import numpy as np
In [2]:
# File locations, relative to the notebook's working directory:
# the two input CSVs and the CSV path reserved for the output.
train_path = "../data/train.csv"
test_path = "../data/test.csv"
output_path = "../data/lr_cv_augm.csv"
In [ ]:
# Load the training and test CSV files.
# Each load_csv_data call is unpacked into three values; the results for the
# test file are kept under the *_t names.
# NOTE(review): presumably (labels, feature matrix, row ids) -- confirm against
# proj1_helpers.load_csv_data.
y, X, idx = proj1_helpers.load_csv_data(train_path)
y_t, X_t, ids_t = proj1_helpers.load_csv_data(test_path)
In [ ]:
# Tunable settings used by the cells below.

# Second argument of feature_processing.process_X.
deg = 5

# Keyword arguments of implementation.reg_logistic_regression_newton.
max_iters = 80
gamma = 0.1
lambda_ = 2e-4
In [ ]:
print("Preprocessing data")

# Process the training matrix first; process_X returns the processed matrix
# together with a (mean, std) pair.
X_p, train_stats = feature_processing.process_X(X, deg)
x_mean, x_std = train_stats

# The test matrix is processed with the pair obtained from the training call.
X_t_p, _ = feature_processing.process_X(X_t, deg, (x_mean, x_std))

# --- Logistic regression inputs ---
# Turn y into a single-column 2-D array, then map the labels from -1/1 to 0/1.
y_column = np.array([y]).T
y_01 = helpers.y_to_01(y_column)

# Initial weights: a zero column vector with one entry per column of X_p.
n_features = X_p.shape[1]
w0 = np.zeros((n_features, 1))
In [ ]:
# Seed NumPy's global random generator so repeated runs of this cell start
# from the same random state.
np.random.seed(42)

# Fit on the processed training data, starting from w0.
# The call returns two values, bound to w1 and l.
# NOTE(review): presumably the fitted weights and the loss -- confirm against
# implementation.reg_logistic_regression_newton.
w1, l = implementation.reg_logistic_regression_newton(
    y_01,
    X_p,
    lambda_=lambda_,
    initial_w=w0,
    max_iters=max_iters,
    gamma=gamma,
    debug=False,
)
In [ ]:
# Element-wise product of columns 0 and 1 of X.
X[:, 0] * X[:, 1]
In [ ]:
# Display column 0 of X (first operand of the element-wise product check).
X[:, 0]
In [ ]:
# Display column 1 of X (second operand of the element-wise product check).
X[:, 1]
In [ ]:
# Shape of X once the column-0 * column-1 product is appended as one extra
# column. X itself is not modified.
interaction = (X[:, 0] * X[:, 1]).reshape(-1, 1)
np.hstack((X, interaction)).shape
In [ ]: