In [2]:
# Imports
%load_ext autoreload
%autoreload 2
%matplotlib inline
from scripts.plot_histograms import *
import csv
import importlib
from scripts import proj1_helpers, implementation, helpers
import numpy as np
from matplotlib import pyplot as plt
from scripts.helpers import impute_with_mean, get_header
Configuration
In [3]:
# Input CSV locations (train, test), given relative to the notebook's working directory.
train_path, test_path = '../data/train.csv', '../data/test.csv'
Data import
In [4]:
# Header is read from the training CSV (presumably the column names — confirm in scripts.helpers).
header = get_header(train_path)

# Load both splits; the first value returned for the test file is discarded.
y, X_tr, ids_tr = proj1_helpers.load_csv_data(train_path)
_, X_te, ids_te = proj1_helpers.load_csv_data(test_path)

# impute_with_mean is applied to each feature matrix, train first and then test.
X_tr_imp, X_te_imp = impute_with_mean(X_tr), impute_with_mean(X_te)
Histograms of features
In [5]:
# Histograms of the training features as loaded (before impute_with_mean), saved as 'hist_tr'.
# NOTE(review): shape=(6, 5) is presumably the subplot grid; savefig is not defined in this
# notebook (presumably provided by the star import) — confirm where and in what format it writes.
do_hist(X_tr, header, shape=(6, 5))
savefig('hist_tr')
In [6]:
# Same histogram call on the output of impute_with_mean for the training features, saved as 'hist_tr_imp'.
# NOTE(review): shape=(6, 5) is presumably the subplot grid; savefig's output location/format
# is not visible in this notebook — confirm.
do_hist(X_tr_imp, header, shape=(6, 5))
savefig('hist_tr_imp')
In [7]:
# Histograms of the test features as loaded (before impute_with_mean), saved as 'hist_te'.
# The header passed here was read from the training CSV.
# NOTE(review): presumably train and test share the same columns — confirm.
do_hist(X_te, header, shape=(6, 5))
savefig('hist_te')
In [8]:
# Same histogram call on the output of impute_with_mean for the test features, saved as 'hist_te_imp'.
# The header passed here was read from the training CSV.
# NOTE(review): presumably train and test share the same columns — confirm.
do_hist(X_te_imp, header, shape=(6, 5))
savefig('hist_te_imp')
Boxplots of features
In [9]:
# Boxplots of the training features as loaded (before impute_with_mean), saved as 'box_tr'.
# NOTE(review): shape=(15,2) is presumably the subplot grid — confirm in scripts.plot_histograms.
do_boxplot(X_tr, header, shape=(15,2))
savefig('box_tr')
In [10]:
# Same boxplot call on the output of impute_with_mean for the training features, saved as 'box_tr_imp'.
# NOTE(review): shape=(15,2) is presumably the subplot grid — confirm in scripts.plot_histograms.
do_boxplot(X_tr_imp, header, shape=(15,2))
savefig('box_tr_imp')
Target value vs features
In [11]:
# Target y against the training features as loaded (before impute_with_mean), saved as 'xy_tr'.
# NOTE(review): shape=(6, 5) is presumably the subplot grid, one panel per feature — confirm.
do_Xy(X_tr, y, header, shape=(6, 5))
savefig('xy_tr')
In [ ]:
# Target y against the output of impute_with_mean for the training features, saved as 'xy_tr_imp'.
# NOTE(review): shape=(6, 5) is presumably the subplot grid, one panel per feature — confirm.
do_Xy(X_tr_imp, y, header, shape=(6, 5))
savefig('xy_tr_imp')
Feature vs feature plots
In [ ]:
# Feature-vs-feature figures for the imputed training features: one figure per
# (a, b) pair yielded by get_ranges, each saved under its own numbered name.
# NOTE(review): get_ranges(n_columns, 6) presumably splits the column indices into
# chunks of 6 and pairs them up — confirm in scripts.plot_histograms.
for i, (a, b) in enumerate(get_ranges(X_tr_imp.shape[1], 6)):
    # Both statements belong to the loop body: the file name embeds the
    # per-iteration index i, so every figure must be saved as it is produced.
    do_hist_scatter(X_tr_imp, y, header, idx_x=a, idx_y=b, bins=15)
    savefig('hist-scatter-tr-imp-%d' % i)
In [ ]: