In [161]:
# Import third party libraries
# Numerical library
import numpy as np
# Used for matrix inversion
from numpy.linalg import inv
# Plotting library
import matplotlib.pyplot as plt
from matplotlib import gridspec
import collections
# Allows for printing inline for jupyter notebook
%matplotlib inline
In [162]:
# Load datasets and store in ndarrays
# (np.loadtxt accepts a filename directly, so no file handles are left open)
X_train = np.loadtxt('housing_train.txt')
X_test = np.loadtxt('housing_test.txt')
In [163]:
## PART 1
# Split off known target values
y_train = X_train[:,13]
y_test = X_test[:,13]
# Add dimension to y_train and transpose
y_train = y_train[np.newaxis].T
y_test = y_test[np.newaxis].T
# Remove column 13 from X
X_train = np.delete(X_train, 13, axis=1)
X_test = np.delete(X_test, 13, axis=1)
# Function to create an array of dummy ones, returned as a
# column vector
def make_dummy_vector(target):
    temp = np.ones(len(target))
    return temp[np.newaxis].T
# Create dummy 1 values
dummy_train = make_dummy_vector(X_train)
dummy_test = make_dummy_vector(X_test)
# Add dummy data to feature matrices
X_train = np.concatenate((dummy_train, X_train), axis=1)
X_test = np.concatenate((dummy_test, X_test), axis=1)
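In [ ]:
# Quick sanity check (an added sketch, not part of the assignment): each design
# matrix should now carry a leading column of ones plus the 13 housing features,
# i.e. shapes of (433, 14) and (74, 14) assuming the full data files
assert X_train.shape[1] == X_test.shape[1] == 14
assert np.all(X_train[:, 0] == 1) and np.all(X_test[:, 0] == 1)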
In [164]:
## Note: rows are samples and columns are features throughout; transposing X
## here (as in an earlier draft) would break the calculations below.
#X_train = X_train.T
#X_test = X_test.T
print X_train.shape
In [165]:
## PART 2
# Compute optimal weight vector w = (X^T * X)^-1 (X^T * Y)
def calc_w_vector(X, y):
    return np.dot(inv(np.dot(X.T, X)), np.dot(X.T, y))
# Alternative: the Moore-Penrose pseudo-inverse gives the same least-squares
# solution without explicitly forming (X^T * X)^-1
def alt_calc(X, y):
    return np.dot(np.linalg.pinv(X), y)
# Limit printout to 3 decimal places
np.set_printoptions(precision=3)
# Calculate w vectors
w_train = calc_w_vector(X_train,y_train)
w_test = calc_w_vector(X_test,y_test)
# Print both weight vectors to console
print 'w_train vector:'
print('\n'.join('{}: {}'.format(*k) for k in enumerate(w_train)))
print ' \r\nw_test vector:'
print('\n'.join('{}: {}'.format(*k) for k in enumerate(w_test)))
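In [ ]:
# Cross-check (an added sketch): np.linalg.lstsq solves the same least-squares
# problem without forming (X^T * X)^-1 explicitly, so its solution should match
# calc_w_vector up to floating-point error
w_check = np.linalg.lstsq(X_train, y_train)[0]
print np.allclose(w_train, w_check)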
In [166]:
## PART 3
# Functions
def calc_sse(X, y, w):
    return np.dot(np.subtract(y, np.dot(X, w)).T, np.subtract(y, np.dot(X, w)))
# Apply learned weight vectors
target_func_train = np.dot(X_train, w_train)
target_func_test = np.dot(X_test, w_test)
# Print SSE for each model (calc_sse returns the value as a 1x1 array)
print 'Training Model: \r\nSSE: %.2f \r\n' % calc_sse(X_train, y_train, w_train)
print 'Testing Model: \r\nSSE: %.2f' % calc_sse(X_test, y_test, w_test)
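In [ ]:
# Equivalent vectorized form of the SSE (an added sketch): ||y - Xw||^2 as a
# plain scalar; calc_sse returns the same value wrapped in a 1x1 array
sse_vec = np.sum((y_train - np.dot(X_train, w_train)) ** 2)
print np.allclose(sse_vec, calc_sse(X_train, y_train, w_train))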
In [167]:
## PART 4
# Repeat parts 2 and 3 without the dummy feature of 1s in X
# Remove dummy column from both tables
X_train_no_dummy = X_train[:, 1:]
X_test_no_dummy = X_test[:, 1:]
# Calculate w vectors
w_train_no_dummy = calc_w_vector(X_train_no_dummy,y_train)
w_test_no_dummy = calc_w_vector(X_test_no_dummy,y_test)
# Print both weight vectors to console
print 'w_train_no_dummy vector:'
print('\n'.join('{}: {}'.format(*k) for k in enumerate(w_train_no_dummy)))
print ' \r\nw_test_no_dummy vector:'
print('\n'.join('{}: {}'.format(*k) for k in enumerate(w_test_no_dummy)))
In [168]:
## PART 4 cont.
# Apply learned weight vectors
target_func_train_no_dummy = np.dot(X_train_no_dummy, w_train_no_dummy)
target_func_test_no_dummy = np.dot(X_test_no_dummy, w_test_no_dummy)
# Print SSE for each model (calc_sse returns the value as a 1x1 array)
print 'Training Model without Dummy: \r\nSSE: %.2f \r\n' % calc_sse(X_train_no_dummy, y_train, w_train_no_dummy)
print 'Testing Model without dummy: \r\nSSE: %.2f' % calc_sse(X_test_no_dummy, y_test, w_test_no_dummy)
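In [ ]:
# Illustration (an added sketch): with the column of ones, the normal equations
# force the training residuals to sum to exactly zero; without it the fit passes
# through the origin and the residuals pick up a systematic offset
res_with = y_train - np.dot(X_train, w_train)
res_without = y_train - np.dot(X_train_no_dummy, w_train_no_dummy)
print 'Mean residual with dummy:    %.4f' % res_with.mean()
print 'Mean residual without dummy: %.4f' % res_without.mean()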
In [169]:
def generate_uniform_feat(low, high, length):
    return np.random.uniform(low, high, length)[np.newaxis].T
# Set up values and experiment names
a_vals = [10,100,200,400,600,800,1000,1200,1400,1600]
experiments = np.arange(10)
inst, new_features = ["train", "test"], {}
# Loop through and create a feature column for train and test;
# row counts must match the corresponding design matrix
ctr = 0
for exp in experiments:
    for item in inst:
        if item == 'train':
            new_features["f_{0}_{1}".format(item, exp)] = generate_uniform_feat(0, a_vals[ctr], X_train.shape[0])
        else:
            new_features["f_{0}_{1}".format(item, exp)] = generate_uniform_feat(0, a_vals[ctr], X_test.shape[0])
    ctr += 1
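In [ ]:
# Reproducibility note (an added sketch): the features above are random, so the
# Part 5 SSE values change from run to run. Calling np.random.seed before the
# generation cell makes them repeatable; the seed value 0 is an arbitrary choice
np.random.seed(0)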
In [170]:
## PART 5
# Set up cases for 2,4,6,8,10 additional uniformly distributed features
# Helper functions for assembling the feature matrices dynamically
def concatenate_features(tuple_obj, axis):
    return np.concatenate(tuple_obj, axis=axis)
def build_tuple(item, n):
    # Tuple of generated-feature keys, e.g. ('f_train_0', ..., 'f_train_<n-1>')
    return tuple('f_{0}_{1}'.format(item, i) for i in range(n))
def slicedict(d, s):
    return {k: v for k, v in d.iteritems() if k.endswith(s)}
new_datasets = [2,4,6,8,10]
# The 2/4/6/8/10-feature matrices are built by hand here; a loop-based
# version using the helpers above is sketched after this cell
# New train/test matrices
two_feat_train = np.concatenate((X_train,new_features['f_train_0'],new_features['f_train_1']), axis=1)
two_feat_test = np.concatenate((X_test,new_features['f_test_0'],new_features['f_test_1']), axis=1)
four_feat_train = np.concatenate((two_feat_train,new_features['f_train_2'],new_features['f_train_3']), axis=1)
four_feat_test = np.concatenate((two_feat_test, new_features['f_test_2'],new_features['f_test_3']), axis=1)
six_feat_train = np.concatenate((four_feat_train, new_features['f_train_4'],new_features['f_train_5']), axis=1)
six_feat_test = np.concatenate((four_feat_test, new_features['f_test_4'],new_features['f_test_5']), axis=1)
eight_feat_train = np.concatenate((six_feat_train, new_features['f_train_6'],new_features['f_train_7']), axis=1)
eight_feat_test = np.concatenate((six_feat_test, new_features['f_test_6'],new_features['f_test_7']), axis=1)
ten_feat_train = np.concatenate((eight_feat_train,new_features['f_train_8'],new_features['f_train_9']), axis=1)
ten_feat_test = np.concatenate((eight_feat_test,new_features['f_test_8'],new_features['f_test_9']), axis=1)
# Create weight vector for each test case
w_two_train = calc_w_vector(two_feat_train, y_train)
w_two_test = calc_w_vector(two_feat_test, y_test)
w_four_train = calc_w_vector(four_feat_train, y_train)
w_four_test = calc_w_vector(four_feat_test, y_test)
w_six_train = calc_w_vector(six_feat_train, y_train)
w_six_test = calc_w_vector(six_feat_test, y_test)
w_eight_train = calc_w_vector(eight_feat_train, y_train)
w_eight_test = calc_w_vector(eight_feat_test, y_test)
w_ten_train = calc_w_vector(ten_feat_train, y_train)
w_ten_test = calc_w_vector(ten_feat_test, y_test)
np.set_printoptions(suppress=True)
# Store SSE scores in lists
sse_train = []
sse_test = []
sse_train.append(calc_sse(two_feat_train, y_train,w_two_train))
sse_test.append(calc_sse(two_feat_test, y_test,w_two_test))
sse_train.append(calc_sse(four_feat_train, y_train,w_four_train))
sse_test.append(calc_sse(four_feat_test, y_test,w_four_test))
sse_train.append(calc_sse(six_feat_train, y_train,w_six_train))
sse_test.append(calc_sse(six_feat_test, y_test,w_six_test))
sse_train.append(calc_sse(eight_feat_train, y_train,w_eight_train))
sse_test.append(calc_sse(eight_feat_test, y_test,w_eight_test))
sse_train.append(calc_sse(ten_feat_train, y_train,w_ten_train))
sse_test.append(calc_sse(ten_feat_test, y_test,w_ten_test))
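In [ ]:
# Loop-based version of the feature additions above (an added sketch of the
# "dynamic addition" mentioned in the cell's comments); it reproduces the
# explicit concatenations and SSE appends two added features at a time
sse_train_loop, sse_test_loop = [], []
Xtr, Xte = X_train, X_test
for i in range(10):
    Xtr = np.concatenate((Xtr, new_features['f_train_%d' % i]), axis=1)
    Xte = np.concatenate((Xte, new_features['f_test_%d' % i]), axis=1)
    if (i + 1) % 2 == 0:  # evaluate at 2, 4, 6, 8, 10 added features
        sse_train_loop.append(calc_sse(Xtr, y_train, calc_w_vector(Xtr, y_train)))
        sse_test_loop.append(calc_sse(Xte, y_test, calc_w_vector(Xte, y_test)))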
In [171]:
# Plot the values gathered above
fig = plt.figure(figsize=(9,6))
plt.scatter(new_datasets,sse_train)
plt.plot(new_datasets, np.ravel(sse_train))
fig.suptitle('Effect of Adding Features \nof Uniformly Random Variables to X_train', fontsize=16)
plt.xlabel('Number of Added Features', fontsize=16)
plt.ylabel('Sum Squared \nError (SSE)', fontsize=16)
# Save plot
fig.savefig('Part_5_Train.png')
# Plot of the effect of artificial features on X_test
fig = plt.figure(figsize=(9,6))
plt.scatter(new_datasets,sse_test)
plt.plot(new_datasets, np.ravel(sse_test))
fig.suptitle('Effect of Adding Features \nof Uniformly Random Variables to X_test', fontsize=16)
plt.xlabel('Number of Added Features', fontsize=16)
plt.ylabel('Sum Squared \nError (SSE)', fontsize=16)
# Save plot
fig.savefig('Part_5_Test.png')
In [172]:
# Calculate SSE for each feature experiment
print 'Train, two features added: \r\nSSE: %.2f \r\n' % calc_sse(two_feat_train, y_train, w_two_train)
print 'Train, four features added: \r\nSSE: %.2f \r\n' % calc_sse(four_feat_train, y_train, w_four_train)
print 'Train, six features added: \r\nSSE: %.2f \r\n' % calc_sse(six_feat_train, y_train, w_six_train)
print 'Train, eight features added: \r\nSSE: %.2f \r\n' % calc_sse(eight_feat_train, y_train, w_eight_train)
print 'Train, ten features added: \r\nSSE: %.2f \r\n' % calc_sse(ten_feat_train, y_train, w_ten_train)
print 'Test, two features added: \r\nSSE: %.2f \r\n' % calc_sse(two_feat_test, y_test, w_two_test)
print 'Test, four features added: \r\nSSE: %.2f \r\n' % calc_sse(four_feat_test, y_test, w_four_test)
print 'Test, six features added: \r\nSSE: %.2f \r\n' % calc_sse(six_feat_test, y_test, w_six_test)
print 'Test, eight features added: \r\nSSE: %.2f \r\n' % calc_sse(eight_feat_test, y_test, w_eight_test)
print 'Test, ten features added: \r\nSSE: %.2f \r\n' % calc_sse(ten_feat_test, y_test, w_ten_test)
In [173]:
## PART 6
# Compute regularized (ridge) weight vector: w = (X^T * X + lamda * I)^-1 (X^T * y)
def calc_w_vector_identity(X, y, lamda):
    I = np.identity(X.shape[1])
    return np.dot(inv(np.dot(X.T, X) + lamda * I), np.dot(X.T, y))
# Limit printout to 3 decimal places
np.set_printoptions(precision=3)
w_train_results, w_test_results = {}, {}
# lamdas = [value * 0.01 for value in range(5, 5001, 5)]
# lamdas.insert(0, 0.01)
lamdas = [0.01, 0.05, 0.1, 0.5, 1, 5, 10, 20, 30, 40, 50, 100, 200, 300]
# Calculate w vectors for each lambda
for lamda in lamdas:
    w_train_results[lamda] = calc_w_vector_identity(X_train, y_train, lamda)
    w_test_results[lamda] = calc_w_vector_identity(X_test, y_test, lamda)
sse_train, sse_test = {}, {}
# Print SSE for each lambda (dict iteration order is arbitrary here;
# the plots below sort by lambda)
print 'Training Model:'
for key in w_train_results:
    w_train = w_train_results[key]
    target_func_train = np.dot(X_train, w_train)
    sse = calc_sse(X_train, y_train, w_train)
    sse_train.setdefault(key, sse[0][0])
    print '[%0.2f]-SSE: %.2f \r\n' % (key, sse)
print 'Testing Model:'
for key in w_test_results:
    w_test = w_test_results[key]
    target_func_test = np.dot(X_test, w_test)
    sse = calc_sse(X_test, y_test, w_test)
    sse_test.setdefault(key, sse[0][0])
    print '[%0.2f]-SSE: %.2f \r\n' % (key, sse)
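In [ ]:
# Consistency check (an added sketch): with lamda = 0 the ridge solution should
# reduce to the ordinary least-squares solution from Part 2
print np.allclose(calc_w_vector_identity(X_train, y_train, 0.0),
                  calc_w_vector(X_train, y_train))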
In [174]:
# Order dictionary of training data
ordered_sse_train = collections.OrderedDict(sorted(sse_train.items()))
# Get values for plotting
lambda_train = ordered_sse_train.keys()
sse_values_train = ordered_sse_train.values()
# Align the weight vectors with the sorted lambdas (plain .values() would
# come back in arbitrary dict order in Python 2)
w_train_final = [w_train_results[k] for k in lambda_train]
# Format and plot figure
fig = plt.figure(figsize=(9,6))
fig.suptitle('Effect of the Regularization Parameter \non Training SSE', fontsize=16)
plt.xlabel('Lambda values', fontsize=16)
plt.ylabel('Sum Squared \nError (SSE)', fontsize=16)
plt.scatter(lambda_train, sse_values_train)
plt.plot(lambda_train,sse_values_train)
# Save plot
fig.savefig('Part_6_Train.png')
# Order dictionary of testing data
ordered_sse_test = collections.OrderedDict(sorted(sse_test.items()))
# Get values for plotting
lambda_test = ordered_sse_test.keys()
sse_values_test = ordered_sse_test.values()
w_test_final = [w_test_results[k] for k in lambda_test]
# Format and plot figure
fig = plt.figure(figsize=(9,6))
fig.suptitle('Effect of the Regularization Parameter \non Testing SSE', fontsize=16)
plt.scatter(lambda_test, sse_values_test)
plt.plot(lambda_test,sse_values_test)
plt.xlabel('Lambda values', fontsize=16)
plt.ylabel('Sum Squared \nError (SSE)', fontsize=16)
# Save plot
fig.savefig('Part_6_Test.png')
In [175]:
# Inspect the weight vector for the smallest lambda
print w_train_final[0]
In [176]:
## PART 8
# SSE with regularization term
# def calc_sse_reg(X, y, w, lamda):
# return np.add(np.dot(np.subtract(y, np.dot(X, w)).T, np.subtract(y,np.dot(X, w))),np.dot(lamda,np.linalg.norm(w) ** 2))
def calc_sse_reg(X, y, w, lamda):
    fst = 0
    for i in range(len(X)):
        sub = y[i] - np.dot(w.T, X[i])
        fst += np.dot(sub, sub)
    snd = lamda * (np.linalg.norm(w, 2) ** 2)
    return fst + snd
## The regularized objective adds lamda * ||w||_2^2 (the squared L2 norm of w)
## to the SSE as a scalar, which is what the function above does
reg_train_sse, reg_test_sse = [], []
for i in range(len(w_train_final)):
    tmp_train_sse, tmp_test_sse = [], []
    for item in lamdas:
        tmp_train_sse.append(calc_sse_reg(X_train, y_train, w_train_final[i], item))
        tmp_test_sse.append(calc_sse_reg(X_test, y_test, w_test_final[i], item))
    reg_train_sse.append(tmp_train_sse)
    reg_test_sse.append(tmp_test_sse)
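In [ ]:
# Vectorized equivalent of calc_sse_reg (an added sketch):
# ||y - Xw||^2 + lamda * ||w||^2, which should match the loop-based version
def calc_sse_reg_vec(X, y, w, lamda):
    resid = y - np.dot(X, w)
    return np.dot(resid.T, resid)[0][0] + lamda * (np.linalg.norm(w) ** 2)
print np.allclose(calc_sse_reg(X_train, y_train, w_train_final[0], 0.01),
                  calc_sse_reg_vec(X_train, y_train, w_train_final[0], 0.01))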
In [177]:
# Plot the values gathered above
fig = plt.figure(figsize=(9,6))
fig.suptitle('Effect of the Regularization Term (Train)', fontsize=16)
for i in range(14):
    plt.scatter(lamdas, reg_train_sse[i])
    plt.plot(lamdas, reg_train_sse[i])
plt.xlabel('Lambda Values', fontsize=16)
plt.ylabel('Sum Squared \nError (SSE)', fontsize=16)
# Save plot
fig.savefig('Part_8_Train.png')
# Plot the values gathered above
fig = plt.figure(figsize=(9,6))
fig.suptitle('Effect of the Regularization Term (Test)', fontsize=16)
for i in range(14):
    plt.scatter(lamdas, reg_test_sse[i])
    plt.plot(lamdas, reg_test_sse[i])
plt.xlabel('Lambda Values', fontsize=16)
plt.ylabel('Sum Squared \nError (SSE)', fontsize=16)
# Save plot
fig.savefig('Part_8_Test.png')
In [178]:
# Spot-check the regularized SSE for the smallest lambda
print calc_sse_reg(X_train, y_train, w_train_final[0], 0.01)
In [179]:
print X_train.shape
print w_train.shape
In [180]:
print X_train.T
In [181]:
# Don't show scientific notation
np.set_printoptions(suppress=True)
print "Printing X_train:"
print X_train
In [182]:
print "Printing y_train:"
print y_train
In [183]:
# Plot feature 1: Crime rate by town
plt.scatter(X_train[:, 0],y_train)
In [184]:
# Plot feature 2: Residential land zoned for lots over 25,000 sq. ft.
plt.scatter(X_train[:, 1],y_train)
In [185]:
# Multiplotting feature 1 & 2
plt.scatter(X_train[:, 0],y_train)
plt.scatter(X_train[:, 1],y_train)
In [186]:
# Plot feature 3: Proportion of non-retail business acres per town
plt.scatter(X_train[:, 2],y_train)
In [187]:
# Plot feature 4: Charles River dummy variable (= 1 if tract bounds river, 0 otherwise)
plt.scatter(X_train[:, 3],y_train)
In [188]:
# Plot feature 5: Nitric oxides concentration (parts per 10 million)
plt.scatter(X_train[:, 4],y_train)
In [189]:
# Plot feature 6: Average number of rooms per dwelling
plt.scatter(X_train[:, 5],y_train)
In [190]:
# Plot feature 7: Proportion of owner-occupied units built prior to 1940
plt.scatter(X_train[:, 6],y_train)
In [191]:
# Plot feature 8: Weighted distances to five Boston employment centers
plt.scatter(X_train[:, 7],y_train)
In [192]:
# Plot feature 9: Index of accessibility to radial highways
plt.scatter(X_train[:, 8],y_train)
In [193]:
# Plot feature 10: Full-value property-tax rate per $10,000
plt.scatter(X_train[:, 9],y_train)
In [194]:
# Plot feature 11: Pupil-teacher ratio by town
plt.scatter(X_train[:, 10],y_train)
In [195]:
# Plot feature 12: 1000(Bk - 0.63)^2 where Bk is the proportion of Black residents by town
plt.scatter(X_train[:, 11],y_train)
In [196]:
# Plot feature 13: % lower status of the population
plt.scatter(X_train[:, 12],y_train)