In [44]:
#This notebook needs to run in python2.7
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.datasets import load_boston
from sklearn import linear_model
# Display configuration for printed frames.
# Use the fully-qualified option key: the bare 'precision' pattern is matched
# as a regex against all option names and is ambiguous on newer pandas
# releases, while 'display.precision' is valid on old and new versions alike.
pd.set_option('display.precision', 3)
pd.set_option('display.width', 160)
In [45]:
# Load the Boston housing bunch and wrap it in a DataFrame: one column per
# feature, with the response appended as the last column, 'target'.
boston = load_boston()
dataset = pd.DataFrame(
    data=boston.data,
    columns=boston.feature_names,
).assign(target=boston.target)
print(dataset.describe())
In [46]:
# Split the frame into predictors (X) and response (y). 'target' is the last
# column of `dataset`, so "every column but the final one" selects the predictors.
observations = len(dataset)
variables = dataset.columns[:-1]
# .iloc is the positional indexer; it replaces the deprecated (and, in current
# pandas, removed) .ix while selecting exactly the same columns here.
X = dataset.iloc[:, :-1]
y = dataset['target'].values
In [47]:
# Display the names currently held in `variables` (bare expression, so the cell renders it as output).
variables
Out[47]:
In [48]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
In [49]:
# Ordinary least squares of y on the columns of X. The constant column is
# added explicitly so the fitted model includes an intercept term.
Xc = sm.add_constant(X)
linear_regression = sm.OLS(endog=y, exog=Xc)
fitted_model = linear_regression.fit()
# Bare last expression: the notebook renders the summary table as cell output.
fitted_model.summary()
Out[49]:
In [50]:
# Pairwise correlations between the predictor columns (all but the last column).
# .iloc replaces the deprecated/removed .ix indexer; the slice is positional.
X = dataset.iloc[:, :-1]
correlation_matrix = X.corr()
# The original, misspelled name is kept as an alias in case other cells use it.
correlation_martix = correlation_matrix
print(correlation_matrix)
In [51]:
def visualize_correlation_matrix(data, hurdle=0.0):
    """Draw a heatmap of the pairwise correlations between the columns of `data`.

    Parameters
    ----------
    data : pandas.DataFrame or 2-D array-like
        Observations in rows, variables in columns (``rowvar=False``).
    hurdle : float, optional
        Correlations whose absolute value is below this threshold are set to
        0.0 before plotting, so only the stronger relationships stand out.
        The default of 0.0 leaves the matrix untouched.

    Notes
    -----
    Tick labels are taken from ``data.columns`` when `data` has that
    attribute, so they always match the plotted matrix. For plain arrays the
    notebook-level ``variables`` sequence is used, as in the original version.
    """
    R = np.corrcoef(data, rowvar=False)
    R[np.abs(R) < hurdle] = 0.0  # blank out the weak correlations

    labels = getattr(data, 'columns', None)
    if labels is None:
        labels = variables

    heatmap = plt.pcolor(R, cmap=mpl.cm.coolwarm, alpha=0.8)
    ax = heatmap.axes
    ax.set_frame_on(False)
    # Each pcolor cell spans [i, i + 1), so +0.5 centres a tick on its cell.
    ax.set_yticks(np.arange(R.shape[0]) + 0.5, minor=False)
    ax.set_xticks(np.arange(R.shape[1]) + 0.5, minor=False)
    ax.set_xticklabels(labels, minor=False)
    ax.set_yticklabels(labels, minor=False)
    plt.xticks(rotation=90)
    # Booleans rather than the string 'off': a non-empty string is truthy, and
    # current Matplotlib no longer maps 'off' to False, so the tick marks
    # would otherwise stay visible.
    plt.tick_params(axis='both', which='both',
                    bottom=False, top=False, left=False, right=False)
    plt.colorbar(heatmap)
    plt.show()


visualize_correlation_matrix(X, hurdle=0.5)
In [52]:
# Eigen-decomposition of the correlation matrix of X, treating each column
# of X as one variable.
corr = np.corrcoef(X, rowvar=False)
eigenvalues, eigenvectors = np.linalg.eig(corr)
print(eigenvalues)
In [53]:
# np.linalg.eig returns eigenvectors as columns, so this is the eigenvector paired with eigenvalues[8].
print(eigenvectors[:,8])
In [54]:
# Names at positions 2, 8 and 9 of `variables` -- presumably the variables that
# dominate the eigenvector printed in the previous cell; TODO confirm against its output.
print(variables[2],variables[8],variables[9])
In [59]:
# Feature scaling: standardize the columns of X with scikit-learn's StandardScaler.
from sklearn.preprocessing import StandardScaler

observations = len(dataset)
# NOTE(review): this takes every column of `dataset`, whereas the earlier cell
# used dataset.columns[:-1] -- confirm that including the last column is intended.
variables = dataset.columns
y = dataset['target'].values

# Fit once, then transform the same data (equivalent to fit_transform).
standardization = StandardScaler().fit(X)
Xst = standardization.transform(X)

# Per-column statistics learned during the fit.
original_means = standardization.mean_
originanal_stds = standardization.scale_  # (sic) spelling kept; other cells may use this name
original_var = standardization.var_

# Append a column of ones as the last column of the standardized matrix.
Xst = np.column_stack((Xst, np.ones(observations)))

print(original_means)
print(originanal_stds)
print(original_var)
In [56]:
# Show the same leading rows before and after scaling. X.head() prints five
# rows, so slice Xst to its first five rows as well instead of printing the
# whole array.
print(X.head())
print(Xst[:5])
In [ ]: