In [3]:
%pylab inline
from sklearn import datasets
from sklearn.metrics import mean_squared_error
from sklearn import linear_model
from sklearn.ensemble import RandomForestClassifier
boston = datasets.load_boston() # Boston house-prices
X = boston['data'] # 13 features (e.g. crime, # rooms, age, etc.)
Y = boston['target'] # response (median house price)
half = floor(len(Y)/2)
train_X = X[:half]
train_Y = Y[:half]
test_X = X[half:]
test_Y = Y[half:]
In [18]:
# The target (median house price) is continuous, so this is a regression
# problem: RandomForestClassifier is the wrong estimator here (it treats each
# distinct price as a class; modern scikit-learn refuses to fit and raises
# "Unknown label type: continuous"). Use the regressor instead.
from sklearn.ensemble import RandomForestRegressor
rf1 = RandomForestRegressor(n_estimators=1000, max_features=6)
rf1.fit(train_X, train_Y)
preds = rf1.predict(test_X)
# print() call form works in both Python 2 and 3 (the original
# `print mse` statement is a syntax error under Python 3).
mse = mean_squared_error(test_Y, preds)
print(mse)
In [17]:
# Residuals vs. actual price: a good fit shows points scattered around 0 with
# no trend. Labels/title so the figure stands alone when the notebook is
# skimmed; trailing ';' suppresses the [<Line2D>] repr noise.
plot(test_Y, preds - test_Y, 'o')
xlabel('actual median house price')
ylabel('residual (predicted - actual)')
title('Random forest residuals on the test half');
Out[17]:
In [14]:
# Ordinary least-squares baseline to compare against the random forest.
lr = linear_model.LinearRegression()
In [15]:
# Fit the linear baseline on the training half and score it on the held-out
# half with the same metric as the forest, so the two MSEs are comparable.
lr.fit(train_X, train_Y)
lr_preds = lr.predict(test_X)
# print() call form works in both Python 2 and 3 (the original
# `print lr_mse` statement is a syntax error under Python 3).
lr_mse = mean_squared_error(test_Y, lr_preds)
print(lr_mse)
In [21]:
from sklearn.cross_validation import cross_val_score
import sklearn.cross_validation as cross_v
In [23]:
scores = cross_val_score(rf1, X, Y, cv = cross_v.KFold(len(Y),5))
In [ ]: