In [14]:
from sklearn import datasets
import numpy as np
import pandas as pd
import bokeh
from bokeh.plotting import output_notebook
from datascienceutils import analyze
from datascienceutils import predictiveModels as pm
output_notebook(bokeh.resources.INLINE)
# Load the diabetes dataset and carve out a single feature plus a
# held-out test split (the trailing rows) for the model comparison below.
diabetes = datasets.load_diabetes()
# Keep only feature column 2, reshaped to a (n_samples, 1) matrix so the
# sklearn-style estimators accept it.
diabetes_X = diabetes.data[:, 2].reshape(-1, 1)
# Hold out the last N_TEST rows for evaluation; train on the rest.
N_TEST = 20
diabetes_X_train = diabetes_X[:-N_TEST]
diabetes_X_test = diabetes_X[-N_TEST:]
diabetes_y_train = diabetes.target[:-N_TEST]
diabetes_y_test = diabetes.target[-N_TEST:]
In [15]:
# Wrap the full feature matrix in a DataFrame and hand it to the
# project's correlation-analysis helper (renders via bokeh inline).
target = diabetes.target
df = pd.DataFrame(data=diabetes.data)
analyze.correlation_analyze(df)
In [16]:
# Fit a linear regression through the shared pm.train helper and
# report its coefficients plus held-out error.
lin_model = pm.train(diabetes_X_train, diabetes_y_train, 'linearRegression')
print('Coefficients: \n', lin_model.coef_)
# Mean squared error on the held-out split.
test_residuals = lin_model.predict(diabetes_X_test) - diabetes_y_test
print("Mean squared error: %.2f" % np.mean(test_residuals ** 2))
# score() on the test split; 1.0 would be a perfect prediction.
print('Variance score: %.2f' % lin_model.score(diabetes_X_test, diabetes_y_test))
In [17]:
# Train the model using the training sets
# NOTE(review): 'logisticRegression' selects a classifier, but
# diabetes.target is a continuous regression target. sklearn's
# LogisticRegression normally rejects continuous labels
# ("Unknown label type: continuous") unless pm.train discretizes them
# internally -- verify against predictiveModels.train before relying on
# this cell's output.
log_model = pm.train(diabetes_X_train, diabetes_y_train, 'logisticRegression')
#print('Coefficients: \n', log_model.coef_)
# The mean squared error
print("Mean squared error: %.2f"
% np.mean((log_model.predict(diabetes_X_test) - diabetes_y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % log_model.score(diabetes_X_test, diabetes_y_test))
In [18]:
# Fit a random forest model via the shared pm.train helper and
# evaluate it on the held-out split.
rf_model = pm.train(diabetes_X_train, diabetes_y_train, 'randomForest')
# Mean squared error on the held-out split.
rf_residuals = rf_model.predict(diabetes_X_test) - diabetes_y_test
print("Mean squared error: %.2f" % np.mean(rf_residuals ** 2))
# score() on the test split; 1.0 would be a perfect prediction.
print('Variance score: %.2f' % rf_model.score(diabetes_X_test, diabetes_y_test))
In [19]:
# Train an SGD model using the training sets.
# FIX: the previous explicit sgd_model.fit(...) call was removed -- the
# earlier cells (linearRegression, randomForest) call predict/score on
# pm.train's return value directly, which shows pm.train already returns
# a usable trained model, so the second fit was redundant double training.
sgd_model = pm.train(diabetes_X_train, diabetes_y_train, 'sgd')
# The mean squared error on the held-out split.
print("Mean squared error: %.2f"
% np.mean((sgd_model.predict(diabetes_X_test) - diabetes_y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % sgd_model.score(diabetes_X_test, diabetes_y_test))
In [20]:
# Train an xgboost model using the training sets.
# FIX: the previous explicit xgb_model.fit(...) call was removed -- the
# earlier cells (linearRegression, randomForest) use pm.train's return
# value directly for predict/score, so pm.train already hands back a
# trained model and the second fit was redundant double training.
xgb_model = pm.train(diabetes_X_train, diabetes_y_train, 'xgboost')
# The mean squared error on the held-out split.
print("Mean squared error: %.2f"
% np.mean((xgb_model.predict(diabetes_X_test) - diabetes_y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % xgb_model.score(diabetes_X_test, diabetes_y_test))
In [21]:
# Train an SVM model using the training sets.
# FIX: the previous explicit svm_model.fit(...) call was removed -- the
# earlier cells (linearRegression, randomForest) use pm.train's return
# value directly for predict/score, so pm.train already hands back a
# trained model and the second fit was redundant double training.
svm_model = pm.train(diabetes_X_train, diabetes_y_train, 'svm')
# The mean squared error on the held-out split.
print("Mean squared error: %.2f"
% np.mean((svm_model.predict(diabetes_X_test) - diabetes_y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % svm_model.score(diabetes_X_test, diabetes_y_test))
In [22]:
# Train a Bernoulli naive-Bayes model using the training sets.
# FIX: the previous explicit bnb_model.fit(...) call was removed -- the
# earlier cells use pm.train's return value directly for predict/score,
# so pm.train already hands back a trained model and the second fit was
# redundant double training.
# NOTE(review): BernoulliNB is a classifier while diabetes.target is
# continuous -- confirm pm.train handles/discretizes such labels.
bnb_model = pm.train(diabetes_X_train, diabetes_y_train, 'bernoulliNB')
# The mean squared error on the held-out split.
print("Mean squared error: %.2f"
% np.mean((bnb_model.predict(diabetes_X_test) - diabetes_y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % bnb_model.score(diabetes_X_test, diabetes_y_test))
In [23]:
# Train a k-nearest-neighbours model using the training sets.
# FIX: the previous explicit knn_model.fit(...) call was removed -- the
# earlier cells use pm.train's return value directly for predict/score,
# so pm.train already hands back a trained model and the second fit was
# redundant double training.
# NOTE(review): if 'knn' maps to a KNN *classifier*, the continuous
# diabetes.target will be rejected -- confirm against predictiveModels.train.
knn_model = pm.train(diabetes_X_train, diabetes_y_train, 'knn')
# The mean squared error on the held-out split.
print("Mean squared error: %.2f"
% np.mean((knn_model.predict(diabetes_X_test) - diabetes_y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % knn_model.score(diabetes_X_test, diabetes_y_test))
In [24]:
# Train the model using the training sets
# NOTE(review): kernel density estimation is unsupervised -- sklearn's
# KernelDensity has no predict(), and its score(X) is a total
# log-likelihood rather than R^2 -- so unless pm.train wraps 'kde' in a
# supervised adapter, the predict/score calls below are unlikely to work
# as a regression evaluation. Verify against predictiveModels.train.
kde_model = pm.train(diabetes_X_train, diabetes_y_train, 'kde')
# NOTE(review): likely redundant -- earlier cells use pm.train's return
# value directly without re-fitting.
kde_model.fit(diabetes_X_train, diabetes_y_train)
# The mean squared error
print("Mean squared error: %.2f"
% np.mean((kde_model.predict(diabetes_X_test) - diabetes_y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % kde_model.score(diabetes_X_test, diabetes_y_test))
In [ ]:
# NOTE(review): this cell is an exact duplicate of the 'kde' cell above
# (same model key, same variable names, same evaluation) -- it should be
# deleted from the notebook.
# Train the model using the training sets
kde_model = pm.train(diabetes_X_train, diabetes_y_train, 'kde')
kde_model.fit(diabetes_X_train, diabetes_y_train)
# The mean squared error
print("Mean squared error: %.2f"
% np.mean((kde_model.predict(diabetes_X_test) - diabetes_y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % kde_model.score(diabetes_X_test, diabetes_y_test))
In [ ]:
# Train the model using the training sets
# NOTE(review): two likely problems here -- (1) MultinomialNB is a
# classifier and diabetes.target is continuous; (2) MultinomialNB
# requires non-negative feature values, and the diabetes feature columns
# include negative values, which sklearn rejects with a ValueError.
# Confirm whether pm.train transforms the inputs before fitting.
mnb_model = pm.train(diabetes_X_train, diabetes_y_train, 'multinomialNB')
print('Coefficients: \n', mnb_model.coef_)
# The mean squared error
print("Mean squared error: %.2f"
% np.mean((mnb_model.predict(diabetes_X_test) - diabetes_y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % mnb_model.score(diabetes_X_test, diabetes_y_test))
In [ ]:
# Reload the dataset as plain (features, target) arrays via return_X_y.
X, y = datasets.load_diabetes(return_X_y=True)
# Bare last expression: displays the feature-matrix dimensions.
X.shape