Fairy tales kind of situation
Breast Cancer Wisconsin (Diagnostic) Data Set: https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+(Diagnostic)
In [ ]:
from sklearn.datasets import load_breast_cancer

# Load the breast-cancer dataset bundled with scikit-learn.
ourdata = load_breast_cancer()
# Print the dataset's description text. print() with a single argument
# behaves the same on Python 2 and Python 3, unlike the print statement.
print(ourdata.DESCR)
In [ ]:
# Print the shape of the feature array, then show the array itself as the
# cell output. print() is used so the cell also runs on Python 3.
print(ourdata.data.shape)
ourdata.data
In [ ]:
# Show the dataset's target array as the cell output.
ourdata.target
In [ ]:
# Show the shape of the target array as the cell output.
ourdata.target.shape
In [ ]:
# Show the names associated with the target values.
ourdata.target_names
In [ ]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing. A fixed random_state makes the
# split (and therefore every metric computed below) reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    ourdata.data, ourdata.target, test_size=0.3, random_state=0
)
In [ ]:
# Print the shapes of the four arrays produced by the train/test split.
# print() is used so the cell runs on both Python 2 and Python 3.
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)
In [ ]:
import numpy as np

# Count how often each label occurs in the training targets.
# scipy.stats.itemfreq was deprecated and later removed from SciPy, so use
# np.unique(..., return_counts=True) and stack the result into the same
# two-column [value, count] layout that itemfreq returned.
np.column_stack(np.unique(y_train, return_counts=True))
In [ ]:
from sklearn.neighbors import KNeighborsClassifier
# Create a k-nearest-neighbours classifier with scikit-learn's default settings.
hx_knn = KNeighborsClassifier()
In [ ]:
# Fit the k-nearest-neighbours classifier on the training split.
hx_knn.fit(X_train, y_train)
In [ ]:
# Show the classifier's predictions for the training samples.
hx_knn.predict(X_train)
In [ ]:
from sklearn.metrics import confusion_matrix, f1_score

# Training-set evaluation of the k-NN classifier. Predict once and reuse the
# result for both metrics instead of running the prediction twice.
knn_train_pred = hx_knn.predict(X_train)
print(confusion_matrix(y_train, knn_train_pred))
print(f1_score(y_train, knn_train_pred))
In [ ]:
# Test-set evaluation of the k-NN classifier. Predict once and reuse the
# result for both metrics; print() keeps the cell valid on Python 3.
knn_test_pred = hx_knn.predict(X_test)
print(confusion_matrix(y_test, knn_test_pred))
print(f1_score(y_test, knn_test_pred))
In [ ]:
from sklearn.linear_model import LogisticRegression
# Create a logistic-regression classifier with scikit-learn's default settings.
hx_log = LogisticRegression()
In [ ]:
# Fit the logistic-regression classifier on the training split.
hx_log.fit(X_train, y_train)
In [ ]:
# Show the classifier's predictions for the training samples.
hx_log.predict(X_train)
In [ ]:
# Training-set evaluation of the logistic-regression classifier.
# Predict once and reuse the result for both metrics; print() keeps the
# cell valid on Python 3 as well as Python 2.
log_train_pred = hx_log.predict(X_train)
print(confusion_matrix(y_train, log_train_pred))
print(f1_score(y_train, log_train_pred))
In [ ]:
# Test-set evaluation of the logistic-regression classifier.
# Predict once and reuse the result for both metrics; print() keeps the
# cell valid on Python 3 as well as Python 2.
log_test_pred = hx_log.predict(X_test)
print(confusion_matrix(y_test, log_test_pred))
print(f1_score(y_test, log_test_pred))
In [ ]:
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell requires an older scikit-learn. Confirm whether to pin the
# version or switch to another regression dataset
# (e.g. fetch_california_housing).
from sklearn.datasets import load_boston

bostondata = load_boston()
# Print the regression targets and the shape of the feature array.
# print() with a single argument works on both Python 2 and Python 3.
print(bostondata.target)
print(bostondata.data.shape)
In [ ]:
### Learn more about the dataset
# print() with a single argument works on both Python 2 and Python 3.
print(bostondata.DESCR)
In [ ]:
# What the first row of data looks like. Rows are zero-indexed, so the first
# row is index 0 (the previous index 1 selected the second row).
bostondata.data[0]
In [ ]:
# Hold out 30% of the Boston samples for testing. A fixed random_state makes
# the split, and the errors computed from it, reproducible between runs.
BX_train, BX_test, By_train, By_test = train_test_split(
    bostondata.data, bostondata.target, test_size=0.3, random_state=0
)
In [ ]:
from sklearn.linear_model import LinearRegression
# Create a linear-regression model with scikit-learn's default settings.
hx_lin = LinearRegression()
In [ ]:
# Fit the linear-regression model on the Boston training split.
hx_lin.fit(BX_train, By_train)
In [ ]:
# Show the model's predictions for the training samples.
hx_lin.predict(BX_train)
In [ ]:
import matplotlib.pyplot as plt

# Scatter the model's training-set predictions (vertical axis) against the
# actual training targets (horizontal axis), label both axes, and render.
plt.scatter(x=By_train, y=hx_lin.predict(BX_train))
plt.xlabel("actual value")
plt.ylabel("predicted value")
plt.show()
In [ ]:
### Performance evaluation on the training set:
### mean squared error between actual targets and the model's predictions.
from sklearn.metrics import mean_squared_error
mean_squared_error(y_true=By_train, y_pred=hx_lin.predict(BX_train))
In [ ]:
### Performance evaluation on the held-out test set:
### mean squared error between actual targets and the model's predictions.
mean_squared_error(y_true=By_test, y_pred=hx_lin.predict(BX_test))
In [ ]: