In [1]:
# Build a small 5x2 demo matrix (rows 0..4, two "feature" columns).
# numpy was never imported anywhere in this notebook, so a fresh
# Restart-&-Run-All raised NameError — import it here in the first cell.
import numpy as np

X = np.arange(10).reshape((5, 2))
X
Out[1]:
In [3]:
# Matching label vector: one integer label per row of X.
Y = np.array(range(5))
Y
Out[3]:
In [4]:
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed
# in 0.20; the same helpers now live in sklearn.model_selection.
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for testing; random_state pins the shuffle so
# the split is reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.33, random_state=42
)
In [5]:
# Inspect the training features (~67% of the rows, shuffled).
X_train
Out[5]:
In [6]:
# Labels paired row-for-row with X_train.
Y_train
Out[6]:
In [7]:
# Inspect the held-out test features (test_size=0.33 of the rows).
X_test
Out[7]:
In [8]:
# Labels paired row-for-row with X_test.
Y_test
Out[8]:
In [11]:
# Four classes (labels 1-4) with N samples each; X carries two feature
# columns per sample, scaled by 10 so fold membership is easy to eyeball.
N = 5
X = np.arange(8 * N).reshape(-1, 2) * 10
Y = np.repeat(np.arange(1.0, 5.0), N)
print("X:\n", X, sep="")
print("Y:\n", Y, sep="")
In [33]:
# KFold moved to sklearn.model_selection in 0.18 (cross_validation was
# removed in 0.20).  The modern API takes n_splits only — the sample count
# is inferred from cv.split(X) — and the object is no longer iterated
# directly.  (The original export also lost the loop indentation; restored.)
from sklearn.model_selection import KFold

cv = KFold(n_splits=3, shuffle=True, random_state=1)
for train_index, test_index in cv.split(X):
    # With shuffle=True each fold should mix labels from all four classes.
    print("test y:", Y[test_index])
    print("." * 80)
    print("train y: ", Y[train_index])
    print("=" * 80)
In [32]:
# Debug leftover: dumps the raw (train, test) index arrays for each fold.
# NOTE(review): duplicates the inspection loop above and relies on the
# legacy iterable KFold object — consider deleting this cell.
for t, tt in cv:
print(t, tt)
In [29]:
# IPython help lookup (shows the KFold docstring) — development aid only;
# remove from the finished notebook.
KFold?
In [34]:
# Cross-validated MSE of a linear fit on synthetic 1-D regression data.
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

# 1000 points, one feature, Gaussian noise (sd=20); coef=True also returns
# the true underlying coefficient for reference.
X, Y, coef = make_regression(
    n_samples=1000, n_features=1, noise=20,
    coef=True, shuffle=True, random_state=0,
)

model = LinearRegression()
# Modern KFold API: only n_splits is given; the old KFold(1000, 10) form
# was removed along with sklearn.cross_validation.
cv = KFold(n_splits=10)
scores = np.zeros(cv.get_n_splits())

for i, (train_index, test_index) in enumerate(cv.split(X)):
    X_train = X[train_index]
    Y_train = Y[train_index]
    X_test = X[test_index]
    Y_test = Y[test_index]
    model.fit(X_train, Y_train)
    y_pred = model.predict(X_test)
    scores[i] = mean_squared_error(Y_test, y_pred)

scores
Out[34]:
In [37]:
# sns.distplot was deprecated in seaborn 0.11 and removed in 0.14;
# histplot(..., kde=True, stat="density") reproduces its hist+KDE view.
# seaborn was never imported in the notebook — import it here.
import seaborn as sns

sns.histplot(scores, kde=True, stat="density")
Out[37]:
In [41]:
# cross_val_score moved to sklearn.model_selection; the "mean_squared_error"
# scorer was renamed "neg_mean_squared_error" (scores are negated so that
# larger is always better), and scoring/cv must be passed as keywords in
# modern releases.  distplot is also removed — use histplot instead.
from sklearn.model_selection import cross_val_score

sns.histplot(
    cross_val_score(model, X, Y, scoring="neg_mean_squared_error", cv=cv),
    kde=True, stat="density",
)
Out[41]:
In [42]:
# IPython help lookup (shows the cross_val_score docstring) — development
# aid only; remove from the finished notebook.
cross_val_score?
In [43]:
# Same per-fold scores as the plot above, shown as raw numbers.  Uses the
# modern keyword signature and the renamed "neg_mean_squared_error" scorer
# (values are negated MSE, so larger is better).
cross_val_score(model, X, Y, scoring="neg_mean_squared_error", cv=cv)
Out[43]:
In [ ]: