In [1]:
import numpy as np

X = np.arange(10).reshape((5, 2))
X


Out[1]:
array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [3]:
Y = np.arange(5)
Y


Out[3]:
array([0, 1, 2, 3, 4])

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=42)

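With test_size=0.33, train_test_split first shuffles the rows (reproducibly, via random_state=42) and then holds out ceil(5 * 0.33) = 2 of the 5 rows as the test set; the matching rows of X and Y stay paired across the four returned arrays.
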
In [5]:
X_train


Out[5]:
array([[4, 5],
       [0, 1],
       [6, 7]])

In [6]:
Y_train


Out[6]:
array([2, 0, 3])

In [7]:
X_test


Out[7]:
array([[2, 3],
       [8, 9]])

In [8]:
Y_test


Out[8]:
array([1, 4])
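
For classification targets it often matters that both halves keep the same label proportions; a minimal sketch of train_test_split's stratify parameter (the data and labels below are invented for illustration, assuming the current sklearn.model_selection API):

In [ ]:
# Hedged sketch: stratify so each half keeps the label mix (toy data).
labels = np.array([0, 0, 0, 1, 1, 1])
data = np.arange(12).reshape(6, 2)
d_train, d_test, l_train, l_test = train_test_split(
    data, labels, test_size=1/3, stratify=labels, random_state=42)
print(l_test)  # exactly one example of each class lands in the test set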

In [11]:
N = 5
X = np.arange(8 * N).reshape(-1, 2) * 10
Y = np.hstack([np.ones(N), np.ones(N) * 2, np.ones(N) * 3, np.ones(N) * 4])
print("X:\n", X, sep="")
print("Y:\n", Y, sep="")


X:
[[  0  10]
 [ 20  30]
 [ 40  50]
 [ 60  70]
 [ 80  90]
 [100 110]
 [120 130]
 [140 150]
 [160 170]
 [180 190]
 [200 210]
 [220 230]
 [240 250]
 [260 270]
 [280 290]
 [300 310]
 [320 330]
 [340 350]
 [360 370]
 [380 390]]
Y:
[ 1.  1.  1.  1.  1.  2.  2.  2.  2.  2.  3.  3.  3.  3.  3.  4.  4.  4.
  4.  4.]
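
This builds a toy dataset of 20 rows, five per class, with the labels in sorted order, which makes it easy to see which classes land in each fold below.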

In [33]:
from sklearn.model_selection import KFold
cv = KFold(n_splits=3, shuffle=True, random_state=1)
for train_index, test_index in cv.split(X):
    print("test y:", Y[test_index])
    print("." * 80)
    print("train y: ", Y[train_index])
    print("=" * 80)


test y: [ 1.  1.  1.  2.  3.  3.  4.]
................................................................................
train y:  [ 1.  1.  2.  2.  2.  2.  3.  3.  3.  4.  4.  4.  4.]
================================================================================
test y: [ 1.  1.  2.  3.  4.  4.  4.]
................................................................................
train y:  [ 1.  1.  1.  2.  2.  2.  2.  3.  3.  3.  3.  4.  4.]
================================================================================
test y: [ 2.  2.  2.  3.  3.  4.]
................................................................................
train y:  [ 1.  1.  1.  1.  1.  2.  2.  3.  3.  3.  4.  4.  4.  4.]
================================================================================
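
The per-fold label counts above are uneven because plain KFold ignores Y when it splits. If balanced folds matter, StratifiedKFold keeps each fold close to the full 5/5/5/5 class mix; a minimal sketch against the same X and Y (again assuming the current sklearn.model_selection API):

In [ ]:
# Sketch: StratifiedKFold balances the class counts in every fold.
from sklearn.model_selection import StratifiedKFold
scv = StratifiedKFold(n_splits=3, shuffle=True, random_state=1)
for train_index, test_index in scv.split(X, Y):
    print("test y:", np.sort(Y[test_index]))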

In [32]:
for t, tt in cv.split(X):
    print(t, tt)


[ 0  1  2  3  4  5  7  8  9 10 12 14 15] [ 6 11 13 16 17 18 19]
[ 0  3  6  7  8 10 11 13 15 16 17 18 19] [ 1  2  4  5  9 12 14]
[ 1  2  4  5  6  9 11 12 13 14 16 17 18 19] [ 0  3  7  8 10 15]
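
These are the raw row indices behind each fold: shuffle=True randomizes which rows land in which fold, but every row appears in exactly one test set, so the three test index arrays partition all 20 rows.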

In [29]:
KFold?

In [34]:
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

X, Y, coef = make_regression(n_samples=1000, n_features=1, noise=20, coef=True, shuffle=True, random_state=0)
model = LinearRegression()
cv = KFold(n_splits=10)  # no shuffle: 10 contiguous blocks of 100 rows each

scores = np.zeros(10)
for i, (train_index, test_index) in enumerate(cv.split(X)):
    X_train = X[train_index]
    Y_train = Y[train_index]
    X_test = X[test_index]
    Y_test = Y[test_index]
    model.fit(X_train, Y_train)     # refit on the 900 training rows
    y_pred = model.predict(X_test)  # predict the 100 held-out rows
    scores[i] = mean_squared_error(Y_test, y_pred)
    
scores


Out[34]:
array([ 301.58271911,  341.91498985,  410.58098438,  499.68109613,
        461.00979825,  384.106544  ,  434.90159273,  377.65506997,
        366.60959935,  371.14031438])
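
The fold MSEs scatter around 400, which is what the noise level implies (noise=20 is the noise standard deviation, so the irreducible MSE is about 20**2 = 400). Their mean and spread are the usual cross-validated summary:

In [ ]:
# Summarize the per-fold errors computed above.
print("MSE: %.1f +/- %.1f" % (scores.mean(), scores.std()))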

In [37]:
import seaborn as sns

sns.histplot(scores, kde=True)


Out[37]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f63ebf12210>

In [41]:
from sklearn.model_selection import cross_val_score
sns.histplot(cross_val_score(model, X, Y, scoring="neg_mean_squared_error", cv=cv), kde=True)


Out[41]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f63ebd85350>

In [42]:
cross_val_score?

In [43]:
cross_val_score(model, X, Y, scoring="neg_mean_squared_error", cv=cv)


Out[43]:
array([-301.58271911, -341.91498985, -410.58098438, -499.68109613,
       -461.00979825, -384.106544  , -434.90159273, -377.65506997,
       -366.60959935, -371.14031438])
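
These are exactly the fold MSEs from In [34] with the sign flipped: scikit-learn's scoring convention is that larger is always better, so error metrics are returned negated (hence the neg_ prefix). Negate the result to recover the plain MSEs:

In [ ]:
# Flip the sign to recover the per-fold MSE values.
-cross_val_score(model, X, Y, scoring="neg_mean_squared_error", cv=cv)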
