notebook.community

Edit and run



In [1]:

    
from tpot import TPOTClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split



In [2]:

    
# Load scikit-learn's bundled digits dataset (features in .data, labels in .target).
digits = load_digits()

# Hold out 25% of the samples for testing. The seed is fixed so the same
# samples land in the train and test sets on every Restart & Run All;
# without it the held-out set (and therefore the test score) changes per run.
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target,
                                                    train_size=0.75, test_size=0.25,
                                                    random_state=42)

# Show the resulting shapes as a quick sanity check on the split sizes.
X_train.shape, X_test.shape, y_train.shape, y_test.shape









    Out[2]:





((1347, 64), (450, 64), (1347,), (450,))



In [3]:

    
# Configure the pipeline search: 40 candidate pipelines per generation,
# a 5-minute optimisation budget (max_time_mins), and verbosity=2 so that
# per-generation progress is printed while fitting.
tpot = TPOTClassifier(
    population_size=40,
    max_time_mins=5,
    verbosity=2,
)

# Search for a pipeline using the training split only.
tpot.fit(X_train, y_train)

# Score the selected pipeline on the held-out split and print the result.
test_score = tpot.score(X_test, y_test)
print(test_score)









    



Warning: xgboost.XGBClassifier is not available and will not be used by TPOT.






    



Optimization Progress: 100%|██████████| 80/80 [01:08<00:00,  1.39pipeline/s]





    



Generation 1 - Current best internal CV score: 0.9628970273793229






    



Optimization Progress: 100%|██████████| 120/120 [01:34<00:00,  1.56pipeline/s]





    



Generation 2 - Current best internal CV score: 0.9829041339667848






    



Optimization Progress: 100%|██████████| 160/160 [02:13<00:00,  1.26pipeline/s]





    



Generation 3 - Current best internal CV score: 0.9829041339667848






    



Optimization Progress: 100%|██████████| 200/200 [02:39<00:00,  1.42pipeline/s]





    



Generation 4 - Current best internal CV score: 0.9829041339667848






    



Optimization Progress: 100%|██████████| 240/240 [03:37<00:00,  3.76s/pipeline]





    



Generation 5 - Current best internal CV score: 0.9829288814415318






    



Optimization Progress: 100%|██████████| 280/280 [04:55<00:00,  1.22s/pipeline]





    



Generation 6 - Current best internal CV score: 0.9851265982875169






    



Optimization Progress: 100%|██████████| 320/320 [06:34<00:00,  3.85s/pipeline]





    



Generation 7 - Current best internal CV score: 0.9851265982875169






    



                                                           8,  3.01s/pipeline]





    



TPOT closed prematurely. Will use the current best pipeline.

Best pipeline: KNeighborsClassifier(input_matrix, KNeighborsClassifier__n_neighbors=4, KNeighborsClassifier__p=2, KNeighborsClassifier__weights=distance)
0.984444444444



In [4]:

    
# Write the best pipeline found by the search to a standalone Python script.
exported_script_path = 'tpot_digits_pipeline.py'
tpot.export(exported_script_path)



In [ ]:

    
# %load tpot_digits_pipeline.py
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# NOTE: The data file must contain the outcome in a column named 'target'.
# 'PATH/TO/DATA/FILE' and 'COLUMN_SEPARATOR' are placeholders to be replaced
# with the real file path and delimiter before running this cell.
tpot_data = pd.read_csv(
    'PATH/TO/DATA/FILE',
    sep='COLUMN_SEPARATOR',
    dtype=np.float64,
)

# Every column except 'target' is used as a feature.
features = tpot_data.drop('target', axis=1)

# Split features and labels into training and testing subsets (unseeded).
(training_features, testing_features,
 training_classes, testing_classes) = train_test_split(
    features, tpot_data['target'], random_state=None)

# The pipeline chosen by TPOT: a distance-weighted 4-nearest-neighbours classifier.
exported_pipeline = KNeighborsClassifier(n_neighbors=4, p=2, weights="distance")

# fit() returns the estimator itself, so the prediction can be chained onto it.
results = exported_pipeline.fit(training_features, training_classes).predict(testing_features)



In [ ]: