In [1]:
from tpot import TPOTClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
In [2]:
# Load scikit-learn's bundled digits dataset (features in .data, labels in .target).
digits = load_digits()

# Hold out 25% of the samples for evaluation. The split is seeded so that
# Restart Kernel -> Run All reproduces the same partition (and therefore a
# comparable test score) on every run.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    train_size=0.75,
    test_size=0.25,
    random_state=42,
)

# Sanity check: show the shapes of the four resulting arrays.
X_train.shape, X_test.shape, y_train.shape, y_test.shape
Out[2]:
In [3]:
# Run TPOT's pipeline search with a 5-minute wall-clock budget and a
# population of 40 candidate pipelines; verbosity=2 prints progress.
# random_state seeds the stochastic search so repeated runs start from the
# same state. NOTE(review): because the search is bounded by max_time_mins,
# the point at which it stops may still differ between machines, so the
# selected pipeline is not guaranteed to be identical everywhere.
tpot = TPOTClassifier(
    verbosity=2,
    max_time_mins=5,
    population_size=40,
    random_state=42,
)
tpot.fit(X_train, y_train)

# Report the fitted optimizer's score on the held-out test split.
print(tpot.score(X_test, y_test))
In [4]:
# Save the pipeline held by the fitted TPOT object as a Python script.
export_path = 'tpot_digits_pipeline.py'
tpot.export(export_path)
In [ ]:
# %load tpot_digits_pipeline.py
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# The input file must contain a column named 'target' holding the outcome;
# the path and separator below are placeholders to be filled in by the user.
tpot_data = pd.read_csv(
    'PATH/TO/DATA/FILE',
    sep='COLUMN_SEPARATOR',
    dtype=np.float64,
)

# Everything except the 'target' column is treated as a predictor.
features = tpot_data.drop('target', axis=1)

# Unseeded split (random_state=None), exactly as in the exported script.
(
    training_features,
    testing_features,
    training_classes,
    testing_classes,
) = train_test_split(features, tpot_data['target'], random_state=None)

# The exported model: distance-weighted k-nearest-neighbours with k=4 and
# p=2 as the Minkowski power parameter.
exported_pipeline = KNeighborsClassifier(n_neighbors=4, p=2, weights="distance")
exported_pipeline.fit(training_features, training_classes)

# Predicted classes for the held-out rows.
results = exported_pipeline.predict(testing_features)
In [ ]: