In [1]:
!pip install numpy scipy scikit-learn pandas
In [2]:
!pip install deap update_checker tqdm stopit
In [53]:
# Using the XGBoost Python library
import numpy as np
import xgboost as xgb
In [4]:
print(dir(xgb))
In [5]:
import sys
# Point Python at a locally built xgboost package (only needed when xgboost
# was built from source rather than installed with pip)
sys.path.append("C:\\Users\\KOGENTIX\\xgboost\\python-package\\")
print(sys.path)
In [37]:
from sklearn import datasets, neighbors, linear_model, tree, svm
In [18]:
digits = datasets.load_digits()
X_digits = digits.data
y_digits = digits.target
n_samples = len(X_digits)
n_samples
Out[18]:
In [19]:
# Hold out the last 10% of the samples as a test set
X_train = X_digits[:int(.9 * n_samples)]
y_train = y_digits[:int(.9 * n_samples)]
X_test = X_digits[int(.9 * n_samples):]
y_test = y_digits[int(.9 * n_samples):]
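The slice above keeps the rows in file order; a shuffled, stratified split usually gives a more representative held-out set. A minimal sketch with scikit-learn's train_test_split (not used in the cells below):
In [ ]:
# Sketch: shuffled, stratified 90/10 split of the digits data
from sklearn.model_selection import train_test_split

X_tr, X_te, y_tr, y_te = train_test_split(
    X_digits, y_digits, test_size=0.1, stratify=y_digits, random_state=42)
print(X_tr.shape, X_te.shape)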
In [65]:
y_train
Out[65]:
In [68]:
import numpy as np
In [69]:
# scipy.stats.itemfreq was removed in SciPy 1.3; np.unique gives the same
# per-class frequency counts
np.unique(y_train, return_counts=True)
Out[69]:
In [21]:
clf = tree.DecisionTreeClassifier()
clf = clf.fit(X_train, y_train)
clf.predict(X_test)
Out[21]:
In [22]:
print(clf.score(X_test, y_test))
In [23]:
# XGBRegressor treats the digit labels as continuous targets, so .score()
# below reports R^2 rather than classification accuracy
xr = xgb.XGBRegressor()
In [24]:
xr = xr.fit(X_train, y_train)
In [25]:
xr.predict(X_test)
Out[25]:
In [26]:
print(xr.score(X_test, y_test))
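Since digits is a classification task, XGBoost's classifier interface is the more natural fit; a minimal sketch whose score() reports mean accuracy, directly comparable to the other classifiers in this section:
In [ ]:
# Sketch: classification counterpart of the regressor above
xc = xgb.XGBClassifier()
xc = xc.fit(X_train, y_train)
print(xc.score(X_test, y_test))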
In [27]:
knn = neighbors.KNeighborsClassifier()
In [29]:
knn.fit(X_train, y_train).score(X_test, y_test)
Out[29]:
In [30]:
# max_iter raised so the solver converges on the digits data
# (newer scikit-learn defaults otherwise emit a ConvergenceWarning)
logistic = linear_model.LogisticRegression(max_iter=1000)
In [31]:
logistic.fit(X_train, y_train).score(X_test, y_test)
Out[31]:
In [38]:
clfs = svm.SVC()
In [39]:
clfs.fit(X_train, y_train).score(X_test, y_test)
Out[39]:
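Each model above is judged on a single 90/10 split. For a steadier comparison, cross-validated accuracy averages over several folds; a short sketch using cross_val_score on the full digits data:
In [ ]:
# Sketch: 5-fold cross-validated accuracy for a quick model comparison
from sklearn.model_selection import cross_val_score

for name, est in [('knn', neighbors.KNeighborsClassifier()),
                  ('logistic', linear_model.LogisticRegression(max_iter=1000)),
                  ('tree', tree.DecisionTreeClassifier()),
                  ('svm', svm.SVC())]:
    print(name, cross_val_score(est, X_digits, y_digits, cv=5).mean())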
In [56]:
from sklearn.metrics import accuracy_score, auc, roc_curve
In [49]:
accuracy_score(y_test, clf.predict(X_test))
Out[49]:
In [59]:
# roc_curve expects (y_true, y_score); pos_label=2 scores the one-vs-rest
# problem "digit 2 vs the rest", using the hard predictions as scores
fpr, tpr, thresholds = roc_curve(y_test, clf.predict(X_test), pos_label=2)
auc(fpr, tpr)
Out[59]:
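The curve above covers only digit 2 and uses hard labels as scores. For an overall multiclass AUC, roc_auc_score can consume predicted class probabilities directly (a sketch; multi_class support requires scikit-learn >= 0.22):
In [ ]:
# Sketch: macro-averaged one-vs-rest AUC over all ten digits
from sklearn.metrics import roc_auc_score

proba = clf.predict_proba(X_test)  # shape (n_samples, 10)
print(roc_auc_score(y_test, proba, multi_class='ovr', average='macro'))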
In [50]:
accuracy_score(y_test, clfs.predict(X_test))
Out[50]:
In [60]:
fpr, tpr, thresholds = roc_curve(y_test, clfs.predict(X_test), pos_label=2)
auc(fpr, tpr)
Out[60]:
In [51]:
accuracy_score(y_test, knn.predict(X_test))
Out[51]:
In [61]:
fpr, tpr, thresholds = roc_curve(y_test, knn.predict(X_test), pos_label=2)
auc(fpr, tpr)
Out[61]:
In [52]:
accuracy_score(y_test, logistic.predict(X_test))
Out[52]:
In [ ]:
!pip install scikit-mdr skrebate
In [ ]:
!pip install tpot
In [33]:
from tpot import TPOTClassifier
In [34]:
pipeline_optimizer = TPOTClassifier()
In [35]:
pipeline_optimizer = TPOTClassifier(generations=5, population_size=20, cv=5,
                                    random_state=42, verbosity=2)
In [36]:
pipeline_optimizer.fit(X_train, y_train)
Out[36]:
In [41]:
print(pipeline_optimizer.score(X_test, y_test))
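After fitting, TPOT keeps the winning pipeline as an ordinary scikit-learn Pipeline object, so it can be inspected before (or instead of) exporting it; a quick sketch, assuming this TPOT version exposes the fitted_pipeline_ attribute:
In [ ]:
# Inspect the best pipeline TPOT found without exporting a script
print(pipeline_optimizer.fitted_pipeline_)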
In [42]:
pipeline_optimizer.export('tpot_exported_pipeline.py')
Out[42]:
In [43]:
import os
In [44]:
os.getcwd()
Out[44]:
In [ ]:
# This is the exported pipeline
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Normalizer

#### NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=42)

#### Score on the training set was: 0.9647684149987755
exported_pipeline = make_pipeline(
    Normalizer(norm="max"),
    KNeighborsClassifier(n_neighbors=6, p=2, weights="distance")
)
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
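The exported script stops after predict; to see how the reconstructed pipeline does on its held-out split, its predictions can be scored directly (a sketch reusing the names defined in the script above):
In [ ]:
# Sketch: score the exported pipeline's predictions on the held-out split
from sklearn.metrics import accuracy_score
print(accuracy_score(testing_target, results))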
In [45]:
# Equivalent TPOT command-line run (same settings as the TPOTClassifier above):
# tpot data/mnist.csv -is , -target class -o tpot_exported_pipeline.py -g 5 -p 20 -cv 5 -s 42 -v 2
In [ ]: