In [1]:
%matplotlib inline
import numpy as np
from pycobra.cobra import Cobra
from pycobra.ewa import Ewa
from pycobra.visualisation import Visualisation
from pycobra.diagnostics import Diagnostics
In [2]:
# setting up our random data-set
rng = np.random.RandomState(42)
# D1 = train machines; D2 = create COBRA; D3 = calibrate epsilon, alpha; D4 = testing
n_features = 2
D1, D2, D3, D4 = 200, 200, 200, 200
D = D1 + D2 + D3 + D4
X = rng.uniform(-1, 1, D * n_features).reshape(D, n_features)
Y = np.power(X[:,0], 2) + np.power(X[:,1], 3)
# split the features: D1 + D2 for training, D3 for epsilon calibration, D4 for testing
X_train = X[:D1 + D2]
X_eps = X[D1 + D2:D1 + D2 + D3]
X_test = X[D1 + D2 + D3:D1 + D2 + D3 + D4]
# and the corresponding responses
Y_train = Y[:D1 + D2]
Y_eps = Y[D1 + D2:D1 + D2 + D3]
Y_test = Y[D1 + D2 + D3:D1 + D2 + D3 + D4]
# set up our COBRA machine with the data
cobra = Cobra(epsilon=0.5)
cobra.fit(X_train, Y_train)
Out[2]:
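COBRA predicts at a point x by averaging the training responses of all points whose machine predictions lie within epsilon of the machine predictions at x, so the choice of epsilon matters. Instead of fixing it at 0.5, we could calibrate it on the D3 slice with the Diagnostics class imported above. A minimal sketch, assuming the optimal_epsilon helper as it appears in the pycobra tutorials (signature and return values are an assumption here):

# sketch only: search a grid of epsilon values on the D3 slice (assumed API)
cobra_diagnostics = Diagnostics(cobra, X_test, Y_test)
epsilon, mse = cobra_diagnostics.optimal_epsilon(X_eps, Y_eps, line_points=100, info=False)
cobra_tuned = Cobra(epsilon=epsilon).fit(X_train, Y_train)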
In [3]:
cobra_vis = Visualisation(cobra, X_test, Y_test)
In [4]:
# plot_machines plots each machine's predictions over the test points
# here we plot only the aggregated COBRA machine
cobra_vis.plot_machines(machines=["COBRA"])
In [5]:
cobra_vis.plot_machines()
In [6]:
cobra_vis.QQ()
In [7]:
cobra_vis.boxplot()
In [8]:
ewa = Ewa()
ewa.set_beta(X_beta=X_eps, y_beta=Y_eps)
ewa.fit(X_train, Y_train)
Out[8]:
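Ewa implements the exponentially weighted aggregate: each constituent machine receives a weight proportional to exp(-beta × empirical risk), and set_beta above tunes beta on the D3 slice. A toy numpy illustration of how that weighting behaves (the risk values are hypothetical, and this is not pycobra's internal code):

# toy illustration of exponential weighting, not pycobra internals
risks = np.array([0.10, 0.20, 0.40])   # hypothetical empirical risks of three machines
beta = 5.0
weights = np.exp(-beta * risks)
weights /= weights.sum()               # normalised weights: lower-risk machines weigh more
print(weights)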
In [9]:
ewa_vis = Visualisation(ewa, X_test, Y_test)
In [10]:
ewa_vis.QQ("EWA")
In [11]:
ewa_vis.boxplot()
In [12]:
from sklearn import datasets
from sklearn.metrics import accuracy_score
from pycobra.classifiercobra import ClassifierCobra
bc = datasets.load_breast_cancer()
X_cc = bc.data[:-40]
y_cc = bc.target[:-40]
X_cc_test = bc.data[-40:]
y_cc_test = bc.target[-40:]
In [13]:
cc = ClassifierCobra()
In [14]:
cc.fit(X_cc, y_cc)
Out[14]:
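ClassifierCobra follows the same consensus idea as Cobra, but since the outputs are labels it aggregates by a majority vote over the retained training points rather than an average. A toy illustration of the voting step (illustration only, not pycobra's internal code):

# toy majority vote over the labels of the retained training points
labels = np.array([0, 1, 1, 1, 0])
prediction = np.bincount(labels).argmax()  # -> 1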
In [15]:
cc_vis = Visualisation(cc, X_cc_test, y_cc_test)
In [16]:
cc_vis.boxplot()
Remember that all the estimators in the pycobra package are scikit-learn compatible, so we can also use scikit-learn's metrics and tools to analyse our machines!
In [17]:
from sklearn.metrics import classification_report
print(classification_report(y_cc_test, cc.predict(X_cc_test)))
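The accuracy_score we imported earlier works just as well:

print(accuracy_score(y_cc_test, cc.predict(X_cc_test)))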
We're now going to experiment with plotting colors and data. After we find out which points are picked up by which machines for a fixed epsilon (an option we can toggle), we can plot the distribution of machines.
Why is this useful? Since we're dealing with a 2-D input space, we can see whether certain parts of the input space are picked up by certain machines. This could lead to interesting experiments and insights into how the constituent machines split up the space.
We first present a plot where the machine colors are mixed depending on which machines were selected; after that we plot one machine at a time.
In [18]:
indices, MSE = cobra_vis.indice_info(X_test=X_eps[0:50], y_test=Y_eps[0:50], epsilon=0.50)
In [19]:
cobra_vis.color_cobra(X_test=X_eps[0:50], indice_info=indices, single=True)
In [20]:
cobra_vis.color_cobra(X_test=X_eps[0:50], indice_info=indices)
In [21]:
cobra_vis.voronoi(X_test=X_eps[0:50], indice_info=indices, single=True)
Out[21]:
In [22]:
cobra_vis.voronoi(X_test=X_eps[0:50], indice_info=indices)
Out[22]:
In [23]:
cobra_vis.voronoi(X_test=X_eps[0:50], indice_info=indices, MSE=MSE, gradient=True)
Out[23]:
Licensed under the MIT License - https://opensource.org/licenses/MIT