In [ ]:
# Show the full version string of the Python interpreter backing this kernel,
# so the environment the notebook ran under is recorded with its output.
import sys
print(sys.version)

In [ ]:
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
%matplotlib inline
import scipy as sp
import time

import pandas as pd
import seaborn as sns

In [ ]:
from scipy.spatial.distance import cdist

In [ ]:
import sys
# Add the relative '../code/' directory to the module search path before the
# import below. The path is relative, so it is resolved against the kernel's
# working directory (presumably k_means.py lives in that folder — confirm).
sys.path.append('../code/')

# KMeans is taken from the k_means module, not from sklearn.cluster.
from k_means import KMeans

Train on sklearn classification data


In [ ]:
from sklearn.datasets import make_classification

# Fixed seed: with random_state=None every "Restart & Run All" would draw a
# different dataset, so none of the plots or loss numbers below could be
# reproduced. Change the value to sample a different (but still repeatable) set.
SEED = 0

# Synthetic data: 300 samples with 5 features (all informative, none redundant
# or repeated), 2 classes with one cluster per class, and a 0.1% chance of a
# randomly flipped label (flip_y).
X, y = make_classification(n_samples=300, n_features=5,
                           n_informative=5, n_redundant=0, n_repeated=0,
                           n_classes=2, n_clusters_per_class=1,
                           weights=None, flip_y=0.001, class_sep=2.0,
                           hypercube=True, shift=0.0, scale=1.0,
                           shuffle=True, random_state=SEED)

# Hold-out split: the first n rows are for training, the remaining rows for
# testing. make_classification already shuffled the samples (shuffle=True),
# so a positional split is sufficient.
n = 200
X_train, y_train = X[:n], y[:n]
X_test, y_test = X[n:], y[n:]

In [ ]:
# Train on the training split only. Passing the full X / y here would put the
# rows of X_test / y_test into the training set as well, so the test-set
# assessment would be measured on data the model had already seen.
train_X = X_train
train_y = y_train

# k=3 centers are requested although the generated data has 2 classes
# (n_classes=2) — TODO confirm this is intentional.
# pca_obj=None: no PCA object is handed to the model.
km_sklearn = KMeans(k=3, train_X=train_X, train_y=train_y,
                    test_X=X_test, test_y=y_test,
                    pca_obj=None)

In [ ]:
# Execute the model's run() method. What it does internally (presumably the
# k-means fitting iterations — confirm in k_means) is not visible from here.
km_sklearn.run()

In [ ]:
# Going by the method name, this plots the normalized squared reconstruction
# error recorded by the model; the return value is kept in p.
p = km_sklearn.plot_squared_reconstruction_error_normalized()

In [ ]:
# Going by the method name, this plots the model's 0/1 (misclassification)
# loss; the return value is kept in p, replacing any earlier value of p.
p = km_sklearn.plot_0_1_loss()

In [ ]:
# Going by the method name, this plots how many points are assigned to each
# cluster center; the return value is kept in c.
c = km_sklearn.plot_num_assignments_for_each_center()

In [ ]:
# Evaluate the model on test data — presumably the test_X / test_y passed to
# the KMeans constructor; confirm in k_means. No arguments are passed here.
km_sklearn.assess_test_data()

In [ ]: