In [1]:
import numpy as np
import seaborn as sb
import pandas as pd
import MyML.cluster.K_Means3 as myKM
import MyML.cluster.eac as eac
import MyML.helper.partition as mpart
import MyML.metrics.accuracy as acc
import MyML.cluster.linkage as linkage

Prepare datasets


In [2]:
import sklearn.datasets
from sklearn.preprocessing import normalize

In [3]:
datasets_path = '/home/chiroptera/QCThesis/datasets/'

In [4]:
datasets = dict()

iris


In [5]:
iris = sklearn.datasets.load_iris()
data = iris.data.astype(np.float32)
gt = iris.target

datasets['iris'] = {'data':data, 'gt':gt}

wine


In [6]:
wine = sklearn.datasets.fetch_mldata('uci-20070111 wine', data_home='~/')
data = wine.data.astype(np.float32)
data_norm = normalize(wine.data, axis=0).astype(np.float32)
gt = wine.target.astype(np.int32)

datasets['wine'] = {'data':data, 'gt':gt}
datasets['wine_norm'] = {'data':data_norm, 'gt':gt}

ionosphere


In [7]:
dataname = datasets_path + "ionosphere/ionosphere.data"

dataset = pd.read_csv(dataname, header=None, sep=",")
print dataset.shape
dataset.head()

data = dataset.values[:,:-1].astype(np.float32)
gt = dataset.values[:,-1]
gt[gt=='g'] = 1
gt[gt=='b'] = 0
gt = gt.astype(np.int32)

datasets['ionosphere'] = {'data':data, 'gt':gt}


(351, 35)

optdigits


In [8]:
optdigits = sklearn.datasets.load_digits(n_class=10)
data = np.float32(optdigits.data)
gt = np.int32(optdigits.target)

datasets['optdigits'] = {'data':data, 'gt':gt}

In [9]:
dataname = datasets_path + 'optdigits/optdigits.tra'

dataset = pd.read_csv(dataname, header=None, sep=",")
data = dataset.get_values()[:,:-1]
data = data.astype(np.float32)
gt = dataset.get_values()[:,-1]
gt = gt.astype(np.int32)

datasets['optdigits'] = {'data':data, 'gt':gt}

mfeat-fou


In [10]:
dataname = datasets_path + "mfeat/mfeat-fou.asc"

dataset = pd.read_csv(dataname, header=None, sep="  ")
data = dataset.get_values().astype(np.float32)
gt = np.empty(dataset.shape[0], dtype=np.int32)
for i in range(10):
    gt[i*200 : i*200+200]=i
    
datasets['mfeat_fou'] = {'data':data, 'gt':gt}


/home/chiroptera/anaconda/lib/python2.7/site-packages/pandas/io/parsers.py:648: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators; you can avoid this warning by specifying engine='python'.
  ParserWarning)

breast-cancer


In [11]:
dataname = datasets_path + "breast-cancer/breast-cancer"

data, gt = sklearn.datasets.load_svmlight_file(dataname)
data = data.todense().astype(np.float32)
gt = gt.astype(np.int32)

datasets['breast_cancer'] = {'data':data, 'gt':gt}

pima


In [12]:
dataname = datasets_path + "pima/pima-indians-diabetes.data"

pima = pd.read_csv(dataname, header=None, sep=",")
data = pima.get_values()[:,:-1].astype(np.float32)
data_norm = normalize(data, axis=0).astype(np.float32)
gt = pima.get_values()[:,-1].astype(np.int32)
    
datasets['pima'] = {'data':data, 'gt':gt}
datasets['pima_norm'] = {'data':data_norm, 'gt':gt}

isolet


In [13]:
datasets['optdigits']['data'].shape


Out[13]:
(3823, 64)

In [14]:
dataname = datasets_path + "isolet/isolet1-5.data"

dataset = pd.read_csv(dataname, header=None, sep=",")
data = dataset.get_values().astype(np.float32)[:,:-1]
gt = dataset.get_values()[:,-1].astype(np.int32)

datasets['isolet'] = {'data':data, 'gt':gt}

ECG IT


In [15]:
dataname = datasets_path + "ecg_it/ecg_it.data"
dataset = pd.read_csv(dataname, header=None, sep=",")
data = dataset.values[1:,1:-1].astype(np.float32)
gt = dataset.values[1:,-1].astype(np.int32)

#remove unlabeled
labeled_idx = gt!=2
data = data[labeled_idx]
gt = gt[labeled_idx]

In [16]:
gt0 = gt==0
gt1 = gt==1

In [17]:
data = np.concatenate((data[gt0],data[gt1][:600]))
gt = np.concatenate((gt[gt0],gt[gt1][:600]))

MNIST


In [18]:
import sklearn.datasets
mnist = sklearn.datasets.fetch_mldata('MNIST (original)', data_home='~/')
data = mnist.data.astype(np.float32)
gt = mnist.target.astype(np.int32)

datasets['mnist'] = {'data':data, 'gt':gt}

In [19]:
datasets.keys()


Out[19]:
['optdigits',
 'iris',
 'breast_cancer',
 'isolet',
 'pima_norm',
 'mfeat_fou',
 'wine_norm',
 'pima',
 'ionosphere',
 'mnist',
 'wine']

In [20]:
from scipy.io import savemat

In [21]:
savemat(datasets_path + 'dataset.mat', datasets)

Process


In [22]:
import MyML.cluster.K_Means3 as myKM
import MyML.EAC.eac_new as eac
import MyML.helper.partition as part
import MyML.EAC.rules as rules
import MyML.metrics.accuracy as acc
import MyML.utils.profiling as prof

In [23]:
for name, ds in datasets.iteritems():
    print ds['data'].shape, '\t', name


(3823, 64) 	optdigits
(150, 4) 	iris
(683, 10) 	breast_cancer
(7797, 617) 	isolet
(768, 8) 	pima_norm
(2000, 76) 	mfeat_fou
(178, 4) 	wine_norm
(768, 8) 	pima
(351, 34) 	ionosphere
(70000, 784) 	mnist
(178, 4) 	wine

In [24]:
del datasets['mnist']

In [25]:
validation_my_eac = pd.DataFrame(index=datasets.keys(), columns=['accuracy', 'n_clusts','lifetime accuracy', 'lifetime n_clusts', 'ensemble time', 'build time', 'clustering time'])

In [42]:
t = prof.Timer()
for name, ds in datasets.iteritems():

#name = 'iris'
#ds = datasets[name]
    print '{}:'.format(name),
    data = ds['data']
    gt = ds['gt']

    # generate ensemble with K-Means
    kmGen = myKM.K_Means()
    kmGen._MAX_THREADS_BLOCK = 256
    kmGen._label_mode = 'numba'
    
    t.reset()
    t.tic()
    ensemble = part.generateEnsemble(data, kmGen, n_clusters=rules.rule1(data.shape[0]), npartitions=100, iters=2)
    ensemble_time = t.tac()
    
    # eac clustering
    eacEst = eac.EAC(data.shape[0], sparse=False, condensed=True)
    
    t.reset()
    t.tic()
    eacEst.buildMatrix(ensemble)
    build_time = t.tac()
    
    t.reset()
    t.tic()    
    eacEst.finalClustering(np.unique(gt).size)
    clustering_time = t.tac()
    
    # eac clustering with lifetime
    eacEst2 = eac.EAC(data.shape[0], sparse=False, condensed=True)
    eacEst2.buildMatrix(ensemble)
    t.tic()
    eacEst2.finalClustering()
    t.tac()

    # score result
    mukresAcc = acc.HungarianIndex(data.shape[0])
    accuracy =  mukresAcc.score(gt, eacEst.labels)
    accuracy_lt = mukresAcc.score(gt, eacEst2.labels)

    print '{} - {}'.format(accuracy, accuracy_lt)
    validation_my_eac.loc[name] = (accuracy, np.unique(eacEst.labels).size, accuracy_lt, np.unique(eacEst2.labels).size, ensemble_time, build_time, clustering_time)


optdigits: 0.200627779231 - 0.199581480513
iris: 0.973333333333 - 0.666666666667
breast_cancer: 0.647144948755 - 0.600292825769
isolet: 0.0405284083622 - 0.0387328459664
pima_norm: 0.649739583333 - 0.649739583333
mfeat_fou: 0.1025 - 0.101
wine_norm: 0.52808988764 - 0.52808988764
pima: 0.645833333333 - 0.494791666667
ionosphere: 0.643874643875 - 0.643874643875
wine: 0.410112359551 - 0.404494382022

build and cluster from ensemble built by Matlab


In [43]:
def load_matlab_ensemble(directory, dataset):
    import os.path
    import scipy.io
    import glob

    ensemble_directory = os.path.abspath(directory) + os.path.sep
    ensemble = list()
    for p_file in glob.glob(ensemble_directory + 'kmeans-{}-*.mat'.format(dataset)):

        ml_part = scipy.io.loadmat(p_file)
        part = ml_part['clusters_m']
        py_part = list()
        for c in part:
            cluster = c[c != 0] - 1
            py_part.append(cluster.astype(np.int32))

        ensemble.append(py_part)
    return ensemble

In [33]:
validation_ml_ensemble = pd.DataFrame(index=datasets.keys(), columns=['accuracy', 'n_clusts','lifetime accuracy', 'lifetime n_clusts', 'build time', 'clustering time'])

In [44]:
t = prof.Timer()
ensemble_directory = '/home/chiroptera/workspace/QCThesis/EAC_toolbox/'

for name, ds in datasets.iteritems():

#name = 'iris'
#ds = datasets[name]
    data = ds['data']
    gt = ds['gt']

    ensemble = load_matlab_ensemble(ensemble_directory, name)

    # eac clustering
    eacEst = eac.EAC(data.shape[0], sparse=False, condensed=True)
    
    t.reset()
    t.tic()
    eacEst.buildMatrix(ensemble)
    build_time = t.tac()
    
    t.reset()
    t.tic()
    eacEst.finalClustering(np.unique(gt).size)
    clustering_time = t.tac()
    
    # eac clustering with lifetime
    eacEst2 = eac.EAC(data.shape[0], sparse=False, condensed=True)
    eacEst2.buildMatrix(ensemble)
    t.tic() # accumulate from with clustering
    eacEst2.finalClustering()
    t.tac()

    # score result
    mukresAcc = acc.HungarianIndex(data.shape[0])
    accuracy =  mukresAcc.score(gt, eacEst.labels)
    accuracy_lt = mukresAcc.score(gt, eacEst2.labels)
    print '{}: {} - {}'.format(name, accuracy, accuracy_lt)

    validation_ml_ensemble.loc[name] = (accuracy, np.unique(eacEst.labels).size, accuracy_lt, np.unique(eacEst2.labels).size, build_time, clustering_time)


optdigits: 0.200366204551 - 0.199581480513
iris: 0.746666666667 - 0.666666666667
breast_cancer: 0.647144948755 - 0.600292825769
isolet: 0.0977298961139 - 0.0386045915096
pima_norm: 0.649739583333 - 0.649739583333
mfeat_fou: 0.203 - 0.102
wine_norm: 0.52808988764 - 0.522471910112
pima: 0.645833333333 - 0.645833333333
ionosphere: 0.652421652422 - 0.547008547009
wine: 0.404494382022 - 0.404494382022

load matlab results


In [35]:
ml_validation = """dataset	accuracy lifetime	n_c lifetime	accuracy fixed	n_c fixed	ensemble time	build time	clustering time
iris	0.66667	2	0.74667	3	0.71809	0.037135	0.084747
wine	0.46629	4	0.42135	3	0.89522	0.038203	0.088121
wine_norm	0.52247	2	0.52809	3	0.88707	0.040153	0.088825
breast_cancer	0.60029	3	0.64714	2	5.2023	0.082844	0.26207
ionosphere	0.54701	9	0.65242	2	2.5557	0.066384	0.15149
pima	0.64583	2	0.64583	2	6.0796	1.8383	0.28582
pima_norm	0.64974	2	0.64974	2	6.1221	1.757	0.29261
mfeat_fou	0.102	6	0.2975	10	36.4247	1.3105	2.4198
optdigits	0.19958	2	0.20037	10	85.8691	2.0843	12.2125
isolet	0.038605	2	0.09773	26	1064.6	4.1708	161.2619
"""

import StringIO
ml_buffer=StringIO.StringIO(buf=ml_validation)
ml_df = pd.read_table(ml_buffer)
ml_df = ml_df.set_index('dataset')
ml_df.rename(columns={'accuracy fixed':'accuracy',
                      'accuracy lifetime':'lifetime accuracy',
                      'n_c fixed':'n_clusts',
                      'n_c lifetime':'lifetime n_clusts'},inplace=True)

analyze results


In [122]:
print "error of accuracies"
accuracy_error = ml_df.sort()[['accuracy','lifetime accuracy']] - validation_ml_ensemble.sort()[['accuracy','lifetime accuracy']]
accuracy_error.apply(np.abs)


error of accuracies
Out[122]:
accuracy lifetime accuracy
dataset
breast_cancer 4.948755e-06 2.825769e-06
ionosphere 1.652422e-06 1.452991e-06
iris 3.333333e-06 3.333333e-06
isolet 1.038861e-07 4.084904e-07
mfeat_fou 0.0945 0
optdigits 3.795449e-06 1.480513e-06
pima 3.333333e-06 3.333333e-06
pima_norm 4.166667e-07 4.166667e-07
wine 0.01685562 0.06179562
wine_norm 1.123596e-07 1.910112e-06

In [123]:
print accuracy_error.apply(np.abs).to_latex(index_names=False)


\begin{tabular}{lll}
\toprule
{} &      accuracy & lifetime accuracy \\
\midrule
breast\_cancer &  4.948755e-06 &      2.825769e-06 \\
ionosphere    &  1.652422e-06 &      1.452991e-06 \\
iris          &  3.333333e-06 &      3.333333e-06 \\
isolet        &  1.038861e-07 &      4.084904e-07 \\
mfeat\_fou     &        0.0945 &                 0 \\
optdigits     &  3.795449e-06 &      1.480513e-06 \\
pima          &  3.333333e-06 &      3.333333e-06 \\
pima\_norm     &  4.166667e-07 &      4.166667e-07 \\
wine          &    0.01685562 &        0.06179562 \\
wine\_norm     &  1.123596e-07 &      1.910112e-06 \\
\bottomrule
\end{tabular}


In [47]:
print "Number of clusters equal:"
ml_df.sort()[['n_clusts','lifetime n_clusts']] == validation_ml_ensemble.sort()[['n_clusts','lifetime n_clusts']]


Number of clusters equal:
Out[47]:
n_clusts lifetime n_clusts
dataset
breast_cancer True True
ionosphere True True
iris True True
isolet True True
mfeat_fou True True
optdigits True True
pima True True
pima_norm True True
wine True False
wine_norm True True

In [53]:
print 'speed-up from same ensemble:'
speed_ups_ml = ml_df.sort_index()[['build time','clustering time']] / validation_ml_ensemble.sort_index()[['build time','clustering time']]
speed_ups_ml


speed-up from same ensemble:
Out[53]:
build time clustering time
dataset
breast_cancer 7.713564 15.22334
ionosphere 9.678288 20.12336
iris 14.25549 28.4751
isolet 5.500147 174.4283
mfeat_fou 26.34002 31.19785
optdigits 9.783604 53.21466
pima 85.21744 8.406726
pima_norm 127.2274 12.89474
wine 8.485675 24.60761
wine_norm 8.0178 12.98206

In [54]:
print speed_ups_ml.to_latex(index_names=False)


\begin{tabular}{lll}
\toprule
{} & build time & clustering time \\
\midrule
breast\_cancer &   7.713564 &        15.22334 \\
ionosphere    &   9.678288 &        20.12336 \\
iris          &   14.25549 &         28.4751 \\
isolet        &   5.500147 &        174.4283 \\
mfeat\_fou     &   26.34002 &        31.19785 \\
optdigits     &   9.783604 &        53.21466 \\
pima          &   85.21744 &        8.406726 \\
pima\_norm     &   127.2274 &        12.89474 \\
wine          &   8.485675 &        24.60761 \\
wine\_norm     &     8.0178 &        12.98206 \\
\bottomrule
\end{tabular}


In [131]:
print "speed-up from my ensemble:"
speed_ups_all = ml_df.sort_index()[['ensemble time','build time','clustering time']] / validation_my_eac.sort()[['ensemble time','build time','clustering time']]
speed_ups_all['No. patterns'] = 0
speed_ups_all['No. features'] = 0
for i in speed_ups_all.index:
    speed_ups_all.loc[i,'No. patterns'] = datasets[i]['data'].shape[0]
    speed_ups_all.loc[i,'No. features'] = datasets[i]['data'].shape[1]
speed_ups_all['No. classes'] = validation_my_eac['n_clusts']
speed_ups_all[['No. patterns','No. features','No. classes','ensemble time','build time','clustering time']]


speed-up from my ensemble:
Out[131]:
No. patterns No. features No. classes ensemble time build time clustering time
dataset
breast_cancer 683 10 2 50.43974 7.544247 15.83316
ionosphere 351 34 2 21.86286 11.30883 19.97219
iris 150 4 3 19.76525 14.49562 28.50479
isolet 7797 617 26 7.010007 6.183124 206.2837
mfeat_fou 2000 76 10 14.81462 26.43032 31.27446
optdigits 3823 64 10 17.30209 10.2096 53.02636
pima 768 8 2 50.65624 141.4828 13.93502
pima_norm 768 8 2 54.25415 132.8632 14.355
wine 178 4 3 22.06657 14.60266 24.85751
wine_norm 178 4 3 22.92404 14.56994 25.27709

In [132]:
print speed_ups_all[['No. patterns','No. features','No. classes','ensemble time','build time','clustering time']].to_latex(index_names=False)


\begin{tabular}{lrrllll}
\toprule
{} &  No. patterns &  No. features & No. classes & ensemble time & build time & clustering time \\
\midrule
breast\_cancer &           683 &            10 &           2 &      50.43974 &   7.544247 &        15.83316 \\
ionosphere    &           351 &            34 &           2 &      21.86286 &   11.30883 &        19.97219 \\
iris          &           150 &             4 &           3 &      19.76525 &   14.49562 &        28.50479 \\
isolet        &          7797 &           617 &          26 &      7.010007 &   6.183124 &        206.2837 \\
mfeat\_fou     &          2000 &            76 &          10 &      14.81462 &   26.43032 &        31.27446 \\
optdigits     &          3823 &            64 &          10 &      17.30209 &    10.2096 &        53.02636 \\
pima          &           768 &             8 &           2 &      50.65624 &   141.4828 &        13.93502 \\
pima\_norm     &           768 &             8 &           2 &      54.25415 &   132.8632 &          14.355 \\
wine          &           178 &             4 &           3 &      22.06657 &   14.60266 &        24.85751 \\
wine\_norm     &           178 &             4 &           3 &      22.92404 &   14.56994 &        25.27709 \\
\bottomrule
\end{tabular}


In [40]:
print validation_ml_ensemble.sort()[['accuracy','n_clusts','lifetime accuracy','lifetime n_clusts']]
print ml_df.sort()[['accuracy','n_clusts','lifetime accuracy','lifetime n_clusts']]
print validation_my_eac.sort()[['accuracy','n_clusts','lifetime accuracy','lifetime n_clusts']]


                accuracy n_clusts lifetime accuracy lifetime n_clusts
breast_cancer  0.6471449        2         0.6002928                 3
ionosphere     0.6524217        2         0.5470085                 9
iris           0.7466667        3         0.6666667                 2
isolet         0.0977299       26        0.03860459                 2
mfeat_fou          0.203       10             0.102                 6
optdigits      0.2003662       10         0.1995815                 2
pima           0.6458333        2         0.6458333                 2
pima_norm      0.6497396        2         0.6497396                 2
wine           0.4044944        3         0.4044944                 2
wine_norm      0.5280899        3         0.5224719                 2
               accuracy  n_clusts  lifetime accuracy  lifetime n_clusts
dataset                                                                
breast_cancer   0.64714         2           0.600290                  3
ionosphere      0.65242         2           0.547010                  9
iris            0.74667         3           0.666670                  2
isolet          0.09773        26           0.038605                  2
mfeat_fou       0.29750        10           0.102000                  6
optdigits       0.20037        10           0.199580                  2
pima            0.64583         2           0.645830                  2
pima_norm       0.64974         2           0.649740                  2
wine            0.42135         3           0.466290                  4
wine_norm       0.52809         3           0.522470                  2
                accuracy n_clusts lifetime accuracy lifetime n_clusts
breast_cancer  0.6471449        2         0.6002928                 3
ionosphere     0.6438746        2         0.6438746                 4
iris           0.7466667        3         0.6666667                 2
isolet         0.0588688       26        0.03898935                 5
mfeat_fou          0.102       10             0.101                 4
optdigits      0.2984567       10         0.1995815                 2
pima           0.6458333        2         0.4947917                 3
pima_norm      0.6497396        2         0.6497396                 2
wine           0.4101124        3          0.488764                 8
wine_norm      0.6348315        3         0.5224719                 2

In [128]:


In [127]:
validation_my_eac['n_clusts']


Out[127]:
optdigits        10
iris              3
breast_cancer     2
isolet           26
pima_norm         2
mfeat_fou        10
wine_norm         3
pima              2
ionosphere        2
wine              3
Name: n_clusts, dtype: object

In [149]:
validation_my_eac['n_samples']=0
validation_my_eac['dimension']=0
for i in validation_my_eac.index:
    validation_my_eac.n_samples.loc[i] = datasets[i]['data'].shape[0]
    validation_my_eac.dimension.loc[i] = datasets[i]['data'].shape[1]
validation_my_eac


/home/chiroptera/anaconda/lib/python2.7/site-packages/pandas/core/indexing.py:115: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)

read labels from Matlab


In [38]:
import os.path

In [204]:
ensemble_directory = '/home/chiroptera/workspace/QCThesis/EAC_toolbox/'

In [210]:
e_iris = load_matlab_ensemble(ensemble_directory, 'iris')

In [39]:
ensemble_directory = '/home/chiroptera/workspace/QCThesis/EAC_toolbox/'
def load_matlab_ensemble(directory, dataset):
    import os.path
    import scipy.io
    import glob

    ensemble_directory = os.path.abspath(directory) + os.path.sep
    ensemble = list()
    for p_file in glob.glob(ensemble_directory + 'kmeans-{}-*.mat'.format(dataset)):

        ml_part = scipy.io.loadmat(p_file)
        part = ml_part['clusters_m']
        py_part = list()
        for c in part:
            cluster = c[c != 0] - 1
            py_part.append(cluster.astype(np.int32))

        ensemble.append(py_part)
    return ensemble

In [129]:
directory = '/home/chiroptera/workspace/QCThesis/EAC_toolbox/eac'

def labels_path(dataset, lifetime=True):
    labels_type = '-'
    if lifetime:
        labels_type += 'k-fixo-'
    return '{}-eac-kmeans-single{}Stable-combined.mat'.format(dataset,labels_type)

In [118]:
accuracy_from_matlab('/home/chiroptera/QCThesis/EAC_toolbox/eac','mfeat_fou')


Out[118]:
(0.10199999999999999, 0.29749999999999999)

In [67]:
def accuracy_from_matlab(directory, dataset):
    #load matrix
    import scipy.io
    import os.path
    
    def labels_path(dataset, lifetime=True):
        labels_type = '-'
        if lifetime:
            labels_type += 'k-fixo-'
        return '{}-eac-kmeans-single{}Stable-combined.mat'.format(dataset,labels_type)

    matlab_mat = scipy.io.loadmat(os.path.join(directory, labels_path(dataset,False)))

    labels_mat = matlab_mat['clusters_m'].astype(np.int32)
    n_samples = labels_mat.max()

    labels = np.empty(n_samples, dtype=np.int32)

    labels_mat -= 1
    for l,c in enumerate(labels_mat):
        idx = c[c!=-1]
        labels[idx] = l

    gt = datasets[dataset]['gt']
        
    # score result
    mukresAcc = acc.HungarianIndex(n_samples)
    accuracy_fixed =  mukresAcc.score(gt, labels)
    
    ## lifetime accuracy
    matlab_mat = scipy.io.loadmat(os.path.join(directory, labels_path(dataset,True)))

    labels_mat = matlab_mat['clusters_m'].astype(np.int32)
    n_samples = labels_mat.max()

    labels = np.empty(n_samples, dtype=np.int32)

    labels_mat -= 1
    for l,c in enumerate(labels_mat):
        idx = c[c!=-1]
        labels[idx] = l

    # score result
    mukresAcc = acc.HungarianIndex(n_samples)
    accuracy_lt =  mukresAcc.score(gt, labels)    
    
    return accuracy_fixed,accuracy_lt

In [148]:
accuracy_from_matlab('iris')


Out[148]:
(0.66666666666666663, 0.7466666666666667)