In [10]:
import numpy as np
import scipy as sp
from sklearn.datasets import fetch_mldata
import math
from collections import Counter
from scipy.cluster.vq import vq, kmeans, whiten
from skll.metrics import kappa

In [2]:
dataset = fetch_mldata('banana-ida')

In [3]:
def laplace(stddev, size):
    return np.random.laplace(0, stddev, size)

def laplace(stddev):
    return np.random.laplace(0, stddev)

def noisy_count(data, epsilon):
    return len(data) + laplace(1. / epsilon)

def noisy_sum(data, epsilon):
    clipped = np.clip(data, -1, 1)
    return np.sum(clipped) + laplace(1. / epsilon)

def noisy_average(data, epsilon):
    clipped = np.clip(data, -1, 1)
    tally = np.sum(clipped)
    count = len(clipped)
    
    if count == 0:
        return np.random.uniform(-1, 1)
    
    candidate = (tally + laplace(2. / epsilon)) / count
    while candidate < -1.0 or candidate > 1.0:
        candidate = (tally + laplace(2. / epsilon)) / count
    
    return candidate

In [4]:
def gen_data(dimensions, length):
    return np.random.uniform(0, 1, dimensions*length).reshape(length, dimensions)

def gen_datapoint(dimensions):
    return np.random.uniform(0, 1, dimensions)

In [11]:
def perceptron_step(x, y, normal, epsilon):
    errors = np.array([xi*yi for xi, yi in zip(x, y) if (yi*np.sum(xi*normal)) < 0]).reshape((-1, normal.shape[0]))
    newnormal = np.zeros(normal.shape)
    for i in range(len(normal)):
        newnormal[i] = normal[i] + noisy_average(errors[:, i], epsilon)
    return newnormal

def svm_step(x, y, normal, epsilon):
    errors = np.array([xi*yi for xi, yi in zip(x, y) if (yi*np.sum(xi*normal)) < 1]).reshape((-1, normal.shape[0]))
    errors = np.vstack((errors, np.array(10*[-normal])))
    newnormal = np.zeros(normal.shape)
    for i in range(len(normal)):
        newnormal[i] = normal[i] + noisy_average(errors[:, i], epsilon)
    return newnormal

def logistic_step(x, y, normal, epsilon):
    errors = np.array([xi*((yi+1)/2. - 1./(1+np.exp(np.sum(xi*normal)))) for xi, yi in zip(x, y)]).reshape((-1, normal.shape[0]))
    newnormal = np.zeros(normal.shape)
    for i in range(len(normal)):
        newnormal[i] = normal[i] + noisy_average(errors[:, i], epsilon)
    return newnormal

def fit_binary(x, y, fn, epsilon, niter=20):
    if any(abs(yi) != 1 for yi in y):
        y[y == 0] = -1
    if any(abs(yi) != 1 for yi in y):
        raise ValueError('Unrecognized class label occured')
    normal = gen_datapoint(x.shape[1])
    for _ in range(niter):
        normal = fn(x, y, normal, epsilon)
    return normal

def eval_binary(x, y, normal):
    err = y*x.dot(normal)
    err[err > 0] = 0
    err[err < 0] = 1
    return 1 - np.average(err)

def fit_ova(x, y, fn, epsilon, niter=20):
    yset = sorted(list(set(y)))
    normal = gen_data(x.shape[1], len(yset))
    labels = np.ones((len(yset), x.shape[0]), dtype=np.int) * -1
    for idx, yi in enumerate(yset):
        labels[idx, np.where(y==yi)[0]] = 1
    for _ in range(niter):
        for idx, yi in enumerate(yset):
            normal[idx, :] = fn(x, labels[idx, :], normal[idx, :], epsilon)
    return normal

def eval_ova(x, y, normal):
    yset = sorted(list(set(y)))
    ydict = dict(zip(yset, range(len(yset))))
    ytr = [ydict[yi] for yi in y]
    err = ytr - np.argmax(x.dot(normal.T), axis=1)
    err[err != 0] = 1
    return 1 - np.average(err)

def eval_ova_kappa(x, y, normal):
    yset = sorted(list(set(y)))
    ydict = dict(zip(yset, range(len(yset))))
    ytr = [ydict[yi] for yi in y]
    return kappa(ytr, np.argmax(x.dot(normal.T), axis=1))

In [6]:
derp = dataset['target']
multinormal = fit_ova(dataset['data'], dataset['target'], svm_step, 0.1, niter=50)

In [7]:
eval_ova(dataset['data'], dataset['target'], multinormal)


Out[7]:
0.58905660377358493

In [12]:
eval_ova_kappa(dataset['data'], dataset['target'], multinormal)


Out[12]:
0.17811320754716986

In [16]:
from sklearn.cross_validation import train_test_split

dsets = ['iris', 'diabetes_scale', 'image-ida', 'diabetes-ida', 'breast-cancer-ida', 'ringnorm-ida', 'thyroid-ida', 'usps']
fns = [(logistic_step, 'log'), (svm_step, 'svm'), (perceptron_step, 'perc')]
for dset in dsets:
    dataset = fetch_mldata(dset)
    X_train, X_test, Y_train, Y_test = train_test_split(dataset['data'], dataset['target'], test_size=0.3, random_state=42)
    for niter in [5, 10, 15, 25]:
        for eps in [0.001, 0.01, 0.1, 0.5, 1]:
            for fn in fns:
                result = 0
                for _ in range(10):
                    multinormal = fit_ova(X_train, Y_train, fn[0], eps, niter)
                    result += eval_ova_kappa(X_test, Y_test, multinormal)
                print dset+';'+fn[1]+';'+str(niter)+';'+str(eps)+';'+str(result / 10)


iris;log;5;0.001;-0.0396422662971
iris;svm;5;0.001;0.0754717677177
iris;perc;5;0.001;-0.0979398810332
iris;log;5;0.01;0.213893674051
iris;svm;5;0.01;0.264242136984
iris;perc;5;0.01;0.106781106987
iris;log;5;0.1;0.637629554258
iris;svm;5;0.1;0.63124484114
iris;perc;5;0.1;0.553741273653
iris;log;5;0.5;0.672351278858
iris;svm;5;0.5;0.734939325381
iris;perc;5;0.5;0.641704842027
iris;log;5;1;0.665325749903
iris;svm;5;1;0.76878342246
iris;perc;5;1;0.586751522075
iris;log;10;0.001;0.239417423925
iris;svm;10;0.001;0.114321261565
iris;perc;10;0.001;-0.127584607152
iris;log;10;0.01;0.210973857351
iris;svm;10;0.01;0.297195298444
iris;perc;10;0.01;0.21316195265
iris;log;10;0.1;0.65425516403
iris;svm;10;0.1;0.714500205677
iris;perc;10;0.1;0.613813482621
iris;log;10;0.5;0.632853148171
iris;svm;10;0.5;0.748868778281
iris;perc;10;0.5;0.710688777379
iris;log;10;1;0.667708107213
iris;svm;10;1;0.738687782805
iris;perc;10;1;0.71887197264
iris;log;15;0.001;0.0879567976405
iris;svm;15;0.001;0.00645349611289
iris;perc;15;0.001;0.0453395344697
iris;log;15;0.01;0.408424766909
iris;svm;15;0.01;0.423754850043
iris;perc;15;0.01;0.325722466223
iris;log;15;0.1;0.625594399649
iris;svm;15;0.1;0.751097020896
iris;perc;15;0.1;0.669431419878
iris;log;15;0.5;0.628723136861
iris;svm;15;0.5;0.72850678733
iris;perc;15;0.5;0.720421721711
iris;log;15;1;0.635776674626
iris;svm;15;1;0.72850678733
iris;perc;15;1;0.68423224232
iris;log;25;0.001;0.0729451696158
iris;svm;25;0.001;0.0853184586487
iris;perc;25;0.001;0.0842788743701
iris;log;25;0.01;0.294216262949
iris;svm;25;0.01;0.510417514737
iris;perc;25;0.01;0.347556792718
iris;log;25;0.1;0.611898328516
iris;svm;25;0.1;0.704271383331
iris;perc;25;0.1;0.67676770765
iris;log;25;0.5;0.628440366972
iris;svm;25;0.5;0.738687782805
iris;perc;25;0.5;0.734380899117
iris;log;25;1;0.632037828327
iris;svm;25;1;0.731900452489
iris;perc;25;1;0.721240131746
diabetes_scale;log;5;0.001;-0.0196229695253
diabetes_scale;svm;5;0.001;0.0315130932647
diabetes_scale;perc;5;0.001;0.00399861699483
diabetes_scale;log;5;0.01;0.0420225720128
diabetes_scale;svm;5;0.01;0.0597703686115
diabetes_scale;perc;5;0.01;0.118044245201
diabetes_scale;log;5;0.1;0.00725905442643
diabetes_scale;svm;5;0.1;0.159220281747
diabetes_scale;perc;5;0.1;0.21784484427
diabetes_scale;log;5;0.5;0.0016279445852
diabetes_scale;svm;5;0.5;0.183188508237
diabetes_scale;perc;5;0.5;0.200810789289
diabetes_scale;log;5;1;0.00488383375559
diabetes_scale;svm;5;1;0.157735066103
diabetes_scale;perc;5;1;0.218292747281
diabetes_scale;log;10;0.001;0.0513045198866
diabetes_scale;svm;10;0.001;0.0332339287262
diabetes_scale;perc;10;0.001;0.0317206459556
diabetes_scale;log;10;0.01;0.0154773126068
diabetes_scale;svm;10;0.01;0.168522122877
diabetes_scale;perc;10;0.01;0.147547397186
diabetes_scale;log;10;0.1;0.0016279445852
diabetes_scale;svm;10;0.1;0.30976428925
diabetes_scale;perc;10;0.1;0.309186383974
diabetes_scale;log;10;0.5;0.00651177834079
diabetes_scale;svm;10;0.5;0.344947676142
diabetes_scale;perc;10;0.5;0.28318669463
diabetes_scale;log;10;1;0.00649936462953
diabetes_scale;svm;10;1;0.322318859395
diabetes_scale;perc;10;1;0.311449435748
diabetes_scale;log;15;0.001;0.0498364246962
diabetes_scale;svm;15;0.001;0.0613705298802
diabetes_scale;perc;15;0.001;0.00438793096528
diabetes_scale;log;15;0.01;0.0265278050625
diabetes_scale;svm;15;0.01;0.178680885676
diabetes_scale;perc;15;0.01;0.110660000858
diabetes_scale;log;15;0.1;0.0
diabetes_scale;svm;15;0.1;0.35955137169
diabetes_scale;perc;15;0.1;0.339578559804
diabetes_scale;log;15;0.5;0.0032558891704
diabetes_scale;svm;15;0.5;0.397456622873
diabetes_scale;perc;15;0.5;0.334875590685
diabetes_scale;log;15;1;0.0032558891704
diabetes_scale;svm;15;1;0.384954656561
diabetes_scale;perc;15;1;0.366132088295
diabetes_scale;log;25;0.001;-0.00896333271391
diabetes_scale;svm;25;0.001;-0.00125030860216
diabetes_scale;perc;25;0.001;0.0225489683247
diabetes_scale;log;25;0.01;-0.000862487197456
diabetes_scale;svm;25;0.01;0.227393173959
diabetes_scale;perc;25;0.01;0.175210107714
diabetes_scale;log;25;0.1;0.0016279445852
diabetes_scale;svm;25;0.1;0.423638752303
diabetes_scale;perc;25;0.1;0.388262199369
diabetes_scale;log;25;0.5;0.0
diabetes_scale;svm;25;0.5;0.446389330047
diabetes_scale;perc;25;0.5;0.376959429104
diabetes_scale;log;25;1;0.0
diabetes_scale;svm;25;1;0.451453826204
diabetes_scale;perc;25;1;0.404001378787
image-ida;log;5;0.001;0.0833915907232
image-ida;svm;5;0.001;0.0511333177108
image-ida;perc;5;0.001;0.102427662542
image-ida;log;5;0.01;0.221696741672
image-ida;svm;5;0.01;0.33410794542
image-ida;perc;5;0.01;0.241734885047
image-ida;log;5;0.1;0.1643613721
image-ida;svm;5;0.1;0.353314698815
image-ida;perc;5;0.1;0.349605317537
image-ida;log;5;0.5;0.190861171277
image-ida;svm;5;0.5;0.341664400004
image-ida;perc;5;0.5;0.338803986821
image-ida;log;5;1;0.198490335816
image-ida;svm;5;1;0.370540496611
image-ida;perc;5;1;0.381871721379
image-ida;log;10;0.001;0.0841491966225
image-ida;svm;10;0.001;0.110232608707
image-ida;perc;10;0.001;0.0723044433894
image-ida;log;10;0.01;0.19691227493
image-ida;svm;10;0.01;0.320196489254
image-ida;perc;10;0.01;0.308070546037
image-ida;log;10;0.1;0.175034992616
image-ida;svm;10;0.1;0.422621873151
image-ida;perc;10;0.1;0.407997737485
image-ida;log;10;0.5;0.178030594735
image-ida;svm;10;0.5;0.409339279671
image-ida;perc;10;0.5;0.348817889093
image-ida;log;10;1;0.177336291628
image-ida;svm;10;1;0.391855517591
image-ida;perc;10;1;0.366611412273
image-ida;log;15;0.001;0.171022068401
image-ida;svm;15;0.001;0.115232410836
image-ida;perc;15;0.001;0.108153883976
image-ida;log;15;0.01;0.179489896756
image-ida;svm;15;0.01;0.356000886438
image-ida;perc;15;0.01;0.302437484224
image-ida;log;15;0.1;0.184130070631
image-ida;svm;15;0.1;0.452714440689
image-ida;perc;15;0.1;0.385910946237
image-ida;log;15;0.5;0.172512574032
image-ida;svm;15;0.5;0.426293344059
image-ida;perc;15;0.5;0.395811684608
image-ida;log;15;1;0.16775424591
image-ida;svm;15;1;0.3972615882
image-ida;perc;15;1;0.370607890598
image-ida;log;25;0.001;0.189901248556
image-ida;svm;25;0.001;0.232923468167
image-ida;perc;25;0.001;0.150104929153
image-ida;log;25;0.01;0.185013597654
image-ida;svm;25;0.01;0.365384714464
image-ida;perc;25;0.01;0.31622273607
image-ida;log;25;0.1;0.160614529407
image-ida;svm;25;0.1;0.433014935077
image-ida;perc;25;0.1;0.426183176588
image-ida;log;25;0.5;0.185848061785
image-ida;svm;25;0.5;0.464330867524
image-ida;perc;25;0.5;0.407988833955
image-ida;log;25;1;0.171641156145
image-ida;svm;25;1;0.4724537376
image-ida;perc;25;1;0.386898948778
diabetes-ida;log;5;0.001;-0.0182511888245
diabetes-ida;svm;5;0.001;0.0320982828674
diabetes-ida;perc;5;0.001;0.00563465620192
diabetes-ida;log;5;0.01;0.202160153825
diabetes-ida;svm;5;0.01;0.25739116286
diabetes-ida;perc;5;0.01;0.211295793665
diabetes-ida;log;5;0.1;0.33507716267
diabetes-ida;svm;5;0.1;0.340386006708
diabetes-ida;perc;5;0.1;0.328092633704
diabetes-ida;log;5;0.5;0.330041170263
diabetes-ida;svm;5;0.5;0.339459596086
diabetes-ida;perc;5;0.5;0.255837342679
diabetes-ida;log;5;1;0.323698604345
diabetes-ida;svm;5;1;0.334600813524
diabetes-ida;perc;5;1;0.221177962814
diabetes-ida;log;10;0.001;0.0438073455714
diabetes-ida;svm;10;0.001;0.0845115448999
diabetes-ida;perc;10;0.001;0.0639994489192
diabetes-ida;log;10;0.01;0.280137971824
diabetes-ida;svm;10;0.01;0.277730553767
diabetes-ida;perc;10;0.01;0.275902451361
diabetes-ida;log;10;0.1;0.319410008421
diabetes-ida;svm;10;0.1;0.331167660876
diabetes-ida;perc;10;0.1;0.239246667826
diabetes-ida;log;10;0.5;0.322652447023
diabetes-ida;svm;10;0.5;0.331296315417
diabetes-ida;perc;10;0.5;0.313890644546
diabetes-ida;log;10;1;0.298929542319
diabetes-ida;svm;10;1;0.334126215355
diabetes-ida;perc;10;1;0.307171410639
diabetes-ida;log;15;0.001;0.114775627197
diabetes-ida;svm;15;0.001;0.132541345863
diabetes-ida;perc;15;0.001;0.099817783937
diabetes-ida;log;15;0.01;0.316127200742
diabetes-ida;svm;15;0.01;0.280005236682
diabetes-ida;perc;15;0.01;0.241363492025
diabetes-ida;log;15;0.1;0.29049673518
diabetes-ida;svm;15;0.1;0.349761685396
diabetes-ida;perc;15;0.1;0.297451785524
diabetes-ida;log;15;0.5;0.327611145989
diabetes-ida;svm;15;0.5;0.333066939378
diabetes-ida;perc;15;0.5;0.300313606038
diabetes-ida;log;15;1;0.317652540231
diabetes-ida;svm;15;1;0.331681884076
diabetes-ida;perc;15;1;0.292648346462
diabetes-ida;log;25;0.001;0.126833114251
diabetes-ida;svm;25;0.001;0.0554342765432
diabetes-ida;perc;25;0.001;0.105812194673
diabetes-ida;log;25;0.01;0.295929058781
diabetes-ida;svm;25;0.01;0.283606261557
diabetes-ida;perc;25;0.01;0.237071796122
diabetes-ida;log;25;0.1;0.32409141291
diabetes-ida;svm;25;0.1;0.329287074749
diabetes-ida;perc;25;0.1;0.256401635856
diabetes-ida;log;25;0.5;0.321346856391
diabetes-ida;svm;25;0.5;0.341369441374
diabetes-ida;perc;25;0.5;0.300429071453
diabetes-ida;log;25;1;0.310559358247
diabetes-ida;svm;25;1;0.338571670229
diabetes-ida;perc;25;1;0.282065828613
breast-cancer-ida;log;5;0.001;0.0375339585595
breast-cancer-ida;svm;5;0.001;-0.0772578344246
breast-cancer-ida;perc;5;0.001;0.0654773104564
breast-cancer-ida;log;5;0.01;0.105052333171
breast-cancer-ida;svm;5;0.01;0.0262761683737
breast-cancer-ida;perc;5;0.01;0.0236291456842
breast-cancer-ida;log;5;0.1;0.23510277322
breast-cancer-ida;svm;5;0.1;0.228062226945
breast-cancer-ida;perc;5;0.1;0.2016600203
breast-cancer-ida;log;5;0.5;0.25777219967
breast-cancer-ida;svm;5;0.5;0.368743833194
breast-cancer-ida;perc;5;0.5;0.141648964293
breast-cancer-ida;log;5;1;0.251508517405
breast-cancer-ida;svm;5;1;0.372917029513
breast-cancer-ida;perc;5;1;0.157716449631
breast-cancer-ida;log;10;0.001;0.0066448220051
breast-cancer-ida;svm;10;0.001;-0.0214371240385
breast-cancer-ida;perc;10;0.001;-0.00158771359627
breast-cancer-ida;log;10;0.01;0.0960665859404
breast-cancer-ida;svm;10;0.01;0.119816025286
breast-cancer-ida;perc;10;0.01;0.164036501514
breast-cancer-ida;log;10;0.1;0.259091317392
breast-cancer-ida;svm;10;0.1;0.27489946845
breast-cancer-ida;perc;10;0.1;0.154610252802
breast-cancer-ida;log;10;0.5;0.300452981344
breast-cancer-ida;svm;10;0.5;0.346486823485
breast-cancer-ida;perc;10;0.5;0.182005191152
breast-cancer-ida;log;10;1;0.265889235667
breast-cancer-ida;svm;10;1;0.34841641761
breast-cancer-ida;perc;10;1;0.211737069912
breast-cancer-ida;log;15;0.001;0.0562464041835
breast-cancer-ida;svm;15;0.001;-0.0135268311307
breast-cancer-ida;perc;15;0.001;-0.0290314198014
breast-cancer-ida;log;15;0.01;0.185850913038
breast-cancer-ida;svm;15;0.01;0.113131291756
breast-cancer-ida;perc;15;0.01;0.15889096567
breast-cancer-ida;log;15;0.1;0.263452991489
breast-cancer-ida;svm;15;0.1;0.227519582727
breast-cancer-ida;perc;15;0.1;0.145903206641
breast-cancer-ida;log;15;0.5;0.288695114226
breast-cancer-ida;svm;15;0.5;0.275251209304
breast-cancer-ida;perc;15;0.5;0.195127389955
breast-cancer-ida;log;15;1;0.258912242435
breast-cancer-ida;svm;15;1;0.27439920876
breast-cancer-ida;perc;15;1;0.188930506721
breast-cancer-ida;log;25;0.001;0.0435505531239
breast-cancer-ida;svm;25;0.001;0.0160841795282
breast-cancer-ida;perc;25;0.001;0.0332477974289
breast-cancer-ida;log;25;0.01;0.24196115159
breast-cancer-ida;svm;25;0.01;0.16706137132
breast-cancer-ida;perc;25;0.01;0.137894662662
breast-cancer-ida;log;25;0.1;0.26845260551
breast-cancer-ida;svm;25;0.1;0.291565654122
breast-cancer-ida;perc;25;0.1;0.145405043494
breast-cancer-ida;log;25;0.5;0.25880056417
breast-cancer-ida;svm;25;0.5;0.236968802115
breast-cancer-ida;perc;25;0.5;0.194758220934
breast-cancer-ida;log;25;1;0.267078482633
breast-cancer-ida;svm;25;1;0.24632465224
breast-cancer-ida;perc;25;1;0.163270488403
ringnorm-ida;log;5;0.001;0.20873216826
ringnorm-ida;svm;5;0.001;0.209778488894
ringnorm-ida;perc;5;0.001;0.158206286556
ringnorm-ida;log;5;0.01;0.391243615143
ringnorm-ida;svm;5;0.01;0.487387172988
ringnorm-ida;perc;5;0.01;0.440641790378
ringnorm-ida;log;5;0.1;0.374866972416
ringnorm-ida;svm;5;0.1;0.50179349759
ringnorm-ida;perc;5;0.1;0.50169899591
ringnorm-ida;log;5;0.5;0.393035959002
ringnorm-ida;svm;5;0.5;0.498815884211
ringnorm-ida;perc;5;0.5;0.501440897067
ringnorm-ida;log;5;1;0.404982701309
ringnorm-ida;svm;5;1;0.4981016103
ringnorm-ida;perc;5;1;0.512967736021
ringnorm-ida;log;10;0.001;0.257110213482
ringnorm-ida;svm;10;0.001;0.285101740493
ringnorm-ida;perc;10;0.001;0.236701279697
ringnorm-ida;log;10;0.01;0.463529423149
ringnorm-ida;svm;10;0.01;0.506744330177
ringnorm-ida;perc;10;0.01;0.43099281857
ringnorm-ida;log;10;0.1;0.457316452332
ringnorm-ida;svm;10;0.1;0.518134219467
ringnorm-ida;perc;10;0.1;0.416161145002
ringnorm-ida;log;10;0.5;0.465605456519
ringnorm-ida;svm;10;0.5;0.51561285313
ringnorm-ida;perc;10;0.5;0.412348496206
ringnorm-ida;log;10;1;0.466057388298
ringnorm-ida;svm;10;1;0.516509971443
ringnorm-ida;perc;10;1;0.430549940289
ringnorm-ida;log;15;0.001;0.262470713676
ringnorm-ida;svm;15;0.001;0.274109019086
ringnorm-ida;perc;15;0.001;0.252731345265
ringnorm-ida;log;15;0.01;0.478104821167
ringnorm-ida;svm;15;0.01;0.496842664429
ringnorm-ida;perc;15;0.01;0.402946354809
ringnorm-ida;log;15;0.1;0.472968131611
ringnorm-ida;svm;15;0.1;0.518678470718
ringnorm-ida;perc;15;0.1;0.412110081958
ringnorm-ida;log;15;0.5;0.483355682898
ringnorm-ida;svm;15;0.5;0.517496143571
ringnorm-ida;perc;15;0.5;0.360358593451
ringnorm-ida;log;15;1;0.46964925039
ringnorm-ida;svm;15;1;0.517408594038
ringnorm-ida;perc;15;1;0.353911189
ringnorm-ida;log;25;0.001;0.368272399039
ringnorm-ida;svm;25;0.001;0.33826455743
ringnorm-ida;perc;25;0.001;0.267456288437
ringnorm-ida;log;25;0.01;0.491331456805
ringnorm-ida;svm;25;0.01;0.495864695826
ringnorm-ida;perc;25;0.01;0.391635292262
ringnorm-ida;log;25;0.1;0.49636637783
ringnorm-ida;svm;25;0.1;0.519481877472
ringnorm-ida;perc;25;0.1;0.428763063635
ringnorm-ida;log;25;0.5;0.491428428994
ringnorm-ida;svm;25;0.5;0.518122990484
ringnorm-ida;perc;25;0.5;0.456488244277
ringnorm-ida;log;25;1;0.493883206469
ringnorm-ida;svm;25;1;0.517762981207
ringnorm-ida;perc;25;1;0.429200397086
thyroid-ida;log;5;0.001;0.141162498072
thyroid-ida;svm;5;0.001;-0.0135475915192
thyroid-ida;perc;5;0.001;-0.00247205109723
thyroid-ida;log;5;0.01;0.141208302866
thyroid-ida;svm;5;0.01;0.158321503813
thyroid-ida;perc;5;0.01;0.113871380242
thyroid-ida;log;5;0.1;0.155170239882
thyroid-ida;svm;5;0.1;0.371850555685
thyroid-ida;perc;5;0.1;0.294830446745
thyroid-ida;log;5;0.5;0.15867936568
thyroid-ida;svm;5;0.5;0.422216092607
thyroid-ida;perc;5;0.5;0.463729246331
thyroid-ida;log;5;1;0.148860582562
thyroid-ida;svm;5;1;0.441432764797
thyroid-ida;perc;5;1;0.472039023804
thyroid-ida;log;10;0.001;0.20461196345
thyroid-ida;svm;10;0.001;-0.0732036337894
thyroid-ida;perc;10;0.001;0.0731201756686
thyroid-ida;log;10;0.01;0.169599232449
thyroid-ida;svm;10;0.01;0.0989215974942
thyroid-ida;perc;10;0.01;0.0913031198885
thyroid-ida;log;10;0.1;0.212471266399
thyroid-ida;svm;10;0.1;0.468993663174
thyroid-ida;perc;10;0.1;0.408459360042
thyroid-ida;log;10;0.5;0.170831640266
thyroid-ida;svm;10;0.5;0.46671097846
thyroid-ida;perc;10;0.5;0.508991187132
thyroid-ida;log;10;1;0.154774544315
thyroid-ida;svm;10;1;0.453500753143
thyroid-ida;perc;10;1;0.47009237293
thyroid-ida;log;15;0.001;-0.00146279450942
thyroid-ida;svm;15;0.001;-0.00421397325609
thyroid-ida;perc;15;0.001;0.0548178300756
thyroid-ida;log;15;0.01;0.0388218854559
thyroid-ida;svm;15;0.01;0.133882565199
thyroid-ida;perc;15;0.01;0.007419998352
thyroid-ida;log;15;0.1;0.0936868610525
thyroid-ida;svm;15;0.1;0.483038238851
thyroid-ida;perc;15;0.1;0.420131982046
thyroid-ida;log;15;0.5;0.129938843191
thyroid-ida;svm;15;0.5;0.518526492528
thyroid-ida;perc;15;0.5;0.557743773807
thyroid-ida;log;15;1;0.116453594452
thyroid-ida;svm;15;1;0.537436899419
thyroid-ida;perc;15;1;0.571218047822
thyroid-ida;log;25;0.001;-0.0667997274379
thyroid-ida;svm;25;0.001;0.0105227303287
thyroid-ida;perc;25;0.001;-0.00457936187576
thyroid-ida;log;25;0.01;0.223054732056
thyroid-ida;svm;25;0.01;0.203587254877
thyroid-ida;perc;25;0.01;0.0598220979648
thyroid-ida;log;25;0.1;0.0602078147689
thyroid-ida;svm;25;0.1;0.589487984071
thyroid-ida;perc;25;0.1;0.53898872698
thyroid-ida;log;25;0.5;0.150606028846
thyroid-ida;svm;25;0.5;0.559956667509
thyroid-ida;perc;25;0.5;0.524610641919
thyroid-ida;log;25;1;0.160043218831
thyroid-ida;svm;25;1;0.565047314448
thyroid-ida;perc;25;1;0.568582699254
usps;log;5;0.001;0.208729032304
usps;svm;5;0.001;0.328738762529
usps;perc;5;0.001;0.320277524963
usps;log;5;0.01;0.167545906655
usps;svm;5;0.01;0.49131079555
usps;perc;5;0.01;0.502178833142
usps;log;5;0.1;0.164126450759
usps;svm;5;0.1;0.583766227149
usps;perc;5;0.1;0.587058190224
usps;log;5;0.5;0.164015197733
usps;svm;5;0.5;0.584850911526
usps;perc;5;0.5;0.588175138363
usps;log;5;1;0.163624576321
usps;svm;5;1;0.58270889112
usps;perc;5;1;0.587806848976
usps;log;10;0.001;0.220016538752
usps;svm;10;0.001;0.46554994612
usps;perc;10;0.001;0.457477724434
usps;log;10;0.01;0.167959020168
usps;svm;10;0.01;0.380709423047
usps;perc;10;0.01;0.432329219971
usps;log;10;0.1;0.16387931391
usps;svm;10;0.1;0.488408681409
usps;perc;10;0.1;0.600631251175
usps;log;10;0.5;0.162727560368
usps;svm;10;0.5;0.649530009885
usps;perc;10;0.5;0.702850587753
usps;log;10;1;0.162677254758
usps;svm;10;1;0.634845239623
usps;perc;10;1;0.772473590246
usps;log;15;0.001;0.184195394472
usps;svm;15;0.001;0.531444031321
usps;perc;15;0.001;0.486489589999
usps;log;15;0.01;0.16094888618
usps;svm;15;0.01;0.699468116958
usps;perc;15;0.01;0.722559420167
usps;log;15;0.1;0.162755935202
usps;svm;15;0.1;0.796732161023
usps;perc;15;0.1;0.834848373131
usps;log;15;0.5;0.164064275925
usps;svm;15;0.5;0.784256212696
usps;perc;15;0.5;0.791168287773
usps;log;15;1;0.163968506954
usps;svm;15;1;0.819418728495
usps;perc;15;1;0.801018744205
usps;log;25;0.001;0.183600135693
usps;svm;25;0.001;0.591937801614
usps;perc;25;0.001;0.546121836187
usps;log;25;0.01;0.162815848525
usps;svm;25;0.01;0.743782857422
usps;perc;25;0.01;0.773559079763
usps;log;25;0.1;0.16310952608
usps;svm;25;0.1;0.84588336428
usps;perc;25;0.1;0.852409189509
usps;log;25;0.5;0.163062560758
usps;svm;25;0.5;0.820681976369
usps;perc;25;0.5;0.863408129591
usps;log;25;1;0.163617823793
usps;svm;25;1;0.840599839191
usps;perc;25;1;0.858640043751

In [ ]:
import seaborn

In [583]:
dsets = ['iris', 'banana-ida','diabetes_scale', 'image-ida',
         'diabetes-ida', 'breast-cancer-ida', 'ringnorm-ida', 'thyroid-ida']
for dset in dsets:
    dataset = fetch_mldata(dset)
    Y = dataset['target']
    Yset = list(set(Y))
    cnt = Counter(Y)
    guess_ratio = cnt.most_common(1)[0][1] / float(len(Y))
    print dset +';'+str(guess_ratio)


iris;0.333333333333
banana-ida;0.551698113208
diabetes_scale;0.651041666667
image-ida;0.569511025887
diabetes-ida;0.651041666667
breast-cancer-ida;0.707224334601
ringnorm-ida;0.504864864865
thyroid-ida;0.697674418605

In [ ]:


In [ ]:


In [547]:
from sklearn import linear_model
from sklearn.metrics import accuracy_score
clf = linear_model.SGDClassifier()
clf.fit(dataset['data'], dataset['target'])


Out[547]:
SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=1,
       penalty='l2', power_t=0.5, random_state=None, shuffle=True,
       verbose=0, warm_start=False)

In [548]:
pred = clf.predict(dataset['data'])
Y = dataset['target']
mtx = [1 if y1==y2 else 0 for (y1, y2) in zip(pred, Y) ]

In [549]:
np.sum(mtx)/float(len(mtx))


Out[549]:
0.4907547169811321

In [ ]: