In [80]:
from sklearn.semi_supervised import LabelPropagation
from sklearn.semi_supervised import LabelSpreading

from sklearn.datasets import fetch_20newsgroups
from pprint import pprint
import scipy
import scipy.sparse

import joblib

import sklearn
import sklearn.datasets
import sklearn.cross_validation

import sys
from time import time

import numpy as np
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [81]:
# Load the dataset stored in svmlight/libsvm format at 'data/news20.binary'
# (path is relative to the notebook's working directory).
# X receives the feature matrix and y the per-instance target values.
X, y = sklearn.datasets.load_svmlight_file('data/news20.binary')

In [82]:
# One integer id per instance (0 .. n-1), in the same order as the entries of y.
# assumes y is a 1-D array (one target per instance) -- TODO confirm
instance_ids = np.arange(y.shape[0])

In [ ]:
def _fit_and_report(model, tag, param, alpha, X, l_ssl, L, y_lp, U, y_up):
    """Fit ``model`` on the partially labelled data and print one result line.

    The printed line is ``<tag> <param> <alpha> <labeled score> <unlabeled score>``,
    space separated. Returns the two scores as ``(sl, su)``.
    """
    fit = model.fit(X, l_ssl)
    sl = fit.score(L, y_lp)
    su = fit.score(U, y_up)
    # A single pre-joined string prints identically under Python 2 and 3.
    print(" ".join(str(v) for v in (tag, param, alpha, sl, su)))
    return sl, su


def test_label_prop(X, y, labeled_indices, unlabeled_indices,
                    gammas=(100, 500, 1000), alphas=(0.8, 0.9, 1.0),
                    neighbor_counts=(1, 2, 3, 4, 5), max_iter=100):
    """Grid-evaluate LabelPropagation / LabelSpreading on one labeled/unlabeled split.

    Every model is fitted on all rows of ``X``; the targets of the rows in
    ``unlabeled_indices`` are replaced by -1 before fitting. Each fitted model
    is then scored separately on the labeled rows and on the unlabeled rows
    (against their true targets), and one line per model is printed.

    Parameters
    ----------
    X : indexable feature matrix, one row per instance.
    y : array of targets, mapped to new targets via ``(y + 1) / 2``.
        assumes labels are -1/+1 so they become 0/1 -- TODO confirm
    labeled_indices : row indices whose targets are shown to the model.
    unlabeled_indices : row indices whose targets are hidden (set to -1).
    gammas : ``gamma`` values tried with the rbf kernel (LabelPropagation only).
    alphas : ``alpha`` values tried for every kernel setting.
    neighbor_counts : ``n_neighbors`` values tried with the knn kernel
        (LabelPropagation, then LabelSpreading, for each combination).
    max_iter : ``max_iter`` passed to every model.

    Returns
    -------
    None. Results are reported on stdout only.
    """
    L = X[labeled_indices]
    U = X[unlabeled_indices]

    # Remapped targets for the whole set and for the two evaluation subsets.
    yp = (y + 1) / 2
    y_lp = (y[labeled_indices] + 1) / 2
    y_up = (y[unlabeled_indices] + 1) / 2

    # Training targets: a copy of the remapped targets with the unlabeled
    # rows overwritten by -1.
    l_ssl = np.copy(yp)
    l_ssl[unlabeled_indices] = -1

    for g in gammas:
        for a in alphas:
            model = LabelPropagation(kernel='rbf', gamma=g, alpha=a,
                                     max_iter=max_iter)
            _fit_and_report(model, "LP rbf", g, a, X, l_ssl, L, y_lp, U, y_up)

    # NOTE(review): in the recorded runs LabelSpreading with alpha=1.0 scores
    # near chance for small k -- confirm the alpha semantics of the sklearn
    # version in use before relying on those rows.
    for k in neighbor_counts:
        for a in alphas:
            model = LabelPropagation(kernel='knn', n_neighbors=k, alpha=a,
                                     max_iter=max_iter)
            _fit_and_report(model, "LP knn", k, a, X, l_ssl, L, y_lp, U, y_up)

            model = LabelSpreading(kernel='knn', n_neighbors=k, alpha=a,
                                   max_iter=max_iter)
            _fit_and_report(model, "LS knn", k, a, X, l_ssl, L, y_lp, U, y_up)

In [102]:
# Fixed seed so the labeled/unlabeled split (and therefore every score
# printed below) is the same on each Restart & Run All.
SPLIT_SEED = 0

for test_size in [0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95]:
    # test_size is the fraction of instances placed in the second index
    # array, which is used here as the unlabeled pool.
    splits = sklearn.cross_validation.StratifiedShuffleSplit(
        y, n_iter=1, test_size=test_size, random_state=SPLIT_SEED)
    # Only one split is generated (n_iter=1); take it.
    labeled_indices, unlabeled_indices = next(iter(splits))

    # Single-string print: same text under Python 2 and 3.
    print("testing split:   %s" % test_size)
    test_label_prop(X, y, labeled_indices, unlabeled_indices)
    print(".....................")


testing split:   0.05
LP rbf 100 0.8 0.998841861445 0.765
LP rbf 100 0.9 0.998841861445 0.764
LP rbf 100 1.0 0.998789218783 0.764
LP rbf 500 0.8 0.998841861445 0.772
LP rbf 500 0.9 0.998841861445 0.772
LP rbf 500 1.0 0.998841861445 0.772
LP rbf 1000 0.8 0.499947357338 0.5
LP rbf 1000 0.9 0.499947357338 0.5
LP rbf 1000 1.0 0.499947357338 0.5
LP knn 1 0.8 0.996578226995 0.504
LS knn 1 0.8 0.996578226995 0.504
LP knn 1 0.9 0.996578226995 0.504
LS knn 1 0.9 0.996578226995 0.504
LP knn 1 1.0 0.996578226995 0.504
LS knn 1 1.0 0.499947357338 0.5
LP knn 2 0.8 0.725310591704 0.739
LS knn 2 0.8 0.725310591704 0.739
LP knn 2 0.9 0.725310591704 0.739
LS knn 2 0.9 0.725310591704 0.739
LP knn 2 1.0 0.725310591704 0.739
LS knn 2 1.0 0.710781217098 0.534
LP knn 3 0.8 0.608496525584 0.617
LS knn 3 0.8 0.608496525584 0.617
LP knn 3 0.9 0.608391240261 0.617
LS knn 3 0.9 0.608496525584 0.617
LP knn 3 1.0 0.608654453569 0.618
LS knn 3 1.0 0.608549168246 0.618
LP knn 4 0.8 0.549168245947 0.558
LS knn 4 0.8 0.549905243209 0.559
LP knn 4 0.9 0.549168245947 0.558
LS knn 4 0.9 0.550010528532 0.559
LP knn 4 1.0 0.549326173931 0.558
LS knn 4 1.0 0.548694461992 0.558
LP knn 5 0.8 0.522425773847 0.526
LS knn 5 0.8 0.523794483049 0.526
LP knn 5 0.9 0.522583701832 0.526
LS knn 5 0.9 0.523741840387 0.526
LP knn 5 1.0 0.523004843125 0.526
LS knn 5 1.0 0.52253105917 0.526
.....................
testing split:   0.1
LP rbf 100 0.8 0.998833074016 0.7565
LP rbf 100 0.9 0.998833074016 0.755
LP rbf 100 1.0 0.998777506112 0.754
LP rbf 500 0.8 0.998833074016 0.7635
LP rbf 500 0.9 0.998833074016 0.7635
LP rbf 500 1.0 0.998833074016 0.7635
LP rbf 1000 0.8 0.499944432096 0.5
LP rbf 1000 0.9 0.499944432096 0.5
LP rbf 1000 1.0 0.499944432096 0.5
LP knn 1 0.8 0.996165814625 0.5065
LS knn 1 0.8 0.996165814625 0.5065
LP knn 1 0.9 0.996165814625 0.5065
LS knn 1 0.9 0.996165814625 0.5065
LP knn 1 1.0 0.996165814625 0.5065
LS knn 1 1.0 0.499944432096 0.5
LP knn 2 0.8 0.725494554345 0.7205
LS knn 2 0.8 0.725494554345 0.7205
LP knn 2 0.9 0.725494554345 0.7205
LS knn 2 0.9 0.725494554345 0.7205
LP knn 2 1.0 0.725494554345 0.7205
LS knn 2 1.0 0.696877083796 0.5335
LP knn 3 0.8 0.608968659702 0.603
LS knn 3 0.8 0.609024227606 0.603
LP knn 3 0.9 0.608857523894 0.603
LS knn 3 0.9 0.609135363414 0.603
LP knn 3 1.0 0.609691042454 0.603
LS knn 3 1.0 0.609579906646 0.603
LP knn 4 0.8 0.549511002445 0.544
LS knn 4 0.8 0.550122249389 0.544
LP knn 4 0.9 0.549511002445 0.544
LS knn 4 0.9 0.550233385197 0.5445
LP knn 4 1.0 0.549677706157 0.5445
LS knn 4 1.0 0.549122027117 0.5435
LP knn 5 0.8 0.522838408535 0.519
LS knn 5 0.8 0.524005334519 0.519
LP knn 5 0.9 0.523171815959 0.519
LS knn 5 0.9 0.524060902423 0.519
LP knn 5 1.0 0.523449655479 0.519
LS knn 5 1.0 0.523060680151 0.518
.....................
testing split:   0.25
LP rbf 100 0.8 0.998933119957 0.727945589118
LP rbf 100 0.9 0.998933119957 0.725545109022
LP rbf 100 1.0 0.998933119957 0.720944188838
LP rbf 500 0.8 0.998933119957 0.731346269254
LP rbf 500 0.9 0.998933119957 0.730946189238
LP rbf 500 1.0 0.998933119957 0.730146029206
LP rbf 1000 0.8 0.499966659999 0.499899979996
LP rbf 1000 0.9 0.499966659999 0.499899979996
LP rbf 1000 1.0 0.499966659999 0.499899979996
LP knn 1 0.8 0.995732479829 0.505101020204
LS knn 1 0.8 0.995732479829 0.505101020204
LP knn 1 0.9 0.995732479829 0.505101020204
LS knn 1 0.9 0.995732479829 0.505101020204
LP knn 1 1.0 0.995732479829 0.505101020204
LS knn 1 1.0 0.499966659999 0.499899979996
LP knn 2 0.8 0.772287790892 0.722544508902
LS knn 2 0.8 0.772287790892 0.722544508902
LP knn 2 0.9 0.772287790892 0.722544508902
LS knn 2 0.9 0.772287790892 0.722544508902
LP knn 2 1.0 0.772287790892 0.722544508902
LS knn 2 1.0 0.660198706408 0.521904380876
LP knn 3 0.8 0.606654664266 0.605921184237
LS knn 3 0.8 0.612589184504 0.609721944389
LP knn 3 0.9 0.605721144229 0.605921184237
LS knn 3 0.9 0.607121424285 0.605321064213
LP knn 3 1.0 0.605587784224 0.604120824165
LS knn 3 1.0 0.605587784224 0.604120824165
LP knn 4 0.8 0.554577582183 0.554310862172
LS knn 4 0.8 0.561178902447 0.56051210242
LP knn 4 0.9 0.551910382076 0.551710342068
LS knn 4 0.9 0.554977662199 0.556111222244
LP knn 4 1.0 0.549776621991 0.549909981996
LS knn 4 1.0 0.549709941988 0.549909981996
LP knn 5 0.8 0.52550510102 0.524904980996
LS knn 5 0.8 0.530439421218 0.530706141228
LP knn 5 0.9 0.525038341002 0.525105021004
LS knn 5 0.9 0.526838701074 0.527105421084
LP knn 5 1.0 0.523438020938 0.5225045009
LS knn 5 1.0 0.523304660932 0.5225045009
.....................
testing split:   0.5
LP rbf 100 0.8 0.999399879976 0.671834366873
LP rbf 100 0.9 0.999399879976 0.669933986797
LP rbf 100 1.0 0.999299859972 0.665633126625
LP rbf 500 0.8 0.999399879976 0.673834766953
LP rbf 500 0.9 0.999399879976 0.673634726945
LP rbf 500 1.0 0.999399879976 0.673134626925
LP rbf 1000 0.8 0.499899979996 0.5
LP rbf 1000 0.9 0.499899979996 0.5
LP rbf 1000 1.0 0.499899979996 0.5
LP knn 1 0.8 0.994698939788 0.504300860172
LS knn 1 0.8 0.994698939788 0.504300860172
LP knn 1 0.9 0.994698939788 0.504300860172
LS knn 1 0.9 0.994698939788 0.504300860172
LP knn 1 1.0 0.994698939788 0.504300860172
LS knn 1 1.0 0.499899979996 0.5
LP knn 2 0.8 0.76925385077 0.669033806761
LS knn 2 0.8 0.76925385077 0.669033806761
LP knn 2 0.9 0.76925385077 0.669033806761
LS knn 2 0.9 0.76925385077 0.669033806761
LP knn 2 1.0 0.76925385077 0.669033806761
LS knn 2 1.0 0.605721144229 0.50900180036
LP knn 3 0.8 0.607921584317 0.584816963393
LS knn 3 0.8 0.615123024605 0.587517503501
LP knn 3 0.9 0.608021604321 0.584816963393
LS knn 3 0.9 0.609121824365 0.585917183437
LP knn 3 1.0 0.608821764353 0.585417083417
LS knn 3 1.0 0.608621724345 0.585217043409
LP knn 4 0.8 0.553610722144 0.545609121824
LS knn 4 0.8 0.562512502501 0.55201040208
LP knn 4 0.9 0.552310462092 0.545609121824
LS knn 4 0.9 0.556411282256 0.548109621924
LP knn 4 1.0 0.55151030206 0.545209041808
LS knn 4 1.0 0.55151030206 0.545209041808
LP knn 5 0.8 0.528105621124 0.523604720944
LS knn 5 0.8 0.538207641528 0.529705941188
LP knn 5 0.9 0.5275055011 0.523204640928
LS knn 5 0.9 0.53100620124 0.52550510102
LP knn 5 1.0 0.524904980996 0.521404280856
LS knn 5 1.0 0.524604920984 0.521104220844
.....................
testing split:   0.75
LP rbf 100 0.8 0.999399879976 0.611789024472
LP rbf 100 0.9 0.999399879976 0.610655464426
LP rbf 100 1.0 0.999399879976 0.606321264253
LP rbf 500 0.8 0.999399879976 0.610722144429
LP rbf 500 0.9 0.999399879976 0.610522104421
LP rbf 500 1.0 0.999399879976 0.609588584384
LP rbf 1000 0.8 0.499899979996 0.499966659999
LP rbf 1000 0.9 0.499899979996 0.499966659999
LP rbf 1000 1.0 0.499899979996 0.499966659999
LP knn 1 0.8 0.99399879976 0.502100420084
LS knn 1 0.8 0.99399879976 0.502100420084
LP knn 1 0.9 0.99399879976 0.502100420084
LS knn 1 0.9 0.99399879976 0.502100420084
LP knn 1 1.0 0.99399879976 0.502100420084
LS knn 1 1.0 0.499899979996 0.499966659999
LP knn 2 0.8 0.693138627726 0.562579182503
LS knn 2 0.8 0.693138627726 0.562579182503
LP knn 2 0.9 0.693138627726 0.562579182503
LS knn 2 0.9 0.693138627726 0.562579182503
LP knn 2 1.0 0.693138627726 0.562579182503
LS knn 2 1.0 0.541108221644 0.501833700073
LP knn 3 0.8 0.574514902981 0.542308461692
LS knn 3 0.8 0.574714942989 0.542308461692
LP knn 3 0.9 0.574514902981 0.542308461692
LS knn 3 0.9 0.574714942989 0.542308461692
LP knn 3 1.0 0.574514902981 0.542308461692
LS knn 3 1.0 0.569313862773 0.538174301527
LP knn 4 0.8 0.548309661932 0.539907981596
LS knn 4 0.8 0.555111022204 0.544642261786
LP knn 4 0.9 0.548109621924 0.539907981596
LS knn 4 0.9 0.5525105021 0.5425085017
LP knn 4 1.0 0.54850970194 0.540374741615
LS knn 4 1.0 0.54850970194 0.540374741615
LP knn 5 0.8 0.522904580916 0.519103820764
LS knn 5 0.8 0.534106821364 0.52523838101
LP knn 5 0.9 0.523104620924 0.519370540775
LS knn 5 0.9 0.5275055011 0.522437820898
LP knn 5 1.0 0.522904580916 0.519770620791
LS knn 5 1.0 0.522904580916 0.519770620791
.....................
testing split:   0.9
LP rbf 100 0.8 0.999499749875 0.548869255987
LP rbf 100 0.9 0.999499749875 0.548091348558
LP rbf 100 1.0 0.999499749875 0.545646496638
LP rbf 500 0.8 0.999499749875 0.547424570762
LP rbf 500 0.9 0.999499749875 0.547257876313
LP rbf 500 1.0 0.999499749875 0.546591098516
LP rbf 1000 0.8 0.499749874937 0.499972217592
LP rbf 1000 0.9 0.499749874937 0.499972217592
LP rbf 1000 1.0 0.499749874937 0.499972217592
LP knn 1 0.8 0.989994997499 0.50091681947
LS knn 1 0.8 0.989994997499 0.50091681947
LP knn 1 0.9 0.989994997499 0.50091681947
LS knn 1 0.9 0.989994997499 0.50091681947
LP knn 1 1.0 0.989994997499 0.50091681947
LS knn 1 1.0 0.499749874937 0.499972217592
LP knn 2 0.8 0.687843921961 0.526032116464
LS knn 2 0.8 0.687843921961 0.526032116464
LP knn 2 0.9 0.687843921961 0.526032116464
LS knn 2 0.9 0.687843921961 0.526032116464
LP knn 2 1.0 0.687843921961 0.526032116464
LS knn 2 1.0 0.52176088044 0.500472300939
LP knn 3 0.8 0.587793896948 0.520642329277
LS knn 3 0.8 0.587793896948 0.520642329277
LP knn 3 0.9 0.587293646823 0.520586764461
LS knn 3 0.9 0.587793896948 0.520642329277
LP knn 3 1.0 0.587793896948 0.520642329277
LS knn 3 1.0 0.576288144072 0.518808690337
LP knn 4 0.8 0.537768884442 0.511307440129
LS knn 4 0.8 0.538269134567 0.511029616047
LP knn 4 0.9 0.538269134567 0.511085180863
LS knn 4 0.9 0.538269134567 0.511029616047
LP knn 4 1.0 0.534767383692 0.510418403067
LS knn 4 1.0 0.534267133567 0.510585097516
LP knn 5 0.8 0.515257628814 0.504084014002
LS knn 5 0.8 0.515257628814 0.503917319553
LP knn 5 0.9 0.515257628814 0.504028449186
LS knn 5 0.9 0.515257628814 0.503917319553
LP knn 5 1.0 0.514257128564 0.5044174029
LS knn 5 1.0 0.514257128564 0.504306273268
.....................
testing split:   0.95
LP rbf 100 0.8 1.0 0.526714744433
LP rbf 100 0.9 1.0 0.526135705638
LP rbf 100 1.0 1.0 0.525135547718
LP rbf 500 0.8 1.0 0.525451387061
LP rbf 500 0.9 1.0 0.52529346739
LP rbf 500 1.0 1.0 0.525135547718
LP rbf 1000 0.8 0.499499499499 0.499973680055
LP rbf 1000 0.9 0.499499499499 0.499973680055
LP rbf 1000 1.0 0.499499499499 0.499973680055
LP knn 1 0.8 0.987987987988 0.499973680055
LS knn 1 0.8 0.987987987988 0.499973680055
LP knn 1 0.9 0.987987987988 0.499973680055
LS knn 1 0.9 0.987987987988 0.499973680055
LP knn 1 1.0 0.987987987988 0.499973680055
LS knn 1 1.0 0.499499499499 0.499973680055
LP knn 2 0.8 0.673673673674 0.512080854872
LS knn 2 0.8 0.673673673674 0.512080854872
LP knn 2 0.9 0.673673673674 0.512080854872
LS knn 2 0.9 0.673673673674 0.512080854872
LP knn 2 1.0 0.673673673674 0.512080854872
LS knn 2 1.0 0.513513513514 0.500289519398
LP knn 3 0.8 0.56956956957 0.510606937938
LS knn 3 0.8 0.56956956957 0.510606937938
LP knn 3 0.9 0.56956956957 0.510606937938
LS knn 3 0.9 0.56956956957 0.510606937938
LP knn 3 1.0 0.56956956957 0.510606937938
LS knn 3 1.0 0.55955955956 0.509238300784
LP knn 4 0.8 0.537537537538 0.506501026478
LS knn 4 0.8 0.537537537538 0.506395746697
LP knn 4 0.9 0.537537537538 0.506501026478
LS knn 4 0.9 0.537537537538 0.506395746697
LP knn 4 1.0 0.536536536537 0.506816865821
LS knn 4 1.0 0.536536536537 0.506658946149
LP knn 5 0.8 0.521521521522 0.50365847239
LS knn 5 0.8 0.521521521522 0.50365847239
LP knn 5 0.9 0.521521521522 0.50365847239
LS knn 5 0.9 0.521521521522 0.50365847239
LP knn 5 1.0 0.521521521522 0.504500710639
LS knn 5 1.0 0.521521521522 0.504500710639
.....................

In [ ]:


In [ ]: