Implementing Hedge Algorithm for Ensemble Learning

Given the prediction results from $m$ experts (classifiers), our goal is to combine them with weights in such a way that the ensemble gets as close as possible to the best classifier.

We do that by first initializing all the weights uniformly (to $1/m$), then iterating through the results. Whenever the sign of the weighted sum of the classifiers' predictions does not match the ground-truth label, we reduce the weight of the classifiers that caused the mistake (and increase the weight of those that voted correctly) with a multiplicative update, then renormalize the weights so that they sum to one:

$w_i \leftarrow w_i \cdot \exp(-\eta)$


In [1]:
import numpy as np
import pandas
import scipy, scipy.spatial
import sklearn
import sys

from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
### Function to plot weights
def plot_weights(weights):
    """Plot one weight per classifier as a blue line with triangle markers.

    `weights` is a 1-D array; the x axis is the classifier index
    (0 .. len(weights) - 1) and the y axis is the weight value.
    The figure is shown immediately; nothing is returned.
    """
    classifier_idx = np.arange(weights.shape[0])

    figure = plt.figure(1, figsize=(12,6))
    axes = figure.add_subplot(1, 1, 1)
    axes.plot(classifier_idx, weights,
              color='b', linewidth=3, marker='^', markersize=8)

    # Same tick styling on both axes: 16pt labels, explicit rotation.
    for tick_label in axes.get_xticklabels():
        tick_label.set_rotation('horizontal')
        tick_label.set_fontsize(16)
    for tick_label in axes.get_yticklabels():
        tick_label.set_rotation('vertical')
        tick_label.set_fontsize(16)

    axes.set_xlabel('Classifier Index', size=20)
    axes.set_ylabel('Weight', size=20)
    axes.set_title('Weighted Ensemble Learning ', size=20)

    plt.show()

In [3]:
## Function to update weights
def update_weights(ycv, fname, ncol, chunks=10000, lr1=-0.02, lr2=1):
    """Learn ensemble weights with a multiplicative (Hedge-style) update.

    The vote file is streamed chunk by chunk. For every sample the weighted
    vote ``sum(weights * votes)`` is compared with the true label. Whenever
    the ensemble is wrong or tied (``weighted_vote * label <= 0``), every
    classifier that disagreed with the label is scaled by ``exp(lr1)``, every
    classifier that agreed is scaled by ``exp(lr2)``, and the weights are
    renormalised to sum to one. Weights are left untouched on samples the
    ensemble already classifies correctly.

    Parameters
    ----------
    ycv : pandas.DataFrame
        True labels; column 0 is read positionally, one row per row of
        `fname`. Only the sign of ``vote * label`` is used (the notebook
        passes -1/+1 labels).
    fname : str
        Space-separated vote file without header, one column per classifier.
    ncol : int
        Number of classifiers (columns of `fname`).
    chunks : int
        Number of rows parsed per chunk.
    lr1, lr2 : float
        Exponents of the multiplicative factors for wrong (`lr1`) and
        correct (`lr2`) classifiers.

    Returns
    -------
    numpy.ndarray
        Weight vector of length `ncol`, summing to one.

    Raises
    ------
    ValueError
        If a chunk does not have `ncol` columns.
    IndexError
        If `fname` has more rows than `ycv`.

    Side effects: prints, per chunk and in total, the number of mistakes of
    the weighted ensemble and of the unweighted vote (``sum(votes)``).
    """
    weights = np.ones(shape=ncol, dtype=float) / float(ncol)
    learn_rate_1 = np.exp(lr1)
    learn_rate_2 = np.exp(lr2)
    # Pull the labels out once; per-row DataFrame.iloc lookups are slow.
    labels = ycv.iloc[:, 0].values

    n = 0
    num_mistakes, num_mistakes_simpleVotes = 0, 0
    for df in pandas.read_table(fname, header=None, sep=' ', iterator=True, chunksize=chunks):
        votes = df.values
        if votes.shape[1] != ncol:
            raise ValueError("%s has %d columns but ncol=%d"
                             % (fname, votes.shape[1], ncol))
        if n + votes.shape[0] > labels.shape[0]:
            raise IndexError("%s has more rows than the %d labels provided"
                             % (fname, labels.shape[0]))

        sys.stdout.write("%6d %5d,%d  ==>  "%(n, df.shape[0], df.shape[1]))
        num_mistake_part, num_mistake_part_simpleVotes = 0, 0
        for row in votes:
            yi = labels[n]

            # Unweighted majority vote, tracked only as a baseline.
            # nansum ignores missing votes.
            if np.nansum(row) * yi <= 0:
                num_mistake_part_simpleVotes += 1

            if np.nansum(weights * row) * yi <= 0:
                num_mistake_part += 1
                # agreement > 0: classifier voted with the label; < 0: against.
                # A zero (or NaN) vote leaves that classifier's weight as is.
                agreement = row * yi
                weights[agreement < 0] *= learn_rate_1
                weights[agreement > 0] *= learn_rate_2
                weights /= np.sum(weights)

            n += 1
        print("Num. of Mistakes: %d \tWithout-updating %d"%(num_mistake_part, num_mistake_part_simpleVotes))
        num_mistakes += num_mistake_part
        num_mistakes_simpleVotes += num_mistake_part_simpleVotes

    print('Total number of mistakes:  %d without updating %d'%(num_mistakes, num_mistakes_simpleVotes))

    return(weights)

In [4]:
### Function to predict test data with given weights
def predict_test(weights, fname, outname, chunks=10000):
    """Score a vote file with the ensemble weights and write the predictions.

    For every row of `fname` the weighted vote ``sum(weights * votes)`` is
    computed (missing votes are ignored). The prediction is -1 when that sum
    is negative and +1 otherwise, so an exact tie is predicted as +1.

    The output is sized from the rows actually read, so the function does not
    depend on any notebook-level variable.

    Parameters
    ----------
    weights : array-like of float
        One weight per classifier (column of `fname`).
    fname : str
        Space-separated vote file without header, one column per classifier.
    outname : str
        Destination file. Written space-separated with the row index, the
        prediction (column ``V1``) and the weighted sum (column ``V2``).
    chunks : int
        Number of rows parsed per chunk; lower it to reduce peak memory.

    Raises
    ------
    ValueError
        If a chunk's column count differs from ``len(weights)``.

    Side effects: prints the running positive/negative counts after each
    chunk and writes `outname`. Returns None.
    """
    weights = np.asarray(weights, dtype=float)
    pred_parts, score_parts = [], []

    n = 0
    num_neg, num_pos = 0, 0
    for df in pandas.read_table(fname, header=None, sep=' ', iterator=True, chunksize=chunks):
        votes = df.values
        if votes.shape[1] != weights.shape[0]:
            raise ValueError("%s has %d columns but %d weights were given"
                             % (fname, votes.shape[1], weights.shape[0]))

        sys.stdout.write("%6d shape: %5d,%d  ==> "%(n, df.shape[0], df.shape[1]))

        # Weighted vote for every row of the chunk at once.
        scores = np.nansum(votes * weights, axis=1)
        is_negative = scores < 0

        chunk_neg = int(is_negative.sum())
        num_neg += chunk_neg
        num_pos += scores.shape[0] - chunk_neg

        pred_parts.append(np.where(is_negative, -1, 1))
        score_parts.append(scores)
        n += scores.shape[0]
        sys.stdout.write('Num. Positive %d   Num. Negative %d\n'%(num_pos, num_neg))

    if pred_parts:
        pred = np.concatenate(pred_parts)
        sum_pred = np.concatenate(score_parts)
    else:
        pred = np.zeros(shape=0, dtype=int)
        sum_pred = np.zeros(shape=0, dtype=float)

    result = pandas.DataFrame({'V1':pred, 'V2':sum_pred}, columns=['V1', 'V2'])
    result.to_csv(outname, sep=' ')

Combining HF1 Ensemble of Results


In [20]:
labels_raw = pandas.read_table('../data/label_cv.txt', header=None, sep=' ')

# HF1 target: -1 for raw labels below 157, +1 for 157 and above.
# A single np.where builds both classes from the untouched raw labels, so the
# result cannot depend on the order in which the classes are written back.
ycv = labels_raw.copy()
ycv[0] = np.where(labels_raw[0] >= 157, 1, -1)

# Same sanity check as the later HF sections: one label per row of CV votes.
nremain_cv = pandas.read_table('../results/hf1/res_cv.dat', usecols=[0], header=None, sep=' ').shape[0]
assert ycv.shape[0] == nremain_cv, "label rows do not match rows in hf1/res_cv.dat"

print(ycv.shape)

ycv.head()


(100000, 1)
Out[20]:
0
0 1
1 1
2 1
3 1
4 -1

In [21]:
# Parse the three-row preview once; use it for the column count and the display.
cv_preview = pandas.read_table('../results/hf1/res_cv.dat', header=None, sep=' ', nrows=3)
ncol = cv_preview.shape[1]

cv_preview.head()


Out[21]:
0 1 2 3 4 5 6 7 8 9 ... 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614
0 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
2 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1

3 rows × 1615 columns

Weighted Ensemble of Classifiers with Hedge Updating Algorithm


In [24]:
# Learn the HF1 ensemble weights from the CV votes, then plot them.
# On each ensemble mistake, wrong classifiers are scaled by exp(-0.02)
# and correct ones by exp(0.02).
w1 = update_weights(ycv, '../results/hf1/res_cv.dat', ncol, lr1=-0.02, lr2=0.02)
plot_weights(w1)


     0 10000,1615  ==>  Num. of Mistakes: 2722 	Without-updating 2719
 10000 10000,1615  ==>  Num. of Mistakes: 2737 	Without-updating 2720
 20000 10000,1615  ==>  Num. of Mistakes: 2733 	Without-updating 2723
 30000 10000,1615  ==>  Num. of Mistakes: 2699 	Without-updating 2677
 40000 10000,1615  ==>  Num. of Mistakes: 2747 	Without-updating 2730
 50000 10000,1615  ==>  Num. of Mistakes: 2743 	Without-updating 2714
 60000 10000,1615  ==>  Num. of Mistakes: 2788 	Without-updating 2751
 70000 10000,1615  ==>  Num. of Mistakes: 2755 	Without-updating 2742
 80000 10000,1615  ==>  Num. of Mistakes: 2735 	Without-updating 2705
 90000 10000,1615  ==>  Num. of Mistakes: 2749 	Without-updating 2744
Total number of mistakes:  27408 without updating 27225

In [7]:
# Number of rows in the HF1 test-vote file; only the first column is parsed
# because just the row count is needed.
nlines = pandas.read_table('../results/hf1/res_test.dat', header=None, sep=' ', usecols=[0]).shape[0]

print(nlines)


262102

In [25]:
# Score the votes in hf1/res_test.dat with the HF1 weights and write the
# predictions to ../results/pred.hf1.dat.
predict_test(w1, '../results/hf1/res_test.dat', '../results/pred.hf1.dat')


     0 shape: 10000,1615  ==> Num. Positive 5872   Num. Negative 4128
 10000 shape: 10000,1615  ==> Num. Positive 11742   Num. Negative 8258
 20000 shape: 10000,1615  ==> Num. Positive 17600   Num. Negative 12400
 30000 shape: 10000,1615  ==> Num. Positive 23519   Num. Negative 16481
 40000 shape: 10000,1615  ==> Num. Positive 29421   Num. Negative 20579
 50000 shape: 10000,1615  ==> Num. Positive 35358   Num. Negative 24642
 60000 shape: 10000,1615  ==> Num. Positive 41279   Num. Negative 28721
 70000 shape: 10000,1615  ==> Num. Positive 47136   Num. Negative 32864
 80000 shape: 10000,1615  ==> Num. Positive 53061   Num. Negative 36939
 90000 shape: 10000,1615  ==> Num. Positive 58881   Num. Negative 41119
100000 shape: 10000,1615  ==> Num. Positive 64817   Num. Negative 45183
110000 shape: 10000,1615  ==> Num. Positive 70704   Num. Negative 49296
120000 shape: 10000,1615  ==> Num. Positive 76608   Num. Negative 53392
130000 shape: 10000,1615  ==> Num. Positive 82445   Num. Negative 57555
140000 shape: 10000,1615  ==> Num. Positive 88331   Num. Negative 61669
150000 shape: 10000,1615  ==> Num. Positive 94267   Num. Negative 65733
160000 shape: 10000,1615  ==> Num. Positive 100164   Num. Negative 69836
170000 shape: 10000,1615  ==> Num. Positive 106041   Num. Negative 73959
180000 shape: 10000,1615  ==> Num. Positive 111962   Num. Negative 78038
190000 shape: 10000,1615  ==> Num. Positive 117797   Num. Negative 82203
200000 shape: 10000,1615  ==> Num. Positive 123648   Num. Negative 86352
210000 shape: 10000,1615  ==> Num. Positive 129559   Num. Negative 90441
220000 shape: 10000,1615  ==> Num. Positive 135529   Num. Negative 94471
230000 shape: 10000,1615  ==> Num. Positive 141351   Num. Negative 98649
240000 shape: 10000,1615  ==> Num. Positive 147229   Num. Negative 102771
250000 shape: 10000,1615  ==> Num. Positive 153142   Num. Negative 106858
260000 shape:  2102,1615  ==> Num. Positive 154365   Num. Negative 107737

Combining HF2 Ensemble Results


In [26]:
labels_raw = pandas.read_table('../data/label_cv.txt', header=None, sep=' ')

# Keep only the samples whose raw label is >= 157 (original row index preserved).
# .copy() makes ycv an independent frame rather than a slice of labels_raw.
ycv = labels_raw.loc[labels_raw[0] >= 157].copy()

# Same sanity check as the later HF sections: one label per row of CV votes.
nremain_cv = pandas.read_table('../results/hf2/res_cv.dat', usecols=[0], header=None, sep=' ').shape[0]
assert ycv.shape[0] == nremain_cv, "label rows do not match rows in hf2/res_cv.dat"

# HF2 target: -1 for raw labels below 162, +1 for 162 and above, computed in
# one step so the result does not depend on write order.
ycv[0] = np.where(ycv[0] >= 162, 1, -1)

print(ycv.shape)

ycv.head()


(52445, 1)
Out[26]:
0
0 1
1 1
2 1
3 -1
5 1

In [27]:
# Parse the three-row preview once; use it for the column count and the display.
cv_preview = pandas.read_table('../results/hf2/res_cv.dat', header=None, sep=' ', nrows=3)
ncol = cv_preview.shape[1]

print(ncol)

cv_preview.head()


785
Out[27]:
0 1 2 3 4 5 6 7 8 9 ... 775 776 777 778 779 780 781 782 783 784
0 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
1 -1 -1 -1 -1 -1 -1 1 1 1 -1 ... 1 -1 1 1 1 -1 -1 -1 -1 -1
2 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1

3 rows × 785 columns


In [28]:
# Learn the HF2 ensemble weights from the CV votes, then plot them.
# On each ensemble mistake, wrong classifiers are scaled by exp(-0.01)
# and correct ones by exp(0.2).
w2 = update_weights(ycv, '../results/hf2/res_cv.dat', ncol, lr1=-0.01, lr2=0.2)
plot_weights(w2)


     0 10000,785  ==>  Num. of Mistakes: 2900 	Without-updating 2883
 10000 10000,785  ==>  Num. of Mistakes: 2856 	Without-updating 2869
 20000 10000,785  ==>  Num. of Mistakes: 2881 	Without-updating 2871
 30000 10000,785  ==>  Num. of Mistakes: 2968 	Without-updating 2960
 40000 10000,785  ==>  Num. of Mistakes: 2917 	Without-updating 2912
 50000  2445,785  ==>  Num. of Mistakes: 699 	Without-updating 690
Total number of mistakes:  15221 without updating 15185

In [29]:
# Score the votes in hf2/res_test.dat with the HF2 weights and write the
# predictions to ../results/pred.hf2.dat.
predict_test(w2, '../results/hf2/res_test.dat', '../results/pred.hf2.dat')


     0 shape: 10000,785  ==> Num. Positive 4220   Num. Negative 5780
 10000 shape: 10000,785  ==> Num. Positive 8471   Num. Negative 11529
 20000 shape: 10000,785  ==> Num. Positive 12713   Num. Negative 17287
 30000 shape: 10000,785  ==> Num. Positive 17038   Num. Negative 22962
 40000 shape: 10000,785  ==> Num. Positive 21252   Num. Negative 28748
 50000 shape: 10000,785  ==> Num. Positive 25504   Num. Negative 34496
 60000 shape: 10000,785  ==> Num. Positive 29712   Num. Negative 40288
 70000 shape: 10000,785  ==> Num. Positive 33934   Num. Negative 46066
 80000 shape: 10000,785  ==> Num. Positive 38209   Num. Negative 51791
 90000 shape: 10000,785  ==> Num. Positive 42396   Num. Negative 57604
100000 shape: 10000,785  ==> Num. Positive 46728   Num. Negative 63272
110000 shape: 10000,785  ==> Num. Positive 50949   Num. Negative 69051
120000 shape: 10000,785  ==> Num. Positive 55227   Num. Negative 74773
130000 shape: 10000,785  ==> Num. Positive 59447   Num. Negative 80553
140000 shape: 10000,785  ==> Num. Positive 63744   Num. Negative 86256
150000 shape: 10000,785  ==> Num. Positive 67997   Num. Negative 92003
160000 shape: 10000,785  ==> Num. Positive 72293   Num. Negative 97707
170000 shape: 10000,785  ==> Num. Positive 76600   Num. Negative 103400
180000 shape: 10000,785  ==> Num. Positive 80837   Num. Negative 109163
190000 shape: 10000,785  ==> Num. Positive 85090   Num. Negative 114910
200000 shape: 10000,785  ==> Num. Positive 89303   Num. Negative 120697
210000 shape: 10000,785  ==> Num. Positive 93562   Num. Negative 126438
220000 shape: 10000,785  ==> Num. Positive 97871   Num. Negative 132129
230000 shape: 10000,785  ==> Num. Positive 102070   Num. Negative 137930
240000 shape: 10000,785  ==> Num. Positive 106276   Num. Negative 143724
250000 shape: 10000,785  ==> Num. Positive 110578   Num. Negative 149422
260000 shape:  2102,785  ==> Num. Positive 111499   Num. Negative 150603

Combining HF3 Ensemble Results


In [5]:
labels_raw = pandas.read_table('../data/label_cv.txt', header=None, sep=' ')

# Keep only the samples whose raw label is >= 162 (original row index preserved).
# .copy() makes ycv an independent frame rather than a slice of labels_raw.
ycv = labels_raw.loc[labels_raw[0] >= 162].copy()

# There must be exactly one label per row of the HF3 CV-vote file.
nremain_cv = pandas.read_table('../results/hf3/res_cv.dat', usecols=[0], header=None, sep=' ').shape[0]
assert ycv.shape[0] == nremain_cv, "label rows do not match rows in hf3/res_cv.dat"

# HF3 target: -1 for raw labels below 164, +1 for 164 and above, computed in
# one step so the result does not depend on write order.
ycv[0] = np.where(ycv[0] >= 164, 1, -1)

print(ycv.shape)

ycv.head()


(28905, 1)
Out[5]:
0
0 -1
1 1
2 1
5 -1
9 -1

In [6]:
# Parse the three-row preview once; use it for the column count and the display.
cv_preview = pandas.read_table('../results/hf3/res_cv.dat', header=None, sep=' ', nrows=3)
ncol = cv_preview.shape[1]

print(ncol)

cv_preview.head()


526
Out[6]:
0 1 2 3 4 5 6 7 8 9 ... 516 517 518 519 520 521 522 523 524 525
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
1 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
2 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1

3 rows × 526 columns


In [7]:
# Learn the HF3 ensemble weights from the CV votes, then plot them.
# On each ensemble mistake, wrong classifiers are scaled by exp(-0.02)
# and correct ones by exp(0.1).
w3 = update_weights(ycv, '../results/hf3/res_cv.dat', ncol, lr1=-0.02, lr2=0.1)
plot_weights(w3)


     0 10000,526  ==>  Num. of Mistakes: 2838 	Without-updating 2840
 10000 10000,526  ==>  Num. of Mistakes: 2817 	Without-updating 2826
 20000  8905,526  ==>  Num. of Mistakes: 2519 	Without-updating 2545
Total number of mistakes:  8174 without updating 8211

In [10]:
# Number of rows in the HF3 test-vote file; only the first column is parsed
# because just the row count is needed.
nlines = pandas.read_table('../results/hf3/res_test.dat', header=None, sep=' ', usecols=[0]).shape[0]

print(nlines)


262102

In [10]:
# Score the votes in hf3/res_test.dat with the HF3 weights and write the
# predictions to ../results/pred.hf3.dat.
predict_test(w3, '../results/hf3/res_test.dat', '../results/pred.hf3.dat')


     0 shape: 10000,526  ==> Num. Positive 5764   Num. Negative 4236
 10000 shape: 10000,526  ==> Num. Positive 11395   Num. Negative 8605
 20000 shape: 10000,526  ==> Num. Positive 17200   Num. Negative 12800
 30000 shape: 10000,526  ==> Num. Positive 22892   Num. Negative 17108
 40000 shape: 10000,526  ==> Num. Positive 28612   Num. Negative 21388
 50000 shape: 10000,526  ==> Num. Positive 34478   Num. Negative 25522
 60000 shape: 10000,526  ==> Num. Positive 40171   Num. Negative 29829
 70000 shape: 10000,526  ==> Num. Positive 45832   Num. Negative 34168
 80000 shape: 10000,526  ==> Num. Positive 51525   Num. Negative 38475
 90000 shape: 10000,526  ==> Num. Positive 57266   Num. Negative 42734
100000 shape: 10000,526  ==> Num. Positive 62997   Num. Negative 47003
110000 shape: 10000,526  ==> Num. Positive 68853   Num. Negative 51147
120000 shape: 10000,526  ==> Num. Positive 74550   Num. Negative 55450
130000 shape: 10000,526  ==> Num. Positive 80358   Num. Negative 59642
140000 shape: 10000,526  ==> Num. Positive 86093   Num. Negative 63907
150000 shape: 10000,526  ==> Num. Positive 91795   Num. Negative 68205
160000 shape: 10000,526  ==> Num. Positive 97543   Num. Negative 72457
170000 shape: 10000,526  ==> Num. Positive 103302   Num. Negative 76698
180000 shape: 10000,526  ==> Num. Positive 109037   Num. Negative 80963
190000 shape: 10000,526  ==> Num. Positive 114699   Num. Negative 85301
200000 shape: 10000,526  ==> Num. Positive 120495   Num. Negative 89505
210000 shape: 10000,526  ==> Num. Positive 126231   Num. Negative 93769
220000 shape: 10000,526  ==> Num. Positive 131918   Num. Negative 98082
230000 shape: 10000,526  ==> Num. Positive 137650   Num. Negative 102350
240000 shape: 10000,526  ==> Num. Positive 143440   Num. Negative 106560
250000 shape: 10000,526  ==> Num. Positive 149175   Num. Negative 110825
260000 shape:  2102,526  ==> Num. Positive 150391   Num. Negative 111711

Combining HF4 Ensemble Results


In [15]:
labels_raw = pandas.read_table('../data/label_cv.txt', header=None, sep=' ')

# Keep only the samples with 162 <= raw label < 164 (original row index preserved).
# .copy() makes ycv an independent frame rather than a slice of labels_raw.
ycv = labels_raw.loc[(labels_raw[0] >= 162) & (labels_raw[0] < 164)].copy()
print(ycv.shape)

# There must be exactly one label per row of the HF4 CV-vote file.
nremain_cv = pandas.read_table('../results/hf4/res_cv.dat', usecols=[0], header=None, sep=' ').shape[0]
assert ycv.shape[0] == nremain_cv, "label rows do not match rows in hf4/res_cv.dat"

# HF4 target: -1 for raw labels below 163, +1 for 163 and above, computed in
# one step so the result does not depend on write order.
ycv[0] = np.where(ycv[0] >= 163, 1, -1)

ycv.head()


(15885, 1)
Out[15]:
0
0 1
5 -1
9 1
11 1
15 1

In [16]:
# Parse the three-row preview once; use it for the column count and the display.
cv_preview = pandas.read_table('../results/hf4/res_cv.dat', header=None, sep=' ', nrows=3)
ncol = cv_preview.shape[1]

print(ncol)

cv_preview.head()


271
Out[16]:
0 1 2 3 4 5 6 7 8 9 ... 261 262 263 264 265 266 267 268 269 270
0 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
1 -1 -1 1 1 1 1 1 1 -1 -1 ... 1 1 -1 1 -1 1 1 1 1 1
2 -1 1 -1 -1 1 -1 -1 1 -1 -1 ... -1 -1 1 -1 -1 -1 -1 -1 1 1

3 rows × 271 columns


In [17]:
# Learn the HF4 ensemble weights from the CV votes (read 2000 rows at a time),
# then plot them. On each ensemble mistake, wrong classifiers are scaled by
# exp(-0.02) and correct ones by exp(0.1).
w4 = update_weights(ycv, '../results/hf4/res_cv.dat', ncol, lr1=-0.02, lr2=0.1, chunks=2000)
plot_weights(w4)


     0  2000,271  ==>  Num. of Mistakes: 651 	Without-updating 651
  2000  2000,271  ==>  Num. of Mistakes: 709 	Without-updating 702
  4000  2000,271  ==>  Num. of Mistakes: 654 	Without-updating 636
  6000  2000,271  ==>  Num. of Mistakes: 679 	Without-updating 668
  8000  2000,271  ==>  Num. of Mistakes: 651 	Without-updating 647
 10000  2000,271  ==>  Num. of Mistakes: 685 	Without-updating 693
 12000  2000,271  ==>  Num. of Mistakes: 659 	Without-updating 661
 14000  1885,271  ==>  Num. of Mistakes: 606 	Without-updating 613
Total number of mistakes:  5294 without updating 5271

In [18]:
# Score the votes in hf4/res_test.dat with the HF4 weights and write the
# predictions to ../results/pred.hf4.dat.
predict_test(w4, '../results/hf4/res_test.dat', '../results/pred.hf4.dat')


     0 shape: 10000,271  ==> Num. Positive 3524   Num. Negative 6476
 10000 shape: 10000,271  ==> Num. Positive 6942   Num. Negative 13058
 20000 shape: 10000,271  ==> Num. Positive 10457   Num. Negative 19543
 30000 shape: 10000,271  ==> Num. Positive 13996   Num. Negative 26004
 40000 shape: 10000,271  ==> Num. Positive 17561   Num. Negative 32439
 50000 shape: 10000,271  ==> Num. Positive 21066   Num. Negative 38934
 60000 shape: 10000,271  ==> Num. Positive 24586   Num. Negative 45414
 70000 shape: 10000,271  ==> Num. Positive 28110   Num. Negative 51890
 80000 shape: 10000,271  ==> Num. Positive 31600   Num. Negative 58400
 90000 shape: 10000,271  ==> Num. Positive 35053   Num. Negative 64947
100000 shape: 10000,271  ==> Num. Positive 38597   Num. Negative 71403
110000 shape: 10000,271  ==> Num. Positive 42071   Num. Negative 77929
120000 shape: 10000,271  ==> Num. Positive 45614   Num. Negative 84386
130000 shape: 10000,271  ==> Num. Positive 49072   Num. Negative 90928
140000 shape: 10000,271  ==> Num. Positive 52586   Num. Negative 97414
150000 shape: 10000,271  ==> Num. Positive 56053   Num. Negative 103947
160000 shape: 10000,271  ==> Num. Positive 59616   Num. Negative 110384
170000 shape: 10000,271  ==> Num. Positive 63139   Num. Negative 116861
180000 shape: 10000,271  ==> Num. Positive 66707   Num. Negative 123293
190000 shape: 10000,271  ==> Num. Positive 70247   Num. Negative 129753
200000 shape: 10000,271  ==> Num. Positive 73682   Num. Negative 136318
210000 shape: 10000,271  ==> Num. Positive 77226   Num. Negative 142774
220000 shape: 10000,271  ==> Num. Positive 80830   Num. Negative 149170
230000 shape: 10000,271  ==> Num. Positive 84321   Num. Negative 155679
240000 shape: 10000,271  ==> Num. Positive 87772   Num. Negative 162228
250000 shape: 10000,271  ==> Num. Positive 91239   Num. Negative 168761
260000 shape:  2102,271  ==> Num. Positive 91984   Num. Negative 170118

HF5


In [19]:
labels_raw = pandas.read_table('../data/label_cv.txt', header=None, sep=' ')

# Keep only the samples with 157 <= raw label < 162 (original row index preserved).
# .copy() makes ycv an independent frame rather than a slice of labels_raw.
ycv = labels_raw.loc[(labels_raw[0] >= 157) & (labels_raw[0] < 162)].copy()
print(ycv.shape)

# There must be exactly one label per row of the HF5 CV-vote file.
nremain_cv = pandas.read_table('../results/hf5/res_cv.dat', usecols=[0], header=None, sep=' ').shape[0]
assert ycv.shape[0] == nremain_cv, "label rows do not match rows in hf5/res_cv.dat"

# HF5 target: -1 for raw labels below 160, +1 for 160 and above, computed in
# one step so the result does not depend on write order.
ycv[0] = np.where(ycv[0] >= 160, 1, -1)

ycv.head()


(23540, 1)
Out[19]:
0
3 -1
7 1
12 -1
17 -1
19 1

In [20]:
# Parse the three-row preview once; use it for the column count and the display.
cv_preview = pandas.read_table('../results/hf5/res_cv.dat', header=None, sep=' ', nrows=3)
ncol = cv_preview.shape[1]

print(ncol)

cv_preview.head()


213
Out[20]:
0 1 2 3 4 5 6 7 8 9 ... 203 204 205 206 207 208 209 210 211 212
0 1 -1 -1 -1 1 -1 -1 -1 1 -1 ... -1 1 1 -1 1 1 1 -1 1 -1
1 -1 -1 1 -1 -1 1 -1 -1 -1 -1 ... -1 -1 -1 -1 -1 1 -1 -1 1 -1
2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1

3 rows × 213 columns


In [21]:
# Learn the HF5 ensemble weights from the CV votes (read 4000 rows at a time),
# then plot them. On each ensemble mistake, wrong classifiers are scaled by
# exp(-0.02) and correct ones by exp(0.1).
w5 = update_weights(ycv, '../results/hf5/res_cv.dat', ncol, lr1=-0.02, lr2=0.1, chunks=4000)
plot_weights(w5)


     0  4000,213  ==>  Num. of Mistakes: 920 	Without-updating 927
  4000  4000,213  ==>  Num. of Mistakes: 948 	Without-updating 954
  8000  4000,213  ==>  Num. of Mistakes: 972 	Without-updating 960
 12000  4000,213  ==>  Num. of Mistakes: 937 	Without-updating 941
 16000  4000,213  ==>  Num. of Mistakes: 930 	Without-updating 925
 20000  3540,213  ==>  Num. of Mistakes: 817 	Without-updating 827
Total number of mistakes:  5524 without updating 5534

In [22]:
# Score the votes in hf5/res_test.dat with the HF5 weights and write the
# predictions to ../results/pred.hf5.dat.
predict_test(w5, '../results/hf5/res_test.dat', '../results/pred.hf5.dat')


     0 shape: 10000,213  ==> Num. Positive 5328   Num. Negative 4672
 10000 shape: 10000,213  ==> Num. Positive 10692   Num. Negative 9308
 20000 shape: 10000,213  ==> Num. Positive 16028   Num. Negative 13972
 30000 shape: 10000,213  ==> Num. Positive 21446   Num. Negative 18554
 40000 shape: 10000,213  ==> Num. Positive 26766   Num. Negative 23234
 50000 shape: 10000,213  ==> Num. Positive 32111   Num. Negative 27889
 60000 shape: 10000,213  ==> Num. Positive 37504   Num. Negative 32496
 70000 shape: 10000,213  ==> Num. Positive 42874   Num. Negative 37126
 80000 shape: 10000,213  ==> Num. Positive 48251   Num. Negative 41749
 90000 shape: 10000,213  ==> Num. Positive 53596   Num. Negative 46404
100000 shape: 10000,213  ==> Num. Positive 58992   Num. Negative 51008
110000 shape: 10000,213  ==> Num. Positive 64338   Num. Negative 55662
120000 shape: 10000,213  ==> Num. Positive 69709   Num. Negative 60291
130000 shape: 10000,213  ==> Num. Positive 74973   Num. Negative 65027
140000 shape: 10000,213  ==> Num. Positive 80307   Num. Negative 69693
150000 shape: 10000,213  ==> Num. Positive 85719   Num. Negative 74281
160000 shape: 10000,213  ==> Num. Positive 91085   Num. Negative 78915
170000 shape: 10000,213  ==> Num. Positive 96431   Num. Negative 83569
180000 shape: 10000,213  ==> Num. Positive 101779   Num. Negative 88221
190000 shape: 10000,213  ==> Num. Positive 107104   Num. Negative 92896
200000 shape: 10000,213  ==> Num. Positive 112425   Num. Negative 97575
210000 shape: 10000,213  ==> Num. Positive 117847   Num. Negative 102153
220000 shape: 10000,213  ==> Num. Positive 123178   Num. Negative 106822
230000 shape: 10000,213  ==> Num. Positive 128455   Num. Negative 111545
240000 shape: 10000,213  ==> Num. Positive 133771   Num. Negative 116229
250000 shape: 10000,213  ==> Num. Positive 139136   Num. Negative 120864
260000 shape:  2102,213  ==> Num. Positive 140277   Num. Negative 121825

HF6


In [6]:
labels_raw = pandas.read_table('../data/label_cv.txt', header=None, sep=' ')

# Keep only the samples with 160 <= raw label < 162 (original row index preserved).
# .copy() makes ycv an independent frame rather than a slice of labels_raw.
ycv = labels_raw.loc[(labels_raw[0] >= 160) & (labels_raw[0] < 162)].copy()
print(ycv.shape)

# There must be exactly one label per row of the HF6 CV-vote file.
nremain_cv = pandas.read_table('../results/hf6/res_cv.dat', usecols=[0], header=None, sep=' ').shape[0]
assert ycv.shape[0] == nremain_cv, "label rows do not match rows in hf6/res_cv.dat"

# HF6 target: -1 for raw labels below 161, +1 for 161 and above, computed in
# one step so the result does not depend on write order.
ycv[0] = np.where(ycv[0] >= 161, 1, -1)

# Class balance: (number of -1 labels, number of +1 labels).
print(np.sum(ycv[0] == -1), np.sum(ycv[0] == 1))
ycv.head()


(12491, 1)
(6038, 6453)
Out[6]:
0
7 1
19 1
66 1
68 1
70 1

In [7]:
# Parse the three-row preview once; use it for the column count and the display.
cv_preview = pandas.read_table('../results/hf6/res_cv.dat', header=None, sep=' ', nrows=3)
ncol = cv_preview.shape[1]

print(ncol)

cv_preview.head()


185
Out[7]:
0 1 2 3 4 5 6 7 8 9 ... 175 176 177 178 179 180 181 182 183 184
0 -1 1 1 -1 -1 -1 1 -1 -1 -1 ... -1 1 1 -1 1 -1 1 1 -1 -1
1 1 -1 1 1 1 -1 1 1 1 1 ... 1 1 -1 1 1 1 1 1 1 -1
2 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1

3 rows × 185 columns


In [8]:
# Learn the HF6 ensemble weights from the CV votes (read 4000 rows at a time),
# then plot them. On each ensemble mistake, wrong classifiers are scaled by
# exp(-0.02) and correct ones by exp(0.1).
w6 = update_weights(ycv, '../results/hf6/res_cv.dat', ncol, lr1=-0.02, lr2=0.1, chunks=4000)
plot_weights(w6)


     0  4000,185  ==>  Num. of Mistakes: 1417 	Without-updating 1414
  4000  4000,185  ==>  Num. of Mistakes: 1409 	Without-updating 1402
  8000  4000,185  ==>  Num. of Mistakes: 1432 	Without-updating 1420
 12000   491,185  ==>  Num. of Mistakes: 161 	Without-updating 162
Total number of mistakes:  4419 without updating 4398

In [11]:
# Score the votes in hf6/res_test.dat with the HF6 weights and write the
# predictions to ../results/pred.hf6.dat.
predict_test(w6, '../results/hf6/res_test.dat', '../results/pred.hf6.dat')


     0 shape: 10000,185  ==> Num. Positive 5476   Num. Negative 4524
 10000 shape: 10000,185  ==> Num. Positive 10920   Num. Negative 9080
 20000 shape: 10000,185  ==> Num. Positive 16323   Num. Negative 13677
 30000 shape: 10000,185  ==> Num. Positive 21824   Num. Negative 18176
 40000 shape: 10000,185  ==> Num. Positive 27273   Num. Negative 22727
 50000 shape: 10000,185  ==> Num. Positive 32706   Num. Negative 27294
 60000 shape: 10000,185  ==> Num. Positive 38252   Num. Negative 31748
 70000 shape: 10000,185  ==> Num. Positive 43799   Num. Negative 36201
 80000 shape: 10000,185  ==> Num. Positive 49297   Num. Negative 40703
 90000 shape: 10000,185  ==> Num. Positive 54748   Num. Negative 45252
100000 shape: 10000,185  ==> Num. Positive 60293   Num. Negative 49707
110000 shape: 10000,185  ==> Num. Positive 65719   Num. Negative 54281
120000 shape: 10000,185  ==> Num. Positive 71273   Num. Negative 58727
130000 shape: 10000,185  ==> Num. Positive 76665   Num. Negative 63335
140000 shape: 10000,185  ==> Num. Positive 82132   Num. Negative 67868
150000 shape: 10000,185  ==> Num. Positive 87578   Num. Negative 72422
160000 shape: 10000,185  ==> Num. Positive 93058   Num. Negative 76942
170000 shape: 10000,185  ==> Num. Positive 98520   Num. Negative 81480
180000 shape: 10000,185  ==> Num. Positive 103886   Num. Negative 86114
190000 shape: 10000,185  ==> Num. Positive 109416   Num. Negative 90584
200000 shape: 10000,185  ==> Num. Positive 114859   Num. Negative 95141
210000 shape: 10000,185  ==> Num. Positive 120331   Num. Negative 99669
220000 shape: 10000,185  ==> Num. Positive 125760   Num. Negative 104240
230000 shape: 10000,185  ==> Num. Positive 131296   Num. Negative 108704
240000 shape: 10000,185  ==> Num. Positive 136817   Num. Negative 113183
250000 shape: 10000,185  ==> Num. Positive 142343   Num. Negative 117657
260000 shape:  2102,185  ==> Num. Positive 143489   Num. Negative 118613

HF7


In [12]:
labels_raw = pandas.read_table('../data/label_cv.txt', header=None, sep=' ')

# Keep only the samples with 157 <= raw label < 160 (original row index preserved).
# .copy() makes ycv an independent frame rather than a slice of labels_raw.
ycv = labels_raw.loc[(labels_raw[0] >= 157) & (labels_raw[0] < 160)].copy()
print(ycv.shape)

# There must be exactly one label per row of the HF7 CV-vote file.
nremain_cv = pandas.read_table('../results/hf7/res_cv.dat', usecols=[0], header=None, sep=' ').shape[0]
assert ycv.shape[0] == nremain_cv, "label rows do not match rows in hf7/res_cv.dat"

# HF7 target: -1 for raw labels below 159, +1 for 159 and above, computed in
# one step so the result does not depend on write order.
ycv[0] = np.where(ycv[0] >= 159, 1, -1)

# Class balance: (number of -1 labels, number of +1 labels).
print(np.sum(ycv[0] == -1), np.sum(ycv[0] == 1))
ycv.head()


(11049, 1)
(6548, 4501)
Out[12]:
0
3 -1
12 1
17 -1
30 -1
33 -1

In [13]:
# Parse the three-row preview once; use it for the column count and the display.
cv_preview = pandas.read_table('../results/hf7/res_cv.dat', header=None, sep=' ', nrows=3)
ncol = cv_preview.shape[1]

print(ncol)

cv_preview.head()


200
Out[13]:
0 1 2 3 4 5 6 7 8 9 ... 190 191 192 193 194 195 196 197 198 199
0 1 -1 -1 -1 -1 1 -1 1 1 -1 ... -1 -1 1 -1 1 -1 -1 1 1 -1
1 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1

3 rows × 200 columns


In [14]:
# Learn the HF7 ensemble weights from the CV votes (read 4000 rows at a time),
# then plot them. On each ensemble mistake, wrong classifiers are scaled by
# exp(-0.02) and correct ones by exp(0.1).
w7 = update_weights(ycv, '../results/hf7/res_cv.dat', ncol, lr1=-0.02, lr2=0.1, chunks=4000)
plot_weights(w7)


     0  4000,200  ==>  Num. of Mistakes: 1011 	Without-updating 1016
  4000  4000,200  ==>  Num. of Mistakes: 1062 	Without-updating 1059
  8000  3049,200  ==>  Num. of Mistakes: 758 	Without-updating 765
Total number of mistakes:  2831 without updating 2840

In [15]:
# Score the votes in hf7/res_test.dat with the HF7 weights and write the
# predictions to ../results/pred.hf7.dat.
predict_test(w7, '../results/hf7/res_test.dat', '../results/pred.hf7.dat')


     0 shape: 10000,200  ==> Num. Positive 4847   Num. Negative 5153
 10000 shape: 10000,200  ==> Num. Positive 9854   Num. Negative 10146
 20000 shape: 10000,200  ==> Num. Positive 14806   Num. Negative 15194
 30000 shape: 10000,200  ==> Num. Positive 19851   Num. Negative 20149
 40000 shape: 10000,200  ==> Num. Positive 24734   Num. Negative 25266
 50000 shape: 10000,200  ==> Num. Positive 29651   Num. Negative 30349
 60000 shape: 10000,200  ==> Num. Positive 34598   Num. Negative 35402
 70000 shape: 10000,200  ==> Num. Positive 39511   Num. Negative 40489
 80000 shape: 10000,200  ==> Num. Positive 44528   Num. Negative 45472
 90000 shape: 10000,200  ==> Num. Positive 49514   Num. Negative 50486
100000 shape: 10000,200  ==> Num. Positive 54511   Num. Negative 55489
110000 shape: 10000,200  ==> Num. Positive 59466   Num. Negative 60534
120000 shape: 10000,200  ==> Num. Positive 64419   Num. Negative 65581
130000 shape: 10000,200  ==> Num. Positive 69362   Num. Negative 70638
140000 shape: 10000,200  ==> Num. Positive 74342   Num. Negative 75658
150000 shape: 10000,200  ==> Num. Positive 79337   Num. Negative 80663
160000 shape: 10000,200  ==> Num. Positive 84265   Num. Negative 85735
170000 shape: 10000,200  ==> Num. Positive 89152   Num. Negative 90848
180000 shape: 10000,200  ==> Num. Positive 94083   Num. Negative 95917
190000 shape: 10000,200  ==> Num. Positive 99039   Num. Negative 100961
200000 shape: 10000,200  ==> Num. Positive 103897   Num. Negative 106103
210000 shape: 10000,200  ==> Num. Positive 108847   Num. Negative 111153
220000 shape: 10000,200  ==> Num. Positive 113839   Num. Negative 116161
230000 shape: 10000,200  ==> Num. Positive 118776   Num. Negative 121224
240000 shape: 10000,200  ==> Num. Positive 123695   Num. Negative 126305
250000 shape: 10000,200  ==> Num. Positive 128687   Num. Negative 131313
260000 shape:  2102,200  ==> Num. Positive 129717   Num. Negative 132385

HF8


In [16]:
labels_raw = pandas.read_table('../data/label_cv.txt', header=None, sep=' ')

# Keep only the samples with 157 <= raw label < 159 (original row index preserved).
# .copy() makes ycv an independent frame rather than a slice of labels_raw.
ycv = labels_raw.loc[(labels_raw[0] >= 157) & (labels_raw[0] < 159)].copy()
print(ycv.shape)

# There must be exactly one label per row of the HF8 CV-vote file.
nremain_cv = pandas.read_table('../results/hf8/res_cv.dat', usecols=[0], header=None, sep=' ').shape[0]
assert ycv.shape[0] == nremain_cv, "label rows do not match rows in hf8/res_cv.dat"

# HF8 target: -1 for raw labels below 158, +1 for 158 and above, computed in
# one step so the result does not depend on write order.
ycv[0] = np.where(ycv[0] >= 158, 1, -1)

# Class balance: (number of -1 labels, number of +1 labels).
print(np.sum(ycv[0] == -1), np.sum(ycv[0] == 1))
ycv.head()


(6548, 1)
(3053, 3495)
Out[16]:
0
3 1
17 1
30 -1
33 1
40 1

In [17]:
# Parse the three-row preview once; use it for the column count and the display.
cv_preview = pandas.read_table('../results/hf8/res_cv.dat', header=None, sep=' ', nrows=3)
ncol = cv_preview.shape[1]

print(ncol)

cv_preview.head()


200
Out[17]:
0 1 2 3 4 5 6 7 8 9 ... 190 191 192 193 194 195 196 197 198 199
0 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1

3 rows × 200 columns


In [18]:
# Learn the HF8 ensemble weights from the CV votes (read 1000 rows at a time),
# then plot them. On each ensemble mistake, wrong classifiers are scaled by
# exp(-0.02) and correct ones by exp(0.1).
w8 = update_weights(ycv, '../results/hf8/res_cv.dat', ncol, lr1=-0.02, lr2=0.1, chunks=1000)
plot_weights(w8)


     0  1000,200  ==>  Num. of Mistakes: 170 	Without-updating 170
  1000  1000,200  ==>  Num. of Mistakes: 179 	Without-updating 179
  2000  1000,200  ==>  Num. of Mistakes: 178 	Without-updating 178
  3000  1000,200  ==>  Num. of Mistakes: 173 	Without-updating 172
  4000  1000,200  ==>  Num. of Mistakes: 178 	Without-updating 176
  5000  1000,200  ==>  Num. of Mistakes: 167 	Without-updating 167
  6000   548,200  ==>  Num. of Mistakes: 93 	Without-updating 93
Total number of mistakes:  1138 without updating 1135

In [19]:
# Score the votes in hf8/res_test.dat with the HF8 weights and write the
# predictions to ../results/pred.hf8.dat.
predict_test(w8, '../results/hf8/res_test.dat', '../results/pred.hf8.dat')


     0 shape: 10000,200  ==> Num. Positive 5046   Num. Negative 4954
 10000 shape: 10000,200  ==> Num. Positive 10130   Num. Negative 9870
 20000 shape: 10000,200  ==> Num. Positive 15095   Num. Negative 14905
 30000 shape: 10000,200  ==> Num. Positive 20063   Num. Negative 19937
 40000 shape: 10000,200  ==> Num. Positive 25106   Num. Negative 24894
 50000 shape: 10000,200  ==> Num. Positive 30102   Num. Negative 29898
 60000 shape: 10000,200  ==> Num. Positive 35041   Num. Negative 34959
 70000 shape: 10000,200  ==> Num. Positive 40120   Num. Negative 39880
 80000 shape: 10000,200  ==> Num. Positive 45034   Num. Negative 44966
 90000 shape: 10000,200  ==> Num. Positive 50131   Num. Negative 49869
100000 shape: 10000,200  ==> Num. Positive 55139   Num. Negative 54861
110000 shape: 10000,200  ==> Num. Positive 60093   Num. Negative 59907
120000 shape: 10000,200  ==> Num. Positive 65109   Num. Negative 64891
130000 shape: 10000,200  ==> Num. Positive 70154   Num. Negative 69846
140000 shape: 10000,200  ==> Num. Positive 75115   Num. Negative 74885
150000 shape: 10000,200  ==> Num. Positive 80104   Num. Negative 79896
160000 shape: 10000,200  ==> Num. Positive 85175   Num. Negative 84825
170000 shape: 10000,200  ==> Num. Positive 90219   Num. Negative 89781
180000 shape: 10000,200  ==> Num. Positive 95106   Num. Negative 94894
190000 shape: 10000,200  ==> Num. Positive 100042   Num. Negative 99958
200000 shape: 10000,200  ==> Num. Positive 105104   Num. Negative 104896
210000 shape: 10000,200  ==> Num. Positive 110066   Num. Negative 109934
220000 shape: 10000,200  ==> Num. Positive 115038   Num. Negative 114962
230000 shape: 10000,200  ==> Num. Positive 120096   Num. Negative 119904
240000 shape: 10000,200  ==> Num. Positive 125116   Num. Negative 124884
250000 shape: 10000,200  ==> Num. Positive 130145   Num. Negative 129855
260000 shape:  2102,200  ==> Num. Positive 131210   Num. Negative 130892

In [ ]: