# Implementing Hedge Algorithm for Ensemble Learning

Our goal is, given the prediction results from m experts (classifiers), to combine them with weights in such a way that the ensemble gets as close as possible to the best classifier.

We do that by first giving every classifier the same initial weight and then iterating through the results. Whenever the weighted sum of the classifiers' predictions does not match the ground-truth label, we reduce the weights of the classifiers that caused the mistake with a multiplicative update (the implementation below also rescales the classifiers that voted correctly, using a separate factor, and renormalises the weights):

$w_i \leftarrow w_i \cdot \exp(-\eta)$

In [1]:
import numpy as np
import pandas
import scipy, scipy.spatial
import sklearn
import sys

from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
### Function to plot weights
def plot_weights(weights):
    """Draw the ensemble weights as a line plot, one point per classifier.

    Parameters
    ----------
    weights : array-like exposing ``shape``
        One weight per classifier; the position in the array is used as the
        classifier index on the x axis.
    """
    positions = np.arange(weights.shape[0])

    # Figure number 1 is requested explicitly.
    figure = plt.figure(1, figsize=(12, 6))
    axes = figure.add_subplot(1, 1, 1)

    # Blue line of width 3 with triangle markers of size 8.
    axes.plot(positions, weights, color='b', linewidth=3, marker='^', markersize=8)

    for tick_label in axes.get_xticklabels():
        tick_label.set_rotation('horizontal')
        tick_label.set_fontsize(16)
    for tick_label in axes.get_yticklabels():
        tick_label.set_rotation('vertical')
        tick_label.set_fontsize(16)

    axes.set_xlabel('Classifier Index', size=20)
    axes.set_ylabel('Weight', size=20)
    axes.set_title('Weighted Ensemble Learning ', size=20)

    plt.show()

In [3]:
## Function to update weights
def update_weights(ycv, fname, ncol, chunks=10000, lr1=-0.02, lr2=1):
    """Learn Hedge-style ensemble weights from a file of classifier votes.

    The vote file is streamed in chunks. For every row the weighted vote
    ``sum(weights * votes)`` is compared with the row's label. When the
    weighted vote is wrong or tied (``wsum * label <= 0``), every classifier
    that voted against the label is scaled by ``exp(lr1)``, every classifier
    that voted with the label is scaled by ``exp(lr2)``, and the weights are
    renormalised to sum to one. Progress and mistake counts (weighted vote and
    plain unweighted vote) are printed per chunk and in total.

    Parameters
    ----------
    ycv : pandas.DataFrame
        Labels; the first column is read positionally, one label per row of
        ``fname``, in the same order.
    fname : str
        Space-separated file without a header row; one row per sample and one
        column per classifier.
    ncol : int
        Number of classifiers, i.e. the length of the weight vector.
    chunks : int, optional
        Number of rows read from ``fname`` at a time.
    lr1 : float, optional
        Log of the factor applied to classifiers that voted wrongly.
    lr2 : float, optional
        Log of the factor applied to classifiers that voted correctly.

    Returns
    -------
    numpy.ndarray
        The learned weights, of length ``ncol``.

    Raises
    ------
    IndexError
        If ``fname`` has more rows than ``ycv``.
    """
    weights = np.ones(shape=ncol, dtype=float) / float(ncol)
    learn_rate_1 = np.exp(lr1)
    learn_rate_2 = np.exp(lr2)

    # Extract the labels once instead of one .iloc lookup per row.
    labels = ycv.iloc[:, 0].values

    n = 0
    num_mistakes, num_mistakes_simpleVotes = 0, 0
    for df in pandas.read_table(fname, header=None, sep=' ', iterator=True, chunksize=chunks):
        sys.stdout.write("%6d %5d,%d  ==>  "%(n, df.shape[0], df.shape[1]))
        num_mistake_part, num_mistake_part_simpleVotes = 0, 0

        # Iterate over plain NumPy rows; building a Series per row is slow.
        for votes in df.values:
            yi = labels[n]

            if np.sum(votes) * yi <= 0:  # simple vote count
                num_mistake_part_simpleVotes += 1

            if np.sum(weights * votes) * yi <= 0:
                num_mistake_part += 1
                # Sign of votes * yi: negative = voted wrong, positive = voted
                # right, zero (abstention) leaves the weight untouched.
                agreement = votes * yi
                weights[agreement < 0] *= learn_rate_1
                weights[agreement > 0] *= learn_rate_2

                # Weights only change on a mistake, so renormalise here.
                weights /= np.sum(weights)

            n += 1

        print("Num. of Mistakes: %d \tWithout-updating %d"%(num_mistake_part, num_mistake_part_simpleVotes))
        num_mistakes += num_mistake_part
        num_mistakes_simpleVotes += num_mistake_part_simpleVotes

    print('Total number of mistakes:  %d without updating %d'%(num_mistakes, num_mistakes_simpleVotes))

    return(weights)

In [4]:
### Function to predict test data with given weights
def predict_test(weights, fname, outname):
    """Score a file of classifier votes with the given weights and save the result.

    The vote file is streamed in chunks of 10000 rows. For every row the
    weighted vote ``sum(weights * votes)`` is computed; a negative value is
    predicted as -1, anything else (including an exact tie at 0) as +1.
    Running totals of positive and negative predictions are written to stdout
    after each chunk.

    The number of rows is taken from the file itself, so the function does not
    depend on any variable defined elsewhere in the notebook.

    Parameters
    ----------
    weights : array-like of float
        One weight per classifier (per column of ``fname``).
    fname : str
        Space-separated file without a header row; one row per sample and one
        column per classifier.
    outname : str
        Destination path. A space-separated table is written with the row
        index, column ``V1`` (the -1/+1 prediction) and column ``V2`` (the
        weighted vote).
    """
    chunks = 10000
    n = 0
    num_neg, num_pos = 0, 0
    pred, sum_pred = [], []
    for df in pandas.read_table(fname, header=None, sep=' ', iterator=True, chunksize=chunks):
        sys.stdout.write("%6d shape: %5d,%d  ==> "%(n, df.shape[0], df.shape[1]))
        # Iterate over plain NumPy rows; building a Series per row is slow.
        for votes in df.values:
            wsum = np.sum(weights * votes)
            if wsum < 0:
                num_neg += 1
                pred.append(-1)
            else:
                num_pos += 1
                pred.append(1)
            sum_pred.append(wsum)
            n += 1
        sys.stdout.write('Num. Positive %d   Num. Negative %d\n'%(num_pos, num_neg))

    # Explicit dtypes keep the column types stable even for an empty result.
    out = pandas.DataFrame({'V1': np.array(pred, dtype=int),
                            'V2': np.array(sum_pred, dtype=float)})
    out.to_csv(outname, sep=' ')

## Combining HF1 Ensemble Results

In [20]:
# HF1 uses every cross-validation sample.
ycv = pandas.read_table('../data/label_cv.txt', header=None, sep=' ')

# Binarise: labels below 157 -> -1, labels of 157 or more -> +1.
# Row positions are resolved before any label is overwritten.
neg_rows = np.where(ycv[0] < 157)[0]
pos_rows = np.where(ycv[0] >= 157)[0]
ycv.iloc[neg_rows, 0] = -1
ycv.iloc[pos_rows, 0] = 1

print(ycv.shape)

ycv.head()

(100000, 1)
Out[20]:
0
0 1
1 1
2 1
3 1
4 -1

In [21]:
# Read the first three rows once: they give the number of classifiers
# (columns) and double as the preview displayed below.
cv_preview = pandas.read_table('../results/hf1/res_cv.dat', header=None, sep=' ', nrows=3)
ncol = cv_preview.shape[1]

cv_preview.head()

Out[21]:
0 1 2 3 4 5 6 7 8 9 ... 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614
0 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
2 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1

3 rows × 1615 columns

### Weighted Ensemble of Classifiers with Hedge Updating Algorithm

In [24]:
# Learn the HF1 ensemble weights from the cross-validation votes, then plot them.
w1 = update_weights(
    ycv=ycv,
    fname='../results/hf1/res_cv.dat',
    ncol=ncol,
    lr1=-0.02,
    lr2=0.02,
)
plot_weights(w1)

0 10000,1615  ==>  Num. of Mistakes: 2722 	Without-updating 2719
10000 10000,1615  ==>  Num. of Mistakes: 2737 	Without-updating 2720
20000 10000,1615  ==>  Num. of Mistakes: 2733 	Without-updating 2723
30000 10000,1615  ==>  Num. of Mistakes: 2699 	Without-updating 2677
40000 10000,1615  ==>  Num. of Mistakes: 2747 	Without-updating 2730
50000 10000,1615  ==>  Num. of Mistakes: 2743 	Without-updating 2714
60000 10000,1615  ==>  Num. of Mistakes: 2788 	Without-updating 2751
70000 10000,1615  ==>  Num. of Mistakes: 2755 	Without-updating 2742
80000 10000,1615  ==>  Num. of Mistakes: 2735 	Without-updating 2705
90000 10000,1615  ==>  Num. of Mistakes: 2749 	Without-updating 2744
Total number of mistakes:  27408 without updating 27225

In [7]:
# Count the rows of the HF1 test votes; only the first column is loaded.
nlines = len(pandas.read_table('../results/hf1/res_test.dat', header=None, sep=' ', usecols=[0]))

print(nlines)

262102

In [25]:
# Score the HF1 test votes with the learned weights and save the predictions.
predict_test(
    weights=w1,
    fname='../results/hf1/res_test.dat',
    outname='../results/pred.hf1.dat',
)

0 shape: 10000,1615  ==> Num. Positive 5872   Num. Negative 4128
10000 shape: 10000,1615  ==> Num. Positive 11742   Num. Negative 8258
20000 shape: 10000,1615  ==> Num. Positive 17600   Num. Negative 12400
30000 shape: 10000,1615  ==> Num. Positive 23519   Num. Negative 16481
40000 shape: 10000,1615  ==> Num. Positive 29421   Num. Negative 20579
50000 shape: 10000,1615  ==> Num. Positive 35358   Num. Negative 24642
60000 shape: 10000,1615  ==> Num. Positive 41279   Num. Negative 28721
70000 shape: 10000,1615  ==> Num. Positive 47136   Num. Negative 32864
80000 shape: 10000,1615  ==> Num. Positive 53061   Num. Negative 36939
90000 shape: 10000,1615  ==> Num. Positive 58881   Num. Negative 41119
100000 shape: 10000,1615  ==> Num. Positive 64817   Num. Negative 45183
110000 shape: 10000,1615  ==> Num. Positive 70704   Num. Negative 49296
120000 shape: 10000,1615  ==> Num. Positive 76608   Num. Negative 53392
130000 shape: 10000,1615  ==> Num. Positive 82445   Num. Negative 57555
140000 shape: 10000,1615  ==> Num. Positive 88331   Num. Negative 61669
150000 shape: 10000,1615  ==> Num. Positive 94267   Num. Negative 65733
160000 shape: 10000,1615  ==> Num. Positive 100164   Num. Negative 69836
170000 shape: 10000,1615  ==> Num. Positive 106041   Num. Negative 73959
180000 shape: 10000,1615  ==> Num. Positive 111962   Num. Negative 78038
190000 shape: 10000,1615  ==> Num. Positive 117797   Num. Negative 82203
200000 shape: 10000,1615  ==> Num. Positive 123648   Num. Negative 86352
210000 shape: 10000,1615  ==> Num. Positive 129559   Num. Negative 90441
220000 shape: 10000,1615  ==> Num. Positive 135529   Num. Negative 94471
230000 shape: 10000,1615  ==> Num. Positive 141351   Num. Negative 98649
240000 shape: 10000,1615  ==> Num. Positive 147229   Num. Negative 102771
250000 shape: 10000,1615  ==> Num. Positive 153142   Num. Negative 106858
260000 shape:  2102,1615  ==> Num. Positive 154365   Num. Negative 107737

## Combining HF2 Ensemble Results

In [26]:
# HF2 works on the cross-validation samples whose label is at least 157.
ycv = pandas.read_table('../data/label_cv.txt', header=None, sep=' ')
kept_rows = np.where(ycv[0] >= 157)[0]
ycv = ycv.iloc[kept_rows, :]

# Binarise: labels below 162 -> -1, labels of 162 or more -> +1.
# Row positions are resolved before any label is overwritten.
neg_rows = np.where(ycv[0] < 162)[0]
pos_rows = np.where(ycv[0] >= 162)[0]
ycv.iloc[neg_rows, 0] = -1
ycv.iloc[pos_rows, 0] = 1

print(ycv.shape)

ycv.head()

(52445, 1)
Out[26]:
0
0 1
1 1
2 1
3 -1
5 1

In [27]:
# Read the first three rows once: they give the number of classifiers
# (columns) and double as the preview displayed below.
cv_preview = pandas.read_table('../results/hf2/res_cv.dat', header=None, sep=' ', nrows=3)
ncol = cv_preview.shape[1]

print(ncol)

cv_preview.head()

785
Out[27]:
0 1 2 3 4 5 6 7 8 9 ... 775 776 777 778 779 780 781 782 783 784
0 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
1 -1 -1 -1 -1 -1 -1 1 1 1 -1 ... 1 -1 1 1 1 -1 -1 -1 -1 -1
2 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1

3 rows × 785 columns

In [28]:
# Learn the HF2 ensemble weights from the cross-validation votes, then plot them.
w2 = update_weights(
    ycv=ycv,
    fname='../results/hf2/res_cv.dat',
    ncol=ncol,
    lr1=-0.01,
    lr2=0.2,
)
plot_weights(w2)

0 10000,785  ==>  Num. of Mistakes: 2900 	Without-updating 2883
10000 10000,785  ==>  Num. of Mistakes: 2856 	Without-updating 2869
20000 10000,785  ==>  Num. of Mistakes: 2881 	Without-updating 2871
30000 10000,785  ==>  Num. of Mistakes: 2968 	Without-updating 2960
40000 10000,785  ==>  Num. of Mistakes: 2917 	Without-updating 2912
50000  2445,785  ==>  Num. of Mistakes: 699 	Without-updating 690
Total number of mistakes:  15221 without updating 15185

In [29]:
# Score the HF2 test votes with the learned weights and save the predictions.
predict_test(
    weights=w2,
    fname='../results/hf2/res_test.dat',
    outname='../results/pred.hf2.dat',
)

0 shape: 10000,785  ==> Num. Positive 4220   Num. Negative 5780
10000 shape: 10000,785  ==> Num. Positive 8471   Num. Negative 11529
20000 shape: 10000,785  ==> Num. Positive 12713   Num. Negative 17287
30000 shape: 10000,785  ==> Num. Positive 17038   Num. Negative 22962
40000 shape: 10000,785  ==> Num. Positive 21252   Num. Negative 28748
50000 shape: 10000,785  ==> Num. Positive 25504   Num. Negative 34496
60000 shape: 10000,785  ==> Num. Positive 29712   Num. Negative 40288
70000 shape: 10000,785  ==> Num. Positive 33934   Num. Negative 46066
80000 shape: 10000,785  ==> Num. Positive 38209   Num. Negative 51791
90000 shape: 10000,785  ==> Num. Positive 42396   Num. Negative 57604
100000 shape: 10000,785  ==> Num. Positive 46728   Num. Negative 63272
110000 shape: 10000,785  ==> Num. Positive 50949   Num. Negative 69051
120000 shape: 10000,785  ==> Num. Positive 55227   Num. Negative 74773
130000 shape: 10000,785  ==> Num. Positive 59447   Num. Negative 80553
140000 shape: 10000,785  ==> Num. Positive 63744   Num. Negative 86256
150000 shape: 10000,785  ==> Num. Positive 67997   Num. Negative 92003
160000 shape: 10000,785  ==> Num. Positive 72293   Num. Negative 97707
170000 shape: 10000,785  ==> Num. Positive 76600   Num. Negative 103400
180000 shape: 10000,785  ==> Num. Positive 80837   Num. Negative 109163
190000 shape: 10000,785  ==> Num. Positive 85090   Num. Negative 114910
200000 shape: 10000,785  ==> Num. Positive 89303   Num. Negative 120697
210000 shape: 10000,785  ==> Num. Positive 93562   Num. Negative 126438
220000 shape: 10000,785  ==> Num. Positive 97871   Num. Negative 132129
230000 shape: 10000,785  ==> Num. Positive 102070   Num. Negative 137930
240000 shape: 10000,785  ==> Num. Positive 106276   Num. Negative 143724
250000 shape: 10000,785  ==> Num. Positive 110578   Num. Negative 149422
260000 shape:  2102,785  ==> Num. Positive 111499   Num. Negative 150603

## Combining HF3 Ensemble Results

In [5]:
# HF3 works on the cross-validation samples whose label is at least 162.
ycv = pandas.read_table('../data/label_cv.txt', header=None, sep=' ')
kept_rows = np.where(ycv[0] >= 162)[0]
ycv = ycv.iloc[kept_rows, :]

# The HF3 vote file must hold exactly one row per retained label.
nremain_cv = len(pandas.read_table('../results/hf3/res_cv.dat', usecols=[0], header=None, sep=' '))
assert len(ycv) == nremain_cv

# Binarise: labels below 164 -> -1, labels of 164 or more -> +1.
# Row positions are resolved before any label is overwritten.
neg_rows = np.where(ycv[0] < 164)[0]
pos_rows = np.where(ycv[0] >= 164)[0]
ycv.iloc[neg_rows, 0] = -1
ycv.iloc[pos_rows, 0] = 1

print(ycv.shape)

ycv.head()

(28905, 1)
Out[5]:
0
0 -1
1 1
2 1
5 -1
9 -1

In [6]:
# Read the first three rows once: they give the number of classifiers
# (columns) and double as the preview displayed below.
cv_preview = pandas.read_table('../results/hf3/res_cv.dat', header=None, sep=' ', nrows=3)
ncol = cv_preview.shape[1]

print(ncol)

cv_preview.head()

526
Out[6]:
0 1 2 3 4 5 6 7 8 9 ... 516 517 518 519 520 521 522 523 524 525
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
1 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
2 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1

3 rows × 526 columns

In [7]:
# Learn the HF3 ensemble weights from the cross-validation votes, then plot them.
w3 = update_weights(
    ycv=ycv,
    fname='../results/hf3/res_cv.dat',
    ncol=ncol,
    lr1=-0.02,
    lr2=0.1,
)
plot_weights(w3)

0 10000,526  ==>  Num. of Mistakes: 2838 	Without-updating 2840
10000 10000,526  ==>  Num. of Mistakes: 2817 	Without-updating 2826
20000  8905,526  ==>  Num. of Mistakes: 2519 	Without-updating 2545
Total number of mistakes:  8174 without updating 8211

In [10]:
# Count the rows of the HF3 test votes; only the first column is loaded.
nlines = len(pandas.read_table('../results/hf3/res_test.dat', header=None, sep=' ', usecols=[0]))

print(nlines)

262102

In [10]:
# Score the HF3 test votes with the learned weights and save the predictions.
predict_test(
    weights=w3,
    fname='../results/hf3/res_test.dat',
    outname='../results/pred.hf3.dat',
)

0 shape: 10000,526  ==> Num. Positive 5764   Num. Negative 4236
10000 shape: 10000,526  ==> Num. Positive 11395   Num. Negative 8605
20000 shape: 10000,526  ==> Num. Positive 17200   Num. Negative 12800
30000 shape: 10000,526  ==> Num. Positive 22892   Num. Negative 17108
40000 shape: 10000,526  ==> Num. Positive 28612   Num. Negative 21388
50000 shape: 10000,526  ==> Num. Positive 34478   Num. Negative 25522
60000 shape: 10000,526  ==> Num. Positive 40171   Num. Negative 29829
70000 shape: 10000,526  ==> Num. Positive 45832   Num. Negative 34168
80000 shape: 10000,526  ==> Num. Positive 51525   Num. Negative 38475
90000 shape: 10000,526  ==> Num. Positive 57266   Num. Negative 42734
100000 shape: 10000,526  ==> Num. Positive 62997   Num. Negative 47003
110000 shape: 10000,526  ==> Num. Positive 68853   Num. Negative 51147
120000 shape: 10000,526  ==> Num. Positive 74550   Num. Negative 55450
130000 shape: 10000,526  ==> Num. Positive 80358   Num. Negative 59642
140000 shape: 10000,526  ==> Num. Positive 86093   Num. Negative 63907
150000 shape: 10000,526  ==> Num. Positive 91795   Num. Negative 68205
160000 shape: 10000,526  ==> Num. Positive 97543   Num. Negative 72457
170000 shape: 10000,526  ==> Num. Positive 103302   Num. Negative 76698
180000 shape: 10000,526  ==> Num. Positive 109037   Num. Negative 80963
190000 shape: 10000,526  ==> Num. Positive 114699   Num. Negative 85301
200000 shape: 10000,526  ==> Num. Positive 120495   Num. Negative 89505
210000 shape: 10000,526  ==> Num. Positive 126231   Num. Negative 93769
220000 shape: 10000,526  ==> Num. Positive 131918   Num. Negative 98082
230000 shape: 10000,526  ==> Num. Positive 137650   Num. Negative 102350
240000 shape: 10000,526  ==> Num. Positive 143440   Num. Negative 106560
250000 shape: 10000,526  ==> Num. Positive 149175   Num. Negative 110825
260000 shape:  2102,526  ==> Num. Positive 150391   Num. Negative 111711

## Combining HF4 Ensemble Results

In [15]:
# HF4 works on the cross-validation samples whose label lies in [162, 164).
ycv = pandas.read_table('../data/label_cv.txt', header=None, sep=' ')
in_range = (ycv[0] >= 162) & (ycv[0] < 164)
kept_rows = np.where(in_range)[0]
ycv = ycv.iloc[kept_rows, :]
print(ycv.shape)

# The HF4 vote file must hold exactly one row per retained label.
nremain_cv = len(pandas.read_table('../results/hf4/res_cv.dat', usecols=[0], header=None, sep=' '))
assert len(ycv) == nremain_cv

# Binarise: labels below 163 -> -1, labels of 163 or more -> +1.
# Row positions are resolved before any label is overwritten.
neg_rows = np.where(ycv[0] < 163)[0]
pos_rows = np.where(ycv[0] >= 163)[0]
ycv.iloc[neg_rows, 0] = -1
ycv.iloc[pos_rows, 0] = 1

ycv.head()

(15885, 1)
Out[15]:
0
0 1
5 -1
9 1
11 1
15 1

In [16]:
# Read the first three rows once: they give the number of classifiers
# (columns) and double as the preview displayed below.
cv_preview = pandas.read_table('../results/hf4/res_cv.dat', header=None, sep=' ', nrows=3)
ncol = cv_preview.shape[1]

print(ncol)

cv_preview.head()

271
Out[16]:
0 1 2 3 4 5 6 7 8 9 ... 261 262 263 264 265 266 267 268 269 270
0 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
1 -1 -1 1 1 1 1 1 1 -1 -1 ... 1 1 -1 1 -1 1 1 1 1 1
2 -1 1 -1 -1 1 -1 -1 1 -1 -1 ... -1 -1 1 -1 -1 -1 -1 -1 1 1

3 rows × 271 columns

In [17]:
# Learn the HF4 ensemble weights from the cross-validation votes, then plot them.
w4 = update_weights(
    ycv=ycv,
    fname='../results/hf4/res_cv.dat',
    ncol=ncol,
    chunks=2000,
    lr1=-0.02,
    lr2=0.1,
)
plot_weights(w4)

0  2000,271  ==>  Num. of Mistakes: 651 	Without-updating 651
2000  2000,271  ==>  Num. of Mistakes: 709 	Without-updating 702
4000  2000,271  ==>  Num. of Mistakes: 654 	Without-updating 636
6000  2000,271  ==>  Num. of Mistakes: 679 	Without-updating 668
8000  2000,271  ==>  Num. of Mistakes: 651 	Without-updating 647
10000  2000,271  ==>  Num. of Mistakes: 685 	Without-updating 693
12000  2000,271  ==>  Num. of Mistakes: 659 	Without-updating 661
14000  1885,271  ==>  Num. of Mistakes: 606 	Without-updating 613
Total number of mistakes:  5294 without updating 5271

In [18]:
# Score the HF4 test votes with the learned weights and save the predictions.
predict_test(
    weights=w4,
    fname='../results/hf4/res_test.dat',
    outname='../results/pred.hf4.dat',
)

0 shape: 10000,271  ==> Num. Positive 3524   Num. Negative 6476
10000 shape: 10000,271  ==> Num. Positive 6942   Num. Negative 13058
20000 shape: 10000,271  ==> Num. Positive 10457   Num. Negative 19543
30000 shape: 10000,271  ==> Num. Positive 13996   Num. Negative 26004
40000 shape: 10000,271  ==> Num. Positive 17561   Num. Negative 32439
50000 shape: 10000,271  ==> Num. Positive 21066   Num. Negative 38934
60000 shape: 10000,271  ==> Num. Positive 24586   Num. Negative 45414
70000 shape: 10000,271  ==> Num. Positive 28110   Num. Negative 51890
80000 shape: 10000,271  ==> Num. Positive 31600   Num. Negative 58400
90000 shape: 10000,271  ==> Num. Positive 35053   Num. Negative 64947
100000 shape: 10000,271  ==> Num. Positive 38597   Num. Negative 71403
110000 shape: 10000,271  ==> Num. Positive 42071   Num. Negative 77929
120000 shape: 10000,271  ==> Num. Positive 45614   Num. Negative 84386
130000 shape: 10000,271  ==> Num. Positive 49072   Num. Negative 90928
140000 shape: 10000,271  ==> Num. Positive 52586   Num. Negative 97414
150000 shape: 10000,271  ==> Num. Positive 56053   Num. Negative 103947
160000 shape: 10000,271  ==> Num. Positive 59616   Num. Negative 110384
170000 shape: 10000,271  ==> Num. Positive 63139   Num. Negative 116861
180000 shape: 10000,271  ==> Num. Positive 66707   Num. Negative 123293
190000 shape: 10000,271  ==> Num. Positive 70247   Num. Negative 129753
200000 shape: 10000,271  ==> Num. Positive 73682   Num. Negative 136318
210000 shape: 10000,271  ==> Num. Positive 77226   Num. Negative 142774
220000 shape: 10000,271  ==> Num. Positive 80830   Num. Negative 149170
230000 shape: 10000,271  ==> Num. Positive 84321   Num. Negative 155679
240000 shape: 10000,271  ==> Num. Positive 87772   Num. Negative 162228
250000 shape: 10000,271  ==> Num. Positive 91239   Num. Negative 168761
260000 shape:  2102,271  ==> Num. Positive 91984   Num. Negative 170118

## HF5

In [19]:
# HF5 works on the cross-validation samples whose label lies in [157, 162).
ycv = pandas.read_table('../data/label_cv.txt', header=None, sep=' ')
in_range = (ycv[0] >= 157) & (ycv[0] < 162)
kept_rows = np.where(in_range)[0]
ycv = ycv.iloc[kept_rows, :]
print(ycv.shape)

# The HF5 vote file must hold exactly one row per retained label.
nremain_cv = len(pandas.read_table('../results/hf5/res_cv.dat', usecols=[0], header=None, sep=' '))
assert len(ycv) == nremain_cv

# Binarise: labels below 160 -> -1, labels of 160 or more -> +1.
# Row positions are resolved before any label is overwritten.
neg_rows = np.where(ycv[0] < 160)[0]
pos_rows = np.where(ycv[0] >= 160)[0]
ycv.iloc[neg_rows, 0] = -1
ycv.iloc[pos_rows, 0] = 1

ycv.head()

(23540, 1)
Out[19]:
0
3 -1
7 1
12 -1
17 -1
19 1

In [20]:
# Read the first three rows once: they give the number of classifiers
# (columns) and double as the preview displayed below.
cv_preview = pandas.read_table('../results/hf5/res_cv.dat', header=None, sep=' ', nrows=3)
ncol = cv_preview.shape[1]

print(ncol)

cv_preview.head()

213
Out[20]:
0 1 2 3 4 5 6 7 8 9 ... 203 204 205 206 207 208 209 210 211 212
0 1 -1 -1 -1 1 -1 -1 -1 1 -1 ... -1 1 1 -1 1 1 1 -1 1 -1
1 -1 -1 1 -1 -1 1 -1 -1 -1 -1 ... -1 -1 -1 -1 -1 1 -1 -1 1 -1
2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1

3 rows × 213 columns

In [21]:
# Learn the HF5 ensemble weights from the cross-validation votes, then plot them.
w5 = update_weights(
    ycv=ycv,
    fname='../results/hf5/res_cv.dat',
    ncol=ncol,
    chunks=4000,
    lr1=-0.02,
    lr2=0.1,
)
plot_weights(w5)

0  4000,213  ==>  Num. of Mistakes: 920 	Without-updating 927
4000  4000,213  ==>  Num. of Mistakes: 948 	Without-updating 954
8000  4000,213  ==>  Num. of Mistakes: 972 	Without-updating 960
12000  4000,213  ==>  Num. of Mistakes: 937 	Without-updating 941
16000  4000,213  ==>  Num. of Mistakes: 930 	Without-updating 925
20000  3540,213  ==>  Num. of Mistakes: 817 	Without-updating 827
Total number of mistakes:  5524 without updating 5534

In [22]:
# Score the HF5 test votes with the learned weights and save the predictions.
predict_test(
    weights=w5,
    fname='../results/hf5/res_test.dat',
    outname='../results/pred.hf5.dat',
)

0 shape: 10000,213  ==> Num. Positive 5328   Num. Negative 4672
10000 shape: 10000,213  ==> Num. Positive 10692   Num. Negative 9308
20000 shape: 10000,213  ==> Num. Positive 16028   Num. Negative 13972
30000 shape: 10000,213  ==> Num. Positive 21446   Num. Negative 18554
40000 shape: 10000,213  ==> Num. Positive 26766   Num. Negative 23234
50000 shape: 10000,213  ==> Num. Positive 32111   Num. Negative 27889
60000 shape: 10000,213  ==> Num. Positive 37504   Num. Negative 32496
70000 shape: 10000,213  ==> Num. Positive 42874   Num. Negative 37126
80000 shape: 10000,213  ==> Num. Positive 48251   Num. Negative 41749
90000 shape: 10000,213  ==> Num. Positive 53596   Num. Negative 46404
100000 shape: 10000,213  ==> Num. Positive 58992   Num. Negative 51008
110000 shape: 10000,213  ==> Num. Positive 64338   Num. Negative 55662
120000 shape: 10000,213  ==> Num. Positive 69709   Num. Negative 60291
130000 shape: 10000,213  ==> Num. Positive 74973   Num. Negative 65027
140000 shape: 10000,213  ==> Num. Positive 80307   Num. Negative 69693
150000 shape: 10000,213  ==> Num. Positive 85719   Num. Negative 74281
160000 shape: 10000,213  ==> Num. Positive 91085   Num. Negative 78915
170000 shape: 10000,213  ==> Num. Positive 96431   Num. Negative 83569
180000 shape: 10000,213  ==> Num. Positive 101779   Num. Negative 88221
190000 shape: 10000,213  ==> Num. Positive 107104   Num. Negative 92896
200000 shape: 10000,213  ==> Num. Positive 112425   Num. Negative 97575
210000 shape: 10000,213  ==> Num. Positive 117847   Num. Negative 102153
220000 shape: 10000,213  ==> Num. Positive 123178   Num. Negative 106822
230000 shape: 10000,213  ==> Num. Positive 128455   Num. Negative 111545
240000 shape: 10000,213  ==> Num. Positive 133771   Num. Negative 116229
250000 shape: 10000,213  ==> Num. Positive 139136   Num. Negative 120864
260000 shape:  2102,213  ==> Num. Positive 140277   Num. Negative 121825

## HF6

In [6]:
# HF6 works on the cross-validation samples whose label lies in [160, 162).
ycv = pandas.read_table('../data/label_cv.txt', header=None, sep=' ')
in_range = (ycv[0] >= 160) & (ycv[0] < 162)
kept_rows = np.where(in_range)[0]
ycv = ycv.iloc[kept_rows, :]
print(ycv.shape)

# The HF6 vote file must hold exactly one row per retained label.
nremain_cv = len(pandas.read_table('../results/hf6/res_cv.dat', usecols=[0], header=None, sep=' '))
assert len(ycv) == nremain_cv

# Binarise: labels below 161 -> -1, labels of 161 or more -> +1.
# Row positions are resolved before any label is overwritten.
neg_rows = np.where(ycv[0] < 161)[0]
pos_rows = np.where(ycv[0] >= 161)[0]
ycv.iloc[neg_rows, 0] = -1
ycv.iloc[pos_rows, 0] = 1

# Class balance after binarisation: count of -1 labels, count of +1 labels.
n_negative = np.sum(ycv[0] == -1)
n_positive = np.sum(ycv[0] == 1)
print(n_negative, n_positive)
ycv.head()

(12491, 1)
(6038, 6453)
Out[6]:
0
7 1
19 1
66 1
68 1
70 1

In [7]:
# Read the first three rows once: they give the number of classifiers
# (columns) and double as the preview displayed below.
cv_preview = pandas.read_table('../results/hf6/res_cv.dat', header=None, sep=' ', nrows=3)
ncol = cv_preview.shape[1]

print(ncol)

cv_preview.head()

185
Out[7]:
0 1 2 3 4 5 6 7 8 9 ... 175 176 177 178 179 180 181 182 183 184
0 -1 1 1 -1 -1 -1 1 -1 -1 -1 ... -1 1 1 -1 1 -1 1 1 -1 -1
1 1 -1 1 1 1 -1 1 1 1 1 ... 1 1 -1 1 1 1 1 1 1 -1
2 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1

3 rows × 185 columns

In [8]:
# Learn the HF6 ensemble weights from the cross-validation votes, then plot them.
w6 = update_weights(
    ycv=ycv,
    fname='../results/hf6/res_cv.dat',
    ncol=ncol,
    chunks=4000,
    lr1=-0.02,
    lr2=0.1,
)
plot_weights(w6)

0  4000,185  ==>  Num. of Mistakes: 1417 	Without-updating 1414
4000  4000,185  ==>  Num. of Mistakes: 1409 	Without-updating 1402
8000  4000,185  ==>  Num. of Mistakes: 1432 	Without-updating 1420
12000   491,185  ==>  Num. of Mistakes: 161 	Without-updating 162
Total number of mistakes:  4419 without updating 4398

In [11]:
# Score the HF6 test votes with the learned weights and save the predictions.
predict_test(
    weights=w6,
    fname='../results/hf6/res_test.dat',
    outname='../results/pred.hf6.dat',
)

0 shape: 10000,185  ==> Num. Positive 5476   Num. Negative 4524
10000 shape: 10000,185  ==> Num. Positive 10920   Num. Negative 9080
20000 shape: 10000,185  ==> Num. Positive 16323   Num. Negative 13677
30000 shape: 10000,185  ==> Num. Positive 21824   Num. Negative 18176
40000 shape: 10000,185  ==> Num. Positive 27273   Num. Negative 22727
50000 shape: 10000,185  ==> Num. Positive 32706   Num. Negative 27294
60000 shape: 10000,185  ==> Num. Positive 38252   Num. Negative 31748
70000 shape: 10000,185  ==> Num. Positive 43799   Num. Negative 36201
80000 shape: 10000,185  ==> Num. Positive 49297   Num. Negative 40703
90000 shape: 10000,185  ==> Num. Positive 54748   Num. Negative 45252
100000 shape: 10000,185  ==> Num. Positive 60293   Num. Negative 49707
110000 shape: 10000,185  ==> Num. Positive 65719   Num. Negative 54281
120000 shape: 10000,185  ==> Num. Positive 71273   Num. Negative 58727
130000 shape: 10000,185  ==> Num. Positive 76665   Num. Negative 63335
140000 shape: 10000,185  ==> Num. Positive 82132   Num. Negative 67868
150000 shape: 10000,185  ==> Num. Positive 87578   Num. Negative 72422
160000 shape: 10000,185  ==> Num. Positive 93058   Num. Negative 76942
170000 shape: 10000,185  ==> Num. Positive 98520   Num. Negative 81480
180000 shape: 10000,185  ==> Num. Positive 103886   Num. Negative 86114
190000 shape: 10000,185  ==> Num. Positive 109416   Num. Negative 90584
200000 shape: 10000,185  ==> Num. Positive 114859   Num. Negative 95141
210000 shape: 10000,185  ==> Num. Positive 120331   Num. Negative 99669
220000 shape: 10000,185  ==> Num. Positive 125760   Num. Negative 104240
230000 shape: 10000,185  ==> Num. Positive 131296   Num. Negative 108704
240000 shape: 10000,185  ==> Num. Positive 136817   Num. Negative 113183
250000 shape: 10000,185  ==> Num. Positive 142343   Num. Negative 117657
260000 shape:  2102,185  ==> Num. Positive 143489   Num. Negative 118613

## HF7

In [12]:
# HF7 works on the cross-validation samples whose label lies in [157, 160).
ycv = pandas.read_table('../data/label_cv.txt', header=None, sep=' ')
in_range = (ycv[0] >= 157) & (ycv[0] < 160)
kept_rows = np.where(in_range)[0]
ycv = ycv.iloc[kept_rows, :]
print(ycv.shape)

# The HF7 vote file must hold exactly one row per retained label.
nremain_cv = len(pandas.read_table('../results/hf7/res_cv.dat', usecols=[0], header=None, sep=' '))
assert len(ycv) == nremain_cv

# Binarise: labels below 159 -> -1, labels of 159 or more -> +1.
# Row positions are resolved before any label is overwritten.
neg_rows = np.where(ycv[0] < 159)[0]
pos_rows = np.where(ycv[0] >= 159)[0]
ycv.iloc[neg_rows, 0] = -1
ycv.iloc[pos_rows, 0] = 1

# Class balance after binarisation: count of -1 labels, count of +1 labels.
n_negative = np.sum(ycv[0] == -1)
n_positive = np.sum(ycv[0] == 1)
print(n_negative, n_positive)
ycv.head()

(11049, 1)
(6548, 4501)
Out[12]:
0
3 -1
12 1
17 -1
30 -1
33 -1

In [13]:
# Read the first three rows once: they give the number of classifiers
# (columns) and double as the preview displayed below.
cv_preview = pandas.read_table('../results/hf7/res_cv.dat', header=None, sep=' ', nrows=3)
ncol = cv_preview.shape[1]

print(ncol)

cv_preview.head()

200
Out[13]:
0 1 2 3 4 5 6 7 8 9 ... 190 191 192 193 194 195 196 197 198 199
0 1 -1 -1 -1 -1 1 -1 1 1 -1 ... -1 -1 1 -1 1 -1 -1 1 1 -1
1 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1

3 rows × 200 columns

In [14]:
# Learn the HF7 ensemble weights from the cross-validation votes, then plot them.
w7 = update_weights(
    ycv=ycv,
    fname='../results/hf7/res_cv.dat',
    ncol=ncol,
    chunks=4000,
    lr1=-0.02,
    lr2=0.1,
)
plot_weights(w7)

0  4000,200  ==>  Num. of Mistakes: 1011 	Without-updating 1016
4000  4000,200  ==>  Num. of Mistakes: 1062 	Without-updating 1059
8000  3049,200  ==>  Num. of Mistakes: 758 	Without-updating 765
Total number of mistakes:  2831 without updating 2840

In [15]:
# Score the HF7 test votes with the learned weights and save the predictions.
predict_test(
    weights=w7,
    fname='../results/hf7/res_test.dat',
    outname='../results/pred.hf7.dat',
)

0 shape: 10000,200  ==> Num. Positive 4847   Num. Negative 5153
10000 shape: 10000,200  ==> Num. Positive 9854   Num. Negative 10146
20000 shape: 10000,200  ==> Num. Positive 14806   Num. Negative 15194
30000 shape: 10000,200  ==> Num. Positive 19851   Num. Negative 20149
40000 shape: 10000,200  ==> Num. Positive 24734   Num. Negative 25266
50000 shape: 10000,200  ==> Num. Positive 29651   Num. Negative 30349
60000 shape: 10000,200  ==> Num. Positive 34598   Num. Negative 35402
70000 shape: 10000,200  ==> Num. Positive 39511   Num. Negative 40489
80000 shape: 10000,200  ==> Num. Positive 44528   Num. Negative 45472
90000 shape: 10000,200  ==> Num. Positive 49514   Num. Negative 50486
100000 shape: 10000,200  ==> Num. Positive 54511   Num. Negative 55489
110000 shape: 10000,200  ==> Num. Positive 59466   Num. Negative 60534
120000 shape: 10000,200  ==> Num. Positive 64419   Num. Negative 65581
130000 shape: 10000,200  ==> Num. Positive 69362   Num. Negative 70638
140000 shape: 10000,200  ==> Num. Positive 74342   Num. Negative 75658
150000 shape: 10000,200  ==> Num. Positive 79337   Num. Negative 80663
160000 shape: 10000,200  ==> Num. Positive 84265   Num. Negative 85735
170000 shape: 10000,200  ==> Num. Positive 89152   Num. Negative 90848
180000 shape: 10000,200  ==> Num. Positive 94083   Num. Negative 95917
190000 shape: 10000,200  ==> Num. Positive 99039   Num. Negative 100961
200000 shape: 10000,200  ==> Num. Positive 103897   Num. Negative 106103
210000 shape: 10000,200  ==> Num. Positive 108847   Num. Negative 111153
220000 shape: 10000,200  ==> Num. Positive 113839   Num. Negative 116161
230000 shape: 10000,200  ==> Num. Positive 118776   Num. Negative 121224
240000 shape: 10000,200  ==> Num. Positive 123695   Num. Negative 126305
250000 shape: 10000,200  ==> Num. Positive 128687   Num. Negative 131313
260000 shape:  2102,200  ==> Num. Positive 129717   Num. Negative 132385

## HF8

In [16]:
# HF8 works on the cross-validation samples whose label lies in [157, 159).
ycv = pandas.read_table('../data/label_cv.txt', header=None, sep=' ')
in_range = (ycv[0] >= 157) & (ycv[0] < 159)
kept_rows = np.where(in_range)[0]
ycv = ycv.iloc[kept_rows, :]
print(ycv.shape)

# The HF8 vote file must hold exactly one row per retained label.
nremain_cv = len(pandas.read_table('../results/hf8/res_cv.dat', usecols=[0], header=None, sep=' '))
assert len(ycv) == nremain_cv

# Binarise: labels below 158 -> -1, labels of 158 or more -> +1.
# Row positions are resolved before any label is overwritten.
neg_rows = np.where(ycv[0] < 158)[0]
pos_rows = np.where(ycv[0] >= 158)[0]
ycv.iloc[neg_rows, 0] = -1
ycv.iloc[pos_rows, 0] = 1

# Class balance after binarisation: count of -1 labels, count of +1 labels.
n_negative = np.sum(ycv[0] == -1)
n_positive = np.sum(ycv[0] == 1)
print(n_negative, n_positive)
ycv.head()

(6548, 1)
(3053, 3495)
Out[16]:
0
3 1
17 1
30 -1
33 1
40 1

In [17]:
# Read the first three rows once: they give the number of classifiers
# (columns) and double as the preview displayed below.
cv_preview = pandas.read_table('../results/hf8/res_cv.dat', header=None, sep=' ', nrows=3)
ncol = cv_preview.shape[1]

print(ncol)

cv_preview.head()

200
Out[17]:
0 1 2 3 4 5 6 7 8 9 ... 190 191 192 193 194 195 196 197 198 199
0 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1

3 rows × 200 columns

In [18]:
# Learn the HF8 ensemble weights from the cross-validation votes, then plot them.
w8 = update_weights(
    ycv=ycv,
    fname='../results/hf8/res_cv.dat',
    ncol=ncol,
    chunks=1000,
    lr1=-0.02,
    lr2=0.1,
)
plot_weights(w8)

0  1000,200  ==>  Num. of Mistakes: 170 	Without-updating 170
1000  1000,200  ==>  Num. of Mistakes: 179 	Without-updating 179
2000  1000,200  ==>  Num. of Mistakes: 178 	Without-updating 178
3000  1000,200  ==>  Num. of Mistakes: 173 	Without-updating 172
4000  1000,200  ==>  Num. of Mistakes: 178 	Without-updating 176
5000  1000,200  ==>  Num. of Mistakes: 167 	Without-updating 167
6000   548,200  ==>  Num. of Mistakes: 93 	Without-updating 93
Total number of mistakes:  1138 without updating 1135

In [19]:
# Score the HF8 test votes with the learned weights and save the predictions.
predict_test(
    weights=w8,
    fname='../results/hf8/res_test.dat',
    outname='../results/pred.hf8.dat',
)

0 shape: 10000,200  ==> Num. Positive 5046   Num. Negative 4954
10000 shape: 10000,200  ==> Num. Positive 10130   Num. Negative 9870
20000 shape: 10000,200  ==> Num. Positive 15095   Num. Negative 14905
30000 shape: 10000,200  ==> Num. Positive 20063   Num. Negative 19937
40000 shape: 10000,200  ==> Num. Positive 25106   Num. Negative 24894
50000 shape: 10000,200  ==> Num. Positive 30102   Num. Negative 29898
60000 shape: 10000,200  ==> Num. Positive 35041   Num. Negative 34959
70000 shape: 10000,200  ==> Num. Positive 40120   Num. Negative 39880
80000 shape: 10000,200  ==> Num. Positive 45034   Num. Negative 44966
90000 shape: 10000,200  ==> Num. Positive 50131   Num. Negative 49869
100000 shape: 10000,200  ==> Num. Positive 55139   Num. Negative 54861
110000 shape: 10000,200  ==> Num. Positive 60093   Num. Negative 59907
120000 shape: 10000,200  ==> Num. Positive 65109   Num. Negative 64891
130000 shape: 10000,200  ==> Num. Positive 70154   Num. Negative 69846
140000 shape: 10000,200  ==> Num. Positive 75115   Num. Negative 74885
150000 shape: 10000,200  ==> Num. Positive 80104   Num. Negative 79896
160000 shape: 10000,200  ==> Num. Positive 85175   Num. Negative 84825
170000 shape: 10000,200  ==> Num. Positive 90219   Num. Negative 89781
180000 shape: 10000,200  ==> Num. Positive 95106   Num. Negative 94894
190000 shape: 10000,200  ==> Num. Positive 100042   Num. Negative 99958
200000 shape: 10000,200  ==> Num. Positive 105104   Num. Negative 104896
210000 shape: 10000,200  ==> Num. Positive 110066   Num. Negative 109934
220000 shape: 10000,200  ==> Num. Positive 115038   Num. Negative 114962
230000 shape: 10000,200  ==> Num. Positive 120096   Num. Negative 119904
240000 shape: 10000,200  ==> Num. Positive 125116   Num. Negative 124884
250000 shape: 10000,200  ==> Num. Positive 130145   Num. Negative 129855
260000 shape:  2102,200  ==> Num. Positive 131210   Num. Negative 130892

In [ ]: