Bootstrap example

Estimate confidence interval via bootstrapping for a dummy data set


In [1]:
%cd ~/NetBeansProjects/ExpLosion/
from notebooks.common_imports import *
import numpy as np
from sklearn.metrics import accuracy_score


/Users/miroslavbatchkarov/NetBeansProjects/ExpLosion

In [8]:
gold = np.array([1,1,1,1,1,0,0,0,0,0])
pred = np.array([1,1,1,1,0,0,0,0,0,1])

accuracies = []
for i in range(50000):
    ids = np.random.choice(list(range(10)), 10)
    acc = accuracy_score(gold[ids], pred[ids])
    accuracies.append(acc)
    if i<6:
        print(i, ids, acc)


0 [3 4 7 2 0 3 1 7 0 4] 0.8
1 [9 9 3 7 0 9 9 5 9 8] 0.5
2 [3 3 8 9 3 6 5 2 8 0] 0.9
3 [7 7 5 9 7 9 1 6 4 5] 0.7
4 [7 9 2 4 3 6 9 0 8 4] 0.6
5 [7 3 2 0 9 4 7 9 8 6] 0.7

In [9]:
sns.set_style('white')
sns.distplot(accuracies, kde=False, bins=10, kde_kws={'bw':0.04})
plt.axvline(np.mean(accuracies), c='k', linewidth=4);
plt.axvline(np.mean(np.percentile(accuracies, 2.5)), linestyle ='--', c='k', linewidth=4);
plt.axvline(np.mean(np.percentile(accuracies, 97.5)), linestyle ='--', c='k', linewidth=4);
plt.xlabel('Accuracy');
plt.xlim(0, 1.01)
plt.ylabel('')
plt.yticks([])
sns.despine(top=True, left=True)
plt.savefig('bootstrap-example.pdf', format='pdf', dpi=300, bbox_inches='tight', pad_inches=0.1)



In [56]:
np.mean(accuracies[:1000])


Out[56]:
0.7975000000000001

In [60]:
''.join(map(str,gold))


Out[60]:
'1111100000'

In [61]:
''.join(map(str,pred))


Out[61]:
'1111000001'

In [63]:
print(np.mean(np.percentile(accuracies, 2.5)))
print(np.mean(np.percentile(accuracies, 97.5)))


0.5
1.0

In [ ]: