Bootstrap example

Estimate a 95% confidence interval for classification accuracy by bootstrapping a dummy data set.



In [1]:

    
# Notebook setup: switch to the project directory, then import dependencies.
# NOTE(review): this working directory is specific to one machine's layout;
# the cell only succeeds where ~/NetBeansProjects/ExpLosion/ exists.
%cd ~/NetBeansProjects/ExpLosion/
# NOTE(review): `sns` and `plt` are used in later cells but never imported
# explicitly, so they presumably come from this star import — confirm.
from notebooks.common_imports import *
import numpy as np
from sklearn.metrics import accuracy_score









    



/Users/miroslavbatchkarov/NetBeansProjects/ExpLosion



In [8]:

    
# Toy gold labels and predictions; they agree on 8 of the 10 positions.
gold = np.array([1,1,1,1,1,0,0,0,0,0])
pred = np.array([1,1,1,1,0,0,0,0,0,1])
assert gold.shape == pred.shape, 'gold and pred must be aligned'

N_BOOTSTRAP = 50000  # number of bootstrap replicates
N_PREVIEW = 6        # how many replicates to print as a sanity check
SEED = 0             # fixed seed so a fresh re-run reproduces the same numbers

# Dedicated generator: reproducible without touching NumPy's global RNG state.
rng = np.random.RandomState(SEED)
n_items = len(gold)

# Bootstrap: repeatedly resample the evaluation items and re-score them.
accuracies = []
for i in range(N_BOOTSTRAP):
    # Draw as many indices as there are items, with replacement; the sample
    # size is taken from the data instead of being hard-coded.
    ids = rng.choice(n_items, size=n_items, replace=True)
    acc = accuracy_score(gold[ids], pred[ids])
    accuracies.append(acc)
    if i < N_PREVIEW:
        print(i, ids, acc)









    



0 [3 4 7 2 0 3 1 7 0 4] 0.8
1 [9 9 3 7 0 9 9 5 9 8] 0.5
2 [3 3 8 9 3 6 5 2 8 0] 0.9
3 [7 7 5 9 7 9 1 6 4 5] 0.7
4 [7 9 2 4 3 6 9 0 8 4] 0.6
5 [7 3 2 0 9 4 7 9 8 6] 0.7



In [9]:

    
# Histogram of the bootstrap accuracies, annotated with their mean (solid line)
# and the 2.5th / 97.5th percentiles (dashed lines), i.e. a 95% percentile
# interval. The figure is also written to 'bootstrap-example.pdf'.
sns.set_style('white')  # must be set before the axes are created
fig, ax = plt.subplots()

# kde=False draws only the histogram, so no KDE options are passed.
# NOTE: distplot is deprecated in seaborn >= 0.11 (histplot is its successor);
# it is kept here to stay compatible with the seaborn version this notebook uses.
sns.distplot(accuracies, kde=False, bins=10, ax=ax)

# Both interval bounds in a single call; np.percentile already returns scalars
# here, so no extra averaging is needed.
ci_low, ci_high = np.percentile(accuracies, [2.5, 97.5])
ax.axvline(np.mean(accuracies), c='k', linewidth=4)
for bound in (ci_low, ci_high):
    ax.axvline(bound, linestyle='--', c='k', linewidth=4)

ax.set_xlabel('Accuracy')
ax.set_xlim(0, 1.01)
# The y axis (raw counts) is hidden: only the shape of the distribution matters.
ax.set_ylabel('')
ax.set_yticks([])
sns.despine(ax=ax, top=True, left=True)
fig.savefig('bootstrap-example.pdf', format='pdf', dpi=300, bbox_inches='tight', pad_inches=0.1)



In [56]:

    
# Mean accuracy over only the first 1000 bootstrap replicates.
first_replicates = accuracies[:1000]
np.mean(first_replicates)









    Out[56]:





0.7975000000000001



In [60]:

    
# Compact one-character-per-item rendering of the gold labels.
''.join(str(label) for label in gold)









    Out[60]:





'1111100000'



In [61]:

    
# Compact one-character-per-item rendering of the predicted labels.
''.join(str(label) for label in pred)









    Out[61]:





'1111000001'



In [63]:

    
# Lower and upper bounds of the 95% bootstrap percentile interval.
# np.percentile already yields one scalar per requested percentile, so the
# values are printed directly instead of being passed through np.mean.
ci_low, ci_high = np.percentile(accuracies, [2.5, 97.5])
print(ci_low)
print(ci_high)



In [ ]: