In [41]:
import numpy as np
import scipy 
import scipy.stats

In [2]:
def calc_frequency_array(count_arr):
    """Convert an array of counts into per-row relative frequencies.

    Every row of ``count_arr`` is divided by that row's own total, so each
    row of the result sums to 1. The code iterates over rows and over the
    elements within each row, i.e. it is written for 2-D input.

    Parameters
    ----------
    count_arr : numpy.ndarray
        Array of counts, one row per sample.

    Returns
    -------
    numpy.ndarray
        New float array with the same shape as ``count_arr``.

    Raises
    ------
    ZeroDivisionError
        If a row sums to zero (the division is done on Python floats).
    """
    frequencies = np.zeros(count_arr.shape)

    for row_idx, counts in enumerate(count_arr):
        # The row total is the denominator for every element in this row.
        row_total = float(sum(counts))
        frequencies[row_idx] = [float(count) / row_total for count in counts.tolist()]

    return frequencies

In [19]:
# Parametric bootstrap of one assemblage: the proportions below are used as
# the multinomial probabilities for every bootstrap replicate.
assemblage = [0.1, 0.2, 0.5, 0.05, 0.15]

# Single-argument print(...) works on both Python 2 and Python 3 and emits
# the same text as the old print statement.
print("assemblage to be bootstrapped: %s" % assemblage)

bootsize = 10      # number of bootstrap replicates (rows of the sample)
assem_size = 150   # number of items drawn in each replicate
bootstrap_shape = (bootsize, len(assemblage))
# Placeholder only: `sampled` is rebound to the multinomial draw below, so
# these zeros are never used beyond the diagnostic print.
sampled = np.zeros(bootstrap_shape)

print("initialized arrayOfStats: %s" % sampled)

# One row of counts per replicate; each row totals assem_size.
# NOTE: no random seed is set in this cell, so the draw differs on every run.
sampled = np.random.multinomial(assem_size, assemblage, size=bootsize)

print("multinomial sample: %s" % sampled)

# Counts -> proportions. Dividing by a float forces true division, which
# matters for the integer counts under Python 2.
bootstrap_sample = sampled / float(assem_size)

print("full bootstrap sample: %s" % bootstrap_sample)


assemblage to be bootstrapped: [0.1, 0.2, 0.5, 0.05, 0.15]
initialized arrayOfStats: [[ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]]
multinomial sample: [[20 38 60  8 24]
 [21 22 66  5 36]
 [14 28 81  6 21]
 [15 34 70 12 19]
 [15 31 63 12 29]
 [12 24 85 11 18]
 [16 32 69  7 26]
 [24 23 76  7 20]
 [17 29 70  8 26]
 [15 34 70  7 24]]
full bootstrap sample: [[ 0.13333333  0.25333333  0.4         0.05333333  0.16      ]
 [ 0.14        0.14666667  0.44        0.03333333  0.24      ]
 [ 0.09333333  0.18666667  0.54        0.04        0.14      ]
 [ 0.1         0.22666667  0.46666667  0.08        0.12666667]
 [ 0.1         0.20666667  0.42        0.08        0.19333333]
 [ 0.08        0.16        0.56666667  0.07333333  0.12      ]
 [ 0.10666667  0.21333333  0.46        0.04666667  0.17333333]
 [ 0.16        0.15333333  0.50666667  0.04666667  0.13333333]
 [ 0.11333333  0.19333333  0.46666667  0.05333333  0.17333333]
 [ 0.1         0.22666667  0.46666667  0.04666667  0.16      ]]

In [21]:
# Sanity check: sum each bootstrap replicate (row); every total should be 1.
bootstrap_sample.sum(axis=1)


Out[21]:
array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [39]:
def confidence_interval(data, confidence=0.05):
    """Return the mean of ``data`` and a two-sided Student-t interval around it.

    The half-width is ``sem(data) * t.ppf((1 + confidence) / 2, n - 1)``,
    so ``confidence`` is the coverage level of the interval (e.g. 0.95 for
    a 95% interval).

    NOTE(review): the default of 0.05 is kept for backward compatibility,
    but with this formula it produces a 5% interval, which is extremely
    narrow. Callers wanting a conventional interval should pass
    ``confidence=0.95`` explicitly.

    Parameters
    ----------
    data : array-like
        Numeric observations.
    confidence : float, optional
        Coverage level of the interval, between 0 and 1.

    Returns
    -------
    tuple
        ``(mean, lower, upper)`` where lower/upper are mean -/+ half-width.
    """
    values = np.asarray(data, dtype=float)
    n = len(values)
    mean = np.mean(values)
    std_err = scipy.stats.sem(values)
    # Use the module actually imported by the notebook (`scipy`, not `sp`)
    # and the public `ppf` rather than the private `_ppf`.
    half_width = std_err * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
    return mean, mean - half_width, mean + half_width

In [25]:



Out[25]:
5

In [43]:
# Report the mean and confidence bounds for each column of the bootstrap
# sample. `range` and single-argument print(...) behave the same on
# Python 2 and also run on Python 3, unlike `xrange` and the print statement.
for col in range(bootstrap_sample.shape[1]):
    col_data = bootstrap_sample[:, col]
    mean, lower, upper = confidence_interval(col_data)
    print("mean: %s lower: %s upper %s" % (mean, lower, upper))


mean: 0.112666666667 lower: 0.112168466297 upper 0.113164867036
mean: 0.196666666667 lower: 0.195946684754 upper 0.197386648579
mean: 0.473333333333 lower: 0.472282383386 upper 0.47438428328
mean: 0.0553333333333 lower: 0.0549939642109 upper 0.0556727024557
mean: 0.162 lower: 0.161266458979 upper 0.162733541021

In [ ]: