In [41]:
import numpy as np
import scipy
import scipy.stats
In [2]:
def calc_frequency_array(count_arr):
    """Convert an array of counts into relative frequencies along each row.

    Every entry is divided by the total of the row it belongs to (the sum
    along the last axis), so each row with a non-zero total sums to 1.

    Parameters
    ----------
    count_arr : array-like
        Counts to normalise. A 2-D input is normalised row by row; a 1-D
        input is treated as a single row.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``count_arr``. Rows whose total
        is zero are returned as all zeros instead of raising
        ``ZeroDivisionError``.
    """
    counts = np.asarray(count_arr, dtype=float)
    # keepdims=True keeps the totals broadcastable against the rows.
    totals = counts.sum(axis=-1, keepdims=True)
    freq_arr = np.zeros(counts.shape)
    # Only divide where the row total is non-zero; other entries stay 0.0.
    np.divide(counts, totals, out=freq_arr, where=totals != 0)
    return freq_arr
In [19]:
# Category proportions used as the multinomial probabilities below.
assemblage = [0.1, 0.2, 0.5, 0.05, 0.15]
# Single-argument print(...) calls behave identically on Python 2 and 3.
print("assemblage to be bootstrapped: %s" % assemblage)

bootsize = 10      # number of bootstrap replicates (rows)
assem_size = 150   # number of items drawn per replicate
bootstrap_shape = (bootsize, len(assemblage))

# Placeholder of the final shape; it is replaced by the multinomial draw.
sampled = np.zeros(bootstrap_shape)
print("initialized arrayOfStats: %s" % sampled)

# Each row holds the counts per category for one draw of assem_size items.
sampled = np.random.multinomial(assem_size, assemblage, size=bootsize)
print("multinomial sample: %s" % sampled)

# Convert counts to proportions; float() avoids integer division on Python 2.
bootstrap_sample = sampled / float(assem_size)
print("full bootstrap sample: %s" % bootstrap_sample)
In [21]:
# Row totals of the bootstrap proportions (one value per replicate).
bootstrap_sample.sum(axis=1)
Out[21]:
In [39]:
def confidence_interval(data, confidence=0.05):
    """Return the mean of ``data`` and a Student-t interval around it.

    The half-width of the interval is the standard error of the mean
    multiplied by the Student-t quantile at ``(1 + confidence) / 2`` with
    ``n - 1`` degrees of freedom, so ``confidence`` is the two-sided
    coverage level of the interval (e.g. ``0.95`` for a 95 % interval).

    NOTE(review): the default of ``0.05`` therefore produces a very narrow
    5 % interval. It is kept unchanged for backward compatibility; pass
    ``confidence=0.95`` explicitly if a 95 % interval is intended.

    Parameters
    ----------
    data : array-like
        Sample values.
    confidence : float, optional
        Two-sided coverage level in (0, 1).

    Returns
    -------
    tuple
        ``(mean, lower, upper)`` where ``lower = mean - h`` and
        ``upper = mean + h``.
    """
    a = np.asarray(data, dtype=float)
    n = len(a)
    m, se = np.mean(a), scipy.stats.sem(a)
    # Use the public ppf via the imported ``scipy`` name; the previous
    # ``sp.stats.t._ppf`` relied on an undefined alias and a private method.
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
    return m, m - h, m + h
In [25]:
Out[25]:
In [43]:
# Report the mean and interval bounds for each column of bootstrap_sample.
# NOTE(review): confidence_interval is called with its default ``confidence``
# argument; confirm that this is the intended coverage level.
# range() and single-argument print(...) work on both Python 2 and 3.
for col in range(bootstrap_sample.shape[1]):
    col_data = bootstrap_sample[:, col]
    mean, lower, upper = confidence_interval(col_data)
    print("mean: %s lower: %s upper %s" % (mean, lower, upper))
In [ ]: