In [1]:
%pylab inline
%load_ext cython
import time
import ipy_table
import numpy as np
import matplotlib.pyplot as plt
In [2]:
%%cython
cimport numpy as np
import numpy as np
np.import_array()
cdef Py_ssize_t A_(Py_ssize_t n, Py_ssize_t x):
"""Calculate A_n(x) as per Schilling's original paper"""
cdef Py_ssize_t j
cdef Py_ssize_t val=0
if n<=x:
return 2**n
else:
for j in range(0, x+1):
val += A_(n-1-j, x)
return val
def A(n, x):
"""Python interface for A_(n, x)"""
return A_(n,x)
def Amatrix(Py_ssize_t N):
"""Calculate an NxN matrix of the Schilling distribution
The elements A[n, x] are the number of possible outcomes of an n-sequence of
independent coin-tosses where the maximum length of consecutive heads is
_not_larger_than x."""
cdef np.ndarray[np.uint64_t, ndim=2] result
cdef Py_ssize_t n,x
result = np.empty((N, N), np.uint64)
for x in range(N):
for n in range(0, x+1):
result[n,x]=(2**n)
for n in range(x+1,N):
result[n, x]=result[n-1-x:n,x].sum()
return result
def amatrix(Py_ssize_t N):
"""Calculate an NxN matrix of the Schilling distribution
The elements a[n, x] are the number of possible outcomes of an n-sequence of
independent coin-tosses where the maximum length of consecutive heads is
_exactly_ x. Thus a[n, x] = A[n, x] - A[n, x-1]"""
cdef np.ndarray[np.uint64_t, ndim=2] result
cdef Py_ssize_t n,x
cdef Py_ssize_t val
result = np.zeros((N, N), np.uint64)
result[:,0] = 1 # a_n(x=0) = 1
for n in range(N):
result[n,n] = 1 #a_n(x=n) = 1
# a_n(x>n) = 0
for x in range(1, n):
result[n,x] = result[n-1-x:n,x].sum() + result[n-1-x,:x].sum()
return result
def pmatrix(Py_ssize_t N):
"""Calculate an NxN matrix of the Schilling distribution.
The elements p[n, x] of the resulting matrix are the probabilities that
the length of the longest head-run in a sequence of n independent tosses
of a fair coin is exactly x.
It holds that p[n, x] = a[n, x] / 2 ** n = (A[n, x] - A[n, x-1]) / 2 ** n
Note that the probability that the length of the longest run
(no matter if head or tail) in a sequence of n independent
tosses of a fair coin is _exactly_ x is p[n-1, x-1].
"""
cdef np.ndarray[np.double_t, ndim=2] result
cdef Py_ssize_t n,x,j
cdef double val
result = np.zeros((N, N), np.double)
for n in range(N):
result[n, 0] = 2.0**(-n)
result[n, n] = 2.0**(-n) #p_n(x=n) = 1/2**n
# p_n(x>n) = 0
for x in range(1, n):
val=0
for j in range(n-1-x,n):
val+=2.0**(j-n)*result[j,x]
for j in range(0, x):
val += 2.0**(-x-1)*result[n-1-x,j]
result[n,x] = val
return result
In [3]:
def cormap_pval(n, x):
"""Cormap P-value algorithm, giving the probability that from
n coin-tosses the longest continuous sequence of either heads
or tails is _not_shorter_ than x.
Python version of the original Fortran90 code of Daniel Franke"""
dbl_max_exponent=np.finfo(np.double).maxexp-1
P=np.zeros(dbl_max_exponent)
if x <=1:
pval = 1
elif x>n:
pval = 0
elif x>dbl_max_exponent:
pval = 0
elif x==n:
pval = 2.0**(1-x)
else:
half_pow_x = 2**(-x)
P[1:x] = 0
i_x = 0
P[i_x]=2*half_pow_x
for i in range(x+1, n+1):
im1_x = i_x # == (i-1) % x
i_x = i % x
P[i_x] = P[im1_x] + half_pow_x * (1-P[i_x])
pval = P[i_x]
return pval
In [4]:
N=20
# Calculate the matrix for A using the slow method
A_slow = np.empty((N,N), np.uint64)
for n in range(N):
for x in range(N):
A_slow[n,x]=A(n,x)
A_fast = Amatrix(N)
print('The two matrices are the same:',(np.abs(A_slow - A_fast)).sum()==0)
In [5]:
N=50
a=amatrix(N)
A_fast=Amatrix(N)
A_constructed=np.empty((N,N), np.uint64)
for x in range(N):
A_constructed[:, x]=a[:,:x+1].sum(axis=1)
print('The two matrices are the same:',(np.abs(A_fast - A_constructed).sum()==0))
In [6]:
N=50
p=pmatrix(N)
a=amatrix(N)
p_from_a=np.empty((N,N), np.double)
for n in range(N):
p_from_a[n, :] = a[n, :]/2**n
print('The two matrices are the same:',(np.abs(p - p_from_a).sum()==0))
Well-known special cases
2 possible outcomes: H, T
| Max length | Outcomes | p |
|---|---|---|
| 1 | 2 | 1 |
4 possible outcomes: HH, HT, TH, TT
| Max length | Outcomes | p |
|---|---|---|
| 1 | 2 | 0.5 |
| 2 | 2 | 0.5 |
8 possible outcomes: HHT, HTT, THT, TTT, HHH, HTH, THH, TTH
| Max length | Outcomes | p |
|---|---|---|
| 1 | 2 | 0.25 |
| 2 | 4 | 0.5 |
| 2 | 2 | 0.25 |
16 possible outcomes:
| Outcome | Longest sequence length |
|---|---|
| HHTH | 2 |
| HTTH | 2 |
| THTH | 1 |
| TTTH | 3 |
| HHHH | 4 |
| HTHH | 2 |
| THHH | 3 |
| TTHH | 2 |
| HHTT | 2 |
| HTTT | 3 |
| THTT | 2 |
| TTTT | 4 |
| HHHT | 3 |
| HTHT | 1 |
| THHT | 2 |
| TTHT | 2 |
| Max length | Outcomes | p |
|---|---|---|
| 1 | 2 | 0.125 |
| 2 | 8 | 0.5 |
| 3 | 4 | 0.25 |
| 4 | 2 | 0.125 |
In [7]:
p=pmatrix(50)
for n in range(1,5):
print('{} toss(es):'.format(n))
for x in range(1,n+1):
print(' p_{}({}) = {}'.format(n,x,p[n-1,x-1]))
In [8]:
Nmax=200
times = np.empty(Nmax)
for i in range(Nmax):
t0=time.monotonic()
p=pmatrix(i)
times[i]=time.monotonic()-t0
plt.loglog(np.arange(Nmax),times,'o-',label='Execution time')
plt.xlabel('N')
plt.ylabel('Execution time of pmatrix(N) (sec)')
x=np.arange(Nmax)
a,b=np.polyfit(np.log(x[x>100]), np.log(times[x>100]),1)
plt.loglog(x,np.exp(np.log(x)*a+b),'r-', label='$\\propto N^{%.3f}$' % a)
plt.legend(loc='best')
Out[8]:
In [9]:
N=50
p=pmatrix(N+1)
bar(left=np.arange(p.shape[1])-0.5,height=p[N,:])
plt.axis(xmin=0,xmax=14)
plt.xlabel('Length of the longest head-run in {} tosses'.format(N))
plt.ylabel('Probability')
print('Most probable maximum head-run length:',p[N,:].argmax())
In [10]:
N=2050
print('Calculating {0}x{0} p-matrix...', flush=True)
t0=time.monotonic()
p=pmatrix(N)
t=time.monotonic()-t0
print('Done in {} seconds'.format(t))
np.savez('cointoss_p.npz',p=p)
Visualize the matrix:
In [13]:
plt.figure(figsize=(8,4))
plt.subplot(1,2,1)
plt.imshow(p,norm=matplotlib.colors.Normalize(),interpolation='nearest')
plt.xlabel('x')
plt.ylabel('n')
plt.title('Linear colour scale')
plt.colorbar()
plt.subplot(1,2,2)
plt.imshow(p,norm=matplotlib.colors.LogNorm(), interpolation='nearest')
plt.xlabel('x')
plt.ylabel('n')
plt.title('Logarithmic colour scale')
plt.colorbar()
Out[13]:
Note that in CorMap, the p-value is the probability that the longest continuous sequence of either heads or tails in $n$ coin-tosses is not shorter than $x$, while our pmatrix() gives the probability that it is exactly $x$ long. The desired quantity is obtained from the pmatrix() approach as $\sum_{j \ge(x-1)} p_n(j)$.
In [15]:
table =[['n', 'x', 'p (D. Franke)', 'p (A. Wacha)']]
for n, x in [(449,137),(449,10),(2039,338),(2039,18),(200,11),(10,2), (1,0), (1,1), (2,0), (2,1), (2,2), (3,0), (3,1), (3,2), (3,3), (4,0), (4,1), (4,2), (4,3), (4,4)]:
table.append([n,x,cormap_pval(n,x), p[n-1,max(0,x-1):].sum()])
tab=ipy_table.IpyTable(table)
tab.apply_theme('basic')
display(tab)
In [ ]: