In [1]:
import math
import numpy as np
%pylab inline
import pandas as pd
import seaborn as sns
import scipy.stats
import matplotlib.pyplot as plt
import statsmodels.api as sm


Populating the interactive namespace from numpy and matplotlib

In [2]:
# Beta(a, b) density, valid for integer shape parameters a, b >= 1
# (the normalising constant B(a, b) is written with factorials)
def betaDensity(a, b, x):
    coeff = math.factorial(a - 1) * math.factorial(b - 1) * 1. / math.factorial(a + b - 1)
    res = x**(a - 1) * (1 - x)**(b - 1) / coeff
    return(res)

# Beta simulation via the acceptance-rejection method: the density is
# bounded by its value at the mode xmax = (a - 1)/(a + b - 2), so a
# uniform proposal y is accepted with probability f(y) / f(xmax)
def betaSimulation(a, b):
    xmax = (a - 1) * 1. / (a + b - 2)
    fxmax = betaDensity(a, b, xmax)

    while True:
        u = np.random.uniform()
        y = np.random.uniform()
        if u <= (betaDensity(a, b, y) / fxmax):
            return y
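
A quick spot-check (an added sketch, not in the original notebook) compares the hand-written density with scipy.stats.beta.pdf at a few points:

In [ ]:
# Added check: betaDensity should agree with scipy.stats.beta.pdf
# for integer shape parameters.
for x in (0.1, 0.4, 0.8):
    print(betaDensity(3, 5, x), scipy.stats.beta.pdf(x, 3, 5))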

In [3]:
# Overlay the distribution estimates from the manual implementation and numpy
def verificationPlot(test, reference):
    dfSim = pd.DataFrame(test)
    dfPy = pd.DataFrame(reference)
    sns.distplot(dfSim, color="y", label="Manual simulation")
    sns.distplot(dfPy, hist=False, label="Numpy simulation")
    legend = plt.legend()
    return True

In [4]:
# Check Beta simulation
N = 200000
a = 3
b = 5

def multipleBetaSimulation(a, b, size = N):
    beta = []
    for i in range(size):
        beta.append(betaSimulation(a, b))
    return(beta)

test = multipleBetaSimulation(a, b, size = N)
reference = np.random.beta(a, b, size = N)

verificationPlot(test, reference)


Out[4]:
True
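
As an extra check (an addition, not in the original notebook), the empirical mean of the manual draws can be compared with the theoretical Beta mean a/(a + b):

In [ ]:
# Added check: Beta(a, b) has mean a / (a + b) = 3/8 = 0.375 here,
# so the empirical mean of the draws should be close to that.
print(np.mean(test), a * 1. / (a + b))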

In [5]:
# Bernoulli simulation
def bernouilliSimulation(p):
    u = np.random.uniform()
    if (u <= p): return(1)
    return(0)

# Geometric simulation: number of failures before the first success,
# matching numpy's convention
def geometricSimulation(p):
    test = True
    i = 1
    while test:
        ber = bernouilliSimulation(p)
        if ber == 1: test = False
        else: i = i + 1
    return(i - 1)

# Negative binomial simulation: a NegBin(n, p) variable is the sum of
# n independent Geometric(p) variables
def negBinSimulation(n, p):
    Y = []
    for i in range(n): Y.append(geometricSimulation(p))
    return(sum(Y))
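
As a sanity check (added, not in the original notebook), the empirical mean of the manual negative binomial draws can be compared with the theoretical value n*(1 - p)/p under the "failures before the n-th success" convention used here:

In [ ]:
# Added check: E[NegBin(n, p)] = n * (1 - p) / p under the "failures
# before the n-th success" convention, i.e. 10 * 0.75 / 0.25 = 30.
draws = [negBinSimulation(10, 0.25) for _ in range(5000)]
print(np.mean(draws), 10 * (1 - 0.25) / 0.25)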

In [6]:
# Check negative binomial simulation
N = 200000
n = 10
p = 0.25

def multipleNegBinSimulation(n, p, size = N):
    negBin = []
    for i in range(size):
        negBin.append(negBinSimulation(n, p))
    return(negBin)

test = multipleNegBinSimulation(n, p, size = N)
reference = np.random.negative_binomial(n, p, size = N)

verificationPlot(test, reference)


Out[6]:
True

In [7]:
# Gibbs sampler for the two-stage capture-recapture model
# m1: captures at the 1st step
# m2: new captures at the 2nd step
# m12: recaptures at the 2nd step
def gibbs_sampler(nbIter, manual = False, m1 = 22, m2 = 60, m12 = 11):
    mplus = m1 + m2     # number of distinct individuals captured
    mc = m1 + m2 + m12  # total number of captures over both steps

    # Conditional distributions:
    # p | M, x ~ Beta(mc + 1, 2*M - mc + 1)
    # M - m+ | p, x ~ NegBin(m+, 1 - (1 - p)**2)
    p = []  # chain for the capture probability p
    M = []  # chain for M - m+ (shifted back to M at the end)

    # Initialise p from its uniform prior
    p.append(np.random.uniform())
    
    # Gibbs iterations: alternate draws from the two conditionals
    for t in range(nbIter):
        if manual:
            M.append(negBinSimulation(mplus, 1 - ((1 - p[t])**2)))
            p.append(betaSimulation(mc + 1, 2 * (M[t] + mplus) - mc + 1))
        else:
            M.append(np.random.negative_binomial(mplus, 1 - ((1 - p[t])**2)))
            p.append(np.random.beta(mc + 1, 2 * (M[t] + mplus) - mc + 1))
    M = np.array(M) + mplus  # recover M from the stored M - m+
    p = np.array(p)
    print(scipy.stats.describe(M))
    print(scipy.stats.describe(p))
    return (M, p)

# Plot the histograms and kernel density estimates of the two chains
def resultsExploitation(results):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))
    dfM = pd.DataFrame(results[0])
    sns.distplot(dfM, ax=ax1, kde_kws={"color": "k", "lw": 1, "label": "M"})
    dfp = pd.DataFrame(results[1])
    sns.distplot(dfp, ax=ax2, kde_kws={"color": "k", "lw": 1, "label": "p"})
    legend = plt.legend()

In [8]:
# Python functions
pythonResults = gibbs_sampler(1000)


DescribeResult(nobs=1000, minmax=(114, 396), mean=195.982, variance=1778.1958718718718, skewness=0.9235759699563058, kurtosis=1.3723052196965249)
DescribeResult(nobs=1001, minmax=(0.10649443602747205, 0.43868576926768743), mean=0.25047240451759806, variance=0.0030859661117988101, skewness=0.3761008207429068, kurtosis=-0.10103103879356157)

In [9]:
resultsExploitation(pythonResults)
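
Beyond the histograms, the posterior draws can be summarised directly; this added sketch (not in the original notebook) computes 95% credible intervals for M and p from the Gibbs output using numpy percentiles:

In [ ]:
# Added sketch: 95% credible intervals taken directly from the draws.
print(np.percentile(pythonResults[0], [2.5, 97.5]))  # M
print(np.percentile(pythonResults[1], [2.5, 97.5]))  # p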



In [ ]:
# Autocorrelogram of the M chain
sm.graphics.tsa.plot_acf(pythonResults[0], lags=200)

In [ ]:
# Trace of the M chain
plt.plot(pythonResults[0][1:1000])

In [ ]:
# Own implementation of distributions
manualResults = gibbs_sampler(100000, manual=True)

In [ ]:
resultsExploitation(manualResults)

In [ ]:
sm.graphics.tsa.plot_acf(manualResults[0], lags=200)

In [ ]:
# Monte-Carlo error: repeat the whole Gibbs sampler several times and
# look at the spread of the per-run posterior means
def errorMC(nbItera, nbIteraGS, manual = False, m1 = 22, m2 = 60, m12 = 11):
    # Per-run posterior means
    p = []
    M = []
    # nbItera independent Gibbs samplers, each run for nbIteraGS iterations
    for i in range(nbItera):
        tmp = gibbs_sampler(nbIteraGS, manual, m1, m2, m12)
        M.append(np.mean(tmp[0]))
        p.append(np.mean(tmp[1]))
    print(scipy.stats.describe(M))
    print(scipy.stats.describe(p))
    return(M, p)

In [ ]:
errorMC_python = errorMC(5000, 10000)
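
The spread of the per-run posterior means gives the Monte-Carlo standard error directly; this added sketch (not in the original notebook) computes it from the errorMC output above:

In [ ]:
# Added sketch: the Monte-Carlo standard error of the posterior-mean
# estimators is the standard deviation of the per-run means.
print("MC std error of E[M]:", np.std(errorMC_python[0]))
print("MC std error of E[p]:", np.std(errorMC_python[1]))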
