In [1]:
def integral(estimate, ts):
    elements = np.ones(len(ts) - 1)
    for i in range(len(ts) - 1):
        elements[i] = (ts[i+1] - ts[i])*(estimates[i+1] + estimates[i])/2
    return sum(elements)

In [16]:
# serial MCMC

from scipy.stats import invgamma
from scipy.stats import norm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
% matplotlib inline

pine = pd.read_table("pine.txt", delim_whitespace = True)
p = pine.values
ave_x = np.average(p[:, 1]).item()

mu_alpha = 3000
sigma_alpha = 10**6
mu_beta = 185
sigma_beta = 10**4
a = 3
b = 1/(2*300**2)

sample_iter = 100000
burn_in = 30000
N = np.shape(p)[0]
var = np.var(p[:, 1])

def sum1(beta):
    elements = np.ones(N)
    for i in range(N):
        elements[i] = p[i, 0] - beta * (p[i, 1] - ave_x)
    return np.sum(elements)

def sum2(alpha):
    elements = np.ones(N)
    for i in range(N):
        elements[i] = (p[i, 1] - ave_x)*(p[i, 0] - alpha)
    return np.sum(elements)

def sum3(alpha, beta):
    elements = np.ones(N)
    for i in range(N):
        elements[i] = (p[i, 0] - alpha - beta*(p[i, 1] - ave_x))**2
    return np.sum(elements)

def loglike(alpha, beta, sigma):
    return N*np.log(1/(sigma*np.sqrt(2*np.pi))) - sum3(alpha, beta) / (2*sigma)

n = 20
c = 5

estimates = np.ones(n+1)
ts = np.ones(n+1)

for i in range(n+1):
    t = (i/n)**c
    ts[i] = t
    
    if i == 0:
        alphas = [3000]
        betas = [185]
        sigmas = [90000]
        
    else:
        alphas = [np.mean(alpha_sample).item()]
        betas = [np.mean(beta_sample).item()]
        sigmas = [np.mean(sigma_sample).item()]
    
    for j in range(sample_iter):
        
        location_alpha = (sigma_alpha*t*sum1(betas[-1]) + sigmas[-1]*mu_alpha) / (sigma_alpha * N*t + sigmas[-1])
        scale_alpha = np.sqrt((sigma_alpha * sigmas[-1]) / (sigma_alpha * N*t + sigmas[-1]))
        r = norm.rvs(loc = location_alpha, scale = scale_alpha)
        alphas.append(r.item())
        
        location_beta = (sigma_beta * t * sum2(alphas[-1]) + sigmas[-1] * mu_beta) / (sigma_beta * var*N + sigmas[-1])
        scale_beta = (sigmas[-1] * sigma_beta) / (sigma_beta * var*N + sigmas[-1])
        q = norm.rvs(loc = location_beta, scale = scale_beta)
        betas.append(q.item())
        
        shape = N*t/2 + a
        invrate = 2*b / (b*t*sum3(alphas[-1], betas[-1]) + 2)
        s = invgamma.rvs(a = shape, scale = 1/ invrate)
        sigmas.append(s.item())
    
    alpha_sample = alphas[burn_in:]
    beta_sample = betas[burn_in:len(betas)]
    sigma_sample = sigmas[burn_in:len(sigmas)]
    
    box = np.ones(len(alpha_sample))
    for k, l in enumerate(alpha_sample):
        box[k] = loglike(l, beta_sample[k], sigma_sample[k])
    
    estimates[i] = np.average(box)

どのぐらいで収束するかを見る。 どれも1万回とかで十分収束しているのでは?


In [2]:
from scipy.stats import invgamma
from scipy.stats import norm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
% matplotlib inline

pine = pd.read_table("pine.txt", delim_whitespace = True)
p = pine.values
ave_x = np.average(p[:, 1]).item()

mu_alpha = 3000
sigma_alpha = 10**6
mu_beta = 185
sigma_beta = 10**4
a = 3
b = 1/(2*300**2)

sample_iter = 10000
burn_in = 30
N = np.shape(p)[0]
var = np.var(p[:, 1])

def sum1(beta):
    elements = np.ones(N)
    for i in range(N):
        elements[i] = p[i, 0] - beta * (p[i, 1] - ave_x)
    return np.sum(elements)

def sum2(alpha):
    elements = np.ones(N)
    for i in range(N):
        elements[i] = (p[i, 1] - ave_x)*(p[i, 0] - alpha)
    return np.sum(elements)

def sum3(alpha, beta):
    elements = np.ones(N)
    for i in range(N):
        elements[i] = (p[i, 0] - alpha - beta*(p[i, 1] - ave_x))**2
    return np.sum(elements)

def loglike(alpha, beta, sigma):
    return N*np.log(1/(sigma*np.sqrt(2*np.pi))) - sum3(alpha, beta) / (2*sigma)

n = 20
c = 5

alphas = [3000]
betas = [185]
sigmas = [90000]

t = (1/10)**2

alphas = np.ones(sample_iter)
betas = np.ones(sample_iter)
sigmas = np.ones(sample_iter)

alphas[0] = 3000
betas[0] = 185
sigmas[0] = 90000

t = (1/10)**2

for j in range(sample_iter -1):

    location_alpha = (sigma_alpha*t*sum1(betas[-1]) + sigmas[-1]*mu_alpha) / (sigma_alpha * N*t + sigmas[-1])
    scale_alpha = np.sqrt((sigma_alpha * sigmas[-1]) / (sigma_alpha * N*t + sigmas[-1]))
    r = norm.rvs(loc = location_alpha, scale = scale_alpha)
    alphas[j+1] = r.item()

    location_beta = (sigma_beta * t * sum2(alphas[-1]) + sigmas[-1] * mu_beta) / (sigma_beta *t* var*N + sigmas[-1])
    scale_beta = np.sqrt((sigmas[-1] * sigma_beta) / (sigma_beta *t* var*N + sigmas[-1]))
    q = norm.rvs(loc = location_beta, scale = scale_beta)
    betas[j+1] = q.item()

    shape = N*t/2 + a
    invrate = 2*b / (b*t*sum3(alphas[-1], betas[-1]) + 2)
    s = invgamma.rvs(a = shape, scale = 1/ invrate)
    sigmas[j+1] = s.item()

In [3]:
plt.plot(betas)


Out[3]:
[<matplotlib.lines.Line2D at 0x11687b588>]

In [4]:
plt.plot(alphas)


Out[4]:
[<matplotlib.lines.Line2D at 0x119cfd438>]

In [5]:
plt.plot(sigmas)


Out[5]:
[<matplotlib.lines.Line2D at 0x119e365c0>]

並列化

でも結果を次のルーぷに使うから無理やん


In [7]:
from scipy.stats import invgamma
from scipy.stats import norm
from multiprocessing import Pool
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
% matplotlib inline

pine = pd.read_table("pine.txt", delim_whitespace = True)
p = pine.values
ave_x = np.average(p[:, 1]).item()

mu_alpha = 3000
sigma_alpha = 10**6
mu_beta = 185
sigma_beta = 10**4
a = 3
b = 1/(2*300**2)

# ここをいじってね。
sample_iter = 1000
burn_in = 30
m = 10
core = 3

N = np.shape(p)[0]
var = np.var(p[:, 1])

def sum1(beta):
    elements = np.ones(N)
    for i in range(N):
        elements[i] = p[i, 0] - beta * (p[i, 1] - ave_x)
    return np.sum(elements)

def sum2(alpha):
    elements = np.ones(N)
    for i in range(N):
        elements[i] = (p[i, 1] - ave_x)*(p[i, 0] - alpha)
    return np.sum(elements)

def sum3(alpha, beta):
    elements = np.ones(N)
    for i in range(N):
        elements[i] = (p[i, 0] - alpha - beta*(p[i, 1] - ave_x))**2
    return np.sum(elements)

def loglike(alpha, beta, sigma):
    return N*np.log(1/(sigma*np.sqrt(2*np.pi))) - sum3(alpha, beta) / (2*sigma)

def function(w):
    if w < 100:
        
        n = 20
        c = 3
        estimates = np.ones(n+1)
        ts = np.ones(n+1)

        for i in range(n+1):
            t = (i/n)**c
            ts[i] = t

            if i == 0:
                alphas = [3000]
                betas = [185]
                sigmas = [90000]

            else:
                alphas = [np.mean(alpha_sample).item()]
                betas = [np.mean(beta_sample).item()]
                sigmas = [np.mean(sigma_sample).item()]

            for j in range(sample_iter):

                location_alpha = (sigma_alpha*t*sum1(betas[-1]) + sigmas[-1]*mu_alpha) / (sigma_alpha * N*t + sigmas[-1])
                scale_alpha = np.sqrt((sigma_alpha * sigmas[-1]) / (sigma_alpha * N*t + sigmas[-1]))
                r = norm.rvs(loc = location_alpha, scale = scale_alpha)
                alphas.append(r.item())

                location_beta = (sigma_beta * t * sum2(alphas[-1]) + sigmas[-1] * mu_beta) / (sigma_beta * var*N + sigmas[-1])
                scale_beta = (sigmas[-1] * sigma_beta) / (sigma_beta * var*N + sigmas[-1])
                q = norm.rvs(loc = location_beta, scale = scale_beta)
                betas.append(q.item())

                shape = N*t/2 + a
                invrate = 2*b / (b*t*sum3(alphas[-1], betas[-1]) + 2)
                s = invgamma.rvs(a = shape, scale = 1/ invrate)
                sigmas.append(s.item())

            alpha_sample = alphas[burn_in:]
            beta_sample = betas[burn_in:len(betas)]
            sigma_sample = sigmas[burn_in:len(sigmas)]

            box = np.ones(len(alpha_sample))
            for k, l in enumerate(alpha_sample):
                box[k] = loglike(l, beta_sample[k], sigma_sample[k])

            estimates[i] = np.average(box)
        
        return estimates

if __name__ == '__main__':
    p = Pool(core) 
    result = p.map(function, range(m))

In [8]:
result


Out[8]:
[array([-1281.52648633, -1189.67948291, -1008.69127558,  -851.92786789,
         -858.68470908,  -873.90565731,  -877.02651235,  -859.91947303,
         -878.3447848 ,  -869.96235801,  -873.28045876,  -875.61921931,
         -879.93889428,  -884.16577025,  -892.05108111,  -881.78634586,
         -882.36680116,  -895.66147305,  -885.68805034,  -893.36193867,
         -888.00551288]),
 array([-1281.52648633, -1189.67948291, -1008.69127558,  -851.92786789,
         -858.68470908,  -873.90565731,  -877.02651235,  -859.91947303,
         -878.3447848 ,  -869.96235801,  -873.28045876,  -875.61921931,
         -879.93889428,  -884.16577025,  -892.05108111,  -881.78634586,
         -882.36680116,  -895.66147305,  -885.68805034,  -893.36193867,
         -888.00551288]),
 array([-1281.52648633, -1189.67948291, -1008.69127558,  -851.92786789,
         -858.68470908,  -873.90565731,  -877.02651235,  -859.91947303,
         -878.3447848 ,  -869.96235801,  -873.28045876,  -875.61921931,
         -879.93889428,  -884.16577025,  -892.05108111,  -881.78634586,
         -882.36680116,  -895.66147305,  -885.68805034,  -893.36193867,
         -888.00551288]),
 array([-1259.04596905, -1209.29708546, -1010.46820594,  -873.14307252,
         -864.1371405 ,  -839.52347539,  -864.56958139,  -858.84846122,
         -878.0755815 ,  -861.04022846,  -876.42421408,  -873.51226597,
         -885.67299906,  -881.5684352 ,  -882.82896588,  -884.67730722,
         -886.39912995,  -883.81481798,  -879.31266115,  -889.13813933,
         -890.68742532]),
 array([-1259.04596905, -1209.29708546, -1010.46820594,  -873.14307252,
         -864.1371405 ,  -839.52347539,  -864.56958139,  -858.84846122,
         -878.0755815 ,  -861.04022846,  -876.42421408,  -873.51226597,
         -885.67299906,  -881.5684352 ,  -882.82896588,  -884.67730722,
         -886.39912995,  -883.81481798,  -879.31266115,  -889.13813933,
         -890.68742532]),
 array([-1259.04596905, -1209.29708546, -1010.46820594,  -873.14307252,
         -864.1371405 ,  -839.52347539,  -864.56958139,  -858.84846122,
         -878.0755815 ,  -861.04022846,  -876.42421408,  -873.51226597,
         -885.67299906,  -881.5684352 ,  -882.82896588,  -884.67730722,
         -886.39912995,  -883.81481798,  -879.31266115,  -889.13813933,
         -890.68742532]),
 array([-1220.99783367, -1245.69632555,  -997.71211448,  -856.73497905,
         -817.33190181,  -867.44842168,  -861.98510039,  -866.08634258,
         -863.85722841,  -875.94158511,  -875.73288508,  -874.72333617,
         -884.79604484,  -886.59848292,  -878.22739358,  -879.02446822,
         -892.46975711,  -884.93421851,  -894.65654293,  -887.02439295,
         -893.49719602]),
 array([-1220.99783367, -1245.69632555,  -997.71211448,  -856.73497905,
         -817.33190181,  -867.44842168,  -861.98510039,  -866.08634258,
         -863.85722841,  -875.94158511,  -875.73288508,  -874.72333617,
         -884.79604484,  -886.59848292,  -878.22739358,  -879.02446822,
         -892.46975711,  -884.93421851,  -894.65654293,  -887.02439295,
         -893.49719602]),
 array([-1220.99783367, -1245.69632555,  -997.71211448,  -856.73497905,
         -817.33190181,  -867.44842168,  -861.98510039,  -866.08634258,
         -863.85722841,  -875.94158511,  -875.73288508,  -874.72333617,
         -884.79604484,  -886.59848292,  -878.22739358,  -879.02446822,
         -892.46975711,  -884.93421851,  -894.65654293,  -887.02439295,
         -893.49719602]),
 array([-1276.87774026, -1195.03146662,  -997.75207588,  -862.64358998,
         -825.80868033,  -872.38091703,  -888.28528088,  -867.59473965,
         -873.52325774,  -873.06359205,  -872.32937347,  -878.17590133,
         -880.59895651,  -887.87099288,  -879.8326407 ,  -887.45744133,
         -887.73657226,  -885.29207868,  -885.79353134,  -888.46211445,
         -892.77014537])]

In [ ]: