In [1]:
from scipy.stats import gamma,rv_continuous,norm
from scipy.special import gamma as gm
from scipy.special import kv
from scipy.integrate import quad
import matplotlib as mpl
import matplotlib.pyplot as plt
import math
import numpy as np

In [2]:
class gen_gamma(rv_continuous):
    def _pdf(self, x, v, k, b):
        return abs(v)*x**(k*v-1)*np.exp(-(x**v)/(b**abs(v)))/(b**(k*abs(v))*gm(k))
    def _argcheck(self, v, k,b):
        return k>0 and b>0

In [3]:
class GVG:
    def cdf(self,x,a,v,k,b):
        def integrand(z,x,a,v,k,b):
            gg = gen_gamma(a=0.)
            return norm.cdf((x-a*z)/math.sqrt(z))*gg.pdf(z,v,k,b)
        return quad(integrand,0.0001,np.inf,args=(x,a,v,k,b))[0]
    def pdf(self,x,a,v,k,b):
        return (self.cdf(x,a,v,k,b)-self.cdf(x-0.001,a,v,k,b))/0.001

In [4]:
class gvg1(rv_continuous):
    def _cdf(self,x,a,v,k,b):
        def integrand(z,x,a,v,k,b):
            gg = gen_gamma(a=0.)
            return norm.cdf((x-a*z)/math.sqrt(z))*gg.pdf(z,v,k,b)
        return quad(integrand,0.0001,np.inf,args=(x,a,v,k,b))[0]
    def _argcheck(self,a, v, k,b):
        return k>0 and b>0

In [5]:
def gvg_rand(a,v, k, b,size=1000):
    GG = gen_gamma(a=0.)
    rand_gg=GG.rvs(v, k, b, size=size)
    return norm.rvs(size=size) * np.sqrt(rand_gg) + a*rand_gg

In [6]:
#a- масштаб
plt.hist(gvg_rand(2,0.8,1,1,1000),40,normed=True)
plt.show()



In [ ]:
plt.hist(gvg_rand(0,0.8,3,1,1000),40,normed=True)
plt.show()

In [ ]:
plt.hist(gvg_rand(-1.2,2,1,3,1000),40,normed=True)
plt.show()

In [ ]:
plt.hist(gvg_rand(1.2,2,1,3,1000),40,normed=True)
plt.show()

In [ ]:
plt.hist(gvg_rand(1.2,2,0.2,0.8,1000),40,normed=True)
plt.show()

In [ ]:
plt.hist(gvg_rand(0.2,2,7,0.8,1000),40,normed=True)
plt.show()

In [ ]:
plt.hist(gvg_rand(2,1,0.5,0.8,1000),40,normed=True)
plt.show()

In [ ]:
plt.hist(gvg_rand(4,1,0.3,0.8,1000),40,normed=True)
plt.show()

In [ ]:
plt.hist(gvg_rand(0,2,0.25,5,1000),40,normed=True)
plt.show()

In [ ]:
params_sets = [
    [0,2,0.25,5],
    [4,1,0.3,0.8],
    [2,1,0.5,0.8],
    [0.2,2,7,0.8],
    [1.2,2,0.2,0.8],
    [1.2,2,1,3],
    [-1.2,2,1,3],
    [0,0.8,3,1],
    [2,0.8,1,1]]

steps_grid = [0,10,20]
grid_sizes = [5,10,20]

In [7]:
def experiment(params_sets,steps_grid,grid_sizes):
    for i,params in enumerate(params_sets):
        np.random.seed(0)
        sample = gvg_rand(params[0],params[1],params[2],params[3],1000)
        plt.hist(sample,40)
        x=np.linspace(np.min(sample),np.max(sample),50)
        y=[gvg.pdf(x,params[0],params[1],params[2],params[3]) for x in x]
        plt.plot(x,y)
        plt.savefig('pics/params'+str(i)+'.png')
        for steps in steps_grid:
            for grid_size in grid_sizes:
                print('Params: ',params,', steps:',steps,', grid:',grid_size)
                grid = np.linspace(1,40,grid_sizes)
                p,a,new_grid,iter_num = fit_mixture(sample,grid,0.01,irreg_grid_step=2,recalc_steps=10)
                res=fit_gvg(p,a,new_grid)
                dist = pdf_dist([a,res[0],res[1],res[2]],[2,0.8,3,1])

In [52]:
class Mixture():
    def __init__(self,p,a,u):
        self.p=p
        self.a=a
        self.u=u
    def cdf(self,x):
        return (self.p*norm.cdf((x-self.a*self.u)/np.sqrt(self.u))).sum()
    def pdf(self,x):
        return (self.cdf(x)-self.cdf(x-0.001))/0.001
    
    
    
def grid_likehood(sample,u,p,a):
    summ=0
    u,p = np.array(u),np.array(p)
    temp2 = np.sqrt(u)
    temp1 = p/temp2
    temp3= a*u
    for x in sample:
        summ += (temp1 * norm.pdf((x-temp3)/temp2)).sum()
    return summ

In [56]:
def fit_mixture(sample,u,eps=0.05,verbose=1,irreg_grid_step=0,recalc_steps=10):
    import math
    p = np.full(len(u), 1/len(u))
    mean = np.mean(sample)
    a = mean/(u*p).sum()
    f = np.zeros((len(u),len(sample)))
    g = np.zeros((len(u),len(sample)))
    new_likehood = grid_likehood(sample,u,p,a)
    iteration = 0
    bins=list(np.array(grid)-(grid[1]-grid[0])/2) + [grid[-1]+(grid[1]-grid[0])/2] 
    while True:
        for i in range(0,len(u)):
            f[i,:]= 1/math.sqrt(u[i]) * norm.pdf((sample-a*u[i])/math.sqrt(u[i]))
        
        for j in range(0,len(sample)):
            g[:,j] = p*f[:,j]/ (p*f[:,j]).sum()           
        
        p = g.sum(axis=1)/len(sample)
        a = mean/(u*p).sum()
        prev_likehood = new_likehood
        new_likehood = grid_likehood(sample,u,p,a)
        iteration += 1
        if (verbose): print('\nIter: %d  likehood: %f' %(iteration,new_likehood))
        if ((new_likehood-prev_likehood)<eps) & (iteration>recalc_steps+1) : break      
        if (irreg_grid_step):
            if ((iteration%irreg_grid_step==0) & (iteration<=recalc_steps)):
                new_bins,new_grid = move_grid(bins,p)
                p = piecewise(new_grid,u,p)+1e-6
                p = p/p.sum()
                bins,u=new_bins,new_grid
                a = mean/(u*p).sum()
    return p,a,u,iteration

In [10]:
def piecewise(x,grid,p):
    bins=list(np.array(grid)-(grid[1]-grid[0])/2) + [grid[-1]+(grid[1]-grid[0])/2] 
    square = np.sum(np.array(p)*(grid[1]-grid[0]))
    p=p/square
    try:
        res=np.zeros(x.shape[0])
    except:
        res=np.zeros(1)
        x=[x]
    for i,num in enumerate(x):

        #closest point
        try:
            idx_bin=bins.index(min([i for i in bins if i>num], key=lambda k:abs(k-num)))
        except:
            res[i]= 0
            continue
        if (idx_bin==0):
            res[i]= 0
            continue
        
        try:
            idx_grid=grid.index(min([i for i in grid if i>num], key=lambda k:abs(k-num)))
        except:
            res[i] = (bins[-1]-num)*p[-1]/(bins[-1]-grid[-1])
            continue
        if (idx_grid==0):
            res[i] = 2*(num - bins[0])*p[0]/(bins[1]-bins[0])
            continue
        
        s = (grid[idx_grid]-grid[idx_grid-1])*(p[idx_grid]+p[idx_grid-1])/2
        h1 = num - grid[idx_grid-1]
        h2 = grid[idx_grid] - num
        res[i] = (2*s - h1*p[idx_grid-1]-h2*p[idx_grid])/(h1+h2)                     
    return res

In [11]:
def move_grid(bins,p):
    bins=np.array(bins)
    new_bins=split_hist(np.array(bins),np.array(p)/(np.array(p)*(bins[1:]-bins[:-1])).sum(),20)
    new_bins.append(bins[0])
    new_bins.append(bins[-1])
    new_bins=np.array(sorted(new_bins))
    return new_bins,(new_bins[1:] + new_bins[:-1]) / 2

In [12]:
def split_hist(x,p,k):
    c = np.cumsum(p*(x[1:]-x[:-1]))
    res=[]
    for i in range(k-1):
        excess = c-(i+1)/k
        ix = np.where(excess>0)[0].min()
        res.append(x[ix+1]-excess[ix]/p[ix])
    return res

grid=[  0.01  ,   2.5075,   5.005 ,   7.5025,  10.    ]
p=[ 0.25941888,  0.64792493,  0.02372754,  0.00882218,  0.06010648]
bins=list(np.array(grid)-(grid[1]-grid[0])/2) + [grid[-1]+(grid[1]-grid[0])/2] 
square = np.sum(np.array(p)*(grid[1]-grid[0]))
p=p/square

split_hist(np.array(bins),np.array(p),5)


Out[12]:
[0.68670741079061026,
 1.8006358588910907,
 2.571558510370175,
 3.3424811618492591]

In [38]:
sample = gvg_rand(0.2,2,7,0.8,200)
grid = np.linspace(1,40,20)

In [25]:
plt.hist(sample,50,normed=True)

plt.show()



In [66]:
p,a,new_grid,_ = fit_mixture(sample,grid,0.01,irreg_grid_step=2,recalc_steps=0)


Iter: 1  likehood: 23.239135

Iter: 2  likehood: 27.106261

Iter: 3  likehood: 30.821387

Iter: 4  likehood: 33.818221

Iter: 5  likehood: 35.992622

Iter: 6  likehood: 37.501451

Iter: 7  likehood: 38.540013

Iter: 8  likehood: 39.260789

Iter: 9  likehood: 39.767907

Iter: 10  likehood: 40.129797

Iter: 11  likehood: 40.391254

Iter: 12  likehood: 40.581902

Iter: 13  likehood: 40.721675

Iter: 14  likehood: 40.824272

Iter: 15  likehood: 40.899304

Iter: 16  likehood: 40.953654

Iter: 17  likehood: 40.992340

Iter: 18  likehood: 41.019075

Iter: 19  likehood: 41.036642

Iter: 20  likehood: 41.047153

Iter: 21  likehood: 41.052223

In [62]:
mixture = Mixture(p,a,new_grid)
#mixture = Mixture(np.array([0.2,0.2,0.2,0.2,0.2]),a,grid)
x=np.linspace(-5,30,100)
y=[mixture.pdf(x) for x in x]
plt.hist(sample,30,normed=True)
#plt.hist(sample,60,normed=1)
plt.plot(x,y)
plt.show()


Расстояние К-Л


In [124]:
def fit_gvg(p,a,u,x0):
    from scipy.stats import entropy
    from scipy.optimize import minimize
    GG = gen_gamma(shapes='v,k,b')
    #fun = lambda x: entropy(GG.pdf(u,x[0],x[1],x[2]),p+1e-06)
    fun = lambda x: -(np.array(p)*np.log(GG.pdf(u,x[0],x[1],x[2])+1e-7)).sum()
    bnds = ((None, None), (0, None), (0, None))
    res = minimize(fun, x0=x0, bounds=bnds)
    return res

In [64]:
%%time
res=fit_gvg(p,a,new_grid)
print(res.message)
res=res.x
gvg=GVG()
x=np.linspace(-3,25,50)
y=[gvg.pdf(x,a,res[0],res[1],res[2]) for x in x]
plt.hist(sample,40,normed=1)
plt.plot(x,y)
plt.show()


b'ABNORMAL_TERMINATION_IN_LNSRCH'
Wall time: 12.3 s

In [ ]:
# Если сгенерировать выборку

def gen(p,a,grid):

    sample_gen=[]
    rate=10000
    for i in range(0,len(p)):
        sample0 = np.full(math.floor(p[i]*rate),grid[i]) #+ np.random.normal(grid[i],(grid[1]-grid[0])*1.5,size=math.floor(p[i]*rate))
        sample_gen= sample_gen+[i for i in list(sample0) if i>0]

    GG = gen_gamma(shapes='v,k,b',a=0.)
    v,k,b,_,_ = GG.fit(sample_gen, floc=0, fscale=1)

    x=np.linspace(-1,25,100)
    plt.hist(sample_gen,40,normed=1)
    plt.plot(x,GG.pdf(x,v,k,b))
    plt.show()

    gvg=GVG()
    x=np.linspace(-3,25,100)
    y=[gvg.pdf(x,a,v,k,b) for x in x]
    plt.hist(sample,40,normed=1)
    plt.plot(x,y,label=('fitted: a=%0.3f v=%0.3f k=%0.3f b=%0.3f' %(a,v,k,b) ))
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.03),prop={'size':10})

    plt.show()
    
gen(p,a,grid)
gen(new_p,new_a,new_grid)

In [ ]:
def pdf_dist(params1,params2):
    gvg_DIST=gvg1()
    a1,v1,k1,b1 = params1
    a2,v2,k2,b2 = params2
    
    lower = min(gvg_DIST.ppf(0.0001,a1,v1,k1,b1),gvg_DIST.ppf(0.0001,a2,v2,k2,b2))
    upper = max(gvg_DIST.ppf(0.9999,a1,v1,k1,b1),gvg_DIST.ppf(0.9999,a2,v2,k2,b2))
    
    step=(upper-lower)/100
    
    grid=np.arange(lower,upper,step)
    dist=0
    for dot in list(grid):
        dist=dist+step*abs(gvg_DIST.pdf(dot,a1,v1,k1,b1)-gvg_DIST.pdf(dot,a2,v2,k2,b2))
    
    return dist

In [ ]:
#print(pdf_dist([0.849,0.399,15.6,0.009],[2,0.8,3,1]))
print(pdf_dist([a,res[0],res[1],res[2]],[2,0.8,3,1]))

In [ ]:
print(pdf_dist([a,res[0],res[1],res[2]],[2,0.8,3,1]))

In [ ]:
gvg1=GVG()
gvg2=GVG()
abs(gvg1.pdf(10,0.849,0.399,15.6,0.009)-gvg2.pdf(10,2,0.8,3,1))

GH


In [6]:
class GIG(rv_continuous):
    def _pdf(self, x, v, m,l):
        return (l**(v/2) * x**(v-1) * np.exp(-0.5 * (m/x + l*x)))/(2*m**(v/2)*kv(v,np.sqrt(m*l)))
    def _argcheck(self, v, m,l):
          return m>1e-10 and l > 1e-10 
    
class GH:
    def cdf(self,x,a,v,m,l):
        def integrand(z,x,a,v,m,l):
            gig = GIG(a=1e-10)
            return norm.cdf((x-a*z)/math.sqrt(z))*gig.pdf(z,v,m,l)
        return quad(integrand,0.0001,np.inf,args=(x,a,v,m,l),limit=1000)[0]
    def pdf(self,x,a,v,m,l):
        return (self.cdf(x,a,v,m,l)-self.cdf(x-0.001,a,v,m,l))/0.001

In [7]:
class gh1(rv_continuous):
    def _cdf(self,x,a,v,m,l):
        def integrand(z,x,a,v,m,l):
            gig = GIG(a=1e-10)
            return norm.cdf((x-a*z)/math.sqrt(z))*gig.pdf(z,v,m,l)
        return quad(integrand,0.0001,np.inf,args=(x,a,v,m,l),limit=1000)[0]
    def _argcheck(self,a,v,m,l):
        return m>0 and l>0

In [8]:
def gh_rand(a,v, m,l,size=1000):
    gig = GIG(a=1e-10)
    rand_gig=gig.rvs(v, m,l, size=size)
    return norm.rvs(size=size) * np.sqrt(rand_gig) + a*rand_gig

In [9]:
def fit_gh(p,a,u):
    from scipy.stats import entropy
    from scipy.optimize import minimize
    #GG = gen_gamma(shapes='v,k,b')
    gig = GIG()
    #fun = lambda x: entropy(GG.pdf(u,x[0],x[1],x[2]),p+1e-06)
    fun = lambda x: -(np.array(p)*np.log(gig.pdf(u,x[0],x[1],x[2])+1e-7)).sum()
    x0 = [1, 1, 1]
    bnds = ((None, None), (0, None), (0, None))
    res = minimize(fun, x0=x0, bounds=bnds)
    return res

In [10]:
def pdf_dist_gh(params1,params2):
    gh_DIST=gh1()
    a1,v1,k1,b1 = params1
    a2,v2,k2,b2 = params2
    
    lower = min(gh_DIST.ppf(0.0001,a1,v1,k1,b1),gh_DIST.ppf(0.0001,a2,v2,k2,b2))
    upper = max(gh_DIST.ppf(0.9999,a1,v1,k1,b1),gh_DIST.ppf(0.9999,a2,v2,k2,b2))
    
    step=(upper-lower)/100
    
    grid=np.arange(lower,upper,step)
    dist=0
    for dot in list(grid):
        dist=dist+step*abs(gh_DIST.pdf(dot,a1,v1,k1,b1)-gh_DIST.pdf(dot,a2,v2,k2,b2))
    
    return dist

In [9]:
sample = gh_rand(1,1.5,1,1,2000)
grid = np.linspace(0.001,5,30)

In [28]:
params_sets = [
    #[1,1.5,1,1],
    #[-2,1.5,1,1],
    #[0,1.5,1,1],
    #[0,2,20,1],
    #[2,-3,2,4],
    #[2,-3,2,0.01],
    [-2,-3,2,0.05],
    [0,0,0.1,0.1],
    [-0.5,0,0.1,0.1]]

steps_grid = [0,10,20]

In [22]:
gh_DIST = gh1()
for i,params in enumerate(params_sets):
    np.random.seed(0)
    sample = gh_rand(params[0],params[1],params[2],params[3],1000)
    x=np.linspace(np.min(sample),np.max(sample),50)
    
    plt.hist(sample,40 ,ec='black',color='blue',normed=True)
    
    y=[gh_DIST.pdf(x,params[0],params[1],params[2],params[3]) for x in x]
    plt.plot(x,y,label='a=%0.3f v=%0.3f k=%0.3f b=%0.3f' %(params[0],params[1],params[2],params[3]))
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.08),prop={'size':10})
             
    plt.savefig('pics/gh_params'+str(i)+'.png')
    plt.clf()

In [29]:
def experiment(params_sets,steps_grid,grid_sizes):
    for i,params in enumerate(params_sets):
        np.random.seed(0)
        sample = gh_rand(params[0],params[1],params[2],params[3],1000)
        for steps in steps_grid:
            for grid_size in grid_sizes:
                print('Params: ',params,', steps:',steps,', grid:',grid_size)
                grid = np.linspace(1,40,grid_size)
                p,a,new_grid,iter_num = fit_mixture(sample,grid,0.01,irreg_grid_step=2,recalc_steps=steps,verbose=0)
                res=fit_gh(p,a,new_grid).x
                dist = pdf_dist_gh([a,res[0],res[1],res[2]],[params[0],params[1],params[2],params[3]])
                print(dist)
                print()

experiment(params_sets,steps_grid,[20])


Params:  [-2, -3, 2, 0.05]
/home/kirill/anaconda3/lib/python3.6/site-packages/ipykernel/__main__.py:3: RuntimeWarning: invalid value encountered in true_divide
  app.launch_new_instance()
/home/kirill/anaconda3/lib/python3.6/site-packages/ipykernel/__main__.py:3: RuntimeWarning: invalid value encountered in multiply
  app.launch_new_instance()
/home/kirill/anaconda3/lib/python3.6/site-packages/ipykernel/__main__.py:3: RuntimeWarning: overflow encountered in power
  app.launch_new_instance()
/home/kirill/anaconda3/lib/python3.6/site-packages/ipykernel/__main__.py:3: RuntimeWarning: overflow encountered in multiply
  app.launch_new_instance()
 , steps: 0 , grid: 20
0.353870478139

Params:  [-2, -3, 2, 0.05] , steps: 10 , grid: 20
0.073513902478

Params:  [-2, -3, 2, 0.05] , steps: 20 , grid: 20
0.0774976871991

Params:  [0, 0, 0.1, 0.1] , steps: 0 , grid: 20
0.297420946011

Params:  [0, 0, 0.1, 0.1] , steps: 10 , grid: 20
0.203816551978

Params:  [0, 0, 0.1, 0.1] , steps: 20 , grid: 20
0.206076747532

Params:  [-0.5, 0, 0.1, 0.1] , steps: 0 , grid: 20
0.476471503157

Params:  [-0.5, 0, 0.1, 0.1] , steps: 10 , grid: 20
0.272962550627

Params:  [-0.5, 0, 0.1, 0.1] , steps: 20 , grid: 20
0.272871289356


In [17]:
plt.hist(sample,40 ,ec='black',color='blue',normed=True)

x=np.linspace(np.min(sample),np.max(sample),50)
y=[gh_DIST.pdf(x,1,1.5,1,1) for x in x]
plt.plot(x,y,label='fitted: a=%0.3f v=%0.3f k=%0.3f b=%0.3f' %(1,2,3,4) )
 
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.08),prop={'size':10})
         
plt.show()



In [ ]:
p,a = fit_mixture(sample,grid,0.01)

In [ ]:
mixture = Mixture(p,a,grid)
x=np.linspace(-6,20,100)
y=[mixture.pdf(x) for x in x]

plt.hist(sample,40,normed=1)
plt.plot(x,y)
plt.show()

In [ ]:
sample_gen=[]
rate=10000
for i in range(0,len(p)):
    sample0 = np.full(math.floor(p[i]*rate),grid[i])
    sample_gen= sample_gen+[i for i in list(sample0) if i>0]

In [ ]:
gig = GIG(shapes='v,m,l',a=0.)
v,m,l,_,_ = gig.fit(sample_gen, floc=0, fscale=1)

In [ ]:
x=np.linspace(-1,12,100)
plt.hist(sample_gen,40,normed=1)
plt.plot(x,gig.pdf(x,v,m,l))
plt.show()

In [ ]:
gh=GH()
x=np.linspace(-6,20,100)
y=[gh.pdf(x,a,v,m,l) for x in x]
plt.hist(sample,40,normed=1)
plt.plot(x,y)
plt.show()

In [ ]:
for i,j in zip(p,grid):
    print(j,'  ',i)

In [ ]:
gig.pdf(0.001 ,v,m,l)

In [17]:
def fit_mixture(sample,u,eps=0.05,verbose=1,irreg_grid_step=0,recalc_steps=10):
    p = np.full(len(u), 1/len(u))
    mean = np.mean(sample)
    a = mean/(u*p).sum()
    f = np.zeros((len(u),len(sample)))
    g = np.zeros((len(u),len(sample)))
    new_likehood = grid_likehood(sample,u,p,a)
    iteration = 0
    bins=list(np.array(grid)-(grid[1]-grid[0])/2) + [grid[-1]+(grid[1]-grid[0])/2] 
    while True:
        for i in range(0,len(u)):
            for j in range(0,len(sample)):
                f[i,j]= 1/math.sqrt(u[i]) * norm.pdf((sample[j]-a*u[i])/math.sqrt(u[i]))
                 
        for i in range(0,len(u)):
            for j in range(0,len(sample)):
                g[i,j] = p[i]*f[i,j]/ (p*f[:,j]).sum()
        
        p = g.sum(axis=1)/len(sample)
        a = mean/(u*p).sum()
        prev_likehood = new_likehood
        new_likehood = grid_likehood(sample,u,p,a)
        iteration += 1
        if (verbose): print('\nIter: %d  likehood: %f' %(iteration,new_likehood))
        if ((new_likehood-prev_likehood)<eps) & (iteration>recalc_steps+1) : break
        print('p:')
        print(p)
        print('grid:')
        print(u)  
        if (irreg_grid_step):
            if ((iteration%irreg_grid_step==0) & (iteration<=recalc_steps)):
                new_bins,new_grid = move_grid(bins,p)
                #print('p after moving:')
                #print(p)
                print('new_bins:')
                print(new_bins)
                print('new_grid:')
                print(new_grid)
                new_p = piecewise(np.array(new_grid),u,p)+1e-6
                new_p = new_p/new_p.sum()
                print('new_p:')
                print(new_p) 
                bins,u,p=new_bins,new_grid,new_p
                a = mean/(u*p).sum()
        
        plot_hist(np.array(bins),p)
    return p,a,u,iteration
        
p,a,new_grid,_ = fit_mixture(sample,grid,0.01,irreg_grid_step=2,recalc_steps=10)


Iter: 1  likehood: 113.686381
p:
[ 0.10173428  0.08493377  0.07380872  0.0660369   0.0602559   0.05574939
  0.05211219  0.04909785  0.04654729  0.04435285  0.04243878  0.0407501
  0.03924577  0.03789452  0.036672    0.03555896  0.03453996  0.03360244
  0.03273607  0.03193226]
grid:
[  1.           3.05263158   5.10526316   7.15789474   9.21052632
  11.26315789  13.31578947  15.36842105  17.42105263  19.47368421
  21.52631579  23.57894737  25.63157895  27.68421053  29.73684211
  31.78947368  33.84210526  35.89473684  37.94736842  40.        ]
Iter: 2  likehood: 130.937669
p:
[ 0.17509256  0.12569008  0.09593676  0.0772176   0.06450105  0.055333
  0.04842108  0.04302812  0.03870495  0.03516296  0.03220849  0.02970685
  0.02756155  0.02570161  0.02407372  0.02263707  0.02135985  0.02021696
  0.01918826  0.01825749]
grid:
[  1.           3.05263158   5.10526316   7.15789474   9.21052632
  11.26315789  13.31578947  15.36842105  17.42105263  19.47368421
  21.52631579  23.57894737  25.63157895  27.68421053  29.73684211
  31.78947368  33.84210526  35.89473684  37.94736842  40.        ]
new_bins:
[ -2.63157895e-02   5.59840362e-01   1.14599651e+00   1.73215267e+00
   2.43307665e+00   3.24962142e+00   4.06616620e+00   5.13198596e+00
   6.21878529e+00   7.54790695e+00   9.01361946e+00   1.06657442e+01
   1.25535273e+01   1.47145754e+01   1.71785485e+01   1.99694808e+01
   2.31540012e+01   2.67724967e+01   3.09136464e+01   3.56464177e+01
   4.10263158e+01]
new_grid:
[  0.26676229   0.85291844   1.43907459   2.08261466   2.84134904
   3.65789381   4.59907608   5.67538562   6.88334612   8.28076321
   9.83968181  11.60963571  13.63405134  15.94656198  18.57401464
  21.561741    24.96324896  28.84307157  33.28003209  38.33636676]
new_p:
[ 0.0718408   0.07080767  0.06977454  0.06864027  0.06730297  0.06586377
  0.06420489  0.06230785  0.06017876  0.05771575  0.05496808  0.05184846
  0.04828033  0.04420443  0.03957342  0.03430741  0.0283121   0.02147373
  0.01365339  0.00474136]
Iter: 3  likehood: 161.660952
p:
[ 0.08806173  0.08947783  0.08802112  0.08472467  0.08017023  0.07526241
  0.06995768  0.06446191  0.05897984  0.05341149  0.04799595  0.04265206
  0.03735543  0.03212271  0.02698333  0.02192888  0.0169478   0.0120253
  0.00714397  0.00231566]
grid:
[  0.26676229   0.85291844   1.43907459   2.08261466   2.84134904
   3.65789381   4.59907608   5.67538562   6.88334612   8.28076321
   9.83968181  11.60963571  13.63405134  15.94656198  18.57401464
  21.561741    24.96324896  28.84307157  33.28003209  38.33636676]
Iter: 4  likehood: 170.132864
p:
[ 0.09777456  0.10469914  0.10420657  0.09907643  0.09114929  0.08253659
  0.07347455  0.06450428  0.05605602  0.04802796  0.04077684  0.03416922
  0.02815813  0.02274097  0.01791578  0.01363663  0.00985698  0.00653138
  0.00361727  0.0010914 ]
grid:
[  0.26676229   0.85291844   1.43907459   2.08261466   2.84134904
   3.65789381   4.59907608   5.67538562   6.88334612   8.28076321
   9.83968181  11.60963571  13.63405134  15.94656198  18.57401464
  21.561741    24.96324896  28.84307157  33.28003209  38.33636676]
new_bins:
[ -2.63157895e-02   5.42459463e-01   1.07476663e+00   1.60809830e+00
   2.16297632e+00   2.74960323e+00   3.37120927e+00   4.04499222e+00
   4.79926519e+00   5.61513586e+00   6.51623367e+00   7.50830811e+00
   8.65959262e+00   9.96044522e+00   1.14515943e+01   1.31913379e+01
   1.52739282e+01   1.78650291e+01   2.12827714e+01   2.62071146e+01
   4.10263158e+01]
new_grid:
[  0.25807184   0.80861305   1.34143246   1.88553731   2.45628977
   3.06040625   3.70810074   4.4221287    5.20720052   6.06568476
   7.01227089   8.08395037   9.31001892  10.70601975  12.32146608
  14.23263302  16.56947863  19.57390024  23.74494301  33.61671521]
new_p:
[ 0.06523472  0.06429875  0.06339292  0.0624679   0.06149757  0.06047052
  0.05936939  0.05815548  0.0568208   0.05536131  0.05375203  0.05193009
  0.04984567  0.04747236  0.04472597  0.04147683  0.037504    0.03239624
  0.02530513  0.00852232]
Iter: 5  likehood: 164.461071
p:
[ 0.07766062  0.07851442  0.07742547  0.07509076  0.07213064  0.0688635
  0.06542749  0.06182782  0.05813923  0.05442638  0.05068949  0.04685631
  0.04291281  0.03890139  0.03477932  0.03048063  0.02589323  0.02082132
  0.01490924  0.00424993]
grid:
[  0.25807184   0.80861305   1.34143246   1.88553731   2.45628977
   3.06040625   3.70810074   4.4221287    5.20720052   6.06568476
   7.01227089   8.08395037   9.31001892  10.70601975  12.32146608
  14.23263302  16.56947863  19.57390024  23.74494301  33.61671521]
Iter: 6  likehood: 171.291229
p:
[ 0.08554106  0.09027331  0.0899972   0.08653546  0.08154981  0.07591212
  0.07003312  0.06402299  0.05807416  0.0523316   0.04681975  0.04145776
  0.03625814  0.0313029   0.02656309  0.02199853  0.01754796  0.01312188
  0.00859681  0.00206237]
grid:
[  0.25807184   0.80861305   1.34143246   1.88553731   2.45628977
   3.06040625   3.70810074   4.4221287    5.20720052   6.06568476
   7.01227089   8.08395037   9.31001892  10.70601975  12.32146608
  14.23263302  16.56947863  19.57390024  23.74494301  33.61671521]
new_bins:
[ -2.63157895e-02   5.12036427e-01   1.02376228e+00   1.53530190e+00
   2.06455564e+00   2.62323905e+00   3.22049303e+00   3.86540440e+00
   4.56783755e+00   5.33710378e+00   6.18658285e+00   7.13136007e+00
   8.19340553e+00   9.39664495e+00   1.07785443e+01   1.23921024e+01
   1.43196430e+01   1.67019204e+01   1.98190968e+01   2.44054474e+01
   4.10263158e+01]
new_grid:
[  0.24286032   0.76789935   1.27953209   1.79992877   2.34389734
   2.92186604   3.54294871   4.21662097   4.95247066   5.76184332
   6.65897146   7.6623828    8.79502524  10.0875946   11.5853233
  13.35587269  15.51078172  18.26050858  22.11227208  32.71588159]
new_p:
[ 0.06686156  0.0658183   0.06480168  0.06376764  0.06268677  0.06153833
  0.06030423  0.05896563  0.05750348  0.05589525  0.05411264  0.05211884
  0.04986826  0.0472999   0.04432389  0.04080577  0.03652393  0.03106017
  0.02340665  0.00233709]
Iter: 7  likehood: 167.051879
p:
[ 0.07705031  0.07829913  0.07754772  0.07545002  0.07264835  0.06945835
  0.06603657  0.062463    0.05878366  0.05502184  0.05118482  0.04726722
  0.04325854  0.03913887  0.03486961  0.03039146  0.02560377  0.02031357
  0.0140518   0.00116139]
grid:
[  0.24286032   0.76789935   1.27953209   1.79992877   2.34389734
   2.92186604   3.54294871   4.21662097   4.95247066   5.76184332
   6.65897146   7.6623828    8.79502524  10.0875946   11.5853233
  13.35587269  15.51078172  18.26050858  22.11227208  32.71588159]
Iter: 8  likehood: 173.060487
p:
[ 0.08301044  0.0884163   0.08894245  0.08613477  0.0816415   0.07632158
  0.07062257  0.06478811  0.05896453  0.05323774  0.04765545  0.04224018
  0.03700553  0.03195421  0.02707241  0.02233641  0.01770349  0.01309099
  0.0082973   0.00056405]
grid:
[  0.24286032   0.76789935   1.27953209   1.79992877   2.34389734
   2.92186604   3.54294871   4.21662097   4.95247066   5.76184332
   6.65897146   7.6623828    8.79502524  10.0875946   11.5853233
  13.35587269  15.51078172  18.26050858  22.11227208  32.71588159]
new_bins:
[ -2.63157895e-02   4.85408356e-01   9.67473217e-01   1.44540093e+00
   1.93563306e+00   2.44884219e+00   2.99325793e+00   3.57640624e+00
   4.20603252e+00   4.89070588e+00   5.64058777e+00   6.46799660e+00
   7.38859285e+00   8.42264179e+00   9.59802492e+00   1.09542171e+01
   1.25510997e+01   1.44877192e+01   1.69524258e+01   2.04157832e+01
   4.10263158e+01]
new_grid:
[  0.22954628   0.72644079   1.20643707   1.690517     2.19223763
   2.72105006   3.28483209   3.89121938   4.5483692    5.26564682
   6.05429219   6.92829473   7.90561732   9.01033335  10.27612101
  11.75265842  13.51940946  15.72007246  18.68410448  30.72104949]
new_p:
[ 0.0650759   0.06408852  0.06313472  0.0621728   0.06117583  0.06012502
  0.05900473  0.05779977  0.05649395  0.05506864  0.05350152  0.05176478
  0.04982274  0.04762755  0.0451123   0.04217827  0.03866755  0.0342946
  0.02840475  0.00448608]
Iter: 9  likehood: 168.718124
p:
[ 0.0735323   0.07490764  0.07451309  0.07284603  0.07048685  0.06773329
  0.06473737  0.06158044  0.05830727  0.05494155  0.05149351  0.0479635
  0.04434344  0.04061592  0.03675162  0.03270372  0.02839384  0.02367591
  0.01820244  0.00227028]
grid:
[  0.22954628   0.72644079   1.20643707   1.690517     2.19223763
   2.72105006   3.28483209   3.89121938   4.5483692    5.26564682
   6.05429219   6.92829473   7.90561732   9.01033335  10.27612101
  11.75265842  13.51940946  15.72007246  18.68410448  30.72104949]
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-17-2e94e8689ab7> in <module>()
     47     return p,a,u,iteration
     48 
---> 49 p,a,new_grid,_ = fit_mixture(sample,grid,0.01,irreg_grid_step=2,recalc_steps=10)

<ipython-input-17-2e94e8689ab7> in fit_mixture(sample, u, eps, verbose, irreg_grid_step, recalc_steps)
     11         for i in range(0,len(u)):
     12             for j in range(0,len(sample)):
---> 13                 f[i,j]= 1/math.sqrt(u[i]) * norm.pdf((sample[j]-a*u[i])/math.sqrt(u[i]))
     14 
     15         for i in range(0,len(u)):

/home/kirill/anaconda3/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py in pdf(self, x, *args, **kwds)
   1646         x, loc, scale = map(asarray, (x, loc, scale))
   1647         args = tuple(map(asarray, args))
-> 1648         dtyp = np.find_common_type([x.dtype, np.float64], [])
   1649         x = np.asarray((x - loc)/scale, dtype=dtyp)
   1650         cond0 = self._argcheck(*args) & (scale > 0)

/home/kirill/anaconda3/lib/python3.6/site-packages/numpy/core/numerictypes.py in find_common_type(array_types, scalar_types)
   1016     scalar_types = [dtype(x) for x in scalar_types]
   1017 
-> 1018     maxa = _can_coerce_all(array_types)
   1019     maxsc = _can_coerce_all(scalar_types)
   1020 

/home/kirill/anaconda3/lib/python3.6/site-packages/numpy/core/numerictypes.py in _can_coerce_all(dtypelist, start)
    948     thisind = start
    949     while thisind < __len_test_types:
--> 950         newdtype = dtype(__test_types[thisind])
    951         numcoerce = len([x for x in dtypelist if newdtype >= x])
    952         if numcoerce == N:

KeyboardInterrupt: 

In [14]:
def plot_hist(bins,p):
    plt.bar(bins[:-1], p, width=bins[1:] - bins[:-1])
    plt.show()

In [ ]:
x = np.sort(np.random.rand(6))
y = np.random.rand(5)

plt.bar(x[:-1], y, width=x[1:] - x[:-1])

plt.show()

Prediction


In [68]:
import pandas as pd

In [71]:
data = np.log(pd.read_csv('KOSPI.csv',sep=';')['<CLOSE>'].values)
data = data[:-1] - data[1:]

In [95]:
plt.hist(data,40)
plt.show()



In [117]:
grid = np.linspace(3e-8,3e-6,20)
p,a,new_grid,_ = fit_mixture(data,grid,0.1,irreg_grid_step=2,recalc_steps=0)


Iter: 1  likehood: 661885.229816

Iter: 2  likehood: 777444.554904

Iter: 3  likehood: 878201.822905

Iter: 4  likehood: 949721.399259

Iter: 5  likehood: 994824.273354

Iter: 6  likehood: 1021710.474034

Iter: 7  likehood: 1037321.235321

Iter: 8  likehood: 1046225.335616

Iter: 9  likehood: 1051187.100738

Iter: 10  likehood: 1053835.765562

Iter: 11  likehood: 1055128.210887

Iter: 12  likehood: 1055629.410177

Iter: 13  likehood: 1055675.533215

Iter: 14  likehood: 1055467.955345

In [118]:
grid = np.linspace(3e-8,3e-6,20)


Out[118]:
array([ 0.31349597,  0.37218455,  0.14336513,  0.06316259,  0.0320826 ,
        0.01824597,  0.01136101,  0.00762633,  0.00546554,  0.0041591 ,
        0.00335096,  0.00285322,  0.00256229,  0.00242022,  0.00239561,
        0.00247376,  0.00265149,  0.00293434,  0.00333526,  0.00387407])

In [125]:
window_size = 200
step = 2
test = data[-window_size:]
train = data[:-window_size]
result = []


for window_pos in range(len(train)-window_size):
    if (window_pos%10==0):
        print(window_pos)
        print(res)
    window = train[window_pos:window_pos+window_size]
    p,a,new_grid,_ = fit_mixture(window,grid,0.1,irreg_grid_step=2,recalc_steps=0,verbose=0)
    if (window_pos == 0):
        x0=[1,1,1]
    else:
        x0=res
    res=fit_gvg(p,a,new_grid,x0).x
    result.append(res)


0
[ -0.2183622   25.80633303   2.47179969]
10
[ -0.19353643  17.4584136    3.49969505]
20
[ -0.19604519  17.97972071   3.69789693]
30
[ -0.19522349  17.9797069    3.69789342]
40
[ -0.19243443  17.215025     3.41942613]
50
[ -0.1880895   16.29105828   3.07173609]
60
[ -0.19013302  16.45959742   3.13297519]
70
[ -0.19371963  16.84645083   3.27614143]
80
[ -0.19313304  16.79484101   3.22075351]
90
[ -0.20045018  18.07778848   3.67997296]
100
[ -0.20574897  18.939619     3.94638471]
110
[ -0.21470747  20.72297893   4.47737923]
120
[ -0.21297911  20.17423962   4.16027528]
130
[ -0.2442095   26.97040156   6.58555708]
140
[ -0.28455179  38.9177581   11.22139106]
150
[ -0.25213693  28.58285781   7.77807363]
160
[ -0.23980685  25.3296048    6.55525155]
170
[ -0.2358411   24.33791617   6.1582923 ]
180
[ -0.21089345  19.28685218   4.2018711 ]
190
[ -0.20035013  21.37566811   1.114602  ]
200
[ -0.22702577  24.43271687   3.67138793]
210
[ -0.22390193  24.006086     3.34889216]
220
[ -0.20462251  21.45678465   1.7075843 ]
230
[ -0.20210781  21.20861667   1.49243354]
240
[ -0.18571648  20.13230511   0.51963789]
250
[ -0.19574172  20.4824399    1.17299645]
260
[ -0.18564526  22.11705988   0.32903327]
270
[ -0.20296302  18.44112565   3.39578125]
280
[ -0.20080895  18.35409034   3.28817216]
290
[ -0.19418177  17.50071279   2.81148247]
300
[ -0.19054813  17.07697612   2.57784393]
310
[ -0.18790791  16.86170307   2.47260459]
320
[ -0.18879098  16.86170118   2.47260101]
330
[ -0.18795943  16.86168521   2.47259952]
340
[ -0.18604664  16.66347156   2.39329172]
350
[ -0.18468907  16.56348366   2.35902571]
360
[ -0.18916026  17.36729744   2.81537439]
370
[ -0.19078701  17.82944182   3.08443405]
380
[ -0.19186539  17.87738727   3.17012176]
390
[ -0.19404566  18.30048609   3.44565665]
400
[ -0.19111715  17.65937634   3.23081141]
410
[ -0.18995355  17.35725844   3.1645668 ]
420
[ -0.19032886  17.47028075   3.2788268 ]
430
[ -0.19382929  17.90408778   3.50623974]
440
[ -0.1995575   19.09646796   3.97187127]
450
[ -0.20305743  19.7867907    4.22996044]
460
[ -0.20322312  19.78677713   4.22995139]
470
[ -0.20141949  19.37150847   4.03628289]
480
[ -0.20281309  19.68499024   4.14975749]
490
[ -0.20009312  19.15639149   3.95435605]
500
[ -0.20052168  19.15639519   3.95435683]
510
[ -0.20057973  19.15636937   3.95434711]
520
[ -0.20005818  19.52337567   4.1029193 ]
530
[ -0.2003814   19.52332557   4.10290702]
540
[ -0.19760871  18.66091811   3.81921187]
550
[ -0.19428022  17.63325806   3.45387749]
560
[ -0.19233823  17.01560163   3.22461319]
570
[ -0.19360597  17.0369048    3.23178436]
580
[ -0.19334329  17.03695065   3.2318039 ]
590
[ -0.1932219   17.17459562   3.28840638]
600
[ -0.19233845  17.20758674   3.32554966]
610
[ -0.19427369  17.53740006   3.46786101]
620
[ -0.19390678  17.53735552   3.46785574]
630
[ -0.18950497  16.71113417   3.23186205]
640
[ -0.18956444  16.61751778   3.22416899]
650
[ -0.19077412  16.74263032   3.29526803]
660
[ -0.18936104  16.74262384   3.29527687]
670
[ -0.18990063  17.07310686   3.49949486]
680
[ -0.19054753  17.07310146   3.49949484]
690
[ -0.1939392   17.37220911   3.65628743]
700
[ -0.19377611  17.37221045   3.65628971]
710
[ -0.19477471  17.16873332   3.60094118]
720
[ -0.19602207  17.00921846   3.53923778]
730
[ -0.19758853  17.16045685   3.5754198 ]
740
[ -0.19600944  17.10774068   3.52387985]
750
[ -0.20450964  18.90350991   4.17363147]
760
[ -0.20017089  18.29615363   3.8970698 ]
770
[ -0.19748386  17.98443146   3.76956008]
780
[ -0.19749493  17.98442218   3.76955288]
790
[ -0.1982905   17.7026998    3.59936482]
800
[ -0.20200081  18.13941008   3.73523452]
810
[ -0.20054352  17.87046464   3.58651989]
820
[ -0.20101148  17.92439942   3.52254685]
830
[ -0.21234202  20.21461863   4.22775859]
840
[ -0.20443973  18.95748687   3.56264956]
850
[ -0.20055513  18.46309955   3.2961669 ]
860
[ -0.20074797  18.4783949    3.19399406]
870
[ -0.19465301  17.32099852   2.54967023]
880
[ -0.20068927  18.30117521   2.84254564]
890
[ -0.19703538  17.94503641   2.60783044]
900
[ -0.19020411  17.16564955   2.04086134]
910
[ -0.19349854  17.90600185   2.44572703]
920
[ -0.19519792  18.2038051    2.59453772]
930
[ -0.19589878  18.46874039   2.67846621]
940
[ -0.19844841  18.66070794   2.71443596]
950
[ -0.1981842   18.64507784   2.65882604]
960
[ -0.19929712  18.86320943   2.74281004]
970
[ -0.21790807  22.0180641    4.15148446]
980
[ -0.25017402  29.06142994   7.71818   ]
990
[ -0.21506634  21.26656251   5.01082052]
1000
[ -0.20572922  19.47532802   4.33337694]
1010
[ -0.20914419  20.2523542    4.58741547]
1020
[ -0.21691225  21.67373699   5.03057713]
1030
[ -0.21166397  20.73053298   4.59562673]
1040
[ -0.21384847  21.20150272   4.6808331 ]
1050
[ -0.21287636  21.12011929   4.59693616]
1060
[ -0.21320329  21.12635222   4.54676002]
1070
[ -0.21447674  26.8468493    1.7181823 ]
1080
[ -0.20082226  26.28241102   0.72413659]
1090
[ -0.2026997   26.33634658   0.81253448]
1100
[ -0.20594748  29.64840448   0.53738016]
1110
[ -0.21057418  29.71270756   0.72747554]
1120
[ -0.19520126  29.5827965    0.21140417]