``````

In [1]:

from scipy.stats import gamma,rv_continuous,norm
from scipy.special import gamma as gm
from scipy.special import kv
import matplotlib as mpl
import matplotlib.pyplot as plt
import math
import numpy as np

``````
``````

In [2]:

class gen_gamma(rv_continuous):
def _pdf(self, x, v, k, b):
return abs(v)*x**(k*v-1)*np.exp(-(x**v)/(b**abs(v)))/(b**(k*abs(v))*gm(k))
def _argcheck(self, v, k,b):
return k>0 and b>0

``````
``````

In [3]:

class GVG:
def cdf(self,x,a,v,k,b):
def integrand(z,x,a,v,k,b):
gg = gen_gamma(a=0.)
return norm.cdf((x-a*z)/math.sqrt(z))*gg.pdf(z,v,k,b)
def pdf(self,x,a,v,k,b):
return (self.cdf(x,a,v,k,b)-self.cdf(x-0.001,a,v,k,b))/0.001

``````
``````

In [4]:

class gvg1(rv_continuous):
def _cdf(self,x,a,v,k,b):
def integrand(z,x,a,v,k,b):
gg = gen_gamma(a=0.)
return norm.cdf((x-a*z)/math.sqrt(z))*gg.pdf(z,v,k,b)
def _argcheck(self,a, v, k,b):
return k>0 and b>0

``````
``````

In [5]:

def gvg_rand(a,v, k, b,size=1000):
GG = gen_gamma(a=0.)
rand_gg=GG.rvs(v, k, b, size=size)
return norm.rvs(size=size) * np.sqrt(rand_gg) + a*rand_gg

``````
``````

In [6]:

#a- масштаб
plt.hist(gvg_rand(2,0.8,1,1,1000),40,normed=True)
plt.show()

``````
``````

``````
``````

In [ ]:

plt.hist(gvg_rand(0,0.8,3,1,1000),40,normed=True)
plt.show()

``````
``````

In [ ]:

plt.hist(gvg_rand(-1.2,2,1,3,1000),40,normed=True)
plt.show()

``````
``````

In [ ]:

plt.hist(gvg_rand(1.2,2,1,3,1000),40,normed=True)
plt.show()

``````
``````

In [ ]:

plt.hist(gvg_rand(1.2,2,0.2,0.8,1000),40,normed=True)
plt.show()

``````
``````

In [ ]:

plt.hist(gvg_rand(0.2,2,7,0.8,1000),40,normed=True)
plt.show()

``````
``````

In [ ]:

plt.hist(gvg_rand(2,1,0.5,0.8,1000),40,normed=True)
plt.show()

``````
``````

In [ ]:

plt.hist(gvg_rand(4,1,0.3,0.8,1000),40,normed=True)
plt.show()

``````
``````

In [ ]:

plt.hist(gvg_rand(0,2,0.25,5,1000),40,normed=True)
plt.show()

``````
``````

In [ ]:

params_sets = [
[0,2,0.25,5],
[4,1,0.3,0.8],
[2,1,0.5,0.8],
[0.2,2,7,0.8],
[1.2,2,0.2,0.8],
[1.2,2,1,3],
[-1.2,2,1,3],
[0,0.8,3,1],
[2,0.8,1,1]]

steps_grid = [0,10,20]
grid_sizes = [5,10,20]

``````
``````

In [7]:

def experiment(params_sets,steps_grid,grid_sizes):
for i,params in enumerate(params_sets):
np.random.seed(0)
sample = gvg_rand(params[0],params[1],params[2],params[3],1000)
plt.hist(sample,40)
x=np.linspace(np.min(sample),np.max(sample),50)
y=[gvg.pdf(x,params[0],params[1],params[2],params[3]) for x in x]
plt.plot(x,y)
plt.savefig('pics/params'+str(i)+'.png')
for steps in steps_grid:
for grid_size in grid_sizes:
print('Params: ',params,', steps:',steps,', grid:',grid_size)
grid = np.linspace(1,40,grid_sizes)
p,a,new_grid,iter_num = fit_mixture(sample,grid,0.01,irreg_grid_step=2,recalc_steps=10)
res=fit_gvg(p,a,new_grid)
dist = pdf_dist([a,res[0],res[1],res[2]],[2,0.8,3,1])

``````
``````

In [52]:

class Mixture():
def __init__(self,p,a,u):
self.p=p
self.a=a
self.u=u
def cdf(self,x):
return (self.p*norm.cdf((x-self.a*self.u)/np.sqrt(self.u))).sum()
def pdf(self,x):
return (self.cdf(x)-self.cdf(x-0.001))/0.001

def grid_likehood(sample,u,p,a):
summ=0
u,p = np.array(u),np.array(p)
temp2 = np.sqrt(u)
temp1 = p/temp2
temp3= a*u
for x in sample:
summ += (temp1 * norm.pdf((x-temp3)/temp2)).sum()
return summ

``````
``````

In [56]:

def fit_mixture(sample,u,eps=0.05,verbose=1,irreg_grid_step=0,recalc_steps=10):
import math
p = np.full(len(u), 1/len(u))
mean = np.mean(sample)
a = mean/(u*p).sum()
f = np.zeros((len(u),len(sample)))
g = np.zeros((len(u),len(sample)))
new_likehood = grid_likehood(sample,u,p,a)
iteration = 0
bins=list(np.array(grid)-(grid[1]-grid[0])/2) + [grid[-1]+(grid[1]-grid[0])/2]
while True:
for i in range(0,len(u)):
f[i,:]= 1/math.sqrt(u[i]) * norm.pdf((sample-a*u[i])/math.sqrt(u[i]))

for j in range(0,len(sample)):
g[:,j] = p*f[:,j]/ (p*f[:,j]).sum()

p = g.sum(axis=1)/len(sample)
a = mean/(u*p).sum()
prev_likehood = new_likehood
new_likehood = grid_likehood(sample,u,p,a)
iteration += 1
if (verbose): print('\nIter: %d  likehood: %f' %(iteration,new_likehood))
if ((new_likehood-prev_likehood)<eps) & (iteration>recalc_steps+1) : break
if (irreg_grid_step):
if ((iteration%irreg_grid_step==0) & (iteration<=recalc_steps)):
new_bins,new_grid = move_grid(bins,p)
p = piecewise(new_grid,u,p)+1e-6
p = p/p.sum()
bins,u=new_bins,new_grid
a = mean/(u*p).sum()
return p,a,u,iteration

``````
``````

In [10]:

def piecewise(x,grid,p):
bins=list(np.array(grid)-(grid[1]-grid[0])/2) + [grid[-1]+(grid[1]-grid[0])/2]
square = np.sum(np.array(p)*(grid[1]-grid[0]))
p=p/square
try:
res=np.zeros(x.shape[0])
except:
res=np.zeros(1)
x=[x]
for i,num in enumerate(x):

#closest point
try:
idx_bin=bins.index(min([i for i in bins if i>num], key=lambda k:abs(k-num)))
except:
res[i]= 0
continue
if (idx_bin==0):
res[i]= 0
continue

try:
idx_grid=grid.index(min([i for i in grid if i>num], key=lambda k:abs(k-num)))
except:
res[i] = (bins[-1]-num)*p[-1]/(bins[-1]-grid[-1])
continue
if (idx_grid==0):
res[i] = 2*(num - bins[0])*p[0]/(bins[1]-bins[0])
continue

s = (grid[idx_grid]-grid[idx_grid-1])*(p[idx_grid]+p[idx_grid-1])/2
h1 = num - grid[idx_grid-1]
h2 = grid[idx_grid] - num
res[i] = (2*s - h1*p[idx_grid-1]-h2*p[idx_grid])/(h1+h2)
return res

``````
``````

In [11]:

def move_grid(bins,p):
bins=np.array(bins)
new_bins=split_hist(np.array(bins),np.array(p)/(np.array(p)*(bins[1:]-bins[:-1])).sum(),20)
new_bins.append(bins[0])
new_bins.append(bins[-1])
new_bins=np.array(sorted(new_bins))
return new_bins,(new_bins[1:] + new_bins[:-1]) / 2

``````
``````

In [12]:

def split_hist(x,p,k):
c = np.cumsum(p*(x[1:]-x[:-1]))
res=[]
for i in range(k-1):
excess = c-(i+1)/k
ix = np.where(excess>0)[0].min()
res.append(x[ix+1]-excess[ix]/p[ix])
return res

grid=[  0.01  ,   2.5075,   5.005 ,   7.5025,  10.    ]
p=[ 0.25941888,  0.64792493,  0.02372754,  0.00882218,  0.06010648]
bins=list(np.array(grid)-(grid[1]-grid[0])/2) + [grid[-1]+(grid[1]-grid[0])/2]
square = np.sum(np.array(p)*(grid[1]-grid[0]))
p=p/square

split_hist(np.array(bins),np.array(p),5)

``````
``````

Out[12]:

[0.68670741079061026,
1.8006358588910907,
2.571558510370175,
3.3424811618492591]

``````
``````

In [38]:

sample = gvg_rand(0.2,2,7,0.8,200)
grid = np.linspace(1,40,20)

``````
``````

In [25]:

plt.hist(sample,50,normed=True)

plt.show()

``````
``````

``````
``````

In [66]:

p,a,new_grid,_ = fit_mixture(sample,grid,0.01,irreg_grid_step=2,recalc_steps=0)

``````
``````

Iter: 1  likehood: 23.239135

Iter: 2  likehood: 27.106261

Iter: 3  likehood: 30.821387

Iter: 4  likehood: 33.818221

Iter: 5  likehood: 35.992622

Iter: 6  likehood: 37.501451

Iter: 7  likehood: 38.540013

Iter: 8  likehood: 39.260789

Iter: 9  likehood: 39.767907

Iter: 10  likehood: 40.129797

Iter: 11  likehood: 40.391254

Iter: 12  likehood: 40.581902

Iter: 13  likehood: 40.721675

Iter: 14  likehood: 40.824272

Iter: 15  likehood: 40.899304

Iter: 16  likehood: 40.953654

Iter: 17  likehood: 40.992340

Iter: 18  likehood: 41.019075

Iter: 19  likehood: 41.036642

Iter: 20  likehood: 41.047153

Iter: 21  likehood: 41.052223

``````
``````

In [62]:

mixture = Mixture(p,a,new_grid)
#mixture = Mixture(np.array([0.2,0.2,0.2,0.2,0.2]),a,grid)
x=np.linspace(-5,30,100)
y=[mixture.pdf(x) for x in x]
plt.hist(sample,30,normed=True)
#plt.hist(sample,60,normed=1)
plt.plot(x,y)
plt.show()

``````
``````

``````

## Расстояние К-Л

``````

In [124]:

def fit_gvg(p,a,u,x0):
from scipy.stats import entropy
from scipy.optimize import minimize
GG = gen_gamma(shapes='v,k,b')
#fun = lambda x: entropy(GG.pdf(u,x[0],x[1],x[2]),p+1e-06)
fun = lambda x: -(np.array(p)*np.log(GG.pdf(u,x[0],x[1],x[2])+1e-7)).sum()
bnds = ((None, None), (0, None), (0, None))
res = minimize(fun, x0=x0, bounds=bnds)
return res

``````
``````

In [64]:

%%time
res=fit_gvg(p,a,new_grid)
print(res.message)
res=res.x
gvg=GVG()
x=np.linspace(-3,25,50)
y=[gvg.pdf(x,a,res[0],res[1],res[2]) for x in x]
plt.hist(sample,40,normed=1)
plt.plot(x,y)
plt.show()

``````
``````

b'ABNORMAL_TERMINATION_IN_LNSRCH'

Wall time: 12.3 s

``````
``````

In [ ]:

# Если сгенерировать выборку

def gen(p,a,grid):

sample_gen=[]
rate=10000
for i in range(0,len(p)):
sample0 = np.full(math.floor(p[i]*rate),grid[i]) #+ np.random.normal(grid[i],(grid[1]-grid[0])*1.5,size=math.floor(p[i]*rate))
sample_gen= sample_gen+[i for i in list(sample0) if i>0]

GG = gen_gamma(shapes='v,k,b',a=0.)
v,k,b,_,_ = GG.fit(sample_gen, floc=0, fscale=1)

x=np.linspace(-1,25,100)
plt.hist(sample_gen,40,normed=1)
plt.plot(x,GG.pdf(x,v,k,b))
plt.show()

gvg=GVG()
x=np.linspace(-3,25,100)
y=[gvg.pdf(x,a,v,k,b) for x in x]
plt.hist(sample,40,normed=1)
plt.plot(x,y,label=('fitted: a=%0.3f v=%0.3f k=%0.3f b=%0.3f' %(a,v,k,b) ))
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.03),prop={'size':10})

plt.show()

gen(p,a,grid)
gen(new_p,new_a,new_grid)

``````
``````

In [ ]:

def pdf_dist(params1,params2):
gvg_DIST=gvg1()
a1,v1,k1,b1 = params1
a2,v2,k2,b2 = params2

lower = min(gvg_DIST.ppf(0.0001,a1,v1,k1,b1),gvg_DIST.ppf(0.0001,a2,v2,k2,b2))
upper = max(gvg_DIST.ppf(0.9999,a1,v1,k1,b1),gvg_DIST.ppf(0.9999,a2,v2,k2,b2))

step=(upper-lower)/100

grid=np.arange(lower,upper,step)
dist=0
for dot in list(grid):
dist=dist+step*abs(gvg_DIST.pdf(dot,a1,v1,k1,b1)-gvg_DIST.pdf(dot,a2,v2,k2,b2))

return dist

``````
``````

In [ ]:

#print(pdf_dist([0.849,0.399,15.6,0.009],[2,0.8,3,1]))
print(pdf_dist([a,res[0],res[1],res[2]],[2,0.8,3,1]))

``````
``````

In [ ]:

print(pdf_dist([a,res[0],res[1],res[2]],[2,0.8,3,1]))

``````
``````

In [ ]:

gvg1=GVG()
gvg2=GVG()
abs(gvg1.pdf(10,0.849,0.399,15.6,0.009)-gvg2.pdf(10,2,0.8,3,1))

``````

## GH

``````

In [6]:

class GIG(rv_continuous):
def _pdf(self, x, v, m,l):
return (l**(v/2) * x**(v-1) * np.exp(-0.5 * (m/x + l*x)))/(2*m**(v/2)*kv(v,np.sqrt(m*l)))
def _argcheck(self, v, m,l):
return m>1e-10 and l > 1e-10

class GH:
def cdf(self,x,a,v,m,l):
def integrand(z,x,a,v,m,l):
gig = GIG(a=1e-10)
return norm.cdf((x-a*z)/math.sqrt(z))*gig.pdf(z,v,m,l)
def pdf(self,x,a,v,m,l):
return (self.cdf(x,a,v,m,l)-self.cdf(x-0.001,a,v,m,l))/0.001

``````
``````

In [7]:

class gh1(rv_continuous):
def _cdf(self,x,a,v,m,l):
def integrand(z,x,a,v,m,l):
gig = GIG(a=1e-10)
return norm.cdf((x-a*z)/math.sqrt(z))*gig.pdf(z,v,m,l)
def _argcheck(self,a,v,m,l):
return m>0 and l>0

``````
``````

In [8]:

def gh_rand(a,v, m,l,size=1000):
gig = GIG(a=1e-10)
rand_gig=gig.rvs(v, m,l, size=size)
return norm.rvs(size=size) * np.sqrt(rand_gig) + a*rand_gig

``````
``````

In [9]:

def fit_gh(p,a,u):
from scipy.stats import entropy
from scipy.optimize import minimize
#GG = gen_gamma(shapes='v,k,b')
gig = GIG()
#fun = lambda x: entropy(GG.pdf(u,x[0],x[1],x[2]),p+1e-06)
fun = lambda x: -(np.array(p)*np.log(gig.pdf(u,x[0],x[1],x[2])+1e-7)).sum()
x0 = [1, 1, 1]
bnds = ((None, None), (0, None), (0, None))
res = minimize(fun, x0=x0, bounds=bnds)
return res

``````
``````

In [10]:

def pdf_dist_gh(params1,params2):
gh_DIST=gh1()
a1,v1,k1,b1 = params1
a2,v2,k2,b2 = params2

lower = min(gh_DIST.ppf(0.0001,a1,v1,k1,b1),gh_DIST.ppf(0.0001,a2,v2,k2,b2))
upper = max(gh_DIST.ppf(0.9999,a1,v1,k1,b1),gh_DIST.ppf(0.9999,a2,v2,k2,b2))

step=(upper-lower)/100

grid=np.arange(lower,upper,step)
dist=0
for dot in list(grid):
dist=dist+step*abs(gh_DIST.pdf(dot,a1,v1,k1,b1)-gh_DIST.pdf(dot,a2,v2,k2,b2))

return dist

``````
``````

In [9]:

sample = gh_rand(1,1.5,1,1,2000)
grid = np.linspace(0.001,5,30)

``````
``````

In [28]:

params_sets = [
#[1,1.5,1,1],
#[-2,1.5,1,1],
#[0,1.5,1,1],
#[0,2,20,1],
#[2,-3,2,4],
#[2,-3,2,0.01],
[-2,-3,2,0.05],
[0,0,0.1,0.1],
[-0.5,0,0.1,0.1]]

steps_grid = [0,10,20]

``````
``````

In [22]:

gh_DIST = gh1()
for i,params in enumerate(params_sets):
np.random.seed(0)
sample = gh_rand(params[0],params[1],params[2],params[3],1000)
x=np.linspace(np.min(sample),np.max(sample),50)

plt.hist(sample,40 ,ec='black',color='blue',normed=True)

y=[gh_DIST.pdf(x,params[0],params[1],params[2],params[3]) for x in x]
plt.plot(x,y,label='a=%0.3f v=%0.3f k=%0.3f b=%0.3f' %(params[0],params[1],params[2],params[3]))
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.08),prop={'size':10})

plt.savefig('pics/gh_params'+str(i)+'.png')
plt.clf()

``````
``````

In [29]:

def experiment(params_sets,steps_grid,grid_sizes):
for i,params in enumerate(params_sets):
np.random.seed(0)
sample = gh_rand(params[0],params[1],params[2],params[3],1000)
for steps in steps_grid:
for grid_size in grid_sizes:
print('Params: ',params,', steps:',steps,', grid:',grid_size)
grid = np.linspace(1,40,grid_size)
p,a,new_grid,iter_num = fit_mixture(sample,grid,0.01,irreg_grid_step=2,recalc_steps=steps,verbose=0)
res=fit_gh(p,a,new_grid).x
dist = pdf_dist_gh([a,res[0],res[1],res[2]],[params[0],params[1],params[2],params[3]])
print(dist)
print()

experiment(params_sets,steps_grid,[20])

``````
``````

Params:  [-2, -3, 2, 0.05]

/home/kirill/anaconda3/lib/python3.6/site-packages/ipykernel/__main__.py:3: RuntimeWarning: invalid value encountered in true_divide
app.launch_new_instance()
/home/kirill/anaconda3/lib/python3.6/site-packages/ipykernel/__main__.py:3: RuntimeWarning: invalid value encountered in multiply
app.launch_new_instance()
/home/kirill/anaconda3/lib/python3.6/site-packages/ipykernel/__main__.py:3: RuntimeWarning: overflow encountered in power
app.launch_new_instance()
/home/kirill/anaconda3/lib/python3.6/site-packages/ipykernel/__main__.py:3: RuntimeWarning: overflow encountered in multiply
app.launch_new_instance()

, steps: 0 , grid: 20
0.353870478139

Params:  [-2, -3, 2, 0.05] , steps: 10 , grid: 20
0.073513902478

Params:  [-2, -3, 2, 0.05] , steps: 20 , grid: 20
0.0774976871991

Params:  [0, 0, 0.1, 0.1] , steps: 0 , grid: 20
0.297420946011

Params:  [0, 0, 0.1, 0.1] , steps: 10 , grid: 20
0.203816551978

Params:  [0, 0, 0.1, 0.1] , steps: 20 , grid: 20
0.206076747532

Params:  [-0.5, 0, 0.1, 0.1] , steps: 0 , grid: 20
0.476471503157

Params:  [-0.5, 0, 0.1, 0.1] , steps: 10 , grid: 20
0.272962550627

Params:  [-0.5, 0, 0.1, 0.1] , steps: 20 , grid: 20
0.272871289356

``````
``````

In [17]:

plt.hist(sample,40 ,ec='black',color='blue',normed=True)

x=np.linspace(np.min(sample),np.max(sample),50)
y=[gh_DIST.pdf(x,1,1.5,1,1) for x in x]
plt.plot(x,y,label='fitted: a=%0.3f v=%0.3f k=%0.3f b=%0.3f' %(1,2,3,4) )

plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.08),prop={'size':10})

plt.show()

``````
``````

``````
``````

In [ ]:

p,a = fit_mixture(sample,grid,0.01)

``````
``````

In [ ]:

mixture = Mixture(p,a,grid)
x=np.linspace(-6,20,100)
y=[mixture.pdf(x) for x in x]

plt.hist(sample,40,normed=1)
plt.plot(x,y)
plt.show()

``````
``````

In [ ]:

sample_gen=[]
rate=10000
for i in range(0,len(p)):
sample0 = np.full(math.floor(p[i]*rate),grid[i])
sample_gen= sample_gen+[i for i in list(sample0) if i>0]

``````
``````

In [ ]:

gig = GIG(shapes='v,m,l',a=0.)
v,m,l,_,_ = gig.fit(sample_gen, floc=0, fscale=1)

``````
``````

In [ ]:

x=np.linspace(-1,12,100)
plt.hist(sample_gen,40,normed=1)
plt.plot(x,gig.pdf(x,v,m,l))
plt.show()

``````
``````

In [ ]:

gh=GH()
x=np.linspace(-6,20,100)
y=[gh.pdf(x,a,v,m,l) for x in x]
plt.hist(sample,40,normed=1)
plt.plot(x,y)
plt.show()

``````
``````

In [ ]:

for i,j in zip(p,grid):
print(j,'  ',i)

``````
``````

In [ ]:

gig.pdf(0.001 ,v,m,l)

``````
``````

In [17]:

def fit_mixture(sample,u,eps=0.05,verbose=1,irreg_grid_step=0,recalc_steps=10):
p = np.full(len(u), 1/len(u))
mean = np.mean(sample)
a = mean/(u*p).sum()
f = np.zeros((len(u),len(sample)))
g = np.zeros((len(u),len(sample)))
new_likehood = grid_likehood(sample,u,p,a)
iteration = 0
bins=list(np.array(grid)-(grid[1]-grid[0])/2) + [grid[-1]+(grid[1]-grid[0])/2]
while True:
for i in range(0,len(u)):
for j in range(0,len(sample)):
f[i,j]= 1/math.sqrt(u[i]) * norm.pdf((sample[j]-a*u[i])/math.sqrt(u[i]))

for i in range(0,len(u)):
for j in range(0,len(sample)):
g[i,j] = p[i]*f[i,j]/ (p*f[:,j]).sum()

p = g.sum(axis=1)/len(sample)
a = mean/(u*p).sum()
prev_likehood = new_likehood
new_likehood = grid_likehood(sample,u,p,a)
iteration += 1
if (verbose): print('\nIter: %d  likehood: %f' %(iteration,new_likehood))
if ((new_likehood-prev_likehood)<eps) & (iteration>recalc_steps+1) : break
print('p:')
print(p)
print('grid:')
print(u)
if (irreg_grid_step):
if ((iteration%irreg_grid_step==0) & (iteration<=recalc_steps)):
new_bins,new_grid = move_grid(bins,p)
#print('p after moving:')
#print(p)
print('new_bins:')
print(new_bins)
print('new_grid:')
print(new_grid)
new_p = piecewise(np.array(new_grid),u,p)+1e-6
new_p = new_p/new_p.sum()
print('new_p:')
print(new_p)
bins,u,p=new_bins,new_grid,new_p
a = mean/(u*p).sum()

plot_hist(np.array(bins),p)
return p,a,u,iteration

p,a,new_grid,_ = fit_mixture(sample,grid,0.01,irreg_grid_step=2,recalc_steps=10)

``````
``````

Iter: 1  likehood: 113.686381
p:
[ 0.10173428  0.08493377  0.07380872  0.0660369   0.0602559   0.05574939
0.05211219  0.04909785  0.04654729  0.04435285  0.04243878  0.0407501
0.03924577  0.03789452  0.036672    0.03555896  0.03453996  0.03360244
0.03273607  0.03193226]
grid:
[  1.           3.05263158   5.10526316   7.15789474   9.21052632
11.26315789  13.31578947  15.36842105  17.42105263  19.47368421
21.52631579  23.57894737  25.63157895  27.68421053  29.73684211
31.78947368  33.84210526  35.89473684  37.94736842  40.        ]

Iter: 2  likehood: 130.937669
p:
[ 0.17509256  0.12569008  0.09593676  0.0772176   0.06450105  0.055333
0.04842108  0.04302812  0.03870495  0.03516296  0.03220849  0.02970685
0.02756155  0.02570161  0.02407372  0.02263707  0.02135985  0.02021696
0.01918826  0.01825749]
grid:
[  1.           3.05263158   5.10526316   7.15789474   9.21052632
11.26315789  13.31578947  15.36842105  17.42105263  19.47368421
21.52631579  23.57894737  25.63157895  27.68421053  29.73684211
31.78947368  33.84210526  35.89473684  37.94736842  40.        ]
new_bins:
[ -2.63157895e-02   5.59840362e-01   1.14599651e+00   1.73215267e+00
2.43307665e+00   3.24962142e+00   4.06616620e+00   5.13198596e+00
6.21878529e+00   7.54790695e+00   9.01361946e+00   1.06657442e+01
1.25535273e+01   1.47145754e+01   1.71785485e+01   1.99694808e+01
2.31540012e+01   2.67724967e+01   3.09136464e+01   3.56464177e+01
4.10263158e+01]
new_grid:
[  0.26676229   0.85291844   1.43907459   2.08261466   2.84134904
3.65789381   4.59907608   5.67538562   6.88334612   8.28076321
9.83968181  11.60963571  13.63405134  15.94656198  18.57401464
21.561741    24.96324896  28.84307157  33.28003209  38.33636676]
new_p:
[ 0.0718408   0.07080767  0.06977454  0.06864027  0.06730297  0.06586377
0.06420489  0.06230785  0.06017876  0.05771575  0.05496808  0.05184846
0.04828033  0.04420443  0.03957342  0.03430741  0.0283121   0.02147373
0.01365339  0.00474136]

Iter: 3  likehood: 161.660952
p:
[ 0.08806173  0.08947783  0.08802112  0.08472467  0.08017023  0.07526241
0.06995768  0.06446191  0.05897984  0.05341149  0.04799595  0.04265206
0.03735543  0.03212271  0.02698333  0.02192888  0.0169478   0.0120253
0.00714397  0.00231566]
grid:
[  0.26676229   0.85291844   1.43907459   2.08261466   2.84134904
3.65789381   4.59907608   5.67538562   6.88334612   8.28076321
9.83968181  11.60963571  13.63405134  15.94656198  18.57401464
21.561741    24.96324896  28.84307157  33.28003209  38.33636676]

Iter: 4  likehood: 170.132864
p:
[ 0.09777456  0.10469914  0.10420657  0.09907643  0.09114929  0.08253659
0.07347455  0.06450428  0.05605602  0.04802796  0.04077684  0.03416922
0.02815813  0.02274097  0.01791578  0.01363663  0.00985698  0.00653138
0.00361727  0.0010914 ]
grid:
[  0.26676229   0.85291844   1.43907459   2.08261466   2.84134904
3.65789381   4.59907608   5.67538562   6.88334612   8.28076321
9.83968181  11.60963571  13.63405134  15.94656198  18.57401464
21.561741    24.96324896  28.84307157  33.28003209  38.33636676]
new_bins:
[ -2.63157895e-02   5.42459463e-01   1.07476663e+00   1.60809830e+00
2.16297632e+00   2.74960323e+00   3.37120927e+00   4.04499222e+00
4.79926519e+00   5.61513586e+00   6.51623367e+00   7.50830811e+00
8.65959262e+00   9.96044522e+00   1.14515943e+01   1.31913379e+01
1.52739282e+01   1.78650291e+01   2.12827714e+01   2.62071146e+01
4.10263158e+01]
new_grid:
[  0.25807184   0.80861305   1.34143246   1.88553731   2.45628977
3.06040625   3.70810074   4.4221287    5.20720052   6.06568476
7.01227089   8.08395037   9.31001892  10.70601975  12.32146608
14.23263302  16.56947863  19.57390024  23.74494301  33.61671521]
new_p:
[ 0.06523472  0.06429875  0.06339292  0.0624679   0.06149757  0.06047052
0.05936939  0.05815548  0.0568208   0.05536131  0.05375203  0.05193009
0.04984567  0.04747236  0.04472597  0.04147683  0.037504    0.03239624
0.02530513  0.00852232]

Iter: 5  likehood: 164.461071
p:
[ 0.07766062  0.07851442  0.07742547  0.07509076  0.07213064  0.0688635
0.06542749  0.06182782  0.05813923  0.05442638  0.05068949  0.04685631
0.04291281  0.03890139  0.03477932  0.03048063  0.02589323  0.02082132
0.01490924  0.00424993]
grid:
[  0.25807184   0.80861305   1.34143246   1.88553731   2.45628977
3.06040625   3.70810074   4.4221287    5.20720052   6.06568476
7.01227089   8.08395037   9.31001892  10.70601975  12.32146608
14.23263302  16.56947863  19.57390024  23.74494301  33.61671521]

Iter: 6  likehood: 171.291229
p:
[ 0.08554106  0.09027331  0.0899972   0.08653546  0.08154981  0.07591212
0.07003312  0.06402299  0.05807416  0.0523316   0.04681975  0.04145776
0.03625814  0.0313029   0.02656309  0.02199853  0.01754796  0.01312188
0.00859681  0.00206237]
grid:
[  0.25807184   0.80861305   1.34143246   1.88553731   2.45628977
3.06040625   3.70810074   4.4221287    5.20720052   6.06568476
7.01227089   8.08395037   9.31001892  10.70601975  12.32146608
14.23263302  16.56947863  19.57390024  23.74494301  33.61671521]
new_bins:
[ -2.63157895e-02   5.12036427e-01   1.02376228e+00   1.53530190e+00
2.06455564e+00   2.62323905e+00   3.22049303e+00   3.86540440e+00
4.56783755e+00   5.33710378e+00   6.18658285e+00   7.13136007e+00
8.19340553e+00   9.39664495e+00   1.07785443e+01   1.23921024e+01
1.43196430e+01   1.67019204e+01   1.98190968e+01   2.44054474e+01
4.10263158e+01]
new_grid:
[  0.24286032   0.76789935   1.27953209   1.79992877   2.34389734
2.92186604   3.54294871   4.21662097   4.95247066   5.76184332
6.65897146   7.6623828    8.79502524  10.0875946   11.5853233
13.35587269  15.51078172  18.26050858  22.11227208  32.71588159]
new_p:
[ 0.06686156  0.0658183   0.06480168  0.06376764  0.06268677  0.06153833
0.06030423  0.05896563  0.05750348  0.05589525  0.05411264  0.05211884
0.04986826  0.0472999   0.04432389  0.04080577  0.03652393  0.03106017
0.02340665  0.00233709]

Iter: 7  likehood: 167.051879
p:
[ 0.07705031  0.07829913  0.07754772  0.07545002  0.07264835  0.06945835
0.06603657  0.062463    0.05878366  0.05502184  0.05118482  0.04726722
0.04325854  0.03913887  0.03486961  0.03039146  0.02560377  0.02031357
0.0140518   0.00116139]
grid:
[  0.24286032   0.76789935   1.27953209   1.79992877   2.34389734
2.92186604   3.54294871   4.21662097   4.95247066   5.76184332
6.65897146   7.6623828    8.79502524  10.0875946   11.5853233
13.35587269  15.51078172  18.26050858  22.11227208  32.71588159]

Iter: 8  likehood: 173.060487
p:
[ 0.08301044  0.0884163   0.08894245  0.08613477  0.0816415   0.07632158
0.07062257  0.06478811  0.05896453  0.05323774  0.04765545  0.04224018
0.03700553  0.03195421  0.02707241  0.02233641  0.01770349  0.01309099
0.0082973   0.00056405]
grid:
[  0.24286032   0.76789935   1.27953209   1.79992877   2.34389734
2.92186604   3.54294871   4.21662097   4.95247066   5.76184332
6.65897146   7.6623828    8.79502524  10.0875946   11.5853233
13.35587269  15.51078172  18.26050858  22.11227208  32.71588159]
new_bins:
[ -2.63157895e-02   4.85408356e-01   9.67473217e-01   1.44540093e+00
1.93563306e+00   2.44884219e+00   2.99325793e+00   3.57640624e+00
4.20603252e+00   4.89070588e+00   5.64058777e+00   6.46799660e+00
7.38859285e+00   8.42264179e+00   9.59802492e+00   1.09542171e+01
1.25510997e+01   1.44877192e+01   1.69524258e+01   2.04157832e+01
4.10263158e+01]
new_grid:
[  0.22954628   0.72644079   1.20643707   1.690517     2.19223763
2.72105006   3.28483209   3.89121938   4.5483692    5.26564682
6.05429219   6.92829473   7.90561732   9.01033335  10.27612101
11.75265842  13.51940946  15.72007246  18.68410448  30.72104949]
new_p:
[ 0.0650759   0.06408852  0.06313472  0.0621728   0.06117583  0.06012502
0.05900473  0.05779977  0.05649395  0.05506864  0.05350152  0.05176478
0.04982274  0.04762755  0.0451123   0.04217827  0.03866755  0.0342946
0.02840475  0.00448608]

Iter: 9  likehood: 168.718124
p:
[ 0.0735323   0.07490764  0.07451309  0.07284603  0.07048685  0.06773329
0.06473737  0.06158044  0.05830727  0.05494155  0.05149351  0.0479635
0.04434344  0.04061592  0.03675162  0.03270372  0.02839384  0.02367591
0.01820244  0.00227028]
grid:
[  0.22954628   0.72644079   1.20643707   1.690517     2.19223763
2.72105006   3.28483209   3.89121938   4.5483692    5.26564682
6.05429219   6.92829473   7.90561732   9.01033335  10.27612101
11.75265842  13.51940946  15.72007246  18.68410448  30.72104949]

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-17-2e94e8689ab7> in <module>()
47     return p,a,u,iteration
48
---> 49 p,a,new_grid,_ = fit_mixture(sample,grid,0.01,irreg_grid_step=2,recalc_steps=10)

<ipython-input-17-2e94e8689ab7> in fit_mixture(sample, u, eps, verbose, irreg_grid_step, recalc_steps)
11         for i in range(0,len(u)):
12             for j in range(0,len(sample)):
---> 13                 f[i,j]= 1/math.sqrt(u[i]) * norm.pdf((sample[j]-a*u[i])/math.sqrt(u[i]))
14
15         for i in range(0,len(u)):

/home/kirill/anaconda3/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py in pdf(self, x, *args, **kwds)
1646         x, loc, scale = map(asarray, (x, loc, scale))
1647         args = tuple(map(asarray, args))
-> 1648         dtyp = np.find_common_type([x.dtype, np.float64], [])
1649         x = np.asarray((x - loc)/scale, dtype=dtyp)
1650         cond0 = self._argcheck(*args) & (scale > 0)

/home/kirill/anaconda3/lib/python3.6/site-packages/numpy/core/numerictypes.py in find_common_type(array_types, scalar_types)
1016     scalar_types = [dtype(x) for x in scalar_types]
1017
-> 1018     maxa = _can_coerce_all(array_types)
1019     maxsc = _can_coerce_all(scalar_types)
1020

/home/kirill/anaconda3/lib/python3.6/site-packages/numpy/core/numerictypes.py in _can_coerce_all(dtypelist, start)
948     thisind = start
949     while thisind < __len_test_types:
--> 950         newdtype = dtype(__test_types[thisind])
951         numcoerce = len([x for x in dtypelist if newdtype >= x])
952         if numcoerce == N:

KeyboardInterrupt:

``````
``````

In [14]:

def plot_hist(bins,p):
plt.bar(bins[:-1], p, width=bins[1:] - bins[:-1])
plt.show()

``````
``````

In [ ]:

x = np.sort(np.random.rand(6))
y = np.random.rand(5)

plt.bar(x[:-1], y, width=x[1:] - x[:-1])

plt.show()

``````

## Prediction

``````

In [68]:

import pandas as pd

``````
``````

In [71]:

data = data[:-1] - data[1:]

``````
``````

In [95]:

plt.hist(data,40)
plt.show()

``````
``````

``````
``````

In [117]:

grid = np.linspace(3e-8,3e-6,20)
p,a,new_grid,_ = fit_mixture(data,grid,0.1,irreg_grid_step=2,recalc_steps=0)

``````
``````

Iter: 1  likehood: 661885.229816

Iter: 2  likehood: 777444.554904

Iter: 3  likehood: 878201.822905

Iter: 4  likehood: 949721.399259

Iter: 5  likehood: 994824.273354

Iter: 6  likehood: 1021710.474034

Iter: 7  likehood: 1037321.235321

Iter: 8  likehood: 1046225.335616

Iter: 9  likehood: 1051187.100738

Iter: 10  likehood: 1053835.765562

Iter: 11  likehood: 1055128.210887

Iter: 12  likehood: 1055629.410177

Iter: 13  likehood: 1055675.533215

Iter: 14  likehood: 1055467.955345

``````
``````

In [118]:

grid = np.linspace(3e-8,3e-6,20)

``````
``````

Out[118]:

array([ 0.31349597,  0.37218455,  0.14336513,  0.06316259,  0.0320826 ,
0.01824597,  0.01136101,  0.00762633,  0.00546554,  0.0041591 ,
0.00335096,  0.00285322,  0.00256229,  0.00242022,  0.00239561,
0.00247376,  0.00265149,  0.00293434,  0.00333526,  0.00387407])

``````
``````

In [125]:

window_size = 200
step = 2
test = data[-window_size:]
train = data[:-window_size]
result = []

for window_pos in range(len(train)-window_size):
if (window_pos%10==0):
print(window_pos)
print(res)
window = train[window_pos:window_pos+window_size]
p,a,new_grid,_ = fit_mixture(window,grid,0.1,irreg_grid_step=2,recalc_steps=0,verbose=0)
if (window_pos == 0):
x0=[1,1,1]
else:
x0=res
res=fit_gvg(p,a,new_grid,x0).x
result.append(res)

``````
``````

0
[ -0.2183622   25.80633303   2.47179969]
10
[ -0.19353643  17.4584136    3.49969505]
20
[ -0.19604519  17.97972071   3.69789693]
30
[ -0.19522349  17.9797069    3.69789342]
40
[ -0.19243443  17.215025     3.41942613]
50
[ -0.1880895   16.29105828   3.07173609]
60
[ -0.19013302  16.45959742   3.13297519]
70
[ -0.19371963  16.84645083   3.27614143]
80
[ -0.19313304  16.79484101   3.22075351]
90
[ -0.20045018  18.07778848   3.67997296]
100
[ -0.20574897  18.939619     3.94638471]
110
[ -0.21470747  20.72297893   4.47737923]
120
[ -0.21297911  20.17423962   4.16027528]
130
[ -0.2442095   26.97040156   6.58555708]
140
[ -0.28455179  38.9177581   11.22139106]
150
[ -0.25213693  28.58285781   7.77807363]
160
[ -0.23980685  25.3296048    6.55525155]
170
[ -0.2358411   24.33791617   6.1582923 ]
180
[ -0.21089345  19.28685218   4.2018711 ]
190
[ -0.20035013  21.37566811   1.114602  ]
200
[ -0.22702577  24.43271687   3.67138793]
210
[ -0.22390193  24.006086     3.34889216]
220
[ -0.20462251  21.45678465   1.7075843 ]
230
[ -0.20210781  21.20861667   1.49243354]
240
[ -0.18571648  20.13230511   0.51963789]
250
[ -0.19574172  20.4824399    1.17299645]
260
[ -0.18564526  22.11705988   0.32903327]
270
[ -0.20296302  18.44112565   3.39578125]
280
[ -0.20080895  18.35409034   3.28817216]
290
[ -0.19418177  17.50071279   2.81148247]
300
[ -0.19054813  17.07697612   2.57784393]
310
[ -0.18790791  16.86170307   2.47260459]
320
[ -0.18879098  16.86170118   2.47260101]
330
[ -0.18795943  16.86168521   2.47259952]
340
[ -0.18604664  16.66347156   2.39329172]
350
[ -0.18468907  16.56348366   2.35902571]
360
[ -0.18916026  17.36729744   2.81537439]
370
[ -0.19078701  17.82944182   3.08443405]
380
[ -0.19186539  17.87738727   3.17012176]
390
[ -0.19404566  18.30048609   3.44565665]
400
[ -0.19111715  17.65937634   3.23081141]
410
[ -0.18995355  17.35725844   3.1645668 ]
420
[ -0.19032886  17.47028075   3.2788268 ]
430
[ -0.19382929  17.90408778   3.50623974]
440
[ -0.1995575   19.09646796   3.97187127]
450
[ -0.20305743  19.7867907    4.22996044]
460
[ -0.20322312  19.78677713   4.22995139]
470
[ -0.20141949  19.37150847   4.03628289]
480
[ -0.20281309  19.68499024   4.14975749]
490
[ -0.20009312  19.15639149   3.95435605]
500
[ -0.20052168  19.15639519   3.95435683]
510
[ -0.20057973  19.15636937   3.95434711]
520
[ -0.20005818  19.52337567   4.1029193 ]
530
[ -0.2003814   19.52332557   4.10290702]
540
[ -0.19760871  18.66091811   3.81921187]
550
[ -0.19428022  17.63325806   3.45387749]
560
[ -0.19233823  17.01560163   3.22461319]
570
[ -0.19360597  17.0369048    3.23178436]
580
[ -0.19334329  17.03695065   3.2318039 ]
590
[ -0.1932219   17.17459562   3.28840638]
600
[ -0.19233845  17.20758674   3.32554966]
610
[ -0.19427369  17.53740006   3.46786101]
620
[ -0.19390678  17.53735552   3.46785574]
630
[ -0.18950497  16.71113417   3.23186205]
640
[ -0.18956444  16.61751778   3.22416899]
650
[ -0.19077412  16.74263032   3.29526803]
660
[ -0.18936104  16.74262384   3.29527687]
670
[ -0.18990063  17.07310686   3.49949486]
680
[ -0.19054753  17.07310146   3.49949484]
690
[ -0.1939392   17.37220911   3.65628743]
700
[ -0.19377611  17.37221045   3.65628971]
710
[ -0.19477471  17.16873332   3.60094118]
720
[ -0.19602207  17.00921846   3.53923778]
730
[ -0.19758853  17.16045685   3.5754198 ]
740
[ -0.19600944  17.10774068   3.52387985]
750
[ -0.20450964  18.90350991   4.17363147]
760
[ -0.20017089  18.29615363   3.8970698 ]
770
[ -0.19748386  17.98443146   3.76956008]
780
[ -0.19749493  17.98442218   3.76955288]
790
[ -0.1982905   17.7026998    3.59936482]
800
[ -0.20200081  18.13941008   3.73523452]
810
[ -0.20054352  17.87046464   3.58651989]
820
[ -0.20101148  17.92439942   3.52254685]
830
[ -0.21234202  20.21461863   4.22775859]
840
[ -0.20443973  18.95748687   3.56264956]
850
[ -0.20055513  18.46309955   3.2961669 ]
860
[ -0.20074797  18.4783949    3.19399406]
870
[ -0.19465301  17.32099852   2.54967023]
880
[ -0.20068927  18.30117521   2.84254564]
890
[ -0.19703538  17.94503641   2.60783044]
900
[ -0.19020411  17.16564955   2.04086134]
910
[ -0.19349854  17.90600185   2.44572703]
920
[ -0.19519792  18.2038051    2.59453772]
930
[ -0.19589878  18.46874039   2.67846621]
940
[ -0.19844841  18.66070794   2.71443596]
950
[ -0.1981842   18.64507784   2.65882604]
960
[ -0.19929712  18.86320943   2.74281004]
970
[ -0.21790807  22.0180641    4.15148446]
980
[ -0.25017402  29.06142994   7.71818   ]
990
[ -0.21506634  21.26656251   5.01082052]
1000
[ -0.20572922  19.47532802   4.33337694]
1010
[ -0.20914419  20.2523542    4.58741547]
1020
[ -0.21691225  21.67373699   5.03057713]
1030
[ -0.21166397  20.73053298   4.59562673]
1040
[ -0.21384847  21.20150272   4.6808331 ]
1050
[ -0.21287636  21.12011929   4.59693616]
1060
[ -0.21320329  21.12635222   4.54676002]
1070
[ -0.21447674  26.8468493    1.7181823 ]
1080
[ -0.20082226  26.28241102   0.72413659]
1090
[ -0.2026997   26.33634658   0.81253448]
1100
[ -0.20594748  29.64840448   0.53738016]
1110
[ -0.21057418  29.71270756   0.72747554]
1120
[ -0.19520126  29.5827965    0.21140417]

``````