In [1]:
from scipy.stats import gamma,rv_continuous,norm
from scipy.special import gamma as gm
from scipy.special import kv
from scipy.integrate import quad
import matplotlib.pyplot as plt
import math
import numpy as np
In [2]:
class gen_gamma(rv_continuous):
def _pdf(self, x, v, k, b):
return abs(v)*x**(k*v-1)*np.exp(-(x**v)/(b**abs(v)))/(b**(k*abs(v))*gm(k))
def _argcheck(self, v, k,b):
return k>0 and b>0
In [3]:
class GVG:
def cdf(self,x,a,v,k,b):
def integrand(z,x,a,v,k,b):
gg = gen_gamma(a=0.)
return norm.cdf((x-a*z)/math.sqrt(z))*gg.pdf(z,v,k,b)
return quad(integrand,0.0001,np.inf,args=(x,a,v,k,b))[0]
def pdf(self,x,a,v,k,b):
return (self.cdf(x,a,v,k,b)-self.cdf(x-0.001,a,v,k,b))/0.001
In [102]:
class gvg(rv_continuous):
def _cdf(self,x,a,v,k,b):
def integrand(z,x,a,v,k,b):
gg = gen_gamma(a=0.)
return norm.cdf((x-a*z)/math.sqrt(z))*gg.pdf(z,v,k,b)
return quad(integrand,0.0001,np.inf,args=(x,a,v,k,b))[0]
def _argcheck(self,a, v, k,b):
return k>0 and b>0
In [4]:
def gvg_rand(a,v, k, b,size=1000):
GG = gen_gamma(a=0.)
rand_gg=GG.rvs(v, k, b, size=size)
return norm.rvs(size=size) * np.sqrt(rand_gg) + a*rand_gg
In [5]:
#a- масштаб
plt.hist(gvg_rand(2,0.8,3,1,1000),40,normed=True)
plt.show()
In [6]:
def grid_likehood(sample,u,p,a):
summ=0
for x in sample:
for i in range(0,len(p)):
summ += p[i]/math.sqrt(u[i]) * norm.pdf((x-a*u[i])/math.sqrt(u[i]))
return summ
In [7]:
class Mixture():
def __init__(self,p,a,u):
self.p=p
self.a=a
self.u=u
def cdf(self,x):
return (self.p*norm.cdf((x-self.a*self.u)/np.sqrt(self.u))).sum()
def pdf(self,x):
return (self.cdf(x)-self.cdf(x-0.001))/0.001
In [8]:
def move_grid(grid1,p1):
class piecewise(rv_continuous):
def _pdf(self, x):
grid=grid1
p=p1
try:
res=np.zeros(x.shape[0])
except:
res=np.zeros(1)
x=[x]
for i,num in enumerate(x):
if (num<=grid[1]):
z = np.poly1d(np.polyfit([grid[0],grid[1]], [p[0],p[1]], 1))
res[i]= max(0,z(num))
elif (num>grid[-2]):
z = np.poly1d(np.polyfit([grid[-1],grid[-2]], [p[-1],p[-2]], 1))
res[i]= max(0,z(num))
else:
idx=grid.index(min([ i for i in grid if i < num], key=lambda k:abs(k-num)))
z = np.poly1d(np.polyfit([grid[idx],grid[idx+1]],[p[idx],p[idx+1]], 1))
res[i]= z(num)
return res
piece = piecewise()
n=len(grid1)+1
quant = [i/n for i in range(n) if i >0]
u = piece.ppf(quant)
p = piece.pdf(u)
return u,p
def fit_mixture(sample,u,eps=0.05,verbose=1,irreg_grid=False):
p = np.full(len(u), 1/len(u))
mean = np.mean(sample)
a = mean/(u*p).sum()
f = np.zeros((len(u),len(sample)))
g = np.zeros((len(u),len(sample)))
new_likehood = grid_likehood(sample,u,p,a)
iteration = 0
while True:
for i in range(0,len(u)):
for j in range(0,len(sample)):
f[i,j]= 1/math.sqrt(u[i]) * norm.pdf((sample[j]-a*u[i])/math.sqrt(u[i]))
for i in range(0,len(u)):
for j in range(0,len(sample)):
g[i,j] = p[i]*f[i,j]/ (p*f[:,j]).sum()
p = g.sum(axis=1)/len(sample)
a = mean/(u*p).sum()
prev_likehood = new_likehood
new_likehood = grid_likehood(sample,u,p,a)
iteration += 1
if (verbose): print('Iter: %d likehood: %f' %(iteration,new_likehood))
if (abs(new_likehood-prev_likehood)<eps): break
if (irreg_grid):
u,p = move_grid(u,p)
a = mean/(u*p).sum()
print(p)
return p,a
In [9]:
class piecewise(rv_continuous):
def _cdf(self, x):
grid=[ 0.01 , 2.5075, 5.005 , 7.5025, 10. ]
p=[ 0.25941888, 0.64792493, 0.02372754, 0.00882218, 0.06010648]
bins=list(np.array(grid)-(grid[1]-grid[0])/2) + [grid[-1]+(grid[1]-grid[0])/2]
square = np.sum(np.array(p)*(grid[1]-grid[0]))
p=p/square
try:
res=np.zeros(x.shape[0])
except:
res=np.zeros(1)
x=[x]
for i,num in enumerate(x):
#closest point
try:
idx=bins.index(min([i for i in bins if i>num], key=lambda k:abs(k-num)))
except:
res[i]= 1
continue
if (idx==0):
res[i]= 0
continue
res[i] = (np.array(p)[:idx-1] * (grid[1]-grid[0])).sum() + (num - bins[idx-1]) * p[idx-1]
return res
piece = piecewise()
#n=len(grid)+1
#quant = [i/n for i in range(n) if i >0]
#u = piece.ppf(quant)
#p = piece.pdf(u)
In [ ]:
print([ 0.01 , 2.5075, 5.005 , 7.5025, 10. ])
print(list(np.array(grid)-(grid[1]-grid[0])/2) + [grid[-1]+(grid[1]-grid[0])/2])
In [10]:
def split_hist(x,p,k):
c = np.cumsum(p*(x[1:]-x[:-1]))
res=[]
for i in range(k-1):
excess = c-(i+1)/k
ix = np.where(excess>0)[0].min()
res.append(x[ix+1]-excess[ix]/p[ix])
return res
grid=[ 0.01 , 2.5075, 5.005 , 7.5025, 10. ]
p=[ 0.25941888, 0.64792493, 0.02372754, 0.00882218, 0.06010648]
bins=list(np.array(grid)-(grid[1]-grid[0])/2) + [grid[-1]+(grid[1]-grid[0])/2]
square = np.sum(np.array(p)*(grid[1]-grid[0]))
p=p/square
split_hist(np.array(bins),np.array(p),5)
Out[10]:
In [45]:
sample = gvg_rand(2,0.8,3,1,1000)
grid = np.linspace(0.01,80,20)
In [46]:
plt.hist(sample,50,normed=True)
plt.show()
In [47]:
p,a = fit_mixture(sample,grid,0.1)
In [48]:
mixture = Mixture(p,a,grid)
#mixture = Mixture(np.array([0.2,0.2,0.2,0.2,0.2]),a,grid)
x=np.linspace(-5,30,100)
y=[mixture.pdf(x) for x in x]
plt.hist(sample,30,normed=True)
#plt.hist(sample,60,normed=1)
plt.plot(x,y)
plt.show()
In [125]:
for i,j in zip(grid,p):
print(i,j)
In [123]:
split_hist(np.array(bins),np.array(p)/(np.array(p)*(grid[1]-grid[0])).sum(),10)
Out[123]:
In [126]:
bins=list(np.array(grid)-(grid[1]-grid[0])/2) + [grid[-1]+(grid[1]-grid[0])/2]
new_bins=split_hist(np.array(bins),np.array(p)/(np.array(p)*(grid[1]-grid[0])).sum(),10)
new_bins.append(bins[0])
new_bins.append(bins[-1])
new_bins=np.array(sorted(new_bins))
new_bins
Out[126]:
In [51]:
new_grid=(new_bins[1:] + new_bins[:-1]) / 2
new_grid
Out[51]:
In [52]:
new_p,new_a = fit_mixture(sample,new_grid,0.001)
In [53]:
#mean = np.mean(sample)
#a1 = mean/(np.array([0.75,0.25])*new_grid).sum()
mixture = Mixture(np.array(new_p),new_a,np.array(new_grid))
x=np.linspace(-5,30,100)
y=[mixture.pdf(x) for x in x]
plt.hist(sample,40,normed=True)
#plt.hist(sample,60,normed=1)
plt.plot(x,y)
plt.show()
In [58]:
def fit_gvg(p,a,u):
from scipy.stats import entropy
from scipy.optimize import minimize
GG = gen_gamma(shapes='v,k,b')
fun = lambda x: entropy(GG.pdf(u,x[0],x[1],x[2]),p+1e-06)
x0 = [1, 1, 1]
bnds = ((None, None), (0, None), (0, None))
res = minimize(fun, x0=x0, bounds=bnds,tol=0.0001)
return res
In [59]:
%%time
res=fit_gvg(p,a,grid)
print(res.message)
res=res.x
gvg=GVG()
x=np.linspace(-3,25,50)
y=[gvg.pdf(x,a,res[0],res[1],res[2]) for x in x]
plt.hist(sample,40,normed=1)
plt.plot(x,y)
plt.show()
In [57]:
# Если сгенерировать выборку
def gen(p,a,grid):
sample_gen=[]
rate=10000
for i in range(0,len(p)):
sample0 = np.full(math.floor(p[i]*rate),grid[i]) #+ np.random.normal(grid[i],(grid[1]-grid[0])*1.5,size=math.floor(p[i]*rate))
sample_gen= sample_gen+[i for i in list(sample0) if i>0]
GG = gen_gamma(shapes='v,k,b',a=0.)
v,k,b,_,_ = GG.fit(sample_gen, floc=0, fscale=1)
x=np.linspace(-1,25,100)
plt.hist(sample_gen,40,normed=1)
plt.plot(x,GG.pdf(x,v,k,b))
plt.show()
gvg=GVG()
x=np.linspace(-3,25,100)
y=[gvg.pdf(x,a,v,k,b) for x in x]
plt.hist(sample,40,normed=1)
plt.plot(x,y,label=('fitted: a=%0.3f v=%0.3f k=%0.3f b=%0.3f' %(a,v,k,b) ))
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.03),prop={'size':10})
plt.show()
gen(p,a,grid)
gen(new_p,new_a,new_grid)
In [121]:
def pdf_dist(params1,params2):
gvg1=gvg()
a1,v1,k1,b1 = params1
a2,v2,k2,b2 = params2
lower = min(gvg1.ppf(0.0001,a1,v1,k1,b1),gvg1.ppf(0.0001,a2,v2,k2,b2))
upper = max(gvg1.ppf(0.9999,a1,v1,k1,b1),gvg1.ppf(0.9999,a2,v2,k2,b2))
step=(upper-lower)/100
grid=np.arange(lower,upper,step)
dist=0
for dot in list(grid):
dist=dist+step*abs(gvg1.pdf(dot,a1,v1,k1,b1)-gvg1.pdf(dot,a2,v2,k2,b2))
return dist
In [122]:
print(pdf_dist([0.849,0.399,15.6,0.009],[2,0.8,3,1]))
print(pdf_dist([a,res[0],res[1],res[2]],[2,0.8,3,1]))
In [76]:
gvg1=GVG()
gvg2=GVG()
abs(gvg1.pdf(10,0.849,0.399,15.6,0.009)-gvg2.pdf(10,2,0.8,3,1))
Out[76]:
In [117]:
gvg1 = gvg(shapes='a,v,k,b')
gvg1.ppf(0.0001,0.849,0.399,15.6,0.009)
Out[117]:
In [118]:
min(1,2)
Out[118]:
In [ ]:
count=0
for ia in [-5,0,5]:
for iv in [-1,1,10]:
for ik in [1,10,100]:
for ib in [0.01,0.1,1]:
count += 1
print('a=%f, v=%f, k=%f, b=%f' %(ia,iv,ik,ib))
sample = gvg_rand(ia,iv,ik,ib,4000)
grid = np.linspace(0.001,10,40)
p,a = fit_mixture(sample,grid,0.03,verbose=0)
sample_gen=[]
rate=10000
for i in range(0,len(p)):
sample0 = np.full(math.floor(p[i]*rate),grid[i])
sample_gen= sample_gen+[i for i in list(sample0) if i>0]
GG = gen_gamma(shapes='v,k,b',a=0.)
v,k,b,_,_ = GG.fit(sample_gen, floc=0, fscale=1)
gvg=GVG()
x=np.linspace(-3,6,100)
y=[gvg.pdf(x,a,v,k,b) for x in x]
plt.hist(sample,40,normed=1)
plt.plot(x,y,label=('generated: a=1,v=1.5,k=1,b=1 \nfitted: a=%0.3f v=%0.3f k=%0.3f b=%0.3f \nCoef error:' %(a,v,k,b) ))
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.03),prop={'size':10})
plt.savefig('diplom/%d.jpg' %(count,ia,iv,ik,ib),dpi=100)
In [ ]:
class GIG(rv_continuous):
def _pdf(self, x, v, m,l):
return (l**(v/2) * x**(v-1) * np.exp(-0.5 * (m/x + l*x)))/(2*m**(v/2)*kv(v,np.sqrt(m*l)))
def _argcheck(self, v, m,l):
return m>1e-10 and l > 1e-10
class GH:
def cdf(self,x,a,v,m,l):
def integrand(z,x,a,v,m,l):
gig = GIG(a=1e-10)
return norm.cdf((x-a*z)/math.sqrt(z))*gig.pdf(z,v,m,l)
return quad(integrand,0.0001,np.inf,args=(x,a,v,m,l),limit=1000)[0]
def pdf(self,x,a,v,m,l):
return (self.cdf(x,a,v,m,l)-self.cdf(x-0.001,a,v,m,l))/0.001
In [ ]:
def gh_rand(a,v, m,l,size=1000):
gig = GIG(a=1e-10)
rand_gig=gig.rvs(v, m,l, size=size)
return norm.rvs(size=size) * np.sqrt(rand_gig) + a*rand_gig
In [ ]:
sample = gh_rand(1,1.5,1,1,2000)
grid = np.linspace(0.001,5,30)
In [ ]:
p,a = fit_mixture(sample,grid,0.01)
In [ ]:
mixture = Mixture(p,a,grid)
x=np.linspace(-6,20,100)
y=[mixture.pdf(x) for x in x]
plt.hist(sample,40,normed=1)
plt.plot(x,y)
plt.show()
In [ ]:
sample_gen=[]
rate=10000
for i in range(0,len(p)):
sample0 = np.full(math.floor(p[i]*rate),grid[i])
sample_gen= sample_gen+[i for i in list(sample0) if i>0]
In [ ]:
gig = GIG(shapes='v,m,l',a=0.)
v,m,l,_,_ = gig.fit(sample_gen, floc=0, fscale=1)
In [ ]:
x=np.linspace(-1,12,100)
plt.hist(sample_gen,40,normed=1)
plt.plot(x,gig.pdf(x,v,m,l))
plt.show()
In [ ]:
gh=GH()
x=np.linspace(-6,20,100)
y=[gh.pdf(x,a,v,m,l) for x in x]
plt.hist(sample,40,normed=1)
plt.plot(x,y)
plt.show()
In [ ]:
for i,j in zip(p,grid):
print(j,' ',i)
In [ ]:
gig.pdf(0.001 ,v,m,l)
In [ ]: