In [1]:
%matplotlib inline

In [4]:
import pylab
import numpy as np
import astroML
from astroML import stats
import astroML.density_estimation
import scipy.stats
import scipy.special
import scipy.interpolate
import matplotlib.patches

Uniform


In [5]:
# Draw 1000 points uniform on [0, 1) and summarize them with quartiles;
# the median of a large uniform sample should sit near 0.5.
u = np.random.random(1000)
q25, q50, q75 = np.percentile(u, [25, 50, 75])
print(q50)


0.511942519611

In [6]:
# Frozen uniform distribution on [0, 2): left edge 0, width 2.
uniform_dist = scipy.stats.uniform(0, 2)
r = uniform_dist.rvs(10)   # ten random draws
p = uniform_dist.pdf(1)    # pdf evaluated at x=1 (constant 1/width)
print(r, p)


[ 1.25424678  1.75007529  1.52795338  0.07255817  0.01432285  0.84686213
  1.68211983  0.17726036  0.99015736  1.47319231] 0.5

Normal


In [7]:
# Estimate the width of a standard-normal sample with astroML's robust
# sigmaG statistic (quartile-based width estimator).
np.random.seed(0)
x = np.random.normal(size=1000) # 1000 normally
# distributed points
# Call through astroML.stats explicitly: a later cell runs
# `from scipy import stats`, rebinding the bare name `stats` to
# scipy.stats (which has no sigmaG), so the bare `stats.sigmaG`
# breaks on a fresh top-to-bottom re-run.
astroML.stats.sigmaG(x)


Out[7]:
0.96767271916940412

In [8]:
from scipy import stats

# Standard normal: mean = 0, stdev = 1.  In the original cell the
# statement `r = dist.rvs(10)` had been merged into the trailing
# comment, so `r` was never assigned; restore it on its own line.
dist = stats.norm(0, 1)  # mean = 0, stdev = 1
r = dist.rvs(10)         # ten random draws
p = dist.pdf(0)          # pdf evaluated at x=0
print(p)


0.398942280401

In [9]:
# Gauss error function evaluated at x = 1; displayed as the cell output.
erf_at_one = scipy.special.erf(1)
erf_at_one


Out[9]:
0.84270079294971478

Binomial


In [10]:
# Binomial with N = 20 trials and success probability b = 0.7.  In the
# original cell the statement `r = dist.rvs(10)` had been merged into
# the trailing comment, so `r` was never assigned; restore it.
dist = scipy.stats.binom(20, 0.7)  # N = 20, b = 0.7
r = dist.rvs(10)  # ten random draws
p = dist.pmf(8)   # prob. evaluated at k=8
print(p)


0.0038592819309

In [11]:
# Single multinomial draw: allocate 10 trials across three categories
# with probabilities 0.2 / 0.3 / 0.5.
vals = np.random.multinomial(
    n=10,
    pvals=[0.2, 0.3, 0.5],  # pvals sum to 1
)
print(vals)


[3 3 4]

Poisson


In [12]:
# Poisson distribution with rate mu = 5.
poisson_dist = scipy.stats.poisson(5)
r = poisson_dist.rvs(10)  # ten random draws
p = poisson_dist.pmf(3)   # prob. evaluated at k=3
print(p)


0.140373895814

Lorentzian


In [39]:
# Cauchy (Lorentzian) with location mu = 0 and scale gamma = 1.  In the
# original cell the statement `r = dist.rvs(10)` had been merged into
# the trailing comment, so `r` was never assigned; restore it.
dist = scipy.stats.cauchy(0, 1)  # mu = 0, gamma = 1
r = dist.rvs(10)  # ten random draws
p = dist.pdf(3)   # pdf evaluated at x=3
print(p)


0.0318309886184

Laplace


In [41]:
# Laplace with location mu = 0 and scale delta = 0.5.  In the original
# cell the statement `r = dist.rvs(10)` had been merged into the
# trailing comment, so `r` was never assigned; restore it.
dist = scipy.stats.laplace(0, 0.5)  # mu = 0, delta = 0.5
r = dist.rvs(10)  # ten random draws
p = dist.pdf(3)   # pdf evaluated at x=3
print(p)


0.00247875217667

$\chi^2$ Distribution


In [49]:
# Chi-square distribution with k = 5 degrees of freedom.
dist = scipy.stats.chi2(5) # k = 5
r = dist.rvs(10000) # 10000 random draws (original comment said "ten")
p = dist.pdf(1) # pdf evaluated at x=1
print(p, r[:2])
_ = pylab.hist(r, 100, lw=0, alpha=0.7) # histogram of the draws; `_ =` suppresses the repr


0.080656908173 [ 3.492439    5.95034351]

In [52]:
# Gamma function at 5: Gamma(5) = 4! = 24; displayed as the cell output.
gamma_of_five = scipy.special.gamma(5)
gamma_of_five


Out[52]:
24.0

Student's t


In [54]:
# Student's t distribution with k = 5 degrees of freedom.
dist = scipy.stats.t(5) # k = 5
r = dist.rvs(10000) # 10000 random draws (original comment said "ten")
p = dist.pdf(4) # pdf evaluated at x=4 (computed but not displayed)
_ = pylab.hist(r, 100, lw=0, alpha=0.7) # histogram of the draws


Fisher's F


In [55]:
# Fisher's F distribution with d1 = 2, d2 = 3 degrees of freedom.  In
# the original cell the statement `r = dist.rvs(10)` had been merged
# into the trailing comment, so `r` was never assigned; restore it.
dist = scipy.stats.f(2, 3)  # d1 = 2, d2 = 3
r = dist.rvs(10)  # ten random draws
p = dist.pdf(1)   # pdf evaluated at x=1
print(p)


0.278854800927

Beta, Gamma


In [57]:
# Beta distribution with shape parameters alpha = 0.5 and beta = 1.5.
beta_dist = scipy.stats.beta(0.5, 1.5)
r = beta_dist.rvs(10)   # ten random draws
p = beta_dist.pdf(0.6)  # pdf evaluated at x=0.6
print(p)


0.519797867489

In [59]:
# Gamma distribution with shape k = 1, location 0, and scale theta = 2.
gamma_dist = scipy.stats.gamma(1, 0, 2)
r = gamma_dist.rvs(10)  # ten random draws
p = gamma_dist.pdf(1)   # pdf evaluated at x=1
print(p)


0.303265329856

In [60]:
# Double Weibull with shape k = 1, location 0, and scale lambda = 2.
dist = scipy.stats.dweibull(1, 0, 2)  # k = 1, loc = 0, lambda = 2
r = dist.rvs(10)  # ten random draws
p = dist.pdf(1)   # pdf evaluated at x=1
# Display the density like every sibling distribution cell does; the
# original computed `p` but never showed it.
print(p)

Multivariate


In [15]:
# Correlated bivariate gaussian: draw 100,000 points from a 2-D normal
# with strong off-diagonal covariance (0.9), then show the sample both
# as a scatter plot and as a 2-D histogram image.
np.random.seed(0)
mean_vec = [1, 2]
cov_mat = [[1, 0.9],
           [0.9, 1]]
samples = np.random.multivariate_normal(mean_vec, cov_mat, 100000)
x, y = samples.T
pylab.figure(figsize=(12, 6))
pylab.subplot(1, 2, 1)
pylab.scatter(x, y, lw=0)
pylab.subplot(1, 2, 2)
counts, x_edges, y_edges = np.histogram2d(x, y, bins=(101, 101))
pylab.pcolormesh(x_edges, y_edges, counts)


Out[15]:
<matplotlib.collections.QuadMesh at 0x11f6d6710>
/Applications/Pineapple.app/Contents/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == str('face'):

In [88]:
# Excerpt from http://www.astroml.org/book_figures/chapter3/fig_robust_pca.html
# Draw a correlated 2-D gaussian sample, fit it robustly, and overlay
# the fitted ellipse on a scatter plot of the points.
np.random.seed(0)
# Call through astroML.stats explicitly: the earlier `from scipy import
# stats` cell rebinds the bare name `stats` to scipy.stats, which has
# neither bivariate_normal nor fit_bivariate_normal, so this cell fails
# on a fresh top-to-bottom run with the bare `stats.` prefix.
X = astroML.stats.bivariate_normal(mu=[1, 2], sigma_1=2, sigma_2=1,
                                   alpha=2, size=1000)
x, y = X.T

mean, sigma1, sigma2, alpha = astroML.stats.fit_bivariate_normal(x, y, robust=True)
print(mean, sigma1, sigma2, alpha)
ax = pylab.gca()
pylab.scatter(x, y, lw=0, alpha=0.2, marker=',')
E = matplotlib.patches.Ellipse(mean, sigma1, sigma2, np.rad2deg(alpha),
                               ec='k', fc='none')
ax.add_patch(E)


[0.95620082605106349, 1.9918026519970979] 1.94388420421 0.979717667221 -1.19345230558
Out[88]:
<matplotlib.patches.Ellipse at 0x112fef5c0>
/Applications/Pineapple.app/Contents/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == str('face'):

Correlation Coefficients


In [93]:
# Three correlation measures on two unrelated random arrays: Pearson's
# linear r, Spearman's rank rho, and Kendall's tau.  Each call also
# returns a two-sided p-value.
x, y = np.random.random((2, 100))  # two random arrays
corr_coeff, p_value = scipy.stats.pearsonr(x, y)   # linear correlation
rho, p_value = scipy.stats.spearmanr(x, y)         # rank correlation
tau, p_value = scipy.stats.kendalltau(x, y)        # concordance of pairs
print(rho, tau)


-0.159735973597 -0.111111111111

Recreate distribution


In [96]:
# Build a two-component gaussian mixture sample: 60% of the points from
# N(-1.3, 0.5) and 40% from N(+1.3, 0.5), 1000 points total.
Ndata = 1000
dists = (scipy.stats.norm(-1.3, 0.5),
         scipy.stats.norm(1.3, 0.5))
fracs = (0.6, 0.4)
# Use a list comprehension (np.hstack on a bare generator is deprecated
# and rejected by modern numpy) and integer sample sizes (`f * Ndata`
# is a float, which rvs's size argument should not be).
x = np.hstack([d.rvs(int(f * Ndata)) for d, f in zip(dists, fracs)])
_ = pylab.hist(x, 10)



In [104]:
# Clone the mixture sample: fit an empirical distribution to `x` (from
# the previous cell) and draw a fresh, ten-times-larger sample from it.
Nclone = 10000
empirical = astroML.density_estimation.EmpiricalDistribution(x)
x_cloned = empirical.rvs(Nclone)

# Two-sample KS test: a large p-value means the cloned sample is
# statistically indistinguishable from the original.
D, p = scipy.stats.ks_2samp(x, x_cloned)
print("KS test: D = %.2g; p = %.2g" % (D, p))
_ = pylab.hist(x_cloned, 20)


KS test: D = 0.0059; p = 1

In [122]:
# Resample `x` by inverting its empirical CDF with a spline: the left
# panel compares the original and resampled cumulative distributions
# (offset for visibility), the right panel histograms the new sample.
sx = sorted(x)
Px_cuml = np.linspace(0, 1, Ndata)

# Spline through (cumulative probability, sorted value) pairs acts as an
# interpolated inverse CDF.
tck = scipy.interpolate.splrep(Px_cuml, sx)

# Evaluate the inverse CDF on an evenly spaced, 10x denser grid.
Px_cuml_sample = np.linspace(0, 1, 10 * Ndata)
x_sample = scipy.interpolate.splev(Px_cuml_sample, tck)

pylab.figure(figsize=(12, 6))
pylab.subplot(1, 2, 1)
pylab.plot(sx, Px_cuml, '.')
pylab.plot(x_sample, Px_cuml_sample + 0.1)  # add offset for display
pylab.subplot(1, 2, 2)
_ = pylab.hist(x_sample, 30)



In [ ]: