In [36]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.stats
import spacepy.toolbox as tb
import statsmodels.api as sm
In [37]:
# Input sample for the rest of the notebook: a 1-D series of values held as a NumPy array.
# NOTE(review): presumably GOOG share prices, judging by the variable name -- the data source,
# date range and units are not recorded in this notebook; confirm and document them.
goog = np.asarray([542.04, 545.92, 554.11, 555.22, 559.08, 550.31, 549.33, 548.9, 540.77, 539.78, 543.98, 532.71, 526.54,
520.84, 511.17, 524.51, 530.03, 537.94, 533.21, 544.49, 560.88, 572.5, 563.74, 577.35, 575.28, 570.08,
568.27, 577.36, 576.36, 577.1, 575.06, 587.99, 581.13, 587.37, 596.08, 589.27, 584.77, 579.95, 573.1,
575.62, 581.35, 583.1, 581.01, 589.72, 586.08, 581.98, 577.94, 577.33, 571.6, 569.2, 571, 577.86, 580.2,
582.56, 583.37, 584.49, 586.86, 582.16, 573.48, 574.65, 574.78, 562.73, 567.88, 568.77, 563.36, 566.37,
565.07, 573.15, 566.07, 571.6, 587.42, 585.61, 590.6, 589.02, 593.35, 595.98, 594.74, 589.47, 595.08,
573.73, 582.66, 584.78, 584.87, 579.18, 571.1, 576.08, 571.09, 582.25, 584.73, 582.34, 582.67, 575.28,
577.24, 576, 578.65, 564.62, 564.95, 556.36, 554.9, 553.37, 543.01, 544.28, 551.76, 551.35, 558.84,
560.55, 562.12, 556.33, 553.9, 544.66, 544.94, 553.93, 559.89, 560.08, 561.68, 565.95, 552.7, 545.06,
538.94, 529.77, 528.86, 520.63, 519.98, 526.65, 533.09, 529.92, 518.73, 511, 509.96, 515.14, 527.81,
527.93, 531.35, 526.66, 527.7, 517.15, 516.18, 525.16, 526.94, 534.81, 528.62, 536.1, 556.54, 536.44,
532.52, 530.6, 540.95, 564.14, 554.9, 538.15, 543.14, 569.74, 566.98, 566.88, 556.7, 559.51, 556.58,
565.42, 578.78, 578.39, 590.93, 597.98, 599.03, 605.02, 595.45, 585.81, 593.94, 603.05, 599.4, 605.18,
606.79, 609.2, 608.52, 606.85, 600.74, 607.22, 609, 609.47, 609.39, 605.65, 601.29, 601.45, 600.57, 604.83,
600.8, 599.35, 592.75, 594.49, 585.88, 588.13, 579.4, 571.03, 568.51, 566.15, 589.89, 567.13, 552.91,
560.94, 550.06, 561.35, 579.47, 581.93, 581.27, 574.69, 577.53, 573.74, 574.13, 560.93, 564.52, 564.55,
570.04, 568.86, 558.1, 551.95, 556, 559.79, 554.18, 558.64, 558.17, 555.36, 556.99, 549.76, 542.57, 541.83,
534.4, 535.95, 529.86, 534.45, 538.11, 541.79, 538.53, 534.4, 528.14, 528.56, 526.1, 526.71, 529.27, 531.02,
528.68, 522.44, 515.43, 516.52, 510.64, 512.09, 515.26, 516.26, 517.1, 515.72, 505.38, 504.79, 507.51])
In [38]:
# Fit a univariate kernel density estimate to the goog sample, using fit()'s default settings.
kde = sm.nonparametric.KDEUnivariate(goog)
kde.fit()
# Attributes of the fitted estimator worth inspecting: kde.density, kde.cdf and kde.icdf.
# kde.support, kde.cdf and kde.icdf are read by the cells below.
In [39]:
# Pin NumPy's global random state so the draws below are the same on every run.
np.random.seed(123)

# 3000 uniform draws spanning the range of the KDE support grid; the next cell uses
# them as the abscissae at which the (support -> icdf) table is interpolated.
randX = np.random.uniform(
    low=kde.support.min(),
    high=kde.support.max(),
    size=3000,
)
In [40]:
# Push every uniform draw in randX through the (kde.support -> kde.icdf) lookup table to
# produce the resampled values randY.
# NOTE(review): this amounts to inverse-transform sampling only if kde.support is evenly
# spaced and kde.icdf holds quantiles at evenly spaced probabilities on a grid of the same
# length -- confirm against the statsmodels KDEUnivariate documentation.
# NOTE(review): tb.interpol is presumably 1-D linear interpolation with arguments
# (new x, x, y) -- confirm in the spacepy.toolbox documentation.
randY = tb.interpol(randX, kde.support, kde.icdf)
In [41]:
# Side-by-side, area-normalised histograms of the original sample (left) and the
# resampled values (right), 15 bins each, for a visual comparison of the two shapes.
fig, (ax1, ax2) = plt.subplots(1, 2)

# `density=True` replaces the `normed=True` keyword, which was deprecated in
# Matplotlib 2.1 and removed in 3.1 (the old spelling now raises an error).
h1 = ax1.hist(goog, 15, density=True)
ax1.set_title('Goog')

h2 = ax2.hist(randY, 15, density=True)
ax2.set_title('Rand')
In [42]:
# Fit a second KDE, this time to the resampled values, so that its CDF can be plotted
# against the CDF of the original sample in the next cell.
kde2 = sm.nonparametric.KDEUnivariate(randY)
kde2.fit()
In [43]:
# Overlay the KDE-based cumulative distributions of the original sample (blue) and the
# resampled values (red) on a single labelled axis.
fig, ax = plt.subplots()
p1 = ax.plot(kde.support, kde.cdf, 'b', lw=3, label='Goog')
p2 = ax.plot(kde2.support, kde2.cdf, 'r', lw=3, label='Rand')
ax.set_xlabel('Value')
ax.set_ylabel('Cumulative probability')
ax.set_title('KDE cumulative distributions')
p3 = ax.legend(loc='upper left')

# Two-sample Kolmogorov-Smirnov test on the raw samples (not on the smoothed curves).
# It needs the scipy.stats submodule to be imported explicitly; a bare `import scipy`
# is not guaranteed to expose it.
ks_stat, pval = scipy.stats.ks_2samp(goog, randY)
print("K-S Stat:{0} p-value:{1}".format(ks_stat, pval))
In [43]: