In [1]:
from __future__ import division

In [3]:
%matplotlib inline

In [4]:
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
import seaborn as sns

In [130]:
# Pin NumPy's global RNG so the random draws below are repeatable.
np.random.seed(seed=123)

In [131]:
# sample inputs: 200 draws of 10 * exp(standard normal), kept as an ascending list
xs = list(10 * np.exp(np.random.normal(size=200)))
xs.sort()

In [132]:
# Show every 10th element of xs as a compact preview instead of the full list.
xs[::10]


Out[132]:
[0.39515787320742013,
 1.7769805414250812,
 2.8157623556455276,
 3.5029077723392259,
 4.3278391252781727,
 4.9664627102815491,
 5.7757301405018175,
 6.8442484580775274,
 7.6320211165437151,
 8.6929849498956031,
 10.20523772790575,
 11.502486151701401,
 12.889352327190393,
 14.029666789765978,
 16.48292897671914,
 20.904274694411829,
 26.61090512638452,
 32.408286224693569,
 43.576345205469231,
 57.185325491301228]

In [133]:
# Plot the empirical distribution of the samples.
# NOTE(review): distplot is deprecated in newer seaborn releases; consider
# histplot/displot when upgrading (confirm against the installed version).
sns.distplot(xs)


Out[133]:
<matplotlib.axes._subplots.AxesSubplot at 0x196fd9e8>

In [134]:
# Fit a log-normal to the samples; fit() returns the (shape, loc, scale) tuple.
fit_params = st.lognorm.fit(xs)
shape, loc, scale = fit_params
# Echo the fitted parameters as the cell output.
fit_params


Out[134]:
(1.0282089571225324, -0.13241081082648995, 10.26766252850666)

In [135]:
# distribution of inputs and a log-normal fit
# Evaluate the fitted density on a regular grid, then overlay it on the
# empirical histogram / KDE of the samples.
pts = np.arange(0, 100, 0.1)
fit_density = st.lognorm.pdf(pts, shape, loc, scale)
plt.plot(pts, fit_density, 'r--', label='lognormal fit')
sns.distplot(
    xs,
    label='empirical distribution: histogram',
    kde_kws={'label': 'empirical distribution: kde plot'}
)
plt.legend(loc='best')


Out[135]:
<matplotlib.legend.Legend at 0x1a297e80>

In [136]:
# Frozen log-normal carrying the fitted parameters, so later calls need no arguments.
rv_fit = st.lognorm(shape, loc=loc, scale=scale)

In [152]:
# cumulative probabilities
# Fitted-CDF value of every sample, binned on edges stepping by 0.1 from 0.
cdf_values = rv_fit.cdf(xs)
cdf_bins = np.arange(0, 1.1, 0.1)
sns.distplot(cdf_values, bins=cdf_bins)


Out[152]:
<matplotlib.axes._subplots.AxesSubplot at 0x1bb06b38>

In [153]:
# W_i: the fitted distribution's CDF evaluated at each sample X_i.
ws = rv_fit.cdf(xs)
# Display the full array as the cell output.
ws


Out[153]:
array([ 0.00194445,  0.0052919 ,  0.00915604,  0.01222621,  0.0233798 ,
        0.02857254,  0.03141246,  0.03833217,  0.03945566,  0.0468981 ,
        0.05091318,  0.06357282,  0.06363573,  0.0757507 ,  0.08646251,
        0.08892855,  0.09516933,  0.09634453,  0.10235793,  0.10778446,
        0.1124541 ,  0.11486343,  0.12249826,  0.12520746,  0.14511574,
        0.14572242,  0.14618857,  0.14815393,  0.14821276,  0.14844292,
        0.15629154,  0.15859406,  0.16297365,  0.16818819,  0.17277457,
        0.18311414,  0.19260325,  0.19751026,  0.20087904,  0.20220122,
        0.20870598,  0.21043428,  0.21254424,  0.21752915,  0.21956433,
        0.22671683,  0.23137824,  0.23590584,  0.23754005,  0.23834684,
        0.24800669,  0.2513186 ,  0.25434292,  0.2666526 ,  0.26701138,
        0.27535349,  0.28579381,  0.2905536 ,  0.29441361,  0.29544779,
        0.29545917,  0.29555054,  0.30147791,  0.33083087,  0.33416463,
        0.33605338,  0.3364822 ,  0.34498285,  0.34911962,  0.34937381,
        0.35352257,  0.3607285 ,  0.36219874,  0.36249378,  0.36706384,
        0.36931379,  0.37022776,  0.37663567,  0.39085612,  0.39136612,
        0.39289579,  0.39324093,  0.39827754,  0.40417555,  0.4066143 ,
        0.41224526,  0.41443149,  0.42120096,  0.42882702,  0.44030743,
        0.44148186,  0.4467617 ,  0.44679511,  0.45255169,  0.45868577,
        0.46614462,  0.48338663,  0.48542993,  0.49032619,  0.49594498,
        0.50263566,  0.50622355,  0.50793141,  0.51227765,  0.52277386,
        0.52519181,  0.53063481,  0.54105488,  0.54466488,  0.54611743,
        0.54838413,  0.54965169,  0.55768576,  0.55901471,  0.56252288,
        0.56341613,  0.56397897,  0.57002662,  0.57097311,  0.57099504,
        0.59138279,  0.60067729,  0.60225226,  0.60249327,  0.60252968,
        0.61129108,  0.61383176,  0.61656151,  0.61721684,  0.62062968,
        0.62276324,  0.63760875,  0.6400584 ,  0.64200303,  0.64613461,
        0.6496511 ,  0.65436752,  0.66501506,  0.66927998,  0.67134308,
        0.68015123,  0.68067155,  0.69858833,  0.69949326,  0.70526423,
        0.72757765,  0.73509548,  0.74218648,  0.74723143,  0.74963149,
        0.75728323,  0.76055021,  0.76435489,  0.77432456,  0.77790236,
        0.77792299,  0.77936896,  0.80118193,  0.80557752,  0.8109516 ,
        0.82407891,  0.82869786,  0.83034384,  0.83245979,  0.83688053,
        0.83892634,  0.84582725,  0.84584488,  0.86368547,  0.86620921,
        0.86903604,  0.86945426,  0.88234489,  0.88469201,  0.88669548,
        0.88694882,  0.90608505,  0.90899094,  0.91242527,  0.91876962,
        0.92055403,  0.92321581,  0.92330767,  0.92389992,  0.9270714 ,
        0.92771394,  0.93045984,  0.93839524,  0.94327858,  0.94432312,
        0.95278053,  0.95383912,  0.9584386 ,  0.97006695,  0.97755906,
        0.98224729,  0.98304491,  0.98431796,  0.98933854,  0.99383031])

In [170]:
# Plot each sample X_i against its fitted-CDF value W_i.
plt.figure(figsize=(10, 10))
plt.plot(xs, ws, 'go-')
# Raw strings: "\ " is not a valid Python escape sequence, so a plain literal
# triggers an invalid-escape warning on newer interpreters. The rendered text
# is unchanged, and this now matches the raw-string title below.
plt.xlabel(r'$\ X_i$')
plt.ylabel(r'$\ W_i$')
plt.title(r'Mapping of $\ X_i$ to $\ W_i$')


Out[170]:
<matplotlib.text.Text at 0x2046cf60>

In [182]:
# Rescale W_i so that the result follows another (target) distribution.

In [183]:
# target distribution: frozen normal with loc 1 and scale 0.15
rv_tgt = st.norm(loc=1, scale=0.15)

In [181]:
# Grid over [0, 3) in steps of 0.01 on which to draw the target density.
pts = np.arange(0, 3, 0.01)
tgt_density = rv_tgt.pdf(pts)
plt.plot(pts, tgt_density)


Out[181]:
[<matplotlib.lines.Line2D at 0x1d7f5d30>]

In [189]:
# target vars: Y_i is the target distribution's ppf (inverse CDF) evaluated at each W_i
ys = rv_tgt.ppf(ws)
# Display the full array as the cell output.
ys


Out[189]:
array([ 0.5669447 ,  0.61657772,  0.64611303,  0.66250819,  0.70172931,
        0.71467007,  0.72093571,  0.73444344,  0.73644429,  0.74864441,
        0.75459052,  0.77118175,  0.77125743,  0.78486275,  0.79557023,
        0.79789264,  0.80356332,  0.8046002 ,  0.80976552,  0.81424046,
        0.81796302,  0.81984059,  0.82561173,  0.82759868,  0.84135791,
        0.8417564 ,  0.84206183,  0.84334243,  0.84338059,  0.84352977,
        0.84852748,  0.84996206,  0.85265355,  0.85579755,  0.85851136,
        0.86446587,  0.86974852,  0.87241781,  0.87422718,  0.87493233,
        0.87836209,  0.8792627 ,  0.88035634,  0.88291523,  0.88395019,
        0.88754456,  0.88985268,  0.89206983,  0.89286432,  0.89325544,
        0.89788363,  0.89944809,  0.9008672 ,  0.90655472,  0.90671845,
        0.91049484,  0.91514277,  0.91723478,  0.91891955,  0.9193692 ,
        0.91937415,  0.91941383,  0.92197681,  0.93435699,  0.93573368,
        0.93651124,  0.93668754,  0.94016476,  0.94184524,  0.94194826,
        0.94362587,  0.94652317,  0.94711183,  0.94722986,  0.949054  ,
        0.9499493 ,  0.95031247,  0.95285068,  0.95843674,  0.95863597,
        0.95923308,  0.95936771,  0.9613288 ,  0.96361694,  0.96456055,
        0.96673403,  0.96757599,  0.97017669,  0.97309581,  0.97747153,
        0.97791802,  0.97992292,  0.9799356 ,  0.98211744,  0.98443822,
        0.98725526,  0.99375166,  0.99452052,  0.99636235,  0.99847531,
        1.000991  ,  1.00234011,  1.00298236,  1.00461705,  1.0085675 ,
        1.00947828,  1.01152986,  1.01546375,  1.01682898,  1.01737869,
        1.01823699,  1.01871721,  1.02176565,  1.02227074,  1.02360531,
        1.02394542,  1.02415978,  1.02646629,  1.02682783,  1.0268362 ,
        1.03466552,  1.03826509,  1.03887717,  1.03897089,  1.03898505,
        1.04240284,  1.04339801,  1.04446936,  1.0447269 ,  1.04607025,
        1.04691192,  1.05281115,  1.05379224,  1.05457269,  1.05623582,
        1.0576568 ,  1.05957085,  1.0639284 ,  1.06568888,  1.06654373,
        1.07021826,  1.07043662,  1.07805171,  1.07844154,  1.0809403 ,
        1.09082548,  1.09424463,  1.09751514,  1.09987042,  1.10099958,
        1.10463852,  1.10621103,  1.1080572 ,  1.11297472,  1.11476921,
        1.11477961,  1.11550956,  1.12687758,  1.12925709,  1.13221127,
        1.1396533 ,  1.14235482,  1.14332874,  1.14458968,  1.14725766,
        1.14850823,  1.15280497,  1.15281611,  1.16455423,  1.16629737,
        1.1682769 ,  1.16857227,  1.17801859,  1.17981604,  1.18137101,
        1.18156902,  1.1975539 ,  1.20018504,  1.20337629,  1.20952646,
        1.21132121,  1.214056  ,  1.21415164,  1.21477047,  1.21814823,
        1.21884618,  1.22188368,  1.23121614,  1.23743593,  1.23882066,
        1.25086497,  1.25249133,  1.2599271 ,  1.28226678,  1.30086397,
        1.31538201,  1.31817072,  1.3228634 ,  1.34533262,  1.3753431 ])

In [190]:
# Plot each W_i against the corresponding Y_i.
plt.figure(figsize=(10, 10))

plt.plot(ws, ys, 'yo-')
# Raw strings: "\ " is not a valid Python escape sequence, so a plain literal
# triggers an invalid-escape warning on newer interpreters. The rendered text
# is unchanged, and this now matches the raw-string title below.
plt.xlabel(r'$\ W_i$')
plt.ylabel(r'$\ Y_i$')
plt.title(r'Mapping of $\ W_i$ to $\ Y_i$')


Out[190]:
<matplotlib.text.Text at 0x21992f28>

In [191]:
# Plot each sample X_i against the corresponding Y_i.
plt.figure(figsize=(10, 10))

plt.plot(xs, ys, 'ro-')
# Raw strings: "\ " is not a valid Python escape sequence, so a plain literal
# triggers an invalid-escape warning on newer interpreters. The rendered text
# is unchanged, and this now matches the raw-string title below.
plt.xlabel(r'$\ X_i$')
plt.ylabel(r'$\ Y_i$')
plt.title(r'Mapping of $\ X_i$ to $\ Y_i$')


Out[191]:
<matplotlib.text.Text at 0x219bfba8>

In [193]:
# distribution of outputs and the target density
# Draw the target pdf on the current `pts` grid, then overlay the empirical
# histogram / KDE of the transformed values.
tgt_density = rv_tgt.pdf(pts)
plt.plot(pts, tgt_density, 'r--', label='target distribution')
sns.distplot(
    ys,
    label='output distribution: histogram',
    kde_kws={'label': 'output distribution: kde plot'}
)
plt.legend(loc='best')


Out[193]:
<matplotlib.legend.Legend at 0x21cee908>

In [ ]: