In [111]:
%pylab inline
import random
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.cbook import violin_stats
from scipy import stats
import statsmodels.api as sm
import weighted
import pandas as pd

# Synthetic sample used by the KDE cells below.
fs = 10  # fontsize passed to the subplot titles
SEED = 1234  # fixed seed so a fresh "Restart & Run All" draws the same sample
np.random.seed(SEED)
data = np.random.normal(size=100)


Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['random']
`%matplotlib` prevents importing * from pylab and numpy

In [174]:
# Compare a weighted and an unweighted kernel density estimate of `data`,
# both evaluated on the same grid.
nobs = 300  # NOTE(review): not used in this cell; kept only in case other cells read it

# 100 evenly spaced evaluation points.  This matches, to the 8 decimals that
# were pasted in by hand, the literal grid that used to be repeated for every
# evaluate() call.
eval_grid = np.linspace(-2.72108736, 2.11269507, 100)

# One random non-negative weight per observation.  The draw uses the global
# NumPy RNG, so seed it beforehand if the printed densities must be repeatable.
weights = np.random.randint(5000, size=len(data)).astype(float)

# First pass: weighted fit.  Second pass (weights=None): plain fit.
# fft=False for both: the weighted fit needs the non-FFT path (per the
# KDEUnivariate.fit documentation -- confirm for the installed version), and
# using one code path keeps the two curves directly comparable.
for fit_weights in (weights, None):
    dens = sm.nonparametric.KDEUnivariate(data)
    dens.fit(fft=False, weights=fit_weights)
    print(dens.evaluate(eval_grid))


[ 0.01000019  0.01189764  0.01398684  0.01625782  0.01869834  0.02129593
  0.02404034  0.02692621  0.02995578  0.0331413   0.03650684  0.04008918
  0.04393742  0.0481113   0.0526778   0.05770643  0.06326316  0.06940352
  0.07616555  0.08356326  0.09158154  0.1001733   0.10925954  0.11873284
  0.12846426  0.1383134   0.14814077  0.15782138  0.16725801  0.17639276
  0.18521528  0.19376655  0.20213732  0.21046113  0.21890203  0.22763814
  0.23684234  0.24666195  0.25719937  0.26849567  0.28051883  0.29315795
  0.30622431  0.3194593   0.33254876  0.34514257  0.35687786  0.36740378
  0.3764058   0.38362728  0.38888666  0.3920888   0.39322963  0.39239374
  0.38974554  0.38551464  0.37997697  0.37343333  0.36618729  0.35852424
  0.35069345  0.34289441  0.33526843  0.32789585  0.32079874  0.31394828
  0.30727574  0.30068559  0.29406939  0.28731883  0.28033705  0.27304728
  0.26539852  0.25736812  0.2489618   0.2402113   0.23117047  0.22191021
  0.21251293  0.20306673  0.1936599   0.18437581  0.17528851  0.16645931
  0.15793444  0.14974394  0.14190198  0.13440837  0.12725119  0.12041032
  0.11386125  0.10757897  0.10154121  0.09573082  0.09013701  0.0847552
  0.0795859   0.07463256  0.06989909  0.06538745]
[ 0.01799717  0.02091009  0.0239643   0.02710584  0.03027791  0.03342569
  0.03650149  0.03946972  0.0423112   0.04502621  0.04763599  0.05018233
  0.05272524  0.05533882  0.05810569  0.06111045  0.06443278  0.06814082
  0.07228555  0.0768967   0.08198069  0.08752089  0.09348033  0.09980662
  0.10643887  0.11331581  0.12038445  0.12760836  0.13497457  0.14249832
  0.15022487  0.158228    0.16660509  0.1754689   0.18493692  0.1951188
  0.20610331  0.21794572  0.23065702  0.24419587  0.25846414  0.27330663
  0.28851501  0.30383588  0.31898246  0.33364899  0.34752681  0.36032099
  0.37176621  0.38164069  0.38977711  0.39606979  0.40047753  0.40302203
  0.40378217  0.40288463  0.40049196  0.39678907  0.39196951  0.38622278
  0.37972372  0.37262484  0.365052    0.35710357  0.34885264  0.34035159
  0.33163809  0.3227415   0.31368865  0.30450821  0.29523314  0.28590109
  0.27655286  0.26722955  0.25796886  0.24880143  0.23974786  0.23081694
  0.2220053   0.21329869  0.20467459  0.19610576  0.18756442  0.1790264
  0.1704748   0.16190302  0.15331658  0.1447339   0.13618598  0.12771496
  0.11937187  0.11121377  0.10330036  0.09569037  0.08843801  0.08158959
  0.07518059  0.06923338  0.06375589  0.05874124]

In [175]:
# Sanity check: feed unit weights, supplied as a pandas Series, to the
# weighted KDE and look at what ends up stored on the fitted kernel.
x = sm.nonparametric.KDEUnivariate(data)

# Float dtype, length taken from `data` instead of a hard-coded 100.
# NOTE(review): statsmodels appears to normalise the weights object in place
# during fit(); integer weights could then be truncated or rejected --
# confirm against the installed version.
weights = pd.Series(np.ones(len(data)))
print(weights.head())  # a preview is enough: every entry is identical
x.fit(fft=False, weights=weights)
print(x.kernel.weights[:5])  # weights as held by the kernel after fitting

# 100 evenly spaced evaluation points; equal to the previously pasted literal
# grid to its 8 printed decimals.
eval_grid = np.linspace(-2.72108736, 2.11269507, 100)
y = np.asarray(x.evaluate(eval_grid))
y

In [ ]:


In [196]:
# Drop the scratch samples from the notebook namespace.  pop(..., None) keeps
# this cell re-runnable: unlike `del`, it does not raise NameError when a name
# was never defined or has already been removed.
globals().pop("data", None)
globals().pop("data2", None)

In [183]:
import copy as pythoncopy

In [223]:
# One row, two panels: axes[0] gets the stock violinplot, axes[1] the weighted one.
fig, axes = plt.subplots(1, 2, figsize=(16, 10))

def vdensity_with_weights(weights):
    """Build a weighted-KDE callback in the ``method(data, coords)`` form.

    Parameters
    ----------
    weights : array-like
        One weight per observation of the sample the callback will receive.
        Lists, integer arrays and pandas Series are accepted; the values are
        converted to a float ndarray before fitting.

    Returns
    -------
    callable
        ``vdensity(data, coords)``: fits ``sm.nonparametric.KDEUnivariate`` to
        ``data`` using ``weights`` (non-FFT path) and returns the result of
        evaluating the fitted estimator at ``coords``.
    """
    def vdensity(data, coords):
        # Give the estimator a private float copy on every call.
        # NOTE(review): statsmodels' fit() appears to normalise the weights
        # object in place -- confirm for the installed version.  With a copy,
        # the caller's array is never modified and integer input cannot be
        # truncated, whichever way the library behaves.
        fit_weights = np.array(weights, dtype=float)

        weighted_cost = sm.nonparametric.KDEUnivariate(data)
        weighted_cost.fit(fft=False, weights=fit_weights)
        return weighted_cost.evaluate(coords)
    return vdensity

def custom_violin_stats(data, weights):
    """Violin statistics for ``data`` with weight-aware density and summaries.

    The density curve comes from ``violin_stats`` driven by the callback that
    ``vdensity_with_weights(weights)`` returns.  Entry 0 of that result then
    has its mean and median replaced by their weighted counterparts and its
    min/max set from ``data``.  Only entry 0 is adjusted, so this is intended
    for a single sample.

    Parameters
    ----------
    data : array-like
        Sample values.
    weights : array-like
        One weight per element of ``data``.

    Returns
    -------
    list of dict
        The list produced by ``violin_stats``, with entry 0 updated in place.
    """
    # Weighted summaries are computed up front, before the density fit.
    weighted_median = weighted.quantile_1D(data, weights, 0.5)
    weighted_mean, _ = np.ma.average(data, weights=list(weights), returned=True)

    stats_list = violin_stats(data, vdensity_with_weights(weights))

    # Overwrite the summary fields of the first entry.
    stats_list[0].update({
        u"mean": weighted_mean,
        u"median": weighted_median,
        u"min": np.min(data),
        u"max": np.max(data),
    })
    return stats_list

# Toy sample: 25 observations at -2 followed by 75 observations at +2.
data = np.array([-2]*25 + [2]*75)

# Left panel: matplotlib's built-in violin of the raw (unweighted) sample.
axes[0].violinplot(data, [0], points=80, vert=False, showmeans=True, showextrema=True, showmedians=True)

# Weights switch off the first and last 25 observations, so only
# observations 25..74 (all equal to +2) contribute.
weights = np.array([0.]*25 + [1.]*50 + [0.]*25)
vpstats1 = custom_violin_stats(data, weights)

# Right panel: violin drawn from the weighted statistics, recoloured so the
# two panels are easy to tell apart.
weighted_parts = axes[1].violin(vpstats1, vert=False, showmeans=True, showextrema=True, showmedians=True)
for body in weighted_parts['bodies']:
    body.set_facecolor('red')
    body.set_edgecolor('black')

axes[0].set_title('Unweighted violinplot', fontsize=fs)
axes[1].set_title('Weighted violinplot', fontsize=fs)

# The single violin per panel needs no tick labels on the y axis.
for ax in axes.flat:
    ax.set_yticklabels([])

fig.suptitle("Violin Plotting Examples")
plt.show()



In [222]:


In [191]:
# Display the statistics list returned by custom_violin_stats() in the plotting cell.
vpstats1


Out[191]:
[{u'coords': array([ -2.43624537e+00,  -2.37676511e+00,  -2.31728484e+00,
          -2.25780458e+00,  -2.19832432e+00,  -2.13884405e+00,
          -2.07936379e+00,  -2.01988352e+00,  -1.96040326e+00,
          -1.90092299e+00,  -1.84144273e+00,  -1.78196246e+00,
          -1.72248220e+00,  -1.66300193e+00,  -1.60352167e+00,
          -1.54404140e+00,  -1.48456114e+00,  -1.42508087e+00,
          -1.36560061e+00,  -1.30612035e+00,  -1.24664008e+00,
          -1.18715982e+00,  -1.12767955e+00,  -1.06819929e+00,
          -1.00871902e+00,  -9.49238757e-01,  -8.89758493e-01,
          -8.30278228e-01,  -7.70797963e-01,  -7.11317699e-01,
          -6.51837434e-01,  -5.92357169e-01,  -5.32876905e-01,
          -4.73396640e-01,  -4.13916376e-01,  -3.54436111e-01,
          -2.94955846e-01,  -2.35475582e-01,  -1.75995317e-01,
          -1.16515052e-01,  -5.70347875e-02,   2.44547714e-03,
           6.19257418e-02,   1.21406006e-01,   1.80886271e-01,
           2.40366536e-01,   2.99846800e-01,   3.59327065e-01,
           4.18807330e-01,   4.78287594e-01,   5.37767859e-01,
           5.97248124e-01,   6.56728388e-01,   7.16208653e-01,
           7.75688918e-01,   8.35169182e-01,   8.94649447e-01,
           9.54129712e-01,   1.01360998e+00,   1.07309024e+00,
           1.13257051e+00,   1.19205077e+00,   1.25153104e+00,
           1.31101130e+00,   1.37049156e+00,   1.42997183e+00,
           1.48945209e+00,   1.54893236e+00,   1.60841262e+00,
           1.66789289e+00,   1.72737315e+00,   1.78685342e+00,
           1.84633368e+00,   1.90581395e+00,   1.96529421e+00,
           2.02477448e+00,   2.08425474e+00,   2.14373500e+00,
           2.20321527e+00,   2.26269553e+00,   2.32217580e+00,
           2.38165606e+00,   2.44113633e+00,   2.50061659e+00,
           2.56009686e+00,   2.61957712e+00,   2.67905739e+00,
           2.73853765e+00,   2.79801792e+00,   2.85749818e+00,
           2.91697845e+00,   2.97645871e+00,   3.03593897e+00,
           3.09541924e+00,   3.15489950e+00,   3.21437977e+00,
           3.27386003e+00,   3.33334030e+00,   3.39282056e+00,
           3.45230083e+00]),
  u'max': 3.4523008275660736,
  u'mean': -0.14926421657755892,
  u'median': -0.1387769416370631,
  u'min': -2.4362453740273291,
  u'vals': array([ 0.04246559,  0.04717308,  0.05207452,  0.05718135,  0.06251461,
          0.06810317,  0.07398126,  0.08018548,  0.086752  ,  0.09371439,
          0.10110218,  0.10894057,  0.11725081,  0.1260511 ,  0.1353573 ,
          0.14518296,  0.1555381 ,  0.16642662,  0.17784218,  0.18976316,
          0.20214731,  0.2149269 ,  0.22800544,  0.24125667,  0.25452625,
          0.26763638,  0.28039279,  0.29259365,  0.3040392 ,  0.31454116,
          0.32393108,  0.33206675,  0.33883664,  0.34416221,  0.34799868,
          0.35033462,  0.3511911 ,  0.35062068,  0.34870646,  0.34556094,
          0.3413243 ,  0.33616155,  0.33025788,  0.32381184,  0.31702651,
          0.31009875,  0.30320763,  0.29650303,  0.29009566,  0.2840499 ,
          0.27838021,  0.27305185,  0.26798569,  0.26306684,  0.25815591,
          0.25310183,  0.24775473,  0.24197784,  0.23565717,  0.22870858,
          0.22108189,  0.21276209,  0.20376826,  0.19415043,  0.18398533,
          0.17337122,  0.16242246,  0.15126399,  0.14002601,  0.12883883,
          0.11782814,  0.10711068,  0.09679048,  0.08695588,  0.07767738,
          0.06900654,  0.06097599,  0.05360035,  0.04687824,  0.04079485,
          0.03532493,  0.03043589,  0.02609071,  0.0222504 ,  0.01887587,
          0.01592917,  0.01337417,  0.01117664,  0.00930408,  0.0077253 ,
          0.00641006,  0.00532866,  0.0044519 ,  0.0037511 ,  0.00319839,
          0.00276716,  0.00243256,  0.00217203,  0.00196576,  0.00179703])}]

In [ ]:


In [ ]: