In [111]:
%pylab inline
import random
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.cbook import violin_stats
from scipy import stats
import statsmodels.api as sm
import weighted
import pandas as pd
# Notebook-wide settings and the synthetic ("fake") sample used by the KDE cells.
fs = 10  # fontsize for subplot titles

# Seed NumPy's global RNG so the sample (and any random weights drawn later)
# is the same on every Restart & Run All.
np.random.seed(1234)
data = np.random.normal(size=100)  # 100 draws from a standard normal
In [174]:
# Compare a weighted and an unweighted KDE of `data` on one shared grid.

# NOTE(review): `nobs` is not used anywhere in this cell; kept only in case a
# later cell reads it -- delete if nothing does.
nobs = 300

# 100 evenly spaced evaluation points. The values previously pasted inline
# (twice) were this same linspace printed to 8 decimal places.
grid = np.linspace(-2.72108736, 2.11269507, 100)

# One random weight per observation, as floats (drawn from the integers 0..4999).
weights = np.random.randint(5000, size=len(data)).astype(float)

# Weighted estimate. fft=False selects the direct (non-FFT) computation,
# which is the code path that is given the weights here.
dens = sm.nonparametric.KDEUnivariate(data)
dens.fit(fft=False, weights=weights)
print(dens.evaluate(grid))

# Unweighted estimate on the same grid, for comparison.
dens = sm.nonparametric.KDEUnivariate(data)
dens.fit(fft=False)
print(dens.evaluate(grid))
In [175]:
# Sanity check: fit the KDE with uniform integer weights supplied as a pandas
# Series, and look at the weights the kernel holds after fit().
x = sm.nonparametric.KDEUnivariate(data)
weights = pd.Series([1] * len(data))  # one weight per observation
print(weights)
x.fit(fft=False, weights=weights)
print(x.kernel.weights)

# 100 evenly spaced evaluation points. The values previously pasted inline
# were this same linspace printed to 8 decimal places.
grid = np.linspace(-2.72108736, 2.11269507, 100)
y = np.array(x.evaluate(grid))
y
In [ ]:
In [196]:
# Drop the sample arrays from the namespace before the plotting cell rebuilds
# `data`. A plain `del` raises NameError when the name is missing (nothing
# visible in this notebook ever defines `data2`), so remove them tolerantly.
globals().pop("data", None)
globals().pop("data2", None)
In [183]:
import copy as pythoncopy
In [223]:
# One row, two side-by-side panels: axes[0] gets the stock violinplot,
# axes[1] gets the violin drawn from the custom weighted stats.
fig, axes = plt.subplots(1, 2, figsize=(16, 10))
def vdensity_with_weights(weights):
    """Build a weighted-KDE density callback for ``violin_stats``.

    Parameters
    ----------
    weights : array_like
        One weight per observation of the dataset that will later be handed
        to the returned callback. Converted to a float array.

    Returns
    -------
    callable
        ``vdensity(data, coords)``, which fits a ``KDEUnivariate`` to ``data``
        using ``weights`` and returns the density evaluated at ``coords``.
    """
    # Coerce once, up front: the callers in this notebook build float arrays
    # by hand, but a plain list or an integer array should work just as well.
    weights = np.asarray(weights, dtype=float)

    def vdensity(data, coords):
        """Return the weighted KDE of ``data`` evaluated at ``coords``.

        Raises
        ------
        ValueError
            If ``data`` does not have exactly one observation per weight.
        """
        # Integer samples (such as the -2/+2 demo data) are promoted to float.
        data = np.asarray(data, dtype=float)
        if data.size != weights.size:
            # Fail early with a readable message instead of an opaque error
            # from inside the KDE fit.
            raise ValueError(
                "weights has %d entries but data has %d observations"
                % (weights.size, data.size)
            )
        weighted_cost = sm.nonparametric.KDEUnivariate(data)
        # fft=False selects the direct (non-FFT) computation, which is the
        # code path that is given the weights here.
        weighted_cost.fit(fft=False, weights=weights)
        return weighted_cost.evaluate(coords)

    return vdensity
def custom_violin_stats(data, weights, points=100):
    """Compute violin-plot statistics for one dataset with observation weights.

    The density curve comes from ``violin_stats`` driven by the weighted KDE
    callback from ``vdensity_with_weights``; the ``mean`` and ``median``
    entries are then replaced by their weighted counterparts.

    Parameters
    ----------
    data : array_like
        The observations. Only the first entry of the ``violin_stats`` result
        is updated, so this is meant for a single 1-D dataset.
    weights : array_like
        One weight per observation in ``data``.
    points : int, optional
        Number of points at which the density is evaluated; forwarded to
        ``violin_stats``. Defaults to 100, the value used implicitly before
        this parameter existed.

    Returns
    -------
    list of dict
        The ``violin_stats`` result with ``mean``, ``median``, ``min`` and
        ``max`` of the first entry overwritten.
    """
    # Weighted median = weighted 0.5 quantile.
    median = weighted.quantile_1D(data, weights, 0.5)
    # Weighted mean; the sum of weights is not needed, so it is not requested.
    mean = np.ma.average(data, weights=np.asarray(weights))

    results = violin_stats(data, vdensity_with_weights(weights), points=points)

    entry = results[0]
    entry[u"mean"] = mean
    entry[u"median"] = median
    # min/max are taken over all observations, regardless of their weight.
    entry[u"min"] = np.min(data)
    entry[u"max"] = np.max(data)
    return results
# Demo sample: 25 observations at -2 followed by 75 at +2.
# (The random draws that used to precede `data` and `weights` were overwritten
# on the very next line, so they are gone.)
data = np.array([-2]*25+[2]*75)

# Left panel: matplotlib's stock, unweighted violin as the reference.
axes[0].violinplot(data, [0], points=80, vert=False, showmeans=True,
                   showextrema=True, showmedians=True)

# Zero weight on the first and last 25 observations: every -2 value is
# switched off and only 50 of the +2 values carry weight.
weights = np.array([0.]*25+[1.]*50+[0.]*25)
vpstats1 = custom_violin_stats(data, weights)

# Right panel: violin drawn directly from the precomputed weighted stats.
tmp = axes[1].violin(vpstats1, vert=False, showmeans=True,
                     showextrema=True, showmedians=True)
for pc in tmp['bodies']:
    pc.set_facecolor('red')
    pc.set_edgecolor('black')

axes[0].set_title('Custom violinplot 4', fontsize=fs)
axes[1].set_title('Custom violinplot 5', fontsize=fs)

# Hide the tick labels on the y axis of both panels.
for ax in axes.flatten():
    ax.set_yticklabels([])

fig.suptitle("Violin Plotting Examples")
# NOTE(review): hspace is the spacing between subplot rows, and this figure has
# a single row, so this likely has no visible effect -- was wspace intended?
fig.subplots_adjust(hspace=0.4)
plt.show()
In [222]:
In [191]:
# Display the stats returned by custom_violin_stats for inspection.
vpstats1
Out[191]:
In [ ]:
In [ ]: