In [1]:
import numpy as np
import pandas as pd
from ggplot import *
import simple_access
import statsmodels.api as sm
from scipy import stats
%matplotlib inline

In [28]:
# Event-quality cuts, one clause per bound; joined below into a single
# expression that is handed to ``DataFrame.query``.
# NOTE(review): the qzpartOF +/-2.25 pair is already implied by the +/-1.25
# pair, and pzpartOF has no cut at all -- confirm which variable was intended.
sel = [
    'qzpartOF>-1.25', 'qzpartOF<1.25',
    'ptNF>5.0', 'ptNF<200.0',
    'qzpartOF>-2.25', 'qzpartOF<2.25',
    'prpart1OF>-1.25', 'prpart1OF<1.25',
    'prpart2OF>-1.25', 'prpart2OF<1.25',
    'qrpart1OF>-1.25', 'qrpart1OF<1.25',
    'qrpart2OF>-1.25', 'qrpart2OF<1.25',
]
s1 = ' and '.join(sel)

In [3]:
# Load the listed quantities through the project-local ``simple_access``
# helper; the result is queried like a DataFrame in the cells below.
rec = simple_access.chainer(
    izip=7,
    rrqs=[
        'pzpartOF', 'ptNF',
        'prpart1OF', 'prpart2OF',
        'qzpartOF', 'qrpart1OF', 'qrpart2OF',
    ],
    cuts=['cGoodEv_v53'],
    eventcuts=['cRandom_133'],
    selections=['cGoodEv_v53==1.0', 'cRandom_133==0.0'],
    dtype='cf',
)


Load time:  6.9011349678 s

In [ ]:
# Two-column (ptNF, pzpartOF) sample of the events that pass the s1 cuts.
# Built directly from `rec` rather than from `df`: `df` is only assigned in a
# later cell, so reading it here raises NameError on a fresh top-to-bottom run.
data = rec.query(s1)[['ptNF', 'pzpartOF']]

# Joint kernel density estimate: both variables continuous ('cc'), bandwidth
# from the normal-reference rule of thumb.
kern = sm.nonparametric.KDEMultivariate(
    data=data.values,
    var_type='cc',
    bw='normal_reference',
)

In [29]:
# Scatter of prpart1OF against ptNF for the events passing the s1 cuts.
p = (
    ggplot(aes(x='ptNF', y='prpart1OF'), data=rec.query(s1))
    + geom_point(alpha=0.5)
)
p


Out[29]:
<ggplot: (11654817)>

In [30]:
# Same ptNF / prpart1OF scatter, with each point coloured by its qzpartOF value.
p_color = (
    ggplot(aes(x='ptNF', y='prpart1OF', color = 'qzpartOF'), data=rec.query(s1))
    + geom_point(alpha=0.5)
    + scale_colour_gradient2(low='blueviolet', high='darkorange')
)

In [31]:
# Render the qzpartOF-coloured scatter as this cell's output.
p_color


Out[31]:
<ggplot: (11721529)>

In [7]:
# Bin the selected events on a 30 x 30 (ptNF, pzpartOF) grid, then reshape the
# counts into long form twice: once keyed by ptNF, once keyed by pzpartOF.
df = rec.query(s1)
H = np.histogram2d(df.ptNF.values, df.pzpartOF.values, bins=30)
counts, pt_edges, pz_edges = H

# Each bin is labelled by its upper edge (the first edge is dropped).
pt_upper = pt_edges[1:]
pz_upper = pz_edges[1:]

# h1: rows follow ptNF bins; h2 is the transposed view, rows follow pzpartOF bins.
h1 = pd.DataFrame(counts, index=pt_upper, columns=pz_upper)
h2 = pd.DataFrame(counts.T, index=pz_upper, columns=pt_upper)

# Copy the row labels into a regular column so melt can keep them as the id.
h1['ptNF'] = h1.index
h2['pzpartOF'] = h2.index

h1_melt = pd.melt(h1, id_vars=['ptNF'])
h2_melt = pd.melt(h2, id_vars=['pzpartOF'])

In [8]:
# Step plots of the binned counts, one facet per bin of the other variable.
# The half-written `stat_function(func=kern.pdf, args=` layer was dropped from
# p1: it left the parenthesis unclosed and `args=` without a value, so the
# whole cell failed to parse.
# TODO: re-add a KDE overlay once its per-facet arguments are worked out.
p1 = (
    ggplot(aes(x='ptNF', y='value'), data=h1_melt)
    + geom_step()
    + facet_wrap("variable")
)
p2 = (
    ggplot(aes(x='pzpartOF', y='value'), data=h2_melt)
    + geom_step()
    + facet_wrap("variable")
)

In [9]:
# Render the ptNF step plots (faceted on "variable") as this cell's output.
p1


Out[9]:
<ggplot: (11723493)>

In [27]:
# Render the pzpartOF step plots (faceted on "variable") as this cell's output.
p2


Out[27]:
<ggplot: (11723289)>

In [11]:


In [12]:
# Dimensions of the array that is passed to the KDE and empirical-likelihood fits.
data.values.shape


Out[12]:
(22663, 2)

In [13]:


In [16]:
# Spot-check: evaluate the fitted density at a single observed point.
# `.ix` was ambiguous between label and positional lookup and has been removed
# from pandas; `.iloc[1]` selects the second row by position explicitly.
# NOTE(review): if the original relied on the row *labelled* 1, use
# `data.loc[1]` instead -- confirm which was intended.
kern.pdf(data.iloc[1])


Out[16]:
array(0.003506839988981963)

In [18]:
# Empirical-likelihood descriptive statistics for the two-column sample, then
# the confidence-region contour for the mean over a [-2, 2] x [-2, 2] grid
# evaluated in steps of 0.1 along each axis.
emp = sm.emplike.DescStatMV(data.values)
cp = emp.mv_mean_contour(
    mu1_low=-2, mu1_upp=2,
    mu2_low=-2, mu2_upp=2,
    step1=.1, step2=.1,
)


/usr/local/anaconda/lib/python2.7/site-packages/numpy/core/numeric.py:178: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  a = empty(shape, dtype, order)
/usr/local/anaconda/lib/python2.7/site-packages/matplotlib/figure.py:371: UserWarning: matplotlib is currently using a non-GUI backend, so cannot show the figure
  "matplotlib is currently using a non-GUI backend, "

In [2]:



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-2-7ed0097d7e9e> in <module>()
----> 1 df

NameError: name 'df' is not defined

In [ ]: