notebook.community

Edit and run



In [1]:

    
import numpy as np
import pandas as pd



In [2]:

    
import bokeh as bk
from bokeh.io import output_notebook,show,gridplot
output_notebook()









    





    
        
        Loading BokehJS ...
    
    Warning: Requesting CDN BokehJS version '0.11.1dev7' from Bokeh development version '0.11.1dev7-dirty'. This configuration is unsupported and may not work!



In [3]:

    
print np.__version__
print pd.__version__
print bk.__version__
print bk.__file__









    



1.10.2
0.17.1
0.11.1dev7-dirty
/home/chbrandt/fido/pynotes/photoz_bokeh/issue_3774/bokeh/bokeh/__init__.pyc



In [4]:

    
np.random.seed(1234509876)
signal = np.random.normal(0.5,0.1,900) 
instr = np.random.poisson(0.5,100)
sample = np.concatenate((signal,instr[instr>0]),axis=0)
np.random.shuffle(sample)
df = pd.DataFrame({'sample':sample})
df.describe()



In [5]:

    
from bokeh.charts import Histogram
p = Histogram(df,'sample')
show(p)









    






    







    Out[5]:




<Bokeh Notebook handle for In[5]>



In [6]:

    
nbins = 10
bins = np.linspace(0,1,nbins)
df['bins'] = pd.cut(df['sample'],bins)
df['quantil'] = pd.qcut(df['sample'],nbins)
print df.describe(include='all')

hnan = df.isnull().any().values
nnan = df.isnull().sum().values
_f = '{:<10}\t{}\t{:>10}'
#print _f.format('\n','Has NaN?','How many?')
#for i,col in enumerate(df.columns):
#    print _f.format(col,hnan[i],nnan[i])









    



            sample            bins     quantil
count   943.000000             933         943
unique         NaN               8          10
top            NaN  (0.444, 0.556]  (0.664, 3]
freq           NaN             397          95
mean      0.535862             NaN         NaN
std       0.211561             NaN         NaN
min       0.167255             NaN         NaN
25%       0.441506             NaN         NaN
50%       0.506249             NaN         NaN
75%       0.575700             NaN         NaN
max       3.000000             NaN         NaN



In [7]:

    
print df.groupby('bins').describe()









    



                          sample
bins                            
(0, 0.111]     count    0.000000
               mean          NaN
               std           NaN
               min           NaN
               25%           NaN
               50%           NaN
               75%           NaN
               max           NaN
(0.111, 0.222] count    2.000000
               mean     0.180787
               std      0.019136
               min      0.167255
               25%      0.174021
               50%      0.180787
               75%      0.187552
               max      0.194318
(0.222, 0.333] count   35.000000
               mean     0.295076
               std      0.023906
               min      0.223564
               25%      0.284188
               50%      0.293745
               75%      0.310848
               max      0.332206
(0.333, 0.444] count  206.000000
               mean     0.399358
               std      0.030983
               min      0.334101
               25%      0.376196
               50%      0.405030
...                          ...
(0.556, 0.667] std      0.031175
               min      0.555934
               25%      0.571801
               50%      0.589558
               75%      0.621899
               max      0.666396
(0.667, 0.778] count   45.000000
               mean     0.699060
               std      0.028432
               min      0.666998
               25%      0.674254
               50%      0.689047
               75%      0.720131
               max      0.769311
(0.778, 0.889] count    2.000000
               mean     0.784747
               std      0.004156
               min      0.781808
               25%      0.783278
               50%      0.784747
               75%      0.786216
               max      0.787685
(0.889, 1]     count   33.000000
               mean     1.000000
               std      0.000000
               min      1.000000
               25%      1.000000
               50%      1.000000
               75%      1.000000
               max      1.000000

[72 rows x 1 columns]



In [8]:

    
from bokeh.charts import BoxPlot
p = BoxPlot(df,values='sample',label='quantil')
show(p)









    






    







    Out[8]:




<Bokeh Notebook handle for In[8]>



In [9]:

    
brandt = None



In [10]:

    
import matplotlib.pyplot as plt

ret1 = df.boxplot(column='sample',by='bins',grid=False)
plt.ylim([0,1.1])
plt.xticks(rotation=90)

fig1 = plt.gcf()

from bokeh import mpl
show(mpl.to_bokeh())









    






    







    Out[10]:




<Bokeh Notebook handle for In[10]>



In [11]:

    
%matplotlib inline
import matplotlib.pyplot as plt

ret2 = df.boxplot(column='sample',by='bins',grid=False)
plt.ylim([0,1.1])
plt.xticks(rotation=90)

fig2 = plt.gcf()

plt.show()



In [12]:

    
import seaborn as sns
print sns.__version__
from bokeh import mpl
from bokeh.io import output_notebook, show

output_notebook()
iris = sns.load_dataset("iris")
sns.violinplot(iris.species,iris.sepal_length)
show(mpl.to_bokeh())









    



0.7.0






    





    
        
        Loading BokehJS ...
    
    Warning: Requesting CDN BokehJS version '0.11.1dev7' from Bokeh development version '0.11.1dev7-dirty'. This configuration is unsupported and may not work!






    














    



/home/chbrandt/.conda/envs/bokeh_devel/lib/python2.7/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == str('face'):






    






    







    Out[12]:




<Bokeh Notebook handle for In[12]>



In [13]:

    
from bokeh.plotting import output_file, show
from bokeh.sampledata.iris import flowers
import matplotlib
matplotlib.use('TKAgg')
from bokeh import mpl

flowers.boxplot(column='sepal_length', by='species')
show(mpl.to_bokeh())









    



/home/chbrandt/.conda/envs/bokeh_devel/lib/python2.7/site-packages/matplotlib/__init__.py:1318: UserWarning:  This call to matplotlib.use() has no effect
because the backend has already been chosen;
matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

  warnings.warn(_use_error_msg)






    






    







    Out[13]:




<Bokeh Notebook handle for In[13]>



In [14]:

    
#print iris.groupby('species').describe()
#print flowers.groupby('species').describe()
(flowers == iris).all()









    Out[14]:





sepal_length    True
sepal_width     True
petal_length    True
petal_width     True
species         True
dtype: bool



In [ ]:

	sample
count	943.000000
mean	0.535862
std	0.211561
min	0.167255
25%	0.441506
50%	0.506249
75%	0.575700
max	3.000000