notebook.community

Edit and run



In [1]:

    
from bokeh.plotting import output_notebook

output_notebook()

%load_ext autoreload
%autoreload 2
%matplotlib inline
%config InlineBackend.figure_format = 'retina'









    





    
        
        Loading BokehJS ...



In [2]:

    
import pandas as pd

# Sample metadata plus embedding coordinates. The first CSV column becomes the
# row index and 'Collection Date' is parsed as datetimes so it can be resampled.
embeddings = pd.read_csv(
    'data/metadata_with_embeddings.csv',
    index_col=0,
    parse_dates=['Collection Date'],
)

# Mean embedding coordinates per calendar month, indexed by month end.
# Months with no samples show up as all-NaN rows (e.g. 2000-04-30).
# NOTE(review): the 'M' alias is deprecated in pandas >= 2.2 in favour of 'ME';
# it is kept here so the cell still runs on the older pandas this notebook
# was written against.
data = (
    embeddings[['Collection Date', 'coords0', 'coords1', 'coords2']]
    .set_index('Collection Date')
    .resample('M')
    .mean()
)
data.head()









    Out[2]:






  
    
      
      coords0
      coords1
      coords2
    
    
      Collection Date
      
      
      
    
  
  
    
      2000-01-31
      0.528481
      -3.737833
      -0.502630
    
    
      2000-02-29
      0.075133
      -3.591450
      -0.312657
    
    
      2000-03-31
      0.322234
      -3.725801
      -0.207332
    
    
      2000-04-30
      NaN
      NaN
      NaN
    
    
      2000-05-31
      0.011306
      -3.379667
      -0.303968



In [3]:

    
from bokeh.palettes import inferno, viridis

# One Inferno colour per row of the monthly `data` frame.
# NOTE(review): inferno() is presumably capped at 256 colours - confirm before
# extending the date range much further.
palette = inferno(data.shape[0])



In [51]:

    
from bokeh.plotting import figure, show
from bokeh.models import BasicTicker, ColorBar, LinearColorMapper

# `palette` holds one colour per monthly row, i.e. colour advances in equal
# steps through time. The colour bar therefore has to map years linearly;
# a log mapper/ticker would describe a scale the points are not drawn with.
c = LinearColorMapper(palette=palette, low=data.index.min().year, high=data.index.max().year)
cb = ColorBar(color_mapper=c, ticker=BasicTicker(), location=(20, 0), label_standoff=-12)
cb.minor_tick_line_width = 0
cb.width = 10

# NOTE(review): webgl=True / plot_width / plot_height are the older Bokeh
# spellings (newer releases use output_backend='webgl', width, height).
# Left unchanged to match the Bokeh version this notebook runs on.
p = figure(webgl=True, plot_width=350, plot_height=300)
# One point per month in the first two embedding dimensions, coloured by
# position in time.
p.scatter(x=data['coords0'], y=data['coords1'], color=palette)

p.add_layout(cb, 'left')

show(p)



In [26]:

    
# Scrape the CDC page of seasonal influenza vaccine effectiveness (VE) studies.
tables = pd.read_html('https://www.cdc.gov/flu/professionals/vaccination/effectiveness-studies.htm')

# Row 0 of the first parsed table is the header text, so drop it and assign
# short column names directly (no need to promote row 0 to the header first).
df = tables[0].drop(0).reset_index(drop=True)
df.columns = ['Season', 'Reference', 'Study Sites', 'Number of Patients', 'Overall VE', 'CI']

# 'Season' is split on '-' and the leading part is kept as an integer year.
df['Season Start'] = df['Season'].str.split('-').str[0].astype(int)

# Because the header row was parsed as data, 'Overall VE' is presumably text;
# convert it to numbers so it is plotted on a numeric axis. Anything that
# cannot be parsed becomes NaN.
df['Overall VE'] = pd.to_numeric(df['Overall VE'], errors='coerce')

p = figure(plot_width=300, plot_height=250)
p.line(x=df['Season Start'], y=df['Overall VE'])
show(p)



In [ ]:



In [ ]:

	coords0	coords1	coords2
Collection Date
2000-01-31	0.528481	-3.737833	-0.502630
2000-02-29	0.075133	-3.591450	-0.312657
2000-03-31	0.322234	-3.725801	-0.207332
2000-04-30	NaN	NaN	NaN
2000-05-31	0.011306	-3.379667	-0.303968