First install the package:
pip install ipywidgets
then activate the plugin for jupyter:
jupyter nbextension enable --py widgetsnbextension
(both on the command line).
If you want to run all the examples you'll also need a few more packages (tqdm is used by the k-mer example):
pip install jupyter folium seaborn pandas scikit-image tqdm
Important: you have to restart Jupyter after enabling the extension.
In a new notebook, import what we need from the module:
In [2]:
from ipywidgets import interact, interact_manual
In [118]:
# minimal example
# interact() picks a widget automatically from the type of each argument value
# start with a plain function that prints the square of its argument
def print_square(x):
    """Print x together with its square, rounded to 5 decimal places."""
    squared = round(x**2, 5)  # rounding hides floating point noise
    print("the square of {} is {}".format(x, squared))

# sanity check with plain calls before wiring the function up to a widget
for sample in (3, 12):
    print_square(sample)
In [114]:
# The first argument to interact() is the function object itself (not a call to it).
# Every further keyword argument names one parameter of that function.
# interact() looks at each keyword's value to decide which widget to display;
# x=10 is an integer, so an integer slider is generated.
interact(print_square, x=10)
Because we gave an integer argument, interact() automatically creates an integer slider.
It looks like, for an argument value of n, the slider goes from -n to 3n.
In [5]:
# A (min, max, stepsize) tuple sets the slider's range and step explicitly.
# Here the slider runs from 1 to 10 in steps of 0.1.
interact(print_square, x=(1,10,0.1))
Out[5]:
In [6]:
# interact() chooses a different control for each argument type
# a list (specifically a list, not just any iterable) becomes a drop-down
def print_fruit(fruit):
    """Print a one-line message naming the chosen fruit."""
    message = "you have chosen {}".format(fruit)
    print(message)
# the list passed for `fruit` supplies the choices offered by the drop-down
interact(print_fruit, fruit=['apple', 'banana', 'pear'])
Out[6]:
In [7]:
# Pass a plain string instead of a list and see which widget interact() picks for it
# (presumably a free-text box pre-filled with "apple" - run the cell to confirm).
interact(print_fruit, fruit="apple")
Out[7]:
In [8]:
# a function with several arguments gets one widget per argument
def repeat_fruit(x, fruit):
    """Print the fruit name, each time followed by ', ', x times on one line."""
    one_item = fruit + ', '
    print(one_item * x)
# one keyword per function argument: a (min, max) tuple for x and a list of choices for fruit
interact(repeat_fruit, x=(1,10), fruit=['apple', 'banana', 'pear'])
Out[8]:
In [9]:
# Python 3 function annotations offer an alternative syntax:
# the widget specification is attached to each parameter in the signature
def repeat_fruit(x: (1, 10), fruit: ['apple', 'banana', 'pear']):
    """Print the fruit name, each time followed by ', ', x times on one line."""
    one_item = fruit + ', '
    print(one_item * x)
# no keyword arguments are passed here: the widget specifications live in the
# annotations of repeat_fruit's signature
interact(repeat_fruit)
# I will not do this for the rest of the talk as it still looks weird to me :-)
Out[9]:
In [10]:
# for slow functions it is unusable to update the output every time we move the slider
# interact_manual() builds the same widgets as interact() but adds an explicit button,
# and the function only runs when that button is pressed
# NOTE: the older spelling interact(..., __manual=True) is not recognised by current
# ipywidgets releases, so no button would appear there
def repeat_fruit(x, fruit):
    """Print the fruit name, each time followed by ', ', x times on one line."""
    print((fruit + ', ') * x)

interact_manual(repeat_fruit, x=(1,10), fruit=['apple', 'banana', 'pear'])
Out[10]:
In [11]:
# example taken from my talk on building command line interfaces with argparse
# a function that reads dna from a file and finds kmers that make up more than a
# given fraction of the total. Don't worry about the code, just look at the signature
import collections
from tqdm import tqdm
def find_common_kmers(filename, kmer_length, threshold, report):
    """Print every k-mer whose share of all k-mers in a DNA file exceeds a threshold.

    The file is read as text, newlines are removed, and every overlapping
    window of ``kmer_length`` characters is counted.

    Parameters
    ----------
    filename : path of the text file holding the DNA sequence
    kmer_length : window length in characters
    threshold : a k-mer is printed when count / total is strictly greater than this
    report : 'count' prints "kmer count"; 'fraction' prints "kmer fraction"

    Raises
    ------
    ValueError
        If ``report`` is neither 'count' nor 'fraction'.
    """
    # fail fast: an unknown report mode used to do all the counting and then print nothing
    if report not in ('count', 'fraction'):
        raise ValueError("report must be 'count' or 'fraction', got {!r}".format(report))

    # the context manager closes the file handle even if reading fails
    with open(filename) as dna_file:
        dna = dna_file.read().replace("\n", '')

    # count the windows as they are produced instead of first collecting them all in a list
    window_starts = tqdm(range(len(dna) - kmer_length + 1))
    kmer_counts = collections.Counter(
        dna[start:start + kmer_length] for start in window_starts
    )
    total_count = sum(kmer_counts.values())

    for kmer, count in kmer_counts.items():
        fraction = count / total_count
        if fraction > threshold:
            print(kmer, count if report == 'count' else fraction)
In [12]:
# an example run: print, as fractions, the 4-mers that make up more than 1%
# of all 4-mers found in small.dna
find_common_kmers('small.dna', 4, 0.01, 'fraction')
In [13]:
# now let's use interact
# one argument per line for readability
# interact_manual() adds a button so this slow function only runs on demand
# NOTE: interact(..., __manual=True) is not recognised by current ipywidgets releases,
# which is why interact_manual() is used instead
interact_manual(
    find_common_kmers,
    filename='small.dna',
    kmer_length=(1, 10),
    threshold=(0.0, 0.1, 0.01),
    report=['count', 'fraction'],
)
Out[13]:
In [14]:
# set up pandas/seaborn stuff
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
In [15]:
# brief digression for pandas
# load the tab-separated table ('-' is read as a missing value),
# then pick rows with a boolean mask
# jupyter displays the resulting dataframe as a nice table
euk = pd.read_csv("eukaryotes.tsv", sep="\t", na_values=['-'])
is_protist = euk['Group'] == 'Protists'
euk[is_protist]
Out[15]:
In [16]:
# the same filter wrapped in a function
def filter_genomes(group):
    """Return the rows of the global `euk` dataframe whose 'Group' equals `group`."""
    in_group = euk['Group'] == group
    return euk[in_group]
# try it on one group; the returned dataframe is displayed as the cell output
filter_genomes('Plants')
Out[16]:
In [17]:
# pandas can give us the distinct values of the 'Group' column;
# list() turns the result of unique() into a plain Python list
list(euk['Group'].unique())
Out[17]:
In [18]:
# use interact to populate a drop-down from the dataframe itself:
# the choices are the distinct values of the 'Group' column
interact(filter_genomes, group=list(euk['Group'].unique()))
Out[18]:
In [19]:
# we can filter with multiple criteria
# this will look weird if you're not used to pandas, don't worry
# each comparison yields one boolean per row and & combines them row by row;
# the parentheses are needed because & binds more tightly than < and ==
euk[(euk['Size_(Mb)'] < 1000) & (euk['Group']=='Protists') & (euk['Center'] == 'JGI')]
Out[19]:
In [20]:
# the same three-way filter as a function
def show_genomes(max_size, group, center):
    """Return the rows of `euk` below max_size (Mb) that match one group and one center."""
    small_enough = euk['Size_(Mb)'] < max_size
    in_group = euk['Group'] == group
    from_center = euk['Center'] == center
    return euk[small_enough & in_group & from_center]
# same query as the inline filter above: Protists under 1000 Mb from JGI
show_genomes(1000, 'Protists', 'JGI')
Out[20]:
In [21]:
# and with widgets
# prepare the three widget specifications first, then hand them to interact()
largest_genome = euk['Size_(Mb)'].max()
group_names = list(euk['Group'].unique())
# pandas to sort centers by number of genomes, largest first
centers_by_genome_count = list(euk.groupby('Center').size().sort_values(ascending=False).index)
interact(
    show_genomes,
    max_size=(0, largest_genome),
    group=group_names,
    center=centers_by_genome_count,
)
Out[21]:
In [22]:
from ipywidgets import IntRangeSlider, SelectMultiple
# for more control we can create the widgets explicitly
# this version takes a (min, max) size range and a list of subgroups (insects, birds, etc.)
# and returns the matching rows, restricted to a few columns
def show_genomes(size_range, subgroups):
    """Return the genomes strictly inside size_range that belong to any of the subgroups."""
    min_size, max_size = size_range  # the range slider hands us a (min, max) tuple
    sizes = euk['Size_(Mb)']
    above_min = min_size < sizes
    below_max = sizes < max_size
    in_subgroups = euk['SubGroup'].isin(subgroups)
    columns_to_show = ['Organism/Name', 'Size_(Mb)', 'Group', 'SubGroup', 'GC%', 'Status', 'Center']
    return euk[above_min & below_max & in_subgroups][columns_to_show]
# now the widgets have to be created explicitly
# a range slider for the (min, max) genome size interval
largest_genome = euk['Size_(Mb)'].max()
size_slider = IntRangeSlider(
    min=0,
    max=largest_genome,
    description='Genome size',
    continuous_update=False,
)
# a multiple-selection list filled with the subgroups found in the data
subgroup_names = list(euk['SubGroup'].unique())
subgroup_select = SelectMultiple(options=subgroup_names, description='Subgroups')
# note that although we construct the widgets ourselves, we have not (yet)
# written any code to handle events - this is very declarative
interact(show_genomes, size_range=size_slider, subgroups=subgroup_select)
Out[22]:
In [23]:
# for extra fanciness we can update widgets based on other widgets
# the two widgets are created exactly as before
largest_genome = euk['Size_(Mb)'].max()
size_slider = IntRangeSlider(
    min=0,
    max=largest_genome,
    description='Genome size',
    continuous_update=False,
)
subgroup_names = list(euk['SubGroup'].unique())
subgroup_select = SelectMultiple(options=subgroup_names, description='Subgroups')
# function to update the size range slider limits when we select new groups
def update_size_range(change):
    """Fit the size slider's limits to the genomes of the newly selected subgroups.

    `change` is the change dictionary handed over by `observe`; `change['new']`
    holds the currently selected subgroups. The slider limits become the smallest
    and largest known genome size among those subgroups, and the selected range
    is reset to run from the new minimum up to 80% of the new maximum.
    If nothing is selected, or none of the selected genomes has a known size,
    the slider is left untouched.
    """
    import math  # local import: only this callback needs it

    subgroups = change['new']
    sizes = euk[euk['SubGroup'].isin(subgroups)]['Size_(Mb)'].dropna()
    if sizes.empty:
        # min()/max() of an empty selection are NaN, which cannot be turned into
        # integer slider limits - keep the current range instead of failing
        return

    # round outwards so the integer limits still enclose the extreme genomes
    new_min = math.floor(sizes.min())
    new_max = math.ceil(sizes.max())

    # assign in an order that never leaves min above max, not even momentarily
    if new_min > size_slider.max:
        size_slider.max = new_max
        size_slider.min = new_min
    else:
        size_slider.min = new_min
        size_slider.max = new_max

    # the lower handle starts at the new minimum (a fixed 0 could lie below it);
    # the upper handle at 80% of the maximum, but never below the lower handle
    size_slider.value = (new_min, max(new_min, int(new_max * 0.8)))
# tell the selector to call update_size_range whenever its 'value' (the selection) changes
subgroup_select.observe(update_size_range, 'value')
interact(show_genomes, size_range=size_slider, subgroups=subgroup_select)
Out[23]:
In [24]:
from ipywidgets import Select, IntSlider
# when combined with charting this gets interesting
# reload the table and give it a readable index: "organism name (biosample accession)"
euk = pd.read_csv("eukaryotes.tsv", sep="\t", na_values=['-'])
euk.index = euk.apply(
    lambda row: "{} ({})".format(row['Organism/Name'], row['BioSample_Accession']),
    axis=1,
)

# now our function plots the genomes instead of displaying a dataframe
# it takes the number of genomes to show and the name of a subgroup
def show_genomes(count, subgroup):
    """Draw horizontal bar charts for the `count` genomes with the most genes in `subgroup`.

    Genes, proteins and genome size each get their own subplot.
    """
    plt.gcf().clear()  # clear the plot before drawing a new one
    in_subgroup = euk[euk['SubGroup'] == subgroup]
    most_genes_first = in_subgroup.sort_values('Genes', ascending=False)[:count]
    most_genes_first[['Genes', 'Proteins', 'Size_(Mb)']].plot.barh(
        figsize=(10, 4),
        subplots=True,
        sharex=False,  # try setting this to True and see what happens
    )

subgroup_select = Select(options=list(euk['SubGroup'].unique()), description='Subgroup')
# the count is controlled by a manual integer slider
count_slider = IntSlider(min=2, max=100, value=10, continuous_update=False)
interact(show_genomes, subgroup=subgroup_select, count=count_slider)
Out[24]:
In [25]:
# another example with seaborn
sns.set_style("whitegrid")

# this function takes a list of subgroups, selects the rows belonging to them
# and plots genome size against the number of predicted proteins
def plot_size_proteins(subgroups):
    """Plot 'Proteins' against 'Size_(Mb)' for the chosen subgroups, one colour per subgroup."""
    chosen_rows = euk[euk['SubGroup'].isin(subgroups)]
    # hue picks the column that decides the colour
    # NOTE(review): newer seaborn releases may expect `height=` instead of `size=` - confirm
    sns.lmplot(data=chosen_rows, x='Size_(Mb)', y='Proteins', size=4, hue='SubGroup')

# for this we want just one widget, a multiple selection
subgroup_select = SelectMultiple(options=list(euk['SubGroup'].unique()), description='Subgroups')
interact(plot_size_proteins, subgroups=subgroup_select)
Out[25]:
In [26]:
# a blobtools-like example with taxonomically-annotated contig data
import numpy as np
con = pd.read_csv('contigs.csv')
# make a log coverage column
# np.log10 works on the whole column at once, so no row-by-row apply() is needed
con['log_coverage'] = np.log10(con['coverage'])
con.head()
Out[26]:
In [27]:
from ipywidgets import Dropdown
# a function which draws a scatter plot of GC vs log coverage
# filtered by which phylum the contig (presumably) comes from
# and also filtered by minimum contig size
# it also samples a fraction of the total contigs for plotting
def draw_plot(phyla, min_size, frac):
    """Scatter-plot GC against log coverage for a random sample of the contigs.

    Parameters
    ----------
    phyla : collection of phylum names to keep
    min_size : contigs must be strictly longer than this
    frac : fraction of the rows of `con` to sample before filtering
    """
    con_sample = con.sample(frac=frac)
    # build both conditions from the sample itself so the mask lines up with it
    # row for row (the length test was previously taken from the full `con` table)
    from_phyla = con_sample['phylum'].isin(phyla)
    long_enough = con_sample['length'] > min_size
    sns.lmplot(
        data=con_sample[from_phyla & long_enough],
        x='GC',
        y='log_coverage',
        hue='phylum',
        fit_reg=False,
        size=4,
    )
# phyla are offered in order of contig count, largest first
phyla_by_contig_count = list(con.groupby('phylum').size().sort_values(ascending=False).index)
phylum_select = SelectMultiple(options=phyla_by_contig_count, description='phyla')

min_size_slider = IntSlider(min=1000, max=100000, continuous_update=False, description='min contig length')

# fraction of the contigs to sample before plotting
sample_select = Dropdown(options=[1, 0.5, 0.2, 0.1], description='sample fraction')

interact(
    draw_plot,
    phyla=phylum_select,
    min_size=min_size_slider,
    frac=sample_select,
)
Out[27]:
In [61]:
# one more with the eukaryotic genomes data
# a heatmap of the number of genomes per subgroup and completion status,
# restricted to one sequencing center
def plot_heatmap(center):
    """Draw a heatmap of genome counts by 'SubGroup' and 'Status' for one center."""
    plt.figure(figsize=(7,7))
    from_center = euk[euk['Center'] == center]
    size_v_status = from_center.groupby(['SubGroup', 'Status']).size().unstack()
    # add some space between the cells and change the colour map
    # see here http://chrisalbon.com/python/seaborn_color_palettes.html for colour maps
    sns.heatmap(size_v_status, square=True, linewidths=2, cmap='OrRd', annot=True, fmt="3.0f")

# centers are offered in order of genome count, largest first
centers_by_genome_count = list(euk.groupby('Center').size().sort_values(ascending=False).index)
interact(plot_heatmap, center=centers_by_genome_count)
Out[61]:
In [109]:
import folium
from folium import plugins
# shall we do one with maps?
# using the distribution of Anopheles gambiae from here
# http://lifemapper.org/species/Anopheles%20gambiae
# read the csv file into a dataframe
# (the code below uses its 'year', 'dec_lat' and 'dec_long' columns)
ano = pd.read_csv('ag.csv')
# function to draw a heat map of the specimens recorded between two years (both inclusive)
def draw_map(target_year):
    """Return a folium map with a heat map of specimens from target_year[0] to target_year[1]."""
    # a map centered on Africa
    africa_map = folium.Map(location=[-10, -0], zoom_start=3)
    # use pandas to grab the rows between the two target years
    first_year, last_year = target_year[0], target_year[1]
    in_range = (ano['year'] >= first_year) & (ano['year'] <= last_year)
    selected = ano[in_range]
    # pair every latitude with its longitude
    latitudes = list(selected['dec_lat'])
    longitudes = list(selected['dec_long'])
    locations = zip(latitudes, longitudes)
    # hand the pairs to folium and ask it for a heat map layer
    # NOTE(review): add_children may be deprecated in newer folium releases - confirm
    africa_map.add_children(folium.plugins.HeatMap(locations))
    return africa_map
# call it like this, with a (first year, last year) tuple; both ends are included
draw_map((1970, 1979))
Out[109]:
In [110]:
# now make it interactive: a range slider supplies the (first year, last year) tuple
years_slider = IntRangeSlider(min=1967, max=1998, description='years', continuous_update=False)
interact(draw_map, target_year=years_slider)
Out[110]:
In [84]:
# and one with images
# load an image of cells under a microscope that I found on the internet
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from skimage import io
from skimage.filters import threshold_otsu
from skimage.segmentation import clear_border
from skimage.measure import label, regionprops
from skimage.morphology import closing, square
from skimage.color import label2rgb, rgb2gray
from skimage.viewer import ImageViewer
# read cells.png and convert it to grayscale; find_shapes() below works on this global `image`
image = rgb2gray(io.imread('cells.png'))
# show the grayscale image
io.imshow(image)
Out[84]:
In [87]:
# detect and label cells, following the example from the scikit-image tutorial
# min_size is the smallest region area that still gets outlined
def find_shapes(min_size):
    """Show `image` with labelled regions and a red box around each region of area >= min_size."""
    # apply threshold
    is_foreground = image > threshold_otsu(image)
    closed = closing(is_foreground, square(1))
    # remove artifacts connected to image border, then label image regions
    labelled = label(clear_border(closed))
    overlay = label2rgb(labelled, image=image)

    fig, ax = plt.subplots(figsize=(5, 5))
    ax.imshow(overlay)

    # take only regions with large enough areas
    big_regions = (region for region in regionprops(labelled) if region.area >= min_size)
    for region in big_regions:
        # draw a rectangle around the segmented cell
        top, left, bottom, right = region.bbox
        outline = mpatches.Rectangle(
            (left, top), right - left, bottom - top,
            fill=False, edgecolor='red', linewidth=2,
        )
        ax.add_patch(outline)

    ax.set_axis_off()
    plt.tight_layout()
    plt.show()

# we can call the function thus:
find_shapes(200)
In [88]:
# now to make it interactive we just add a slider for the minimum feature size
# the keyword was misspelled "continous_update", so the slider never received the option;
# with the correct spelling it is actually applied
min_size_slider = IntSlider(min=50, max=1000, continuous_update=False)
interact(find_shapes, min_size=min_size_slider)
Out[88]:
In [ ]:
In [ ]:
In [ ]:
In [73]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from skimage import io
from skimage.filters import threshold_otsu
from skimage.segmentation import clear_border
from skimage.measure import label, regionprops
from skimage.morphology import closing, square
from skimage.color import label2rgb, rgb2gray
from skimage.viewer import ImageViewer
# read cells.png and convert it to grayscale; find_shapes() below works on this global `image`
image = rgb2gray(io.imread('cells.png'))
# show the grayscale image
io.imshow(image)
Out[73]:
In [82]:
def find_shapes(min_size):
    """Show `image` with labelled regions and a red box around each region of area >= min_size.

    Uses a 10 x 6 figure.
    """
    # apply threshold
    otsu_level = threshold_otsu(image)
    binary = closing(image > otsu_level, square(1))
    # remove artifacts connected to image border
    without_border = clear_border(binary)
    # label image regions
    region_labels = label(without_border)
    overlay = label2rgb(region_labels, image=image)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.imshow(overlay)

    for region in regionprops(region_labels):
        # skip regions whose area is too small
        if region.area < min_size:
            continue
        # draw a rectangle around the segmented cell
        row_min, col_min, row_max, col_max = region.bbox
        width = col_max - col_min
        height = row_max - row_min
        ax.add_patch(
            mpatches.Rectangle((col_min, row_min), width, height,
                               fill=False, edgecolor='red', linewidth=2)
        )

    ax.set_axis_off()
    plt.tight_layout()
    plt.show()
# slider for the minimum feature size
# the keyword was misspelled "continous_update", so the slider never received the option;
# with the correct spelling it is actually applied
min_size_slider = IntSlider(min=50, max=1000, continuous_update=False)
interact(find_shapes, min_size=min_size_slider)
Out[82]: