(Interactive) Plotting using Matplotlib and Seaborn

Matplotlib is a basic plotting library for Python inspired by Matlab. Seaborn is built on top of it and adds integrated analysis, specialized plots, and pretty good integration with Pandas.

Also see the full gallery of Seaborn or Matplotlib.


In [45]:
#disable some annoying warning
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

#plots the figures in place instead of a new window
%matplotlib inline

import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
import numpy as np

In [46]:
#use a standard dataset of heterogeneous data (one row per car model)
cars = pd.read_csv('data/mtcars.csv')
# preview the first rows
cars.head()


Out[46]:
car mpg cyl disp hp drat wt qsec vs am gear carb
0 Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
1 Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
2 Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
3 Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
4 Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2

Scatterplot


In [47]:
# mpg on the x axis, weight on the y axis, drawn on an explicit figure/axes pair
fig, ax = plt.subplots()
ax.scatter(cars['mpg'], cars['wt'])
ax.set_xlabel('miles per gallon')
ax.set_ylabel('weight')
ax.set_title('MPG vs WT')
plt.show()



In [48]:
#integrated in pandas, too: the column names select the x and y data
cars.plot(x='mpg',y='wt',kind='scatter')


Out[48]:
<matplotlib.axes._subplots.AxesSubplot at 0x1012bdd8>

In [49]:
# two extra encodings: marker color taken from 'hp', marker size from 'cyl' (times 20), half-transparent markers
cars.plot(kind='scatter', x='mpg',y='wt',c='hp',s=cars['cyl']*20,alpha=0.5)


Out[49]:
<matplotlib.axes._subplots.AxesSubplot at 0x10111198>

In [50]:
#what if we plot everything? (DataFrame.plot() with no arguments, i.e. pandas' default plot kind)
cars.plot()


Out[50]:
<matplotlib.axes._subplots.AxesSubplot at 0xedb7e48>

Histogram


In [51]:
# histogram of mpg with 5 bins, via the pandas Series helper
cars['mpg'].hist(bins=5)


Out[51]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d0da20>

In [52]:
# 5-bin histogram of mpg through matplotlib directly, plus a title
plt.hist(cars['mpg'],bins=5)
plt.title('miles per gallon')


Out[52]:
<matplotlib.text.Text at 0x10d9aa58>

In [53]:
#seaborn: not just a histogram but also a kernel density estimation and better default settings
# NOTE(review): distplot is reported as deprecated in newer seaborn releases — confirm against the installed version
sns.distplot(cars['mpg'],bins=5)


Out[53]:
<matplotlib.axes._subplots.AxesSubplot at 0x10ded940>

Box Plots


In [54]:
#box plot of a single column via the generic pandas plot interface
cars['mpg'].plot(kind='box')


Out[54]:
<matplotlib.axes._subplots.AxesSubplot at 0x10e7ed30>

In [55]:
# DataFrame.boxplot restricted to the 'mpg' column
cars.boxplot('mpg')


Out[55]:
{'boxes': [<matplotlib.lines.Line2D at 0x1004de10>],
 'caps': [<matplotlib.lines.Line2D at 0xeffa668>,
  <matplotlib.lines.Line2D at 0x100ab5c0>],
 'fliers': [<matplotlib.lines.Line2D at 0x101b4160>],
 'means': [],
 'medians': [<matplotlib.lines.Line2D at 0x100e3240>],
 'whiskers': [<matplotlib.lines.Line2D at 0xefea4e0>,
  <matplotlib.lines.Line2D at 0xefea208>]}

In [56]:
#group by gear: the mpg values are split by the 'gear' column
cars.boxplot('mpg', by='gear')


Out[56]:
<matplotlib.axes._subplots.AxesSubplot at 0xed90048>

In [57]:
# load gapminder again (tab-separated, first column used as the index) and select 2007
gap = pd.read_csv('data/gapminder-unfiltered.tsv',index_col=0, sep='\t')
gap2007 = gap[gap.year == 2007]
# list the available columns
gap2007.columns


Out[57]:
Index(['continent', 'year', 'lifeExp', 'pop', 'gdpPercap'], dtype='object')

Log Scale


In [58]:
# life expectancy vs GDP per capita for 2007, default (linear) axes
gap2007.plot(kind='scatter', x='lifeExp',y='gdpPercap')


Out[58]:
<matplotlib.axes._subplots.AxesSubplot at 0x1012e128>

The plot is unbalanced because of outliers. What about a log scale?


In [59]:
# draw the lifeExp/gdpPercap scatter, then switch the y axis of the current axes to a log scale
gap2007.plot(kind='scatter', x='lifeExp',y='gdpPercap')
plt.yscale('log')


Grouping / Coloring Plots

grouped by color?


In [60]:
#create a color palette (seaborn's current palette) and preview its swatches
colors = sns.color_palette()
sns.palplot(colors)



In [61]:
#for each continent draw its own scatter and overlay them on the same axes
for (name, group), color in zip(gap2007.groupby('continent'), colors):
    # color= (not c=): c= interprets a 3- or 4-element RGB(A) tuple as per-point
    # values whenever the group happens to have exactly that many rows
    plt.scatter(x=group['lifeExp'], y=group['gdpPercap'], label=name, color=color, s=30)
plt.yscale('log')
plt.legend()


Out[61]:
<matplotlib.legend.Legend at 0x123193c8>

In [62]:
#playing with categories ... seaborn is pretty good with it
fig, (left_ax, right_ax) = plt.subplots(1, 2, figsize=(40,20))

# left panel: one box per continent, using every row of `gap`
sns.boxplot(x='continent', y='gdpPercap', data=gap, ax=left_ax)

# right panel: one violin per continent, using the 2007 subset only
sns.violinplot(x='continent', y='gdpPercap', data=gap2007, ax=right_ax)


Out[62]:
<matplotlib.axes._subplots.AxesSubplot at 0x125c7ba8>

In [63]:
# or with linear regression: one panel per Anscombe dataset, each with its own fitted line

anscombe = sns.load_dataset("anscombe")
# x and y are passed by keyword: newer seaborn releases reject positional x/y,
# while the keyword form works on old and new versions alike
sns.lmplot(x='x', y='y', col='dataset', hue='dataset', data=anscombe, col_wrap=2)
# alternative: build the grid by hand and map a regression plot onto each facet
#g = sns.FacetGrid(anscombe, col="dataset", size=4, aspect=1)
#g.map(sns.regplot, "x", "y")


Out[63]:
<seaborn.axisgrid.FacetGrid at 0x12663710>

In [64]:
# or with structured heatmaps

#compute the pairwise correlations of the numeric columns and take a look at them
# (the string column 'continent' is dropped explicitly: newer pandas releases
#  raise on non-numeric columns instead of silently skipping them)
corrmat = gap.select_dtypes(include=[np.number]).corr()

# draw a clustered heatmap using seaborn
sns.clustermap(corrmat, square=True)


Out[64]:
<seaborn.matrix.ClusterGrid at 0x126672b0>

TASK

create a scatterplot where

  • x = lifeExp
  • y = gdpPercap
  • color = continent
  • size = pop

Label the axes appropriately and use a log scale for GDP.


In [65]:
#for each continent draw its own scatter and overlay them on the same axes
# marker area is the country's population relative to the 2007 maximum, scaled to at most 400
pop_max = gap2007['pop'].max()
for (name, group), color in zip(gap2007.groupby('continent'), colors):
    # color= (not c=): c= interprets a 3- or 4-element RGB(A) tuple as per-point
    # values whenever the group happens to have exactly that many rows
    plt.scatter(x=group['lifeExp'], y=group['gdpPercap'], label=name, color=color,
                s=(group['pop'] / pop_max) * 400)
plt.yscale('log')
plt.title('Life Expectancy vs GDP')
plt.xlabel('Life Expectancy')
plt.ylabel('GDP Per Cap')
plt.legend()


Out[65]:
<matplotlib.legend.Legend at 0x112927b8>

Interactive plots

Simple interaction is possible with IPython by default. That means whenever the user changes a parameter, the visualization is recreated on the server side and sent to the client.


In [66]:
from IPython.html.widgets import interact, interact_manual

In [67]:
@interact(text='Hello', slider=(0,10),check=True,categories=['red','green','blue'])
def react(text, slider,check,categories):
    """Echo the current widget values; the slider value is shown multiplied by 10."""
    scaled = slider * 10
    print(text, scaled, check, categories)


Hello 50 True red

In [68]:
@interact_manual(text='Hello', slider=(0,10),check=True,categories=['red','green','blue'])
def react(text, slider,check,categories):
    """Print the widget values (slider times 10); registered via interact_manual."""
    values = (text, slider * 10, check, categories)
    print(*values)

In [69]:
@interact(bins=(5, 25, 5),color=['red','green','orange','blue'])
def show_distplot(bins,color):
    """Redraw the mpg histogram with the selected bin count and bar color."""
    mpg = cars['mpg']
    mpg.hist(bins=bins, color=color)



In [70]:
#hard core: list the names the widgets module offers for building controls by hand

from IPython.html import widgets

# keep capitalized, non-underscore names and skip those carrying a 'Widget' suffix
[name for name in dir(widgets)
 if name[0] != '_' and name[0] == name[0].upper() and not name.endswith('Widget')]


Out[70]:
['Accordion',
 'BoundedFloatText',
 'BoundedIntText',
 'Box',
 'Button',
 'CallbackDispatcher',
 'Checkbox',
 'Color',
 'Dropdown',
 'FlexBox',
 'FloatProgress',
 'FloatRangeSlider',
 'FloatSlider',
 'FloatText',
 'HBox',
 'HTML',
 'Image',
 'IntProgress',
 'IntRangeSlider',
 'IntSlider',
 'IntText',
 'Latex',
 'Output',
 'RadioButtons',
 'Select',
 'SelectMultiple',
 'Tab',
 'Text',
 'Textarea',
 'ToggleButton',
 'ToggleButtons',
 'VBox']

In [71]:
# bins is a count, so use the integer text box (IntText, one of the names listed
# above) instead of the 'Widget'-suffixed FloatTextWidget name, which would also
# hand a float to hist
@interact(bins=widgets.IntText(value=5))
def show_distplot(bins):
    """Redraw the mpg histogram with the bin count typed into the text box."""
    # a free-form text box allows 0 or negative input; hist needs at least one bin
    cars['mpg'].hist(bins=max(int(bins), 1))



In [72]:
# build the four controls explicitly instead of passing plain values to interact
text_widget = widgets.Textarea(value='Hello', description='text area')
# numeric text box starting at 5 and bounded to 0..10
slider_widget = widgets.BoundedFloatText(5,min=0,max=10, description='slider area')
check_widget = widgets.Checkbox(True,description="CheckboxWidget")
toggle = widgets.RadioButtons(options=['red','green','blue'], description="RadioButtonsWidget")

# hand the widget instances to interact; each keyword matches a parameter of react
@interact(text=text_widget, slider=slider_widget,check=check_widget,categories=toggle)
def react(text, slider,check,categories):
    """Print the current widget values; the slider value is shown multiplied by 10."""
    print(text,slider*10,check,categories)


Hello 50.0 True red

In [73]:
# two controls that are referenced again below: the button gets a click handler,
# and the checkbox is read inside that handler
b = widgets.Button(description="Update")
checkbox = widgets.Checkbox(description="CheckboxWidget")

# first group: button, checkbox, selection and text controls
tab1_children = [b,
                 checkbox,
                 widgets.Dropdown(options=['A','B'], description="DropdownWidget"),
                 widgets.RadioButtons(options=['A','B'], description="RadioButtonsWidget"),
                 widgets.Select(options=['A','B'], description="SelectWidget"),
                 widgets.Text(description="TextWidget"),
                 widgets.Textarea(description="TextareaWidget"),
                 widgets.ToggleButton(description="ToggleButtonWidget"),
                 widgets.ToggleButtons(options=["Value 1", "Value2"], description="ToggleButtonsWidget"),
                 ]

# second group: numeric input controls
tab2_children = [widgets.BoundedFloatText(description="BoundedFloatTextWidget"),
                 widgets.BoundedIntText(description="BoundedIntTextWidget"),
                 widgets.FloatSlider(description="FloatSliderWidget"),
                 widgets.FloatText(description="FloatTextWidget"),
                 widgets.IntSlider(description="IntSliderWidget"),
                 widgets.IntText(description="IntTextWidget"),
                 ]

# wrap each group in a Box so it can be used as one accordion section
tab1 = widgets.Box(children=tab1_children)
tab2 = widgets.Box(children=tab2_children)


# accordion with the two boxes as its sections
i = widgets.Accordion(children=[tab1, tab2])

# section titles are set by position
i.set_title(0,"Basic Widgets")
i.set_title(1,"Numbers Input")

from IPython.display import display

def button_clicked(bb):
    """Click handler for the "Update" button: prints the checkbox's current value.

    bb is the argument supplied by on_click and is not used here
    (presumably the clicked button — verify against the widgets documentation).
    """
    print(checkbox.value)
    #TODO update plot

# register the handler on the button
b.on_click(button_clicked)

# render the accordion in the cell output
display(i)

TASK

make the plot from before interactive, such that you can slide the year


In [74]:
# largest population over all years, so marker sizes stay comparable between years
pop_max = gap['pop'].max()

# fixed continent -> color assignment taken from the full data set, so a continent
# keeps its color even in years where another continent has no rows
continent_colors = dict(zip(sorted(gap['continent'].dropna().unique()), colors))

@interact(year=(gap.year.min(), gap.year.max()))
def plot_gapminder(year):
    """Draw the life expectancy vs GDP scatter for one year, chosen with a slider.

    year: value compared against gap.year to select the rows to plot.
    """
    gapyear = gap[gap.year == year]
    for name, group in gapyear.groupby('continent'):
        if name not in continent_colors:
            # more continents than palette colors: skipped, as zip() would have done
            continue
        # color= (not c=): c= interprets a 3- or 4-element RGB(A) tuple as per-point
        # values whenever the group happens to have exactly that many rows
        plt.scatter(x=group['lifeExp'], y=group['gdpPercap'], label=name,
                    color=continent_colors[name], s=(group['pop'] / pop_max) * 400)
    plt.yscale('log')
    plt.title('Life Expectancy vs GDP')
    plt.xlabel('Life Expectancy')
    plt.ylabel('GDP Per Cap')
    # pin both axes to the full-data extent so the view does not jump between years:
    # GDP belongs on the y axis, life expectancy on the x axis
    plt.ylim(gap.gdpPercap.min(), gap.gdpPercap.max())
    plt.xlim(gap.lifeExp.min(), gap.lifeExp.max())
    plt.legend()