(Interactive) Plotting using Matplotlib and Seaborn

Matplotlib is the basic plotting library for Python, inspired by Matlab. Seaborn is built on top of it and adds integrated analysis, specialized plots, and pretty good integration with Pandas.

Also see the full gallery of Seaborn or Matplotlib.


In [2]:
#disable some annoying warning
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

#plots the figures in place instead of a new window
%matplotlib inline

import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
import numpy as np

In [3]:
#use a standard dataset of heterogeneous data
# the path is relative to the notebook's working directory
cars = pd.read_csv('data/mtcars.csv')
# preview the first rows to check how the file was parsed
cars.head()


Out[3]:
car mpg cyl disp hp drat wt qsec vs am gear carb
0 Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
1 Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
2 Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
3 Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
4 Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2

Scatterplot


In [4]:
# plain matplotlib: fuel economy on the x axis against car weight on the y axis
mpg_values = cars['mpg']
weight_values = cars['wt']
plt.scatter(x=mpg_values, y=weight_values)

# title and axis labels so that the figure can be read on its own
plt.title('MPG vs WT')
plt.xlabel('miles per gallon')
plt.ylabel('weight')
plt.show()



In [5]:
# the same scatter plot is available directly on the DataFrame;
# x and y are given as column names instead of Series
cars.plot(kind='scatter', x='mpg', y='wt')


Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x88d3828>

In [6]:
# encode two more columns in the same scatter plot:
# 'hp' is passed as the colour argument and 'cyl' (scaled by 20) as the marker size
cars.plot(
    kind='scatter',
    x='mpg',
    y='wt',
    c='hp',
    s=cars['cyl'] * 20,
    alpha=0.5,
)


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x90146d8>

In [7]:
#what if we plot everything?
# no kind/x/y given, so pandas falls back to its defaults for the whole frame
cars.plot()


Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0xa093f60>

Histogram


In [8]:
# histogram straight from the pandas Series, with mpg split into 5 bins
cars['mpg'].hist(bins=5)


Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0xa1bc518>

In [9]:
# the same 5-bin histogram through matplotlib directly, this time with a title
plt.hist(cars['mpg'],bins=5)
# last expression of the cell, so its return value is what the cell displays
plt.title('miles per gallon')


Out[9]:
<matplotlib.text.Text at 0xa259400>

In [10]:
#seaborn: not just a histogram but also a kernel density estimation, plus better default settings
# NOTE(review): distplot is deprecated in recent seaborn releases in favour of
# histplot/displot — confirm against the installed seaborn version
sns.distplot(cars['mpg'],bins=5)


Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0xa25e320>

Box Plots


In [11]:
#box plots
# box plot of a single column via the generic Series.plot interface
cars['mpg'].plot(kind='box')


Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x8920f60>

In [12]:
# the same box plot through DataFrame.boxplot, selecting the column by name
cars.boxplot('mpg')


Out[12]:
{'boxes': [<matplotlib.lines.Line2D at 0xa38c588>],
 'caps': [<matplotlib.lines.Line2D at 0xa395f28>,
  <matplotlib.lines.Line2D at 0xa39a748>],
 'fliers': [<matplotlib.lines.Line2D at 0xa39e668>],
 'means': [],
 'medians': [<matplotlib.lines.Line2D at 0xa39a9b0>],
 'whiskers': [<matplotlib.lines.Line2D at 0xa38cf28>,
  <matplotlib.lines.Line2D at 0xa395748>]}

In [13]:
#group by gear
# by='gear' splits mpg by the values of the 'gear' column
cars.boxplot('mpg', by='gear')


Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0xa353860>

In [14]:
# load gapminder again and select 2007
# the file is tab separated and its first column is used as the index
gap = pd.read_csv('data/gapminder-unfiltered.tsv', sep='\t', index_col=0)

# boolean mask marking the rows recorded for the year 2007
is_2007 = gap['year'] == 2007
gap2007 = gap[is_2007]

# list the remaining columns
gap2007.columns


Out[14]:
Index(['continent', 'year', 'lifeExp', 'pop', 'gdpPercap'], dtype='object')

Log Scale


In [15]:
# life expectancy against GDP per capita for 2007, both on linear axes
gap2007.plot(kind='scatter', x='lifeExp',y='gdpPercap')


Out[15]:
<matplotlib.axes._subplots.AxesSubplot at 0xa3aa080>

The plot is unbalanced because of a few outliers. What about using a log scale?


In [16]:
# same scatter plot as before ...
gap2007.plot(x='lifeExp', y='gdpPercap', kind='scatter')
# ... but with a logarithmic y axis, applied to the axes that were just drawn
plt.yscale('log')


Grouping / Coloring Plots

grouped by color?


In [17]:
#create a color palette
# called without arguments; the result is stored for use as a sequence of colours
colors = sns.color_palette()
# show the palette so the available colours can be inspected
sns.palplot(colors)



In [18]:
# for each group create its own plot and overlay them on the same axes, so that
# every continent gets its own colour and its own legend entry.
# zip stops at the shorter sequence: continents beyond the palette length are skipped.
for (name, group), color in zip(gap2007.groupby('continent'), colors):
    # color= instead of c=: an RGB tuple passed as c is ambiguous and is read as
    # per-point data values whenever a group happens to contain 3 or 4 rows
    plt.scatter(x=group['lifeExp'], y=group['gdpPercap'], label=name, color=color, s=30)
plt.yscale('log')
plt.legend()


Out[18]:
<matplotlib.legend.Legend at 0xa6a4390>

In [19]:
#playing with categories ... seaborn is pretty good with it
plt.figure(figsize=(40, 20))

# left panel: box plot of GDP per capita for each continent
# NOTE(review): this panel uses the full `gap` frame while the right panel uses
# `gap2007` — confirm that the difference is intended
plt.subplot(1, 2, 1)
sns.boxplot(data=gap, x='continent', y='gdpPercap')

# right panel: violin plot of GDP per capita for each continent
plt.subplot(1, 2, 2)
sns.violinplot(data=gap2007, x='continent', y='gdpPercap')


Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0xba7bba8>

In [20]:
# or with linear regression

# fetch the "anscombe" example dataset through seaborn's loader
anscombe = sns.load_dataset("anscombe")
# one regression panel per value of 'dataset', two panels per row.
# x and y are passed by keyword: recent seaborn releases reject positional use.
sns.lmplot(x='x', y='y', col='dataset', hue='dataset', data=anscombe, col_wrap=2)
# alternative: build the grid by hand with a FacetGrid
#g = sns.FacetGrid(anscombe, col="dataset", size=4, aspect=1)
#g.map(sns.regplot, "x", "y")


Out[20]:
<seaborn.axisgrid.FacetGrid at 0xb9deda0>

In [21]:
# or with structured heatmaps

# compute the pairwise correlations and take a look at them.
# Only numeric columns are kept: 'continent' holds strings, and newer pandas
# raises on such columns instead of silently dropping them.
corrmat = gap.select_dtypes(include=['number']).corr()

# draw a clustered heatmap using seaborn
sns.clustermap(corrmat, square=True)


Out[21]:
<seaborn.matrix.ClusterGrid at 0xb86e588>

TASK

create a scatterplot where

  • x = lifeExp
  • y = gdpPercap
  • color = continent
  • size = pop

Label the axes appropriately and use a log scale for the GDP axis.


In [ ]:
#your code

Interactive plots

Simple interaction is possible with IPython by default. This means that whenever the user changes a parameter, the visualization is recreated on the server side and sent to the client.


In [21]:
from IPython.html.widgets import interact, interact_manual

In [22]:
@interact(
    text='Hello',
    slider=(0, 10),
    check=True,
    categories=['red', 'green', 'blue'],
)
def react(text, slider, check, categories):
    """Print the current widget values, with the slider value multiplied by ten."""
    scaled = slider * 10
    print(text, scaled, check, categories)


Hello 50 True red

In [23]:
# same arguments as the interact example, but wired up through interact_manual
# NOTE(review): presumably this only runs on an explicit button press — confirm in the widget docs
@interact_manual(text='Hello', slider=(0, 10), check=True,
                 categories=['red', 'green', 'blue'])
def react(text, slider, check, categories):
    """Print the widget values on one line; the slider value is multiplied by ten."""
    values = (text, slider * 10, check, categories)
    print(*values)

In [24]:
@interact(bins=(5, 25, 5), color=['red', 'green', 'orange', 'blue'])
def show_distplot(bins, color):
    """Draw the mpg histogram with the selected number of bins and bar colour."""
    mpg = cars['mpg']
    mpg.hist(color=color, bins=bins)



In [25]:
#hard core

from IPython.html import widgets

# names exported by the widgets module that start with an upper-case character,
# excluding underscore-prefixed names and names ending in 'Widget'
[
    name
    for name in dir(widgets)
    if name[0] != '_'
    and name[0] == name[0].upper()
    and not name.endswith('Widget')
]


Out[25]:
['Accordion',
 'BoundedFloatText',
 'BoundedIntText',
 'Box',
 'Button',
 'CallbackDispatcher',
 'Checkbox',
 'Color',
 'Dropdown',
 'FlexBox',
 'FloatProgress',
 'FloatRangeSlider',
 'FloatSlider',
 'FloatText',
 'HBox',
 'HTML',
 'Image',
 'IntProgress',
 'IntRangeSlider',
 'IntSlider',
 'IntText',
 'Latex',
 'Output',
 'RadioButtons',
 'Select',
 'SelectMultiple',
 'Tab',
 'Text',
 'Textarea',
 'ToggleButton',
 'ToggleButtons',
 'VBox']

In [26]:
# IntText instead of FloatTextWidget: it matches the un-suffixed widget names
# used in the rest of this notebook and delivers a whole number
@interact(bins=widgets.IntText(value=5))
def show_distplot(bins):
    """Redraw the mpg histogram with the bin count typed into the text box.

    bins: number of histogram bins, as entered in the widget.
    """
    # a histogram needs a positive whole number of bins; coerce and clamp the
    # input so that typing 0 or a negative value does not raise
    cars['mpg'].hist(bins=max(1, int(bins)))



In [27]:
# explicit widget instances, one per argument of the callback they are bound to
text_widget = widgets.Textarea(description='text area', value='Hello')
slider_widget = widgets.BoundedFloatText(
    value=5,
    min=0,
    max=10,
    description='slider area',
)
check_widget = widgets.Checkbox(value=True, description="CheckboxWidget")
toggle = widgets.RadioButtons(
    description="RadioButtonsWidget",
    options=['red', 'green', 'blue'],
)

@interact(
    text=text_widget,
    slider=slider_widget,
    check=check_widget,
    categories=toggle,
)
def react(text, slider, check, categories):
    """Print the values of the four bound widgets; the slider value is multiplied by ten."""
    shown = (text, slider * 10, check, categories)
    print(' '.join(str(value) for value in shown))


Hello 50.0 True red

In [28]:
# button and checkbox are kept in variables because the click handler further
# down in this cell refers to them
b = widgets.Button(description="Update")
checkbox = widgets.Checkbox(description="CheckboxWidget")

# first accordion page: the button, the checkbox and the selection / text widgets
tab1_children = [b,
                 checkbox,
                 widgets.Dropdown(options=['A','B'], description="DropdownWidget"),
                 widgets.RadioButtons(options=['A','B'], description="RadioButtonsWidget"),
                 widgets.Select(options=['A','B'], description="SelectWidget"),
                 widgets.Text(description="TextWidget"),
                 widgets.Textarea(description="TextareaWidget"),
                 widgets.ToggleButton(description="ToggleButtonWidget"),
                 widgets.ToggleButtons(options=["Value 1", "Value2"], description="ToggleButtonsWidget"),
                 ]

# second accordion page: the numeric input widgets
tab2_children = [widgets.BoundedFloatText(description="BoundedFloatTextWidget"),
                 widgets.BoundedIntText(description="BoundedIntTextWidget"),
                 widgets.FloatSlider(description="FloatSliderWidget"),
                 widgets.FloatText(description="FloatTextWidget"),
                 widgets.IntSlider(description="IntSliderWidget"),
                 widgets.IntText(description="IntTextWidget"),
                 ]

# wrap each list of widgets in a Box container
tab1 = widgets.Box(children=tab1_children)
tab2 = widgets.Box(children=tab2_children)


# put both boxes into one Accordion and title its pages by index
i = widgets.Accordion(children=[tab1, tab2])

i.set_title(0,"Basic Widgets")
i.set_title(1,"Numbers Input")

from IPython.display import display

def button_clicked(bb):
    """Click handler: print the current value of the checkbox.

    bb: argument supplied when the handler is invoked; not used here.
    """
    is_checked = checkbox.value
    print(is_checked)
    #TODO update plot

# register button_clicked as the click handler of the Update button
b.on_click(button_clicked)

# show the accordion with all of its child widgets
display(i)

TASK

Make the plot from the previous task interactive, so that the year can be selected with a slider.


In [29]:
#your code