Matplotlib is a basic plotting library for Python, inspired by MATLAB. Seaborn is built on top of it and adds integrated statistical analysis, specialized plot types, and good integration with pandas.
Also see the full gallery of Seaborn or Matplotlib.
In [45]:
#disable some annoying warning
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
#plots the figures in place instead of a new window
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
In [46]:
# Load a small, well-known dataset with heterogeneous columns.
cars = pd.read_csv(filepath_or_buffer='data/mtcars.csv')
# Preview the first five rows (five is also what head() shows by default).
cars.head(n=5)
Out[46]:
In [47]:
# Scatter plot of fuel economy against weight, drawn through an explicit Axes.
fig, ax = plt.subplots()
ax.scatter(cars['mpg'], cars['wt'])
ax.set_xlabel('miles per gallon')
ax.set_ylabel('weight')
ax.set_title('MPG vs WT')
plt.show()
In [48]:
# The same scatter plot is available directly from the DataFrame.
cars.plot(kind='scatter', x='mpg', y='wt')
Out[48]:
In [49]:
# Encode two more columns: colour follows 'hp', marker area follows 'cyl'.
marker_area = cars['cyl'] * 20
cars.plot(kind='scatter', x='mpg', y='wt', c='hp', s=marker_area, alpha=0.5)
Out[49]:
In [50]:
# Plot the whole frame at once; 'line' is the kind DataFrame.plot() uses by default.
cars.plot(kind='line')
Out[50]:
In [51]:
# Histogram of miles per gallon in five bins, via the pandas Series helper.
cars.mpg.hist(bins=5)
Out[51]:
In [52]:
# The same five-bin histogram drawn with matplotlib on an explicit Axes.
fig, ax = plt.subplots()
ax.hist(cars['mpg'], bins=5)
ax.set_title('miles per gallon')
Out[52]:
In [53]:
# seaborn: not just a histogram but also a kernel density estimate, with
# better default settings.
# NOTE(review): distplot is reported as deprecated in recent seaborn releases
# (histplot/displot replace it) -- confirm against the installed version.
sns.distplot(a=cars['mpg'], bins=5)
Out[53]:
In [54]:
# Box plot of a single column through Series.plot.
cars.mpg.plot(kind='box')
Out[54]:
In [55]:
# DataFrame.boxplot draws the same box plot for the named column.
cars.boxplot(column='mpg')
Out[55]:
In [56]:
# One box per value of 'gear'.
cars.boxplot(column='mpg', by='gear')
Out[56]:
In [57]:
# Read the tab-separated gapminder table (first column becomes the index)
# and keep only the rows whose year is 2007.
gap = pd.read_csv('data/gapminder-unfiltered.tsv', sep='\t', index_col=0)
is_2007 = gap['year'] == 2007
gap2007 = gap[is_2007]
gap2007.columns
Out[57]:
In [58]:
# Life expectancy against GDP per capita for 2007, on linear axes.
gap2007.plot(x='lifeExp', y='gdpPercap', kind='scatter')
Out[58]:
The plot is unbalanced and dominated by outliers. What about a log scale?
In [59]:
# Same scatter plot, but with a logarithmic y axis set on the returned Axes.
ax = gap2007.plot(kind='scatter', x='lifeExp', y='gdpPercap')
ax.set_yscale('log')
In [60]:
# Take seaborn's current palette and preview it as a row of colour swatches.
colors = sns.color_palette()
sns.palplot(pal=colors)
In [61]:
# Draw one scatter layer per continent on the same axes, each with its own
# palette colour. zip() stops at the shorter input, so any continent beyond
# len(colors) would not be drawn.
for (name, group), color in zip(gap2007.groupby('continent'), colors):
    # Use color= rather than c=: a bare RGB tuple passed as c= is ambiguous,
    # because matplotlib may read it as per-point data values when its length
    # happens to match the number of points in the group.
    plt.scatter(x=group['lifeExp'], y=group['gdpPercap'], label=name,
                color=color, s=30)
plt.yscale('log')
plt.legend()
Out[61]:
In [62]:
# Categorical plots side by side: a box plot on the left and a violin plot on
# the right, both showing gdpPercap per continent.
# NOTE(review): the left panel reads `gap` (every year) while the right panel
# reads `gap2007` -- confirm that the mismatch is intentional.
fig, (ax_box, ax_violin) = plt.subplots(1, 2, figsize=(40, 20))
sns.boxplot(x='continent', y='gdpPercap', data=gap, ax=ax_box)
sns.violinplot(x='continent', y='gdpPercap', data=gap2007, ax=ax_violin)
Out[62]:
In [63]:
# Linear regression per facet: one panel (and one hue) for each value of the
# 'dataset' column, wrapped onto two columns.
anscombe = sns.load_dataset("anscombe")
# x and y are passed by keyword: newer seaborn releases no longer accept them
# positionally, and the keyword form also works on older releases.
sns.lmplot(x='x', y='y', col='dataset', hue='dataset', data=anscombe, col_wrap=2)
# A similar grid can be assembled by hand with sns.FacetGrid(anscombe,
# col="dataset") followed by .map(sns.regplot, "x", "y").
Out[63]:
In [64]:
# Structured heatmap: compute pairwise correlations, then cluster and draw them.
# Restrict to numeric columns first: DataFrame.corr() on a frame that still
# contains string columns (such as 'continent') raises on recent pandas
# versions instead of silently skipping them.
numeric_gap = gap.select_dtypes(include=[np.number])
corrmat = numeric_gap.corr()
# draw a clustered heatmap using seaborn
sns.clustermap(corrmat, square=True)
Out[64]:
In [65]:
# Bubble chart: one scatter layer per continent, with marker area scaled by
# population relative to the largest population in 2007.
pop_max = gap2007['pop'].max()
for (name, group), color in zip(gap2007.groupby('continent'), colors):
    # Use color= rather than c=: a bare RGB tuple passed as c= is ambiguous,
    # because matplotlib may read it as per-point data values when its length
    # happens to match the number of points in the group.
    plt.scatter(x=group['lifeExp'], y=group['gdpPercap'], label=name,
                color=color, s=(group['pop'] / pop_max) * 400)
plt.yscale('log')
plt.title('Life Expectancy vs GDP')
plt.xlabel('Life Expectancy')
plt.ylabel('GDP Per Cap')
plt.legend()
Out[65]:
In [66]:
# interact builds widgets from a function's keyword arguments; interact_manual
# is used further down in the same way.
# NOTE(review): IPython.html.widgets looks like the legacy location of the
# widget API (newer Jupyter stacks ship it as the separate `ipywidgets`
# package) -- confirm which environment this notebook targets.
from IPython.html.widgets import interact, interact_manual
In [67]:
@interact(text='Hello', slider=(0, 10), check=True, categories=['red', 'green', 'blue'])
def react(text, slider, check, categories):
    """Print the current widget values, with the slider value multiplied by ten."""
    scaled = slider * 10
    print(text, scaled, check, categories)
In [68]:
@interact_manual(text='Hello', slider=(0, 10), check=True, categories=['red', 'green', 'blue'])
def react(text, slider, check, categories):
    """Same echo as before, registered through interact_manual instead of interact."""
    scaled = slider * 10
    print(text, scaled, check, categories)
In [69]:
@interact(bins=(5, 25, 5), color=['red', 'green', 'orange', 'blue'])
def show_distplot(bins, color):
    """Draw the mpg histogram with the chosen number of bins and bar colour."""
    mpg = cars['mpg']
    mpg.hist(bins=bins, color=color)
In [70]:
# hard core: work with the widget classes directly
from IPython.html import widgets

# List the names exported by the widgets module that start with an upper-case
# character, are not private, and do not carry the 'Widget' suffix.
[
    attr
    for attr in dir(widgets)
    if attr[0] != '_'
    and attr[0] == attr[0].upper()
    and not attr.endswith('Widget')
]
Out[70]:
In [71]:
# An explicit widget instance can be handed to interact instead of a shorthand.
# IntText matches the un-suffixed widget names used in the rest of this
# notebook and yields a whole number, which is what a bin count has to be.
@interact(bins=widgets.IntText(value=5))
def show_distplot(bins):
    """Draw the mpg histogram with the number of bins typed into the text box.

    bins is coerced to an integer and clamped to at least 1, so a fractional,
    zero or negative entry still produces a plot instead of an error.
    """
    bin_count = max(1, int(bins))
    cars['mpg'].hist(bins=bin_count)
In [72]:
# Build each input widget explicitly, then hand the instances to interact.
text_widget = widgets.Textarea(description='text area', value='Hello')
slider_widget = widgets.BoundedFloatText(value=5, min=0, max=10,
                                         description='slider area')
check_widget = widgets.Checkbox(value=True, description="CheckboxWidget")
toggle = widgets.RadioButtons(options=['red', 'green', 'blue'],
                              description="RadioButtonsWidget")


@interact(text=text_widget, slider=slider_widget, check=check_widget, categories=toggle)
def react(text, slider, check, categories):
    """Print the current widget values, with the numeric value multiplied by ten."""
    scaled = slider * 10
    print(text, scaled, check, categories)
In [73]:
from IPython.display import display

# First accordion page: a button, a checkbox and the selection/text widgets.
b = widgets.Button(description="Update")
checkbox = widgets.Checkbox(description="CheckboxWidget")
tab1_children = [
    b,
    checkbox,
    widgets.Dropdown(options=['A', 'B'], description="DropdownWidget"),
    widgets.RadioButtons(options=['A', 'B'], description="RadioButtonsWidget"),
    widgets.Select(options=['A', 'B'], description="SelectWidget"),
    widgets.Text(description="TextWidget"),
    widgets.Textarea(description="TextareaWidget"),
    widgets.ToggleButton(description="ToggleButtonWidget"),
    widgets.ToggleButtons(options=["Value 1", "Value2"], description="ToggleButtonsWidget"),
]

# Second accordion page: numeric inputs, each created with only a description.
tab2_children = [
    make_widget(description=label)
    for make_widget, label in [
        (widgets.BoundedFloatText, "BoundedFloatTextWidget"),
        (widgets.BoundedIntText, "BoundedIntTextWidget"),
        (widgets.FloatSlider, "FloatSliderWidget"),
        (widgets.FloatText, "FloatTextWidget"),
        (widgets.IntSlider, "IntSliderWidget"),
        (widgets.IntText, "IntTextWidget"),
    ]
]

# Wrap each page in a Box and stack the two boxes in a titled accordion.
tab1 = widgets.Box(children=tab1_children)
tab2 = widgets.Box(children=tab2_children)
i = widgets.Accordion(children=[tab1, tab2])
for position, heading in enumerate(["Basic Widgets", "Numbers Input"]):
    i.set_title(position, heading)


def button_clicked(bb):
    """Click handler for the Update button: print the checkbox's current value."""
    print(checkbox.value)
    # TODO update plot


b.on_click(button_clicked)
display(i)
In [74]:
# Scale marker areas against the largest population in the whole table so that
# bubble sizes stay comparable from one year to the next.
pop_max = gap['pop'].max()


@interact(year=(gap.year.min(), gap.year.max()))
def plot_gapminder(year):
    """Draw the life-expectancy vs GDP bubble chart for the selected year.

    One scatter layer is drawn per continent, coloured from `colors`, with
    marker area proportional to population relative to `pop_max`. Both axes
    are pinned to the range of the full table so the view does not jump as
    the year changes.
    """
    gapyear = gap[gap.year == year]
    for (name, group), color in zip(gapyear.groupby('continent'), colors):
        # Use color= rather than c=: a bare RGB tuple passed as c= is
        # ambiguous, because matplotlib may read it as per-point data values
        # when its length happens to match the number of points.
        plt.scatter(x=group['lifeExp'], y=group['gdpPercap'], label=name,
                    color=color, s=(group['pop'] / pop_max) * 400)
    plt.yscale('log')
    plt.title('Life Expectancy vs GDP')
    plt.xlabel('Life Expectancy')
    plt.ylabel('GDP Per Cap')
    # gdpPercap is plotted on the y axis, so its range belongs on ylim; the
    # original set xlim twice and never fixed the y range.
    plt.xlim(gap.lifeExp.min(), gap.lifeExp.max())
    plt.ylim(gap.gdpPercap.min(), gap.gdpPercap.max())
    plt.legend()