In [2]:
import plotnine as gg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline
# Ignore some deprecation warnings from plotnine
# using deprecated numpy functions in matplotlib
import warnings
import matplotlib.cbook
# `matplotlib.cbook.mplDeprecation` was a deprecated alias that newer matplotlib
# releases no longer provide; `matplotlib.MatplotlibDeprecationWarning` is the
# same warning class under its public name.
warnings.filterwarnings("ignore", category=matplotlib.MatplotlibDeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
plotnine is an implementation of a grammar of graphics in Python, it is based on ggplot2. The grammar allows users to compose plots by explicitly mapping data to the visual objects that make up the plot. - Plotnine Official Website
Plotting with a grammar is powerful, it makes custom (and otherwise complex) plots easy to think about and then create, while the simple plots remain simple.
Let's create a quick example using the mtcars
dataset.
In [3]:
# Raw gist URL of the mtcars CSV used throughout the notebook.
DATASET_URL = 'https://gist.githubusercontent.com/ZeccaLehn/4e06d2575eb9589dbe8c365d61cb056c/raw/64f1660f38ef523b2a1a13be77b002b98665cdfe/mtcars.csv'
# Read the CSV and give the 'Unnamed: 0' column the name 'brand'.
mtcars = (
    pd.read_csv(DATASET_URL)
    .rename(columns={'Unnamed: 0': 'brand'})
)
# Preview the first five rows.
mtcars.head(n=5)
Out[3]:
In [15]:
# disp vs. mpg, coloured by gear (as a factor), with an "lm" smoother,
# split into one facet per gear value.
(
    gg.ggplot(data=mtcars, mapping=gg.aes(x='disp', y='mpg', color='factor(gear)'))
    + gg.geom_point()
    + gg.geom_smooth(method="lm")
    + gg.facet_wrap("~gear")
)
Out[15]:
A rough equivalent in plain matplotlib (a single scatter plot, without the per-gear colours, facets, or fitted lines) would be
In [10]:
# Scatter of disp against mpg on the current pyplot axes.
plt.scatter(mtcars['disp'], mtcars['mpg'])
# Label both axes and set the title in one call on the active axes.
plt.gca().set(xlabel='Disp', ylabel='MPG', title='Disp vs MPG')
plt.show()
In [11]:
# Box plot of hp for each gear value (gear is treated as a factor).
(
    gg.ggplot(data=mtcars, mapping=gg.aes(x='factor(gear)', y='hp'))
    + gg.geom_boxplot()
)
Out[11]:
A comparable box plot in matplotlib (note that this one groups hp by transmission type, am, rather than by gear)
In [34]:
# Horsepower values split by the `am` column: 0 -> auto_tmission, 1 -> man_tmission.
auto_tmission = mtcars.loc[mtcars['am'] == 0, 'hp']
man_tmission = mtcars.loc[mtcars['am'] == 1, 'hp']
fig, axes = plt.subplots()
# Draw the boxes first: boxplot() installs its own tick positions (1 and 2),
# so the category labels must be applied afterwards to line up with them.
# Setting tick labels on the still-empty axes depends on matplotlib internals
# and emits a warning on recent versions.
axes.boxplot([auto_tmission, man_tmission])
axes.set_xticklabels(['0', '1'])
axes.set(xlabel='am', ylabel='hp')
plt.show()
In [40]:
# One bar per brand, bar height taken directly from the wt column.
(
    gg.ggplot(data=mtcars, mapping=gg.aes(y='wt', x='brand'))
    + gg.geom_bar(stat='identity')
)
Out[40]:
The matplotlib equivalent
In [42]:
# Vertical bars: brand on the x-axis, wt as the bar height.
plt.bar(mtcars['brand'], mtcars['wt'])
# Label both axes and set the title in one call on the active axes.
plt.gca().set(xlabel='Brand', ylabel='wt', title='Brand by weight')
plt.show()
The brands are all squished together; let's try making the bar graph horizontal
In [41]:
# Same brand/wt bar chart, with the coordinates flipped so the bars run horizontally.
(
    gg.ggplot(data=mtcars, mapping=gg.aes(y='wt', x='brand'))
    + gg.geom_bar(stat='identity')
    + gg.coord_flip()
)
Out[41]:
In [35]:
# hp per gear, filled by brand with black bar outlines, flipped to horizontal
# and drawn with the xkcd theme.
(
    gg.ggplot(data=mtcars, mapping=gg.aes(x='gear', y='hp', fill="factor(brand)"))
    + gg.geom_bar(stat='identity', color="black")
    + gg.coord_flip()
    + gg.theme_xkcd()
)
Out[35]:
In [43]:
# Horizontal bars: barh() puts the brands on the y-axis and the weights on
# the x-axis, so the labels follow that orientation.
plt.barh(mtcars['brand'], mtcars['wt'])
plt.xlabel('wt')
plt.ylabel('Brand')
plt.title('Brand by weight')
plt.show()
In [31]:
# Histogram of wt using 10 bins, filled blue.
(
    gg.ggplot(data=mtcars, mapping=gg.aes(x='wt'))
    + gg.geom_histogram(bins=10, fill="blue")
)
Out[31]:
In [15]:
# Histogram of wt with 10 bins: the x-axis carries the wt values and the
# y-axis the number of rows falling in each bin.
plt.hist(mtcars['wt'], bins=10)
plt.xlabel('wt')
plt.ylabel('count')
plt.title('Histogram')
plt.show()
Let's see the most common gear in this dataset
Unfortunately, the function coord_polar,
which is needed to create pie charts, is not in the plotnine API, so pie charts cannot be made with plotnine.
Check out this issue