In [1]:
%%javascript
// Load and run a remote script via jQuery's $.getScript.
// Judging by its file name it builds a table of contents for the notebook — TODO confirm.
// NOTE(review): this pulls third-party code into the browser on every run and needs
// network access; consider vendoring the script or using a TOC extension instead.
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')



In [2]:
# `display` and `HTML` belong to IPython's public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Override the notebook's `.container` CSS width (70em).
display(HTML("<style>.container { width:70em !important; }</style>"))



In [3]:
import pandas as pd
import numpy as np

%matplotlib inline
plt.rcParams['figure.figsize'] = (15, 5)

Fancy Plotting Skills
or
How to plot an Elephant

Read this and you'll know everything about matplotlib.


In [75]:
"""
Author: Piotr A. Zolnierczuk (zolnierczukp at ornl dot gov)
 
Based on a paper by:
Drawing an elephant with four complex parameters
Jurgen Mayer, Khaled Khairy, and Jonathon Howard,
Am. J. Phys. 78, 648 (2010), DOI:10.1119/1.3254017
"""
import numpy as np
import pylab
 
# The four complex shape parameters handed to elephant().
p1 = complex(50, -30)
p2 = complex(18, 8)
p3 = complex(12, -10)
p4 = complex(-14, -60)
# Fifth parameter, labelled "eyepiece" by the original author.
p5 = complex(40, 20)
 
def fourier(t, C):
    """Evaluate a truncated Fourier series at every value in ``t``.

    Computes ``sum_k C[k].real * cos(k*t) + C[k].imag * sin(k*t)`` for
    ``k = 0 .. len(C) - 1``.

    Parameters
    ----------
    t : numpy.ndarray
        Points at which to evaluate the series; the result has ``t.shape``.
    C : numpy.ndarray
        Complex coefficients. The real parts weight the cosine terms and
        the imaginary parts weight the sine terms.

    Returns
    -------
    numpy.ndarray
        Real-valued series evaluated at ``t``.
    """
    total = np.zeros(t.shape)
    # Walk the (cosine weight, sine weight) pairs together with their harmonic number k.
    for k, (cos_weight, sin_weight) in enumerate(zip(C.real, C.imag)):
        total = total + cos_weight*np.cos(k*t) + sin_weight*np.sin(k*t)
    return total
 
def elephant(t, p1, p2, p3, p4, p5):
    """Return the x and y coordinates of the elephant curve plus one extra point.

    The complex parameters ``p1`` .. ``p4`` are unpacked into two coefficient
    vectors (one per coordinate), each evaluated with ``fourier()``. ``p5``
    contributes a single extra point appended after the curve samples.

    Parameters
    ----------
    t : numpy.ndarray
        Curve parameter values at which the outline is sampled.
    p1, p2, p3, p4 : complex
        Shape parameters; their real and imaginary parts are distributed
        over the Fourier coefficients as written below.
    p5 : complex
        Only its imaginary part is read: the appended point is
        ``(-p5.imag, p5.imag)``.

    Returns
    -------
    x, y : numpy.ndarray
        Flattened coordinate arrays; for 1-D ``t`` each holds ``len(t) + 1`` values.
    """
    # Array length, so it must be an integer. The highest coefficient index
    # assigned below is 5, hence six slots.
    npar = 6
    Cx = np.zeros((npar,), dtype='complex')
    Cy = np.zeros((npar,), dtype='complex')

    # A real part of a coefficient weights a cosine term in fourier(),
    # an imaginary part weights a sine term.
    Cx[1] = p1.real*1j
    Cx[2] = p2.real*1j
    Cx[3] = p3.real
    Cx[5] = p4.real

    Cy[1] = p4.imag + p1.imag*1j
    Cy[2] = p2.imag*1j
    Cy[3] = p3.imag*1j

    # Append the single extra point derived from p5 after the curve samples.
    x = np.append(fourier(t,Cx), [-p5.imag])
    y = np.append(fourier(t,Cy), [p5.imag])
    return x,y
 
# Sample the curve at 1000 evenly spaced parameter values in [0, 2*pi].
t_samples = np.linspace(0, 2*np.pi, 1000)
x, y = elephant(t_samples, p1, p2, p3, p4, p5)

# First copy: y on the horizontal axis, -x on the vertical axis.
pylab.plot(y, -x, '.')
# Second copy: horizontally mirrored and shifted by 200 units.
pylab.plot(200 - y, -x, '.')
pylab.show()



In [5]:
# Emit ten separate HTML line breaks as vertical spacing in the output area.
line_break_markup = "<br />"
for _ in range(10):
    display(HTML(line_break_markup))












What will YOU learn?

  • You'll learn to do basic plots:
    • Barplot / Histogram
    • Pie Chart
    • Scatter Plot (only plot the points)
    • Boxplot (that statistical thang)
  • You'll learn fancy, advanced arrangement of plots
  • You'll learn advanced formatting:
    • fancy colors
    • fancy tricks
    • fancy sizes
    • Axis stuff

Table of Contents

Okay, let's go:










In [19]:
# Anchor target for the "[the first plot](#fancyplot)" link further down the notebook.
# Emitted as one self-contained element: every display() call renders its own output
# block, so an <a> opened in one call cannot be closed by a later call.
display(HTML("<a id='fancyplot'></a>"))

# --- Data -------------------------------------------------------------------
# 10,000 exponential draws (scale 3.3), truncated to integers.
hist_data = pd.DataFrame(np.random.exponential(3.3, size=10000), columns=['data']).astype(int)
# Count how often each integer occurs. value_counts() names its result 'data' on older
# pandas but 'count' on pandas >= 2.0; the explicit rename pins the column label that
# the .loc lookups below depend on.
hist_data = hist_data['data'].value_counts().rename('data')
# Expand the counts back to one boolean per draw (True where the value is 0) and count both outcomes.
pie_data = pd.Series(np.repeat(hist_data.index == 0, hist_data.values)).value_counts()
# Plain Series copy of the counts for the scatter and box plots.
box_data = hist_data.copy()
# Number of decimal digits of each count (not used again in this cell).
digit_data = box_data.apply(lambda x: len(str(x)))
# Move the count for value 0 into its own column so it is drawn as a separate series.
hist_data = pd.DataFrame(hist_data)
hist_data.loc[0, 'data2'] = hist_data.loc[0, 'data']
hist_data.loc[0, 'data'] = np.nan

# Tick formatter for the bar chart's y axis; set_powerlimits configures when it
# switches to scientific notation.
from matplotlib.ticker import ScalarFormatter
scalar_formatter = ScalarFormatter(useMathText=True)
scalar_formatter.set_powerlimits((3,3))

# --- Layout: 2x3 grid, columns 1:5:1 wide, rows 5:2 high --------------------
gs = plt.GridSpec(2, 3, width_ratios=(1, 5, 1), height_ratios=(5,2))
f = plt.figure(figsize=(20,15))
ax1 = f.add_subplot(gs[0, 0])    # top-left cell: pie
ax2 = f.add_subplot(gs[0, 1:])   # top row, columns 1-2: bars
ax3 = f.add_subplot(gs[1,:2])    # bottom row, columns 0-1: scatter
ax4 = f.add_subplot(gs[1, 2])    # bottom-right cell: box plot

# --- Plots ------------------------------------------------------------------
# NOTE(review): the labels are applied in value_counts order (most frequent first);
# 'Yea' lands on the False slice only while non-zero values are the majority.
pie_data.plot.pie(ax=ax1,
                  labels=('Yea', 'Nay'), explode=(0,0.2), shadow=True,autopct='%1.1f%%', startangle=90)
hist_data.plot.bar(ax=ax2,
                   rot=0, legend=False)
# Marker size and colour are derived from each count via modulo, purely for visual variety.
ax3.scatter(box_data.index, box_data.values, marker='*', cmap='viridis',
            s=300*box_data.apply(lambda x: (x % 7) + 1),  c=box_data.apply(lambda x: (x % 5) + 1), alpha=0.6)
box_data.plot.box(whis=5.5, ax=ax4, showmeans=True, meanline=True, notch=True, sym='*', bootstrap=10000)

# Pie Data
ax1.set_title('customer has friends\n (which have used login via App)')
ax1.set_ylabel('')
# Hist
ax2.set_title('Friends per Customer')
ax2.set_xlabel("Number of Friends")
ax2.set_ylabel("Number of Customers")
ax2.yaxis.set_major_formatter(scalar_formatter)  #fancy formatting
# Boxplot
ax4.xaxis.set_ticklabels('')
f.suptitle("Facebook Friends Analysis", fontsize=22)


# BG white
ax2.set_facecolor('white')
ax3.set_facecolor('white')
ax4.set_facecolor('white')

# Special Axis Stuff
ax1.axis("equal")
ax3.axis("off")
ax4.yaxis.tick_right() # axis to the right side

ax2.grid(True, which='major', color='lightgrey')
ax4.grid(True, which='major', color='lightgrey')

# Lay the grid out inside the lower 95% of the figure height, leaving room at the top.
gs.tight_layout(f, rect=(0,0,1,0.95))

# savefig does not create missing directories, so make sure the target folder exists.
import os
if not os.path.isdir("figures"):
    os.makedirs("figures")
f.savefig("figures/facebook_friends_example.svg")




Basics - The Data and the Plot Types


In [39]:
# Quick look: 10,000 exponential draws (scale 4), truncated to integers and
# handed to pandas' built-in histogram.
(
    pd.DataFrame(np.random.exponential(4, size=10000), columns=['data'])
    .astype(int)
    .hist()
)


Out[39]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x11fb0a090>]], dtype=object)

In [21]:
# Draw 10,000 exponential samples (scale 4), truncate them to integers and count
# how often each integer occurs (most frequent first).
hist_data = (
    pd.DataFrame(np.random.exponential(4, size=10000), columns=['data'])
    .astype(int)['data']
    .value_counts()
)
hist_data.head()


Out[21]:
0    2192
1    1683
2    1306
3    1031
4     824
Name: data, dtype: int64

In [23]:
# pandas' default plot for the counts, with no styling applied.
hist_data.plot()


Out[23]:
<matplotlib.axes._subplots.AxesSubplot at 0x11d82ed50>

In [24]:
# Trick: np.repeat repeats each "value == 0" flag as many times as that value was
# counted, giving one boolean per customer (True = no friends, False = has friends).
# Counting the booleans then yields the two pie slices.
is_zero_per_customer = np.repeat(hist_data.index == 0, hist_data.values)
pie_data = pd.Series(is_zero_per_customer).value_counts()
pie_data


Out[24]:
False    7808
True     2192
dtype: int64

In [25]:
# Basic pie chart on a square 5x5 inch figure.
pie_data.plot.pie(figsize=(5,5))


Out[25]:
<matplotlib.axes._subplots.AxesSubplot at 0x11d6568d0>

In [63]:
# Keep an untouched Series copy of the counts for the scatter and box plots.
box_data = hist_data.copy()
# Turn the counts into a one-column frame. The column is named explicitly because
# value_counts() labels its result 'data' on older pandas but 'count' on pandas >= 2.0,
# and the .loc lookups below need the 'data' label.
hist_data = pd.DataFrame(hist_data.rename('data'))
# Move the count for value 0 into its own column so it is drawn as a separate series.
hist_data.loc[0, 'data2'] = hist_data.loc[0, 'data']
hist_data.loc[0, 'data'] = np.nan
hist_data.head()


Out[63]:
data data2
0 NaN 2192.0
1 1683.0 NaN
2 1306.0 NaN
3 1031.0 NaN
4 824.0 NaN

In [28]:
# Point-and-line style ('.-') with enlarged markers.
hist_data.plot(style='.-', markersize=20)


Out[28]:
<matplotlib.axes._subplots.AxesSubplot at 0x11d055350>

Not so special, is it?


In [40]:
# NOTE(review): the saved output for this cell shows a Series, but when the cells run
# top to bottom hist_data has already been converted to a DataFrame — the output
# appears to come from out-of-order execution.
hist_data


Out[40]:
0     2192
1     1683
2     1306
3     1031
4      824
5      614
6      513
7      412
8      320
9      257
10     177
11     118
12     117
13     104
14      78
16      58
15      53
18      29
17      21
20      15
23      14
19      14
21      11
22      11
24       7
27       5
25       5
29       5
31       3
26       1
36       1
30       1
Name: data, dtype: int64

Basti asked how to add text to figures:


In [47]:
# Annotate every bar with its count.
# box_data is the plain Series of counts. When the notebook runs top to bottom,
# hist_data is a two-column DataFrame at this point, and iterating it yields
# (column name, column) pairs instead of (value, count) pairs.
ax = box_data.plot.bar()
# Bars are drawn at positions 0, 1, 2, ... in row order, not at their index values,
# so the label's x-coordinate has to be the bar position.
# .items() replaces .iteritems(), which was removed in pandas 2.0.
for position, (_, count) in enumerate(box_data.items()):
    ax.text(position + 0.2, count + 0.3, count, fontsize=16)



In [17]:
# Replace the NaN cells with 1000 before drawing the bars.
# NOTE(review): 1000 looks like an arbitrary placeholder rather than real data — confirm intent.
hist_data.fillna(1000).plot.bar()


Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x11d6fb110>

Ahh, there we go.

Finally we have scatter and Boxplots:


In [320]:
# Bare matplotlib scatter: index on x, counts on y, default styling.
plt.scatter(box_data.index, box_data.values)


Out[320]:
<matplotlib.collections.PathCollection at 0x1382f1d50>

In [319]:
# Box plot of the counts with pandas' default settings.
box_data.plot.box()


Out[319]:
<matplotlib.axes._subplots.AxesSubplot at 0x1378f4090>

Masterclass - Arranging them


In [277]:
# pyplot state-machine style: one plt.subplot call per cell of a 1x2 grid.
ax0 = plt.subplot(1, 2, 1)
ax1 = plt.subplot(1, 2, 2)
# Gets tedious as the number of subplots grows.
pie_data.plot.pie(ax=ax0)
hist_data.plot.bar(ax=ax1)


Out[277]:
<matplotlib.axes._subplots.AxesSubplot at 0x132bf7850>

In [53]:
# Explicit figure object, but still one add_subplot call per axes.
fig = plt.figure(figsize=(20, 5))
ax0 = fig.add_subplot(1, 2, 1)
ax1 = fig.add_subplot(1, 2, 2)
# Same drawback as before: not economical for many subplots.
pie_data.plot.pie(ax=ax0)
hist_data.plot.bar(ax=ax1)


Out[53]:
<matplotlib.axes._subplots.AxesSubplot at 0x1210f2450>

In [279]:
# plt.subplots creates the figure and all axes in one call — much better.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 5))
pie_data.plot.pie(ax=ax[0])
hist_data.plot.bar(ax=ax[1])


Out[279]:
<matplotlib.axes._subplots.AxesSubplot at 0x133414250>

In [280]:
# Same 1x2 layout, with the right column five times as wide as the left one.
column_widths = {'width_ratios': [1, 5]}
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 5), gridspec_kw=column_widths)
pie_data.plot.pie(ax=ax[0])
hist_data.plot.bar(ax=ax[1])


Out[280]:
<matplotlib.axes._subplots.AxesSubplot at 0x1338be110>

So, this is one row... but we saw multiple rows and columns, let's try this:


In [281]:
# 2x2 grid; `ax` is now a 2-D array indexed as ax[row, column].
column_widths = {'width_ratios': [1, 3]}
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(20, 5), gridspec_kw=column_widths)
# The pandas plotting wrappers take their target axes through ax=...
pie_data.plot.pie(ax=ax[0, 0])
hist_data.plot.bar(ax=ax[0, 1])
# Important: calling the matplotlib Axes method directly works just as well as going
# through the pandas wrapper.
ax[1, 0].scatter(box_data.index, box_data.values)
box_data.plot.box(ax=ax[1, 1])


Out[281]:
<matplotlib.axes._subplots.AxesSubplot at 0x133e56350>

Fine, this is all of our data now... but how to make it look like [the first plot](#fancyplot)?

Solution for ALL of our Problems: Gridspec


In [282]:
# GridSpec describes the layout; axes are then created from its cells.
gs = plt.GridSpec(nrows=1, ncols=2, width_ratios=(1, 5))
f = plt.figure(figsize=(20, 5))
ax1 = f.add_subplot(gs[0])   # narrow left cell
ax2 = f.add_subplot(gs[1])   # wide right cell
# Equal axis scaling on the pie's axes.
ax1.axis("equal")
pie_data.plot.pie(ax=ax1)
hist_data.plot.bar(ax=ax2)


Out[282]:
<matplotlib.axes._subplots.AxesSubplot at 0x1341a6050>

Cool, very easy... but that's not enough:


In [55]:
# 2x3 grid with columns 1:5:1 wide. Slicing the GridSpec lets one axes span several cells.
gs = plt.GridSpec(nrows=2, ncols=3, width_ratios=(1, 5, 1))
f = plt.figure(figsize=(20, 5))
ax1 = f.add_subplot(gs[0, 0])    # top-left cell
ax2 = f.add_subplot(gs[0, 1:])   # top row, columns 1-2
ax3 = f.add_subplot(gs[1, :2])   # bottom row, columns 0-1
ax4 = f.add_subplot(gs[1, 2])    # bottom-right cell

pie_data.plot.pie(ax=ax1)
hist_data.plot.bar(ax=ax2)
box_data.plot(ax=ax3, style='.', markersize=20)
box_data.plot.box(ax=ax4)
f.suptitle("Facebook Friends Analysis", fontsize=22)


Out[55]:
<matplotlib.text.Text at 0x12187a110>

Perfect, that's what we wanted.

Now, let's talk about the

individual formatting of the plots:


In [284]:
# Starting point: the pie chart with default settings, before any formatting.
pie_data.plot.pie()


Out[284]:
<matplotlib.axes._subplots.AxesSubplot at 0x133721d90>

In [56]:
# Show the two counts behind the pie chart.
pie_data


Out[56]:
False    7808
True     2192
dtype: int64

In [60]:
# Formatted pie: custom labels, second slice pulled out, drop shadow,
# percentage annotations, and a start angle of 90 degrees.
pie_data.plot.pie(
    labels=('Yea', 'Nay'),
    explode=(0, 0.2),
    shadow=True,
    autopct='%1.1f%%',
    startangle=90,
)
# Grab the axes pandas just drew on so it can be tweaked further.
ax1 = plt.gca()
ax1.axis("equal")
ax1.set_title('customer has friends\n (which have used login via App)')
ax1.set_ylabel('')


Out[60]:
<matplotlib.text.Text at 0x121dfd310>

In [65]:
# Starting point: the bar chart with default settings, before any formatting.
hist_data.plot.bar()


Out[65]:
<matplotlib.axes._subplots.AxesSubplot at 0x1224e3b90>

In [64]:
from matplotlib.ticker import ScalarFormatter

# Tick formatter for the y axis; set_powerlimits configures when it switches
# to scientific notation.
scalar_formatter = ScalarFormatter(useMathText=True)
scalar_formatter.set_powerlimits((3, 3))

hist_data.plot.bar(rot=0, legend=False)
ax2 = plt.gca()
# Title and axis labels in one call.
ax2.set(title='Friends per Customer',
        xlabel="Number of Friends",
        ylabel="Number of Customers")

# White background with a light grey major grid.
ax2.set_facecolor('white')
ax2.grid(True, which='major', color='lightgrey')
ax2.yaxis.set_major_formatter(scalar_formatter)



In [288]:
# Starting point: the scatter plot with default settings, before any formatting.
plt.scatter(box_data.index, box_data.values)


Out[288]:
<matplotlib.collections.PathCollection at 0x1353c0cd0>

In [70]:
# Marker size and colour are derived from each count via modulo, purely for visual variety.
marker_sizes = 300*box_data.apply(lambda count: (count % 7) + 1)
marker_colors = box_data.apply(lambda count: (count % 5) + 1)
plt.scatter(box_data.index, box_data.values,
            s=marker_sizes, c=marker_colors,
            marker='*', cmap='viridis', alpha=0.6)
ax3 = plt.gca()
ax3.set_facecolor('white')
# Hide the axis lines, ticks and labels; the call returns the current axis limits.
ax3.axis("off")


Out[70]:
(-1.815308025684703,
 37.815308025684701,
 -108.59053048671947,
 2301.5991362798268)

In [290]:
# Starting point: the box plot with default settings, before any formatting.
box_data.plot.box()


Out[290]:
<matplotlib.axes._subplots.AxesSubplot at 0x1353f1090>

In [72]:
# Formatted box plot drawn with matplotlib directly: notched box, mean shown
# as a line, '*' as the flier symbol.
plt.boxplot(
    box_data.values,
    whis=1.5,
    showmeans=True,
    meanline=True,
    notch=True,
    sym='*',
    bootstrap=10000,
)
ax4 = plt.gca()
ax4.yaxis.tick_right()          # y ticks on the right-hand side
ax4.set_facecolor('white')
ax4.grid(color='lightgrey')
ax4.xaxis.set_ticklabels('')    # no x tick labels


Out[72]:
[]

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [73]:
# --- Data -------------------------------------------------------------------
# 10,000 exponential draws (scale 3.3), truncated to integers.
hist_data = pd.DataFrame(np.random.exponential(3.3, size=10000), columns=['data']).astype(int)
# Count how often each integer occurs. value_counts() names its result 'data' on older
# pandas but 'count' on pandas >= 2.0; the explicit rename pins the column label that
# the .loc lookups below depend on.
hist_data = hist_data['data'].value_counts().rename('data')
# Expand the counts back to one boolean per draw (True where the value is 0) and count both outcomes.
pie_data = pd.Series(np.repeat(hist_data.index == 0, hist_data.values)).value_counts()
# Plain Series copy of the counts for the scatter and box plots.
box_data = hist_data.copy()
# Number of decimal digits of each count (not used again in this cell).
digit_data = box_data.apply(lambda x: len(str(x)))
# Move the count for value 0 into its own column so it is drawn as a separate series.
hist_data = pd.DataFrame(hist_data)
hist_data.loc[0, 'data2'] = hist_data.loc[0, 'data']
hist_data.loc[0, 'data'] = np.nan

# Tick formatter for the bar chart's y axis; set_powerlimits configures when it
# switches to scientific notation.
from matplotlib.ticker import ScalarFormatter
scalar_formatter = ScalarFormatter(useMathText=True)
scalar_formatter.set_powerlimits((3,3))

# --- Layout: 2x3 grid, columns 1:5:1 wide, rows 5:2 high --------------------
gs = plt.GridSpec(2, 3, width_ratios=(1, 5, 1), height_ratios=(5,2))
f = plt.figure(figsize=(20,10))
ax1 = f.add_subplot(gs[0, 0])    # top-left cell: pie
ax2 = f.add_subplot(gs[0, 1:])   # top row, columns 1-2: bars
ax3 = f.add_subplot(gs[1,:2])    # bottom row, columns 0-1: scatter
ax4 = f.add_subplot(gs[1, 2])    # bottom-right cell: box plot

# --- Plots ------------------------------------------------------------------
pie_data.plot.pie(ax=ax1,
                  labels=('Yea', 'Nay'), explode=(0,0.2), shadow=True,autopct='%1.1f%%', startangle=90)
hist_data.plot.bar(ax=ax2,
                   rot=0, legend=False)
# Marker size and colour are derived from each count via modulo, purely for visual variety.
ax3.scatter(box_data.index, box_data.values, marker='*', cmap='viridis',
            s=300*box_data.apply(lambda x: (x % 7) + 1),  c=box_data.apply(lambda x: (x % 5) + 1), alpha=0.6)
box_data.plot.box(whis=5.5, ax=ax4, showmeans=True, meanline=True, notch=True, sym='*', bootstrap=10000)

# Pie Data
ax1.set_title('customer has friends\n (which have used login via App)')
ax1.set_ylabel('')
# Hist
ax2.set_title('Friends per Customer')
ax2.set_xlabel("Number of Friends")
ax2.set_ylabel("Number of Customers")
ax2.yaxis.set_major_formatter(scalar_formatter)  #fancy formatting
# Boxplot
ax4.xaxis.set_ticklabels('')
f.suptitle("Facebook Friends Analysis", fontsize=22)


# BG white
ax2.set_facecolor('white')
ax3.set_facecolor('white')
ax4.set_facecolor('white')

# Special Axis Stuff
ax1.axis("equal")
ax3.axis("off")
ax4.yaxis.tick_right() # axis to the right side

ax2.grid(True, which='major', color='lightgrey')
ax4.grid(True, which='major', color='lightgrey')

# Lay the grid out inside the lower 95% of the figure height, leaving room at the top.
gs.tight_layout(f, rect=(0,0,1,0.95))



In [74]:
# Top row of the dashboard (pie + bars) drawn inside matplotlib's xkcd style context.
with plt.xkcd():
    # 10,000 exponential draws (scale 3.3), truncated to integers.
    hist_data = pd.DataFrame(np.random.exponential(3.3, size=10000), columns=['data']).astype(int)
    # value_counts() names its result 'data' on older pandas but 'count' on pandas >= 2.0;
    # the explicit rename pins the column label that the .loc lookups below depend on.
    hist_data = hist_data['data'].value_counts().rename('data')
    pie_data = pd.Series(np.repeat(hist_data.index == 0, hist_data.values)).value_counts()
    box_data = hist_data.copy()
    digit_data = box_data.apply(lambda x: len(str(x)))
    # Move the count for value 0 into its own column so it is drawn as a separate series.
    hist_data = pd.DataFrame(hist_data)
    hist_data.loc[0, 'data2'] = hist_data.loc[0, 'data']
    hist_data.loc[0, 'data'] = np.nan

    from matplotlib.ticker import ScalarFormatter
    scalar_formatter = ScalarFormatter(useMathText=True)
    scalar_formatter.set_powerlimits((3,3))

    # Same 2x3 grid as the full dashboard, but only the top row is populated here.
    gs = plt.GridSpec(2, 3, width_ratios=(1, 5, 1), height_ratios=(5,2))
    f = plt.figure(figsize=(20,10))
    ax1 = f.add_subplot(gs[0, 0])
    ax2 = f.add_subplot(gs[0, 1:])

    pie_data.plot.pie(ax=ax1,
                      labels=('Yea', 'Nay'), explode=(0,0.2), shadow=True,autopct='%1.1f%%', startangle=90)
    hist_data.plot.bar(ax=ax2,
                       rot=0, legend=False)
    # Pie Data
    ax1.set_title('customer has friends\n (which have used login via App)')
    ax1.set_ylabel('')
    # Hist
    ax2.set_title('Friends per Customer')
    ax2.set_xlabel("Number of Friends")
    ax2.set_ylabel("Number of Customers")
    ax2.yaxis.set_major_formatter(scalar_formatter)  #fancy formatting
    f.suptitle("Facebook Friends Analysis", fontsize=22)

    # BG white
    ax2.set_facecolor('white')
    # Special Axis Stuff
    # (No ax3 calls here: this figure never creates an ax3, so touching it would only
    # modify a leftover axes from an earlier figure, or fail on a fresh kernel.)
    ax1.axis("equal")
    ax2.grid(True, which='major', color='lightgrey')
    gs.tight_layout(f, rect=(0,0,1,0.95))


Links (click images for source code)

Gridspec in Gridspec in Gridspec in Gridspec in Gridspec in Gridspec in Gridspec

How did they achieve that? Basically not much code:

def squiggle_xy(a, b, c, d, i=np.arange(0.0, 2*np.pi, 0.05)):
    """Return the x and y coordinates of one squiggle curve.

    x = sin(a*i) * cos(b*i) and y = sin(c*i) * cos(d*i), evaluated at every
    value of ``i`` (by default 0 to 2*pi in steps of 0.05).
    """
    x_coords = np.sin(i*a)*np.cos(i*b)
    y_coords = np.sin(i*c)*np.cos(i*d)
    return x_coords, y_coords

# NOTE(review): `gridspec` and `product` are not imported anywhere in the visible notebook;
# presumably `from matplotlib import gridspec` and `from itertools import product` — confirm.
fig = plt.figure(figsize=(8, 8))

# gridspec inside gridspec: a 4x4 outer grid with no spacing between cells
outer_grid = gridspec.GridSpec(4, 4, wspace=0.0, hspace=0.0)

for i in range(16):
    # Each outer cell is subdivided into its own 3x3 grid.
    inner_grid = gridspec.GridSpecFromSubplotSpec(3, 3,
            subplot_spec=outer_grid[i], wspace=0.0, hspace=0.0)
    # 1-based row and column of the outer cell.
    a, b = int(i/4)+1,i%4+1
    # (c, d) runs over all nine pairs from {1, 2, 3} x {1, 2, 3}, one per inner cell.
    for j, (c, d) in enumerate(product(range(1, 4), repeat=2)):
        ax = plt.Subplot(fig, inner_grid[j])
        ax.plot(*squiggle_xy(a, b, c, d))
        ax.set_xticks([])
        ax.set_yticks([])
        fig.add_subplot(ax)

all_axes = fig.get_axes()

# show only the outside spines: hide every spine, then re-enable the ones on the figure's outer edge
# NOTE(review): I believe Axes.is_first_row()/is_last_row()/is_first_col()/is_last_col() were
# removed in newer matplotlib releases in favour of ax.get_subplotspec().is_first_row() etc. —
# confirm against the installed version.
for ax in all_axes:
    for sp in ax.spines.values():
        sp.set_visible(False)
    if ax.is_first_row():
        ax.spines['top'].set_visible(True)
    if ax.is_last_row():
        ax.spines['bottom'].set_visible(True)
    if ax.is_first_col():
        ax.spines['left'].set_visible(True)
    if ax.is_last_col():
        ax.spines['right'].set_visible(True)

plt.show()

Other Literature


In [ ]: