Subplots


In [1]:
%matplotlib notebook

import matplotlib.pyplot as plt
import numpy as np

plt.subplot?

In [2]:
# the integers 1 through 8; later cells reuse this array
linear_data = np.arange(1, 9)

# open a new figure and make the left panel of a 1-row, 2-column grid the current axes
plt.figure()
plt.subplot(1, 2, 1)

# solid line with circle markers, drawn on the current (left) axes
plt.plot(linear_data, '-o')


Out[2]:
[<matplotlib.lines.Line2D at 0x7f349e3c2390>]

In [3]:
# NOTE: despite the name, this squares each element (quadratic growth), it is not a true exponential
exponential_data = linear_data**2 

# subplot with 1 row, 2 columns, and current axis is 2nd subplot axes
plt.subplot(1, 2, 2)
# solid line with circle markers on the right-hand axes
plt.plot(exponential_data, '-o')


Out[3]:
[<matplotlib.lines.Line2D at 0x7f349e0c34a8>]

In [4]:
# plot exponential data on 1st subplot axes:
# selecting subplot (1, 2, 1) again makes the left panel current, and the squared
# data is drawn with 'x' markers so it can be told apart from the '-o' linear series
plt.subplot(1, 2, 1)
plt.plot(exponential_data, '-x')


Out[4]:
[<matplotlib.lines.Line2D at 0x7f34c51fedd8>]

In [5]:
# start a new figure; ax1 is the left panel and becomes the current axes
plt.figure()
ax1 = plt.subplot(1, 2, 1)
plt.plot(linear_data, '-o')
# pass sharey=ax1 to ensure the two subplots share the same y axis
ax2 = plt.subplot(1, 2, 2, sharey=ax1)
# ax2 is now current, so this draws on the right panel
plt.plot(exponential_data, '-x')


Out[5]:
[<matplotlib.lines.Line2D at 0x7f349bf745f8>]

In [6]:
plt.figure()
# the right hand side is equivalent shorthand syntax: the three digits 121 stand for
# (rows=1, columns=2, index=1); the comparison is the cell's displayed result
plt.subplot(1,2,1) == plt.subplot(121)


Out[6]:
True

In [7]:
# build a 3x3 grid of axes in which every panel shares both its x and y axis
fig, axes_grid = plt.subplots(nrows=3, ncols=3, sharex=True, sharey=True)
# unpack the grid row by row so each panel has its own name
(ax1, ax2, ax3), (ax4, ax5, ax6), (ax7, ax8, ax9) = axes_grid
# draw linear_data as a plain solid line on the centre panel (the 5th axes)
ax5.plot(linear_data, '-')


Out[7]:
[<matplotlib.lines.Line2D at 0x7f349bf135f8>]

In [8]:
# make the tick labels of every axes in the current figure visible,
# including the inner ones
current_fig = plt.gcf()
for ax in current_fig.get_axes():
    tick_labels = ax.get_xticklabels() + ax.get_yticklabels()
    for label in tick_labels:
        label.set_visible(True)

In [9]:
# necessary on some systems to update the plot:
# explicitly redraw the current figure's canvas
plt.gcf().canvas.draw()

Histograms


In [10]:
# 2x2 grid of axes sharing the x axis, flattened into a list in row-major order
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, sharex=True)
axs = [ax1, ax2, ax3, ax4]

# one histogram per panel, using 10, 100, 1000 and 10000 draws from the
# standard normal distribution; each title reports the sample size
for n, panel in enumerate(axs):
    sample_size = 10 ** (n + 1)
    sample = np.random.normal(loc=0.0, scale=1.0, size=sample_size)
    panel.hist(sample)
    panel.set_title('n={}'.format(sample_size))



In [11]:
# same 2x2 layout as before, but every histogram now uses 100 bins
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, sharex=True)
axs = [ax1, ax2, ax3, ax4]

# sample sizes grow by a factor of ten per panel: 10, 100, 1000, 10000
for n, panel in enumerate(axs):
    sample_size = 10 ** (n + 1)
    sample = np.random.normal(loc=0.0, scale=1.0, size=sample_size)
    panel.hist(sample, bins=100)
    panel.set_title('n={}'.format(sample_size))



In [12]:
plt.figure()
# Y: 10000 draws from a normal distribution with mean 0 and standard deviation 1
Y = np.random.normal(loc=0.0, scale=1.0, size=10000)
# X: 10000 draws from numpy's uniform generator on [0, 1)
X = np.random.random(size=10000)
# scatter the uniform values (horizontal) against the normal values (vertical)
plt.scatter(X,Y)


Out[12]:
<matplotlib.collections.PathCollection at 0x7f349b1256a0>

In [13]:
# use gridspec to partition the figure into subplots
import matplotlib.gridspec as gridspec

plt.figure()
# a 3x3 layout grid; slices of it select the cells each subplot spans
gspec = gridspec.GridSpec(3, 3)

# row 0, columns 1-2: strip above the main panel
top_histogram = plt.subplot(gspec[0, 1:])
# rows 1-2, column 0: strip to the left of the main panel
side_histogram = plt.subplot(gspec[1:, 0])
# rows 1-2, columns 1-2: the main panel (grid cell [0, 0] is left unused)
lower_right = plt.subplot(gspec[1:, 1:])



In [14]:
# fresh samples: Y from a normal distribution (mean 0, std 1), X from the uniform generator
Y = np.random.normal(loc=0.0, scale=1.0, size=10000)
X = np.random.random(size=10000)
# scatter of X against Y in the large lower-right panel
lower_right.scatter(X, Y)
# distribution of X in the panel above the scatter plot
top_histogram.hist(X, bins=100)
# distribution of Y in the side panel, drawn with horizontal bars;
# the result is assigned to `s` so the return value is not echoed as cell output
s = side_histogram.hist(Y, bins=100, orientation='horizontal')

In [15]:
# clear the histograms and plot normalized (density) histograms
top_histogram.clear()
# density=True scales the bars so the histogram integrates to 1; it replaces the
# `normed` keyword, which current matplotlib releases no longer accept
top_histogram.hist(X, bins=100, density=True)
side_histogram.clear()
side_histogram.hist(Y, bins=100, orientation='horizontal', density=True)
# flip the side histogram's x axis
side_histogram.invert_xaxis()

In [16]:
# give the panels that display X the same horizontal range ...
for ax in (top_histogram, lower_right):
    ax.set_xlim(left=0, right=1)
# ... and the panels that display Y the same vertical range
for ax in (side_histogram, lower_right):
    ax.set_ylim(bottom=-5, top=5)

In [17]:
%%HTML
<img src='http://educationxpress.mit.edu/sites/default/files/journal/WP1-Fig13.jpg' />


Box and Whisker Plots


In [18]:
import pandas as pd
# three samples of 10000 draws each: normal (mean 0, std 1), uniform, and gamma (shape 2)
normal_sample = np.random.normal(loc=0.0, scale=1.0, size=10000)
random_sample = np.random.random(size=10000)
gamma_sample = np.random.gamma(2, size=10000)

# one DataFrame column per sample
columns = {
    'normal': normal_sample,
    'random': random_sample,
    'gamma': gamma_sample,
}
df = pd.DataFrame(columns)
df


Out[18]:
gamma normal random
0 0.440609 0.575641 0.742618
1 2.380960 -0.687690 0.474229
2 1.315481 -0.216004 0.382739
3 1.211573 0.030815 0.327746
4 2.509590 0.816452 0.701328
5 2.341145 -0.550380 0.518515
6 4.113070 -2.264414 0.896886
7 0.740701 0.155617 0.783874
8 0.905761 2.337774 0.188864
9 1.185569 1.301147 0.075053
10 2.219070 0.970820 0.086233
11 1.328680 0.408251 0.521071
12 1.925582 1.290878 0.293128
13 0.926205 -0.072961 0.026245
14 2.158209 -0.810410 0.106197
15 4.802618 2.408269 0.384366
16 1.601048 -1.552808 0.743893
17 0.799934 -1.132184 0.814792
18 0.981244 0.827492 0.033804
19 1.701073 0.015484 0.565347
20 3.723498 1.274481 0.645879
21 2.558677 2.230469 0.509292
22 2.093589 -1.025066 0.547596
23 1.037980 -1.203106 0.432117
24 1.057553 -2.266773 0.790692
25 2.424307 -0.142625 0.028575
26 1.586741 -0.088958 0.361497
27 4.415752 0.345466 0.308286
28 1.795594 0.217234 0.999215
29 1.235403 0.532800 0.153826
... ... ... ...
9970 2.168456 -0.017262 0.939090
9971 6.264273 1.116909 0.178496
9972 3.606000 0.996133 0.114898
9973 7.309805 2.114650 0.749198
9974 0.516667 -0.559011 0.566284
9975 1.731431 -0.551288 0.642289
9976 0.240869 -2.895895 0.240216
9977 1.962745 0.669392 0.079811
9978 1.906429 -0.783637 0.681941
9979 0.224970 -0.743653 0.207712
9980 0.703625 -0.575521 0.955561
9981 1.283870 -0.714055 0.152691
9982 2.811935 0.556932 0.759912
9983 2.256777 0.626789 0.112445
9984 0.698931 -0.058863 0.896378
9985 0.464828 -0.756868 0.286653
9986 1.549793 0.523197 0.681960
9987 3.150460 -0.525583 0.568818
9988 0.561533 1.429781 0.951989
9989 0.310219 -0.038467 0.021718
9990 0.305217 -1.701947 0.265366
9991 0.279456 -0.860578 0.252238
9992 0.817297 -0.073400 0.495218
9993 1.425109 -0.541884 0.617208
9994 1.398760 0.340502 0.550074
9995 2.020303 0.572363 0.713191
9996 0.605350 -0.479461 0.110640
9997 2.887847 -0.196274 0.334835
9998 2.685894 -1.037154 0.005343
9999 3.885984 -0.218977 0.525906

10000 rows × 3 columns


In [19]:
# summary statistics (count, mean, std, min, quartiles, max) for each column
df.describe()


Out[19]:
gamma normal random
count 10000.000000 10000.000000 10000.000000
mean 1.970136 -0.016483 0.499797
std 1.396956 0.995882 0.289804
min 0.011182 -3.578899 0.000130
25% 0.942716 -0.701731 0.249978
50% 1.658685 -0.014815 0.500616
75% 2.648172 0.672201 0.749175
max 11.602323 3.177792 0.999940

In [20]:
plt.figure()
# create a boxplot of the normal data, assign the output to a variable to suppress output
# whis=(0, 100) puts the whiskers at the 0th and 100th percentiles (the data min and max);
# it replaces whis='range', which current matplotlib releases no longer accept
_ = plt.boxplot(df['normal'], whis=(0, 100))



In [21]:
# clear the current figure
plt.clf()
# plot boxplots for all three of df's columns
# whis=(0, 100) puts the whiskers at the 0th and 100th percentiles (the data min and max);
# it replaces whis='range', which current matplotlib releases no longer accept
_ = plt.boxplot([ df['normal'], df['random'], df['gamma'] ], whis=(0, 100))

In [22]:
plt.figure()
# histogram of the gamma column with 100 bins; assigned to `_` so the return value is not echoed
_ = plt.hist(df['gamma'], bins=100)



In [23]:
import mpl_toolkits.axes_grid1.inset_locator as mpl_il

plt.figure()
# whis=(0, 100) puts the whiskers at the 0th and 100th percentiles (the data min and max);
# it replaces whis='range', which current matplotlib releases no longer accept
plt.boxplot([ df['normal'], df['random'], df['gamma'] ], whis=(0, 100))
# overlay axis on top of another: an inset taking 60% of the parent's width and
# 40% of its height, anchored with loc=2 (matplotlib's code for 'upper left')
ax2 = mpl_il.inset_axes(plt.gca(), width='60%', height='40%', loc=2)
# histogram of the gamma column inside the inset
ax2.hist(df['gamma'], bins=100)
# pad the inset's x range on both sides of the data
ax2.margins(x=0.5)



In [24]:
# switch the y axis ticks for ax2 (the inset axes created in the previous cell) to the right side
ax2.yaxis.tick_right()

In [25]:
# if `whis` argument isn't passed, boxplot defaults to showing 1.5*interquartile (IQR) whiskers with outliers
plt.figure()
# same three columns as before; the result is assigned to `_` so the return value is not echoed
_ = plt.boxplot([ df['normal'], df['random'], df['gamma'] ] )


Heatmaps


In [26]:
plt.figure()

# fresh samples: Y from a normal distribution (mean 0, std 1), X from the uniform generator
Y = np.random.normal(loc=0.0, scale=1.0, size=10000)
X = np.random.random(size=10000)
# 2D histogram (heatmap) of the points with 25 bins along each axis
_ = plt.hist2d(X, Y, bins=25)



In [27]:
plt.figure()
# same X and Y as the previous cell, now with 100 bins along each axis
_ = plt.hist2d(X, Y, bins=100)



In [28]:
# add a colorbar legend to the current figure (the heatmap drawn in the previous cell)
plt.colorbar()


Out[28]:
<matplotlib.colorbar.Colorbar at 0x7f3496778fd0>

Animations


In [36]:
import matplotlib.animation as animation

# n is the number of samples; `update` also compares the frame number against it to stop the animation
n = 100
# n draws from the standard normal distribution, revealed progressively by the animation
x = np.random.randn(n)

In [37]:
def update(curr):
    """Redraw the histogram for animation frame ``curr``.

    Plots the first ``curr`` values of the module-level sample ``x`` on the
    current axes, with fixed axis limits, labels and an annotation showing
    the frame number.
    """
    # once the frame counter reaches n, stop the timer that drives the
    # module-level animation object `a`; this frame is still drawn below
    if curr == n:
        a.event_source.stop()
    # wipe the current axes before drawing the new frame
    plt.cla()
    # bin edges every 0.5 from -4 up to 3.5 (np.arange excludes the stop value)
    plt.hist(x[:curr], bins=np.arange(-4, 4, 0.5))
    # fixed limits so the view does not rescale between frames
    plt.axis([-4, 4, 0, 30])
    axes = plt.gca()
    axes.set_title('Sampling the Normal Distribution')
    axes.set_ylabel('Frequency')
    axes.set_xlabel('Value')
    plt.annotate('n = {}'.format(curr), [3, 27])

In [39]:
fig = plt.figure()
# call `update` on this figure every 100 ms; the animation is bound to `a`,
# which `update` uses to stop it
a = animation.FuncAnimation(fig, update, interval=100)


Interactivity


In [47]:
plt.figure()
# ten values from numpy's uniform generator, drawn as a line
data = np.random.rand(10)
plt.plot(data)

def on_press(event):
    """Redraw the line and report the click position in the axes title.

    Reads the event's pixel coordinates (x, y) and data coordinates
    (xdata, ydata) and formats them into the title of the current axes.
    """
    plt.cla()
    plt.plot(data)
    plt.gca().set_title('Event at pixels {},{} \nand data {},{}'.format(event.x, event.y, event.xdata, event.ydata))

# tell mpl_connect we want to pass a 'button_press_event' into on_press when the event is detected;
# the value it returns is displayed as the cell output
plt.gcf().canvas.mpl_connect('button_press_event', on_press)


Out[47]:
7

In [41]:
from random import shuffle
# ten country labels, shuffled in place
origins = ['China', 'Brazil', 'India', 'USA', 'Canada', 'UK', 'Germany', 'Iraq', 'Chile', 'Mexico']

shuffle(origins)

# NOTE: this rebinds `df`, replacing the 10000-row DataFrame built earlier in the notebook
# height and weight are each ten values from numpy's uniform generator
df = pd.DataFrame({'height': np.random.rand(10),
                   'weight': np.random.rand(10),
                   'origin': origins})
df


Out[41]:
height origin weight
0 0.505857 Canada 0.175315
1 0.653235 India 0.652728
2 0.795082 UK 0.623295
3 0.818067 Mexico 0.392196
4 0.960182 China 0.656495
5 0.175666 Chile 0.397090
6 0.017622 USA 0.481149
7 0.305484 Iraq 0.634832
8 0.913788 Brazil 0.893557
9 0.908436 Germany 0.638947

In [42]:
plt.figure()
# picker=5 sets a pick tolerance of 5, so the mouse doesn't have to click directly on a point
# (matplotlib's picker documentation gives the tolerance in points -- confirm for the installed version)
plt.scatter(df['height'], df['weight'], picker=5)
plt.gca().set_ylabel('Weight')
plt.gca().set_xlabel('Height')


Out[42]:
<matplotlib.text.Text at 0x7f3496c4cda0>

In [44]:
def onpick(event):
    """Show the origin of the picked scatter point in the axes title.

    `event.ind` holds the indices of the points under the click; the first
    one is used to look up the matching row of `df` and read its 'origin'.
    """
    origin = df.iloc[event.ind[0]]['origin']
    plt.gca().set_title('Selected item came from {}'.format(origin))

# tell mpl_connect we want to pass a 'pick_event' into onpick when the event is detected
# (the handler was previously defined as `on_press`, so `onpick` was an undefined name here)
plt.gcf().canvas.mpl_connect('pick_event', onpick)


Out[44]:
7

In [ ]: