Subplots



In [1]:

    
%matplotlib notebook

import matplotlib.pyplot as plt
import numpy as np

plt.subplot?



In [2]:

    
# start a new figure; the pyplot calls below draw into it
plt.figure()
# subplot with 1 row, 2 columns, and current axis is 1st subplot axes
plt.subplot(1, 2, 1)

# the integers 1 through 8
linear_data = np.array([1,2,3,4,5,6,7,8])

# '-o' draws a solid line with a circle marker at each value
plt.plot(linear_data, '-o')









    














    











    Out[2]:





[<matplotlib.lines.Line2D at 0x7f349e3c2390>]



In [3]:

    
# element-wise square of linear_data (quadratic growth, despite the variable name)
exponential_data = linear_data**2 

# subplot with 1 row, 2 columns, and current axis is 2nd subplot axes
plt.subplot(1, 2, 2)
# solid line with circle markers
plt.plot(exponential_data, '-o')









    Out[3]:





[<matplotlib.lines.Line2D at 0x7f349e0c34a8>]



In [4]:

    
# plot exponential data on 1st subplot axes
# NOTE(review): relies on plt.subplot(1, 2, 1) re-selecting the existing left-hand axes
# rather than creating a new one — confirm on the installed matplotlib version
plt.subplot(1, 2, 1)
# '-x' draws a solid line with a cross marker at each value
plt.plot(exponential_data, '-x')









    Out[4]:





[<matplotlib.lines.Line2D at 0x7f34c51fedd8>]



In [5]:

    
plt.figure()
# keep a handle on the left-hand axes so the right-hand one can share its y axis
ax1 = plt.subplot(1, 2, 1)
plt.plot(linear_data, '-o')
# pass sharey=ax1 to ensure the two subplots share the same y axis
ax2 = plt.subplot(1, 2, 2, sharey=ax1)
plt.plot(exponential_data, '-x')









    














    











    Out[5]:





[<matplotlib.lines.Line2D at 0x7f349bf745f8>]



In [6]:

    
plt.figure()
# the right hand side is equivalent shorthand syntax
# (the three-digit form 121 is read as rows=1, columns=2, index=1)
# NOTE(review): the comparison presumably evaluates to True because the second call
# returns the axes created by the first — confirm on the installed matplotlib version
plt.subplot(1,2,1) == plt.subplot(121)



In [7]:

    
# create a 3x3 grid of subplots
# the nested tuples unpack the grid of axes row by row; sharex/sharey=True link the axes of all nine panels
fig, ((ax1,ax2,ax3), (ax4,ax5,ax6), (ax7,ax8,ax9)) = plt.subplots(3, 3, sharex=True, sharey=True)
# plot the linear_data on the 5th subplot axes (second row, second column)
ax5.plot(linear_data, '-')









    














    











    Out[7]:





[<matplotlib.lines.Line2D at 0x7f349bf135f8>]



In [8]:

    
# make every tick label visible on every axes of the current figure
# (this includes the labels on the inner panels of the grid)
for ax in plt.gcf().get_axes():
    tick_labels = ax.get_xticklabels() + ax.get_yticklabels()
    for label in tick_labels:
        label.set_visible(True)



In [9]:

    
# necessary on some systems to update the plot
# (asks the current figure's canvas to redraw itself)
plt.gcf().canvas.draw()

Histograms



In [10]:

    
# build a 2x2 grid of axes; sharex=True gives all four panels a common x axis
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharex=True)
axs = [ax1, ax2, ax3, ax4]

# panel n gets 10**(n+1) standard-normal draws, i.e. 10, 100, 1000 and 10000 samples,
# each shown as a histogram with the default binning
for n, panel in enumerate(axs):
    sample_size = 10 ** (n + 1)
    sample = np.random.normal(0.0, 1.0, sample_size)
    panel.hist(sample)
    panel.set_title('n={}'.format(sample_size))



In [11]:

    
# same 2x2 layout as before, but every histogram now uses 100 bins
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharex=True)
axs = [ax1, ax2, ax3, ax4]

# panel n gets 10**(n+1) standard-normal draws (10, 100, 1000, 10000)
for n, panel in enumerate(axs):
    sample_size = 10 ** (n + 1)
    sample = np.random.normal(0.0, 1.0, sample_size)
    panel.hist(sample, bins=100)
    panel.set_title('n={}'.format(sample_size))



In [12]:

    
plt.figure()
# Y: 10000 draws from a normal distribution with mean 0 and standard deviation 1
Y = np.random.normal(loc=0.0, scale=1.0, size=10000)
# X: 10000 draws from np.random.random, i.e. uniform on [0, 1)
X = np.random.random(size=10000)
# uniform values on the x axis against normal values on the y axis
plt.scatter(X,Y)









    














    











    Out[12]:





<matplotlib.collections.PathCollection at 0x7f349b1256a0>



In [13]:

    
# use gridspec to partition the figure into subplots
import matplotlib.gridspec as gridspec

plt.figure()
# 3x3 layout grid; slicing it selects the cells each subplot spans
gspec = gridspec.GridSpec(3, 3)

# first row, columns 1-2
top_histogram = plt.subplot(gspec[0, 1:])
# rows 1-2, first column
side_histogram = plt.subplot(gspec[1:, 0])
# rows 1-2, columns 1-2 (the large lower-right region)
lower_right = plt.subplot(gspec[1:, 1:])



In [14]:

    
# fresh samples: Y is standard normal, X is uniform on [0, 1)
Y = np.random.normal(loc=0.0, scale=1.0, size=10000)
X = np.random.random(size=10000)
# scatter in the large panel, with each variable's histogram in the panel along the matching edge
lower_right.scatter(X, Y)
top_histogram.hist(X, bins=100)
# horizontal bars so the Y histogram runs along the scatter's y direction;
# assigning to `s` keeps the return value out of the cell output
s = side_histogram.hist(Y, bins=100, orientation='horizontal')



In [15]:

    
# clear the histograms and redraw them normalized, so each histogram's total bar area is 1
# (`density=True` is the replacement for the `normed=True` keyword, which newer
# matplotlib releases no longer accept)
top_histogram.clear()
top_histogram.hist(X, bins=100, density=True)
side_histogram.clear()
side_histogram.hist(Y, bins=100, orientation='horizontal', density=True)
# flip the side histogram's x axis
side_histogram.invert_xaxis()



In [16]:

    
# pin the axis ranges: x runs over [0, 1] on the top histogram and the scatter,
# y runs over [-5, 5] on the side histogram and the scatter
for ax in (top_histogram, lower_right):
    ax.set_xlim(left=0, right=1)
for ax in (side_histogram, lower_right):
    ax.set_ylim(bottom=-5, top=5)



In [17]:

    
%%HTML
<!-- embeds an externally hosted image (WP1-Fig13.jpg); it only renders while that URL is reachable -->
<img src='http://educationxpress.mit.edu/sites/default/files/journal/WP1-Fig13.jpg' />

Box and Whisker Plots



In [18]:

    
import pandas as pd

# three samples of 10000 draws each: normal (mean 0, std 1), uniform on [0, 1),
# and gamma with shape parameter 2
normal_sample = np.random.normal(0.0, 1.0, 10000)
random_sample = np.random.random(10000)
gamma_sample = np.random.gamma(2, size=10000)

# one column per distribution
df = pd.DataFrame(dict(normal=normal_sample,
                       random=random_sample,
                       gamma=gamma_sample))
df









    Out[18]:







  
    
      
      gamma
      normal
      random
    
  
  
    
      0
      0.440609
      0.575641
      0.742618
    
    
      1
      2.380960
      -0.687690
      0.474229
    
    
      2
      1.315481
      -0.216004
      0.382739
    
    
      3
      1.211573
      0.030815
      0.327746
    
    
      4
      2.509590
      0.816452
      0.701328
    
    
      5
      2.341145
      -0.550380
      0.518515
    
    
      6
      4.113070
      -2.264414
      0.896886
    
    
      7
      0.740701
      0.155617
      0.783874
    
    
      8
      0.905761
      2.337774
      0.188864
    
    
      9
      1.185569
      1.301147
      0.075053
    
    
      10
      2.219070
      0.970820
      0.086233
    
    
      11
      1.328680
      0.408251
      0.521071
    
    
      12
      1.925582
      1.290878
      0.293128
    
    
      13
      0.926205
      -0.072961
      0.026245
    
    
      14
      2.158209
      -0.810410
      0.106197
    
    
      15
      4.802618
      2.408269
      0.384366
    
    
      16
      1.601048
      -1.552808
      0.743893
    
    
      17
      0.799934
      -1.132184
      0.814792
    
    
      18
      0.981244
      0.827492
      0.033804
    
    
      19
      1.701073
      0.015484
      0.565347
    
    
      20
      3.723498
      1.274481
      0.645879
    
    
      21
      2.558677
      2.230469
      0.509292
    
    
      22
      2.093589
      -1.025066
      0.547596
    
    
      23
      1.037980
      -1.203106
      0.432117
    
    
      24
      1.057553
      -2.266773
      0.790692
    
    
      25
      2.424307
      -0.142625
      0.028575
    
    
      26
      1.586741
      -0.088958
      0.361497
    
    
      27
      4.415752
      0.345466
      0.308286
    
    
      28
      1.795594
      0.217234
      0.999215
    
    
      29
      1.235403
      0.532800
      0.153826
    
    
      ...
      ...
      ...
      ...
    
    
      9970
      2.168456
      -0.017262
      0.939090
    
    
      9971
      6.264273
      1.116909
      0.178496
    
    
      9972
      3.606000
      0.996133
      0.114898
    
    
      9973
      7.309805
      2.114650
      0.749198
    
    
      9974
      0.516667
      -0.559011
      0.566284
    
    
      9975
      1.731431
      -0.551288
      0.642289
    
    
      9976
      0.240869
      -2.895895
      0.240216
    
    
      9977
      1.962745
      0.669392
      0.079811
    
    
      9978
      1.906429
      -0.783637
      0.681941
    
    
      9979
      0.224970
      -0.743653
      0.207712
    
    
      9980
      0.703625
      -0.575521
      0.955561
    
    
      9981
      1.283870
      -0.714055
      0.152691
    
    
      9982
      2.811935
      0.556932
      0.759912
    
    
      9983
      2.256777
      0.626789
      0.112445
    
    
      9984
      0.698931
      -0.058863
      0.896378
    
    
      9985
      0.464828
      -0.756868
      0.286653
    
    
      9986
      1.549793
      0.523197
      0.681960
    
    
      9987
      3.150460
      -0.525583
      0.568818
    
    
      9988
      0.561533
      1.429781
      0.951989
    
    
      9989
      0.310219
      -0.038467
      0.021718
    
    
      9990
      0.305217
      -1.701947
      0.265366
    
    
      9991
      0.279456
      -0.860578
      0.252238
    
    
      9992
      0.817297
      -0.073400
      0.495218
    
    
      9993
      1.425109
      -0.541884
      0.617208
    
    
      9994
      1.398760
      0.340502
      0.550074
    
    
      9995
      2.020303
      0.572363
      0.713191
    
    
      9996
      0.605350
      -0.479461
      0.110640
    
    
      9997
      2.887847
      -0.196274
      0.334835
    
    
      9998
      2.685894
      -1.037154
      0.005343
    
    
      9999
      3.885984
      -0.218977
      0.525906
    
  

10000 rows × 3 columns



In [19]:

    
# summary statistics (count, mean, std, min, quartiles, max) for each column
df.describe()









    Out[19]:







  
    
      
      gamma
      normal
      random
    
  
  
    
      count
      10000.000000
      10000.000000
      10000.000000
    
    
      mean
      1.970136
      -0.016483
      0.499797
    
    
      std
      1.396956
      0.995882
      0.289804
    
    
      min
      0.011182
      -3.578899
      0.000130
    
    
      25%
      0.942716
      -0.701731
      0.249978
    
    
      50%
      1.658685
      -0.014815
      0.500616
    
    
      75%
      2.648172
      0.672201
      0.749175
    
    
      max
      11.602323
      3.177792
      0.999940



In [20]:

    
plt.figure()
# create a boxplot of the normal data, assign the output to a variable to suppress output
# whis=(0, 100) puts the whiskers at the 0th and 100th percentiles, i.e. the data min and max;
# it is the percentile form of whis='range', which newer matplotlib releases no longer accept
_ = plt.boxplot(df['normal'], whis=(0, 100))



In [21]:

    
# clear the current figure
plt.clf()
# plot boxplots for all three of df's columns
# whis=(0, 100) extends the whiskers to the data min and max; it is the percentile form of
# whis='range', which newer matplotlib releases no longer accept
_ = plt.boxplot([ df['normal'], df['random'], df['gamma'] ], whis=(0, 100))



In [22]:

    
plt.figure()
# histogram of the gamma column in 100 bins; `_ =` keeps the return value out of the cell output
_ = plt.hist(df['gamma'], bins=100)



In [23]:

    
import mpl_toolkits.axes_grid1.inset_locator as mpl_il

plt.figure()
# whis=(0, 100) extends the whiskers to the data min and max; it is the percentile form of
# whis='range', which newer matplotlib releases no longer accept
plt.boxplot([ df['normal'], df['random'], df['gamma'] ], whis=(0, 100))
# overlay a second, smaller axes on top of the boxplot axes:
# 60% of its width, 40% of its height, anchored at location code 2 (upper left)
ax2 = mpl_il.inset_axes(plt.gca(), width='60%', height='40%', loc=2)
# histogram of the gamma column inside the inset
ax2.hist(df['gamma'], bins=100)
# add horizontal padding around the histogram's data range
ax2.margins(x=0.5)



In [24]:

    
# switch the y axis ticks for ax2 to the right side
# (ax2 is expected to be the inset axes holding the gamma histogram at this point)
ax2.yaxis.tick_right()



In [25]:

    
# if `whis` argument isn't passed, boxplot defaults to showing 1.5*interquartile (IQR) whiskers with outliers
# (`_ =` keeps the returned artists out of the cell output)
plt.figure()
_ = plt.boxplot([ df['normal'], df['random'], df['gamma'] ] )

Heatmaps



In [26]:

    
plt.figure()

# Y: 10000 standard-normal draws; X: 10000 uniform draws on [0, 1)
Y = np.random.normal(loc=0.0, scale=1.0, size=10000)
X = np.random.random(size=10000)
# 2D histogram (heatmap) of the point counts with 25 bins along each axis
_ = plt.hist2d(X, Y, bins=25)



In [27]:

    
plt.figure()
# same X and Y on a finer grid: 100 bins along each axis
_ = plt.hist2d(X, Y, bins=100)



In [28]:

    
# add a colorbar legend
# (called without arguments, so pyplot picks the current image — presumably the most recent hist2d; verify)
plt.colorbar()









    Out[28]:





<matplotlib.colorbar.Colorbar at 0x7f3496778fd0>

Animations



In [36]:

    
import matplotlib.animation as animation

# number of samples; update() also stops the animation when the frame number reaches this value
n = 100
# n draws from the standard normal distribution
x = np.random.randn(n)



In [37]:

    
# frame callback for the animation: `curr` is the current frame number
def update(curr):
    """Redraw the histogram using the first `curr` values of `x`.

    Relies on notebook globals: the sample array `x`, the sample count `n`,
    and the animation object `a`, which is stopped once `curr` equals `n`.
    """
    # last frame reached: halt the event source driving the animation `a`
    if curr == n:
        a.event_source.stop()
    # wipe the current axes, then draw this frame onto them
    plt.cla()
    axes = plt.gca()
    # fixed bin edges from -4 up to (but excluding) 4 in steps of 0.5
    axes.hist(x[:curr], bins=np.arange(-4, 4, 0.5))
    # fixed view limits so the picture does not rescale between frames
    axes.axis([-4,4,0,30])
    axes.set_title('Sampling the Normal Distribution')
    axes.set_ylabel('Frequency')
    axes.set_xlabel('Value')
    # show the running sample count near the top-right corner
    axes.annotate('n = {}'.format(curr), [3,27])



In [39]:

    
# new figure for the animation to draw into
fig = plt.figure()
# call update() repeatedly with interval=100 (milliseconds between frames);
# the result is bound to `a` because update() stops the animation through that name
a = animation.FuncAnimation(fig, update, interval=100)

Interactivity



In [47]:

    
plt.figure()
# ten uniform random values to plot as a line
data = np.random.rand(10)
plt.plot(data)

def on_press(event):
    """Redraw the line and report the click's pixel and data coordinates in the title."""
    plt.cla()
    plt.plot(data)
    title = 'Event at pixels {},{} \nand data {},{}'.format(event.x, event.y, event.xdata, event.ydata)
    plt.gca().set_title(title)

# register on_press on this figure's canvas so it is called for every 'button_press_event'
plt.gcf().canvas.mpl_connect('button_press_event', on_press)



In [41]:

    
from random import shuffle

origins = ['China', 'Brazil', 'India', 'USA', 'Canada', 'UK', 'Germany', 'Iraq', 'Chile', 'Mexico']
# randomise which origin ends up on which row
shuffle(origins)

# ten rows: uniform random height and weight, plus one origin label each
df = pd.DataFrame(dict(height=np.random.rand(10),
                       weight=np.random.rand(10),
                       origin=origins))
df



In [42]:

    
plt.figure()
# picker=5 means the mouse doesn't have to click directly on a point, but can be up to 5 pixels away
# NOTE(review): the tolerance unit is taken from the original comment — confirm against the matplotlib docs
plt.scatter(df['height'], df['weight'], picker=5)
plt.gca().set_ylabel('Weight')
plt.gca().set_xlabel('Height')









    














    











    Out[42]:





<matplotlib.text.Text at 0x7f3496c4cda0>



In [44]:

    
def onpick(event):
    """Show the origin of the picked scatter point in the current axes' title.

    `event.ind` lists the indices of the points that were picked; when there
    are several, only the first one is reported. The index is used as a row
    position into the notebook-level DataFrame `df`.
    """
    origin = df.iloc[event.ind[0]]['origin']
    plt.gca().set_title('Selected item came from {}'.format(origin))

# tell mpl_connect we want to pass a 'pick_event' into onpick when the event is detected
plt.gcf().canvas.mpl_connect('pick_event', onpick)









    Out[44]:





7



In [ ]:

	height	origin	weight
0	0.505857	Canada	0.175315
1	0.653235	India	0.652728
2	0.795082	UK	0.623295
3	0.818067	Mexico	0.392196
4	0.960182	China	0.656495
5	0.175666	Chile	0.397090
6	0.017622	USA	0.481149
7	0.305484	Iraq	0.634832
8	0.913788	Brazil	0.893557
9	0.908436	Germany	0.638947

	gamma	normal	random
0	0.440609	0.575641	0.742618
1	2.380960	-0.687690	0.474229
2	1.315481	-0.216004	0.382739
3	1.211573	0.030815	0.327746
4	2.509590	0.816452	0.701328
5	2.341145	-0.550380	0.518515
6	4.113070	-2.264414	0.896886
7	0.740701	0.155617	0.783874
8	0.905761	2.337774	0.188864
9	1.185569	1.301147	0.075053
10	2.219070	0.970820	0.086233
11	1.328680	0.408251	0.521071
12	1.925582	1.290878	0.293128
13	0.926205	-0.072961	0.026245
14	2.158209	-0.810410	0.106197
15	4.802618	2.408269	0.384366
16	1.601048	-1.552808	0.743893
17	0.799934	-1.132184	0.814792
18	0.981244	0.827492	0.033804
19	1.701073	0.015484	0.565347
20	3.723498	1.274481	0.645879
21	2.558677	2.230469	0.509292
22	2.093589	-1.025066	0.547596
23	1.037980	-1.203106	0.432117
24	1.057553	-2.266773	0.790692
25	2.424307	-0.142625	0.028575
26	1.586741	-0.088958	0.361497
27	4.415752	0.345466	0.308286
28	1.795594	0.217234	0.999215
29	1.235403	0.532800	0.153826
...	...	...	...
9970	2.168456	-0.017262	0.939090
9971	6.264273	1.116909	0.178496
9972	3.606000	0.996133	0.114898
9973	7.309805	2.114650	0.749198
9974	0.516667	-0.559011	0.566284
9975	1.731431	-0.551288	0.642289
9976	0.240869	-2.895895	0.240216
9977	1.962745	0.669392	0.079811
9978	1.906429	-0.783637	0.681941
9979	0.224970	-0.743653	0.207712
9980	0.703625	-0.575521	0.955561
9981	1.283870	-0.714055	0.152691
9982	2.811935	0.556932	0.759912
9983	2.256777	0.626789	0.112445
9984	0.698931	-0.058863	0.896378
9985	0.464828	-0.756868	0.286653
9986	1.549793	0.523197	0.681960
9987	3.150460	-0.525583	0.568818
9988	0.561533	1.429781	0.951989
9989	0.310219	-0.038467	0.021718
9990	0.305217	-1.701947	0.265366
9991	0.279456	-0.860578	0.252238
9992	0.817297	-0.073400	0.495218
9993	1.425109	-0.541884	0.617208
9994	1.398760	0.340502	0.550074
9995	2.020303	0.572363	0.713191
9996	0.605350	-0.479461	0.110640
9997	2.887847	-0.196274	0.334835
9998	2.685894	-1.037154	0.005343
9999	3.885984	-0.218977	0.525906

	gamma	normal	random
count	10000.000000	10000.000000	10000.000000
mean	1.970136	-0.016483	0.499797
std	1.396956	0.995882	0.289804
min	0.011182	-3.578899	0.000130
25%	0.942716	-0.701731	0.249978
50%	1.658685	-0.014815	0.500616
75%	2.648172	0.672201	0.749175
max	11.602323	3.177792	0.999940