In [ ]:
import matplotlib as mpl
mpl.use('TkAgg')
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import seaborn as sns
In [ ]:
import sys
# Print the version string of the interpreter that is executing this cell.
print(sys.version)
In [ ]:
!python --version
In [ ]:
# Small toy dataset used by every plotting example below: nine rows,
# two float columns, one categorical column and one integer column.
df = pd.DataFrame(
    data={
        'age': [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0],
        'height': [4.0, 4.5, 5.0, 6.0, 7.0, 8.0, 9.0, 9.5, 10.0],
        'gender': ['M', 'F', 'F', 'M', 'M', 'F', 'F', 'M', 'F'],
        # Column currently disabled:
        # 'hair color': ['brown', 'black', 'brown', 'blonde', 'brown', 'red',
        #                'brown', 'brown', 'black'],
        'hair length': [1, 6, 2, 3, 1, 5, 6, 5, 3],
    }
)
Plotting separate data columns as separate sub-plots:
In [ ]:
def plot_2_subplots(df, x1, y1, y2, x2=None, title=None):
    """Plot two column pairs of *df* as two vertically stacked line plots.

    The top panel shows ``df[y1]`` against ``df[x1]``; the bottom panel shows
    ``df[y2]`` against ``df[x2]``. Each panel gets axis labels taken from the
    column names, a black horizontal reference line at y=0, and a legend
    entry named after its y column.

    Parameters
    ----------
    df : column-indexable table (used as ``df[column_name]``)
    x1, y1 : column names for the top panel.
    y2 : y column name for the bottom panel.
    x2 : x column name for the bottom panel; when None, ``x1`` is reused.
    title : optional title for the whole figure.

    Returns
    -------
    The matplotlib figure holding both panels.
    """
    fig, axs = plt.subplots(2, 1, figsize=(5, 4))

    if x2 is None:
        # No separate x column for the bottom panel: reuse the top one.
        # TODO(?): share the x axis between panels in this case.
        x2 = x1

    panels = (
        (axs[0], x1, y1, 'c'),
        (axs[1], x2, y2, 'b'),
    )
    for ax, x_col, y_col, color in panels:
        # The label is what makes the legend below non-empty.
        ax.plot(df[x_col], df[y_col], linestyle='--', marker='o',
                color=color, label=y_col)
        ax.set_xlabel(x_col)
        ax.set_ylabel(y_col)
        ax.axhline(y=0, color='k')
        ax.legend(loc='best')

    if title is not None:
        # Figure-level title; an axes-level title would end up attached to
        # whichever panel happens to be current (the bottom one).
        fig.suptitle(title)

    plt.tight_layout()
    return fig
In [ ]:
# Height and hair length against age; x2 and title stay at their defaults.
p = plot_2_subplots(df, x1='age', y1='height', y2='hair length')
Plot multiple groups (from same data columns) onto the same plot.
In [ ]:
# Bare attribute access (no call): this only evaluates `df.plot`, so a
# notebook cell shows the attribute itself and no figure is drawn.
df.plot
In [ ]:
def plot_by_group(df, group, x, y, title=None):
    """Draw one line per group of *df* on a single set of axes.

    The rows of *df* are grouped with ``df.groupby(group)``; each group is
    sorted by column *x* and plotted as ``y`` against ``x`` with circle
    markers. Every line is labelled with its group key in the legend.

    Parameters
    ----------
    df : table supporting ``groupby`` and ``sort_values``.
    group : grouping key passed straight to ``df.groupby``.
    x, y : column names for the horizontal and vertical axes.
    title : optional axes title.
    """
    fig, ax = plt.subplots(1, 1, figsize=(3.5, 2.5))
    ax.set_xlabel(x)
    ax.set_ylabel(y)
    # todo: move title up(?)
    if title is not None:
        ax.set_title(title)

    for key, group_df in df.groupby(group):
        # Sort on the x attribute so the line is drawn left to right.
        group_df = group_df.sort_values(x)
        # Use the whole group key as the label. Indexing it (key[0]) would
        # cut a string key down to its first character and fail outright
        # on non-subscriptable keys such as integers.
        if isinstance(key, tuple):
            label = ', '.join(str(part) for part in key)
        else:
            label = str(key)
        ax.plot(group_df[x], group_df[y], marker='o', label=label)

    # todo: put legend outside the figure
    ax.legend()
In [ ]:
# One height-vs-age line per gender.
plot_by_group(
    df,
    group='gender',
    x='age',
    y='height',
    title='this is a title, you bet.',
)
Jake Vanderplas:
plt.plot can be noticeably more efficient than plt.scatter. The reason is that plt.scatter has the capability to render a different size and/or color for each point, so the renderer must do the extra work of constructing each point individually.
In [ ]:
def plot_2_subplots_v2(df, x1, x2, y1, y2, title=None):
    """Plot two column pairs of *df* as two vertically stacked line plots.

    The top panel shows ``df[y1]`` against ``df[x1]`` and the bottom panel
    shows ``df[y2]`` against ``df[x2]``. Each panel is titled with its x
    column name and has its axes labelled with the plotted column names.

    Parameters
    ----------
    df : column-indexable table (used as ``df[column_name]``)
    x1, y1 : column names for the top panel.
    x2, y2 : column names for the bottom panel.
    title : optional title for the whole figure.

    Returns
    -------
    The matplotlib figure holding both panels.
    """
    fig, axs = plt.subplots(2, 1, figsize=(5, 4))
    panels = (
        (x1, y1, '#b3cde3'),
        (x2, y2, '#decbe4'),
    )
    for row, (ax, (x_col, y_col, color)) in enumerate(zip(axs, panels), start=1):
        ax.plot(df[x_col], df[y_col], color=color, marker='o', label=row)
        # Label the axes with the real column names rather than a placeholder.
        ax.set_xlabel(x_col)
        ax.set_ylabel(y_col)
        ax.set_title(x_col)

    if title is not None:
        # Honour the caller's title as a figure-level heading.
        fig.suptitle(title)

    plt.tight_layout()
    return fig
# A deliberately contrived demo: both panels use 'age' as the x column.
p = plot_2_subplots_v2(df, x1='age', x2='age', y1='height', y2='hair length')
In [ ]: