In [1]:
# Updated: Joey Murphy Spring 2017
Plotting is one of the most important aspects of data analysis! Unfortunately, your hard work means nothing if your data isn't presented aesthetically.
Below are some of the basics of plotting in Python: line graphs, scatter plots, histograms, subplots, sizes, colors, opacity, styles, legends, labels, colormaps, colorbars, text, and order-of-plotting.
This isn't a comprehensive guide, but hopefully it serves as a great introduction!
Remember that Google is your best friend for plotting in Python!
In [2]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
In [3]:
# sample grid 0..19 and its sine; both arrays are reused by the plotting cells below
x = np.arange(0, 20)
y = np.sin(x)
In [4]:
# the plain line graph you already know and love: y drawn against x
plt.plot(x, y)
plt.show()
In [5]:
# several curves can share one set of axes -- here sin(x) and cos(x)
# and remember: always label the axes!
y1 = np.cos(x)
plt.plot(x, y)
plt.plot(x, y1)
plt.xlabel('x')
plt.ylabel('y')
plt.show()
In [6]:
# line thickness is one of the Line2D properties (`lw` is the short alias of `linewidth`)
# http://matplotlib.org/api/lines_api.html#matplotlib.lines.Line2D
plt.plot(x, y, linewidth=3)
plt.plot(x, y1, linewidth=7)
plt.show()
In [7]:
# zorder sets the stacking order: the line with the larger zorder is drawn on top,
# so the thin sin(x) line (zorder=2) sits above the thick cos(x) line (zorder=1)
# http://matplotlib.org/examples/pylab_examples/zorder_demo.html
plt.plot(x, y, linewidth=3, zorder=2)
plt.plot(x, y1, linewidth=7, zorder=1)
plt.show()
In [8]:
# colour and line style are Line2D properties too
# (':' is the symbolic form of 'dotted', '--' of 'dashed')
plt.plot(x, y, color='red', linestyle=':')
plt.plot(x, y1, color='blue', linestyle='--')
plt.show()
In [10]:
# warning: after you put in a "keyword arg," you can't put in a non-keyword argument
# The offending call is held in a string on purpose: Python rejects it while parsing,
# so the error cannot be caught by wrapping the call itself in try/except.
# Compiling the string shows the same SyntaxError without aborting "Run All".
bad_call = "plt.plot(x,y,color='red',linestyle='dotted',x,y1,color='blue',linestyle='dashed')"
try:
    compile(bad_call, '<keyword-arg demo>', 'eval')
except SyntaxError as err:
    # report the parser's message instead of leaving a failed cell in the notebook
    print('SyntaxError:', err.msg)
In [11]:
# pyplot format strings pack colour and line style into one short code:
# 'r:' = red dotted, 'b--' = blue dashed
plt.plot(x, y, 'r:')
plt.plot(x, y1, 'b--')
plt.show()
In [12]:
# adding a "marker" keeps the connecting line and highlights each data point
plt.plot(x, y, marker='o')
plt.show()
In [13]:
# a format string containing only a marker drops the connecting line...
# and the result looks like a scatter plot!
plt.plot(x, y, 'o')
plt.show()
In [14]:
# the keyword route to the same scatter-like look: pick a marker, switch the line off
# (`ls` is the short alias of `linestyle`)
plt.plot(x, y, ls='none', marker='v')
plt.show()
In [15]:
# the dedicated function for scatter plots:
plt.scatter(x, y, marker='v')
plt.show()
In [16]:
# data points can carry error bars as well
# fmt='none' makes errorbar draw only the bars, leaving the markers to plt.scatter
# more at http://matplotlib.org/1.2.1/examples/pylab_examples/errorbar_demo.html
# and http://matplotlib.org/examples/statistics/errorbar_limits.html
plt.scatter(x, y, marker='v')
plt.errorbar(x, y, yerr=0.05, xerr=0.1, fmt='none', ecolor='red')
plt.show()
In [17]:
# plt.plot format strings can combine a colour with a marker:
# 'ro' = red circles, 'bv' = blue triangles
plt.plot(x, y, 'ro')
plt.plot(x, y1, 'bv')
plt.show()
In [18]:
# plt.scatter has no format-string shortcuts, which means...
# every scatter plot gets its own line of code :)
plt.scatter(x, y, marker='o', color='r')
plt.scatter(x, y1, marker='v', color='b')
plt.show()
In [19]:
# was the plot above confusing? different plot types can share the same axes:
# the markers come from scatter, the connecting lines from plot
plt.scatter(x, y, marker='o', color='r')
plt.scatter(x, y1, marker='v', color='b')
plt.plot(x, y, 'r:')
plt.plot(x, y1, 'b--')
plt.show()
In [20]:
# curves can also live on separate subplots; titles make the figure easier to follow :)
# two stacked axes share the x axis, so the limit set on the top one applies to both
f, axarr = plt.subplots(nrows=2, ncols=1, sharex=True)
axarr[0].set_title('sharing the x axis')
axarr[0].plot(x, y, color='r')
axarr[0].set_xlim(0, 10)
axarr[1].plot(x, y1, color='b')
plt.show()
In [21]:
# subplots come in all shapes and sizes: three stacked axes sharing the y axis
f, (ax0, ax1, ax2) = plt.subplots(nrows=3, ncols=1, sharey=True)
ax0.set_title('sharing the y axis')
ax0.plot(x, y, color='r')
ax0.set_ylim(0, 2)
ax1.plot(x, y1, color='b')
ax2.plot(y, y1, color='g')
# remove the vertical gap between the panels
f.subplots_adjust(hspace=0)
plt.show()
In [22]:
# want to get fancier? a 2x2 grid, one panel per line style
# http://matplotlib.org/examples/pylab_examples/subplots_demo.html
f, ((ax0, ax1), (ax2, ax3)) = plt.subplots(nrows=2, ncols=2)
# (axes, title, format string) for each panel
panels = (
    (ax0, 'solid line', 'r-'),
    (ax1, 'dashed line', 'b--'),
    (ax2, 'dashed-dotted line', 'g-.'),
    (ax3, 'dotted line', 'm:'),
)
for panel, caption, fmt in panels:
    panel.set_title(caption)
    panel.plot(x, y, fmt)
plt.tight_layout()
# supertitle for the whole figure; the top margin is then adjusted to 0.85
plt.suptitle('the four different line styles', fontsize=20)
plt.subplots_adjust(top=0.85)
plt.show()
In [23]:
# back to a single set of axes, for simplicity's sake
# the visible range of each axis can be set explicitly
plt.scatter(x, y, marker='o', color='r')
plt.scatter(x, y1, marker='v', color='b')
plt.plot(x, y, 'r:')
plt.plot(x, y1, 'b--')
plt.xlim(-10, 30)
plt.ylim(-2, 2)
plt.show()
In [24]:
# can't remember which curve is sin(x) and which is cos(x)?
# give each plot a label and call plt.legend()!
plt.scatter(x, y, marker='o', color='r', label='sin(x)')
plt.scatter(x, y1, marker='v', color='b', label='cos(x)')
plt.plot(x, y, 'r:')
plt.plot(x, y1, 'b--')
plt.xlim(-10, 30)
plt.ylim(-2, 2)
plt.legend()
plt.show()
In [25]:
# be careful where you put the label, though:
# a keyword argument on a multi-curve plt.plot call is applied to every curve,
# so both lines end up with the same legend entry
plt.scatter(x, y, marker='o', color='r')
plt.scatter(x, y1, marker='v', color='b')
plt.plot(x, y, 'r:', x, y1, 'b--', label='sin(x) and/or cos(x)')
plt.xlim(-10, 30)
plt.ylim(-2, 2)
plt.legend()
plt.show()
In [26]:
# want to learn more about legends?
# http://matplotlib.org/users/legend_guide.html
plt.scatter(x, y, color='r', marker='o', label='sin(x)')
plt.scatter(x, y1, color='b', marker='v', label='cos(x)')
plt.plot(x, y, 'r:', x, y1, 'b--')
# six black dots at constant height 0.3 ('ko' draws markers only)
plt.plot([-10, -5, 0, 5, 10, 20], [0.3] * 6, 'ko', label='a random line')
plt.xlim([-10, 20])
plt.ylim([-2, 1])
# numpoints / scatterpoints: how many markers each legend entry shows
# fontsize is given as a number (points) rather than the numeric string '20'
plt.legend(numpoints=4, scatterpoints=5, loc='best', fontsize=20, frameon=True, shadow=True)
plt.show()
In [27]:
# back to scatter plots!
# NOTE: x and y are rebound here -- from this cell on they hold 50 random samples
# each instead of the 0..19 grid and its sine used by the cells above.
# Seed NumPy's global generator first so these draws (and the np.random calls in later
# cells, when the notebook is run top to bottom) are identical on every run.
np.random.seed(0)
x = np.random.rand(50)
y = np.random.rand(50)
plt.scatter(x, y)
plt.show()
In [28]:
# the random points can be given random pretty colours
# ideally you would have three arrays of equal length:
#   xdata, ydata, colordata
# which lets a 2D graph show 3 parameters!
colors = np.random.rand(50)
plt.scatter(x, y, c=colors)
plt.show()
In [29]:
# a 4th parameter on a 2D graph? vary the marker sizes!
#   xdata, ydata, colordata, sizedata
radii = 15 * np.random.rand(50)  # radii between 0 and 15 points
area = np.pi * radii ** 2
plt.scatter(x, y, c=colors, s=area)
plt.show()
In [30]:
# the same graph, prettier: no marker outlines and half-transparent fills
plt.scatter(x, y, s=area, c=colors, alpha=0.5, edgecolors='none')
plt.show()
In [31]:
# "alpha" is the opacity keyword argument
# a single scalar is passed here, so every data point gets the same opacity
# (older matplotlib releases do not accept an array for alpha -- check your version)
plt.scatter(x, y, s=area, c=colors, alpha=0.2, edgecolors='none')
plt.show()
In [32]:
# prefer a named colormap over the default colours?
# a colormap can be selected by its registered name
# http://matplotlib.org/examples/color/colormaps_reference.html
plt.scatter(x, y, s=area, c=colors, cmap='gist_rainbow', alpha=0.5, edgecolors='none')
plt.show()
In [33]:
# if you want to include a colorbar...
# keep the handle returned by scatter and pass it to plt.colorbar explicitly, so the
# colorbar is tied to this scatter rather than to whatever was drawn last
points = plt.scatter(x, y, c=colors, cmap=plt.cm.gist_rainbow, s=area, edgecolors='none', alpha=0.5)
h = plt.colorbar(points)  # h ends up bound to the Colorbar, as in the original cell
plt.show()
In [34]:
# it is up to you to determine what color and size mean
# here size correlates to x values, while color correlates to y values
size2 = np.pi * (15 * x)**2  # marker area grows with the square of x
colors2 = y
# keep the scatter handle under its own name and pass it to plt.colorbar explicitly,
# instead of overwriting one variable with two different kinds of object
points = plt.scatter(x, y, c=colors2, cmap=plt.cm.gist_rainbow, s=size2, edgecolors='none', alpha=0.5)
h = plt.colorbar(points)  # h ends up bound to the Colorbar, as in the original cell
plt.show()
In [35]:
# text can be placed on a plot too -- hopefully more tastefully than here
# ha / va choose which side of the anchor point the text is aligned to
plt.scatter(x, y, s=size2, c=colors2, cmap=plt.cm.gist_rainbow, alpha=0.5, edgecolors='none')
plt.text(0, 0, 'origin')
plt.text(0.4, 0.4, 'hi!', ha='right')
plt.text(0.4, 0.4, 'hi?', ha='left', color='magenta')
# both labels below are anchored at the largest x and y values in the data
x_top, y_top = x.max(), y.max()
plt.text(x_top, y_top, 'max x,y val', fontsize=13, va='bottom', rotation=180)
plt.text(x_top, y_top, 'max x,y val', fontsize=13, va='top', color='blue')
plt.show()
In [36]:
# histograms are also a thing :)
# more at http://matplotlib.org/1.2.1/examples/pylab_examples/histogram_demo.html
xhist1 = np.random.randn(10000)  # 10000 standard-normal samples
xhist2 = xhist1 * 2              # the same samples, stretched to twice the spread
# density=True normalises each histogram to unit area; it replaces the old `normed`
# keyword, which current matplotlib releases no longer accept
plt.hist(xhist1, bins=50, color='green', density=True)
plt.hist(xhist2, bins=50, color='blue', edgecolor='none', density=True, alpha=0.5)
plt.xlim([-8, 8])
plt.show()