Andreas Linz, Ying-Chi Lin and Huan Meng.
This presentation refers to the latest stable matplotlib version 1.4.2.
matplotlib.pylab
First, make sure that pip is installed, or use one of the pre-built packages instead!
pip
works on Unix/Linux, OS X, and Windows.
pip
comes with Python (no need to install it separately)
git clone git@github.com:matplotlib/matplotlib.git
cd matplotlib
python setup.py build
python setup.py install
curl -O https://bootstrap.pypa.io/get-pip.py
python get-pip.py
pip install matplotlib
pip install ipython[notebook]
sudo yum install python-matplotlib
sudo apt-get install python-matplotlib
sudo pacman -S python-matplotlib
python-pip
PATH
by adding the Scripts/
subdirectory of your Python installation to this environment variable:
Start → Control Panel → System → Advanced → Environment Variables ...
Path
under System Variables with the Path of the Scripts
directory, the default is C:\Python34\Scripts
pip install ipython[notebook]
(installs with dependencies for IPython notebooks)
pip install matplotlib
python3 -c 'import matplotlib; print(matplotlib.__version__, matplotlib.__file__)'
python2.7 -c 'import matplotlib; print(matplotlib.__version__, matplotlib.__file__)'
In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
A small test example to see if matplotlib is working properly on your Notebook
In [ ]:
# Plot four (x, y) points; the format string 'ro' selects red circle markers.
plt.plot([2,4,6,9],[1,4,9,16], 'ro')
# axis limits are given as [xmin, xmax, ymin, ymax]
plt.axis([0, 10, 0, 20])
plt.show()
matplotlib.pyplot
: is a collection of functions that make matplotlib work like MATLAB
In [ ]:
fig = plt.figure() # create an empty figure with no axes
# NOTE: `fig` is rebound by the next line, so the empty figure above is no longer reachable by this name
fig, ax = plt.subplots(2, 2) # a figure with a 2x2 grid of Axes
The function subplots() returns a tuple (fig, ax).
In [ ]:
# define data as lists (these names are reused by the following cells)
x1 = [2,4,6,8]
y1 = [5.5,7,2,4]
x2 = [4,6,8,10,12,14]
y2 = [-12,-14,-7,-12,-3.3,-1]
# let's draw the first line
# if no x-value is given an incrementing index, beginning from zero, will be used
plt.plot(y1)
# alternative: draw an x, y plot
#plt.plot(x1,y1)
# alternative: draw two lines in one plot
#plt.plot(x1,y1,x2,y2)
plt.show()
In [ ]:
# NOTE: all three plot calls below are issued before plt.show(), so they end up in the same output
#-g : line in green
plt.plot(y1,'-g')
#ro- : red circle with line
plt.plot(x1,y1,'ro-')
#g* : star symbols in green (second data set, passed in the same call as the first)
plt.plot(x1,y1,'ro-',x2,y2,'g*')
plt.show()
In [ ]:
# style the line with explicit keyword arguments instead of a format string
plt.plot(x2, y2, color='green', linestyle='dashed', marker='o',
markerfacecolor='blue', markersize=12,label='line1',linewidth=3)
# axis limits are given as [xmin, xmax, ymin, ymax]
plt.axis([2, 16, -16, 2])
# the legend entry comes from the `label` passed to plt.plot above
plt.legend()
plt.show()
In [ ]:
# keep what plt.plot returns so the lines can be restyled afterwards
lines = plt.plot(x1, y1, x2, y2)
# use keyword args
plt.setp(lines, color='r', linewidth=2.0)
# or MATLAB style string value pairs (sets the same properties as the call above)
plt.setp(lines, 'color', 'r', 'linewidth', 2.0)
plt.show()
to see available text properties (horizontal-alignment, rotation etc...)
In [ ]:
# plot each data set with its own call to get a separate handle per data set
line1 = plt.plot(x1, y1)
line2 = plt.plot(x2, y2)
# set color, line width and legend label per data set
plt.setp(line1, color='r', linewidth=2.0, label='mouse')
plt.setp(line2, color='b', linewidth=2.0, label='pig')
# title and axis labels take text properties such as color, fontsize, style and rotation
plt.title('test results', color='g',fontsize=18)
plt.xlabel('days',fontsize=16,style='italic')
plt.ylabel('value',fontsize=16,style='italic', rotation='horizontal')
plt.legend() # to show legend
plt.show()
There are many different types of graphs possible, for more examples.
In [ ]:
import numpy as np
import math
# [linear space](http://docs.scipy.org/doc/numpy/reference/generated/numpy.linspace.html),
# returns a NumPy array of evenly spaced numbers over the given interval
# no sample count is passed for x1, so NumPy's default is used
x1 = np.linspace(0.0, 5.0)
# 1000 samples between 0.0 and 1.0
x2 = np.linspace(0.0, 1.0, 1000)
# f(x) = cos(2*pi*x) * e^(-x)
y1 = np.cos(2 * np.pi * x1) * np.exp(-x1)
# second plot without numpy array for the y-axis
def f(x):
    """Return sin(x ** x), evaluated with the scalar ``math`` module."""
    power = x ** x
    return math.sin(power)
# evaluate f point by point: math.sin only accepts scalars, hence the list comprehension
y2 = [f(x * math.pi) for x in x2]

# upper plot
plt.subplot(2, 1, 1)
# set the title only after the first subplot exists; calling plt.title before
# any subplot would attach it to an implicitly created full-size axes instead
plt.title('Two subplots')
plt.plot(x1, y1, 'yo-')
plt.ylabel(r'$\cos(2\pi*x) * e^{-x}$', fontsize=16)

# lower plot
plt.subplot(2, 1, 2)
plt.plot(x2, y2, 'r.-')
plt.xlabel('radiant')
plt.ylabel(r'$\sin(\pi*x^{\pi*x})$', fontsize=16)

# change the plot size
from pylab import rcParams
# remember the size that was configured before this cell changes it
xSize, ySize = rcParams['figure.figsize']
# run this cell three times and the plot size will change, global state magic!
rcParams['figure.figsize'] = (16, 9)
plt.show()
In [ ]:
# reset global size
# NOTE(review): the size is hard-coded here rather than restored from a saved value — confirm it matches your default
rcParams['figure.figsize'] = (6.0, 4.0)
np.array
or np.ma.masked_array
as input (where np
is numpy)
daten.berlin.de
, so the following diagram is only for demonstration purposes and nothing else!
In [ ]:
# dataset
datasetpath = 'data/EWR_Ortsteile_2012-12-31.csv'
# read the raw lines, just to have a look at the file format
with open(datasetpath) as f:
    data = f.readlines()
# preview the first 10 lines; slicing also copes with files shorter than that
for line in data[:10]:
    print(line)
print('...')
In [ ]:
# distribution of age and population for Berlin districts divided by foreigners and locals
import csv
from statistics import mean
import numpy as np
from collections import defaultdict
def clean(age, count):
    """Normalize one age-group label and its count.

    Returns a tuple ``(agerange, count)``. A label containing 'und' is
    mapped to '95-100+'; any other label is split at '_' and its first two
    parts are joined with '-'. Only the part of ``count`` in front of the
    first ',' is converted to ``int``.
    """
    if 'und' not in age:
        bounds = age.split('_')
        label = '-'.join((bounds[0], bounds[1]))
    else:
        # fix inconsistent values
        label = '95-100+'
    whole_part, _, _ = count.partition(',')
    return label, int(whole_part)
# layout: data[<district> or 'total'][<'foreigner' | 'local'>][<age range>]
# 'total' collects the counts of all districts in a list per age range,
# a district entry stores the plain count per age range
data = {
    'total': {
        'foreigner': defaultdict(list),
        'local': defaultdict(list),
    },
}
with open(datasetpath) as f:
    # automatically uses csv header to generate keys
    # regardless of their name, csv files are not always delimited by `,`
    csvdata = csv.DictReader(f, delimiter=';')
    # csvdata yields one dictionary for each line of data
    for dict_row in csvdata:
        # collect values for each district
        district = dict_row['Bez-Name']
        if district not in data:
            data[district] = dict(foreigner={}, local={})
        citizenship = dict_row['Staatsangeh']
        # clean up inconsistent values
        age, cnt = clean(dict_row['Altersgr'], dict_row['Häufigkeit'])
        # rows with any other citizenship code are not counted
        if citizenship not in ('A', 'D'):
            continue
        # 'A' rows are stored as foreigners, 'D' rows as locals
        group = 'foreigner' if citizenship == 'A' else 'local'
        # totals could be accumulated by a separate function, but it's more convenient this way
        data['total'][group][age].append(cnt)
        data[district][group][age] = cnt
# calculate, per age group, the mean count over all districts for foreigners and locals
foreigners = {
    'age': [],    # x-axis
    'means': [],  # y-axis
}
# named `locals_` (trailing underscore) so the builtin `locals()` is not shadowed
locals_ = {
    'age': [],
    'means': [],
}
# iterate over the sorted keys and add the values in order to the lists
for stats, group in ((foreigners, 'foreigner'), (locals_, 'local')):
    for age, numbers in sorted(data['total'][group].items()):
        # numbers is a list containing the count of that group for this age in each district
        stats['age'].append(age)
        # add the arithmetic mean over the counts of all districts
        stats['means'].append(mean(numbers))

# import pprint
# pp = pprint.PrettyPrinter()
# pp.pprint(locals_)

# numpy.arange returns evenly spaced values for the given range, e.g. arange(3) returns [0, 1, 2]
# this will be used to set the x-coordinate of each bar in the plot
# http://docs.scipy.org/doc/numpy/reference/generated/numpy.arange.html
indices = np.arange(len(foreigners['means']))

# http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.bar
# plt.bar("x-coord from the left", "bar height", ...)
fplt = plt.bar(indices, foreigners['means'], color='r')
# bottom - distance from the x-axis; stacks the locals on top of the foreigners
lplt = plt.bar(indices, locals_['means'], color='y', bottom=foreigners['means'])

# http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.xticks
# NOTE(review): the tick labels are taken from the locals only; this assumes both
# groups contain the same age ranges — confirm against the dataset
plt.xticks(indices, locals_['age'], rotation='vertical')

# labels
plt.xlabel('age groups')
plt.ylabel('citizens')

# prints the legend
# http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.legend
plt.legend((fplt, lplt), ('foreigners', 'locals'))

# show the plot
plt.show()
If you are interested in more examples, check this out.
In [ ]: