Goals
In [1]:
# Set up inline figure rendering and bring numpy (as np) and matplotlib.pyplot
# (as plt) into the namespace; --no-import-all skips the `from pylab import *`
# step so the notebook namespace is not flooded with pylab names.
%pylab --no-import-all inline
In [2]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
In [3]:
# Goal: plot different types of graphs to show the distribution of populations.
# Reference: chapter 8 ("Plotting and Visualization") of PfDA:
# http://my.safaribooksonline.com/book/programming/python/9781449323592/8dot-plotting-and-visualization/id2802076
# The "hello world" of matplotlib: a straight line through the values 0..9.
# No figure or axes is created explicitly here; pyplot supplies them.
plt.plot(np.arange(10))
Out[3]:
In [4]:
# Create an empty figure and keep a handle to it so subplots can be added to it.
fig = plt.figure()
In [5]:
# Carve the figure into a 2x2 grid and create axes in the first three slots
# (slot numbers are 1-based); the fourth slot is left empty.
ax1, ax2, ax3 = (fig.add_subplot(2, 2, slot) for slot in (1, 2, 3))
# A bare `fig` as the last expression makes the notebook render the figure.
fig
Out[5]:
PfDA:
When you issue a plotting command like plt.plot([1.5, 3.5, -2, 1.6]), matplotlib draws on the last figure and subplot used (creating one if necessary), thus hiding the figure and subplot creation.
In [6]:
# No figure/axes is named here: pyplot draws on the last figure and subplot
# used, creating one if necessary.
plt.plot([1.5, 3.5, -2, 1.6])
Out[6]:
In [7]:
import math

# Format strings to try; None means "call plot() without a format string".
options = [None, 'k--', 'ro', 'g+']
fig = plt.figure()

# Lay the subplots out two columns wide, one subplot per option.
# math.ceil returns a float on Python 2 and add_subplot needs integer grid
# dimensions, so convert explicitly.
num_row = int(math.ceil(len(options) / 2.0))

for (i, option) in enumerate(options):
    ax = fig.add_subplot(num_row, 2, i + 1)  # subplot positions are 1-based
    if option is not None:
        ax.plot([1.5, 3.5, -2, 1.6], option)
    else:
        ax.plot([1.5, 3.5, -2, 1.6])

fig.show()
In [8]:
from numpy.random import randn

# Three panels in a 2x2 grid (bottom-right slot left empty), each showing a
# different plot type driven by normally distributed random samples.
fig = plt.figure()
ax1, ax2, ax3 = (fig.add_subplot(2, 2, slot) for slot in (1, 2, 3))

# Panel 1: histogram of 100 samples, translucent black bars in 20 bins.
ax1.hist(randn(100), bins=20, color='k', alpha=0.3)

# Panel 2: scatter of a noisy diagonal (y = x + 3 * noise).
ax2.scatter(np.arange(30), np.arange(30) + 3 * randn(30))

# Panel 3: random walk (cumulative sum of 50 samples), dashed black line.
ax3.plot(randn(50).cumsum(), 'k--')

fig.show()
In [9]:
# Grouped bar chart, adapted from
# http://matplotlib.org/examples/api/barchart_demo.html
import numpy as np
import matplotlib.pyplot as plt

N = 5
ind = np.arange(N)  # the x locations for the groups
width = 0.35        # the width of the bars

# Per-group means and standard deviations for the two series.
menMeans = (20, 35, 30, 35, 27)
menStd = (2, 3, 4, 1, 2)
womenMeans = (25, 32, 34, 20, 25)
womenStd = (3, 5, 2, 3, 3)

fig, ax = plt.subplots()

# The second series is offset by one bar width so each pair sits side by side;
# yerr draws the standard deviations as error bars.
rects1 = ax.bar(ind, menMeans, width, color='r', yerr=menStd)
rects2 = ax.bar(ind + width, womenMeans, width, color='y', yerr=womenStd)

# Add the axis label, title, group tick labels and legend.
ax.set_ylabel('Scores')
ax.set_title('Scores by group and gender')
ax.set_xticks(ind + width)
ax.set_xticklabels(('G1', 'G2', 'G3', 'G4', 'G5'))
ax.legend((rects1[0], rects2[0]), ('Men', 'Women'))


def autolabel(rects):
    """Write each bar's height, truncated to an int, just above the bar.

    The text is horizontally centred on the bar and drawn on the
    notebook-level ``ax``.
    """
    for bar in rects:
        bar_height = bar.get_height()
        label_x = bar.get_x() + bar.get_width() / 2.
        ax.text(label_x, 1.05 * bar_height, '%d' % int(bar_height),
                ha='center', va='bottom')


autolabel(rects1)
autolabel(rects2)

plt.show()
Going back to our calculation from Day_01_B_World_Population.ipynb:
In [10]:
# Read the population data in.
# Source: a scrape of
# https://en.wikipedia.org/w/index.php?title=List_of_countries_by_population_(United_Nations)&oldid=590438477
# stored as JSON at
# https://gist.github.com/rdhyee/8511607/raw/f16257434352916574473e63612fcea55a0c1b1c/population_of_countries.json
import json
import requests

pop_json_url = "https://gist.github.com/rdhyee/8511607/raw/f16257434352916574473e63612fcea55a0c1b1c/population_of_countries.json"

# Bound the wait so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error instead of trying to parse an error page as JSON.
pop_response = requests.get(pop_json_url, timeout=30)
pop_response.raise_for_status()
pop_list = pop_response.json()

# Preview the first few entries rather than dumping the whole list.
pop_list[:10]
Out[10]:
In [11]:
# Split the records into parallel sequences: a running index, the name
# (field 1 of each record) and the population (field 2, coerced to int).
country_num = range(len(pop_list))
country_names = [record[1] for record in pop_list]
country_pops = [int(record[2]) for record in pop_list]
In [12]:
# Line plot of the populations in list order; the x axis is the list index.
plt.plot(country_pops)
Out[12]:
In [13]:
from itertools import islice

# Keep every 10th (index, name) pair; these become the x-tick positions and
# labels so the axis is not crowded with one label per country.
# The built-in zip is used instead of itertools.izip, which only exists on
# Python 2; islice consumes either a list or an iterator, so the result is
# the same on both versions.
sampled_country_tuples = list(islice(zip(country_num, country_names), 0, None, 10))
sampled_i = [s[0] for s in sampled_country_tuples]
sampled_countries = [s[1] for s in sampled_country_tuples]
In [14]:
# Bar chart of population per country, with a log-scaled y axis.
# A horizontal variant (barh) is shown at
# http://matplotlib.org/examples/lines_bars_and_markers/barh_demo.html
# API reference: http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.bar
#
# The bar positions are passed positionally because newer matplotlib releases
# renamed the `left` keyword to `x`. `log` is a boolean flag that log-scales
# the value axis, so pass True rather than the truthy-but-misleading 'x'.
plt.bar(range(len(country_pops)), country_pops, width=2, log=True)
# Alternative: label every country (too dense to read).
#plt.xticks(range(len(country_pops)), country_names, rotation='vertical')
# Label only the sampled countries, with vertical text so names do not overlap.
plt.xticks(sampled_i, sampled_countries, rotation='vertical')
plt.ylabel('Population')
plt.show()
In [15]:
# What if we make a DataFrame from pop_list instead of parallel lists?
# No column names are given, so columns are addressed by position (df[1], df[2], ...).
df = DataFrame(pop_list)
In [16]:
# Column 2 is the population field. Cast it to int, as country_pops does, so
# the values are plotted as numbers even if the JSON stores them as strings.
plt.plot(df[2].astype(int))
Out[16]:
In [17]:
# Running total of population in list order. Cast to int first (as
# country_pops does): cumsum on a column of strings would concatenate text
# instead of adding numbers.
plt.plot(df[2].astype(int).cumsum())
Out[17]:
In [17]:
In [ ]: