The code comes from this URL:
In [5]:
%matplotlib inline
import sys
import pandas as pd # data package
import matplotlib as mpl
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt # date and time module
# Report the interpreter/library versions and the run date (handy when
# comparing notebook output across machines).
for label, value in (
    ('Python version:', sys.version),
    ('Pandas version: ', pd.__version__),
    ('Matplotlib version: ', mpl.__version__),
    ('Today: ', dt.date.today()),
):
    print(label, value)
In [6]:
# Annual US series, one value per year from 2003 through 2013.
year = [2003 + offset for offset in range(11)]
gdp = [
    13271.1, 13773.5, 14234.2, 14613.8, 14873.7, 14830.4,
    14418.7, 14783.8, 15020.6, 15369.2, 15710.3,
]
pce = [
    8867.6, 9208.2, 9531.8, 9821.7, 10041.6, 10007.2,
    9847.0, 10036.3, 10263.5, 10449.7, 10699.7,
]
# Two-column frame ('gdp', 'pce') indexed by year.
us = pd.DataFrame(dict(gdp=gdp, pce=pce), index=year)
print(us)
In [7]:
# One record per economy: (ISO code, country name, GDP per capita).
# NOTE(review): the unit of gdppc is not stated in this cell — confirm.
economies = [
    ('USA', 'United States', 53.1),
    ('FRA', 'France', 36.9),
    ('JPN', 'Japan', 36.3),
    ('CHN', 'China', 11.9),
    ('IND', 'India', 5.4),
    ('BRA', 'Brazil', 15.0),
    ('MEX', 'Mexico', 16.5),
]
# Split the records back into the three parallel lists.
code, country, gdppc = (list(column) for column in zip(*economies))
# Frame indexed by ISO code with 'gdppc' and 'country' columns.
wbdf = pd.DataFrame({'gdppc': gdppc, 'country': country}, index=code)
wbdf
Out[7]:
In [8]:
import pandas.io.data as web
# NOTE(review): later pandas releases moved this remote-data module into the
# separate pandas-datareader package — confirm the pandas version this
# notebook is pinned to before re-running.

# Take element 1 of what DataReader returns for the Fama-French factor set.
ff = web.DataReader('F-F_Research_Data_factors', 'famafrench')[1]
ff.columns = ['xsm', 'smb', 'hml', 'rf']
# rm = xsm + rf; keep only rm (market) and rf (riskfree).
ff = ff.assign(rm=ff['xsm'] + ff['rf'])[['rm', 'rf']]
ff.head(n=5)
Out[8]:
In [9]:
# Scatter of consumption (y) against GDP (x) from the `us` frame.
us.plot(kind='scatter', x='gdp', y='pce')
Out[9]:
In [10]:
# Default pandas plot: one line per column of ff.
ff.plot.line()
Out[10]:
In [11]:
# One histogram panel per column of ff, each with 20 bins.
ff.plot.hist(bins=20, subplots=True)
Out[11]:
In [12]:
import matplotlib.pyplot as plt
In [13]:
# pyplot interface: GDP plotted against the frame's year index.
gdp_series = us['gdp']
plt.plot(gdp_series.index, gdp_series)
Out[13]:
In [15]:
# Same GDP-by-year line as the previous cell, drawn again with pyplot.
plt.plot(us.index, us.loc[:, 'gdp'])
Out[15]:
In [16]:
# pyplot interface: consumption plotted against the frame's year index.
pce_series = us['pce']
plt.plot(pce_series.index, pce_series)
Out[16]:
In [17]:
# Bar chart of GDP, one bar per year in the index.
plt.bar(us.index, us.loc[:, 'gdp'])
Out[17]:
In [18]:
# Same GDP bar chart with explicit styling: centred, semi-transparent red
# bars outlined in green.
bar_style = dict(align='center', alpha=0.65, color='red', edgecolor='green')
plt.bar(us.index, us['gdp'], **bar_style)
Out[18]:
In [19]:
import matplotlib.pyplot as plt  # pyplot module

# Create an empty figure holding a single axes; keep handles to both.
fig, ax = plt.subplots(nrows=1, ncols=1)
In [20]:
# The axes object can carry any name; here it is `axe`, and it is handed to
# pandas through the `ax=` keyword.
fig, axe = plt.subplots()
us.plot.line(ax=axe)
Out[20]:
In [21]:
# Line plot of both ff columns on an explicit axes, with per-series colours
# and a title supplied through pandas.
fig, ax = plt.subplots()
series_colors = ['blue', 'magenta']
ff.plot.line(
    ax=ax,
    color=series_colors,
    title='Fama-French market and riskfree returns',
)
Out[21]:
In [22]:
# Plot both US series, then decorate through the axes object.
fig, ax = plt.subplots()
us.plot(ax=ax)
ax.set_title('US GDP and Consumption', loc='left', fontsize=14)
# y label plus tightened x limits in a single call
ax.set(ylabel='Billions of 2013 USD', xlim=(2002.5, 2013.5))
# replace the column names in the legend with descriptive labels
ax.legend(['GDP', 'Consumption'])
# red tick labels on both axes
ax.tick_params(axis='both', labelcolor='red')
In [23]:
# With two rows and one column, `ax` is a collection of axes rather than a
# single axes; report how many it holds.
fig, ax = plt.subplots(2, 1, sharex=True)
print('Object ax has dimension', len(ax))
In [24]:
# Two stacked panels sharing the x axis: GDP on top (green), consumption
# below (red).
fig, ax = plt.subplots(2, 1, sharex=True)
upper, lower = ax
us.loc[:, 'gdp'].plot(ax=upper, color='green')
us.loc[:, 'pce'].plot(ax=lower, color='red')
Out[24]:
In [25]:
import pandas as pd
import matplotlib.pyplot as plt

# Spreadsheet is fetched over the network from this DOI link.
url = 'http://dx.doi.org/10.1787/888932937035'
# NOTE(review): `parse_cols` was renamed `usecols` in later pandas releases —
# confirm against the pandas version this notebook is pinned to.
read_options = dict(
    skiprows=18,               # ignore the 18 rows above the table
    skipfooter=7,              # ignore the 7 rows below it
    parse_cols=[0, 1, 9, 13],  # keep only the columns of interest
    index_col=0,               # first kept column becomes the index
    header=[0, 1],             # variable names span two header rows
)
pisa = pd.read_excel(url, **read_options).dropna()  # dropna removes blank lines
pisa.columns = ['Math', 'Reading', 'Science']       # simplified variable names

# Horizontal bar chart of the Math column on an explicit axes.
fig, ax = plt.subplots()
pisa['Math'].plot.barh(ax=ax)
Out[25]:
In [30]:
import pandas as pd
import matplotlib.pyplot as plt

# Same PISA download as the earlier cell, repeated so this cell runs alone.
# NOTE(review): `parse_cols` was renamed `usecols` in later pandas releases —
# confirm against the pandas version this notebook is pinned to.
url = 'http://dx.doi.org/10.1787/888932937035'
pisa = (
    pd.read_excel(
        url,
        header=[0, 1],             # variable names span two header rows
        index_col=0,               # first kept column becomes the index
        parse_cols=[0, 1, 9, 13],  # keep only the columns of interest
        skiprows=18,               # ignore the 18 rows above the table
        skipfooter=7,              # ignore the 7 rows below it
    )
    .dropna()                      # remove blank lines
)
pisa.columns = ['Math', 'Reading', 'Science']

# Tall, narrow figure (4 x 13) for the horizontal Math bars, plus a
# left-aligned title.
fig, ax = plt.subplots()
pisa['Math'].plot.barh(ax=ax, figsize=(4, 13))
ax.set_title('PISA Math Score', loc='left')
Out[30]:
In [31]:
import pandas as pd
import matplotlib.pyplot as plt

# Same PISA download once more, so this cell is self-contained.
# NOTE(review): `parse_cols` was renamed `usecols` in later pandas releases —
# confirm against the pandas version this notebook is pinned to.
url = 'http://dx.doi.org/10.1787/888932937035'
excel_kwargs = {
    'skiprows': 18,               # ignore the 18 rows above the table
    'skipfooter': 7,              # ignore the 7 rows below it
    'parse_cols': [0, 1, 9, 13],  # keep only the columns of interest
    'index_col': 0,               # first kept column becomes the index
    'header': [0, 1],             # variable names span two header rows
}
pisa = pd.read_excel(url, **excel_kwargs)
pisa = pisa.dropna()              # remove blank lines
pisa.columns = ['Math', 'Reading', 'Science']

fig, ax = plt.subplots()
pisa['Math'].plot.barh(ax=ax, figsize=(4, 13))
ax.set_title('PISA Math Score', loc='left')
# Recolour a single child artist of the axes in red.
# NOTE(review): position 36 presumably corresponds to one specific country's
# bar; it depends on the row count and order after dropna() — verify.
highlighted = ax.get_children()[36]
highlighted.set_color('r')
In [32]:
# load packages (redundancy is ok)
import pandas as pd               # data management tools
from pandas.io import wb          # World Bank api
import matplotlib.pyplot as plt   # plotting tools
# NOTE(review): later pandas releases moved the World Bank reader into the
# separate pandas-datareader package — confirm the pinned pandas version.

# World Bank indicator codes (GDP per capita, GDP, life expectancy)
var = ['NY.GDP.PCAP.PP.KD', 'NY.GDP.MKTP.PP.KD', 'SP.DYN.LE00.IN']
# countries, by ISO code
iso = ['USA', 'FRA', 'JPN', 'CHN', 'IND', 'BRA', 'MEX']
year = 2013

# Download the single requested year and drop the 'year' index level.
df = wb.download(indicator=var, country=iso, start=year, end=year) \
       .reset_index(level='year', drop=True)
# Positional rename: assumes the columns come back in the same order as
# `var` — TODO confirm.
df.columns = ['gdppc', 'gdp', 'life']
# Population is derived from the unscaled values, so compute it first.
df = df.assign(pop=df['gdp'] / df['gdppc'])
# Rescale: GDP to trillions, GDP per capita to thousands.
df = df.assign(gdp=df['gdp'] / 10**12, gdppc=df['gdppc'] / 10**3)
# Hand-written sort key, one entry per downloaded row.
# NOTE(review): relies on the row order returned by the download — verify.
df = df.assign(order=[5, 3, 1, 4, 2, 6, 0])
df = df.sort_values('order', ascending=False)
df
Out[32]:
In [33]:
# Horizontal bars of GDP by country, half-transparent.
fig, ax = plt.subplots()
df['gdp'].plot.barh(ax=ax, alpha=0.5)
ax.set_xlabel('Trillions of US Dollars')
ax.set_title('GDP', fontsize=14, loc='left')
ax.set_ylabel('')  # blank out the y-axis label
Out[33]:
In [34]:
# Horizontal bars of GDP per capita by country, magenta and half-transparent.
fig, ax = plt.subplots()
df['gdppc'].plot.barh(ax=ax, color='m', alpha=0.5)
ax.set_xlabel('Thousands of US Dollars')
ax.set_title('GDP Per Capita', fontsize=14, loc='left')
ax.set_ylabel('')  # blank out the y-axis label
Out[34]:
In [35]:
# GDP-per-capita bars again (blue this time) with pared-down axes.
fig, ax = plt.subplots()
df['gdppc'].plot.barh(ax=ax, color='b', alpha=0.5)
ax.set_title('GDP Per Capita', fontsize=14, loc='left')
ax.set_xlabel('Thousands of US Dollars')
ax.set_ylabel('')

# Tufte-like axes: push the left/bottom spines 10 points outward, hide the
# right/top ones, and keep ticks only on the visible sides.
for side in ('left', 'bottom'):
    ax.spines[side].set_position(('outward', 10))
for side in ('right', 'top'):
    ax.spines[side].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
In [36]:
fig, ax = plt.subplots()
ax.scatter(df['gdppc'], df['life'], # x,y variables
s=df['pop']/10**6, # size of bubbles
alpha=0.5)
ax.set_title('Life expectancy vs. GDP per capita', loc='left', fontsize=14)
ax.set_xlabel('GDP Per Capita')
ax.set_ylabel('Life Expectancy')
ax.text(58, 66, 'Bubble size represents population', horizontalalignment='right')
Out[36]:
In [41]:
# Overwrite the active matplotlib rc settings with the values held in
# mpl.rcParamsDefault, then re-issue the inline-backend magic.
# NOTE(review): presumably the magic is repeated so inline figures keep
# rendering after the reset — confirm.
mpl.rcParams.update(mpl.rcParamsDefault)
%matplotlib inline
In [42]:
# GDP bar chart drawn with whatever rc settings are currently active.
fig, ax = plt.subplots()
gdp_trillions = df['gdp']
gdp_trillions.plot(kind='barh', ax=ax, alpha=0.5)
ax.set_title('GDP', loc='left', fontsize=14)
ax.set_xlabel('Trillions of US Dollars')
ax.set_ylabel('')  # blank out the y-axis label
Out[42]:
In [43]:
# GDP bar chart in the 'fivethirtyeight' style.
#
# The style is applied through a context manager rather than plt.style.use():
# style.use() rewrites the global rc settings, so every cell executed
# afterwards would silently inherit this look and the notebook's figures
# would depend on execution order. Scoping it keeps the cell idempotent.
with plt.style.context('fivethirtyeight'):
    fig, ax = plt.subplots()
    df['gdp'].plot(ax=ax, kind='barh', alpha=0.5)
    ax.set_title('GDP', loc='left', fontsize=14)
    ax.set_xlabel('Trillions of US Dollars')
    ax.set_ylabel('')
    # Render inside the block so the figure is drawn while the style is
    # still in effect.
    plt.show()
Out[43]:
In [44]:
# GDP bar chart in xkcd sketch style.
#
# plt.xkcd() changes the global rc settings as soon as it is called; used
# bare, it leaves sketch mode switched on for everything plotted later in
# the session. Using the object it returns as a context manager restores the
# previous settings when the block ends.
with plt.xkcd():
    fig, ax = plt.subplots()
    df['gdp'].plot(ax=ax, kind='barh', alpha=0.5)
    ax.set_title('GDP', loc='left', fontsize=14)
    ax.set_xlabel('Trillions of US Dollars')
    ax.set_ylabel('')
    # Render inside the block so the figure is drawn while xkcd mode is
    # still in effect.
    plt.show()
Out[44]:
In [47]:
# Shell escape: run `jupyter nbconvert` on matplotlib.ipynb with HTML as the
# target format.
!jupyter nbconvert matplotlib.ipynb --to html
In [ ]: