The code comes from this URL:

https://davebackus.gitbooks.io/test/content/graphs1.html



In [5]:

    
%matplotlib inline

import sys
import pandas as pd                    # data package
import matplotlib as mpl
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt                  # date and time module

# Report interpreter/library versions and the run date so the recorded
# outputs below can be tied to a specific environment.
for label, value in (('Python version:', sys.version),
                     ('Pandas version: ', pd.__version__),
                     ('Matplotlib version: ', mpl.__version__),
                     ('Today: ', dt.date.today())):
    print(label, value)









    



Python version: 3.4.3 (default, Oct 14 2015, 20:28:29) 
[GCC 4.8.4]
Pandas version:  0.18.1
Matplotlib version:  1.5.2
Today:  2016-08-27



In [6]:

    
# Two annual series, one observation per year for 2003-2013.
gdp = [
    13271.1, 13773.5, 14234.2, 14613.8, 14873.7, 14830.4,
    14418.7, 14783.8, 15020.6, 15369.2, 15710.3,
]
pce = [
    8867.6, 9208.2, 9531.8, 9821.7, 10041.6, 10007.2,
    9847.0, 10036.3, 10263.5, 10449.7, 10699.7,
]
year = list(range(2003, 2014))       # 2003 through 2013 inclusive

# Assemble the series into one frame indexed by year and show it as text.
us = pd.DataFrame(index=year, data=dict(gdp=gdp, pce=pce))
print(us)









    



          gdp      pce
2003  13271.1   8867.6
2004  13773.5   9208.2
2005  14234.2   9531.8
2006  14613.8   9821.7
2007  14873.7  10041.6
2008  14830.4  10007.2
2009  14418.7   9847.0
2010  14783.8  10036.3
2011  15020.6  10263.5
2012  15369.2  10449.7
2013  15710.3  10699.7



In [7]:

    
# Small cross-country table: one row per country, indexed by its
# three-letter code.
code    = ['USA', 'FRA', 'JPN', 'CHN', 'IND', 'BRA', 'MEX']
country = ['United States', 'France', 'Japan', 'China', 'India',
             'Brazil', 'Mexico']
gdppc   = [53.1, 36.9, 36.3, 11.9, 5.4, 15.0, 16.5]

# Pin the column order explicitly.  When built from a dict, the resulting
# column order depends on the pandas/Python version (sorted keys vs.
# insertion order); `columns=` makes it always match the displayed table,
# which lists `country` before `gdppc`.
wbdf = pd.DataFrame({'gdppc': gdppc, 'country': country},
                    index=code,
                    columns=['country', 'gdppc'])
wbdf









    Out[7]:






  
    
      
      country
      gdppc
    
  
  
    
      USA
      United States
      53.1
    
    
      FRA
      France
      36.9
    
    
      JPN
      Japan
      36.3
    
    
      CHN
      China
      11.9
    
    
      IND
      India
      5.4
    
    
      BRA
      Brazil
      15.0
    
    
      MEX
      Mexico
      16.5



In [8]:

    
# NOTE(review): the recorded output of this cell shows a FutureWarning saying
# pandas.io.data has moved to the separate pandas-datareader package.
import pandas.io.data as web

# Element [1] of the Fama-French download is the table used here.
ff = web.DataReader('F-F_Research_Data_factors', 'famafrench')[1]
ff.columns = ['xsm', 'smb', 'hml', 'rf']

# rm (market) is xsm plus rf (riskfree); keep only rm and rf.
ff = ff.assign(rm=ff['xsm'] + ff['rf'])[['rm', 'rf']]
ff.head(n=5)









    



/venv35/lib/python3.4/site-packages/pandas/io/data.py:35: FutureWarning: 
The pandas.io.data module is moved to a separate package (pandas-datareader) and will be removed from pandas in a future version.
After installing the pandas-datareader package (https://github.com/pydata/pandas-datareader), you can change the import ``from pandas.io import data, wb`` to ``from pandas_datareader import data, wb``.
  FutureWarning)






    Out[8]:






  
    
      
      rm
      rf
    
  
  
    
      1927
      32.59
      3.12
    
    
      1928
      38.95
      3.56
    
    
      1929
      -14.79
      4.75
    
    
      1930
      -28.82
      2.41
    
    
      1931
      -44.04
      1.07



In [9]:

    
# Scatter plot of pce (y axis) against gdp (x axis).
us.plot(kind='scatter', x='gdp', y='pce')









    Out[9]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fdd0b9d8a20>



In [10]:

    
# Line plot of every column of ff against its index.
ff.plot.line()









    Out[10]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fdd0c3d5978>



In [11]:

    
# One 20-bin histogram per column, each drawn in its own subplot.
ff.plot.hist(bins=20, subplots=True)









    Out[11]:





array([<matplotlib.axes._subplots.AxesSubplot object at 0x7fdd08bc8438>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x7fdd08b3e0f0>], dtype=object)



In [12]:

    
import matplotlib.pyplot as plt



In [13]:

    
# pyplot interface: gdp against the year index.
plt.plot(us.index, us.loc[:, 'gdp'])









    Out[13]:





[<matplotlib.lines.Line2D at 0x7fdd08a2c0f0>]



In [15]:

    
# Same gdp-by-year line as the earlier cell, drawn again with pyplot.
plt.plot(us.index, us.loc[:, 'gdp'])









    Out[15]:





[<matplotlib.lines.Line2D at 0x7fdd08978278>]



In [16]:

    
# pyplot interface: pce against the year index.
plt.plot(us.index, us.loc[:, 'pce'])









    Out[16]:





[<matplotlib.lines.Line2D at 0x7fdd088d4cf8>]



In [17]:

    
# Bar chart of gdp, one bar per year, with default styling.
plt.bar(us.index, us.loc[:, 'gdp'])









    Out[17]:





<Container object of 11 artists>



In [18]:

    
# Appearance options for the bars, gathered in one place.
bar_style = {
    'align': 'center',
    'alpha': 0.65,
    'color': 'red',
    'edgecolor': 'green',
}
plt.bar(us.index, us['gdp'], **bar_style)









    Out[18]:





<Container object of 11 artists>



In [19]:

    
import matplotlib.pyplot as plt  # import pyplot module 
fig, ax = plt.subplots(nrows=1, ncols=1)   # one figure holding a single axes



In [20]:

    
# The axes object is named `axe` here to show that the `ax=` keyword of the
# pandas plot call accepts whatever axes object it is handed.
fig, axe = plt.subplots(nrows=1, ncols=1)
us.plot.line(ax=axe)









    Out[20]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fdd08a00048>



In [21]:

    
# Styling passed through to the pandas plot call.
line_opts = {
    'color': ['blue', 'magenta'],                        # line color
    'title': 'Fama-French market and riskfree returns',
}
fig, ax = plt.subplots()
ff.plot(kind='line', ax=ax, **line_opts)                 # line plot









    Out[21]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fdd087666d8>



In [22]:

    
fig, ax = plt.subplots()
us.plot(ax=ax)

# Decorate the axes after pandas has drawn the lines.
ax.legend(['GDP', 'Consumption'])           # more descriptive variable names
ax.set(ylabel='Billions of 2013 USD',
       xlim=(2002.5, 2013.5))               # shrink x axis limits
ax.set_title('US GDP and Consumption', loc='left', fontsize=14)
ax.tick_params(labelcolor='red')            # change tick labels to red



In [23]:

    
# Two stacked axes sharing the x axis; `ax` now holds both of them.
fig, ax = plt.subplots(2, 1, sharex=True)
print('Object ax has dimension {}'.format(len(ax)))









    



Object ax has dimension 2



In [24]:

    
# Two stacked panels with a shared x axis: gdp on top, pce below.
fig, ax = plt.subplots(2, 1, sharex=True)

us['gdp'].plot.line(ax=ax[0], color='green')   # first plot
us['pce'].plot.line(ax=ax[1], color='red')     # second plot









    Out[24]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fdd0852ada0>



In [25]:

    
import pandas as pd
import matplotlib.pyplot as plt 

url = 'http://dx.doi.org/10.1787/888932937035'

# Options for reading the spreadsheet, gathered in one place.
excel_opts = dict(
    skiprows=18,                # skip the first 18 rows
    skipfooter=7,               # skip the last 7
    parse_cols=[0, 1, 9, 13],   # select columns of interest
    index_col=0,                # set the index as the first column
    header=[0, 1],              # set the variable names
)
pisa = pd.read_excel(url, **excel_opts).dropna()   # read, then drop blank lines
pisa.columns = ['Math', 'Reading', 'Science']      # simplify variable names

fig, ax = plt.subplots()
pisa['Math'].plot.barh(ax=ax)                      # create bar chart









    Out[25]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fdd081e2588>



In [30]:

    
import pandas as pd
import matplotlib.pyplot as plt 

url = 'http://dx.doi.org/10.1787/888932937035'

# Read only the needed part of the sheet, then drop blank lines.
pisa = pd.read_excel(
    url,
    header=[0, 1],              # set the variable names
    index_col=0,                # set the index as the first column
    parse_cols=[0, 1, 9, 13],   # select columns of interest
    skiprows=18,                # skip the first 18 rows
    skipfooter=7,               # skip the last 7
).dropna()
pisa.columns = ['Math', 'Reading', 'Science']   # simplify variable names

# Same bar chart as before, on a tall narrow 4 x 13 figure.
fig, ax = plt.subplots()
pisa['Math'].plot.barh(ax=ax, figsize=(4, 13))
ax.set_title('PISA Math Score', loc='left')









    Out[30]:





<matplotlib.text.Text at 0x7fdd07eb6a20>



In [31]:

    
import pandas as pd
import matplotlib.pyplot as plt 

url = 'http://dx.doi.org/10.1787/888932937035'

# Options for reading the spreadsheet.
sheet_opts = {
    'skiprows': 18,                # skip the first 18 rows
    'skipfooter': 7,               # skip the last 7
    'parse_cols': [0, 1, 9, 13],   # select columns of interest
    'index_col': 0,                # set the index as the first column
    'header': [0, 1],              # set the variable names
}
pisa = pd.read_excel(url, **sheet_opts).dropna()   # drop blank lines
pisa.columns = ['Math', 'Reading', 'Science']      # simplify variable names

fig, ax = plt.subplots()
pisa['Math'].plot.barh(ax=ax, figsize=(4, 13))
ax.set_title('PISA Math Score', loc='left')

# Recolor one child artist of the axes red.
# NOTE(review): index 36 is a positional magic number; presumably it picks
# one specific bar -- confirm which row it corresponds to.
highlighted = ax.get_children()[36]
highlighted.set_color('r')



In [32]:

    
# load packages (redundancy is ok)
# NOTE(review): the recorded output of this cell shows a FutureWarning saying
# pandas.io.wb has moved to the separate pandas-datareader package.
import pandas as pd                   # data management tools
from pandas.io import wb              # World Bank api
import matplotlib.pyplot as plt       # plotting tools

# variable list (GDP per capita, GDP, life expectancy)
var = ['NY.GDP.PCAP.PP.KD', 'NY.GDP.MKTP.PP.KD', 'SP.DYN.LE00.IN']
# short column name for each indicator code
var_names = {'NY.GDP.PCAP.PP.KD': 'gdppc',
             'NY.GDP.MKTP.PP.KD': 'gdp',
             'SP.DYN.LE00.IN': 'life'}
# country list (ISO codes)
iso = ['USA', 'FRA', 'JPN', 'CHN', 'IND', 'BRA', 'MEX']
year = 2013

# get data from World Bank
df = wb.download(indicator=var, country=iso, start=year, end=year)

# munge data
df = df.reset_index(level='year', drop=True)   # leave country as the only index
# Rename by indicator code rather than by position, so a different column
# order in the download cannot silently mislabel the variables.
df = df.rename(columns=var_names)
df['pop']  = df['gdp']/df['gdppc']    # population
df['gdp'] = df['gdp']/10**12          # convert to trillions
df['gdppc'] = df['gdppc']/10**3       # convert to thousands

# Display order keyed by country name (same values as the recorded table).
# Assigning a Series aligns on the country index, so the result does not
# depend on the row order returned by the download.
display_order = {'United States': 0, 'France': 1, 'Japan': 2, 'China': 3,
                 'India': 4, 'Brazil': 5, 'Mexico': 6}
df['order'] = pd.Series(display_order)
df = df.sort_values(by='order', ascending=False)
df









    



/venv35/lib/python3.4/site-packages/pandas/io/wb.py:21: FutureWarning: 
The pandas.io.wb module is moved to a separate package (pandas-datareader) and will be removed from pandas in a future version.
After installing the pandas-datareader package (https://github.com/pydata/pandas-datareader), you can change the import ``from pandas.io import data, wb`` to ``from pandas_datareader import data, wb``.
  FutureWarning)






    Out[32]:






  
    
      
      gdppc
      gdp
      life
      pop
      order
    
    
      country
      
      
      
      
      
    
  
  
    
      Mexico
      16.154744
      1.998990
      76.532659
      1.237401e+08
      6
    
    
      Brazil
      15.281612
      3.121413
      74.122439
      2.042594e+08
      5
    
    
      India
      5.089581
      6.512113
      67.660415
      1.279499e+09
      4
    
    
      China
      11.805087
      16.023988
      75.585146
      1.357380e+09
      3
    
    
      Japan
      35.540536
      4.525683
      83.331951
      1.273386e+08
      2
    
    
      France
      37.250212
      2.457475
      82.219512
      6.597210e+07
      1
    
    
      United States
      50.878444
      16.099334
      78.841463
      3.164274e+08
      0



In [33]:

    
# Horizontal bar chart of gdp, one bar per row of df.
fig, ax = plt.subplots()
df['gdp'].plot.barh(ax=ax, alpha=0.5)
ax.set_xlabel('Trillions of US Dollars')
ax.set_title('GDP', fontsize=14, loc='left')
ax.set_ylabel('')









    Out[33]:





<matplotlib.text.Text at 0x7fdd07a154a8>



In [34]:

    
# Horizontal bar chart of gdppc in magenta, one bar per row of df.
fig, ax = plt.subplots()
df['gdppc'].plot.barh(ax=ax, alpha=0.5, color='m')
ax.set_xlabel('Thousands of US Dollars')
ax.set_title('GDP Per Capita', fontsize=14, loc='left')
ax.set_ylabel('')









    Out[34]:





<matplotlib.text.Text at 0x7fdd079c0c18>



In [35]:

    
fig, ax = plt.subplots()
df['gdppc'].plot.barh(ax=ax, alpha=0.5, color='b')
ax.set_xlabel('Thousands of US Dollars')
ax.set_ylabel('')
ax.set_title('GDP Per Capita', fontsize=14, loc='left')

# Tufte-like axes: push the left/bottom spines outward by 10, hide the
# right/top spines, and keep ticks on the two remaining sides only.
for side in ('left', 'bottom'):
    ax.spines[side].set_position(('outward', 10))
for side in ('right', 'top'):
    ax.spines[side].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')



In [36]:

    
# Bubble chart: life against gdppc, marker area scaled by pop.
bubble_area = df['pop']/10**6               # size of bubbles

fig, ax = plt.subplots()
ax.scatter(df['gdppc'], df['life'],         # x,y variables
           s=bubble_area,
           alpha=0.5)
ax.set_xlabel('GDP Per Capita')
ax.set_ylabel('Life Expectancy')
ax.set_title('Life expectancy vs. GDP per capita', fontsize=14, loc='left')
# Right-aligned note anchored at data coordinates (58, 66).
ax.text(58, 66, 'Bubble size represents population', ha='right')









    Out[36]:





<matplotlib.text.Text at 0x7fdd07a43b70>



In [41]:

    
# Overwrite the current rcParams with the values in mpl.rcParamsDefault.
# NOTE(review): presumably this undoes style changes made earlier in the
# kernel session -- confirm.
mpl.rcParams.update(mpl.rcParamsDefault)
# Re-issue the inline-backend magic after the reset.
# NOTE(review): presumably needed because the reset also touches
# backend-related settings -- confirm.
%matplotlib inline



In [42]:

    
# Same GDP bar chart as earlier, redrawn in a later cell.
fig, ax = plt.subplots()
df['gdp'].plot.barh(ax=ax, alpha=0.5)
ax.set_xlabel('Trillions of US Dollars')
ax.set_title('GDP', fontsize=14, loc='left')
ax.set_ylabel('')









    Out[42]:





<matplotlib.text.Text at 0x7fdd07bafe10>



In [43]:

    
# Apply the 'fivethirtyeight' style only while this figure is built and
# rendered.  plt.style.use() would change the global rcParams and silently
# restyle every cell run afterwards (including re-runs of earlier cells);
# the context manager restores the previous settings on exit.
with plt.style.context('fivethirtyeight'):
    fig, ax = plt.subplots()
    df['gdp'].plot(ax=ax, kind='barh', alpha=0.5)
    ax.set_title('GDP', loc='left', fontsize=14)
    ax.set_xlabel('Trillions of US Dollars')
    ax.set_ylabel('')
    plt.show()   # render inside the context so the style is still active









    Out[43]:





<matplotlib.text.Text at 0x7fdd078e9ba8>



In [44]:

    
# Use xkcd sketch mode only for this figure.  A bare plt.xkcd() call
# overwrites global rcParams for the rest of the session; used as a context
# manager it restores the previous settings on exit.
with plt.xkcd():
    fig, ax = plt.subplots()
    df['gdp'].plot(ax=ax, kind='barh', alpha=0.5)
    ax.set_title('GDP', loc='left', fontsize=14)
    ax.set_xlabel('Trillions of US Dollars')
    ax.set_ylabel('')
    plt.show()   # render inside the context so sketch mode is still active









    Out[44]:





<matplotlib.text.Text at 0x7fdd07f17860>






    



/venv35/lib/python3.4/site-packages/matplotlib/font_manager.py:1288: UserWarning: findfont: Font family ['Humor Sans', 'Comic Sans MS'] not found. Falling back to Bitstream Vera Sans
  (prop.get_family(), self.defaultFamily[fontext]))



In [47]:

    
!jupyter nbconvert matplotlib.ipynb --to html









    



Traceback (most recent call last):
  File "app_main.py", line 72, in run_toplevel
  File "/usr/local/bin/jupyter", line 7, in <module>
    from jupyter_core.command import main
ImportError: No module named jupyter_core



In [ ]:

	country	gdppc
USA	United States	53.1
FRA	France	36.9
JPN	Japan	36.3
CHN	China	11.9
IND	India	5.4
BRA	Brazil	15.0
MEX	Mexico	16.5

	rm	rf
1927	32.59	3.12
1928	38.95	3.56
1929	-14.79	4.75
1930	-28.82	2.41
1931	-44.04	1.07

	gdppc	gdp	life	pop	order
country
Mexico	16.154744	1.998990	76.532659	1.237401e+08	6
Brazil	15.281612	3.121413	74.122439	2.042594e+08	5
India	5.089581	6.512113	67.660415	1.279499e+09	4
China	11.805087	16.023988	75.585146	1.357380e+09	3
Japan	35.540536	4.525683	83.331951	1.273386e+08	2
France	37.250212	2.457475	82.219512	6.597210e+07	1
United States	50.878444	16.099334	78.841463	3.164274e+08	0