In [ ]:
# Author: Chris Ward @ https://bit.ly/cward_kejbaly2
# License: MIT

from datetime import date
import requests as rq
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%pylab inline
%matplotlib

def date_conv(c):
    # source data comes with strange dates
    # this converts them to datetime.date()s
    y, m = c.split('M')
    d = date(int(y), int(m), 1)
    return d

def download_file(url):
    # adapted from src: http://stackoverflow.com/a/16696317
    local_filename = url.split('/')[-1]
    # NOTE the stream=True parameter
    r = rq.get(url, stream=True)
    with open(local_filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024): 
            if chunk: # filter out keep-alive new chunks
                f.write(chunk)
                f.flush()
    return local_filename

red = lambda x: '\x1b[31m{}\x1b[0m'.format(x)  # colorized output

In [ ]:
# this is an ever extending source file; new row added **monthly**
xls_url = 'http://www.imf.org/external/np/res/commod/External_Data.xls'
xls_file = download_file(xls_url)
# row 1 is blank
# row 2 - 7... not useful?
# row 6 is data (type) description
# row 8 is the header
xls = pd.read_excel(xls_file, 'ExternalData', skiprows=7)
# copy and "drop" row 9, the description row
descriptions = xls.ix[:1]
xls = xls.ix[1:]

# convert dates from yyyyMmm -> datetime.date(y, m, 1)
xls['Series_Code'] = ix = xls['Series_Code'].map(date_conv)

# reset index to dates
xls = xls.set_index(ix)
# py27 has rounding issues... csv (string) output results in long decimals

In [ ]:
# export to csv
xls.to_csv('/tmp/out.csv')

In [ ]:
# print out Commodity Index Key Descriptions
_d = descriptions.to_dict()
d = {k: v[0] for k,v in _d.iteritems()}
for commodity, desc in d.iteritems():
    print '{:13s}: {}'.format(red(commodity), desc)

In [ ]: