In [ ]:
# Author: Chris Ward @ https://bit.ly/cward_kejbaly2
# License: MIT
from datetime import date
import requests as rq
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%pylab inline
%matplotlib
def date_conv(c):
# source data comes with strange dates
# this converts them to datetime.date()s
y, m = c.split('M')
d = date(int(y), int(m), 1)
return d
def download_file(url):
# adapted from src: http://stackoverflow.com/a/16696317
local_filename = url.split('/')[-1]
# NOTE the stream=True parameter
r = rq.get(url, stream=True)
with open(local_filename, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
f.write(chunk)
f.flush()
return local_filename
red = lambda x: '\x1b[31m{}\x1b[0m'.format(x) # colorized output
In [ ]:
# this is an ever extending source file; new row added **monthly**
xls_url = 'http://www.imf.org/external/np/res/commod/External_Data.xls'
xls_file = download_file(xls_url)
# row 1 is blank
# row 2 - 7... not useful?
# row 6 is data (type) description
# row 8 is the header
xls = pd.read_excel(xls_file, 'ExternalData', skiprows=7)
# copy and "drop" row 9, the description row
descriptions = xls.ix[:1]
xls = xls.ix[1:]
# convert dates from yyyyMmm -> datetime.date(y, m, 1)
xls['Series_Code'] = ix = xls['Series_Code'].map(date_conv)
# reset index to dates
xls = xls.set_index(ix)
# py27 has rounding issues... csv (string) output results in long decimals
In [ ]:
# export to csv
xls.to_csv('/tmp/out.csv')
In [ ]:
# print out Commodity Index Key Descriptions
_d = descriptions.to_dict()
d = {k: v[0] for k,v in _d.iteritems()}
for commodity, desc in d.iteritems():
print '{:13s}: {}'.format(red(commodity), desc)
In [ ]: